Getting nulls while selecting a dataframe from a JSON file in PySpark - json
I am using Spark 3.1 and trying to read a JSON file.
I have defined the schema for the file below as:
StructType([
StructField('search_metadata', MapType(StringType(),StringType())),
StructField('search_parameters', MapType(StringType(),StringType())),
StructField('search_information', MapType(StringType(),StringType())),
StructField('local_results',StructType([
StructField('position', StringType(), True),
StructField('title', StringType(), True),
StructField('place_id', StringType(), True),
StructField('data_id', StringType(), True),
StructField('data_cid', StringType(), True),
StructField('reviews_link', StringType(), True),
StructField('photos_link', StringType(), True),
StructField('gps_coordinates', MapType(StringType(),StringType()), True),
StructField('place_id_search', StringType(), True),
StructField('unclaimed_listing', BinaryType(), True),
StructField('type', StringType(), True),
StructField('address', StringType(), True),
StructField('open_state', StringType(), True),
StructField('hours', StringType(), True),
StructField('phone', MapType(StringType(),StringType()), True),
StructField('thumbnail', StringType(), True),
]), True),
StructField('serpapi_pagination',MapType(StringType(),StringType())),
StructField('search_query', StringType(), True),
])
And my JSON file is:
[{
"search_metadata": {
"id": "63560cab66440a949ade5d72",
"status": "Success",
"json_endpoint": "https://serpapi.com/searches/b6986ff9ff715b13/63560cab66440a949ade5d72.json",
"created_at": "2022-10-24 03:55:23 UTC",
"processed_at": "2022-10-24 03:55:23 UTC",
"google_maps_url": "https://www.google.com/maps/search/WH?hl=en",
"raw_html_file": "https://serpapi.com/searches/b6986ff9ff715b13/63560cab66440a949ade5d72.html",
"total_time_taken": 1.91
},
"search_parameters": {
"engine": "google_maps",
"type": "search",
"q": "WH",
"google_domain": "google.com",
"hl": "en"
},
"search_information": {
"local_results_state": "Results for exact spelling",
"query_displayed": "WH"
},
"local_results": [{
"position": 1,
"title": "WH International Casting, LLC",
"place_id": "ChIJh0wvXcu_a4gRWuH-O1ltlPg",
"data_id": "0x886bbfcb5d2f4c87:0xf8946d593bfee15a",
"data_cid": "17912061847985381722",
"reviews_link": "https://serpapi.com/search.json?data_id=0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a&engine=google_maps_reviews&hl=en",
"photos_link": "https://serpapi.com/search.json?data_id=0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a&engine=google_maps_photos&hl=en",
"gps_coordinates": {
"latitude": 38.295865,
"longitude": -85.73001099999999
},
"place_id_search": "https://serpapi.com/search.json?data=%214m5%213m4%211s0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a%218m2%213d38.295865%214d-85.73001099999999&engine=google_maps&google_domain=google.com&hl=en&type=place",
"unclaimed_listing": true,
"type": "Warehouse",
"address": "260 America Pl Dr, Jeffersonville, IN 47130",
"open_state": "Closed ⋅ Opens 8AM Mon",
"hours": "Closed ⋅ Opens 8AM Mon",
"operating_hours": {
"sunday": "Closed",
"monday": "8AM–4:30PM",
"tuesday": "8AM–4:30PM",
"wednesday": "8AM–4:30PM",
"thursday": "8AM–4:30PM",
"friday": "8AM–4:30PM",
"saturday": "Closed"
},
"phone": "(812) 725-8029",
"thumbnail": "https://lh5.googleusercontent.com/p/AF1QipPWDyyzxp1MG27vv3WVZbzy5WVI-Qh2u2jEDb-C=w122-h92-k-no"
},
{
"position": 2,
"title": "W.H. Smith Manor",
"place_id": "ChIJ9584e22DXIgR5w2f2saKBOU",
"data_id": "0x885c836d7b389ff7:0xe5048ac6da9f0de7",
"data_cid": "16502467521268354535",
"reviews_link": "https://serpapi.com/search.json?data_id=0x885c836d7b389ff7%3A0xe5048ac6da9f0de7&engine=google_maps_reviews&hl=en",
"photos_link": "https://serpapi.com/search.json?data_id=0x885c836d7b389ff7%3A0xe5048ac6da9f0de7&engine=google_maps_photos&hl=en",
"gps_coordinates": {
"latitude": 36.581589799999996,
"longitude": -83.6581731
},
"place_id_search": "https://serpapi.com/search.json?data=%214m5%213m4%211s0x885c836d7b389ff7%3A0xe5048ac6da9f0de7%218m2%213d36.581589799999996%214d-83.6581731&engine=google_maps&google_domain=google.com&hl=en&type=place",
"unclaimed_listing": true,
"type": "University department",
"address": "184 Robertson Ave, Harrogate, TN 37752",
"open_state": "Closed ⋅ Opens 8AM Mon",
"hours": "Closed ⋅ Opens 8AM Mon",
"operating_hours": {
"sunday": "Closed",
"monday": "8AM–4:30PM",
"tuesday": "8AM–4:30PM",
"wednesday": "8AM–4:30PM",
"thursday": "8AM–4:30PM",
"friday": "8AM–4:30PM",
"saturday": "Closed"
},
"phone": "(423) 869-3611",
"website": "http://lmunet.edu/",
"thumbnail": "https://streetviewpixels-pa.googleapis.com/v1/thumbnail?panoid=mJwpOER-2yIbmD3xSwQ2pQ&cb_client=search.gws-prod.gps&w=80&h=92&yaw=307.97266&pitch=0&thumbfov=100"
}
],
"serpapi_pagination": {
"next": "https://serpapi.com/search.json?engine=google_maps&google_domain=google.com&hl=en&q=WH&start=20&type=search"
},
"search_query": "WH.json"
}]
I am trying to select some columns as shown below, but I am getting null values. I assume there is an issue with my schema definition.
df = df.select(col('local_results'),
col('local_results.position').alias('position'),
col('local_results.title').alias('title'))
df.show()
Can someone help me to correct the schema and show the results?
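You get nulls because your schema definition does not match the file. In the JSON, local_results is an array of objects, but your schema declares it as a single StructType instead of ArrayType(StructType([...])); in addition, unclaimed_listing is a boolean (not BinaryType) and phone is a plain string (not a map).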
To find out correct schema, you can try reading the file without schema like this:
df = spark.read.option('multiline', 'true').json('file.json')
Then you can use the inferred schema as a starting point and change only the fields where another type is acceptable (for example, replacing a struct with a map).
With the inferred schema, the select works:
from pyspark.sql import functions as F
df = df.select(F.col('local_results'),
F.col('local_results.position').alias('position'),
F.col('local_results.title').alias('title'))
df.show()
# +--------------------+--------+--------------------+
# | local_results|position| title|
# +--------------------+--------+--------------------+
# |[{260 America Pl ...| [1, 2]|[WH International...|
# +--------------------+--------+--------------------+
Schemas
Using spark.read.option('multiline', 'true').json('file.json'), this schema is created:
StructType([
StructField('local_results', ArrayType(StructType([
StructField('address', StringType(), True),
StructField('data_cid', StringType(), True),
StructField('data_id', StringType(), True),
StructField('gps_coordinates', StructType([
StructField('latitude', DoubleType(), True),
StructField('longitude', DoubleType(), True)
]), True),
StructField('hours', StringType(), True),
StructField('open_state', StringType(), True),
StructField('operating_hours', StructType([
StructField('friday', StringType(), True),
StructField('monday', StringType(), True),
StructField('saturday', StringType(), True),
StructField('sunday', StringType(), True),
StructField('thursday', StringType(), True),
StructField('tuesday', StringType(), True),
StructField('wednesday', StringType(), True)
]), True),
StructField('phone', StringType(), True),
StructField('photos_link', StringType(), True),
StructField('place_id', StringType(), True),
StructField('place_id_search', StringType(), True),
StructField('position', LongType(), True),
StructField('reviews_link', StringType(), True),
StructField('thumbnail', StringType(), True),
StructField('title', StringType(), True),
StructField('type', StringType(), True),
StructField('unclaimed_listing', BooleanType(), True),
StructField('website', StringType(), True)
]), True), True),
StructField('search_information', StructType([
StructField('local_results_state', StringType(), True),
StructField('query_displayed', StringType(), True)
]), True),
StructField('search_metadata', StructType([
StructField('created_at', StringType(), True),
StructField('google_maps_url', StringType(), True),
StructField('id', StringType(), True),
StructField('json_endpoint', StringType(), True),
StructField('processed_at', StringType(), True),
StructField('raw_html_file', StringType(), True),
StructField('status', StringType(), True),
StructField('total_time_taken', DoubleType(), True)
]), True),
StructField('search_parameters', StructType([
StructField('engine', StringType(), True),
StructField('google_domain', StringType(), True),
StructField('hl', StringType(), True),
StructField('q', StringType(), True),
StructField('type', StringType(), True)
]), True),
StructField('search_query', StringType(), True),
StructField('serpapi_pagination', StructType([
StructField('next', StringType(), True)
]), True)
])
You can extract it using df.schema.
It differs from yours because your schema declares some columns as map type. If you need map columns, you can still use MapType for the fields that are JSON objects.
The following schema would also work:
schema = StructType([
StructField('local_results', ArrayType(StructType([
StructField('address', StringType(), True),
StructField('data_cid', StringType(), True),
StructField('data_id', StringType(), True),
StructField('gps_coordinates', MapType(StringType(), StringType()), True),
StructField('hours', StringType(), True),
StructField('open_state', StringType(), True),
StructField('operating_hours', MapType(StringType(), StringType()), True),
StructField('phone', StringType(), True),
StructField('photos_link', StringType(), True),
StructField('place_id', StringType(), True),
StructField('place_id_search', StringType(), True),
StructField('position', LongType(), True),
StructField('reviews_link', StringType(), True),
StructField('thumbnail', StringType(), True),
StructField('title', StringType(), True),
StructField('type', StringType(), True),
StructField('unclaimed_listing', BooleanType(), True),
StructField('website', StringType(), True)
]), True), True),
StructField('search_information', MapType(StringType(), StringType()), True),
StructField('search_metadata', MapType(StringType(), StringType()), True),
StructField('search_parameters', MapType(StringType(), StringType()), True),
StructField('search_query', StringType(), True),
StructField('serpapi_pagination', MapType(StringType(), StringType()), True)
])
df = spark.read.option('multiline', 'true').json('file.json', schema)
For full flattening even the following would work:
schema = StructType([
StructField('local_results', ArrayType(MapType(StringType(), StringType()), True), True),
StructField('search_information', MapType(StringType(), StringType()), True),
StructField('search_metadata', MapType(StringType(), StringType()), True),
StructField('search_parameters', MapType(StringType(), StringType()), True),
StructField('search_query', StringType(), True),
StructField('serpapi_pagination', MapType(StringType(), StringType()), True)
])
df = spark.read.option('multiline', 'true').json('file.json', schema)
Result:
df.show(truncate=0)

# |local_results |search_information |search_metadata |search_parameters |search_query|serpapi_pagination |

# |[{position -> 1, title -> WH International Casting, LLC, place_id -> ChIJh0wvXcu_a4gRWuH-O1ltlPg, data_id -> 0x886bbfcb5d2f4c87:0xf8946d593bfee15a, data_cid -> 17912061847985381722, reviews_link -> https://serpapi.com/search.json?data_id=0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a&engine=google_maps_reviews&hl=en, photos_link -> https://serpapi.com/search.json?data_id=0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a&engine=google_maps_photos&hl=en, gps_coordinates -> {"latitude":38.295865,"longitude":-85.73001099999999}, place_id_search -> https://serpapi.com/search.json?data=%214m5%213m4%211s0x886bbfcb5d2f4c87%3A0xf8946d593bfee15a%218m2%213d38.295865%214d-85.73001099999999&engine=google_maps&google_domain=google.com&hl=en&type=place, unclaimed_listing -> true, type -> Warehouse, address -> 260 America Pl Dr, Jeffersonville, IN 47130, open_state -> Closed ⋅ Opens 8AM Mon, hours -> Closed ⋅ Opens 8AM Mon, operating_hours -> {"sunday":"Closed","monday":"8AM–4:30PM","tuesday":"8AM–4:30PM","wednesday":"8AM–4:30PM","thursday":"8AM–4:30PM","friday":"8AM–4:30PM","saturday":"Closed"}, phone -> (812) 725-8029, thumbnail -> https://lh5.googleusercontent.com/p/AF1QipPWDyyzxp1MG27vv3WVZbzy5WVI-Qh2u2jEDb-C=w122-h92-k-no}, {position -> 2, title -> W.H. 
Smith Manor, place_id -> ChIJ9584e22DXIgR5w2f2saKBOU, data_id -> 0x885c836d7b389ff7:0xe5048ac6da9f0de7, data_cid -> 16502467521268354535, reviews_link -> https://serpapi.com/search.json?data_id=0x885c836d7b389ff7%3A0xe5048ac6da9f0de7&engine=google_maps_reviews&hl=en, photos_link -> https://serpapi.com/search.json?data_id=0x885c836d7b389ff7%3A0xe5048ac6da9f0de7&engine=google_maps_photos&hl=en, gps_coordinates -> {"latitude":36.581589799999996,"longitude":-83.6581731}, place_id_search -> https://serpapi.com/search.json?data=%214m5%213m4%211s0x885c836d7b389ff7%3A0xe5048ac6da9f0de7%218m2%213d36.581589799999996%214d-83.6581731&engine=google_maps&google_domain=google.com&hl=en&type=place, unclaimed_listing -> true, type -> University department, address -> 184 Robertson Ave, Harrogate, TN 37752, open_state -> Closed ⋅ Opens 8AM Mon, hours -> Closed ⋅ Opens 8AM Mon, operating_hours -> {"sunday":"Closed","monday":"8AM–4:30PM","tuesday":"8AM–4:30PM","wednesday":"8AM–4:30PM","thursday":"8AM–4:30PM","friday":"8AM–4:30PM","saturday":"Closed"}, phone -> (423) 869-3611, website -> http://lmunet.edu/, thumbnail -> https://streetviewpixels-pa.googleapis.com/v1/thumbnail?panoid=mJwpOER-2yIbmD3xSwQ2pQ&cb_client=search.gws-prod.gps&w=80&h=92&yaw=307.97266&pitch=0&thumbfov=100}]|{local_results_state -> Results for exact spelling, query_displayed -> WH}|{id -> 63560cab66440a949ade5d72, status -> Success, json_endpoint -> https://serpapi.com/searches/b6986ff9ff715b13/63560cab66440a949ade5d72.json, created_at -> 2022-10-24 03:55:23 UTC, processed_at -> 2022-10-24 03:55:23 UTC, google_maps_url -> https://www.google.com/maps/search/WH?hl=en, raw_html_file -> https://serpapi.com/searches/b6986ff9ff715b13/63560cab66440a949ade5d72.html, total_time_taken -> 1.91}|{engine -> google_maps, type -> search, q -> WH, google_domain -> google.com, hl -> en}|WH.json |{next -> https://serpapi.com/search.json?engine=google_maps&google_domain=google.com&hl=en&q=WH&start=20&type=search}|

But since all map values are forced to be strings, the inner objects cannot become maps. E.g. gps_coordinates result after the full flattening would look like this:
gps_coordinates -> {"latitude":38.295865,"longitude":-85.73001099999999}
Related
pygal + Bar Chart + Changing the color of values within the same series depending on the x_label
Hello People. I would like to use pygal to plot an image as shown in the one above: Plotting the same series with different colors based on the x_label. What I would like to show, basically, is the difference between weekdays and weekends. However, it seems that pygal only allows color settings per series. What I was able to accomplish so far is one of the following: 1- consider each day of the week as a series of its own, with a color of its own, without showing the x_lables. code: import pygal from pygal.style import Style y_values = [12.85, 12.78, 13.74, 16.73, 12.52, 3.71, 1.96] x_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] filename = 'barchar_test.png' chart_config = { "human_readable": True, "pretty_print": True, "truncate_legend": -1, "value_font_size": 15, "print_values": True, "show_legend": False, "print_values_position": "top", "print_labels": True, "value_formatter": lambda x: "{0: .2f}".format(x), } style_config = { "font_family": "googlefont:lato", "plot_background": "white", "value_font_size": 15, "show_y_guides": False, "show_y_labels": False, "colors": ("#0099d6", "#0099d6", "#0099d6", "#0099d6", "#0099d6", "#6d6f71", "#6d6f71"), } def _plot_bar_chart(y_values, x_labels, filename): bar_chart = pygal.Bar(style=Style(**style_config), **chart_config) for i, item in enumerate(y_values): bar_chart.add( x_labels[i], {item}, ) bar_chart.render_to_png(filename) _plot_bar_chart(y_values, x_labels, filename) 2- consider the values as one series, displaying the x_labels, but only in one color: code: dimport pygal from pygal.style import Style y_values = [12.85, 12.78, 13.74, 16.73, 12.52, 3.71, 1.96] x_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] filename = 'barchar_test.png' chart_config = { "human_readable": True, "pretty_print": True, "truncate_legend": -1, "value_font_size": 15, "print_values": True, "show_legend": False, "print_values_position": "top", "print_labels": True, "value_formatter": lambda x: "{0: .2f}".format(x), } 
style_config = { "font_family": "googlefont:lato", "plot_background": "white", "value_font_size": 15, "show_y_guides": False, "show_y_labels": False, "colors": ("#0099d6", "#0099d6", "#0099d6", "#0099d6", "#0099d6", "#6d6f71", "#6d6f71"), } def _plot_bar_chart(y_values, x_labels, filename): bar_chart = pygal.Bar(style=Style(**style_config), **chart_config) bar_chart.x_labels = x_labels bar_chart.add('', y_values) bar_chart.render_to_png(filename) _plot_bar_chart(y_values, x_labels, filename) 3- Consider the values as separate series, show the x_labels, but have all the bars stacked in the first x_label: code: import pygal from pygal.style import Style y_values = [12.85, 12.78, 13.74, 16.73, 12.52, 3.71, 1.96] x_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] filename = 'barchar_test.png' chart_config = { "human_readable": True, "pretty_print": True, "truncate_legend": -1, "value_font_size": 15, "print_values": True, "show_legend": False, "print_values_position": "top", "print_labels": True, "value_formatter": lambda x: "{0: .2f}".format(x), } style_config = { "font_family": "googlefont:lato", "plot_background": "white", "value_font_size": 15, "show_y_guides": False, "show_y_labels": False, "colors": ("#0099d6", "#0099d6", "#0099d6", "#0099d6", "#0099d6", "#6d6f71", "#6d6f71"), } def _plot_bar_chart(y_values, x_labels, filename): bar_chart = pygal.Bar(style=Style(**style_config), **chart_config) bar_chart.x_labels = x_labels for i, item in enumerate(y_values): bar_chart.add( x_labels[i], {item}, ) bar_chart.render_to_png(filename) _plot_bar_chart(y_values, x_labels, filename) Any thoughts on this? Thanks :)
Invoking endpoint with CURL- Post method and json is corrupt
I am invoking an endpoint of mine with CURL with the following command: curl -H "Content-Type: application/json" -X POST -d '{"TransmissionID":"SO000001","CustomerSO":"SO000001","EndUserName":"Roi_Test","Hold":"","RequestedDate":"2019-02-24 15:00","Currency":"EUR","Address1":"Calle del Maestro Bagant","Address2":"","BuildingName":"","BuildingNumber":"1","Floor":"1","ContactPerson":"roi","City":"Valencia","CountryAbbriviation":"ES","Email":"aaa#joomi.co.il","Phone":"050-7680249","Zip":"46015","Remark":"","Incoterm":"DDP","Status":"","Item":["OrderLine":1,"ItemName":"cl111","ItemDescription":"Description","Quantity":"1.","PriceCurrency":"EUR","Price":"219.9","HSCode":"9900000003463","AWBNumber":"","CarrierName":"PostNL","CountryOfManufacturer":"CN","Base64String":""],"NumberOfSku":1,"NumberOfUnits":1}' "http://server.com/Magicxpi4.6/MgWebRequester.dll?appname=IFSCarolina_Prod&prgname=HTTP&arguments=-AREST_Incoming%%23IncomingFile" and the JSON received is as follow: {TransmissionID:SO000001,CustomerSO:SO000001,EndUserName:Roi_Test,Hold:,RequestedDate:2019-02-24 15:00,Currency:EUR,Address1:Calle del Maestro Bagant,Address2:,BuildingName:,BuildingNumber:1,Floor:1,ContactPerson:roi,City:Valencia,CountryAbbriviation:ES,Email:aaa#joomi.co.il,Phone:050-7680249,Zip:46015,Remark:,Incoterm:DDP,Status:,Item:[OrderLine:1,ItemName:cl111,ItemDescription:Description,Quantity:1.,PriceCurrency:EUR,Price:219.9,HSCode:9900000003463,AWBNumber:,CarrierName:PostNL,CountryOfManufacturer:CN,Base64String:],NumberOfSku:1,NumberOfUnits:1} The data received is looking like a string not like a JSON, the fields and values are missing the " signs... I have already tried to change between single/double quotes and it did not work. Any ideas on how to resolve this?
you ARE sending a corrupt json, specifically "Item": [ "OrderLine": 1, "ItemName": "cl111", "ItemDescription": "Description", "Quantity": "1.", "PriceCurrency": "EUR", "Price": "219.9", "HSCode": "9900000003463", "AWBNumber": "", "CarrierName": "PostNL", "CountryOfManufacturer": "CN", "Base64String": "" ], is not valid JSON. in PHP this would be a legal array, as PHP allows string-keys in arrays, but JSON (and JavaScript) does not. but in JSON, objects can have string keys, so the closest thing you'll get to a legal json would be to make "Item" an object instead of an array, for example this would be legal JSON: { "TransmissionID": "SO000001", "CustomerSO": "SO000001", "EndUserName": "Roi_Test", "Hold": "", "RequestedDate": "2019-02-24 15:00", "Currency": "EUR", "Address1": "Calle del Maestro Bagant", "Address2": "", "BuildingName": "", "BuildingNumber": "1", "Floor": "1", "ContactPerson": "roi", "City": "Valencia", "CountryAbbriviation": "ES", "Email": "aaa#joomi.co.il", "Phone": "050-7680249", "Zip": "46015", "Remark": "", "Incoterm": "DDP", "Status": "", "Item": { "OrderLine": 1, "ItemName": "cl111", "ItemDescription": "Description", "Quantity": "1.", "PriceCurrency": "EUR", "Price": "219.9", "HSCode": "9900000003463", "AWBNumber": "", "CarrierName": "PostNL", "CountryOfManufacturer": "CN", "Base64String": "" }, "NumberOfSku": 1, "NumberOfUnits": 1 } btw, are you hand-crafting such large jsons complex jsons? i think you should switch to a scripting language instead to make it more readable and maintainable, ... 
for example, here is how to do it with PHP-cli: #!/usr/bin/env php <?php $ch = curl_init(); curl_setopt_array($ch, array( CURLOPT_URL => 'http://server.com/Magicxpi4.6/MgWebRequester.dll?appname=IFSCarolina_Prod&prgname=HTTP&arguments=-AREST_Incoming%%23IncomingFile', CURLOPT_HTTPHEADER => array( 'Content-Type: application/json' ) , CURLOPT_POST => 1, CURLOPT_POSTFIELDS => json_encode(array( 'TransmissionID' => 'SO000001', 'CustomerSO' => 'SO000001', 'EndUserName' => 'Roi_Test', 'Hold' => '', 'RequestedDate' => '2019-02-24 15:00', 'Currency' => 'EUR', 'Address1' => 'Calle del Maestro Bagant', 'Address2' => '', 'BuildingName' => '', 'BuildingNumber' => 1, 'Floor' => 1, 'ContactPerson' => 'roi', 'City' => 'Valencia', 'CountryAbbriviation' => 'ES', 'Email' => 'aaa#joomi.co.il', 'Phone' => '050-7680249', 'Zip' => '46015', 'Remark' => '', 'Incoterm' => 'DDP', 'Status' => '', 'Item' => array( 'OrderLine' => 1, 'ItemName' => 'cl111', 'ItemDescription' => 'Description', 'Quantity' => '1.', 'PriceCurrency' => 'EUR', 'Price' => 219.9, 'HSCode' => '9900000003463', 'AWBNumber' => '', 'CarrierName' => 'PostNL', 'CountryOfManufacturer' => 'CN', 'Base64String' => '', ) , 'NumberOfSku' => 1, 'NumberOfUnits' => 1, )) , )); curl_exec($ch); curl_close($ch); 2 last things, the "Quantity" of the Item in your JSON is 1. - is the dot supposed to be there, or is it a typo? i haven't touched the PayPal REST api in a long time, but this reminds me of the PayPal REST api, and in that API, i believe Item is supposed to be an array-of-objects, not just an object, if that's what you want then it would be "Item":[{...}] (in JSON) or 'Item' => array(array(...)) (in PHP)
Chartjs does not show on pdf in yii2 despite showing in html view
I have developed an app using Yii2, mpdf and chartjs. The page works well in html but doesn't show the chart when I export to pdf using mpdf. How can I solve this? <?= ChartJs::widget([ 'type' => 'line', 'options' => [ 'height' => 400, 'width' => 400, 'show_as_html'=> true ], 'data' => [ 'labels' => ["January", "February", "March", "April", "May", "June", "July"], 'datasets' => [ [ 'label' => "Sales", 'backgroundColor' => "rgba(255,99,132,0.2)", 'borderColor' => "rgba(255,99,132,1)", 'pointBackgroundColor' => "rgba(255,99,132,1)", 'pointBorderColor' => "#fff", 'pointHoverBackgroundColor' => "#fff", 'pointHoverBorderColor' => "rgba(255,99,132,1)", 'data' => [28, 48, 40, 19, 96, 27, 100] ] ] ] ]); ?>
The mPDF docs (https://mpdf.github.io/html-support/html-tags.html) show that canvas and script tags are not supported. Most charting libraries render to a canvas, and Chart.js does too: https://www.chartjs.org/docs/latest/general/responsive.html Most PDF generators do not support rich HTML and charts. I suggest you use PhantomJS for generating the PDF: http://phantomjs.org/ http://jonnnnyw.github.io/php-phantomjs/ For PDF output specifically: http://jonnnnyw.github.io/php-phantomjs/4.0/3-usage/#output-to-pdf
Doctrine json_array config wrong order in keys
I have this weird problem with the json_array field configuration. I have configured a field meant to store some configuration. It is configured like this: <field name="config" type="json_array" /> For example, I have an array like this: [ 'choices' => [ 'Other' => 'other', 'Male' => 'male', 'Female' => 'female' ] ] I set the entity property: $entity->setConfig($config); And I persist it to the database. The result is this: "choices": { "Male": "male", "Other": "other", "Female": "female" } When I do json_encode on the same array, the order is not changed, but somehow Doctrine does change the order. Is there a way to prevent this from happening?
Using one of the following list-based (numerically indexed) versions will prevent this behaviour, since JSON arrays keep their element order whereas the key order of a JSON object is not guaranteed: $v1 = [ 'choices' => [ 'Other', 'Male', 'Female' ] ]; $v2 = [ 'choices' => [ ['label' => 'Other', 'value' => 'other'], ['label' => 'Male', 'value' => 'male'], ['label' => 'Female', 'value' => 'female'] ] ]; You can find more information here: Does JavaScript Guarantee Object Property Order?
SPARK : How to create aggregate from RDD[Row] in Scala
How do I create a List/Map inside a RDD/DF so that I can get the aggregate ? I have a file where each row is a JSON object : { itemId :1122334, language: [ { name: [ "US", "FR" ], value: [ "english", "french" ] }, { name: [ "IND" ], value: [ "hindi" ] } ], country: [ { US: [ { startTime: 2016-06-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], CANADA: [ { startTime: 2016-06-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], DENMARK: [ { startTime: 2016-06-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], FRANCE: [ { startTime: 2016-08-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ] } ] }, { itemId :1122334, language: [ { name: [ "US", "FR" ], value: [ "english", "french" ] }, { name: [ "IND" ], value: [ "hindi" ] } ], country: [ { US: [ { startTime: 2016-06-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], CANADA: [ { startTime: 2016-07-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], DENMARK: [ { startTime: 2016-06-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ], FRANCE: [ { startTime: 2016-08-06T17: 39: 35.000Z, endTime: 2016-07-28T07: 00: 00.000Z } ] } ] } I have matching POJO which gets me the values from the JSON. 
import com.mapping.data.model.MappingUtils import com.mapping.data.model.CountryInfo val mappingPath = "s3://.../" val timeStamp = "2016-06-06T17: 39: 35.000Z" val endTimeStamp = "2016-06-07T17: 39: 35.000Z" val COUNTRY_US = "US" val COUNTRY_CANADA = "CANADA" val COUNTRY_DENMARK = "DENMARK" val COUNTRY_FRANCE = "FRANCE" val input = sc.textFile(mappingPath) The input is list of jsons where each line is json which I am mapping to the POJO class CountryInfo using MappingUtils which takes care of JSON parsing and conversion: val MappingsList = input.map(x=> { val countryInfo = MappingUtils.getCountryInfoString(x); (countryInfo.getItemId(), countryInfo) }).collectAsMap MappingsList: scala.collection.Map[String,com.mapping.data.model.CountryInfo] def showCountryInfo(x: Option[CountryInfo]) = x match { case Some(s) => s } But I need to create a DF/RDD so that I can get the aggregates of country and language for based on itemId. In the given example, if the country's start time is not lesser than "2016-06-07T17: 39: 35.000Z" then the value will be zero. Which format will be good to create the final aggregate json : 1. List ? |-----itemId-------|----country-------------------|-----language---------------------| | 1122334 | [US, CANADA,DENMARK] | [english,hindi,french] | | 1122334 | [US,DENMARK] | [english] | |------------------|------------------------------|----------------------------------| 2. Map ? |-----itemId-------|----country---------------------------------|-----language---------------------| | 1122334 | (US,2) (CANADA,1) (DENMARK,2) (FRANCE, 0) |(english,2) (hindi,1) (french,1) | |.... | |.... | |.... 
| |------------------|--------------------------------------------|----------------------------------| I would like to create a final json which has the aggregate value like : { itemId: "1122334", country: { "US" : 2, "CANADA" : 1, "DENMARK" : 2, "FRANCE" : 0 }, language: { "english" : 2, "french" : 1, "hindi" : 1 } } I tried List : val events = sqlContext.sql( "select itemId EventList") val itemList = events.map(row => { val itemId = row.getAs[String](1); val countryInfo = showTitleInfo(MappingsList.get(itemId)); val country = new ListBuffer[String]() country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) COUNTRY_US; country += if (countryInfo.getCountry().getCANADA().get(0).getStartTime() < endTimeStamp) COUNTRY_CANADA; country += if (countryInfo.getCountry().getDENMARK().get(0).getStartTime() < endTimeStamp) COUNTRY_DENMARK; country += if (countryInfo.getCountry().getFRANCE().get(0).getStartTime() < endTimeStamp) COUNTRY_FRANCE; val languageList = new ListBuffer[String]() val language = countryInfo.getLanguages().collect.foreach(x => languageList += x.getValue()); Row(itemId, country.toList, languageList.toList) }) and Map : val itemList = events.map(row => { val itemId = row.getAs[String](1); val countryInfo = showTitleInfo(MappingsList.get(itemId)); val country: Map[String, Int] = Map() country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_US' -> 1) else ('COUNTRY_US' -> 0) country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_CANADA' -> 1) else ('COUNTRY_CANADA' -> 0) country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_DENMARK' -> 1) else ('COUNTRY_DENMARK' -> 0) country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_FRANCE' -> 1) else ('COUNTRY_FRANCE' -> 0) val language: Map[String, Int] = Map() countryInfo.getLanguages().collect.foreach(x => language += 
(x.getValue -> 1)) ; Row(itemId, country, language) }) But both approaches freeze in Zeppelin. Is there a better way to get the aggregates as JSON? Which is better for constructing the final aggregate, a List or a Map?
It would be helpful if you restated your question in terms of Spark DataFrame/Dataset and Row; I understand that you ultimately want to use JSON, but the details of the JSON input/output are a separate concern. The function you are looking for is a Spark SQL aggregate function (see the group of them on that page). The functions collect_list and collect_set are related, but the function you need is not already implemented. You can implement what I'll call count_by_value by deriving from org.apache.spark.sql.expressions.UserDefinedAggregateFunction. This will require some in-depth knowledge of how Spark SQL works. Once count_by_value is implemented, you can use it like this: df.groupBy("itemId").agg(count_by_value(df("country")), count_by_value(df("language")))