Reading JSON in Spark - json
I'm reading a JSON file in spark but the dataframe output keeps giving me a table with all null values when I use df.show()....................... I was just wondering if there's something with my structure schema? Or maybe I'm missing something? I've also attached the json data below. Any help would be appreciate it.....................................
discount_type = StructType([StructField("amount", IntegerType(), True),
StructField("description", StringType(), True)
])
child_item_type = StructType([StructField("InsuranceNumber", StringType(), True),
StructField("InsuranceLabel", StringType(), True),
StructField("Insurancequantity", DoubleType(), True),
StructField("Insuranceprice", IntegerType(), True),
StructField("discountsreceived",discount_type , True),
])
item_type = StructType([StructField("InsuranceNumber", StringType(), True),
StructField("InsuranceLabel", StringType(), True),
StructField("Insurancequantity", IntegerType(), True),
StructField("Insuranceprice", IntegerType(), True),
StructField("discountsreceived", discount_type, True),
StructField("childItems",child_item_type , True),
])
order_paid_type = StructType([StructField("Insuranceid", StringType(), True),
StructField("Insurancedesc", StringType(), True),
StructField("purchaseditems", item_type, True),
])
message_type = StructType([StructField("PaidIn", order_paid_type, True)])
data_type = StructType([StructField("Client", message_type, True)])
body_type = StructType([StructField("id", StringType(), True),
StructField("InsuranceProvider", StringType(), True),
StructField("Type", data_type, True),
StructField("eventTime", StringType(), True),
])
[
df = spark.read.schema(body_type).json(INPUT_FILE)
[
{
"id": "164651478631223455788978942317",
"InsuranceProvider": "Embroker",
"Type": {
"Client": {
"PaidIn": {
"Insuranceid": "97331549875122744335422",
"Insurancedesc": "Magic happens here",
"purchaseditems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "DNO",
"Insurancequantity": 1,
"Insuranceprice": 345,
"discountsreceived": [
{
"amount": 495,
"description": "Item 1, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 2495,
"description": "Insurance item 1, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "2",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": -295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 495,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "3",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 400,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "4",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 335,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "5",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 1295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 195,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "6",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 805,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 501,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "7",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 521,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 533,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "8",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 422,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 333,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "9",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 444,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 666,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "10",
"InsuranceLabel": "DNO",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 10,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 63,
"description": "Insurance item 2, Discount 1"
}
]
}
]
}
]
}
}
},
"eventTime": "2020-05-19T01:59:10.379Z"
}
]
Related
Laravel Left Join ( get particular field ) and Group by ID
Using Laravel 8.12 Unable to get Printer title details in Child Array. Order Table : id, name, printer_id printer Table : id, title Resources Table : id. name $resources = Resource::with('orders') ->leftJoin('orders', 'resources.id', '=', 'orders.resource_id') ->leftJoin('printers', 'orders.printer_id', '=', 'printers.id') ->select('resources.*') ->groupBy('resources.id'); Using this code i am getting below result. But missing "Title" in child array. Current Output Result [ { "id": 2, "title": "user 2", "orders": [ { "id": 190, "resource_id": "2", "printer_id": 2 }, { "id": 193, "resource_id": "2", "printer_id": 3 }, ] }, { "id": 3, "title": "user 3", "orders": [ { "id": 207, "resource_id": "3", "printer_id": 3 }, ] }, { "id": 4, "title": "user 4", "orders": [ { "id": 466, "resource_id": "4", "printer_id": 4, }, { "id": 370, "resource_id": "4", "printer_id": 5, } ] } ] Required Result [ { "id": 2, "orders": [ { "id": 190, "resource_id": "2", "title": "user 2", "printer_id": 2 }, { "id": 193, "resource_id": "2", "title": "user 3", "printer_id": 3 }, ] }, { "id": 3, "orders": [ { "id": 207, "title": "user 3", "resource_id": "4", "printer_id": 3 }, ] }, { "id": 4, "orders": [ { "id": 466, "title": "user 4", "resource_id": "4", "printer_id": 4, }, { "id": 370, "title": "user 5", "resource_id": "4", "printer_id": 5, } ] } ] Missing Printer table "title" in child array.
You can simply run $resources = Resource::with('orders.printer')->paginate(6); an you will get all the data you need [ { "id": 2, "title": "user 2", "orders": [ { "id": 190, "resource_id": "2", "printer_id": 2, "printer" : { "id" : 2, "title" : "user 2" } }, { "id": 193, "resource_id": "2", "printer_id": 3, "printer" : { "id" : 3, "title" : "user 3" } } ] } ] Edit You need the printer() relation declared in the model Order::class class Order extends Model { public function printer() { return $this->belongsTo(Printer::class); } }
How produces statistics with laravel?
I work on a laravel application that is a bit like a quiz. You create an exercise, you add questions and each question to answers. Until then everything is fine. My interest is to present statistics for each exercise after schoolchildren have answered the tests. I want to be able to display the answer percentage for each question. Here is an example of the data structure I have: [ { "id": 1, "exercice_id": 1, "question": "Lorem ipsum ?", "responses": [ { "id": 1, "exercice_id": 1, "question_id": 1, "response_text": "Yes", }, { "id": 2, "exercice_id": 1, "question_id": 1, "response_text": "No", } ], "choice": [ { "id": 1, "exercice_id": 1, "question_id": 1, "response_id": 1, }, { "id": 2, "exercice_id": 1, "question_id": 1, "response_id": 1, }, { "id": 3, "exercice_id": 1, "question_id": 1, "response_id": 2, } ] }, { "id": 2, "exercice_id": 1, "question": "fake text ?", "responses": [ { "id": 3, "exercice_id": 1, "question_id": 2, "response_text": "A", }, { "id": 4, "exercice_id": 1, "question_id": 2, "response_text": "B", }, { "id": 5, "exercice_id": 1, "question_id": 2, "response_text": "C", } ], "choice": [ { "id": 4, "exercice_id": 1, "question_id": 2, "response_id": 5, }, { "id": 5, "exercice_id": 1, "question_id": 2, "response_id": 3, } ] } ] I tried the groupBy method on several elements. but I have not found the formula yet. return response()->json(Question::with('responses','choice') ->where('exercice_id',$exo->id) //->groupBy('choice') ->get(), 200,[], JSON_NUMERIC_CHECK);
Export mysql data to JSON in a nested and recursivelty way starting from a table?
I have seen several examples to export MySQL tables to JSON, however such examples export data in an aggregated way. For example, take a database where you have two tables "Invoice head" and "Invoice details". "Invoice details" is a child of "Invoice head".The data in JSON is usually represented like: { "invoice_head": [ { "number": 1, "cliente": "Carlos", "date": "2016-12-12" }, { "number": 2, "cliente": "Fernando", "date": "2017-01-01" } ], "invoice_details": [ { "headnumber": 1, "lineno": 1, "product": "Shoes", "quantity": 2 }, { "headnumber": 1, "lineno": 2, "product": "Socks", "quantity": 1 }, { "headnumber": 2, "lineno": 1, "product": "Laptop", "quantity": 1 } ], } I need to export data in a nested way: { "invice_head": [ { "number": 1, "cliente": "Carlos", "date": "2016-12-12", "invice_details": [ { "headnumber": 1, "lineno": 1, "product": "Shoes", "quantity": 2 }, { "headnumber": 1, "lineno": 2, "product": "Socks", "quantity": 1 } ] }, { "number": 2, "cliente": "Fernando", "date": "2017-01-01" "invice_details": [ { "headnumber": 2, "lineno": 1, "product": "Laptop", "quantity": 1 } ] } ] } For this I guess one needs to start in a table and recursively go through its childs for each record. Does anybody knows if there is anything that does it? I don't want to reinvent the wheel.
Getting error while parsing json response from a dynamic {System.RuntimeType} variable
I'm working on some code in which uses dynamic variables jsonResponse . dynamic jsonResponse = JsonConvert.DeserializeObject(response); This variable contains collection of hotel list in json format. From this collection I am getting roomlist collection in a new variable roomResponseList : var roomResponseList = jsonResponse["hotels"]["hotels"][rooms].roomResponseList; I am getting first room detail into **JObject responseRateKeys **: foreach (var roomByResponse in roomResponseList) { JObject responseRateKeys = JObject.Parse(roomByResponse.ToString()); var boardNameListByResponse = responseRateKeys.AsJEnumerable().AsEnumerable() .Select(t => t["rates"]["boardName"].ToString().Trim()) .Distinct() .ToList(); } But when I am trying to get any item list from JObject by using linq lambda, I am getting error, "Cannot access child value on Newtonsoft.Json.Linq.JProperty." Value of roomByResponse= { "code": "DBL.KG-NM", "name": "DOUBLE KING BED NON SMOKING", "rates": [ { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|RO|IWH25|1~1~0||N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NRF", "rateType": "RECHECK", "net": "186.04", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "RO", "boardName": "ROOM ONLY", "cancellationPolicies": [ { "amount": "149.63", "from": "2017-07-14T03:29:00+05:30" } ], "rooms": 1, "adults": 1, "children": 0, "dailyRates": [ { "offset": 1, "dailyNet": "93.02" }, { "offset": 2, "dailyNet": "93.02" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|BB|IWB25|1~1~0||N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NOR", "rateType": "RECHECK", "net": "238.92", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "BB", "boardName": "BED AND BREAKFAST", "rooms": 1, "adults": 1, "children": 0, "dailyRates": [ { "offset": 1, "dailyNet": "119.46" }, { "offset": 2, "dailyNet": "119.46" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|RO|IWH25|2~2~1|2|N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NRF", "rateType": "RECHECK", "net": "372.06", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "RO", "boardName": "ROOM ONLY", "cancellationPolicies": [ { "amount": "299.25", "from": "2017-07-14T03:29:00+05:30" } ], "rooms": 2, "adults": 2, "children": 1, "childrenAges": "2", "dailyRates": [ { "offset": 1, "dailyNet": "186.03" }, { "offset": 2, "dailyNet": "186.03" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|BB|IWB25|2~2~1|2|N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NOR", "rateType": "RECHECK", "net": "477.84", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "BB", "boardName": "BED AND BREAKFAST", "rooms": 2, "adults": 2, "children": 1, "childrenAges": "2", "dailyRates": [ { "offset": 1, "dailyNet": "238.92" }, { "offset": 2, "dailyNet": "238.92" } ] } ] } Thank you Pravesh Singh
change linq to responseRateKeys["rates"].AsJEnumerable().Select(t=>t["boardName"]).Distinct().ToList()
Parse Json with different type of inner array
I have a great problem for parse a very difficult json string. For Eg { "facilityDetails": [ { "tableName": "FACILITY", "facilityDetails": [ { "id": 1, "itemId": "s101", "name": "facility", "status": 1, "lastEditedOn": "01/Jan/201200: 00: 00.000" } ] }, { "tableName": "PLACE_SERVICE", "facilityDetails": [ { "id": 1, "itemId": "22", "name": "placeservice", "facility": "5", "status": 1, "lastEditedOn": "01/Jan/201000: 00: 00.000" }, { "id": 2, "itemId": "55", "name": "placeservice", "facility": "t", "status": 2, "lastEditedOn": "01/Jan/201000: 00: 00.000" }, { "id": 3, "itemId": "99", "name": "placeservice", "facility": "r", "status": 33, "lastEditedOn": "01/Jan/201000: 00: 00.000" }, { "id": 4, "itemId": "22", "name": "placeservice", "facility": "", "status": 0, "lastEditedOn": "01/Jan/201000: 00: 00.000" } ] }, { "tableName": "AGENT", "facilityDetails": [ { "agentId": 2, "itemId": "1", "name": "agent", "defUnitId": 0, "defRouteId": 0, "color": "", "synonyms": "", "administrationType": 0, "status": 0, "lastEditedOn": "01/Jan/201200: 00: 00.000" } ] } ] } for this json string "facilityDetails" is an inner array that have different details at each time. How can i parse this type of json. If anyone know please help me
You should be able to parse this with the Json.NET library. http://james.newtonking.com/projects/json-net.aspx