I am trying to process a nested JSON and flatten it in Apache NiFi, with the help of the JoltTransformation processor by supplying a spec.
Sample JSON:
Input
{
"product": "astro",
"init": "2022091400",
"dataseries": [
{
"timepoint": 3,
"cloudcover": 2,
"seeing": 6,
"transparency": 2,
"lifted_index": 2,
"rh2m": 3,
"wind10m": {
"direction": "N",
"speed": 3
},
"temp2m": 33,
"prec_type": "none"
},
{
"timepoint": 6,
"cloudcover": 2,
"seeing": 6,
"transparency": 2,
"lifted_index": 2,
"rh2m": 1,
"wind10m": {
"direction": "NW",
"speed": 3
},
"temp2m": 35,
"prec_type": "none"
},
{
"timepoint": 9,
"cloudcover": 1,
"seeing": 6,
"transparency": 2,
"lifted_index": 2,
"rh2m": 2,
"wind10m": {
"direction": "N",
"speed": 3
},
"temp2m": 35,
"prec_type": "none"
}
]
}
Jolt Spec
[
{
"operation": "shift",
"spec": {
"product": "product",
"init": "init",
"dataseries": {
"*": {
"timepoint": "timepoint",
"cloudcover": "cloudcover",
"seeing": "seeing",
"transparency": "transparency",
"lifted_index": "lifted_index",
"rh2m": "rh2m",
"wind10m": {
"direction": "direction",
"speed": "speed"
},
"temp2m": "temp2m",
"prec_type": "prec_type"
}
}
}
}
]
Output
{
"product" : "astro",
"init" : "2022091400",
"timepoint" : [ 3, 6, 9 ],
"cloudcover" : [ 2, 2, 1 ],
"seeing" : [ 6, 6, 6 ],
"transparency" : [ 2, 2, 2 ],
"lifted_index" : [ 2, 2, 2 ],
"rh2m" : [ 3, 1, 2 ],
"direction" : [ "N", "NW", "N" ],
"speed" : [ 3, 3, 3 ],
"temp2m" : [ 33, 35, 35 ],
"prec_type" : [ "none", "none", "none" ]
}
Expected Output
{
"product" : "astro",
"init" : "2022091400",
"timepoint" : 3,
"cloudcover" : 2,
"seeing" : 6,
"transparency" : 2,
"lifted_index" : 2,
"rh2m" : 3,
"direction" : "N",
"speed" : 3,
"temp2m" : 33,
"prec_type" : "none"
},
{
"product" : "astro",
"init" : "2022091400",
"timepoint" : 6,
"cloudcover" : 2,
"seeing" : 6,
"transparency" : 2,
"lifted_index" : 2,
"rh2m" : 1,
"direction" : "NW",
"speed" : 3,
"temp2m" : 35,
"prec_type" : "none"
},
{
"product" : "astro",
"init" : "2022091400",
"timepoint" : 9,
"cloudcover" : 1,
"seeing" : 6,
"transparency" : 2,
"lifted_index" : 2,
"rh2m" : 2,
"direction" : "N",
"speed" : 3,
"temp2m" : 35,
"prec_type" : "none"
}
So my expectation is to flatten the JSON into one record per element of dataseries, each holding single values and repeating the top-level attributes (in this case product and init), after which I plan to send the result to the ConvertJSONToSQL processor in NiFi to have the records inserted into a PostgreSQL database.
https://jolt-demo.appspot.com/
There is no need to write each attribute individually; just use the @ and & wildcards. The exceptions are product and init, which have to be fetched by going two levels up the tree, such as
[
{
"operation": "shift",
"spec": {
"dataseries": {
"*": {
"@(2,product)": "[&1].product",
"@(2,init)": "[&1].init", // 2 stands for going up two levels to reach the "init" attribute; [&1] refers to the index within the "dataseries" array and shapes the result as an array (hence the square brackets)
"*": "[&1].&",
"w*": {
"*": "[&2].&"
}
}
}
}
}
]
the demo on the site http://jolt-demo.appspot.com/ is :
Related
{ "product" : "astro" , "init" : "2020091906" , "dataseries" : [ { "timepoint" : 3, "cloudcover" : 7, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 6, "wind10m" : { "direction" : "S", "speed" : 2 }, "temp2m" : 33, "prec_type" : "rain" }, { "timepoint" : 6, "cloudcover" : 6, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 7, "wind10m" : { "direction" : "S", "speed" : 2 }, "temp2m" : 30, "prec_type" : "rain" }, { "timepoint" : 9, "cloudcover" : 1, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 7, "wind10m" : { "direction" : "SE", "speed" : 3 }, "temp2m" : 30, "prec_type" : "none" }, { "timepoint" : 12, "cloudcover" : 4, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 8, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 29, "prec_type" : "none" }, { "timepoint" : 15, "cloudcover" : 2, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "E", "speed" : 2 }, "temp2m" : 29, "prec_type" : "none" }, { "timepoint" : 18, "cloudcover" : 3, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 8, "wind10m" : { "direction" : "E", "speed" : 2 }, "temp2m" : 29, "prec_type" : "rain" }, { "timepoint" : 21, "cloudcover" : 7, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 6, "wind10m" : { "direction" : "E", "speed" : 2 }, "temp2m" : 33, "prec_type" : "rain" }, { "timepoint" : 24, "cloudcover" : 8, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 5, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 35, "prec_type" : "rain" }, { "timepoint" : 27, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -4, "rh2m" : 6, "wind10m" : { "direction" : "SW", "speed" : 3 }, "temp2m" : 32, "prec_type" : "rain" }, { "timepoint" : 30, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 7, "wind10m" : { "direction" : "SW", "speed" : 3 }, "temp2m" : 30, "prec_type" : "rain" }, { 
"timepoint" : 33, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 8, "wind10m" : { "direction" : "SW", "speed" : 2 }, "temp2m" : 30, "prec_type" : "none" }, { "timepoint" : 36, "cloudcover" : 9, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "W", "speed" : 2 }, "temp2m" : 29, "prec_type" : "none" }, { "timepoint" : 39, "cloudcover" : 6, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "N", "speed" : 2 }, "temp2m" : 28, "prec_type" : "rain" }, { "timepoint" : 42, "cloudcover" : 8, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 8, "wind10m" : { "direction" : "SW", "speed" : 2 }, "temp2m" : 29, "prec_type" : "rain" }, { "timepoint" : 45, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 6, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 32, "prec_type" : "rain" }, { "timepoint" : 48, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -1, "rh2m" : 6, "wind10m" : { "direction" : "S", "speed" : 3 }, "temp2m" : 33, "prec_type" : "rain" }, { "timepoint" : 51, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -4, "rh2m" : 7, "wind10m" : { "direction" : "S", "speed" : 3 }, "temp2m" : 32, "prec_type" : "rain" }, { "timepoint" : 54, "cloudcover" : 7, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "S", "speed" : 3 }, "temp2m" : 30, "prec_type" : "rain" }, { "timepoint" : 57, "cloudcover" : 9, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "S", "speed" : 3 }, "temp2m" : 29, "prec_type" : "rain" }, { "timepoint" : 60, "cloudcover" : 9, "seeing" : 6, "transparency" : 5, "lifted_index" : -1, "rh2m" : 10, "wind10m" : { "direction" : "S", "speed" : 3 }, "temp2m" : 28, "prec_type" : "rain" }, { "timepoint" : 63, "cloudcover" : 9, "seeing" : 6, "transparency" : 5, 
"lifted_index" : -1, "rh2m" : 10, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 28, "prec_type" : "rain" }, { "timepoint" : 66, "cloudcover" : 8, "seeing" : 6, "transparency" : 4, "lifted_index" : -1, "rh2m" : 9, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 29, "prec_type" : "rain" }, { "timepoint" : 69, "cloudcover" : 9, "seeing" : 6, "transparency" : 4, "lifted_index" : -4, "rh2m" : 7, "wind10m" : { "direction" : "SE", "speed" : 2 }, "temp2m" : 31, "prec_type" : "rain" }, { "timepoint" : 72, "cloudcover" : 9, "seeing" : 6, "transparency" : 3, "lifted_index" : -4, "rh2m" : 7, "wind10m" : { "direction" : "SW", "speed" : 2 }, "temp2m" : 32, "prec_type" : "rain" } ] }
Their documentation is here: http://www.7timer.info/doc.php?lang=en#introduction
It says nothing about what the timepoint is, what it represents, or how to use it.
Any help here is appreciated, thank you.
This seems to be an old question, but I think I have the answer to it; perhaps someone else will be thankful for it.
First you'll see the init date. It stands for the date and time when the forecast was made. In your example it was 2020-09-19 at 6:00 AM.
To this date/time you have to add the timepoint from the forecast, in hours. For example, timepoint 3 means 9:00 AM and timepoint 6 means 12:00 PM (noon).
The "timepoint" variable is hours past the "init" time. In your example: "init": "2020091906" corresponds to 2020-09-19 at 6 AM. At "timepoint": 3, that is 9 AM, and it goes up to 72 hours after that init datetime. It is the same thing on the METEO forecast, too, but that one is 192 hours.
I work on a Laravel application that is a bit like a quiz. You create an exercise, you add questions to it, and you add answers to each question.
So far everything works fine.
My interest is to present statistics for each exercise after schoolchildren have answered the tests.
I want to be able to display the answer percentage for each question.
Here is an example of the data structure I have:
[
{
"id": 1,
"exercice_id": 1,
"question": "Lorem ipsum ?",
"responses": [
{
"id": 1,
"exercice_id": 1,
"question_id": 1,
"response_text": "Yes",
},
{
"id": 2,
"exercice_id": 1,
"question_id": 1,
"response_text": "No",
}
],
"choice": [
{
"id": 1,
"exercice_id": 1,
"question_id": 1,
"response_id": 1,
},
{
"id": 2,
"exercice_id": 1,
"question_id": 1,
"response_id": 1,
},
{
"id": 3,
"exercice_id": 1,
"question_id": 1,
"response_id": 2,
}
]
},
{
"id": 2,
"exercice_id": 1,
"question": "fake text ?",
"responses": [
{
"id": 3,
"exercice_id": 1,
"question_id": 2,
"response_text": "A",
},
{
"id": 4,
"exercice_id": 1,
"question_id": 2,
"response_text": "B",
},
{
"id": 5,
"exercice_id": 1,
"question_id": 2,
"response_text": "C",
}
],
"choice": [
{
"id": 4,
"exercice_id": 1,
"question_id": 2,
"response_id": 5,
},
{
"id": 5,
"exercice_id": 1,
"question_id": 2,
"response_id": 3,
}
]
}
]
I tried the groupBy method on several elements, but I have not found the right formula yet.
return response()->json(Question::with('responses','choice')
->where('exercice_id',$exo->id)
//->groupBy('choice')
->get(),
200,[], JSON_NUMERIC_CHECK);
I'm working on some code which uses a dynamic variable, jsonResponse.
dynamic jsonResponse = JsonConvert.DeserializeObject(response);
This variable contains a collection of hotels in JSON format. From this collection I am getting the room list collection into a new variable, roomResponseList:
var roomResponseList = jsonResponse["hotels"]["hotels"][rooms].roomResponseList;
I am getting the first room detail into **JObject responseRateKeys**:
foreach (var roomByResponse in roomResponseList)
{
JObject responseRateKeys = JObject.Parse(roomByResponse.ToString());
var boardNameListByResponse = responseRateKeys.AsJEnumerable().AsEnumerable()
.Select(t => t["rates"]["boardName"].ToString().Trim())
.Distinct()
.ToList();
}
But when I try to get a list of items from the JObject using a LINQ lambda, I get this error:
"Cannot access child value on Newtonsoft.Json.Linq.JProperty."
Value of roomByResponse=
{ "code": "DBL.KG-NM", "name": "DOUBLE KING BED NON SMOKING", "rates": [ { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|RO|IWH25|1~1~0||N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NRF", "rateType": "RECHECK", "net": "186.04", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "RO", "boardName": "ROOM ONLY", "cancellationPolicies": [ { "amount": "149.63", "from": "2017-07-14T03:29:00+05:30" } ], "rooms": 1, "adults": 1, "children": 0, "dailyRates": [ { "offset": 1, "dailyNet": "93.02" }, { "offset": 2, "dailyNet": "93.02" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|BB|IWB25|1~1~0||N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NOR", "rateType": "RECHECK", "net": "238.92", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "BB", "boardName": "BED AND BREAKFAST", "rooms": 1, "adults": 1, "children": 0, "dailyRates": [ { "offset": 1, "dailyNet": "119.46" }, { "offset": 2, "dailyNet": "119.46" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|RO|IWH25|2~2~1|2|N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NRF", "rateType": "RECHECK", "net": "372.06", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "RO", "boardName": "ROOM ONLY", "cancellationPolicies": [ { "amount": "299.25", "from": "2017-07-14T03:29:00+05:30" } ], "rooms": 2, "adults": 2, "children": 1, "childrenAges": "2", "dailyRates": [ { "offset": 1, "dailyNet": "186.03" }, { "offset": 2, "dailyNet": "186.03" } ] }, { "rateKey": "20171217|20171219|W|256|237403|DBL.KG-NM|ID_B2B_26|BB|IWB25|2~2~1|2|N#AFF5C93E36054661ADCBC14A78A532AE1007", "rateClass": "NOR", "rateType": "RECHECK", "net": "477.84", "allotment": 99, "paymentType": "AT_WEB", "packaging": false, "boardCode": "BB", "boardName": "BED AND BREAKFAST", "rooms": 2, "adults": 2, "children": 1, "childrenAges": "2", "dailyRates": [ { "offset": 1, "dailyNet": "238.92" }, { "offset": 2, 
"dailyNet": "238.92" } ] } ] }
Thank you
Pravesh Singh
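Change the LINQ query so that it enumerates the "rates" array instead of the JObject itself (enumerating a JObject yields JProperty items, which cannot be indexed by name, hence the error):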
responseRateKeys["rates"].AsJEnumerable().Select(t=>t["boardName"]).Distinct().ToList()
I am attempting to get an element in my JSON with a query.
I am using Groovy, Postgres 9.4 and JSONB.
Here is my JSON
{
"id": "${ID}",
"team": {
"id": "123",
"name": "Shire Soldiers"
},
"playersContainer": {
"series": [
{
"id": "1",
"name": "Nick",
"teamName": "Shire Soldiers",
"ratings": [
1,
5,
6,
9
],
"assists": 17,
"manOfTheMatches": 20,
"cleanSheets": 1,
"data": [
3,
2,
3,
5,
6
],
"totalGoals": 19
},
{
"id": "2",
"name": "Pasty",
"teamName": "Shire Soldiers",
"ratings": [
6,
8,
9,
10
],
"assists": 25,
"manOfTheMatches": 32,
"cleanSheets": 2,
"data": [
3,
5,
7,
9,
10
],
"totalGoals": 24
}
]
}
}
I want to fetch the individual elements in the series array by their ID. I am currently using the query below:
select content->'playersContainer'->'series' from site_content
where content->'playersContainer'->'series' @> '[{"id":"1"}]';
However, this brings me back both the element with an id of 1 and the element with an id of 2.
Below is what I get back
"[{"id": "1", "data": [3, 2, 3, 5, 6], "name": "Nick", "assists": 17, "ratings": [1, 5, 6, 9], "teamName": "Shire Soldiers", "totalGoals": 19, "cleanSheets": 1, "manOfTheMatches": 20}, {"id": "2", "data": [3, 5, 7, 9, 10], "name": "Pasty", "assists": 25, "r (...)"
Can anyone see where I am going wrong? I have seen some other questions on here but they don't help with this.
content->'playersContainer'->'series' is an array. Use jsonb_array_elements() if you want to find a specific element in an array.
select elem
from site_content,
lateral jsonb_array_elements(content->'playersContainer'->'series') elem
where elem @> '{"id":"1"}';
Test it here.
I have some JSON in my Postgres DB, in a table called site_content. The table has two columns, id and content; content is where I store my JSON. I want to be able to find a player given his id. My players are stored under the key series, as this is the key needed to create my charts from the JSON.
Here is the query I am currently using:
Blocking.get {
sql.firstRow("""SELECT * from site_content where content -> 'playersContainer' -> 'series' -> 'id' = ${id} """)
}.map { row ->
log.info("row is: ${row}")
if (row) {
objectMapper.readValue(row.getAt(0).toString(), Player)
}
}
}
However I get back this error:
org.postgresql.util.PSQLException: ERROR: operator does not exist:
json = character varying Hint: No operator matches the given name
and argument type(s). You might need to add explicit type casts.
Here is an example of my JSON:
"id": "${ID}",
"team": {
"id": "123",
"name": "Shire Soldiers"
},
"playersContainer": {
"series": [
{
"id": "1",
"name": "Nick",
"teamName": "Shire Soldiers",
"ratings": [
1,
5,
6,
9
],
"assists": 17,
"manOfTheMatches": 20,
"cleanSheets": 1,
"data": [
3,
2,
3,
5,
6
],
"totalGoals": 19
},
{
"id": "2",
"name": "Pasty",
"teamName": "Shire Soldiers",
"ratings": [
6,
8,
9,
10
],
"assists": 25,
"manOfTheMatches": 32,
"cleanSheets": 2,
"data": [
3,
5,
7,
9,
10
],
"totalGoals": 24
}
]
}
I am using Groovy for this project, but I guess it's just the general JSON postgres syntax I am having problems with.
You're right, that's a problem with the SQL syntax. Here is the corrected query:
select * from json_test where content->'playersContainer'->'series' @> '[{"id":"1"}]';
Full example:
CREATE TABLE json_test (
content jsonb
);
insert into json_test(content) VALUES ('{"id": "1",
"team": {
"id": "123",
"name": "Shire Soldiers"
},
"playersContainer": {
"series": [
{
"id": "1",
"name": "Nick",
"teamName": "Shire Soldiers",
"ratings": [
1,
5,
6,
9
],
"assists": 17,
"manOfTheMatches": 20,
"cleanSheets": 1,
"data": [
3,
2,
3,
5,
6
],
"totalGoals": 19
},
{
"id": "2",
"name": "Pasty",
"teamName": "Shire Soldiers",
"ratings": [
6,
8,
9,
10
],
"assists": 25,
"manOfTheMatches": 32,
"cleanSheets": 2,
"data": [
3,
5,
7,
9,
10
],
"totalGoals": 24
}
]
}}');
select * from json_test where content->'playersContainer'->'series' @> '[{"id":"1"}]';
About the @> (containment) operator: this question might also be useful.
Maybe this could help: in the SQL statement, I added a CAST around the parameter that holds the JSON field:
INSERT INTO map_file(type, data)
VALUES (?, CAST(? AS json))
RETURNING id
The data type of the 'data' column in the map_file table is json.