Jolt Transform to "tabular format" - json

I'm using NiFi to pull in some Meteomatics API data and store it in our SQL data warehouse.
The NiFi architecture is mostly set up, but I would like to convert the JSON response of the API call to row/column format. I get the feeling this should be possible with a single shift operation, but I'm struggling to get multiple levels of data into my output.
See sample input (single set of coordinates, and same three parameters can be assumed):
{
"version": "3.0",
"user": "USERNAME",
"dateGenerated": "2022-09-01T07:52:43Z",
"status": "OK",
"data": [
{
"parameter": "precip_24h:mm",
"coordinates": [
{
"lat": 123,
"lon": 456,
"dates": [
{
"date": "2022-09-01T00:00:00Z",
"value": 11.6
},
{
"date": "2022-09-02T00:00:00Z",
"value": 4.49
},
{
"date": "2022-09-03T00:00:00Z",
"value": 7.79
},
{
"date": "2022-09-04T00:00:00Z",
"value": 6.6
},
{
"date": "2022-09-05T00:00:00Z",
"value": 12.7
},
{
"date": "2022-09-06T00:00:00Z",
"value": 2.01
}
]
}
]
},
{
"parameter": "heavy_rain_warning_24h:idx",
"coordinates": [
{
"lat": 123,
"lon": 456,
"dates": [
{
"date": "2022-09-01T00:00:00Z",
"value": 0
},
{
"date": "2022-09-02T00:00:00Z",
"value": 0
},
{
"date": "2022-09-03T00:00:00Z",
"value": 0
},
{
"date": "2022-09-04T00:00:00Z",
"value": 0
},
{
"date": "2022-09-05T00:00:00Z",
"value": 0
},
{
"date": "2022-09-06T00:00:00Z",
"value": 0
}
]
}
]
},
{
"parameter": "t_0m:C",
"coordinates": [
{
"lat": 123,
"lon": 456,
"dates": [
{
"date": "2022-09-01T00:00:00Z",
"value": 27.2
},
{
"date": "2022-09-02T00:00:00Z",
"value": 27.3
},
{
"date": "2022-09-03T00:00:00Z",
"value": 27.3
},
{
"date": "2022-09-04T00:00:00Z",
"value": 27.3
},
{
"date": "2022-09-05T00:00:00Z",
"value": 27.3
},
{
"date": "2022-09-06T00:00:00Z",
"value": 28.2
}
]
}
]
}
]
}
Desired Output:
[
{
"date": "2022-09-01T00:00:00Z",
"value": 11.6,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-02T00:00:00Z",
"value": 4.49,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-03T00:00:00Z",
"value": 7.79,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-04T00:00:00Z",
"value": 6.6,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-05T00:00:00Z",
"value": 12.7,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-06T00:00:00Z",
"value": 2.01,
"parameter": "precip_24h:mm",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-01T00:00:00Z",
"value": 0,
"parameter": "heavy_rain_warning_24h:idx",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-02T00:00:00Z",
"value": 0,
"parameter": "heavy_rain_warning_24h:idx",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
},
{
"date": "2022-09-03T00:00:00Z",
"value": 0,
"parameter": "heavy_rain_warning_24h:idx",
"lat": "123",
"lon": "456",
"dateGenerated": "2022-09-01T07:52:43Z"
etc...
This way I can put in a simple avro converter and push directly to a SQL table with columns "date,value,parameter,lat,lon,dateGenerated".
Currently, I've gotten to this response:
[
{
"date": "2022-09-01T00:00:00Z",
"value": 11.6
},
{
"date": "2022-09-02T00:00:00Z",
"value": 4.49
},
{
"date": "2022-09-03T00:00:00Z",
"value": 7.79
}
etc...
Using the following spec (loop through at the lowest level, return the data and put in a big array):
[
{
"operation": "shift",
"spec": {
"data": {
"*": {
"coordinates": {
"*": {
"dates": {
"*": {
"@": "[]"
}
}
}
}
}
}
}
}
]
Including dateGenerated/other fields from further up the tree is giving me difficulty. I wanted to include something like this (traverse 5 levels up, get the dateGenerated field and map it to DateGen?). This doesn't work, and I'm a little lost:
[
{
"operation": "shift",
"spec": {
"data": {
"*": {
"coordinates": {
"*": {
"dates": {
"*": {
"@": "[]",
"@5.DateGenerated":"[].DateGen"
}
}
}
}
}
}
}
}
]

You can use two successive shift transformations, such as
[
{
// distinguish the objects from each other by a common value
"operation": "shift",
"spec": {
"data": {
"*": {
"coo*": {
"*": {
"da*": {
"*": {
"@": "@(5,parameter)[&1]", // separate the objects by their presumed id value (parameter), after going five levels up the tree to reach its level
"@(2,lat)": "@(5,parameter)[&1].lat", // go two levels up the tree to reach the level of "lat" and "lon"
"@(2,lon)": "@(5,parameter)[&1].lon",
"@(4,parameter)": "@(5,parameter)[&1].parameter",
"@(6,dateGenerated)": "@(5,parameter)[&1].dateGenerated"
}
}
}
}
}
}
}
},
{
// get rid of labels and square brackets
"operation": "shift",
"spec": {
"*": {
"*": ""
}
}
}
]

Related

How to Add an Object to a Json Array Depending a value of another Key in the File Using JoltTransformationJson in NiFi

This is my first time using JoltTransformationJson, so I have limited knowledge of and experience with it. Please help me with this complicated project.
Request:
When payment.code <> "paid", I have to do the following two things to the file:
change payment.code to "denied" and payment.text to "denied"
add a JSON object to item.ADJ
When payment.code == "paid", nothing needs to change.
Input :
{
"resourceType": "E",
"id": "11",
"identifier": [
{
"type": {
"coding": [
{
"system": "sys1",
"code": "aaa"
}
]
},
"value": "212"
},
{
"type": {
"coding": [
{
"system": "sys2",
"code": "RRR"
}
]
},
"value": "367"
}
],
"status": "active",
"created": "2021-08-05T02:43:48+00:00",
"outcome": "complete",
"item": [
{
"sequence": 1,
"product": {
"coding": [
{
"system": "example",
"code": "abc",
"display": "ABC"
}
],
"text": "ABC"
},
"servicedDate": "2021-08-04",
"quantity": {
"value": 60
},
"ADJ": [
{
"category": {
"coding": [
{
"system": "code1",
"code": "code1",
"display": "CODE1"
}
],
"text": "CODE1"
},
"amount": {
"value": 46.45,
"currency": "USD"
}
},
{
"category": {
"coding": [
{
"system": "code2",
"code": "code2",
"display": "CODE2"
}
],
"text": "CODE2"
},
"amount": {
"value": 12.04,
"currency": "USD"
}
}
]
}
],
"payment": {
"type": {
"coding": [
{
"system": "http://payment.com",
"code": "reversed/cancelled"
}
],
"text": "cancelled"
}
}
}
My Expected Output :
{
"resourceType": "E",
"id": "11",
"identifier": [
{
"type": {
"coding": [
{
"system": "sys1",
"code": "aaa"
}
]
},
"value": "212"
},
{
"type": {
"coding": [
{
"system": "sys2",
"code": "RRR"
}
]
},
"value": "367"
}
],
"status": "active",
"created": "2021-08-05T02:43:48+00:00",
"outcome": "complete",
"item": [
{
"sequence": 1,
"product": {
"coding": [
{
"system": "example",
"code": "abc",
"display": "ABC"
}
],
"text": "ABC"
},
"servicedDate": "2021-08-04",
"quantity": {
"value": 60
},
"ADJ": [
{
"category": {
"coding": [
{
"system": "code1",
"code": "code1",
"display": "CODE1"
}
],
"text": "CODE1"
},
"amount": {
"value": 46.45,
"currency": "USD"
}
},
{
"category": {
"coding": [
{
"system": "code2",
"code": "code2",
"display": "CODE2"
}
],
"text": "CODE2"
},
"amount": {
"value": 12.04,
"currency": "USD"
}
},
{// new object I want to insert into
"category": {
"coding": [
{
"system": "sys_denail",
"code": "denialreason"
}
],
"reason": {
"coding": [
{
"system": "https://example.com",
"code": "A1"
}
],
"text": "unknown"
}}
}
]
}
],
"payment": {
"type": {
"coding": [
{
"system": "http://payment.com",
"code": "denied" //change the value to denied
}
],
"text": "denied" //change the value to denied
}
}
}
Edit: I've tried to answer the second case myself, to be evaluated after the first case is answered.
Welcome to SO. In the future, please ask minimal, reproducible questions and show what you have tried.
What you need is conditional logic, along with ampersand placeholders whose numbers depend on the level of each key name within the tree.
I have answered partially; this handles the bottom part of your question. The logic for the rest (inserting an object into the array) will be similar.
So, consider having a look at the following solution:
[
{
"operation": "shift",
"spec": {
"*": "&",
"payment": {
"type": {
"coding": {
"*": {
"*": "&4.&3.&2[&1].&",
"code": {
"paid": {
"@1": "&6.&5.&4[&3].&2",
"@(4,text)": "&6.text"
},
"*": {
"#denied": "&6.&5.&4[&3].code",
"@(4,text)": {
"#denied": "&6.text"
}
}
}
}
}
}
}
}
}
]
Edit (regarding your own answer about adding an object):
Your current idea of using a shift after a default transformation spec is pretty good; you can rephrase it like this:
[
{
"operation": "default",
"spec": {
"temp_deny": {
"denialreason": {
"category": {
"coding": [
{
"system": "sys_denail",
"code": "denialreason"
}
],
"reason": {
"coding": [
{
"system": "https://example.com",
"code": "A1"
}
],
"text": "unknown"
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"item": {
"*": {
"*": "&2[&1].&",
"ADJ": {
"@": "&3[&2].&",
"@(4,temp_deny)": "&3[&2].&"
}
}
}
}
}
]

apache nifi- how to create a custom date format

I am new to NiFi and I am trying to create a week_start_date and a week_number from a date in JSON format.
I am using a Jolt transform.
The input is a Google Ads API response.
This is the spec I use:
[
{
"operation": "shift",
"spec": {
"customer_id": {
"*": "[&].customer_id"
},
"customer_name": {
"*": "[&].customer_name"
},
"account_currency_code": {
"*": "[&].account_currency_code"
},
"campaign_id": {
"*": "[&].campaign_id"
},
"campaign_name": {
"*": "[&].campaign_name"
},
"campaign_status": {
"*": "[&].campaign_status"
},
"ad_group_id": {
"*": "[&].ad_group_id"
},
"ad_group_name": {
"*": "[&].ad_group_name"
},
"clicks": {
"*": "[&].clicks"
},
"cost": {
"*": "[&].cost"
},
"impressions": {
"*": "[&].impressions"
},
"device": {
"*": "[&].device"
},
"date": {
"*": "[&].date"
},
"week_number": {
"*": "[&].week_number"
},
"year": {
"*": "[&].year"
},
"keywords": {
"*": "[&].keywords"
},
"keywords_id": {
"*": "[&].keywords_id"
}
}
},
{
"operation": "modify-default-beta",
"spec": {
"date": {
"date": "=intSubtract(@(1,date))"
}
}
}
]
The expected output should be:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "48",
"week_start_date": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
the output I have:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
I am not sure how to use modify-default-beta correctly.
I also tried looking at the docs:
https://github.com/bazaarvoice/jolt/tree/master/jolt-core/src/test/resources/json/shiftr
Also, what is the correct way to understand the structure?

Selectively get certain arrays from inside an array in JSON using JOLT

I've been trying to get some fields out of a very long and complicated JSON document, but I'm not getting the output I want.
My current spec obtains all events from the events array and lists them in the output. I'm unsure how to select specific events and output only those; I'm not quite sure of the syntax.
My JSON:
{
"rootid": "19718",
"clloadm": "2021-06-01T22:40:02",
"clload": "2021-06-01T21:21:39",
"date": "2021-05-25T21:52:30",
"events": [
{
"done": {
"id": "e0",
"value": "2021-05-29T08:08:19"
},
"id": "e0_event",
"started": {
"id": "e0",
"value": "2021-05-29T08:08:19"
},
"status": "complete"
},
{
"done": {
"id": "e1",
"value": "2021-05-27T02:20:25"
},
"id": "e1_event",
"started": {
"id": "e1",
"value": "2021-05-27T02:20:25"
},
"status": "complete"
},
{
"done": {
"id": "e2",
"value": "2021-05-29T08:08:19"
},
"id": "e2_event",
"started": {
"id": "e2",
"value": "2021-05-29T08:08:19"
},
"status": "complete"
},
{
"done": {
"id": "e3",
"value": "2021-05-29T08:08:19"
},
"id": "e3_event",
"started": {
"id": "e3",
"value": "2021-05-29T08:08:19"
},
"status": "complete"
},
{
"done": {
"id": "e4",
"value": "2021-05-29T08:08:19"
},
"id": "e4_event",
"started": {
"id": "e4",
"value": "2021-05-29T08:08:19"
},
"status": "complete"
}
],
"ids": [
{
"id": "id",
"source": "source",
"value": "value"
},
{
"id": "new_id",
"source": "new_source",
"value": "value"
}
]
}
My Jolt Spec that gets all events for now:
[
{
"operation": "shift",
"spec": {
"rootid": "rootid",
"clloadm": "clloadm",
"clload": "clload",
"date": "date",
"events": {
"*": {
"*": {
"@value": "@id"
}
}
},
"ids": {
"*": {
"@value": "@id"
}
}
}
}
]
The output I get:
{
"rootid" : "19718",
"clloadm" : "2021-06-01T22:40:02",
"clload" : "2021-06-01T21:21:39",
"date" : "2021-05-25T21:52:30",
"e0" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"e1" : [ "2021-05-27T02:20:25", "2021-05-27T02:20:25" ],
"e2" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"e3" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"e4" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"id" : "value",
"new_id" : "value"
}
The output I would like
{
"rootid" : "19718",
"clloadm" : "2021-06-01T22:40:02",
"clload" : "2021-06-01T21:21:39",
"date" : "2021-05-25T21:52:30",
"e0" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"e4" : [ "2021-05-29T08:08:19", "2021-05-29T08:08:19" ],
"id" : "value",
"new_id" : "value"
}
You can write the individual keys e0 and e4 as conditional cases under the @id key, while copying the rest of the key-value pairs through the "*": "&" rule, such as
[
{
"operation": "shift",
"spec": {
"*": "&",
"events": {
"*": {
"*": {
"@id": {
"e0": { "@(2,value)": "&" },
"e4": { "@(2,value)": "&" }
}
}
}
},
"ids": {
"*": {
"@value": "@id"
}
}
}
}
]

Convert sample JSON to nested JSON array using JOLT Transformation

I am facing a problem transforming flat JSON into nested JSON using a Jolt transformation, and I am very new to Jolt. The input and expected output are given below.
My input:
[
{
"policyNo": 1,
"lProdCode": 500,
"name": "Prasad",
"id": "10",
"Age": "56"
},
{
"policyNo": 1,
"lProdCode": 500,
"name": "Mahapatra",
"id": "101",
"Age": "56"
},
{
"policyNo": 2,
"lProdCode": 500,
"name": "Pra",
"id": "109",
"Age": "56"
},
{
"policyNo": 3,
"lProdCode": 400,
"name": "Pra",
"id": "108",
"Age": "56"
},
{
"policyNo": 1,
"lProdCode": 500,
"name": "Pra",
"id": "108",
"Age": "56"
}
]
expected output
[
{
"policyNo": 1,
"lProdCode": 500,
"beneficiaries": [
{
"name": "Prasad",
"id": "10900629001",
"Age": "56"
},
{
"name": "Mahapatra",
"id": "10900629001",
"Age": "56"
},
{
"name": "Pra",
"id": "108",
"Age": "56"
}
]
},
{
"policyNo": 2,
"lProdCode": 500,
"beneficiaries": [
{
"name": "Pra",
"id": "10900629001",
"Age": "56"
}
]
},
{
"policyNo": 3,
"lProdCode": 400,
"beneficiaries": [
{
"name": "Pra",
"id": "108",
"Age": "56"
}
]
}
]
Principally, you need to group by the policyNo attribute while generating a new list (beneficiaries) for the attributes other than policyNo and lProdCode. That can be handled within a shift transformation. Then add three more steps to smooth out the rough edges left by the first transformation, such as
[
{
"operation": "shift",
"spec": {
"*": {
"policyNo": "@(1,policyNo).&",
"lProdCode": "@(1,policyNo).&",
"*": "@(1,policyNo).beneficiaries[&1].&"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"policyNo": "ONE",
"lProdCode": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]

JOLT transform flatten nested array with key value pairs

I'm trying to transform the following JSON
{
"data": {
"keyvalues": [
{
"key": "location",
"value": "sydney, au"
},
{
"key": "weather",
"value": "sunny"
}
]
},
"food": {
"name": "AllFoods",
"date": "2018-03-08T09:35:17-03:00",
"count": 2,
"food": [
{
"name": "chocolate",
"date": "2018-03-08T12:59:58-03:00",
"rating": "10",
"data": null
},
{
"name": "hot dog",
"date": "2018-03-08T09:35:17-03:00",
"rating": "7",
"data": {
"keyvalues": [
{
"key": "topping",
"value": "mustard"
},
{
"key": "BUN type",
"value": "toasted"
},
{
"key": "servings",
"value": "2"
}
]
}
}
]
}
}
Into something simpler like this, using JOLT (in NiFi): bringing the top-level food attributes (name, date, count) into the header, then pulling the nested food array up, and then flattening food.data.keyvalues into a dict/hashmap.
{
"header": {
"location": "sydney, au",
"weather": "sunny",
"date": "2018-03-08",
"count": 2
},
"foods": [
{
"name": "chocolate",
"date": "2018-03-08T12:59:58-03:00",
"rating": "10"
},
{
"name": "hot dog",
"date": "2018-03-08T09:35:17-03:00",
"rating": "7",
"topping": "mustard",
"bun_type": "toasted",
"servings": "2"
}
]
}
I've got the first data part working, but I'm not sure how to handle the nested food element. The top-level food info needs to move into the header section, and the second-level food array needs to have its data.keyvalues flattened out.
Current spec... (only handles the top data.keyvalues)
[
{
"operation": "shift",
"spec": {
"data": {
"keyvalues": {
"*": { "@value": "@key" }
}
}
}
}
]
Spec
[
{
"operation": "shift",
"spec": {
"data": {
"keyvalues": {
"*": {
"value": "header.@(1,key)"
}
}
},
"food": {
"date": "header.date",
"count": "header.count",
"food": {
"*": {
"name": "foods[&1].name",
"date": "foods[&1].date",
"rating": "foods[&1].rating",
"data": {
"keyvalues": {
"*": {
"value": "foods[&4].@(1,key)"
}
}
}
}
}
}
}
}
]