Convert sample JSON to nested JSON array using JOLT Transformation - json

I am having trouble transforming flat JSON into nested JSON using a Jolt transformation, and I am very new to Jolt. The input and the expected output are given below.
My input:
[
{
"policyNo": 1,
"lProdCode": 500,
"name": "Prasad",
"id": "10",
"Age": "56"
},
{
"policyNo": 1,
"lProdCode": 500,
"name": "Mahapatra",
"id": "101",
"Age": "56"
},
{
"policyNo": 2,
"lProdCode": 500,
"name": "Pra",
"id": "109",
"Age": "56"
},
{
"policyNo": 3,
"lProdCode": 400,
"name": "Pra",
"id": "108",
"Age": "56"
},
{
"policyNo": 1,
"lProdCode": 500,
"name": "Pra",
"id": "108",
"Age": "56"
}
]
expected output
[
{
"policyNo": 1,
"lProdCode": 500,
"beneficiaries": [
{
"name": "Prasad",
"id": "10900629001",
"Age": "56"
},
{
"name": "Mahapatra",
"id": "10900629001",
"Age": "56"
},
{
"name": "Pra",
"id": "108",
"Age": "56"
}
]
},
{
"policyNo": 2,
"lProdCode": 500,
"beneficiaries": [
{
"name": "Pra",
"id": "10900629001",
"Age": "56"
}
]
},
{
"policyNo": 3,
"lProdCode": 400,
"beneficiaries": [
{
"name": "Pra",
"id": "108",
"Age": "56"
}
]
}
]

In essence, you need to group by the policyNo attribute while collecting the attributes other than policyNo and lProdCode into a new list (beneficiaries). That can be handled within a single shift transformation. Then add three more steps to clean up the rough edges left by the first transformation (null placeholders inside the beneficiaries arrays, repeated policyNo/lProdCode values, and the grouping keys themselves), such as:
[
{
"operation": "shift",
"spec": {
"*": {
"policyNo": "@(1,policyNo).&",
"lProdCode": "@(1,policyNo).&",
"*": "@(1,policyNo).beneficiaries[&1].&"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"policyNo": "ONE",
"lProdCode": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]

Related

Applying cardinality for multiple columns in Jolt

I am trying to apply a Jolt transformation to the data below.
input:
[
{
"id": "500",
"code": "abc",
"date": "2020-10-10",
"category": 1,
"amount": 100,
"result": 0
},
{
"id": "500",
"code": "abc",
"date": "2020-10-10",
"category": 2,
"amount": 200,
"result": 1
}
]
jolt used:
[
{
"operation": "shift",
"spec": {
"*": {
"id": "@(1,id).id",
"code": "@(1,id).code",
"date": "@(1,id).group1.date",
"category": "@(1,id).group1.group2[&1].category"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"id": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
current output:
{
"id": "500",
"code": [
"abc",
"abc"
],
"group1": {
"date": [
"2020-10-10",
"2020-10-10"
],
"group2": [
{
"category": 1
},
{
"category": 2
}
]
}
}
expected:
{
"id": "500",
"code": "abc",
"group1": {
"date": "2020-10-10",
"group2": [
{
"category": 1
},
{
"category": 2
}
]
}
}
If I also list the code and date columns in the cardinality spec, the output is fine. But in my use case there are many more such columns to add. Is there a better way to handle this scenario?
You should mirror each nested node you added and use the "*" wildcard to represent the rest of the attributes within the cardinality transformation, such as:
{
"operation": "cardinality",
"spec": {
"*": {
"*": "ONE",
"group1": {
"*": "ONE",
"group2": "MANY"
}
}
}
}
where "group2": "MANY" exempts group2 from being reduced to only the first element of its list, so it remains an array.
the demo on the site http://jolt-demo.appspot.com/ :

Grouping JSON elements using Jolt transform

I need help with a Jolt transform spec. Below is my work so far.
Input:
[
{
"ID": "1234",
"Date": "2020-12-10",
"Time": "06:00:00",
"Rate": null,
"Interest": null,
"Term": 99
},
{
"ID": "1234",
"Date": "2020-12-11",
"Time": "07:00:00",
"Rate": 8,
"Interest": null,
"Term": 99
}
]
Jolt Code used:
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "@(1,ID).id",
"Date": "@(1,ID).date",
"Time": "@(1,ID).group.time",
"Rate": "@(1,ID).group.rate",
"Interest": "@(1,ID).group.interest",
"Term": "@(1,ID).group.term"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"id": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
Current output:
[
{
"id": "1234",
"date": ["2020-12-10", "2020-12-11"],
"group": {
"time": ["06:00:00", "07:00:00"],
"rate": 8,
"interest": null,
"term": [99, 99]
}
}
]
Expected output
[
{
"id": "1234",
"date": "2020-12-10",
"group": {
"time": "06:00:00",
"rate": null,
"interest": null,
"term": 99
}
},
{
"id": "1234",
"date": "2020-12-11",
"group": {
"time": "07:00:00",
"rate": 8,
"interest": null,
"term": 99
}
}
]
When the input contains only a single JSON object, this spec works fine. But when there are multiple items with the same ID, it merges all of their fields together.
You can use the square-bracket notation ([&1]) as the common prefix of every output path, while nesting all elements other than ID and Date under group, such as:
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "[&1].&",
"Date": "[&1].&",
"*": "[&1].group.&"
}
}
}
]

apache nifi- how to create a custom date format

I am new to NiFi and I am trying to derive week_start_date and week_number from a date in JSON format.
I am using a Jolt transform.
The input is a Google Ads API response.
This is the spec I use:
[
{
"operation": "shift",
"spec": {
"customer_id": {
"*": "[&].customer_id"
},
"customer_name": {
"*": "[&].customer_name"
},
"account_currency_code": {
"*": "[&].account_currency_code"
},
"campaign_id": {
"*": "[&].campaign_id"
},
"campaign_name": {
"*": "[&].campaign_name"
},
"campaign_status": {
"*": "[&].campaign_status"
},
"ad_group_id": {
"*": "[&].ad_group_id"
},
"ad_group_name": {
"*": "[&].ad_group_name"
},
"clicks": {
"*": "[&].clicks"
},
"cost": {
"*": "[&].cost"
},
"impressions": {
"*": "[&].impressions"
},
"device": {
"*": "[&].device"
},
"date": {
"*": "[&].date"
},
"week_number": {
"*": "[&].week_number"
},
"year": {
"*": "[&].year"
},
"keywords": {
"*": "[&].keywords"
},
"keywords_id": {
"*": "[&].keywords_id"
}
}
},
{
"operation": "modify-default-beta",
"spec": {
"date": {
"date": "=intSubtract(@(1,date))"
}
}
}
]
The expected output should be:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "48",
"week_start_date": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
the output I have:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
I am not sure how to use modify-default-beta correctly.
I also tried looking at the docs:
https://github.com/bazaarvoice/jolt/tree/master/jolt-core/src/test/resources/json/shiftr
What is the correct way to understand the structure of a spec?

Shift JOLT transformation - facing problem with below transformation

I'm trying to convert the input JSON below so that the necessary column names and their values are flattened while all metadata is retained.
Below is the input JSON that I have for my CDC use case.
{
"type": "update",
"timestamp": 1558346256000,
"binlog_filename": "mysql-bin-changelog.000889",
"binlog_position": 635,
"database": "books",
"table_name": "publishers",
"table_id": 111,
"columns": [
{
"id": 1,
"name": "id",
"column_type": 4,
"last_value": 2,
"value": 2
},
{
"id": 2,
"name": "name",
"column_type": 12,
"last_value": "Suresh",
"value": "Suresh123"
},
{
"id": 3,
"name": "email",
"column_type": 12,
"last_value": "Suresh@yahoo.com",
"value": "Suresh@yahoo.com"
}
]
}
Below is the expected output json
[
{
"type": "update",
"timestamp": 1558346256000,
"binlog_filename": "mysql-bin-changelog.000889",
"binlog_position": 635,
"database": "books",
"table_name": "publishers",
"table_id": 111,
"columns": {
"id": "2",
"name": "Suresh123",
"email": "Suresh@yahoo.com"
}
}
]
I tried the spec below, with which I'm able to retrieve the columns object but not the rest of the metadata.
[
{
"operation": "shift",
"spec": {
"columns": {
"*": {
"@(value)": "[#1].@(1,name)"
}
}
}
}
]
Any leads would be very much appreciated.
I found a Jolt spec for the above transformation. I'm posting it here in case anyone stumbles upon something like this.
[
{
"operation": "shift",
"spec": {
"columns": {
"*": {
"@(value)": "columns.@(1,name)"
}
},
"*": "&"
}
}
]

JOLT transform flatten nested array with key value pairs

I'm trying to transform the following JSON
{
"data": {
"keyvalues": [
{
"key": "location",
"value": "sydney, au"
},
{
"key": "weather",
"value": "sunny"
}
]
},
"food": {
"name": "AllFoods",
"date": "2018-03-08T09:35:17-03:00",
"count": 2,
"food": [
{
"name": "chocolate",
"date": "2018-03-08T12:59:58-03:00",
"rating": "10",
"data": null
},
{
"name": "hot dog",
"date": "2018-03-08T09:35:17-03:00",
"rating": "7",
"data": {
"keyvalues": [
{
"key": "topping",
"value": "mustard"
},
{
"key": "BUN type",
"value": "toasted"
},
{
"key": "servings",
"value": "2"
}
]
}
}
]
}
}
into something simpler like this, using Jolt (in NiFi): bring the top-level food attributes (name, date, count) into the header, pull the nested food array up to the top level, and flatten each food.data.keyvalues into a dict/hashmap.
{
"header": {
"location": "sydney, au",
"weather": "sunny",
"date": "2018-03-08",
"count": 2
},
"foods": [
{
"name": "chocolate",
"date": "2018-03-08T12:59:58-03:00",
"rating": "10"
},
{
"name": "hot dog",
"date": "2018-03-08T09:35:17-03:00",
"rating": "7",
"topping": "mustard",
"bun_type": "toasted",
"servings": "2"
}
]
}
I've got the first data part working, but I'm not sure how to handle the nested food element. The top-level food info needs to move into the header section, and the second-level food array needs to have its data.keyvalues flattened out.
Current spec... (only handles the top data.keyvalues)
[
{
"operation": "shift",
"spec": {
"data": {
"keyvalues": {
"*": { "@value": "@key" }
}
}
}
}
]
Spec
[
{
"operation": "shift",
"spec": {
"data": {
"keyvalues": {
"*": {
"value": "header.@(1,key)"
}
}
},
"food": {
"date": "header.date",
"count": "header.count",
"food": {
"*": {
"name": "foods[&1].name",
"date": "foods[&1].date",
"rating": "foods[&1].rating",
"data": {
"keyvalues": {
"*": {
"value": "foods[&4].@(1,key)"
}
}
}
}
}
}
}
}
]