JOLT - Remove duplicates in array - json

I need to remove duplicates from the docAddrs array in my document and keep the rest of the JSON unchanged. The last transformation moves all of the data into the docAddrs array, instead of just the addr objects. This is what I tried:
Input:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"docAddrs": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Spec:
[
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": "#addrId[]"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": "ONE"
}
},
{
"operation": "shift",
"spec": {
"*": {
"docId1": "docId1",
"docId2": "docId2",
"docInfo": "docInfo",
"#": "docAddrs.[]"
}
}
}
]
Output:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"111": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
}
],
"112": [
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Expected Output:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"docAddrs": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Can someone please suggest how I can get this to work. Thanks in advance

You can use the combination of the following specs
[
//exchange key-value pairs for "docAddrs" array
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": {
"$": "&3[#(2,addrId)].#(0)"
}
}
}
}
},
// keep only the first component of each newly formed array-type value;
// the components within each of those arrays are identical anyway
{
"operation": "cardinality",
"spec": {
"docAddrs": {
"*": {
"*": "ONE"
}
}
}
},
// exchange key-value pairs again in order to collect each value array pairs
// under common keys respectively
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": {
"$": "&3.#(0)"
}
}
}
}
},
// distribute the components of each value to their related objects
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": "&2[&].&1"
}
}
}
}
]
The Demo on http://jolt-demo.appspot.com/

Related

JOLT: Combine JSON array using common key

My JSON looks something like this; it's an array of workers and an array of worksites. Each worker entry starts with a worksite id, so I need to merge the workers into their worksites (inside an array).
[
{
"worksite_candidates": {
"worksite_id": "12345",
"worker": {
"id": "1232",
"managerId": "09"
},
"openDate": "2022-10-14",
"finalDate": "2022-12-16"
}
},
{
"worksite_candidates": {
"worksite_id": "12345",
"worker": {
"id": "1233",
"managerId": "08"
},
"openDate": "",
"finalDate": ""
}
},
{
"worksite_candidates": {
"worksite_id": "12347",
"worker": {
"id": "1234",
"managerId": "09"
},
"openDate": "2022-11-10",
"finalDate": "2022-11-12"
}
},
{
"worksite": {
"id": "12345",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8"
}
},
{
"worksite": {
"id": "12347",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8"
}
}
]
I need to transform it with a JOLT spec into something like this, where the workers go inside their worksite as an array:
[
{
"worksite": {
"id": "12345",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8",
"Candidates": [
{
"worker": {
"id": "1232",
"managerId": "09"
},
"openDate": "2022-10-14",
"finalDate": "2022-12-16"
},
{
"worker": {
"id": "1233",
"managerId": "08"
},
"openDate": "",
"finalDate": ""
}
]
}
},
{
"worksite": {
"id": "12347",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8",
"Candidates": [
{
"worker": {
"id": "1234",
"managerId": "09"
},
"openDate": "2022-11-10",
"finalDate": "2022-11-12"
}
]
}
}
]
Please suggest a JOLT spec to get the desired output. I can't find a way to do it.
You can walk within worksite_candidates and worksite objects
while grouping the elements by worksite id values
(values of the worksite_candidates.worksite_id and worksite.id attributes) such as
[
{
"operation": "shift",
"spec": {
"*": {
"worksite_c*": {
"*": "#(1,worksite_id).worksite.Candidates[&2].&" // [&2] is for going two levels up to reach the top level indexes and grabbing them to generate array results
},
"worksite": {
"*": "#(1,id).&1.&"
}
}
}
},
{
// get rid of redundant "worksite_id" attribute
"operation": "remove",
"spec": {
"*": {
"*": {
"*": {
"*": {
"worksite_id": ""
}
}
}
}
}
},
{
// get rid of null components
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
// get rid of topmost labels of objects
"operation": "shift",
"spec": {
"*": ""
}
}
]
the demo on the site http://jolt-demo.appspot.com/ is
I think this solution is correct
[
{
//group element for id and worksite_id
"operation": "shift",
"spec": {
"*": {
"worksite": {
"id": "data.#(1,id).&",
"status": "data.#(1,id).&",
"dayid": "data.#(1,id).&",
"vacancys": "data.#(1,id).&",
"countryid": "data.#(1,id).&",
"arenaid": "data.#(1,id).&"
},
"worksite_candidates": {
"worksite_id": {
"*": {
"#2": "data.&.Candidates"
}
}
}
}
}
},
//create output with array
{
"operation": "shift",
"spec": {
"data": {
"*": "[]"
}
}
},
//remove field worksite_id
{
"operation": "remove",
"spec": {
"*": {
"Candidates": {
"*": {
"worksite_id": ""
}
}
}
}
}
]

JOLT transformation with two level nested array objects

I want to use a Jolt shift to transform the input below into the desired output. The input has two levels of nested arrays. Please help me out if you have a solution for this. Thanks in advance.
Input:
{
"P": {
"type": "manager",
"metadata": {
"id": "mgr1"
},
"properties": {
"firstName": "managerFirstName",
"lastName": "managerLastName",
"employmentId": "employmentId"
},
"S": [
{
"type": "employee",
"metadata": {
"id": "empl1"
},
"properties": {
"last": "employeeOneLastName",
"first": "employeeOneFirstName"
},
"S": [
{
"type": "address",
"metadata": {
"identifier": "addr1"
},
"properties": {
"city": "addressOneCity",
"street": "addressOneStreet"
}
},
{
"type": "address",
"metadata": {
"id": "addr2"
},
"properties": {
"city": "addressTwoCity",
"street": "addressTwoStreet"
}
}
]
},
{
"type": "employee",
"metadata": {
"id": "empl2"
},
"properties": {
"last": "employeeTwoLastName",
"first": "employeeTwoFirstName"
},
"S": [
{
"type": "address",
"metadata": {
"id": "addr3"
},
"properties": {
"city": "addressThreeCity",
"street": "addressThreeStreet"
}
},
{
"type": "address",
"metadata": {
"id": "addr4"
},
"properties": {
"city": "addressFourCity",
"street": "addressFourStreet"
}
},
{
"type": "address",
"metadata": {
"id": "addr5"
},
"properties": {
"city": "addressFiveCity",
"street": "addressFiveStreet"
}
}
]
},
{
"type": "officeinfo",
"metadata": {
"id": "office1"
},
"properties": {
"country": "myCountry",
"city": "myCity",
"employmentId": "employmentId"
}
}
]
}
}
Desired Output:
{
"manager": {
"type": "manager",
"firstName": "managerFirstName",
"lastName": "managerLastName",
"employmentId": "employmentId",
"employee": [
{
"type": "employee",
"lastName": "employeeOneLastName",
"firstName": "employeeOneFirstName",
"address": [
{
"type": "address",
"city": "addressOneCity",
"street": "addressOneStreet"
},
{
"type": "address",
"city": "addressTwoCity",
"street": "addressTwoStreet"
}
]
},
{
"address": [
{
"city": "addressThreeCity",
"street": "addressThreeStreet",
"type": "address"
},
{
"type": "address",
"city": "addressFourCity",
"street": "addressFourStreet"
},
{
"city": "addressFiveCity",
"street": "addressFiveStreet",
"type": "address"
}
],
"type": "employee",
"lastName": "employeeTwoLastName",
"firstName": "employeeTwoFirstName"
}
],
"office": {
"type": "officeinfo",
"country": "myCountry",
"city": "myCity",
"employmentId": "employmentId"
}
}
}
Here is my spec, which is not working as desired. The addresses associated with the employees are not correct. Notice that employeeOne should have both addresses One and Two, and employeeTwo should have addresses Three, Four, and Five.
[
{
"operation": "shift",
"spec": {
"P": {
"type": "manager.&",
"properties": {
"firstName": "manager.firstName",
"lastName": "manager.lastName",
"employmentId": "manager.employmentId"
},
"S": {
"*": {
"type": {
"officeinfo": {
"$": "manager.office.type"
},
"employee": {
"$": "manager.employee[&3].type"
}
},
"*": {
"last": "manager.employee[&2].lastName",
"first": "manager.employee[&2].firstName",
"country": "manager.office.country",
"city": "manager.office.city",
"employmentId": "manager.office.employmentId"
},
"S": {
"*": {
"type": "manager.employee[&3].address[&1].type",
"*": {
"city": "manager.employee[&2].address[&4].city",
"street": "manager.employee[&2].address[&4].street"
}
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
}
]
The trick is using #(<int>,type) at appropriate locations in order to separate the sub-objects and sub-arrays properly such as
[
{
"operation": "shift",
"spec": {
"*": {
"type": "#(1,type).&",
"prop*": {
"*": "#(2,type).&"
},
"S": {
"*": {
"type": {
"employee": {
"#1": "#(5,type).&1[&3].&2" // count how many ":" and/or "{" chars should be traversed while going up the tree in order to reach the level of "type" to determine the <int>(it's 5 in this case)
},
"*info": {
"#1": "#(5,type).&(1,1).&2" // "&(1,1)" represents the piece where "*" stays, so it's "office" in this case
}
},
"prop*": {
"*st": "#(4,type).#(2,type)[&2].&Name"
},
"S": {
"*": {
"prop*": {
"#(1,type)": "#(6,type).#(4,type)[&4].#(2,type)[&2].type",
"*": "#(6,type).#(4,type)[&4].#(2,type)[&2].&"
}
}
}
}
}
}
}
},
{
// stands for nothing but sorting
"operation": "shift",
"spec": {
"*": {
"type": "&1.&",
"firstName": "&1.&",
"lastName": "&1.&",
"employmentId": "&1.&",
"employee": "&1.&",
"offi*": "&1.&"
}
}
}
]
Note that there is no need to repeat the key names on the right-hand side of the key-value pairs in order to replicate them; just use ampersands. Btw, the second shift transformation is only added to produce the desired key order, i.e. it might be removed without affecting the transformation itself.

Convert Flat json to Nested Json with multiple arrays using Jolt transform

I'm trying to write a spec to do the below transformation using jolt transformation. I need to convert the flat json to nested Json
I am having some trouble with converting the flat JSON to Nested JSON. I have looked at examples and didn't get any closer as to what is mentioned above. I need to transform a JSON structure by using a JOLT spec. I use https://jolt-demo.appspot.com to test the following below.
Input :
[
{
"container_id": "a",
"carrier_scac": "b",
"location": "banglore",
"state": "karnataka",
"country": "India"
},
{
"container_id": "a",
"carrier_scac": "b",
"location": "pune",
"state": "maharashtra",
"country": "India"
},
{
"container_id": "c",
"carrier_scac": "d",
"location": "dharwad",
"state": "kan",
"country": "India"
},
{
"container_id": "c",
"carrier_scac": "d",
"location": "hubli",
"state": "kant",
"country": "India"
}
]
Desired Output:
[
{
"load": {
"container_id": "a",
"carrier_scac": "b",
"stops": [
{
"location": "banglore",
"state": "karnataka"
},
{
"location": "pune",
"state": "maharashtra"
}
]
},
"containerInfo": {
"country": "India"
}
},
{
"load": {
"container_id": "c",
"carrier_scac": "d",
"stops": [
{
"location": "dharwad",
"state": "kan"
},
{
"location": "hubli",
"state": "kant"
}
]
},
"containerInfo": {
"country": "India"
}
}
]
Jolt Spec that I'm using :
[
{
"operation": "shift",
"spec": {
"*": {
"container_id": "#(1,container_id).&",
"carrier_scac": "#(1,container_id).&",
"*": "#(1,container_id).stops[&1].&"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"container_id": "ONE",
"carrier_scac": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
Current spec is pretty good, just needs some little modifications such as adding load and containerInfo nodes, and shortening a bit as below
[
{
"operation": "shift",
"spec": {
"*": {
"*": "#(1,container_id).load.stops[&1].&", // "else" case
"country": "#(1,container_id).load.containerInfo.&",
"c*": "#(1,container_id).load.&" // the attributes starting with "c" but other than "country"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"*": {
"c*": "ONE",
"containerInfo": {
"*": "ONE"
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]

apache nifi- how to create a custom date format

I am new to nifi and I am trying to create a week_start_date and week_number from the date in json format.
I am using jolt transform.
The input is google ads api response.
This is the spec I use:
[
{
"operation": "shift",
"spec": {
"customer_id": {
"*": "[&].customer_id"
},
"customer_name": {
"*": "[&].customer_name"
},
"account_currency_code": {
"*": "[&].account_currency_code"
},
"campaign_id": {
"*": "[&].campaign_id"
},
"campaign_name": {
"*": "[&].campaign_name"
},
"campaign_status": {
"*": "[&].campaign_status"
},
"ad_group_id": {
"*": "[&].ad_group_id"
},
"ad_group_name": {
"*": "[&].ad_group_name"
},
"clicks": {
"*": "[&].clicks"
},
"cost": {
"*": "[&].cost"
},
"impressions": {
"*": "[&].impressions"
},
"device": {
"*": "[&].device"
},
"date": {
"*": "[&].date"
},
"week_number": {
"*": "[&].week_number"
},
"year": {
"*": "[&].year"
},
"keywords": {
"*": "[&].keywords"
},
"keywords_id": {
"*": "[&].keywords_id"
}
}
},
{
"operation": "modify-default-beta",
"spec": {
"date": {
"date": "=intSubtract(#(1,date))"
}
}
}
]
The expected output should be:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "48",
"week_start_date": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
the output I have:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
I am not sure how to use modify-default-beta correctly.
Also I tried looking at the docs:
https://github.com/bazaarvoice/jolt/tree/master/jolt-core/src/test/resources/json/shiftr
Also, what is the correct way to understand the structure?

Jolt Conversion - iterate list within a list and form single list

I am trying to iterate over lists inside a list and form a single list with multiple objects. I am able to iterate over the lists, but the tags that precede the inner list are not applied to each object when the inner list contains more than one object.
My input request is like below:
[
{
"success": [
{
"id": "4",
"Offers": [
{
"name": "Optional",
"type": {
"id": "1",
"name": "Optional"
},
"productOfferings": [
{
"id": "3",
"name": "Test1"
}
]
},
{
"name": "Default",
"type": {
"id": "2",
"name": "Default"
},
"productOfferings": [
{
"id": "1",
"name": "Test2"
},
{
"id": "2",
"name": "Test3"
}
]
}
]
}
]
}
]
My spec is like below:
[
{
"operation": "shift",
"spec": {
"*": {
"success": {
"*": {
"Offers": {
"*": {
"name": "[&1].[&3].typeName",
"type": {
"id": "[&2].[&4].typeNameId",
"name": "[&2].[&4].typeNameValue"
},
"productOfferings": {
"*": {
"id": "[&3].[&1].id",
"name": "[&3].[&1].name"
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "[]"
}
}
}
]
Output Received from spec:
[
{
"typeName": "Optional",
"typeNameId": "1",
"typeNameValue": "Optional",
"id": "3",
"name": "Test1"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "1",
"name": "Test2"
},
{
"id": "2",
"name": "Test3"
}
]
But Expected output is like below:
[
{
"typeName": "Optional",
"typeNameId": "1",
"typeNameValue": "Optional",
"id": "3",
"name": "Test1"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "1",
"name": "Test2"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "2",
"name": "Test3"
}
]
If there are more objects inside productOfferings object, I am not able to add typeName,typeNameId, typeNameValue to the actual object. Please help to fix this issue.
It seems you just need to collect everything from within the productOfferings array, while prefixing each target key with the common identifier [&3].[&1], such as
[
{
"operation": "shift",
"spec": {
"*": {
"success": {
"*": {
"Offers": {
"*": {
"productOfferings": {
"*": {
"#(2,name)": "[&3].[&1].typeName",
"#(2,type.id)": "[&3].[&1].typeNameId",
"#(2,type.name)": "[&3].[&1].typeNameValue",
"*": "[&3].[&1].&"
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "[]"
}
}
}
]