Split string inside JSON into multiple parameters - json

I have a JSON like this:
[
{
"id": "28573041|utm_source=vodafone&utm_medium=banner&utm_campaign=smartphones",
"date": "2022-05-03"
},
{
"id": "28573041|utm_campaign=Vodafone_uppers_2022",
"date": "2022-05-03"
}
]
I want to split these ids like:
before | - this is the actual id;
before = - this is a property name for parameters;
after = - this is a property value;
I want to parse it and get this result:
[
{
"id" : "28573041",
"date" : "2022-05-03",
"utm_source" : "vodafone",
"utm_medium" : "banner",
"utm_campaign" : "smartphones"
},
{
"id" : "28573041",
"date" : "2022-05-03",
"utm_campaign" : "Vodafone_uppers_2022"
}
]
Parameters after | can differ and their order is not guaranteed, but there are only 5 possible variants:
utm_source
utm_medium
utm_campaign
utm_term
utm_content
Any ways to do it with JOLT or other NiFi tools?

Other solution in this case.
[
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "aux": "=split(\\|,@(1,id))",
        "id": "=firstElement(@(1,aux))",
        "newInfo": "=lastElement(@(1,aux))",
        "auxFinal": "=split(&,@(1,newInfo))"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "id": "[#2].&",
        "date": "[#2].&",
        "auxFinal": {
          "*": {
            "@": "[#4].fields[].field"
          }
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "fields": {
          "*": {
            "aux": "=split(=,@(1,field))",
            "key": "=firstElement(@(1,aux))",
            "value": "=lastElement(@(1,aux))"
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "id": "[#2].&",
        "date": "[#2].&",
        "fields": {
          "*": {
            "value": "[#4].@(1,key)"
          }
        }
      }
    }
  }
]

Splitting by pipes and equality signs might be used along with distinguishing by @(..,id) qualifiers, presuming that the provided id values are unique per object, such as 28573041 and 28573042 respectively
[
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "idd": "=split('\\|', @(1,id))",
        "id": "@(1,idd[0])",
        "idd1": "=split('&', @(1,idd[1]))"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "@(1,id).&",
        "idd1": {
          "*": "@(2,id).ide&"
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "ide*": "=split('=', @(1,&))"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "&1.&",
        "ide*": {
          "0": "&2.key[]",
          "1": "&2.val[]"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "id": "&1.&",
        "date": "&1.&",
        "val": {
          "*": {
            "@": "&3.@(3,key[&])"
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": ""
    }
  }
]
the demo on the site http://jolt-demo.appspot.com/ is

Hi, you can use the following operations to split it by |, & and =, then shift the related values and remove the extra attributes:
[
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "id_Or": "=split('\\|',@(1,id))",
        "id": "@(1,id_Or[0])",
        "id_without_Or": "@(1,id_Or[1])",
        "id_And": "=split('&',@(1,id_without_Or))",
        "attribute1": "@(1,id_And[0])",
        "attribute11": "=split('=',@(1,attribute1))",
        "attributev1": "@(1,attribute11[1])",
        "attributev11": "@(1,attribute11[0])",
        "attribute2": "@(1,id_And[1])",
        "attribute12": "=split('=',@(1,attribute2))",
        "attributev12": "@(1,attribute12[0])",
        "attributev2": "@(1,attribute12[1])",
        "attribute3": "@(1,id_And[2])",
        "attribute13": "=split('=',@(1,attribute3))",
        "attributev13": "@(1,attribute13[0])",
        "attributev3": "@(1,attribute13[1])",
        "attribute4": "@(1,id_And[3])",
        "attribute14": "=split('=',@(1,attribute4))",
        "attributev14": "@(1,attribute14[0])",
        "attributev4": "@(1,attribute14[1])",
        "attribute5": "@(1,id_And[4])",
        "attribute15": "=split('=',@(1,attribute5))",
        "attributev5": "@(1,attribute15[1])",
        "attributev15": "@(1,attribute15[0])"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "[&1].&",
        "@attributev1": "[#2].@attributev11",
        "@attributev2": "[#2].@attributev12",
        "@attributev3": "[#2].@attributev13",
        "@attributev4": "[#2].@attributev14",
        "@attributev5": "[#2].@attributev15"
      }
    }
  },
  {
    "operation": "remove",
    "spec": {
      "*": {
        "id_Or": "",
        "id_without_Or": "",
        "id_And": "",
        "attribute1": "",
        "attribute2": "",
        "attribute3": "",
        "attribute4": "",
        "attribute5": "",
        "attribute11": "",
        "attribute12": "",
        "attribute13": "",
        "attribute14": "",
        "attribute15": "",
        "attributev1": "",
        "attributev11": "",
        "attributev12": "",
        "attributev2": "",
        "attributev13": "",
        "attributev3": "",
        "attributev14": "",
        "attributev4": "",
        "attributev5": "",
        "attributev15": ""
      }
    }
  }
]

Related

How can we extract key value from JSON array literal in Jolt Spec/ Transformation

I have a use case where we're getting JSON data in a complicated fashion. I have translated this object into a JSON array, and now I am unable to extract the key/value pairs from the array; also, there is no guarantee that we'll receive the Tags every time.
Please suggest how we can extract this from array.
Input JSON :
[
{
"SourceId": "/Apple/bb842437dd4/sourceGroups/ALPHABAT/providers/Mobile.com/phone/isp",
"Tags": "Name\": \"OMapplication\",\"Owner\": \"Breily",
"Tagscopy": [
"Name\": \"OMapplication\"",
"\"Owner\": \"Breily"
],
"ResourceName": "omapps"
},
{
"SourceId": "/Apple/bb842437dd4/sourceGroups/ALPHABAT/providers/Mobile.com/phone/isp",
"Tags": "mobile-source-usage\": \"apple-cloud",
"Tagscopy": [
"mobile-source-usage\": \"apple-cloud"
],
"ResourceName": "omapps"
}
]
Need to do operation on "Tagscopy", we're getting random data in this.
Desired JSON :
[
{
"SourceId": "/Apple/bb842437dd4/sourceGroups/ALPHABAT/providers/Mobile.com/phone/isp",
"Tags": "Name\": \"OMapplication\",\"Owner\": \"Breily",
"Name": "OMapplication",
"Owner": "Breily",
"ResourceName": "omapps"
},
{
"SourceId": "/Apple/bb842437dd4/sourceGroups/ALPHABAT/providers/Mobile.com/phone/isp",
"Tags": "mobile-source-usage\": \"apple-cloud",
"mobile-source-usage": "apple-cloud",
"ResourceName": "omapps"
}
]
Jolt Spec Used :
[
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "SourceIdcopy": "=split('/', @(1,SourceId))",
        "Tagscopy": "=split(',', @(1,Tags))",
        "SourceName": "=lastElement(@(1,SourceIdcopy))"
      }
    }
  },
  {
    "operation": "remove",
    "spec": {
      "*": {
        "SourceIdcopy": ""
      }
    }
  },
  {
    "operation": "modify-default-beta",
    "spec": {
      "*": {
        "*": "&",
        "Tagscopy": {
          "*": "&"
        }
      }
    }
  }
]
You can use the following transformation spec
[
  {
    // Split members of the "Tagscopy" array with integer(0,1) suffixed keys
    "operation": "shift",
    "spec": {
      "*": {
        "*": "&1.&",
        "Tagscopy": {
          "*": {
            "@": "&3.&2&1"
          }
        }
      }
    }
  },
  {
    // Split related strings by colon characters
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "Tagscopy*": "=split(': ', @(1,&))"
      }
    }
  },
  {
    // Match components of those array component1 against component2
    "operation": "shift",
    "spec": {
      "*": {
        "*": "&1.&",
        "Tagscopy*": {
          "@1,&[1]": "&2.&1.@(2,&[0])"
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "Tagscopy*": {
          "*": "=split('\"', @(1,&))"
        }
      }
    }
  },
  {
    // Prune undesired values for right-hand-side
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "Tagscopy*": {
          "*": "=join('', @(1,&))"
        }
      }
    }
  },
  {
    // Prune undesired values for left-hand-side(keys)
    "operation": "shift",
    "spec": {
      "*": {
        "*": "[&1].&",
        "Tagscopy*": {
          "\"*\"": "[&2].&(0,1)",
          "*\"": "[&2].&(0,1)"
        }
      }
    }
  }
]

JOLT apply different transformations based on success of the call

I'm calling a rest web service and I receive the following responses depending if the call was successful or not.
Input 1:
{
"success": true,
"data": {
"series": "XX/32/V32/LM",
"number": 242,
"end_date": "31/12/2023",
"premium": "2309.68",
"premium_net": "2286.58",
"total_premium": 2494.46,
"commission": 1,
"installments": [
{
"number": 1,
"due_date": "30/12/2022",
"value": "2494.46",
"currency": "RON"
}
],
"reference_premium": 1061,
"direct_settlement_cover": 1,
"direct_settlement": 184.78,
"direct_settlement_net": 182.93,
"bm": "B0",
"exclusion_countries": [
"BY",
"IL",
"IR",
"MA",
"RUS",
"TN",
"UA"
]
},
"message": "Polița a fost emisă cu succes"
}
Input 2:
{
"success": false,
"message": "Eroare validare date",
"data": {
"pay_document": [
"Trebuie să menționați modalitatea de plată"
]
}
}
And I wrote the following operations that work individually
Success spec:
[
  {
    "operation": "shift",
    "spec": {
      "message": "body.info_message",
      "data": {
        "#1": "body.good_id",
        "series": "body.policy_series",
        "number": "body.policy_number",
        "end_date": "body.policy_end_date",
        "direct_settlement_cover": {
          "1": {
            "#Decontare directa": "clauses[0].clause_name",
            "#1": "clauses[0].clause_id"
          }
        },
        "direct_settlement": "clauses[0].premium",
        "installments": {
          "*": {
            "number": "body.installments[&1].number",
            "value": "body.installments[&1].amount",
            "currency": "body.installments[&1].currency",
            "due_date": "body.installments[&1].due_date"
          }
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "body": {
        "good_id": "=toInteger"
      }
    }
  }
]
Error spec:
[
  {
    "operation": "shift",
    "spec": {
      "message": "body.info_message",
      "data": {
        "*": {
          "*": "body.error_message"
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "error_message": "=join(' | ',@(1,&))"
      }
    }
  }
]
Now I'm trying to combine those two and display the error messages if success is false or return the model to be saved. I tried to place success inside my transformation but it keeps returning null. What is the best practice in this regard?
You can use this spec:
I'm just adding the first shift operator and changing the data key with true and false in your code. So the final code is like the below:
[
  {
    "operation": "shift",
    "spec": {
      "*": "&",
      "data": "@(1,success)"
    }
  },
  {
    "operation": "shift",
    "spec": {
      "message": "body.info_&",
      "true": {
        "#1": "body.good_id",
        "series": "body.policy_&",
        "number": "body.policy_&",
        "end_date": "body.policy_&",
        "direct_settlement_cover": {
          "1": {
            "#Decontare directa": "clauses[0].clause_name",
            "#1": "clauses[0].clause_id"
          }
        },
        "direct_settlement": "clauses[0].premium",
        "installments": {
          "*": {
            "*": "body.&2[&1].&",
            "value": "body.&2[&1].amount"
          }
        }
      },
      "false": {
        "*": {
          "*": "body.error_message"
        }
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "good_id": "=toInteger",
        "error_message": "=join(' | ',@(1,&))"
      }
    }
  }
]

JOLT shift through properties with different names

I have a JSON:
{
"relations": {
"advertiser_id": {
"9968": {
"name": "Advance/Unicredit",
"id": 9968
},
"10103": {
"name": "Advance/ ORIMI",
"id": 10103
}
},
"campaign_id": {
"256292": {
"name": "Interests_Aidata",
"id": 256292,
"advertiser_id": 9968
},
"257717": {
"name": "G_14.04",
"id": 257717,
"advertiser_id": 10103
}
}
}
}
I thought that it would be an easy shift operation, but I'm stuck because all the values are inside random property names like "9968": I don't understand how to move through the JSON with these different property names.
Expected Output:
[
{
"name": "Interests_Aidata",
"id": 256292,
"advertiser_id": 9968
},
{
"name": "G_14.04",
"id": 257717,
"advertiser_id": 10103
}
]
UPDATE
Is it possible to add the top-level key (under relations), advertiser_id or campaign_id, as an additional property like in the example?
[
{
"name": "Interests_Aidata",
"id": 256292,
"advertiser_id": 9968,
"entity_type": "campaign_id"
},
{
"name": "G_14.04",
"id": 257717,
"advertiser_id": 10103,
"entity_type": "campaign_id"
}
]
If your aim is to accumulate each array of objects under those properties(advertiser_id and campaign_id), then use the following spec
[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          "*": {
            "@": "&2"
          }
        }
      }
    }
  }
]
If you're interested in only the stuff under campaign_id as lately edited, then use
[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "campaign_id": {
          "*": {
            "@": ""
          }
        }
      }
    }
  }
]
The following spec can be used to get the desired result determined by the last update
[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "campaign_id": {
          "*": {
            "*": "[#2].&",
            "$1": "[#2].entity_type"
          }
        }
      }
    }
  }
]

JOLT moving from LHS to RHS and flattening

I am trying to convert the following JSON with JOLT but struggling, any help would be appreciated.
{
"2021-04-14T00:00:00+02:00": {
"249184": {
"SRAD": null,
"T": -50.00000000000001
},
"249185": {
"SRAD": 0.46133333444595337,
"T": null
}
},
"2021-04-14T00:15:00+02:00": {
"249184": {
"SRAD": null,
"T": -50.00000000000001
},
"249185": {
"SRAD": 0.4593333303928375,
"T": null
}
}
}
Desired output: Note here that the timestamps are repeated for each deviceID along with the SRAD and T values.
{
"timestamp": "2021-04-14T00:00:00+02:00",
"deviceID": 249184,
"SRAD":null,
"T":-50.00000000000001
},
{
"timestamp": "2021-04-14T00:00:00+02:00",
"deviceID": 249185,
"SRAD":0.46133333444595337,
"T":null
},
{
"timestamp": "2021-04-14T00:15:00+02:00",
"deviceID": 249184,
"SRAD":null,
"T":-50.00000000000001
},
{
"timestamp": "2021-04-14T00:15:00+02:00",
"deviceID": 249185,
"SRAD": 0.4593333303928375,
"T":null
}
I have tried a bunch of things but keep going in circles.
Here's an alternate spec with 2 shifts that supports any number of fields inside the deviceId object:
[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          "$(1)": "a[].timestamp",
          "$": "b[].deviceId",
          "@": "c[]"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "a": {
        "*": {
          "timestamp": "[&1].timestamp",
          "@(2,b[&])": "[&1]"
        }
      },
      "b": null,
      "c": {
        "*": {
          "*": "[&1].&"
        }
      }
    }
  }
]
[
  // Change null as String, as jolt will not
  // process null values
  {
    "operation": "modify-default-beta",
    "spec": {
      "*": {
        "*": {
          "SRAD": "null",
          "T": "null"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          "SRAD": "&4.SRAD",
          "T": "&4.T",
          "$0": "&4.deviceID",
          "$1": "&4.timestamp"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          "*": {
            "@": "&"
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "0": "[&1].deviceID",
        "1": "[&1].timestamp",
        "2": "[&1].SRAD",
        "3": "[&1].T"
      }
    }
  }
]
You cannot process the JSON having null as value, so null is replaced with "null" using modify-default-beta operation.
Input JSON is grouped by timestamp and deviceid, so deconstruct the json by assigning names to the children nodes and then construct the output JSON.

inner join between two "tables" using JOLT

I'm trying to convert a json to json file using two object arrays and "join", this is the input file:
{
"Employee": [
{
"id": "emp_1",
"firstName": "Jose",
"lastName": "Perez",
"ssn": "ssn1",
"depId": "dep_1"
},
{
"id": "emp_2",
"firstName": "Antonio",
"lastName": "Ramirez",
"ssn": "ssn2",
"depId": "dep_2"
}
],
"Department": [
{
"id": "dep_1",
"description": "Instituto nacional de investigaciones nucleares (ININ)",
"division": "Research"
},
{
"id": "dep_2",
"description": "Instituto Mexicano de Seguro Social (IMSS)",
"division": "Healthcare"
},
{
"id": "dep_3",
"description": "Comision Nacional Bancaria y de Valores (CNBV)",
"division": "Financial"
}
]
}
This is the expected output:
{
"Employee": [
{
"id": "emp_1",
"firstName": "Jose",
"lasttName": "Perez",
"ssn": "ssn1",
"department": "Instituto nacional de investigaciones nucleares (ININ)",
"division": "Research"
},
{
"id": "emp_2",
"firstName": "Antonio",
"lasttName": "Ramirez",
"ssn": "ssn2",
"department": "Instituto Mexicano de Seguro Social (IMSS)",
"division": "Healthcare"
}
]
}
I've been trying to do it, but it is not getting mapped. What am I doing wrong?
This is my spec:
[
  {
    "operation": "shift",
    "spec": {
      "Department": {
        "*": {
          "@": "Department.@id"
        }
      },
      "Employee": "Employee"
    }
  },
  {
    "operation": "shift",
    "spec": {
      "Employee": {
        "*": {
          "depId": {
            "*": {
              "@2": {
                "Department": {
                  "&4": "test"
                }
              }
            }
          }
        }
      }
    }
  }
]
Please, I have already spent a lot of time trying to solve it; does anyone have any idea how to solve it using Jolt: https://github.com/bazaarvoice/jolt ?
Check this spec, make the id in department easier to reach and then compare the values,
[
  {
    "operation": "shift",
    "spec": {
      "Employee": "Employee",
      //make the dep id easier to compare
      "Department": {
        "*": {
          "@": "Department.@(0,id)"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "Employee": {
        "*": {
          "depId": {
            "*": {
              "@(4,Department)": {
                // Compare values and move everything into the employee object
                "@3": "Employee.&",
                "@(&)": "Employee.&.department"
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "Employee": {
        "*": {
          "@": "Employee[]"
        }
      }
    }
  },
  {
    // Object cleansing
    "operation": "shift",
    "spec": {
      "Employee": {
        "*": {
          "id": "Employee[].id",
          "firstName": "Employee[&1].firstName",
          "lastName": "Employee[&1].lastName",
          "ssn": "Employee[&1].ssn",
          "department": {
            "description": "Employee[&2].department",
            "division": "Employee[&2].division"
          }
        }
      }
    }
  }
]
Thank you very much for your answer @Jagadesh
I followed more or less your same approach with minor modifications:
[
  {
    "operation": "shift",
    "spec": {
      "Employee": "Employee",
      "Department": {
        "*": {
          "@": "Department.@id"
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "Employee": {
        "*": {
          "depId": {
            "*": {
              "@(4,Department)": {
                "@(&)": {
                  "@(0,description)": "Employee[&5].department",
                  "@(0,division)": "Employee[&5].division"
                }
              }
            }
          },
          "@": "Employee[&]"
        }
      }
    }
  },
  // just to remove depId from Employee
  {
    "operation": "remove",
    "spec": {
      "Employee": {
        "*": {
          "depId": ""
        }
      }
    }
  }
]