Retain some json key value pairs while transforming them using jolt - json

I just started learning Jolt. I want to transform a JSON document into a desired format. I'm almost done, but I'm stuck at this point.
My input json looks like
{ "first_name": {
"label": "First name",
"type": "text",
"value": "John"
},
"last_name": {
"label": "Last name",
"type": "text",
"value": "Doe"
},
"email": {
"label": "Email",
"type": "text",
"value": "johndoe@gmail.com"
},
"id": 123,
"marital_status": "Single",
"author_id": null,
"company": null,
"address": {
"city": {
"label": "city",
"dom_type": "dropdown",
"value": "test"
},
"state": {
"label": "state",
"dom_type": "dropdown",
"value": "state"
},
"country": {
"label": "country",
"dom_type": "dropdown",
"value": "country"
}
}
}
to an output format like this
{
"first_name" : "John", "last_name" : "Doe", "email" : "johndoe@gmail.com",
"id": 123, "marital_status": "Single", "author_id": null, "company": null,
"address" : { "city" : "test", "state" : "test", "country" : "test" }
}
I have tried this shift spec
[
{
"operation": "shift",
"spec": {
"address": {
"*": {
"@value": "address.&1"
}
},
"*": {
"@value": "&1"
}
}
}
]
And got
{
"first_name" : "John", "last_name" : "Doe", "email" : "johndoe@gmail.com", "address" : { "city" : "test", "state" : "test", "country" : "test" }
}
Because the matcher "*" discards the simple key value pairs. I know that I'm missing something. Any help?

Regarding "Because the matcher "*" discards the simple key value pairs": it isn't discarding them; it is matching them, but not finding a sub-property named "value" beneath them.
Your input data is fundamentally in 3 different formats:
- Things below "address"
- Things that are singular in value, like "id"
- Things that have nested data
The "*" just matches the Left-hand-side / key.
In this case you will need to explicitly list either the keys that are singular, or the keys that have nested data.
Spec
[
{
"operation": "shift",
"spec": {
"address": {
"*": {
"@value": "address.&1"
}
},
"id|marital_status|author_id|company": "&",
"*": {
"@value": "&1"
}
}
}
]

Related

How to correct this jolt transformation

I have an input JSON like this.
{
"trackingNumber": "1ZEA83550362028861",
"localActivityDate": "20210324",
"localActivityTime": "183500",
"scheduledDeliveryDate": "20220525",
"actualDeliveryDate": "20220729",
"actualdeliveryTime": "183500",
"gmtActivityDate": "20210324",
"gmtActivityTime": "223500",
"activityStatus": {
"type": "G",
"code": "OR",
"description": "Origin Scan"
},
"activityLocation": {
"city": "RANDALLSTOWN,",
"stateProvince": "MD",
"postalCode": "21133",
"country": "US"
}
}
I have written a jolt transformation for this JSON
[
{
"operation": "shift",
"spec": {
"trackingNumber": "transformedPayload.trackingInfo",
"localActivityDate": "tmp_Date",
"localActivityTime": "tmp_Time",
"scheduledDeliveryDate": "tmp_App",
"actualDeliveryDate": "tmp_Del_Date",
"actualdeliveryTime": "tmp_Del_Time",
"activityStatus": {
"type": "transformedPayload.events.type",
"code": "transformedPayload.events.statusCode",
"description": "transformedPayload.events.statusDescription"
},
"activityLocation": {
"city": "transformedPayload.address.city",
"stateProvince": "transformedPayload.address.state",
"postalCode": "transformedPayload.address.postalCode",
"country": "transformedPayload.address.country"
}
}
},
{
"operation": "modify-default-beta",
"spec": {
"tmp_Year": "=substring(@(1,tmp_Date),0,4)",
"tmp_Month": "=substring(@(1,tmp_Date),4,6)",
"tmp_Day": "=substring(@(1,tmp_Date),6,8)",
"tmp_Hours": "=substring(@(1,tmp_Time),0,2)",
"tmp_Minutes": "=substring(@(1,tmp_Time),2,4)",
"tmp_Seconds": "=substring(@(1,tmp_Time),4,6)",
"timeStamp": "=concat(@(1,tmp_Year),'-',@(1,tmp_Month),'-',@(1,tmp_Day),'T',@(1,tmp_Hours),':',@(1,tmp_Minutes),':',@(1,tmp_Seconds),'Z')",
"tmp_App_Year": "=substring(@(1,tmp_App),0,4)",
"tmp_App_Month": "=substring(@(1,tmp_App),4,6)",
"tmp_App_Day": "=substring(@(1,tmp_App),6,8)",
"appointmentTime": "=concat(@(1,tmp_App_Year),'-',@(1,tmp_App_Month),'-',@(1,tmp_App_Day))",
"tmp__Del_Year": "=substring(@(1,tmp_Del_Date),0,4)",
"tmp_Del_Month": "=substring(@(1,tmp_Del_Date),4,6)",
"tmp_Del_Day": "=substring(@(1,tmp_Del_Date),6,8)",
"tmp_Del_Hours": "=substring(@(1,tmp_Del_Time),0,2)",
"tmp_Del_Minutes": "=substring(@(1,tmp_Del_Time),2,4)",
"tmp_Del_Seconds": "=substring(@(1,tmp_Del_Time),4,6)",
"deliveryTime": "=concat(@(1,tmp__Del_Year),'-',@(1,tmp_Del_Month),'-',@(1,tmp_Del_Day),'T',@(1,tmp_Del_Hours),':',@(1,tmp_Del_Minutes),':',@(1,tmp_Del_Seconds),'Z')"
}
},
{
"operation": "remove",
"spec": {
"tmp_*": ""
}
}
]
This transforms the data into this format.
{
"transformedPayload" : {
"trackingInfo" : "1ZEA83550362028861",
"events" : {
"type" : "G",
"statusCode" : "OR",
"statusDescription" : "Origin Scan"
},
"address" : {
"city" : "RANDALLSTOWN,",
"state" : "MD",
"postalCode" : "21133",
"country" : "US"
}
},
"timeStamp" : "2021-03-24T18:35:00Z",
"appointmentTime" : "2022-05-25",
"deliveryTime" : "2022-07-29T18:35:00Z"
}
What changes do I need to make in the transformation so that timeStamp, appointmentTime and deliveryTime are also nested under transformedPayload, i.e. so that the output looks like this (correct format)?
{
"transformedPayload" : {
"trackingInfo" : "1ZEA83550362028861",
"events" : {
"type" : "G",
"statusCode" : "OR",
"statusDescription" : "Origin Scan"
},
"address" : {
"city" : "RANDALLSTOWN,",
"state" : "MD",
"postalCode" : "21133",
"country" : "US"
},
"timeStamp" : "2021-03-24T18:35:00Z",
"appointmentTime" : "2022-05-25",
"deliveryTime" : "2022-07-29T18:35:00Z"
}
}
This is my first time doing a Jolt transformation, so I am confused about how to resolve this. Any help is appreciated.
You are already very close to the solution. I can offer the following spec, similar to yours, to get to the desired output:
[
{
"operation": "modify-overwrite-beta",
"spec": {
"tsY": "=substring(@(1,localActivityDate),0,4)",
"tsM": "=substring(@(1,localActivityDate),4,6)",
"tsD": "=substring(@(1,localActivityDate),6,8)",
"tsH": "=substring(@(1,localActivityTime),0,2)",
"tsMi": "=substring(@(1,localActivityTime),2,4)",
"tsS": "=substring(@(1,localActivityTime),4,6)",
"timeStamp": "=concat(@(1,tsY),'-',@(1,tsM),'-',@(1,tsD),'T',@(1,tsH),':',@(1,tsMi),':',@(1,tsS),'Z')",
"aTY": "=substring(@(1,scheduledDeliveryDate),0,4)",
"aTM": "=substring(@(1,scheduledDeliveryDate),4,6)",
"aTD": "=substring(@(1,scheduledDeliveryDate),6,8)",
"appointmentTime": "=concat(@(1,aTY),'-',@(1,aTM),'-',@(1,aTD))",
"dTY": "=substring(@(1,actualDeliveryDate),0,4)",
"dTM": "=substring(@(1,actualDeliveryDate),4,6)",
"dTD": "=substring(@(1,actualDeliveryDate),6,8)",
"dTH": "=substring(@(1,actualdeliveryTime),0,2)",
"dTMi": "=substring(@(1,actualdeliveryTime),2,4)",
"dTS": "=substring(@(1,actualdeliveryTime),4,6)",
"deliveryTime": "=concat(@(1,dTY),'-',@(1,dTM),'-',@(1,dTD),'T',@(1,dTH),':',@(1,dTMi),':',@(1,dTS),'Z')"
}
},
{
"operation": "shift",
"spec": {
"*Number": "&(0,1)Info",
"activityStatus": {
"*": "events.&"
},
"activityLocation": {
"*": "address.&"
},
"timeStamp": "&",
"appointmentTime": "&",
"deliveryTime": "&"
}
}
]

Jolt transform expand dot as a JSON Object

Is there a way in Jolt to convert a dot operator in JSON value to a nested JSON object while transforming the input ?
For example in the below json I would like target_property of id=2 to be converted to a json object as shown in the expected O/P. Any help is appreciated
Input:
{
"name": "Test",
"email": "Test",
"form_id": "123",
"field_list": [
{
"id": 1,
"value": "Test Subject",
"is_custom_field": false,
"target_property": "subject"
},
{
"id": 2,
"value": "Test Description",
"is_custom_field": false,
"target_property": "comment.body"
}
]
}
Jolt Transform tried
[
{
"operation": "shift",
"spec": {
"form_id": "ticket.ticket_form_id",
"name": "ticket.requester.name",
"email": "ticket.requester.email",
"field_list": {
"*": {
"is_custom_field": {
"true": {
"@(2,target_property)": "ticket.custom_fields[&3].id",
"@(2,value)": "ticket.custom_fields[&3].value"
},
"*": {
"@(2,value)": "ticket.@(3,target_property)"
}
}
}
}
}
}
]
Current Output
{
"ticket" : {
"ticket_form_id" : "123",
"requester" : {
"name" : "Test",
"email" : "Test"
},
"subject" : "Test Subject",
"comment.body" : "Test Description"
}
}
Expected Output
{
"ticket": {
"ticket_form_id": "123",
"requester": {
"name": "Test",
"email": "Test"
},
"subject": "Test Subject",
"comment": {
"body": "Test Description"
}
}
}
You can add another shift transformation spec such that
{
"operation": "shift",
"spec": {
"*": {
"*": "&1.&",
"*.*": "&1.&(0,1).&(0,2)"
}
}
}
where
"*.*" represents the attributes having a dot within them,
the zeroes within the &(0,..) expressions stand for the current level, and
the 1 and 2 given as their second arguments represent the first and the second pieces of the split key (in this case comment.body),
respectively.

json - jolt : How to extract common value to key

I am trying to convert a JSON to different format using JOLT (using NiFi JoltTransformJson processor).
Input Json
[
{
"date": "202001010000",
"name": "test1",
"val": "1",
"status": "0"
},
{
"date": "202001010000",
"name": "test2",
"val": "2",
"status": "0"
},
{
"date": "202001010001",
"name": "test1",
"val": "3",
"status": "0"
},
{
"date": "202001010001",
"name": "test2",
"val": "4",
"status": "0"
}
]
and I want to Output like
{
"202001010000" : [ {
"name" : "test1",
"val" : "1",
"status" : "0"
}, {
"name" : "test2",
"val" : "2",
"status" : "0"
}
],
"202001010001" : [ {
"name" : "test1",
"val" : "3",
"status" : "0"
}, {
"name" : "test2",
"val" : "4",
"status" : "0"
}
]
}
I'm trying to convert the JSON format using a Jolt transform, but I can't get it to work.
Make the date node value as key for each object in the array.
Remove the date node from the object.
Spec :
[
{
"operation": "shift",
"spec": {
"*": {
"date": {
"@1": "@(2,date)"
}
}
}
},
{
"operation": "remove",
"spec": {
"*": {
"*": {
"date": ""
}
}
}
}
]

Jolt Transform - Unable to transform json array into elements are objects with the specified key/value pairs

I'm trying to transform JSON into a structure containing an array whose elements are objects with the specified key/value pairs.
{
"Resource": "sorPersonRole",
"Roleid": "1",
"Timestamp": "2010-06-30 00:00:00.0",
"Release": "Public",
"DOB": "2064-09-05",
"Active": "Y",
"enterprise_id": "a12s33",
"Inactive_enterprise_id": "",
"emp_ID": "123456",
"Inactive_emp_id": "000821972",
"Username": "",
"A_ID": "fsgf1234jhgfs3",
"P_ID": "w123456",
"Is Email Valid": "Y",
"Flag": "N",
"Registered": "spring",
"Description": "mainland corp",
"End Date": null
}
Expected output:
{
"meta" : {
"Resource" : "sorPersonRole",
"Roleid" : "1",
"Timestamp" : "2010-06-30 00:00:00.0",
"Release" : "Public",
"Sorid" : "w123456"
},
"sorAttributes" : {
"DOB" : "2064-09-05",
"Active" : "Y",
"End Date" : null,
"identifiers":
[
{
"type" : "enterprise",
"enterprise_id" : "a12s33",
"Username" : ""
},
{
"type" : "former-enterprise",
"Inactive_enterprise_id" : ""
},
{
"type" : "UID",
"emp_ID" : "123456",
"Inactive_emp_id" : "000821972"
},
{
"type" : "National",
"A_ID" : "fsgf1234jhgfs3"
"A_ID" : "fsgf1234jhgfs3"
}
],
"mainLand:com:adhoc" : {
"Is Email Valid" : "Y",
"Flag" : "N",
"Registered" : "spring",
"Description" : "mainland corp"
}
}
}
Current Jolt spec, which does not give me the desired output:
[
{
"operation": "shift",
"spec": {
"Resource": "meta.&",
"P_ID": "meta.Sorid",
"Roleid": "meta.&",
"Timestamp": "meta.&",
"Release": "meta.&",
"enterprise_id": "sorAttributes.Identifiers.type.enterprise.&",
"Inactive_enterprise_id": "sorAttributes.Identifiers.type.former-enterprise.&",
"emp_ID": "sorAttributes.Identifiers.type.UID.&",
"Inactive_emp_id": "sorAttributes.Identifiers.type.UID.&",
"Username": "sorAttributes.Identifiers.type.enterprise.&",
"A_ID": "sorAttributes.Identifiers.type.National.&",
"Is Email Valid": "sorAttributes.mainLand:com:adhoc.&",
"Flag": "sorAttributes.mainLand:com:adhoc.&",
"Registered": "sorAttributes.mainLand:com:adhoc.&",
"Description": "sorAttributes.mainLand:com:adhoc.&",
"*": "sorAttributes.&"
}
}
]
I have tried the different specs provided on different websites, but could not match the expected output. I also tried using two shift operations, but no luck. Any help or suggestion will be appreciated.
Thanks.
This can help,
For the nodes to be shifted into the identifier array, shift one level more.
[
{
"operation": "shift",
"spec": {
"Resource": "meta.&",
"Roleid": "meta.&",
"Timestamp": "meta.&",
"Release": "meta.&",
"P_ID": "meta.Sorid",
"DOB": "sorAttributes.&",
"Active": "sorAttributes.&",
"End Date": "sorAttributes.&",
"Is Email Valid": "sorAttributes.mainLand:com:adhoc.&",
"Flag": "sorAttributes.mainLand:com:adhoc.&",
"Registered": "sorAttributes.mainLand:com:adhoc.&",
"Description": "sorAttributes.mainLand:com:adhoc.&",
"enterprise_id": {
"#enterprise": "sorAttributes.identifiers[#2].type",
"@": "sorAttributes.identifiers[#2].&",
"@(1,Username)": "sorAttributes.identifiers[#2].Username"
},
"Inactive_enterprise_id": {
"#former-enterprise": "sorAttributes.identifiers[#2].type",
"@": "sorAttributes.identifiers[#2].&"
},
"Inactive_emp_id": {
"#UID": "sorAttributes.identifiers[#2].type",
"@": "sorAttributes.identifiers[#2].&",
"@(1,emp_ID)": "sorAttributes.identifiers[#2].emp_ID"
},
"A_ID": {
"#National": "sorAttributes.identifiers[#2].type",
"@": "sorAttributes.identifiers[#2].&"
}
}
}, {
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
}
]

Jolt grouping together

I have this input in JSON, I am having difficulty grouping things together:
[
{
"PK": "123",
"SURNAME": "CHEN",
"SEX": "F",
"DATE_OF_BIRTH": "1962-08-29 00:00:00.0",
"PHONE_TYPE": "05",
"PHONE_NO": "12312312",
"OPERATION": "INSERT"
}, {
"PK": "123",
"SURNAME": "CHEN",
"SEX": "F",
"DATE_OF_BIRTH": "1962-08-29 00:00:00.0",
"PHONE_TYPE": "04",
"PHONE_NO": "78787878",
"OPERATION": "UPDATE"
},{
"PK": "456",
"SURNAME": "DEV",
"SEX": "M",
"DATE_OF_BIRTH": "1953-06-06 00:00:00.0",
"PHONE_TYPE": "05",
"PHONE_NO": "34343434",
"OPERATION": "INSERT"
}, {
"CLIENT_ID": "456",
"SURNAME": "DEV",
"SEX": "M",
"DATE_OF_BIRTH": "1953-06-06 00:00:00.0",
"PHONE_TYPE": "02",
"PHONE_NO": "56565656",
"OPERATION": "DELETE"
}
]
And this is the expected output:
{
"Customers": [{
"MatchingProfile": {
"CustomerNumber": "", // leave blank
"DBType": "Oracle",
"DBKey": "123",
"LastName": "CHEN",
"Gender": "Female",
"Birthdate": "1962-08-29",
},
"Contacts": [{
"ContactType": "Fax",
"CountryCode": "", // leave blank
"Phone_Number": "12312312",
"Status": "Active"
}, {
"ContactType": "Mobile",
"CountryCode": "", // leave blank
"PhoneNumber": "78787878",
"Status": "Active"
}
]
},{
"MatchingProfile": {
"CustomerNumber": "", // leave blank
"DBType": "Oracle",
"DBKey": "456",
"LastName": "DEV",
"Gender": "Male",
"Birthdate": "1953-06-06",
},
"Contacts": [{
"ContactType": "Fax",
"CountryCode": "", // leave blank
"PhoneNumber": "34343434",
"Status": "Active"
}, {
"ContactType": "Office",
"CountryCode": "", // leave blank
"PhoneNumber": "56565656",
"Status": "Inactive"
}
]
}
]
}
The SEX from input is "M", "F", plus some other coded values. Corresponding values for the output Gender is "Male", "Female" and left "" (blank) otherwise. (Don't accuse me of being gender-biased, I know, this is a project requirement, okay? Not my call)
The OPERATION from input that is "INSERT" and "UPDATE" will be a corresponding Status: "Active" ; for "DELETE" it will be Status : "Inactive".
Plus the Birthdate output is truncated equivalent of DATE_OF_BIRTH, minus the time.
The PHONE_TYPE values are the following: 02 - "Office", 04 - "Mobile", 05 - "Fax" (I purposely left out the others).
Is it possible to have a mapping for this in Jolt? Can you show a spec? I'm new to Jolt and I am a bit confused. This is 10x harder than Excel Pivot.
This is pretty much as close as OOTB Jolt can get. Note Jolt is for changing the structure of your data, not doing custom data mappings of things like "PHONE_TYPE": "04" means "Mobile".
Transformed Output
{
"Customers" : [ {
"MatchingProfile" : {
"DBKey" : "123",
"Gender" : "F",
"LastName" : "CHEN",
"Birthdate" : "1962-08-29 00:00:00.0",
"Contacts" : [ {
"ContactType" : "05",
"Phone_Number" : "12312312",
"Status" : "INSERT"
}, {
"ContactType" : "04",
"Phone_Number" : "78787878",
"Status" : "UPDATE"
} ]
}
}, {
"MatchingProfile" : {
"DBKey" : "456",
"Gender" : "M",
"LastName" : "DEV",
"Birthdate" : "1953-06-06 00:00:00.0",
"Contacts" : [ {
"ContactType" : "05",
"Phone_Number" : "34343434",
"Status" : "INSERT"
}, {
"ContactType" : "02",
"Phone_Number" : "56565656",
"Status" : "DELETE"
} ]
}
} ]
}
Jolt Spec
[
// first pivot by the value of SURNAME
{
"operation": "shift",
"spec": {
"*": { // for each item in the array
"SURNAME": { // match SURNAME
"*": { // match any value of SURNAME
"@2": "&[]" // copy the whole record from 2 levels up to the SURNAME as an array, so we know that in the next step it is always an array
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": { // match CHEN or DEV
"0": {
// only pull pk, sex, dob from the first entry of the SURNAME array so as to not duplicate output
"PK": "Customers[#3].MatchingProfile.DBKey",
"SEX": "Customers[#3].MatchingProfile.Gender",
"SURNAME": "Customers[#3].MatchingProfile.LastName",
"DATE_OF_BIRTH": "Customers[#3].MatchingProfile.Birthdate",
// this does mean that the PHONE_TYPE has to be dealt with twice
// once for the zeroth item, and then once again for the rest
"PHONE_TYPE": "Customers[#3].MatchingProfile.Contacts[0].ContactType",
"PHONE_NO": "Customers[#3].MatchingProfile.Contacts[0].Phone_Number",
"OPERATION": "Customers[#3].MatchingProfile.Contacts[0].Status"
},
"*": {
// handle PHONE_TYPE and friends for the other records
"PHONE_TYPE": "Customers[#3].MatchingProfile.Contacts[&1].ContactType",
"PHONE_NO": "Customers[#3].MatchingProfile.Contacts[&1].Phone_Number",
"OPERATION": "Customers[#3].MatchingProfile.Contacts[&1].Status"
}
}
}
}
]
If you find Jolt valuable for the pivot and the structure change, then your best bet is to "fixup" your input data array, aka map "PHONE_TYPE": "04" to "Mobile", trim the 00:00:00 from the birthday, and then use Jolt to make the nested "Customers[].MatchingProfile.Contacts[]" structure.