Nifi Jolt nested arrays turn into array of custom objects - json

I am absolutely stumped trying to take nested arrays from JSON input and turn them into an array of objects with proper keys and values using the NiFi Jolt transform.
The issue I'm running into is that I need to specify the column names manually. They are not apparent from the JSON response.
Column names are
{
"column_names": [
"icao24",
"callsign",
"origin_country",
"time_position",
"last_contact",
"long",
"lat",
"baro_altitude",
"on_ground",
"velocity",
"true_track",
"vertical_rate",
"sensors",
"geo_altitude",
"squawk",
"spi",
"position_source"
]
}
Example input data:
{
"time": 1675791934,
"states": [
[
"a57b26",
"N452SM ",
"United States",
1675791621,
1675791621,
-105.1168,
39.9103,
null,
true,
0,
90,
null,
null,
null,
null,
false,
0
],
[
"aa56da",
"UAL1986 ",
"United States",
1675791933,
1675791933,
-122.1349,
41.1152,
10972.8,
false,
235.33,
2.51,
0,
null,
11049,
null,
false,
0
]
]
}
Expected output:
[
{
"icao24": "a57b26",
"callsign": "N452SM ",
"origin_country": "United States",
"time_position": 1675791621,
"last_contact": 1675791621,
"long": -105.1168,
"lat": 39.9103,
"baro_altitude": null,
"on_ground": true,
"velocity": 0,
"true_track": 90,
"vertical_rate": null,
"sensors": null,
"geo_altitude": null,
"squawk": null,
"spi": false,
"position_source": 0
},
{...}
]
The shift Jolt spec that I've come up with so far is:
[
{
"operation": "shift",
"spec": {
"states": {
"*": {
"*": "[&1]"
}
}
}
}
]
Result:
[
[
"a57b26",
"N452SM ",
"United States",
1675791621,
1675791621,
-105.1168,
39.9103,
null,
true,
0,
90,
null,
null,
null,
null,
false,
0
],
[
"aa56da",
"UAL1986 ",
"United States",
1675791933,
1675791933,
-122.1349,
41.1152,
10972.8,
false,
235.33,
2.51,
0,
null,
11049,
null,
false,
0
]
]
I'd be able to possibly figure it out if the column names were sent in the payload, but unfortunately I need to set them manually in the transformation.
I did happen to find a helpful Stack Overflow question about a similar situation, but again, the solution looks like heavy-duty regex, and I'm just not that well versed in NiFi Jolt yet.
Jolt reference first element in array as target name

I ended up solving this on my own:
[
{
"operation": "default",
"spec": {
"temp": [
[
"icao24",
"callsign",
"origin_country",
"time_position",
"last_contact",
"long",
"lat",
"baro_altitude",
"on_ground",
"velocity",
"true_track",
"vertical_rate",
"sensors",
"geo_altitude",
"squawk",
"spi",
"position_source"
]
]
}
},
{
"operation": "shift",
"spec": {
"temp": {
"*": "states[]"
},
"states": {
"*": "states[]"
}
}
},
{
"operation": "shift",
"spec": {
"states": {
"*": {
"*": "[&1].#(2,[0].[&])"
}
}
}
},
{
"operation": "shift",
"spec": {
"0": null,
"*": "[]"
}
}
]
And this is the expected output :
[
{
"icao24": "a57b26",
"callsign": "N452SM ",
"origin_country": "United States",
"time_position": 1675791621,
"last_contact": 1675791621,
"long": -105.1168,
"lat": 39.9103,
"baro_altitude": null,
"on_ground": true,
"velocity": 0,
"true_track": 90,
"vertical_rate": null,
"sensors": null,
"geo_altitude": null,
"squawk": null,
"spi": false,
"position_source": 0
},
{
"icao24": "aa56da",
"callsign": "UAL1986 ",
"origin_country": "United States",
"time_position": 1675791933,
"last_contact": 1675791933,
"long": -122.1349,
"lat": 41.1152,
"baro_altitude": 10972.8,
"on_ground": false,
"velocity": 235.33,
"true_track": 2.51,
"vertical_rate": 0,
"sensors": null,
"geo_altitude": 11049,
"squawk": null,
"spi": false,
"position_source": 0
}
]

Related

How to change key name using JoltTransformJSON in Nifi

Please help with this Jolt transformation.
Note:
If there is a field "ServiceFamily", then change the field name to
"Tag1"
If there is a field "PublisherName", then change the field name to
"Tag2"
Input:
[
{
"ServiceFamily": "Compute",
"CostAllocationRuleName": null,
"benefitId": null,
"benefitName": null
},
{
"PublisherName": "Microsoft",
"ChargeType": "Usage",
"Frequency": "UsageBased",
"PricingModel": "OnDemand",
"benefitName": null
}
] 
Expected output:
[
{
"Tag1": "Compute",
"CostAllocationRuleName": null,
"benefitId": null,
"benefitName": null
},
{
"Tag2": "Microsoft",
"ChargeType": "Usage",
"Frequency": "UsageBased",
"PricingModel": "OnDemand",
"benefitName": null
}
]
You can use such a shift transformation spec
[
{
"operation": "shift",
"spec": {
"*": {
"ServiceFamily": "[#2].Tag1",
"PublisherName": "[#2].Tag2",
"*": "[#2].&"
}
}
}
]
the demo on the site http://jolt-demo.appspot.com/ is
Alternatively you can use the following one which consecutively applies modify and remove transformation specs
[
{
"operation": "modify-overwrite-beta",
"spec": {
"*": {
"Tag1": "=(#(1,ServiceFamily))",
"Tag2": "=(#(1,PublisherName))"
}
}
},
{
"operation": "remove",
"spec": {
"*": {
"ServiceFamily": "",
"PublisherName": ""
}
}
}
]
the demo on the site http://jolt-demo.appspot.com/ is

Using jolt transform, converting to nested json for multiple payloads

I need to convert flat JSON to nested JSON for multiple payloads, and I am having some trouble with the conversion. I want to aggregate the stop data into a stops array, with one aggregated object per unique payload. I use https://jolt-demo.appspot.com to test the following.
input:
[
{
"container_id": "DEF_id",
"haulType": "OL",
"loadNumber": "DO123345",
"billOfLading": "DO12345",
"referenceNumbers": "LoadIDEF",
"addressLine1": "DEF_address",
"stopReferenceId": "0004",
"stopType": "PL",
"containerNumber": "454545"
},
{
"container_id": "DEF_id",
"haulType": "OL",
"loadNumber": "DO123345",
"billOfLading": "DO12345",
"referenceNumbers": "LoadIDEF",
"addressLine1": null,
"stopReferenceId": "0003",
"stopType": "PU",
"containerNumber": "454545"
},
{
"container_id": "ABC_id",
"haulType": "IL",
"loadNumber": "BO123345",
"billOfLading": "BO12345",
"referenceNumbers": "LoadID",
"addressLine1": null,
"stopReferenceId": "0002",
"stopType": "PL",
"containerNumber": "232323"
},
{
"container_id": "ABC_id",
"haulType": "IL",
"loadNumber": "BO123345",
"billOfLading": "BO12345",
"referenceNumbers": "LoadID",
"addressLine1": "ABC Street",
"stopReferenceId": "0001",
"stopType": "PU",
"containerNumber": "232323"
}
]
Expected Output:
[
{
"load": {
"container_id": "DEF_id",
"haulType": [
"OL"
],
"loadNumber": "DO123345",
"billOfLading": "DO12345",
"referenceNumbers": [
"LoadIDEF"
],
"stops": [
{
"addressLine1": "DEF_address",
"stopReferenceId": "0004",
"stopType": "PL"
},
{
"addressLine1": null,
"stopReferenceId": "0003",
"stopType": "PU"
}
]
},
"containerInfo": {
"containerNumber": "454545"
}
},
{
"load": {
"container_id": "ABC_id",
"haulType": [
"IL"
],
"loadNumber": "BO123345",
"billOfLading": "BO12345",
"referenceNumbers": [
"LoadID"
],
"stops": [
{
"addressLine1": null,
"stopReferenceId": "0002",
"stopType": "PL"
},
{
"addressLine1": "ABC Street",
"stopReferenceId": "0001",
"stopType": "PU"
}
]
},
"containerInfo": {
"containerNumber": "232323"
}
}
]
Here is the Jolt spec I used:
[
{
"operation": "shift",
"spec": {
"*": {
"container_id": "#(1,containerNumber).load.&",
"haulType": "#(1,containerNumber).load.&",
"loadNumber": "#(1,containerNumber).load.&",
"billOfLading": "#(1,containerNumber).load.&",
"referenceNumbers": "#(1,containerNumber).load.&",
"addressLine1": "#(1,containerNumber).load.stops[&1].&",
"stopReferenceId": "#(1,containerNumber).load.stops[&1].&",
"stopType": "#(1,containerNumber).load.stops[&1].&",
"containerNumber": "#(1,containerNumber).containerInfo.&"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"*": {
"*": "ONE",
"stops": "MANY"
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "&",
"load": {
"haulType|referenceNumbers": "&1.&[]",
"*": "&1.&"
}
}
}
}
]
There is no need to write out the attributes individually, given the expected result. In the first spec, you can partition by the containerNumber values, using the * and & wildcards to represent the key-value pairs of all attributes. The separation of attributes (conditional logic) should then be performed in the second spec, in order to distinguish the display style of each key-value pair, such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": "#(1,containerNumber).&"
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": {
"0": "&2.load.&1"
},
"haulType|referenceN*": {
"0": "&2.load.&1[]" // 0 : pick only value of the first index from the array, &1[] : wrap up the values with square brackets
},
"addressLine1|stop*": {
"*": "&2.load.stops[&].&1"
},
"containerN*": {
"0": "&2.load.containerInfo.&1"
}
}
}
},
{
// get rid of object labels
"operation": "shift",
"spec": {
"*": ""
}
}
]
the demo on the site is http://jolt-demo.appspot.com/ :

Convert Flat json to Nested Json with multiple arrays and keep null values in output using Jolt transform

I'm trying to write a spec to do the transformation below using a Jolt transform. I need to convert the flat JSON to nested JSON while keeping null values. I have attached the input, the expected output and my Jolt spec. I need to keep the null values in the output, but they don't show up in the output after the Jolt transform. I didn't get the exact output with my Jolt spec.
I am having some trouble with converting the flat JSON to nested JSON. I have looked at examples and didn't get any closer to what is described above. I need to transform a JSON structure by using a Jolt spec. I use https://jolt-demo.appspot.com to test the following.
Input:
[
{
"container_id": "ABC",
"shipperN": null,
"PNumber": null,
"trackingNumber": null,
"priority": null,
"HType": "IN_Load",
"loadNumber": "123345",
"billOfLading": "12345",
"referenceNumbers": "LID",
"addressLine1": "ABC Street",
"addressLine2": "null",
"city": "Chicago",
"country": "US",
"latitude": "null",
"longitude": "null",
"earliestAppointmentTime": "XXXXX09:25",
"latestAppointmentTime": "XXXXX09:25",
"postalCode": "XXXXX3",
"sequence": "1",
"state": "XY",
"stopReferenceId": "0001",
"stopType": "PU",
"truckNumber": null,
"trailerNumber": null,
"driverPhone": null,
"railEquipmentInitials": null,
"railEquipmentNumber": null,
"containerNumber": "XXXXXXXX"
},
{
"container_id": "ABC",
"shipperN": null,
"PNumber": null,
"trackingNumber": null,
"priority": null,
"HType": "IN_Load",
"loadNumber": "123345",
"billOfLading": "12345",
"referenceNumbers": "LID",
"addressLine1": "null",
"addressLine2": "null",
"city": "null",
"country": "null",
"latitude": null,
"longitude": null,
"earliestAppointmentTime": "XXXXX09:25",
"latestAppointmentTime": "XXXXX09:25",
"name": "null",
"postalCode": "null",
"sequence": "2",
"state": "null",
"stopReferenceId": "XXXXD",
"stopType": "PL",
"truckNumber": null,
"trailerNumber": null,
"driverPhone": null,
"railEquipmentInitials": null,
"railEquipmentNumber": null,
"containerNumber": "XXXXXXXX"
}
]
Desired Output:
{
"load": {
"container_id": "ABC",
"shipperN": null,
"PNumber": null,
"trackingNumber": null,
"priority": null,
"HType": [ "IN_Load" ],
"loadNumber": "123345",
"billOfLading": "12345",
"referenceNumbers": [ "LID" ],
"stops": [
{
"addressLine1": "ABC Street",
"addressLine2": "null",
"city": "Chicago",
"country": "US",
"earliestAppointmentTime": "XXXXX09:25",
"latestAppointmentTime": "XXXXX09:25",
"postalCode": "XXXXX3",
"sequence": "1",
"state": "XY",
"stopReferenceId": "0001",
"stopType": "PU"
},
{
"earliestAppointmentTime": "2021-03-09T15:25:00.203Z",
"latestAppointmentTime": "2021-03-09T15:25:00.203Z",
"sequence": "2",
"stopReferenceId": "dummy",
"stopType": "PL",
"externalAddressId": "dummy"
}
]
},
"containerInfo": {
"containerNumber": "XXXXXXXX"
},
"trackingInfo": {
"truckNumber": null,
"trailerNumber": null,
"driverPhone": null,
"railEquipmentInitials": null,
"railEquipmentNumber": null
}
}
Jolt Spec that I'm using :
[
{
"operation": "shift",
"spec": {
"*": {
"*": "#(1,container_id).load.stops[&1].&",
"container_id": "#(1,container_id).load.&", // "else" case
"shipperN": "#(1,container_id).load.&",
"PNumber": "#(1,container_id).load.&",
"trackingNumber": "#(1,container_id).load.&",
"priority": "#(1,container_id).load.&",
"HType": "#(1,container_id).load.&",
"loadNumber": "#(1,container_id).load.&",
"billOfLading": "#(1,container_id).load.&",
"referenceNumbers": "#(1,container_id).load.&",
"containerNumber": "#(1,container_id).containerInfo.&",
"truckNumber": "#(1,container_id).trackingInfo.&",
"trailerNumber": "#(1,container_id).trackingInfo.&",
"driverPhone": "#(1,container_id).trackingInfo.&",
"railEquipmentInitials": "#(1,container_id).trackingInfo.&",
"railEquipmentNumber": "#(1,container_id).trackingInfo.&"
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"*": {
"container_id": "ONE",
"shipperN": "ONE",
"PNumber": "ONE",
"trackingNumber": "ONE",
"priority": "ONE",
"HType": "ONE",
"referenceNumbers": "ONE",
"loadNumber": "ONE",
"billOfLading": "ONE",
"containerInfo": {
"*": "ONE"
},
"trackingInfo": {
"*": "ONE"
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
You're so close:
The spec containing recursivelySquashNulls should be removed, since it strips the null values that you want to keep
The .&[] identifier should be applied to the HType and referenceNumbers attributes, so that their values are wrapped in arrays
The cardinality spec can preferably be shortened
So use the following as the whole spec
[
{
"operation": "shift",
"spec": {
"*": {
"*": "#(1,container_id).load.stops[&1].&",
"container_id": "#(1,container_id).load.&", // "else" case
"shipperN": "#(1,container_id).load.&",
"PNumber": "#(1,container_id).load.&",
"trackingNumber": "#(1,container_id).load.&",
"priority": "#(1,container_id).load.&",
"HType": "#(1,container_id).load.&",
"loadNumber": "#(1,container_id).load.&",
"billOfLading": "#(1,container_id).load.&",
"referenceNumbers": "#(1,container_id).load.&",
"containerNumber": "#(1,container_id).containerInfo.&",
"truckNumber": "#(1,container_id).trackingInfo.&",
"trailerNumber": "#(1,container_id).trackingInfo.&",
"driverPhone": "#(1,container_id).trackingInfo.&",
"railEquipmentInitials": "#(1,container_id).trackingInfo.&",
"railEquipmentNumber": "#(1,container_id).trackingInfo.&"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"*": {
"*": "ONE",
"stops": "MANY"
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "&",
"load": {
"HType|referenceNumbers": "&1.&[]",
"*": "&1.&" // &1 stands for the key "load", and & replicates the leaf values
}
}
}
}
]

Grouping JSON elements using Jolt transform

I need help in jolt transform spec. Below is my work till now.
Input:
[
{
"ID": "1234",
"Date": "2020-12-10",
"Time": "06:00:00",
"Rate": null,
"Interest": null,
"Term": 99
},
{
"ID": "1234",
"Date": "2020-12-11",
"Time": "07:00:00",
"Rate": 8,
"Interest": null,
"Term": 99
}
]
Jolt Code used:
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "#(1,ID).id",
"Date": "#(1,ID).date",
"Time": "#(1,ID).group.time",
"Rate": "#(1,ID).group.rate",
"Interest": "#(1,ID).group.interest",
"Term": "#(1,ID).group.term"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"id": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
Current output:
[
{
"id": "1234",
"date": ["2020-12-10", "2020-12-11"],
"group": {
"time": ["06:00:00", "07:00:00"],
"rate": 8,
"interest": null,
"term": [99, 99]
}
}
]
Expected output
[
{
"id": "1234",
"date": "2020-12-10",
"group": {
"time": "06:00:00",
"rate": null,
"interest": null,
"term": 99
}
},
{
"id": "1234",
"date": "2020-12-11",
"group": {
"time": "07:00:00",
"rate": 8,
"interest": null,
"term": 99
}
}
]
When using only a single JSON object, this code works fine. But when we use multiple items with the same ID, it starts grouping all related fields.
You can use the square-bracket notation ([&1]) as the common factor, while placing the rest of the elements, other than ID and Date, under group, such as
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "[&1].&",
"Date": "[&1].&",
"*": "[&1].group.&"
}
}
}
]

Jolt Transform Nested Grouping

I have a JSON that has this flat structure:
[{
"PK": "1111",
"SOURCE_DB": "Oracle",
"CONTACT_TYPE": "Phone",
"CONTACT_SUBTYPE": "Work",
"EMAIL": null
"PHONE_COUNTRY_CODE": "44",
"PHONE_NUMBER": "12345678",
"PHONE_EXT": "907643",
"STATUS": "Active"
}, {
"PK": "1111",
"SOURCE_DB": "Oracle",
"CONTACT_TYPE": "Phone",
"CONTACT_SUBTYPE": "Home",
"EMAIL": null
"PHONE_COUNTRY_CODE": "353",
"PHONE_NUMBER": "87654321",
"PHONE_EXT": null,
"STATUS": "Active"
}, {
"PK": "1111",
"SOURCE_DB": "",
"CONTACT_TYPE": "Email",
"CONTACT_SUBTYPE": "Personal",
"EMAIL": "me#mail.com"
"PHONE_COUNTRY_CODE": null,
"PHONE_NUMBER": null,
"PHONE_EXT": null,
"STATUS": "Active"
},
{
"PK": "2222",
"SOURCE_DB": "DB2",
"CONTACT_TYPE": "Phone",
"CONTACT_SUBTYPE": "Home",
"EMAIL": null
"PHONE_COUNTRY_CODE": "44",
"PHONE_NUMBER": "98761234",
"PHONE_EXT": null,
"STATUS": "Inactive"
}, {
"PK": "2222",
"SOURCE_DB": "DB2",
"CONTACT_TYPE": "Email",
"CONTACT_SUBTYPE": "Work",
"EMAIL": "you#mail.co.uk"
"PHONE_COUNTRY_CODE": null,
"PHONE_NUMBER": null,
"PHONE_EXT": null,
"STATUS": "Active"
}
]
Then, I want to group them, first by Key (PK), then within each entry, ContactMethods will be grouped together. This is the output:
{
"Accounts": [{
"Reference": {
"Key": "1111",
"System": "Oracle"
},
"ContactMethods": {
"Phone": [{
"Subtype": "Work",
"CountryCode": "44",
"Number": "12345678",
"Extension": "907643",
"Active": true
}, {
"Subtype": "Home",
"CountryCode": "353",
"Number": "87654321",
"Extension": null,
"Active": true
}
],
"Email": [{
"Subtype": "Personal",
"EmailAddress": "my#mail.com",
"Active": true
}
]
}
}, {
"Reference": {
"Key": "2222",
"System": "DB2"
},
"ContactMethods": {
"Phone": [{
"Subtype": "Home",
"CountryCode": "44",
"Number": "98761234",
"Extension": null,
"Active": false
}
],
"Email": [{
"Subtype": "Work",
"EmailAddress": "you#mail.co.uk",
"Active": true
}
]
}
}
]
}
I am able to group this by PK, but I am having difficulty on the second part, on how to do the grouping within the nested structure. Can you show a sample spec and put some explanation?
Possible, but really convoluted / verbose. This is pushing the bounds of what should be done with Jolt.
One pivot and some remapping is maintainable, but this is complicated enough that it will be very hard to debug if something goes wrong / your data is weird.
It requires 5 steps: two to fix STATUS from a string to a boolean, two to pivot and sub-pivot the data, and the last one to put everything in the right final place.
I recommend examining each step in its own tab/copy of the Jolt demo site to see / grok what each step is doing.
Spec
[
{
// ninja in a true and false value so that
// Status "Active" / "Inactive" can be "mapped" to booleans
"operation": "default",
"spec": {
"*": {
"FALSE": false,
"TRUE": true
}
}
},
{
// fix STATUS
"operation": "shift",
"spec": {
"*": {
//
"STATUS": {
// Match "Active" and make STATUS be true
"Active": {
"#(2,TRUE)": "[&3].STATUS"
},
// Everything else set to false
"*": {
"#(2,FALSE)": "[&3].STATUS"
}
},
// match and discard TRUE and FALSE
"TRUE|FALSE": null,
// pass everything else thru
"*": "[&1].&"
}
}
},
{
// now, group by PK value
"operation": "shift",
"spec": {
// top level array
"*": {
"PK": {
"*": { // match any value of PK
// go back up and grab the whole block and write
// it to the output where the key is the value of PK
"#2": "&1[]"
}
}
}
}
},
{
// sub group by CONTACT_TYPE, with the complication of
// pulling one entry off to serve as the "Reference"
"operation": "shift",
"spec": {
"*": { // pk value
"0": { // special case the Zeroth item so that
// we can pull off one copy to serve as the
// Reference
"#": "&2.Reference",
// sub group by CONTACT_TYPE
"CONTACT_TYPE": {
"*": {
"#2": "&4.ContactMethods.&1[]"
}
}
},
"*": { // all the rest of the array indicies
// sub group by CONTACT_TYPE
"CONTACT_TYPE": {
"*": {
"#2": "&4.ContactMethods.&1[]"
}
}
}
}
}
},
{
// Data fixing and Grouping done, now put everything
// in its final place
"operation": "shift",
"spec": {
"*": { // top level pk
"Reference": {
"PK": "Accounts[#3].Reference.Key",
"SOURCE_DB": "Accounts[#3].Reference.System"
},
"ContactMethods": {
"Phone": {
"*": {
"CONTACT_SUBTYPE": "Accounts[#5].ContactMethods.Phone[&1].Subtype",
"PHONE_COUNTRY_CODE": "Accounts[#5].ContactMethods.Phone[&1].CountryCode",
"PHONE_NUMBER": "Accounts[#5].ContactMethods.Phone[&1].Number",
"PHONE_EXT": "Accounts[#5].ContactMethods.Phone[&1].Extension",
"STATUS": "Accounts[#5].ContactMethods.Phone[&1].Active"
}
},
"Email": {
"*": {
"CONTACT_SUBTYPE": "Accounts[#5].ContactMethods.Email[&1].Subtype",
"EMAIL": "Accounts[#5].ContactMethods.Email[&1].EmailAddress",
"STATUS": "Accounts[#5].ContactMethods.Email[&1].Active"
}
}
}
}
}
}
]