apache nifi- how to create a custom date format

apache nifi- how to create a custom date format - json

I am new to NiFi and I am trying to derive a week_start_date and a week_number from the date field of a JSON record.
I am using a Jolt transform.
The input is a Google Ads API response.
This is the spec I use:
[
{
"operation": "shift",
"spec": {
"customer_id": {
"*": "[&].customer_id"
},
"customer_name": {
"*": "[&].customer_name"
},
"account_currency_code": {
"*": "[&].account_currency_code"
},
"campaign_id": {
"*": "[&].campaign_id"
},
"campaign_name": {
"*": "[&].campaign_name"
},
"campaign_status": {
"*": "[&].campaign_status"
},
"ad_group_id": {
"*": "[&].ad_group_id"
},
"ad_group_name": {
"*": "[&].ad_group_name"
},
"clicks": {
"*": "[&].clicks"
},
"cost": {
"*": "[&].cost"
},
"impressions": {
"*": "[&].impressions"
},
"device": {
"*": "[&].device"
},
"date": {
"*": "[&].date"
},
"week_number": {
"*": "[&].week_number"
},
"year": {
"*": "[&].year"
},
"keywords": {
"*": "[&].keywords"
},
"keywords_id": {
"*": "[&].keywords_id"
}
}
},
{
"operation": "modify-default-beta",
"spec": {
"date": {
"date": "=intSubtract(#(1,date))"
}
}
}
]
The expected output should be:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "48",
"week_start_date": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
The output I actually get:
[
{
"customer_id": "2538943578",
"customer_name": "test.com",
"account_currency_code": "USD",
"campaign_id": "11137311251",
"campaign_name": "testers",
"campaign_status": "ENABLED",
"ad_group_id": "1111",
"ad_group_name": "tesst- E",
"clicks": "6",
"cost": "26580000",
"impressions": "40",
"device": "DESKTOP",
"date": "2021-12-01",
"week_number": "2021-11-29",
"year": 2021,
"keywords": "test",
"keywords_id": "56357925842"
}
]
I am not sure how to use the modify-default-beta operation correctly.
I also tried looking at the docs:
https://github.com/bazaarvoice/jolt/tree/master/jolt-core/src/test/resources/json/shiftr
What is the correct way to understand the structure of a spec?

Related

JOLT: Combine JSON array using common key

My JSON looks something like this: it is an array containing worker entries and worksite entries. Each worker entry starts with a worksite id, so I need to merge the workers into their matching worksite (inside an array).
[
{
"worksite_candidates": {
"worksite_id": "12345",
"worker": {
"id": "1232",
"managerId": "09"
},
"openDate": "2022-10-14",
"finalDate": "2022-12-16"
}
},
{
"worksite_candidates": {
"worksite_id": "12345",
"worker": {
"id": "1233",
"managerId": "08"
},
"openDate": "",
"finalDate": ""
}
},
{
"worksite_candidates": {
"worksite_id": "12347",
"worker": {
"id": "1234",
"managerId": "09"
},
"openDate": "2022-11-10",
"finalDate": "2022-11-12"
}
},
{
"worksite": {
"id": "12345",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8"
}
},
{
"worksite": {
"id": "12347",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8"
}
}
]
I need to transform it with a Jolt spec into something like this, where the workers go inside their worksite as an array:
[
{
"worksite": {
"id": "12345",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8",
"Candidates": [
{
"worker": {
"id": "1232",
"managerId": "09"
},
"openDate": "2022-10-14",
"finalDate": "2022-12-16"
},
{
"worker": {
"id": "1233",
"managerId": "08"
},
"openDate": "",
"finalDate": ""
}
]
}
},
{
"worksite": {
"id": "12347",
"status": "2",
"dayid": "2",
"vacancys": "1",
"countryid": "2",
"arenaid": "8",
"Candidates": [
{
"worker": {
"id": "1234",
"managerId": "09"
},
"openDate": "2022-11-10",
"finalDate": "2022-11-12"
}
]
}
}
]
Please suggest a Jolt spec that produces the desired output. I can't find a way to do it.

You can walk through the worksite_candidates and worksite objects
while grouping their elements by the worksite id value
(that is, the value of the worksite_candidates.worksite_id and worksite.id attributes), such as
[
{
"operation": "shift",
"spec": {
"*": {
"worksite_c*": {
"*": "#(1,worksite_id).worksite.Candidates[&2].&" // [&2] is for going two levels up to reach the top level indexes and grabbing them to generate array results
},
"worksite": {
"*": "#(1,id).&1.&"
}
}
}
},
{
// get rid of redundant "worksite_id" attribute
"operation": "remove",
"spec": {
"*": {
"*": {
"*": {
"*": {
"worksite_id": ""
}
}
}
}
}
},
{
// get rid of null components
"operation": "modify-overwrite-beta",
"spec": {
"*": "=recursivelySquashNulls"
}
},
{
// get rid of topmost labels of objects
"operation": "shift",
"spec": {
"*": ""
}
}
]
the demo on the site http://jolt-demo.appspot.com/ is

I think this solution is correct
[
{
//group element for id and worksite_id
"operation": "shift",
"spec": {
"*": {
"worksite": {
"id": "data.#(1,id).&",
"status": "data.#(1,id).&",
"dayid": "data.#(1,id).&",
"vacancys": "data.#(1,id).&",
"countryid": "data.#(1,id).&",
"arenaid": "data.#(1,id).&"
},
"worksite_candidates": {
"worksite_id": {
"*": {
"#2": "data.&.Candidates"
}
}
}
}
}
},
//create output with array
{
"operation": "shift",
"spec": {
"data": {
"*": "[]"
}
}
},
//remove field worksite_id
{
"operation": "remove",
"spec": {
"*": {
"Candidates": {
"*": {
"worksite_id": ""
}
}
}
}
}
]

Applying cardinality for multiple columns in Jolt

I am trying to apply a Jolt transformation to the data below.
Input:
[
{
"id": "500",
"code": "abc",
"date": "2020-10-10",
"category": 1,
"amount": 100,
"result": 0
},
{
"id": "500",
"code": "abc",
"date": "2020-10-10",
"category": 2,
"amount": 200,
"result": 1
}
]
jolt used:
[
{
"operation": "shift",
"spec": {
"*": {
"id": "#(1,id).id",
"code": "#(1,id).code",
"date": "#(1,id).group1.date",
"category": "#(1,id).group1.group2[&1].category"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"id": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
current output:
{
"id": "500",
"code": [
"abc",
"abc"
],
"group1": {
"date": [
"2020-10-10",
"2020-10-10"
],
"group2": [
{
"category": 1
},
{
"category": 2
}
]
}
}
expected:
{
"id": "500",
"code": "abc",
"group1": {
"date": "2020-10-10",
"group2": [
{
"category": 1
},
{
"category": 2
}
]
}
}
If I also list the code and date columns in the cardinality spec, it works fine. But in my use case there are many more such columns to be added. Is there a better way to handle this scenario?

You should add an entry for each nested node (such as group1) and use the "*" wildcard to represent the rest of the attributes within the cardinality transformation, such as
{
"operation": "cardinality",
"spec": {
"*": {
"*": "ONE",
"group1": {
"*": "ONE",
"group2": "MANY"
}
}
}
}
where "group2": "MANY" exempts group2 from the rule that extracts only the first element of the respective list, so it is kept as an array.
the demo on the site http://jolt-demo.appspot.com/ :

JOLT - Remove duplicates in array

I need to remove duplicates from docAddrs array from my document and keep the rest of the json unchanged. The last transformation is moving all the data into docAddrs array, instead of just the addr objects. This is what I tried:
Input:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"docAddrs": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Spec:
[
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": "#addrId[]"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": "ONE"
}
},
{
"operation": "shift",
"spec": {
"*": {
"docId1": "docId1",
"docId2": "docId2",
"docInfo": "docInfo",
"#": "docAddrs.[]"
}
}
}
]
Output:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"111": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
}
],
"112": [
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Expected Output:
{
"docId1": "1",
"docId2": "2",
"docInfo": {
"info1": "info1",
"info2": "info2",
"lines": [
{
"lineNum": "1",
"val": "1"
},
{
"lineNum": "2",
"val": "2"
}
]
},
"docAddrs": [
{
"addrId": "111",
"street": "street1",
"city": "city1",
"st": "st"
},
{
"addrId": "112",
"street": "street2",
"city": "city2",
"st": "st2"
}
]
}
Can someone please suggest how I can get this to work? Thanks in advance.

You can use the combination of the following specs
[
//exchange key-value pairs for "docAddrs" array
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": {
"$": "&3[#(2,addrId)].#(0)"
}
}
}
}
},
// pick only the first components of the values from the newly formed array type values
// those already have identical components per each
{
"operation": "cardinality",
"spec": {
"docAddrs": {
"*": {
"*": "ONE"
}
}
}
},
// exchange key-value pairs again in order to collect each value array pairs
// under common keys respectively
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": {
"$": "&3.#(0)"
}
}
}
}
},
// dissipate each value components to their related object
{
"operation": "shift",
"spec": {
"*": "&",
"docAddrs": {
"*": {
"*": "&2[&].&1"
}
}
}
}
]
The Demo on http://jolt-demo.appspot.com/

Grouping JSON elements using Jolt transform

I need help with a Jolt transform spec. Below is my work so far.
Input:
[
{
"ID": "1234",
"Date": "2020-12-10",
"Time": "06:00:00",
"Rate": null,
"Interest": null,
"Term": 99
},
{
"ID": "1234",
"Date": "2020-12-11",
"Time": "07:00:00",
"Rate": 8,
"Interest": null,
"Term": 99
}
]
Jolt Code used:
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "#(1,ID).id",
"Date": "#(1,ID).date",
"Time": "#(1,ID).group.time",
"Rate": "#(1,ID).group.rate",
"Interest": "#(1,ID).group.interest",
"Term": "#(1,ID).group.term"
}
}
},
{
"operation": "cardinality",
"spec": {
"*": {
"id": "ONE"
}
}
},
{
"operation": "shift",
"spec": {
"*": ""
}
}
]
Current output:
[
{
"id": "1234",
"date": ["2020-12-10", "2020-12-11"],
"group": {
"time": ["06:00:00", "07:00:00"],
"rate": 8,
"interest": null,
"term": [99, 99]
}
}
]
Expected output
[
{
"id": "1234",
"date": "2020-12-10",
"group": {
"time": "06:00:00",
"rate": null,
"interest": null,
"term": 99
}
},
{
"id": "1234",
"date": "2020-12-11",
"group": {
"time": "07:00:00",
"rate": 8,
"interest": null,
"term": 99
}
}
]
When the input contains only a single JSON object, this code works fine. But when the input contains multiple items with the same ID, it starts grouping all the related fields together.

You can use the square-bracket notation ([&1]) as the common prefix, while nesting all the elements other than ID and Date under group, such as
[
{
"operation": "shift",
"spec": {
"*": {
"ID": "[&1].&",
"Date": "[&1].&",
"*": "[&1].group.&"
}
}
}
]

Jolt Conversion - iterate list within a list and form single list

I am trying to iterate over lists nested inside a list and form a single list with multiple objects. I am able to iterate over the lists, but the tags that come before the inner list are not applied to each object when a single list contains more than one object.
My input request is like below:
[
{
"success": [
{
"id": "4",
"Offers": [
{
"name": "Optional",
"type": {
"id": "1",
"name": "Optional"
},
"productOfferings": [
{
"id": "3",
"name": "Test1"
}
]
},
{
"name": "Default",
"type": {
"id": "2",
"name": "Default"
},
"productOfferings": [
{
"id": "1",
"name": "Test2"
},
{
"id": "2",
"name": "Test3"
}
]
}
]
}
]
}
]
My spec is like below:
[
{
"operation": "shift",
"spec": {
"*": {
"success": {
"*": {
"Offers": {
"*": {
"name": "[&1].[&3].typeName",
"type": {
"id": "[&2].[&4].typeNameId",
"name": "[&2].[&4].typeNameValue"
},
"productOfferings": {
"*": {
"id": "[&3].[&1].id",
"name": "[&3].[&1].name"
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "[]"
}
}
}
]
Output Received from spec:
[
{
"typeName": "Optional",
"typeNameId": "1",
"typeNameValue": "Optional",
"id": "3",
"name": "Test1"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "1",
"name": "Test2"
},
{
"id": "2",
"name": "Test3"
}
]
But Expected output is like below:
[
{
"typeName": "Optional",
"typeNameId": "1",
"typeNameValue": "Optional",
"id": "3",
"name": "Test1"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "1",
"name": "Test2"
},
{
"typeName": "Default",
"typeNameId": "2",
"typeNameValue": "Default",
"id": "2",
"name": "Test3"
}
]
If there is more than one object inside the productOfferings array, I am not able to add typeName, typeNameId and typeNameValue to every object. Please help me fix this issue.

It seems you just need to do all the collecting inside the productOfferings array, prefixing each target key with the common identifier [&3].[&1], such as
[
{
"operation": "shift",
"spec": {
"*": {
"success": {
"*": {
"Offers": {
"*": {
"productOfferings": {
"*": {
"#(2,name)": "[&3].[&1].typeName",
"#(2,type.id)": "[&3].[&1].typeNameId",
"#(2,type.name)": "[&3].[&1].typeNameValue",
"*": "[&3].[&1].&"
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": {
"*": "[]"
}
}
}
]

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

apache nifi- how to create a custom date format - json

Related

JOLT: Combine JSON array using common key

Applying cardinality for multiple columns in Jolt

JOLT - Remove duplicates in array

Grouping JSON elements using Jolt transform

Jolt Conversion - iterate list within a list and form single list

Categories

Resources