replace specific keys in a deeply embedded json file - json

I have a JSON file (origin.json) generated locally. I'd like to replace some keys in origin.json and generate a remote.json that I can send to a remote server following its endpoint payload format.
My origin.json is large and deeply nested. I could iterate over each key and replace the ones I need to, but I am wondering whether there is an efficient tool that could do the same — something like jq?
Below is my nested JSON:
{
"timeoutMs": 3000,
"requestTopic": "local-cron",
"searchQuery": {
"checkin": "2023-01-10",
"checkout": "2023-01-11",
"numberRoomsNeeded": 0,
"adultsTotal": 2,
"childrenTotal": 0,
"currency": "EUR"
},
"requestContext": {
"userId": 666666666,
"userAuthLevel": 2,
"isUserstar": true,
"visitorCc1": "cn",
"trafficSourceId": 0,
"siteTypeId": 9,
"detectedUserType": "normal",
"travelPurpose": 2,
"affiliateId": 12345,
"languageCode": "en-us",
"currency": "CNY",
"siteType": 1,
"serverRole": "cron",
"action": "bp",
"visitorIdentifier": [
{
"type": "id-single",
"uvi": "00000000000000000000000"
},
{
"type": "user-identity",
"uvi": "66666666"
},
{
"type": "user",
"uvi": "77777777777"
}
],
"isInternal": true,
"enableExperiments": true,
"shouldTrackRequestInExperiments": true,
"starSettings": {
"isUserstar": true,
"isUserstarControlGroup": false,
"canStarUserSeeFreeBreakfast": true,
"canStarUserSeeFreeRoomUpgrade": true,
"starTier": 5,
"topstarBenefit": "",
"isRightstar": true,
"starDynamicPricing": {
"canSeestarDynamicPricingLevel3": true
},
"canStarUserSeeFreeCleaningFee": true,
"starVipSettings": [
{
"eligible": true,
"benefitName": "no_et",
"programType": "PriceMatchTrial",
"percentage": 0
}
]
},
"isCsRelocationMode": false,
"tripValueContext": {},
"visitorCountryRegion": "sh",
"paymentTiming": 1,
"includeConditional": false
},
"showDebug": false,
"hits": [
{
"hhhhid": 8228082,
"ppblock": {
"allMatchingBlocks": [
{
"blockId": 1,
"rawBlock": {
"occupancy": 2,
"price": 34425,
"roomId": 822808201,
"policygroupId": 346547507,
"mealplan": 2,
"channel": 581,
"currencyId": 2,
"maxPersons": 3,
"flags": 0,
"freeCancelUntil": 0,
"priceBase10Exponent": -2,
"packageId": 0,
"paymenttermsId": 38,
"vrFlags": 0,
"bundleId": 0
},
"blockStay": {
"stayNights": [
{
"polId": 346547507,
"rateId": 25728208,
"curId": 2,
"price": 344.25,
"price1": 0,
"channelId": 581,
"occupancy": 2,
"roomId": 822808201,
"initialPrice": 405,
"initialPrice1": 0
}
],
"stayNrRooms": 1,
"stayAvailableUntil": 1956105,
"stayPrice": 344.25,
"stayFlashDeal": 0,
"stayPromoTextId": 0,
"stayMinAdvanceRes": 1673388000,
"stayInventorySegmentId": 0,
"stayExperimentFlags": 0,
"stayRoomRateFlags": 4,
"stayIncludedProducts": 0
}
}
]
},
"selectedBlocks": [
"822808201_346547507_2_2_0"
],
"selected": {
"822808201_346547507_2_2_0": 1
}
}
],
"pipeline": 3
}
Here I flagged several keys I'd like to replace with '==> (new key)':
jq '.. | keys?' star-dragongate.json
[
"hits",
"pipeline",
"requestContext",
"requestTopic",
"searchQuery",
"showDebug", ==> showdebug
"timeoutMs"
]
[
"adultsTotal",
"checkin",
"checkout",
"childrenTotal",
"currency",
"numberRoomsNeeded"
]
[
"action",
"affiliateId", ==> Affilateid
"currency",
"detectedUserType",
"enableExperiments",
"starSettings",
"includeConditional",
"isCsRelocationMode",
"isInternal",
"isUserstar",
"languageCode",
"paymentTiming",
"serverRole",
"shouldTrackRequestInExperiments", ==> inexperiments
"siteType",
"siteTypeId",
"trafficSourceId",
"travelPurpose",
"tripValueContext",
"userAuthLevel",
"userId",
"visitorCc1",
"visitorCountryRegion",
"visitorIdentifier"
]
[
0,
1,
2
]
[
"type",
"uvi"
]
[
"type",
"uvi"
]
[
"type",
"uvi"
]
[
"canStarUserSeeFreeBreakfast",
"canStarUserSeeFreeCleaningFee",
"canStarUserSeeFreeRoomUpgrade", ==> freeroom_upgrade
"starDynamicPricing",
"starTier",
"starVipSettings",
"isRightstar",
"isUserstar",
"isUserstarControlGroup",
"topstarBenefit"
]
[
"canSeestarDynamicPricingLevel3"
]
[
0
]
[
"benefitName",
"eligible",
"percentage",
"programType"
]
[]
[
0
]
[
"hhhhid",
"ppblock",
"selected",
"selectedBlocks"
]
[
"allMatchingBlocks"
]
[
0
]
[
"blockId",
"blockStay",
"rawBlock"
]
[
"bundleId", ==> bundle_id
"channel",
"currencyId",
"flags",
"freeCancelUntil",
"maxPersons",
"mealplan",
"occupancy",
"packageId",
"paymenttermsId",
"policygroupId",
"price",
"priceBase10Exponent",
"roomId",
"vrFlags"
]
[
"stayAvailableUntil",
"stayExperimentFlags",
"stayFlashDeal",
"stayIncludedProducts",
"stayInventorySegmentId",
"stayMinAdvanceRes",
"stayNights",
"stayNrRooms",
"stayPrice",
"stayPromoTextId", ==> staypromotextid
"stayRoomRateFlags"
]
[
0
]
[
"channelId",
"curId",
"initialPrice",
"initialPrice1",
"occupancy",
"polId",
"price",
"price1",
"rateId",
"roomId"
]
[
0
]
[
"822808201_346547507_2_2_0"
]
The keys I need to replace are located at different nesting levels and in different blocks of this JSON.
Is there any suggestion on how to replace those keys in an efficient way?
PS: The keys in the JSON file are at static locations; they do not change dynamically.

To rename a field name, you could use with_entries, which gives you access to each .key. Reset it by assignment.
Now, what's still unclear is how you want to find the keys in question (programmatically). If their locations are static, and you know them, address them individually, as in:
.requestContext.starSettings |= with_entries((.key | select(. == "canStarUserSeeFreeRoomUpgrade")) = "freeroom_upgrade")
| .hits[].ppblock.allMatchingBlocks[].rawBlock |= with_entries((.key | select(. == "bundleId")) = "bundle_id")
# and so on...
To reduce redundant code, you could also move the renaming portion of it into its own function, and then just call that, e.g.:
# Rename the key $old to $new in the input object; every other entry is kept as is.
def rename($old; $new):
  with_entries(if .key == $old then .key = $new else . end);

.requestContext.starSettings |= rename("canStarUserSeeFreeRoomUpgrade"; "freeroom_upgrade")
| .hits[].ppblock.allMatchingBlocks[].rawBlock |= rename("bundleId"; "bundle_id")
# and so on...
Or move the location also into the function, e.g.:
# At every location addressed by `path`, rename the key $old to $new;
# entries with any other key are left unchanged.
def rename_at(path; $old; $new):
  path |= with_entries(.key |= (if . == $old then $new else . end));

rename_at(.requestContext.starSettings; "canStarUserSeeFreeRoomUpgrade"; "freeroom_upgrade")
| rename_at(.hits[].ppblock.allMatchingBlocks[].rawBlock; "bundleId"; "bundle_id")
# and so on...
If their location is unknown, and you want to replace them just based on their (local) name, you need to traverse the document, and check if you hit a matching name. The walk function provides you with the traversal, objects reduces the action to objects:
# Traverse the whole document; for each object encountered, rewrite its
# entries so that matching keys are renamed regardless of where they occur.
# Non-object values pass through untouched because `objects` selects nothing for them.
walk(objects |= with_entries(
if .key == "canStarUserSeeFreeRoomUpgrade" then .key = "freeroom_upgrade"
elif .key == "bundleId" then .key = "bundle_id"
# and so on...
else . end
))

Related

How to filter an array of json with jq in linux?

I have the following JSON input:
{
"paging": {
"count": 0,
"total": 0,
"offset": 0,
"max": 0
},
"executions": [
{
"id": 5,
"href": "https://localhost.com.br",
"permalink": "https://localhost.com.br",
"status": "succeeded",
"project": "PROJETO",
"executionType": "scheduled",
"date-started": {
"unixtime": 1660793400012,
"date": "2022-08-18T03:30:00Z"
},
"date-ended": {
"unixtime": 1660793409694,
"date": "2022-08-18T03:30:09Z"
},
"job": {
"id": "cdkwednweoi-8745bjdf-kcjkjr8745",
"averageDuration": 0,
"name": "routine",
"group": "",
"project": "PROJECT",
"description": "",
"href": "https://localhost.com.br",
"permalink": "https://localhost.com.br"
},
"description": "runner",
"argstring": null,
"serverUUID": "jdnsdnasldnaje382nf5ubv",
"successfulNodes": [
"84jsk937nf"
]
}
]
}
First I want to select an array by a property name. Then I want to select an object of the array by the value of its properties.
Example of the desired informations on output:
"href"
"status"
"project"
"date-started":
"unixtime": 48298437239847,
"date": "2022-07-17"
"date-ended":
"unixtime": 48298437239847,
"date": "2022-07-17"
"job":
"name": "cleaner"
I know how to get the first values:
# For each execution, build an array of its href, status and project.
jq -r '.executions[] | [.href, .status, .project]'
But the other ones I don't know how to do, I've tried with:
jq '.executions[] | with_entries( select(.value | has("date-started") ) )'
But it doesn't work.
Your first query produces a JSON array, so in this response, I'll assume it will suffice to produce an array of the eight values of interest in the order you've specified.
With your input, the following invocation produces the eight values as shown below:
jq '.executions[]
| [.href, .status, .project,
(."date-started" | (.unixtime, .date)),
(."date-ended" | (.unixtime, .date)),
.job.name]'
Output:
[
"https://localhost.com.br/rundeck/api/40/execution/2340",
"succeeded",
"PROJETO",
1660793400012,
"2022-08-18T03:30:00Z",
1660793409694,
"2022-08-18T03:30:09Z",
"proc_limpeza_saft"
]

jq to filter inner array elements but return the whole JSON

TL;DR
How can I return the whole JSON after filtering inner array elements of a top-level key?
Detailed explanation
I have a JSON describing the COCO image database and it is formatted as follows (irrelevant elements truncated as ...).
{
"info": {
"description": "COCO 2017 Dataset",
...
},
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
...
},
...
],
"images": [
{
"license": 4,
...
},
"annotations": [
{
"segmentation": [
[
510.66,
...
]
],
"area": 702.1057499999998,
"iscrowd": 0,
"image_id": 289343,
"bbox": [
473.07,
395.93,
38.65,
28.67
],
"category_id": 18,
"id": 1768
},
"categories": [
{
"supercategory": "person",
...
},
]
}
I need to filter annotations where category_id has one of several values, for example 1, 2.
I can successfully filter such category_ids with
jq -C ' .annotations[] | select( .category_id == 1 or .category_id == 2 ) ' instances_val2017.json | less -R
However, what is returned are only the annotations element of the total JSON as below.
{
"segmentation": [
[
162.72,
...
]
],
"area": 426.9120499999995,
"iscrowd": 0,
"image_id": 45596,
"bbox": [
161.52,
507.18,
46.45,
19.16
],
"category_id": 2,
"id": 124742
}
{
...
{
I know it's possible to return these elements as an array by wrapping the expression in [] but how can I return the entire original JSON after filtering the specified category ids?
Okay I spent 3 hours trying to solve this yesterday then this morning I posted this question and subsequently figured it out!
Here is the solution which uses the |= operator which modifies an element in place.
jq '.annotations |= map(select(.category_id | contains(1,2)))' instances_val2017.json
As per the suggestion of @peak, here is the command with == instead of contains.
jq '.annotations |= map(select(.category_id == (1,2)))' instances_val2017.json

How to find and replace json with shell variables using jq?

I have read properties with jq from a json object and have stored them to variables.
I want to now read these variables and essentially find and replace a word inside the string with a global shell variable.
I've set my json ID's from my JSON file
# Set Json ID's
TARGET_ID=$(jq '.DefaultCacheBehavior.TargetOriginId' distconfig.json)
DOMAIN_NAME=$(jq '.Origins.Items[0].DomainName' distconfig.json)
ORIGIN_ID=$(jq '.Origins.Items[0].Id' distconfig.json)
echo "$TARGET_ID"
echo "$DOMAIN_NAME"
echo "$ORIGIN_ID"
This returns
"S3-Website-stag4.example.io.s3-website.us-east-2.amazonaws.com"
"stag4.example.io.s3-website.us-east-2.amazonaws.com"
"S3-Website-stag4.example.io.s3-website.us-east-2.amazonaws.com"
I have my location id variable and would like to write it to find and replace all stag4 references in those 3 ID's.
Then I would like to write those 3 ID's to the initial json object, or create a temp version of it.
Example, if:
$DOMAIN_NAME is"stag4.example.io.s3-website.us-east-2.amazonaws.com"
I would like to essentially have it set to:
$LOCATION_NAME="stag6"
DOMAIN_LOCATION="example.io"
"$DOMAIN_NAME=S3-Website-\$LOCATION_NAME\.example.io.s3-website.us-east-2.amazonaws.com"
"$TARGET_ID=\$LOCATION_NAME\.example.io.s3-website.us-east-2.amazonaws.com"
"$ORIGIN_ID=S3-Website-\$LOCATION_NAME\.example.io.s3-website.us-east-2.amazonaws.com"
Then write those 3 to the temp or new json file so I can run my cloudformation command:
aws cloudfront create-distribution --distribution-config file://disttemp.json
I have now built out the proper variables from the initial json file like so:
# A shell assignment uses the bare variable name on the left-hand side;
# a leading `$` would expand the (empty) variable and try to run the result as a command.
LOCATION_NAME="stag6"
DOMAIN_LOCATION="example.io"
echo "Build New IDs"
# Assemble the three identifiers from the location and domain parts.
TARGET_ID_BUILT="S3-Website-$LOCATION_NAME.$DOMAIN_LOCATION.s3-website.us-east-2.amazonaws.com"
DOMAIN_NAME_BUILT="$LOCATION_NAME.$DOMAIN_LOCATION.s3-website.us-east-2.amazonaws.com"
ORIGIN_ID_BUILT="S3-Website-$LOCATION_NAME.$DOMAIN_LOCATION.s3-website.us-east-2.amazonaws.com"
echo "$TARGET_ID_BUILT"
echo "$DOMAIN_NAME_BUILT"
echo "$ORIGIN_ID_BUILT"
How do I write these variables to the json file with jq?
EDIT: Sample of distconfig.json requested – domain/creds swapped to example
{
"CallerReference": "my-test-distribution-2",
"Comment": "",
"CacheBehaviors": {
"Quantity": 0
},
"IsIPV6Enabled": true,
"Logging": {
"Bucket": "",
"Prefix": "",
"Enabled": false,
"IncludeCookies": false
},
"WebACLId": "",
"Origins": {
"Items": [
{
"OriginPath": "",
"CustomOriginConfig": {
"OriginSslProtocols": {
"Items": [
"TLSv1",
"TLSv1.1",
"TLSv1.2"
],
"Quantity": 3
},
"OriginProtocolPolicy": "http-only",
"OriginReadTimeout": 30,
"HTTPPort": 80,
"HTTPSPort": 443,
"OriginKeepaliveTimeout": 5
},
"CustomHeaders": {
"Quantity": 0
},
"Id": "S3-Website-stag4.example.io.s3-website.us-east-2.amazonaws.com",
"DomainName": "stag4.example.io.s3-website.us-east-2.amazonaws.com"
}
],
"Quantity": 1
},
"DefaultRootObject": "",
"PriceClass": "PriceClass_All",
"Enabled": true,
"DefaultCacheBehavior": {
"TrustedSigners": {
"Enabled": false,
"Quantity": 0
},
"LambdaFunctionAssociations": {
"Quantity": 0
},
"TargetOriginId": "S3-Website-stag4.example.io.s3-website.us-east-2.amazonaws.com",
"ViewerProtocolPolicy": "redirect-to-https",
"ForwardedValues": {
"Headers": {
"Quantity": 0
},
"Cookies": {
"Forward": "none"
},
"QueryStringCacheKeys": {
"Quantity": 0
},
"QueryString": false
},
"MaxTTL": 31536000,
"SmoothStreaming": false,
"DefaultTTL": 86400,
"AllowedMethods": {
"Items": [
"HEAD",
"GET"
],
"CachedMethods": {
"Items": [
"HEAD",
"GET"
],
"Quantity": 2
},
"Quantity": 2
},
"MinTTL": 0,
"Compress": true
},
"ViewerCertificate": {
"SSLSupportMethod": "sni-only",
"ACMCertificateArn": "xxxx",
"MinimumProtocolVersion": "TLSv1.1_2016",
"Certificate": "xxxx",
"CertificateSource": "acm"
},
"CustomErrorResponses": {
"Quantity": 0
},
"HttpVersion": "http2",
"Restrictions": {
"GeoRestriction": {
"RestrictionType": "none",
"Quantity": 0
}
},
"Aliases": {
"Quantity": 0
}
}
You should use sed to do the substitution and then inject the value back into the JSON.
echo $TARGET_ID | sed 's/stag4/stag5/g'
Outputs
S3-Website-stag5.example.io.s3-website.us-east-2.amazonaws.com
Next we'll put the value back into the original JSON, this will technically output a new JSON and does not edit the file, however, you can easily solve for this on the output by temporarily saving to a tmp file.
We will use the --arg flag to reference our bash variable and set the new value for our field
# Pass the shell variable to jq as the string variable $TARGET_ID. Quoting the
# expansion keeps the value a single argument; jq reads the file directly, and
# the result is written to a temp file before replacing the original.
jq --arg TARGET_ID "$TARGET_ID" '.DefaultCacheBehavior.TargetOriginId = $TARGET_ID' distconfig.json > tmp.json && mv tmp.json distconfig.json

How to format a csv file using json data?

I have a json file that I need to convert to a csv file, but I am a little wary of trusting a json-to-csv converter site as the outputted data seems to be incorrect... so I was hoping to get some help here!
I have the following json file structure:
{
"GroupName": "GrpName13",
"Number": 3,
"Notes": "Test Group ",
"Units": [
{
"UnitNumber": "TestUnit13",
"DataSource": "Factory",
"ContractNumber": "TestContract13",
"CarNumber": "2",
"ControllerTypeMessageId" : 4,
"NumberOfLandings": 4,
"CreatedBy": "user1",
"CommissionModeMessageId": 2,
"Details": [
{
"DetailName": "TestFloor13",
"DetailNumber": "5"
}
],
"UnitDevices": [
{
"DeviceTypeMessageId": 1,
"CreatedBy": "user1"
}
]
}
]
}
The issue I think I'm seeing is that the converters seem unable to handle the many nested data values. The reason I think the converters are wrong is that when I try to convert back to JSON using them, I don't receive the same structure.
Does anyone know how to manually format this JSON into CSV format, or know of a reliable converter that can handle nested values?
Try
www.json-buddy.com/convert-json-csv-xml.htm
if not working for you then you can try this tool
http://download.cnet.com/JSON-to-CSV/3000-2383_4-76680683.html
should be helpful!
I have tried your JSON with the converter at this URL:
http://www.convertcsv.com/json-to-csv.htm
As a result:
UnitNumber,DataSource,ContractNumber,CarNumber,ControllerTypeMessageId,NumberOfLandings,CreatedBy,CommissionModeMessageId,Details/0/DetailName,Details/0/DetailNumber,UnitDevices/0/DeviceTypeMessageId,UnitDevices/0/CreatedBy
TestUnit13,Factory,TestContract13,2,4,4,user1,2,TestFloor13,5,1,user1
Because the converter preserves the path of each key — for example, 'DeviceTypeMessageId' in the list 'UnitDevices' becomes the column name 'UnitDevices/0/DeviceTypeMessageId' — name collisions between columns are avoided, and you can derive the column names from its conversion rules.
Hope this helps.
Here is a solution using jq
If the file filter.jq contains
# Flatten a nested object into CSV rows: first a header row, then one data
# row per combination of the elements found in array-valued keys.
def denormalize:
  # Emit one column name per non-array key. Keys found under an array-valued
  # key are prefixed with "<parent key>_"; only the first element of each
  # array is inspected for its keys.
  def headers($p):
    keys_unsorted[] as $k
    | if .[$k]|type == "array" then (.[$k]|first|headers("\($p)\($k)_"))
      else "\($p)\($k)"
      end
  ;
  # Collect the values in key order. An array-valued key becomes a nested
  # array holding the `setup` result of each of its elements.
  def setup:
    [
      keys_unsorted[] as $k
      | if .[$k]|type == "array" then [ .[$k][]| setup ]
        else .[$k]
        end
    ]
  ;
  # Expand the structure built by `setup` into flat rows: a non-array value is
  # prepended to every row produced from the remainder, while a nested array
  # contributes one alternative per element, combined with every row produced
  # from the remainder.
  def iter:
    if length == 0 then []
    elif .[0]|type != "array" then
      [.[0]] + (.[1:] | iter)
    else
      (.[0][] | iter) as $x
      | (.[1:] | iter) as $y
      | [$x[]] + $y
    end
  ;
  [ headers("") ], (setup | iter)
;
# `@csv` renders each emitted array as one CSV line (a leading `#` here would
# start a comment and leave the pipe without a right-hand side).
denormalize | @csv
and data.json contains (note extra samples added)
{
"GroupName": "GrpName13",
"Notes": "Test Group ",
"Number": 3,
"Units": [
{
"CarNumber": "2",
"CommissionModeMessageId": 2,
"ContractNumber": "TestContract13",
"ControllerTypeMessageId": 4,
"CreatedBy": "user1",
"DataSource": "Factory",
"Details": [
{
"DetailName": "TestFloor13",
"DetailNumber": "5"
}
],
"NumberOfLandings": 4,
"UnitDevices": [
{
"CreatedBy": "user1",
"DeviceTypeMessageId": 1
},
{
"CreatedBy": "user10",
"DeviceTypeMessageId": 10
}
],
"UnitNumber": "TestUnit13"
},
{
"CarNumber": "99",
"CommissionModeMessageId": 99,
"ContractNumber": "Contract99",
"ControllerTypeMessageId": 99,
"CreatedBy": "user99",
"DataSource": "Another Factory",
"Details": [
{
"DetailName": "TestFloor99",
"DetailNumber": "99"
}
],
"NumberOfLandings": 99,
"UnitDevices": [
{
"CreatedBy": "user99",
"DeviceTypeMessageId": 99
}
],
"UnitNumber": "Unit99"
}
]
}
then the command
jq -M -r -f filter.jq data.json
will produce
"GroupName","Notes","Number","Units_CarNumber","Units_CommissionModeMessageId","Units_ContractNumber","Units_ControllerTypeMessageId","Units_CreatedBy","Units_DataSource","Units_Details_DetailName","Units_Details_DetailNumber","Units_NumberOfLandings","Units_UnitDevices_CreatedBy","Units_UnitDevices_DeviceTypeMessageId","Units_UnitNumber"
"GrpName13","Test Group ",3,"2",2,"TestContract13",4,"user1","Factory","TestFloor13","5",4,"user1",1,"TestUnit13"
"GrpName13","Test Group ",3,"2",2,"TestContract13",4,"user1","Factory","TestFloor13","5",4,"user10",10,"TestUnit13"
"GrpName13","Test Group ",3,"99",99,"Contract99",99,"user99","Another Factory","TestFloor99","99",99,"user99",99,"Unit99"

jq: output values of ids instead of numbers

Here's my input json:
{
"channels": [
{ "id": 1, "name": "Pop"},
{ "id": 2, "name": "Rock"}
],
"links": [
{ "id": 2, "streams": [ {"url": "http://example.com/rock"} ] },
{ "id": 1, "streams": [ {"url": "http://example.com/pop"} ] }
]
}
This is what I want as an output:
"http://example.com/pop"
"Pop"
"http://example.com/rock"
"Rock"
So I need jq to replace .channels[].id with .links[].streams[0].url based on .links[].id
I don't know if it's right, but this is how I managed to output the urls:
(.channels[].id | tostring) as $ids | [.links[]] | map({(.id | tostring): .streams[0].url}) | add as $urls | $urls[$ids]
"http://example.com/pop"
"http://example.com/rock"
The question is, how do I add .channels[].name to it?
You sometimes have to be careful what you ask for, but this will produce the result you said you want:
.channels[] as $channel
| $channel.name,
(.links[] | select(.id == $channel.id) | .streams[0].url)
Output for the given input:
"Pop"
"http://example.com/pop"
"Rock"
"http://example.com/rock"
Here is a solution which uses reduce and setpath to make a $urls lookup table from .links and then scans .channels generating corresponding urls and names.
# Build a lookup object keyed by the stringified link id, whose value is the
# array of all stream urls of that link.
(
reduce .links[] as $l (
{};
setpath([ $l.id|tostring ]; [$l.streams[].url])
)
) as $urls
# For each channel, emit every url stored under its id, then the channel name.
| .channels[]
| $urls[ .id|tostring ][], .name
If multiple urls are present in the "streams" attribute this will
print them all before printing the name. e.g. if the input is
{
"channels": [
{ "id": 1, "name": "Pop"},
{ "id": 2, "name": "Rock"}
],
"links": [
{ "id": 2, "streams": [ {"url": "http://example.com/rock"},
{"url": "http://example.com/hardrock"} ] },
{ "id": 1, "streams": [ {"url": "http://example.com/pop"} ] }
]
}
the output will be
"http://example.com/pop"
"Pop"
"http://example.com/rock"
"http://example.com/hardrock"
"Rock"