jq - extract additional JSON object into new array - json

I have some JSON. It looks like this:
{
"Volumes": [
{
"Attachments": [
{
"VolumeId": "vol-11111111",
"State": "attached",
"DeleteOnTermination": false,
"Device": "/dev/sdz"
}
],
"Tags": [
{
"Value": "volume1",
"Key": "Name"
},
{
"Value": "00:00",
"Key": "Start"
},
{
"Value": "00:20",
"Key": "Finish"
},
{
"Value": "2",
"Key": "Period"
}
],
"VolumeId": "vol-11111111"
},
{
"Attachments": [
{
"VolumeId": "vol-22222222",
"State": "attached",
"DeleteOnTermination": false,
"Device": "/dev/sdz"
}
],
"Tags": [
{
"Value": "volume2",
"Key": "Name"
},
{
"Value": "00:00",
"Key": "Start"
},
{
"Value": "00:20",
"Key": "Finish"
},
{
"Value": "2",
"Key": "Period"
}
],
"VolumeId": "vol-22222222"
},
{
"Attachments": [
{
"VolumeId": "vol-33333333",
"State": "attached",
"DeleteOnTermination": false,
"Device": "/dev/sdz"
}
],
"Tags": [
{
"Value": "volume3",
"Key": "Name"
},
{
"Value": "00:00",
"Key": "Start"
},
{
"Value": "00:20",
"Key": "Finish"
},
{
"Value": "2",
"Key": "Period"
}
],
"VolumeId": "vol-33333333"
}
]
}
Using jq, I am able to extract the following information:
VolumeId,Finish,Start,Period
using the jq command
cat json | jq -r '[.Volumes[]|({VolumeId}+(.Tags|from_entries))|{VolumeId,Finish,Start,Period}]'
[
{
"VolumeId": "vol-11111111",
"Finish": "00:20",
"Start": "00:00",
"Period": "2"
},
{
"VolumeId": "vol-22222222",
"Finish": "00:20",
"Start": "00:00",
"Period": "2"
},
{
"VolumeId": "vol-33333333",
"Finish": "00:20",
"Start": "00:00",
"Period": "2"
}
]
All this works fine. However, I also need to extract the Device value from each volume's Attachments. I am looking for output similar to the following for each array element:
[
{
"VolumeId": "vol-11111111",
"Finish": "00:20",
"Start": "00:00",
"Period": "2",
"DeviceId": "/dev/sdz"
},
{
"VolumeId": "vol-22222222",
"Finish": "00:20",
"Start": "00:00",
"Period": "2",
"DeviceId": "/dev/sdz"
},
{
"VolumeId": "vol-33333333",
"Finish": "00:20",
"Start": "00:00",
"Period": "2",
"DeviceId": "/dev/sdz"
}
]
However I can't figure out how to do this without getting an error. The most logical approach for me would be to do something like:
cat json | jq -r '[.Volumes[]|({VolumeId}+(.Attachments|from_entries)+(.Tags|from_entries))|{VolumeId,Finish,Start,Period,DeviceId}]'
However I get the error:
jq: error (at <stdin>:91): Cannot use null (null) as object key
Any help figuring out what I am doing wrong and how to fix it would be greatly appreciated.
thanks

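Ultimately, the problem is that you're using from_entries on the Attachments array, where it cannot work. from_entries takes an array of key/value pair objects (each with a key-like field and a value-like field) and builds a single object from them. The elements of Attachments are not key/value pairs; they are ordinary objects, so from_entries finds no key and fails with "Cannot use null (null) as object key". If you're just trying to combine those objects into one, you should use add.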
Also, there is no property named DeviceId; it is named Device. If you want to select the Device property and output it as DeviceId, you need to map it explicitly (DeviceId: .Device).
# For every volume, emit VolumeId, the Finish/Start/Period tag values and the
# attachment device (renamed to DeviceId).
.Volumes | map(
  # In jq object addition the right-hand operand wins on duplicate keys, so
  # { VolumeId } is added last: neither a tag nor an attachment (which carries
  # its own VolumeId field) can override the volume's own id.
  # `// []` keeps the filter from failing when Tags or Attachments is null.
  ((.Tags // [] | from_entries) + (.Attachments // [] | add) + { VolumeId })
  | { VolumeId, Finish, Start, Period, DeviceId: .Device }
)

Related

Kubernetes + jq - retrieving containers list per pod yields cartesian product

I'm trying to use jq on Kubernetes JSON output to create a new JSON object containing a list of objects (container name and image per pod); however, I'm getting a Cartesian product.
My input data (truncated, with sensitive info removed):
{
"apiVersion": "v1",
"items": [
{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"creationTimestamp": "2021-06-30T12:45:40Z",
"name": "pod-1",
"namespace": "default",
"resourceVersion": "757679286",
"selfLink": "/api/v1/namespaces/default/pods/pod-1"
},
"spec": {
"containers": [
{
"image": "image-1",
"imagePullPolicy": "Always",
"name": "container-1",
"resources": {},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"volumeMounts": [
{
"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount",
"readOnly": true
}
]
},
{
"image": "image-2",
"imagePullPolicy": "Always",
"name": "container-2",
"resources": {},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"volumeMounts": [
{
"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount",
"readOnly": true
}
]
}
],
"dnsPolicy": "ClusterFirst",
"enableServiceLinks": true,
"priority": 0,
"restartPolicy": "Always",
"schedulerName": "default-scheduler",
"securityContext": {},
"serviceAccount": "default",
"serviceAccountName": "default",
"terminationGracePeriodSeconds": 30,
"tolerations": [
{
"effect": "NoExecute",
"key": "node.kubernetes.io/not-ready",
"operator": "Exists",
"tolerationSeconds": 300
},
{
"effect": "NoExecute",
"key": "node.kubernetes.io/unreachable",
"operator": "Exists",
"tolerationSeconds": 300
}
],
"volumes": [
{
"name": "default-token-b954f",
"secret": {
"defaultMode": 420,
"secretName": "default-token-b954f"
}
}
]
},
"status": {
"conditions": [
{
"lastProbeTime": null,
"lastTransitionTime": "2021-06-30T12:45:40Z",
"status": "True",
"type": "Initialized"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-06-30T12:45:40Z",
"message": "containers with unready status: [container-1 container-2]",
"reason": "ContainersNotReady",
"status": "False",
"type": "Ready"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-06-30T12:45:40Z",
"message": "containers with unready status: [container-1 container-2]",
"reason": "ContainersNotReady",
"status": "False",
"type": "ContainersReady"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-06-30T12:45:40Z",
"status": "True",
"type": "PodScheduled"
}
],
"containerStatuses": [
{
"image": "image-1",
"imageID": "",
"lastState": {},
"name": "container-1",
"ready": false,
"restartCount": 0,
"started": false,
"state": {
"waiting": {
"message": "Back-off pulling image \"image-1\"",
"reason": "ImagePullBackOff"
}
}
},
{
"image": "image-2",
"imageID": "",
"lastState": {},
"name": "container-2",
"ready": false,
"restartCount": 0,
"started": false,
"state": {
"waiting": {
"message": "Back-off pulling image \"image-2\"",
"reason": "ImagePullBackOff"
}
}
}
],
"qosClass": "BestEffort",
"startTime": "2021-06-30T12:45:40Z"
}
}
],
"kind": "List",
"metadata": {
"resourceVersion": "",
"selfLink": ""
}
}
my command:
jq '.items[] | { "name": .metadata.name, "containers": [{ "name": .spec.containers[].name, "image": .spec.containers[].image }]} '
desired output:
{
"name": "pod-1",
"containers": [
{
"name": "container-1",
"image": "image-1"
},
{
"name": "container-2",
"image": "image-2"
}
]
}
output I get:
{
"name": "pod-1",
"containers": [
{
"name": "container-1",
"image": "image-1"
},
{
"name": "container-1",
"image": "image-2"
},
{
"name": "container-2",
"image": "image-1"
},
{
"name": "container-2",
"image": "image-2"
}
]
}
Could anyone explain what I am doing wrong?
Best Regards, Piotr.
The problem is "name": .spec.containers[].name and "image": .spec.containers[].image:
Each expression independently generates a stream of values (one per container), and jq's object construction then emits one object for every combination of those values.
Simplified example of why you get a Cartesian product:
jq -c -n '{name: ("A", "B"), value: ("C", "D")}'
outputs:
{"name":"A","value":"C"}
{"name":"A","value":"D"}
{"name":"B","value":"C"}
{"name":"B","value":"D"}
You get the desired output using this jq filter on your input:
jq '
  # Emit one object per pod. The containers are collected into a single array
  # so that each name stays paired with its image.
  .items[]
  | {
      name: .metadata.name,
      containers: [ .spec.containers[] | { name, image } ]
    }'
output:
{
"name": "pod-1",
"containers": [
{
"name": "container-1",
"image": "image-1"
},
{
"name": "container-2",
"image": "image-2"
}
]
}

jq: return an element of a JSON array in a very specific way

I have this JSON (it is from a test database; none of the data is real):
{
"pguid": "4EA979A2-E578-4DA3-89DB-24082F3092AA",
"lastEnrollTguid": "EA98B161-04D3-4F0A-920A-58DBFF3C2274",
"timestamp": 1016086888000,
"keys": [
{
"id": "gr",
"value": "1907971"
}
],
"biographics": [
{
"id": "localNascimento",
"value": "JOINVILLE SC"
},
{
"id": "dataNascimento",
"value": "1859-03-08"
},
{
"id": "mae",
"value": "ANTA MARCIA PINHEAD"
},
{
"id": "nome",
"value": "MIR PINHEAD"
}
],
"biometric": [
{
"source": "ORIGINAL",
"type": "FACE",
"format": "JPEG",
"properties": {
"width": 0,
"height": 0,
"resolution": 500,
"ratio": 0,
"matcherId": 0,
"extractorId": 0
},
"index": 10,
"content": "5215421547"
}
],
"labels": [
"SC",
"CIVIL",
"MASCULINO",
"JOINVILLE"
],
"history": {
"events": [
{
"type": "ENROLL",
"tguid": "3C1B0D1F-9143-4C24-A351-E88A19317AC9",
"timestamp": 1014086658288
},
{
"type": "UPDATE",
"tguid": "EA98B161-04D3-4F0A-920A-58DBFF3C2274",
"timestamp": 1016786888028
}
]
}
}
I want to retrieve only the tguid values in the history.events array and, if there is a way to do so, use the array index to accomplish that.
Here is what I tried (and failed miserably).
Example (it does not work):
jq '.[].history.events.tguid[1]' /tmp/teste.json
I want to retrieve the tguid at a given index so that I can work with it.
Someone have any ideas?
Try this:
# Index 1 selects the second event (jq arrays are zero-based).
jq '.history.events[1].tguid' /tmp/teste.json
Thanks, everyone. This is what I ended up using:
jq '.[].history.events | .[0].tguid' /tmp/teste1.json

Rename JSON key field with value in object

Have the following json output:
[
{
"id": "47",
"canUpdate": true,
"canDelete": true,
"canArchive": true,
"info": [
{
"key": "problem_type",
"value": "PAN",
"valueCaption": "PAN",
"keyCaption": "Category"
},
{
"key": "status",
"value": 3,
"valueCaption": "Closed",
"keyCaption": "Status"
},
{
"key": "insert_time",
"value": 1466446314000,
"valueCaption": "2016-06-20 14:11:54.0",
"keyCaption": "Request time"
}
As you can see under "info" they actually label the key:value pair as "key": "problem_type" and "value": "PAN" and then "valueCaption": "PAN" "keyCaption": "Category". What I need to do is remap the file so that, in this example, it shows as "problem_type": "PAN" and "Category": "PAN". What would be the best method to iterate through the output to remap the key:value pairs in this manner?
How it needs to be:
[
{
"id": "47",
"canUpdate": true,
"canDelete": true,
"canArchive": true,
"info": [
{
"problem_type": "PAN",
"Category": "PAN"
},
{
"status": 3,
"Status": "Closed"
},
{
"insert_time": 1466446314000,
"Request time": "2016-06-20 14:11:54.0"
}
Here is a jq solution which uses Update assignment |=
# Replace each element of every "info" array with a two-key object: the value
# of .key maps to .value, and the value of .keyCaption maps to .valueCaption.
.[].info[] |= {(.key):.value, (.keyCaption):.valueCaption}
Sample Run (assumes data in data.json)
$ jq -M '.[].info[] |= {(.key):.value, (.keyCaption):.valueCaption}' data.json
[
{
"id": "47",
"canUpdate": true,
"canDelete": true,
"canArchive": true,
"info": [
{
"problem_type": "PAN",
"Category": "PAN"
},
{
"status": 3,
"Status": "Closed"
},
{
"insert_time": 1466446314000,
"Request time": "2016-06-20 14:11:54.0"
}
]
}
]
Try it online at jqplay.org

Json file editing through jq

I have json
{
"file1": [{
"username": "myname",
"groupname": "mypassword",
"environment": [{
"name": "UMASK",
"value": "022"
},
{
"name": "DEBUG",
"value": "2"
}]
}]
}
and want to change the value of DEBUG to 5.
I tried the command below:
jq .file1[0].environment sandeep.json |jq '(.[] |select(.name ==
"DEBUG") | .value) |= "5"'
This returns only the relevant portion of the JSON:
[
{
"name": "UMASK",
"value": "022"
},
{
"name": "DEBUG",
"value": "5"
}
]
but I want to see full json with changed value
{
"file1": [{
"username": "myname",
"groupname": "mypassword",
"environment": [{
"name": "UMASK",
"value": "022"
},
{
"name": "DEBUG",
"value": "5"
}]
}]
}
Please suggest how I can do this.
It should be:
# Assign the string "5" (not the number 5) so that "value" keeps the same type
# as the original "2"; the update is applied in place, so the whole document is printed.
jq '(.file1[].environment[] | select(.name == "DEBUG").value) |= "5"' file.json
Output:
{
"file1": [
{
"username": "myname",
"groupname": "mypassword",
"environment": [
{
"name": "UMASK",
"value": "022"
},
{
"name": "DEBUG",
"value": "5"
}
]
}
]
}

Extract values in json object with awk/sed, but cannot get it to work

I have a file with the return of a curl statement in it, in the form of json. Each object has a set of values, but the parameters for these values are all called the same names. See code below.
These objects are part of a larger object called workflow. The Cleaning up object is the last process that runs in our workflow. For every video that passes through the workflow, a json file in this format is created. (There are more than only these three objects, this is just for illustrative purposes)
I want to take the value of completed of the object with "description": "Cleaning up" and store it as a variable $end_time. Then I want to take the value of completed of the object with "description": "Ingest" and store it as a variable $start_time. These two values are then subtracted to give me an integer time in milliseconds so I can calculate the time it took for the video to go through this part of the process. With the maths part I am fine, and know how to do it. It is the extraction of the values that I am struggling with.
I hope this makes sense? ANY help would be appreciated. Thank you in advance!
EDIT: Had to delete original code in post, due to character limitations
Here is a proper example of the file that I have to work with:
{
"workflows": {
"count": "20",
"searchTime": "1",
"startPage": "0",
"totalCount": "1",
"workflow": {
"configurations": {
"configuration": [
{
"$": "1409750880000",
"key": "schedule.start"
},
{
"$": "1409755980000",
"key": "schedule.stop"
},
{
"$": "Capture_agent",
"key": "schedule.location"
},
{
"$": "false",
"key": "trimHold"
},
{
"$": "true",
"key": "archiveOp"
},
{
"$": "false",
"key": "captionHold"
},
{
"$": "false",
"key": "videoPreview"
}
]
},
"creator": {
"organization": "mh_default_org",
"roles": [
"76b1bdde-a080-40a4-b929-bde89af6a0a8_Instructor",
"ROLE_ADMIN",
"ROLE_ANONYMOUS",
"ROLE_USER"
],
"userName": "user_name"
},
"description": "This workflow definition defines the steps involved in scheduling a recording, capturing it, and\n ingesting it, after which processing operations may be added.\n ",
"errors": "",
"id": "15518",
"mediapackage": {
"attachments": "",
"creators": {
"creator": "Name"
},
"id": "2d25ed19-2978-458d-a4a0-c9c56d791c68",
"license": "Creative Commons 3.0: Attribution-NonCommercial-NoDerivs",
"media": "",
"metadata": "",
"publications": {
"publication": {
"channel": "engage-player",
"id": "b7b68f91-2c33-4673-ba7c-2e9b891788f9",
"mimetype": "text/html",
"tags": "",
"url": "http://some.url.com:80/engage/ui/watch.html?id=2d25ed19-2978-458d-a4a0-c9c56d791c68"
}
},
"series": "76b1bdde-a080-40a4-b929-bde89af6a0a8",
"seriestitle": "Recording_Title_user_name",
"start": "2014-09-03T13:28:00Z",
"title": "Recording_Title"
},
"operations": {
"operation": [
{
"abortable": "false",
"completed": 1409750882092,
"configurations": {
"configuration": [
{
"$": "1409750880000",
"key": "schedule.start"
},
{
"$": "1409755980000",
"key": "schedule.stop"
},
{
"$": "Capture_agent",
"key": "schedule.location"
}
]
},
"continuable": "false",
"description": "Scheduled",
"execution-history": "",
"execution-host": "http://some.url.com:8080",
"fail-on-error": "true",
"failed-attempts": "0",
"hold-action-title": "View schedule",
"holdurl": "/workflow/hold/org.opencastproject.workflow.handler.scheduleworkflowoperationhandler",
"id": "schedule",
"job": "15519",
"max-attempts": "1",
"retry-strategy": "none",
"started": 1409750881745,
"state": "SUCCEEDED",
"time-in-queue": 0
},
{
"abortable": "false",
"configurations": "",
"continuable": "false",
"description": "Capture",
"execution-history": "",
"execution-host": "http://some.url.com:8080",
"fail-on-error": "true",
"failed-attempts": "0",
"hold-action-title": "Monitor capture",
"holdurl": "/workflow/hold/org.opencastproject.workflow.handler.captureworkflowoperationhandler",
"id": "capture",
"job": "42894",
"max-attempts": "1",
"retry-strategy": "none",
"started": 1409750884085,
"state": "SKIPPED",
"time-in-queue": 0
},
{
"completed": 1409756171224,
"configurations": "",
"description": "Ingest",
"execution-history": "",
"fail-on-error": "true",
"failed-attempts": "0",
"id": "ingest",
"max-attempts": "1",
"retry-strategy": "none",
"state": "SUCCEEDED"
},
{
"completed": 1409854379552,
"configurations": {
"configuration": {
"key": "preserve-flavors"
}
},
"description": "Cleaning up",
"execution-history": "",
"execution-host": "http://some.url.com:8080",
"fail-on-error": "false",
"failed-attempts": "0",
"id": "cleanup",
"job": "45113",
"max-attempts": "1",
"retry-strategy": "none",
"started": 1409854378128,
"state": "SUCCEEDED",
"time-in-queue": 0
}
]
},
"organization": {
"adminRole": "ROLE_ADMIN",
"anonymousRole": "ROLE_ANONYMOUS",
"id": "mh_default_org",
"name": "Opencast Project",
"properties": {
"property": [
{
"$": "true",
"key": "adminui.i18n_tab_episode.enable"
},
{
"$": "false",
"key": "adminui.i18n_tab_users.enable"
},
{
"$": "/engage/ui/img/mh_logos/OpencastLogo.png",
"key": "logo_small"
},
{
"$": "http://opencast.org/matterhorn/",
"key": "engageui.link_mobile_redirect.url"
},
{
"$": "false",
"key": "engageui.annotations.enable"
},
{
"$": "true",
"key": "engageui.links_media_module.enable"
},
{
"$": "2024",
"key": "adminui.chunksize"
},
{
"$": "false",
"key": "adminui.series_prepopulate.enable"
},
{
"$": "true",
"key": "engageui.link_download.enable"
},
{
"$": "false",
"key": "engageui.link_mobile_redirect.enable"
},
{
"$": "For more information have a look at the official site.",
"key": "engageui.link_mobile_redirect.description"
},
{
"$": "/engage/ui/img/mh_logos/MatterhornLogo_large.png",
"key": "logo_large"
}
]
},
"servers": {
"server": {
"name": "localhost",
"port": "8080"
}
}
},
"parent": {
"nil": "true"
},
"state": "SUCCEEDED",
"template": "full",
"title": "Scheduled Workflow"
}
}
}
Here is a jq example that should point you to getting what you want:
#!/bin/bash
# Reads ./workflow.json and captures the "completed" values of two operations.

# Print .completed for every operation whose description equals $1.
completed_for() {
    jq --arg desc "$1" \
        '.workflows.workflow.operations.operation[] | select(.description == $desc) | .completed' \
        < workflow.json
}

end_time=$(completed_for "Cleaning up")
start_time=$(completed_for "Ingest")
This assumes the input is a JSON object with a top-level workflows key, whose workflow.operations.operation value is an array of operation objects. Here it is on the command line:
$ jq '.workflows.workflow.operations.operation[] | select(.description == "Ingest") | .completed' < workflow.json
1409756171224
$ jq '.workflows.workflow.operations.operation[] | select(.description == "Cleaning up") | .completed' < workflow.json
1409854379552