How to insert fields into a JSON file using jq? - json

Let's say I have a JSON file recipe.json containing this:
{
"name": "Curried Lentils and Rice",
"ingredients": [
{
"quantity": "1 quart",
"name": "beef broth",
"type": "Misc"
},
{
"quantity": "1 cup",
"name": "dried green lentils",
"type": "Misc"
},
{
"quantity": "1/2 cup",
"name": "basmati rice",
"type": "Misc"
},
{
"quantity": "1 tsp",
"name": "curry powder",
"type": "Condiments"
},
{
"quantity": "1 tsp",
"name": "salt",
"type": "Condiments"
}
],
"steps": [
"Bring broth to a low boil.",
"Add curry powder and salt.",
"Cook lentils for 20 minutes.",
"Add rice and simmer for 20 minutes.",
"Enjoy!"
],
"timers": [
0,
0,
20,
20,
0
],
"imageURL": "http://dagzhsfg97k4.cloudfront.net/wp-content/uploads/2012/05/lentils3.jpg"
}
And I'm trying to insert new JSON fields into the file at specific positions in the file, using jq such as:
"cuisine": "meditaranean"
will become the 2nd entry, and
"meal": "lunch"
will become the 4th entry. So the file after the command is like this:
{
"name": "Curried Lentils and Rice",
"cuisine": "meditaranean"
"ingredients": [
{
"quantity": "1 quart",
"name": "beef broth",
"type": "Misc"
},
{
"quantity": "1 cup",
"name": "dried green lentils",
"type": "Misc"
},
{
"quantity": "1/2 cup",
"name": "basmati rice",
"type": "Misc"
},
{
"quantity": "1 tsp",
"name": "curry powder",
"type": "Condiments"
},
{
"quantity": "1 tsp",
"name": "salt",
"type": "Condiments"
}
],
"meal": "lunch"
"steps": [
"Bring broth to a low boil.",
"Add curry powder and salt.",
"Cook lentils for 20 minutes.",
"Add rice and simmer for 20 minutes.",
"Enjoy!"
],
"timers": [
0,
0,
20,
20,
0
],
"imageURL": "http://dagzhsfg97k4.cloudfront.net/wp-content/uploads/2012/05/lentils3.jpg"
}
My question is how to do this with jq?
Note: This other question addresses performing updates of single fields, while the current question is about inserts. They are as different as orange and yellow bell peppers, which is different! (Don't make me add a picture of bell peppers to this post, I swear I'll do it if I have to.)

With the help of a helper function, the task becomes trivial:
def insert_kv($key; $value; $ix):
to_entries
| .[0:$ix] + [{key: $key, value: $value}] + .[$ix:]
| from_entries;
insert_kv("cuisine"; "mediterranean"; 1)
| insert_kv("meal"; "lunch"; 3)
You could (alternatively or in addition) define:
def insert_kv($object; $ix):
to_entries
| .[0:$ix] + ($object|to_entries) + .[$ix:]
| from_entries;

Where your json is saved to meals.json, executing the following command will result into exactly what you need.
jq '.cusine="mediteranean"' meals.json

Related

Nested json - store values in csv

I am trying to convert a nested json file into csv. It's data from a darts API and the structure is always the same. Nevertheless I got some problems flattening and storing the values in a csv because of the nested structure.
json:
{
"summaries": [{
"sport_event": {
"id": "sr:sport_event:12967512",
"start_time": "2017-11-11T13:15:00+00:00",
"start_time_confirmed": true,
"sport_event_context": {
"sport": {
"id": "sr:sport:22",
"name": "Darts"
},
"category": {
"id": "sr:category:104",
"name": "International"
},
"competition": {
"id": "sr:competition:597",
"name": "Grand Slam of Darts"
},
"season": {
"id": "sr:season:47332",
"name": "Grand Slam of Darts 2017",
"start_date": "2017-11-11",
"end_date": "2017-11-20",
"year": "2017",
"competition_id": "sr:competition:597"
},
"stage": {
"order": 1,
"type": "league",
"phase": "stage_1",
"start_date": "2017-11-11",
"end_date": "2017-11-15",
"year": "2017"
},
"round": {
"number": 1
},
"groups": [{
"id": "sr:league:29766",
"name": "Grand Slam of Darts 2017, Group G",
"group_name": "G"
}]
},
"coverage": {
"live": true
},
"competitors": [{
"id": "sr:competitor:35936",
"name": "Smith, Michael",
"abbreviation": "SMI",
"qualifier": "home"
}, {
"id": "sr:competitor:83895",
"name": "Wilson, James",
"abbreviation": "WIL",
"qualifier": "away"
}]
},
"sport_event_status": {
"status": "closed",
"match_status": "ended",
"home_score": 5,
"away_score": 3,
"winner_id": "sr:competitor:35936"
}
}, {
"sport_event": {
"id": "sr:sport_event:12967508",
"start_time": "2017-11-11T13:40:00+00:00",
"start_time_confirmed": true,
"sport_event_context": {
"sport": {
"id": "sr:sport:22",
"name": "Darts"
},
"category": {
"id": "sr:category:104",
"name": "International"
},
"competition": {
"id": "sr:competition:597",
"name": "Grand Slam of Darts"
},
"season": {
"id": "sr:season:47332",
"name": "Grand Slam of Darts 2017",
"start_date": "2017-11-11",
"end_date": "2017-11-20",
"year": "2017",
"competition_id": "sr:competition:597"
},
"stage": {
"order": 1,
"type": "league",
"phase": "stage_1",
"start_date": "2017-11-11",
"end_date": "2017-11-15",
"year": "2017"
},
"round": {
"number": 1
},
"groups": [{
"id": "sr:league:29764",
"name": "Grand Slam of Darts 2017, Group F",
"group_name": "F"
}]
},
"coverage": {
"live": true
},
"competitors": [{
"id": "sr:competitor:70916",
"name": "Bunting, Stephen",
"abbreviation": "BUN",
"qualifier": "home"
}, {
"id": "sr:competitor:191262",
"name": "de Zwaan, Jeffrey",
"abbreviation": "DEZ",
"qualifier": "away"
}]
},
"sport_event_status": {
"status": "closed",
"match_status": "ended",
"home_score": 5,
"away_score": 4,
"winner_id": "sr:competitor:70916"
}
}
So for each sport_event I would like to store the variables:
"start_time"
from "season" the variable "name"
from "competitors" both "id" and "name"
from "sport_event_status" the "winner_id"
I have already tried to flatten the json file with this code:
import json
f = open(r'path of file.json')
data = json.load(f)
def flatten(data):
for key,value in data.items():
print (str(key)+'->'+str(value))
if type(value) == type(dict()):
flatten(value)
elif type(value) == type(list()):
for val in value:
if type(val) == type(str()):
pass
elif type(val) == type(list()):
pass
else:
flatten(val)
flatten(data)
print(data)
This actually prints out the following:
id->sr:season:47332
name->Grand Slam of Darts 2017
start_date->2017-11-11
end_date->2017-11-20
year->2017
competition_id->sr:competition:597
Now my question is how to store the values I mentioned above in a csv file.
Thanks in advance for your support.
Using jq, you basically just have to transcribe your specification, adding a bit of context and taking care of an embedded array:
.summaries[]
| .sport_event # Your specification:
| [.start_time, # start_time
.sport_event_context.season.name] # from "season" the variable "name"
+ [.competitors[] | .id, .name] # from "competitors" both "id" and "name"
+ [.sport_event_status.winner_id] # from "sport_event_status" the "winner_id"
| #csv
Invocation
E.g.
jq -rf program.jq my.json

How to avoid generating all combinations of selected data while constructing an object?

My original JSON is given below.
[
{
"id": "1",
"name": "AA_1",
"total": "100002",
"files": [
{
"filename": "8665b987ab48511eda9e458046fbc42e.csv",
"filename_original": "some.csv",
"status": "3",
"total": "100002",
"time": "2020-08-24 23:25:49"
}
],
"status": "3",
"created": "2020-08-24 23:25:49",
"filenames": "8665b987ab48511eda9e458046fbc42e.csv",
"is_append": "0",
"is_deleted": "0",
"comment": null
},
{
"id": "4",
"name": "AA_2",
"total": "43806503",
"files": [
{
"filename": "1b4812fe634938928953dd40db1f70b2.csv",
"filename_original": "other.csv",
"status": "3",
"total": "21903252",
"time": "2020-08-24 23:33:43"
},
{
"filename": "63ab85fef2412ce80ae8bd018497d8bf.csv",
"filename_original": "some.csv",
"status": "2",
"total": 0,
"time": "2020-08-24 23:29:30"
}
],
"status": "2",
"created": "2020-08-24 23:35:51",
"filenames": "1b4812fe634938928953dd40db1f70b2.csv&&63ab85fef2412ce80ae8bd018497d8bf.csv",
"is_append": "0",
"is_deleted": "0",
"comment": null
}
]
From this JSON I want to create new objects by combining fields from objects which have status: 2 and their files which also have the same pair, status: 2.
So, I am expecting a JSON array as below.
[
{
"id": "4",
"name": "AA_2",
"file_filename": "63ab85fef2412ce80ae8bd018497d8bf.csv",
"file_status": 2
}
]
So far I tried with this JQ filter:
.[]|select(.status=="2")|[{id:.id,file_filename:.files[].filename,file_status:.files[].status}]
But this produces some invalid data.
[
{
"id": "4", # want to remove this as file.status != 2
"file_filename": "1b4812fe634938928953dd40db1f70b2.csv",
"file_status": "3"
},
{
"id": "4",
"file_filename": "1b4812fe634938928953dd40db1f70b2.csv",
"file_status": "2"
},
{
"id": "4", # Repeat
"file_filename": "63ab85fef2412ce80ae8bd018497d8bf.csv",
"file_status": "3"
},
{
"id": "4", # Repeat
"file_filename": "63ab85fef2412ce80ae8bd018497d8bf.csv",
"file_status": "2"
}
]
How do I filter the new JSON using JQ and remove these duplicate objects?
By applying [] operator to files twice, you're running into a combinatorial explosion. That needs to be avoided, for example:
[ .[] | select(.status == "2") | {id, name} + (.files[] | select(.status == "2") | {file_filename: .filename, file_status: .status}) ]
Online demo

Using jq to filter dataset from The Mathematics Genealogy Project

With this Json Data set.
{
"nodes": [
{
"students": [],
"advisors": [
258
],
"name": "Archie Higdon",
"school": "Iowa State University",
"subject": "74—Mechanics of deformable solids",
"thesis": "Stresses in Moderately Thick Rectangular Plates",
"country": "UnitedStates",
"year": 1936,
"id": 2
},
{
"students": [],
"advisors": [
258
],
"name": "Donald Hill Rock",
"school": "Iowa State University",
"subject": null,
"thesis": "Finite Strain Analysis in Elastic Theory",
"country": "UnitedStates",
"year": 1939,
"id": 3
},
{
"students": [],
"advisors": [
258
],
"name": "William B. Stiles",
"school": "Iowa State University",
"subject": null,
"thesis": "Solutions of Clamped Plated Problems by Means of Functions Derived from Membrane Characteristic Functions",
"country": "UnitedStates",
"year": 1945,
"id": 6
}
]
}
I was wondering how I can use jq in order to find people who graduated from Georgia Tech with a non empty list of students? As in, I want to find all students who graduated from Georgia Tech who went on to advise students themselves.
Assuming simplified json like this which has some values matching the conditions you mention
{
"nodes": [
{ "students": [2, 3], "advisors": [], "name": "Person 1", "school": "Georgia Tech", "id": 1 },
{ "students": [], "advisors": [1], "name": "Person 2", "school": "Georgia Tech", "id": 2 },
{ "students": [], "advisors": [1], "name": "Person 3", "school": "Georgia Tech", "id": 3 }
]
}
You can prune nodes to only people at "Georgia Tech" with
.nodes |= map( select( .school == "Georgia Tech" ) )
You can prune nodes to people with non-empty students with
.nodes |= map( select( .students | length > 0 ) )
You can combine the conditions with and
.nodes |= map(
select( (.school == "Georgia Tech") and (.students | length > 0) )
)
Try it online!

Convert nested json to csv to sheets json api

I'm want to make my json to csv so that i can upload it on google sheets and make it as json api. Whenever i have change data i will just change it on google sheets. But I'm having problems on converting my json file to csv because it changes the variables whenever i convert it. I'm using https://toolslick.com/csv-to-json-converter to convert my json file to csv.
What is the best way to convert json nested to csv ?
JSON
{
"options": [
{
"id": "1",
"value": "Jumbo",
"shortcut": "J",
"textColor": "#FFFFFF",
"backgroundColor": "#00000"
},
{
"id": "2",
"value": "Hot",
"shortcut": "D",
"textColor": "#FFFFFF",
"backgroundColor": "#FFFFFF"
}
],
"categories": [
{
"id": "1",
"order": 1,
"name": "First Category",
"active": true
},
{
"id": "2",
"order": 2,
"name": "Second Category",
"shortcut": "MT",
"active": true
}
],
"products": [
{
"id": "03c6787c-fc2a-4aa8-93a3-5e0f0f98cfb2",
"categoryId": "1",
"name": "First Product",
"shortcut": "First",
"options": [
{
"optionId": "1",
"price": 23
},
{
"optionId": "2",
"price": 45
}
],
"active": true
},
{
"id": "e8669cea-4c9c-431c-84ba-0b014f0f9bc2",
"categoryId": "2",
"name": "Second Product",
"shortcut": "Second",
"options": [
{
"optionId": "1",
"price": 11
},
{
"optionId": "2",
"price": 20
}
],
"active": true
}
],
"discounts": [
{
"id": "1",
"name": "S",
"type": 1,
"amount": 20,
"active": true
},
{
"id": "2",
"name": "P",
"type": 1,
"amount": 20,
"active": true
},
{
"id": "3",
"name": "G",
"type": 2,
"amount": 5,
"active": true
}
]
}
Using python, this can be easily done or almost done. Maybe this code will help you in some way to understand that.
import json,csv
data = []
with open('your_json_file_here.json') as file:
for line in file:
data.append(json.loads(line))
length = len(data)
with open('create_new_file.csv','w') as f:
writer = csv.writer(f)
writers = csv.DictWriter(f, fieldnames=['header1','header2'])
writers.writeheader()
for iter in range(length):
writer.writerow((data[iter]['specific_col_name1'],data[iter]['specific_col_name2']))
f.close()

How to form JSON path

I have employers array as below; how to get employers:id and featuredReview:id using JSON expression.
"employers": [
{
"id": 194,
"name": "Target",
"website": "www.target.com",
"isEEP": false,
"exactMatch": false,
"industry": "Department, Clothing, & Shoe Stores",
"numberOfRatings": 11531,
"squareLogo": "http://media.glassdoor.com/sqll/194/target-squarelogo.png",
"overallRating": 3.2,
"ratingDescription": "OK",
"cultureAndValuesRating": "3.3",
"seniorLeadershipRating": "2.8",
"compensationAndBenefitsRating": "3.0",
"careerOpportunitiesRating": "3.0",
"workLifeBalanceRating": "3.0",
"recommendToFriendRating": "0.6",
"featuredReview": {
"id": 6613365,
"currentJob": false,
"reviewDateTime": "2015-05-15 16:32:06.997",
"jobTitle": "Executive Team Leader",
"location": "Buena Park, CA",
"jobTitleFromDb": "Executive Team Leader",
"headline": "Unrealistic expectations for leadership",
"overall": 4,
"overallNumeric": 4
},
"ceo": {
"name": "Brian Cornell",
"title": "CEO",
"numberOfRatings": 1127,
"pctApprove": 66,
"pctDisapprove": 34
}
}]
employers[0].id
employers[0].featuredReview.id