How to extract specific data from JSON file - json

I have this database, http://ashleyw.co.uk/project/food-nutrient-database, and I would like to extract the group "Dairy and Egg Products" into a separate JSON file.
Also, within the group some of the data is triplicated. For example:
{
"id": 1008,
"description": "Cheese, caraway",
"tags": [],
"manufacturer": "",
"group": "Dairy and Egg Products",
"portions": [
{
"amount": 1,
"unit": "oz",
"grams": 28.35
}
],
"nutrients": [
{
"value": 25.18,
"units": "g",
"description": "Protein",
"group": "Composition"
},
{
"value": 29.2,
"units": "g",
"description": "Total lipid (fat)",
"group": "Composition"
},
{
"value": 3.06,
"units": "g",
"description": "Carbohydrate, by difference",
"group": "Composition"
},
{
"value": 25.18,
"units": "g",
"description": "Protein",
"group": "Composition"
},
{
"value": 29.2,
"units": "g",
"description": "Total lipid (fat)",
"group": "Composition"
},
{
"value": 3.06,
"units": "g",
"description": "Carbohydrate, by difference",
"group": "Composition"
},
{
"value": 25.18,
"units": "g",
"description": "Protein",
"group": "Composition"
},
{
"value": 29.2,
"units": "g",
"description": "Total lipid (fat)",
"group": "Composition"
},
{
"value": 3.06,
"units": "g",
"description": "Carbohydrate, by difference",
"group": "Composition"
}
]
}
Every child of the nutrients node is triplicated. How can the extras be stripped away?

Using Python:
import json
GROUP = "Dairy and Egg Products"


def dedupe_nutrients(nutrients):
    """Return *nutrients* without exact duplicates, keeping first-seen order."""
    seen = set()
    unique = []
    for nutrient in nutrients:
        # Dicts are unhashable, so use a canonical JSON dump as the identity key.
        key = json.dumps(nutrient, sort_keys=True)
        if key not in seen:
            seen.add(key)
            unique.append(nutrient)
    return unique


def extract_group(foods, group=GROUP):
    """Return the foods belonging to *group*, each with de-duplicated nutrients.

    The input items are not modified; every returned food is a shallow copy
    whose "nutrients" list has its repeated entries removed.
    """
    return [
        {**food, "nutrients": dedupe_nutrients(food.get("nutrients", []))}
        for food in foods
        if food.get("group") == group
    ]


def main():
    """Read data.json and write the selected group to nutrients.json."""
    # Parse the input before opening the output, so that a malformed input
    # file does not leave a truncated nutrients.json behind.
    with open("data.json", encoding="utf-8") as f:
        data = json.load(f)
    # Accept either a list of food items or a single food object.
    foods = [data] if isinstance(data, dict) else data
    with open("nutrients.json", "w", encoding="utf-8") as g:
        json.dump(extract_group(foods), g, indent=2)


if __name__ == "__main__":
    main()

Related

Metadata converter?

I'm currently working on a generated collection and have a huge metadata file. It is built up like this:
[
{
"name": "amongnfts #1",
"description": "This is one member of the AmongNFT collection. It's unique and only exists once, but may be a bit cooler than the others.",
"image": "/1.png",
"dna": "055b93fd187e52f85e3ea8b0ad660fa51aaea9e0",
"edition": 1,
"date": 1645368881553,
"attributes": [
{
"trait_type": "background",
"value": "blue"
},
{
"trait_type": "body",
"value": "pink"
},
{
"trait_type": "clothing",
"value": "teamred"
},
{
"trait_type": "hat",
"value": "pilot"
},
{
"trait_type": "visor",
"value": "krieghaus"
},
{
"trait_type": "pet",
"value": "minimateorange"
}
],
"compiler": "HashLips Art Engine"
},
{
"name": "amongnfts #2",
"description": "This is one member of the AmongNFT collection. It's unique and only exists once, but may be a bit cooler than the others.",
"image": "/2.png",
"dna": "a05e9acf4665b9e9e5adbcb9dfc33df255e2e58f",
"edition": 2,
"date": 1645368883622,
"attributes": [
{
"trait_type": "background",
"value": "orange"
},
{
"trait_type": "body",
"value": "white"
},
{
"trait_type": "clothing",
"value": "elf"
},
{
"trait_type": "hat",
"value": "wethair"
},
{
"trait_type": "visor",
"value": "none"
},
{
"trait_type": "pet",
"value": "minimatetan"
}
],
"compiler": "HashLips Art Engine"
},
]
(It goes on like this for 10,000 editions, which is why I don't want to do it manually.)
But it should look like this:
{
"nft": [
{
"file_path": "/1.png",
"nft_name": "amongnfts #1",
"external_link": "",
"description": "This is one member of the AmongNFT collection. It's unique and only exists once, but may be a bit cooler than the others.",
"collection": "AmongNFTs",
"properties": [
{
"type": "background",
"name": "blue"
},
{
"type": "body",
"name": "pink"
},
{
"type": "clothing",
"name": "teamred"
},
{
"type": "hat",
"name": "pilot"
},
{
"type": "visor",
"name": "krieghaus"
},
{
"type": "pet",
"name": "minimateorange"
},
],
"levels": "",
"stats": "",
"unlockable_content": "",
"explicit_and_sensitive_content": "",
"supply": "1",
"blockchain": "Polygon",
"sale_type": "",
"price": 0.001,
"method": "",
"duration": "",
"specific_buyer": "",
"quantity": ""
}
]
}
Is there an easy way of converting them automatically and, if so, what would the code look like?
The metadata.json file is here.

How to sum values in json using jq

I just started using the jq parser, as I need to parse some values out of a JSON file, make a few changes, and output an updated JSON file with those changes. Here is one of the many samples I'm working on.
input json:
{
"name": ".",
"metadata": {
"path": ".",
"type": "terraform_dir",
"terraformWorkspace": "default"
},
"pastBreakdown": {
"resources": [],
"totalHourlyCost": "0",
"totalMonthlyCost": "0"
},
"breakdown": {
"resources": [
{
"name": "vm01",
"metadata": {},
"hourlyCost": "0.007989726027400584",
"monthlyCost": 3.9660999999999986,
"costComponents": [
{
"name": "vm01-201",
"unit": "years",
"hourlyQuantity": "0.0001141552511416",
"monthlyQuantity": "0.0833333333333333",
"price": "69.99",
"hourlyCost": "0.007989726027400584",
"monthlyCost": "5.832499999999997667"
}
]
},
{
"name": "public_ip.example",
"metadata": {},
"hourlyCost": "0.005",
"monthlyCost": 2.482,
"costComponents": [
{
"name": "IP address (static)",
"unit": "hours",
"hourlyQuantity": "1",
"monthlyQuantity": "730",
"price": "0.005",
"hourlyCost": "0.005",
"monthlyCost": "3.65"
}
]
},
{
"name": "storage_account",
"metadata": {},
"hourlyCost": "0.17575342465753424425",
"monthlyCost": 87.24400000000001,
"costComponents": [
{
"name": "Write",
"unit": "10K operations",
"hourlyQuantity": "0.136986301369863",
"monthlyQuantity": "100",
"price": "0.1",
"hourlyCost": "0.0136986301369863",
"monthlyCost": "10"
},
{
"name": "List",
"unit": "10K operations",
"hourlyQuantity": "0.136986301369863",
"monthlyQuantity": "100",
"price": "0.05",
"hourlyCost": "0.00684931506849315",
"monthlyCost": "5"
},
{
"name": "Read",
"unit": "10K operations",
"hourlyQuantity": "0.0136986301369863",
"monthlyQuantity": "10",
"price": "0.01",
"hourlyCost": "0.000136986301369863",
"monthlyCost": "0.1"
},
{
"name": "All other operations",
"unit": "10K operations",
"hourlyQuantity": "0.136986301369863",
"monthlyQuantity": "100",
"price": "0.004",
"hourlyCost": "0.000547945205479452",
"monthlyCost": "0.4"
},
{
"name": "retrieval",
"unit": "GB",
"hourlyQuantity": "1.3698630136986301",
"monthlyQuantity": "1000",
"price": "0.01",
"hourlyCost": "0.013698630136986301",
"monthlyCost": "10"
},
{
"name": "write",
"unit": "GB",
"hourlyQuantity": "1.3698630136986301",
"monthlyQuantity": "1000",
"price": "0.0025",
"hourlyCost": "0.00342465753424657525",
"monthlyCost": "2.5"
},
{
"name": "index",
"unit": "10K tags",
"hourlyQuantity": "0.0136986301369863",
"monthlyQuantity": "10",
"price": "0.03",
"hourlyCost": "0.000410958904109589",
"monthlyCost": "0.3"
}
]
}
],
"totalMonthlyCost": "sum value"
From the above JSON I need to sum the monthlyCost of each resource and write the total into totalMonthlyCost.
So the input would be "3.9660999999999986 + 2.482 + 87.24400000000001", and the sum of these values should be stored in totalMonthlyCost.
I have been trying various options, but with no luck. I can calculate the sum of the values within the array,
but I'm not sure how to update the value in totalMonthlyCost.
In two steps in accordance with the task description:
# Step 1: add up the monthlyCost of every resource and bind the total to $sum.
# Step 2: store $sum in .breakdown.totalMonthlyCost.
(.breakdown.resources | map(.monthlyCost) | add) as $sum
| .breakdown.totalMonthlyCost = $sum
This can be written without the intermediate variable:
# Plain assignment: the right-hand side is evaluated against the whole input
# document, so it still needs the full .breakdown.resources path.
.breakdown.totalMonthlyCost
= (.breakdown.resources | map(.monthlyCost) | add)
or more DRYly:
# Update-assignment: inside |= the current value is .breakdown itself, so
# .totalMonthlyCost and .resources are written relative to it.
.breakdown
|= (.totalMonthlyCost = (.resources | map(.monthlyCost) | add))

Flatten a JSON document using jq by filtering an array by keys

I have a JSON in the following format:
{
"subFields": [
{
"id": "question_1",
"type": "radioGroup",
"description": "Description1",
"title": "title1",
"subFields": [
{
"type": "radio",
"label": "Yes",
"value": 1
},
{
"type": "radio",
"label": "No",
"value": 0
},
{
"uiComponent": "SmallContent",
"componentProps": {
"text": "* If the answer to the above question is “Yes”, please contact the Support immediately."
}
}
]
},
{
"uiComponent": "Spacer"
},
{
"id": "question_2",
"type": "radioGroup",
"description": "Description2",
"title": "Title2",
"subFields": [
{
"type": "radio",
"label": "Label - Value 1",
"value": 1
},
{
"type": "radio",
"label": "Label - Value 2",
"value": 2
},
{
"type": "radio",
"label": "Label - Value 3",
"value": 3
},
{
"type": "radio",
"label": "Other",
"value": 13,
"subFields": [
{
"id": "question_2a",
"type": "string",
"condition": {
"type": "BinaryExpression",
"operator": "==",
"left": {
"type": "Identifier",
"name": "question_2"
},
"right": {
"type": "Literal",
"value": 13
}
}
}
]
}
]
},
{
"id": "question_2_b",
"style": {
"marginTop": "30px"
},
"type": "radioGroup",
"description": "Description3",
"title": "",
"subFields": [
{
"type": "radio",
"label": "Label - Radio 1",
"value": 1
},
{
"type": "radio",
"label": "Label - Radio 2",
"value": 2
},
{
"type": "radio",
"label": "Label - Radio 3",
"value": 3
}
]
},
{
"uiComponent": "Spacer"
},
{
"id": "question_3",
"type": "radioGroup",
"description": "Description3",
"title": "Title3",
"subFields": [
{
"type": "radio",
"label": "Yes",
"value": 1
},
{
"type": "radio",
"label": "No",
"value": 0
}
]
},
{
"uiComponent": "Spacer"
},
{
"condition": {
"type": "BinaryExpression",
"operator": "==",
"left": {
"type": "Identifier",
"name": "signer_type"
},
"right": {
"type": "Literal",
"value": "entity"
}
},
"subFields": [
{
"uiComponent": "Spacer"
},
{
"id": "question_4",
"type": "radioGroup",
"description": "Description_4",
"title": "Title_4",
"subFields": [
{
"type": "radio",
"label": "Yes",
"value": 1
},
{
"type": "radio",
"label": "No",
"value": 0
}
]
},
{
"uiComponent": "Spacer"
}
],
"uiComponent": "Block"
},
{
"uiComponent": "Spacer"
}
],
"uiComponent": "Container"
}
and I would like to generate the following output:
[
{
"id": "question_1",
"title": "title1",
"description": "Description1",
"type": "radioGroup",
"questions": "radio,Yes,1"
},
{
"id": "question_1",
"title": "title1",
"description": "Description1",
"type": "radioGroup",
"questions": "radio,No,0"
},
{
"id": "question_2",
"title": "Title2",
"description": "Description2",
"type": "radioGroup",
"questions": "radio,Label - Value 1,1"
},
{
"id": "question_2",
"title": "Title2",
"description": "Description2",
"type": "radioGroup",
"questions": "radio,Label - Value 2,2"
},
{
"id": "question_2",
"title": "Title2",
"description": "Description2",
"type": "radioGroup",
"questions": "radio,Label - Value 3,3"
},
{
"id": "question_2_b",
"title": "",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,Label - Value 1,1"
},
{
"id": "question_2_b",
"title": "",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,Label - Value 2,2"
},
{
"id": "question_2_b",
"title": "",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,Label - Value 3,3"
},
{
"id": "question_3",
"title": "Title3",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,Yes,1"
},
{
"id": "question_3",
"title": "Title3",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,No,0"
}
]
or in alternative a reduced version:
[
"question_1",
"title1",
"Description1",
"radioGroup",
"radio,Yes,1",
"radio,No,0"
],
[
"question_2",
"title2",
"Description2",
"radioGroup",
"radio,Label - Value 1,1",
"radio,Label - Value 2,2",
"radio,Label - Value 3,3",
],
[
"question_2_b",
"Description3",
"radioGroup",
"radio,Label - Value 1,1",
"radio,Label - Value 2,2",
"radio,Label - Value 3,3",
],
[
"question_3",
"Title3",
"Description3",
"radioGroup",
"radio,Yes,1",
"radio,No,0"
]
The objective is to get only the objects that contain the id (to remove the {"uiComponent": "Spacer"} objects) and get only the subFields with these tags inside the array:
"subFields": [
{
"type": "xxxx",
"label": "xxxx",
"value": xxxx
},
I was able to flatten the JSON array by using the following JQ pattern:
jq play 1
.subFields[] | select(has("id") and .id != null)| {id: .id, type: .type, description: .description, anwers: .subFields}
and generated this result:
{
"id": "question_1",
"type": "radioGroup",
"description": "Description1",
"anwers": [
{
"type": "radio",
"label": "Yes",
"value": 1
},
{
"type": "radio",
"label": "No",
"value": 0
},
{
"uiComponent": "SmallContent",
"componentProps": {
"text": "* If the answer to the above question is “Yes”, please contact the Support immediately."
}
}
]
}
{
"id": "question_2",
"type": "radioGroup",
"description": "Description2",
"anwers": [
{
"type": "radio",
"label": "Label - Value 1",
"value": 1
},
{
"type": "radio",
"label": "Label - Value 2",
"value": 2
},
{
"type": "radio",
"label": "Label - Value 3",
"value": 3
},
{
"type": "radio",
"label": "Other",
"value": 13,
"subFields": [
{
"id": "question_2a",
"type": "string",
"condition": {
"type": "BinaryExpression",
"operator": "==",
"left": {
"type": "Identifier",
"name": "question_2"
},
"right": {
"type": "Literal",
"value": 13
}
}
}
]
}
]
}
{
"id": "question_2_b",
"type": "radioGroup",
"description": "Description3",
"anwers": [
{
"type": "radio",
"label": "Label - Radio 1",
"value": 1
},
{
"type": "radio",
"label": "Label - Radio 2",
"value": 2
},
{
"type": "radio",
"label": "Label - Radio 3",
"value": 3
}
]
}
{
"id": "question_3",
"type": "radioGroup",
"description": "Description3",
"anwers": [
{
"type": "radio",
"label": "Yes",
"value": 1
},
{
"type": "radio",
"label": "No",
"value": 0
}
]
}
My problem is that I don't know how to remove these sections:
{
"uiComponent": "SmallContent",
"componentProps": {
"text": "* If the answer to the above question is “Yes”, please contact the Support immediately."
}
}
and
"subFields": [
{
"id": "question_2a",
"type": "string",
"condition": {
"type": "BinaryExpression",
"operator": "==",
"left": {
"type": "Identifier",
"name": "question_2"
},
"right": {
"type": "Literal",
"value": 13
}
}
}
]
I played around a little with this jq filter, for question_3 only:
jq play 2
.subFields[] | {id: .id, title: .title, description: .description, type: .type, subFields: .subFields} | select(has("id") and .id != null) | select(.id=="question_3") | {id: .id, title: .title, description: .description, type: .type, questions: (.subFields[]|join(","))}
and produced this result:
{
"id": "question_3",
"title": "Title3",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,Yes,1"
}
{
"id": "question_3",
"title": "Title3",
"description": "Description3",
"type": "radioGroup",
"questions": "radio,No,0"
}
and also
jq play 3
.subFields[] | {id: .id, title: .title, description: .description, type: .type, subFields: .subFields} | select(has("id") and .id != null) | select(.id=="question_3") | [.id, .title, .description, .type, (.subFields[]|join(","))]
resulting on this:
[
"question_3",
"Title3",
"Description3",
"radioGroup",
"radio,Yes,1",
"radio,No,0"
]
Can you help me improve the jq filters I created so that they produce the intended results?
Thanks in advance!
The following produces your first alternative:
# Emit one flat object per (question, answer option) pair.
.subFields[]
# Keep only entries with a truthy id; this drops the {"uiComponent": "Spacer"} objects.
| select(.id?)
# The parenthesised expression below yields one {questions: ...} object per
# matching child, so the question fields are repeated once for each of them.
| { id, title, description, type} +
(.subFields[]
# Keep only children with a truthy type; this drops the "SmallContent" note.
# NOTE: the "Other" option of question_2 also has a type, so it is emitted
# as "radio,Other,13".
| select(.type?)
| [.type,.label,.value] | join(",")
| { questions: .} )
Notice the two select() filters.
The key names are specified explicitly here to ensure the ordering you specified is honored.

How to extract the path info?

For the following JSON, I'd like to extract something like this (<TAB> is a TAB character).
CHROMOSOMES<TAB>HUMAN<TAB>1<TAB>1
...
STATUSES<TAB>name<TAB>Approved
...
ATTRIBUTES<TAB>HGNC<TAB>HGNC ID<TAB>gd_hgnc_id
...
ATTRIBUTES<TAB>EXTERNAL<TAB>NCBI Gene ID<TAB>md_eg_id<TAB>NCBI
...
ORDER_BY<TAB>HGNC ID<TAB>gd_hgnc_id
...
I'd like a smart way to extract the path info of this tree structure. Could anybody show me the best way to do so? Thanks.
{
"CHROMOSOMES": {
"HUMAN": [
{
"name": "1",
"value": "1"
},
{
"name": "2",
"value": "2"
},
{
"name": "3",
"value": "3"
},
{
"name": "4",
"value": "4"
},
{
"name": "5",
"value": "5"
},
{
"name": "6",
"value": "6"
},
{
"name": "7",
"value": "7"
},
{
"name": "8",
"value": "8"
},
{
"name": "9",
"value": "9"
},
{
"name": "10",
"value": "10"
},
{
"name": "11",
"value": "11"
},
{
"name": "12",
"value": "12"
},
{
"name": "13",
"value": "13"
},
{
"name": "14",
"value": "14"
},
{
"name": "15",
"value": "15"
},
{
"name": "16",
"value": "16"
},
{
"name": "17",
"value": "17"
},
{
"name": "18",
"value": "18"
},
{
"name": "19",
"value": "19"
},
{
"name": "20",
"value": "20"
},
{
"name": "21",
"value": "21"
},
{
"name": "22",
"value": "22"
},
{
"name": "X",
"value": "X"
},
{
"name": "Y",
"value": "Y"
},
{
"name": "reserved loci",
"value": "reserved"
},
{
"name": "mitochondrial",
"value": "mito"
},
{
"name": "pseudoautosomal",
"value": "XandY"
}
]
},
"STATUSES": [
{
"name": "Approved",
"value": "Approved"
},
{
"name": "Entry and symbol withdrawn",
"value": "Entry Withdrawn"
}
],
"ATTRIBUTES": {
"HGNC": [
{
"name": "HGNC ID",
"value": "gd_hgnc_id"
},
{
"name": "Approved symbol",
"value": "gd_app_sym"
},
{
"name": "Approved name",
"value": "gd_app_name"
},
{
"name": "Status",
"value": "gd_status"
},
{
"name": "Locus type",
"value": "gd_locus_type"
},
{
"name": "Locus group",
"value": "gd_locus_group"
},
{
"name": "Previous symbols",
"value": "gd_prev_sym"
},
{
"name": "Previous name",
"value": "gd_prev_name"
},
{
"name": "Synonyms",
"value": "gd_aliases"
},
{
"name": "Name synonyms",
"value": "gd_name_aliases"
},
{
"name": "Chromosome",
"value": "gd_pub_chrom_map"
},
{
"name": "Date approved",
"value": "gd_date2app_or_res"
},
{
"name": "Date modified",
"value": "gd_date_mod"
},
{
"name": "Date symbol changed",
"value": "gd_date_sym_change"
},
{
"name": "Date name changed",
"value": "gd_date_name_change"
},
{
"name": "Accession numbers",
"value": "gd_pub_acc_ids"
},
{
"name": "Enzyme IDs",
"value": "gd_enz_ids"
},
{
"name": "NCBI Gene ID",
"value": "gd_pub_eg_id"
},
{
"name": "Ensembl gene ID",
"value": "gd_pub_ensembl_id"
},
{
"name": "Mouse genome database ID",
"value": "gd_mgd_id"
},
{
"name": "Specialist database links",
"value": "gd_other_ids"
},
{
"name": "Specialist database IDs",
"value": "gd_other_ids_list"
},
{
"name": "Pubmed IDs",
"value": "gd_pubmed_ids"
},
{
"name": "RefSeq IDs",
"value": "gd_pub_refseq_ids"
},
{
"name": "Gene group ID",
"value": "family.id"
},
{
"name": "Gene group name",
"value": "family.name"
},
{
"name": "CCDS IDs",
"value": "gd_ccds_ids"
},
{
"name": "Vega IDs",
"value": "gd_vega_ids"
},
{
"name": "Locus specific databases",
"value": "gd_lsdb_links"
}
],
"EXTERNAL": [
{
"name": "NCBI Gene ID",
"source": "NCBI",
"value": "md_eg_id"
},
{
"name": "OMIM ID",
"source": "OMIM",
"value": "md_mim_id"
},
{
"name": "RefSeq",
"source": "NCBI",
"value": "md_refseq_id"
},
{
"name": "UniProt ID",
"source": "UniProt",
"value": "md_prot_id"
},
{
"name": "Ensembl ID",
"source": "Ensembl",
"value": "md_ensembl_id"
},
{
"name": "Vega ID",
"source": "Vega",
"value": "md_vega_id"
},
{
"name": "UCSC ID",
"source": "UCSC",
"value": "md_ucsc_id"
},
{
"name": "Mouse genome database ID",
"source": "MGI",
"value": "md_mgd_id"
},
{
"name": "Rat genome database ID",
"source": "RGD",
"value": "md_rgd_id"
},
{
"name": "LNCipedia",
"source": "LNCipedia",
"value": "md_lncipedia"
},
{
"name": "GtRNAdb",
"source": "GtRNAdb",
"value": "md_gtrnadb"
}
]
},
"ORDER_BY": [
{
"name": "HGNC ID",
"value": "gd_hgnc_id"
},
{
"name": "Approved symbol",
"value": "gd_app_sym_sort"
},
{
"name": "Approved name",
"value": "gd_app_name"
},
{
"name": "Status",
"value": "gd_status"
},
{
"name": "Locus type",
"value": "gd_locus_type"
},
{
"name": "Locus group",
"value": "gd_locus_group"
},
{
"name": "Previous symbols",
"value": "gd_prev_sym"
},
{
"name": "Previous name",
"value": "gd_prev_name"
},
{
"name": "Synonyms",
"value": "gd_aliases"
},
{
"name": "Name synonyms",
"value": "gd_name_aliases"
},
{
"name": "Chromosome",
"value": "gd_pub_chrom_map_sort"
},
{
"name": "Date approved",
"value": "gd_date2app_or_res"
},
{
"name": "Date modified",
"value": "gd_date_mod"
},
{
"name": "Date symbol changed",
"value": "gd_date_sym_change"
},
{
"name": "Date name changed",
"value": "gd_date_name_change"
},
{
"name": "Accession numbers",
"value": "gd_pub_acc_ids"
},
{
"name": "Enzyme IDs",
"value": "gd_enz_ids"
},
{
"name": "NCBI Gene ID",
"value": "gd_pub_eg_id"
},
{
"name": "Ensembl gene ID",
"value": "gd_pub_ensembl_id"
},
{
"name": "Mouse genome database ID",
"value": "gd_mgd_id"
},
{
"name": "Specialist database links",
"value": "gd_other_ids"
},
{
"name": "Specialist database IDs",
"value": "gd_other_ids_list"
},
{
"name": "Pubmed IDs",
"value": "gd_pubmed_ids"
},
{
"name": "RefSeq IDs",
"value": "gd_pub_refseq_ids"
},
{
"name": "Gene group ID",
"value": "family.id"
},
{
"name": "Gene group name",
"value": "family.name"
},
{
"name": "CCDS IDs",
"value": "gd_ccds_ids"
},
{
"name": "Vega IDs",
"value": "gd_vega_ids"
},
{
"name": "Locus specific databases",
"value": "gd_lsdb_links"
},
{
"name": "NCBI Gene ID (supplied by NCBI)",
"value": "md_eg_id"
},
{
"name": "OMIM ID (supplied by OMIM)",
"value": "md_mim_id"
},
{
"name": "RefSeq (supplied by NCBI)",
"value": "md_refseq_id"
},
{
"name": "UniProt ID (supplied by UniProt)",
"value": "md_prot_id"
},
{
"name": "Ensembl ID (supplied by Ensembl)",
"value": "md_ensembl_id"
},
{
"name": "Vega ID (supplied by Vega)",
"value": "md_vega_id"
},
{
"name": "UCSC ID (supplied by UCSC)",
"value": "md_ucsc_id"
},
{
"name": "Mouse genome database ID (supplied by MGI)",
"value": "md_mgd_id"
},
{
"name": "Rat genome database ID (supplied by RGD)",
"value": "md_rgd_id"
},
{
"name": "LNCipedia ID (supplied by LNCipedia)",
"value": "md_lncipedia"
},
{
"name": "GtRNAdb ID (supplied by GtRNAdb)",
"value": "md_gtrnadb"
}
],
"OUTPUT": [
"Text",
"Make URL for text"
]
}
I'd like a smart way to extract the path info of this tree structure.
paths is your friend.
Given certain irregularities in the input, the exact requirements are
not always clear, but the following might be what you are looking for
and even if not, it would be easy to tweak in accordance with your
detailed requirements.
totsv.jq
# Keep only the string components of a path, i.e. drop the numeric array indices.
def s: map(select(type=="string"));

# Visit every path in the document and emit one tab-separated row per match:
#  - an object with a "name" key yields: key path, name, value and, only when
#    present, source;
#  - an array whose first item is "Text" (the OUTPUT list) yields: key path
#    followed by the array's items;
#  - everything else is skipped.
paths as $p
| getpath($p)
| if type == "object" and has("name")
  then ($p|s) + [.name, .value, (.source // empty)]
  elif type == "array" and .[0] == "Text" then ($p|s) + .
  else empty
  end
# @tsv formats each array as one TAB-separated line; run jq with -r for raw output.
| @tsv
Invocation
jq -crf totsv.jq chromosomes.json
Selection from output
CHROMOSOMES HUMAN 1 1
CHROMOSOMES HUMAN 2 2
...
STATUSES Approved Approved
STATUSES Entry and symbol withdrawn Entry Withdrawn
ATTRIBUTES HGNC HGNC ID gd_hgnc_id
...
ORDER_BY GtRNAdb ID (supplied by GtRNAdb) md_gtrnadb
OUTPUT Text Make URL for text
For future reference
Rather than give a very long sample input, it would be better
to give a small sample that is tightly woven with detailed requirements.

Convert nested json to csv to sheets json api

I want to convert my JSON to CSV so that I can upload it to Google Sheets and serve it as a JSON API. Whenever the data changes, I will just change it in Google Sheets. But I'm having problems converting my JSON file to CSV, because the variables get changed whenever I convert it. I'm using https://toolslick.com/csv-to-json-converter to convert my JSON file to CSV.
What is the best way to convert nested JSON to CSV?
JSON
{
"options": [
{
"id": "1",
"value": "Jumbo",
"shortcut": "J",
"textColor": "#FFFFFF",
"backgroundColor": "#00000"
},
{
"id": "2",
"value": "Hot",
"shortcut": "D",
"textColor": "#FFFFFF",
"backgroundColor": "#FFFFFF"
}
],
"categories": [
{
"id": "1",
"order": 1,
"name": "First Category",
"active": true
},
{
"id": "2",
"order": 2,
"name": "Second Category",
"shortcut": "MT",
"active": true
}
],
"products": [
{
"id": "03c6787c-fc2a-4aa8-93a3-5e0f0f98cfb2",
"categoryId": "1",
"name": "First Product",
"shortcut": "First",
"options": [
{
"optionId": "1",
"price": 23
},
{
"optionId": "2",
"price": 45
}
],
"active": true
},
{
"id": "e8669cea-4c9c-431c-84ba-0b014f0f9bc2",
"categoryId": "2",
"name": "Second Product",
"shortcut": "Second",
"options": [
{
"optionId": "1",
"price": 11
},
{
"optionId": "2",
"price": 20
}
],
"active": true
}
],
"discounts": [
{
"id": "1",
"name": "S",
"type": 1,
"amount": 20,
"active": true
},
{
"id": "2",
"name": "P",
"type": 1,
"amount": 20,
"active": true
},
{
"id": "3",
"name": "G",
"type": 2,
"amount": 5,
"active": true
}
]
}
Using Python, this can be done easily, or almost. Maybe this code will help you understand how.
import json,csv
# The input is read as JSON Lines: one JSON document per line.
# Blank lines are skipped instead of crashing json.loads.
with open('your_json_file_here.json', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

# newline='' lets the csv module control line endings itself; without it,
# extra blank rows appear on Windows.
with open('create_new_file.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # Header row first, then one row per record with the selected columns.
    writer.writerow(['header1', 'header2'])
    for record in data:
        writer.writerow((record['specific_col_name1'], record['specific_col_name2']))
# No explicit f.close(): the with-statement has already closed the file.