I'm building a windows application using vb.net to take a Json file that will be downloaded daily and parse it and enter the values into a SQL database. The only part I'm getting stuck on is being about to parse the Json data to get the values I need.
Public Sub json_parse(ByVal result_json As String)
Try
Dim json As String = result_json
Dim ser As JObject = JObject.Parse(json)
Dim data As List(Of JToken) = ser.Children().ToList
For Each item As JProperty In data
item.CreateReader()
Select Case item.Name
Case "data"
For Each msg As JObject In item.Values
MsgBox(msg.ToString)
Next
End Select
Next
Catch __unusedException1__ As Exception
'MsgBox(__unusedException1__.ToString)
End Try
End Sub
{
"data": {
"wsj_mdstrip_na,us": {
"set": 1562756496071,
"ttl": 300000,
"data": {
"id": "na,us",
"type": "wsj_mdstrip",
"data": [
{
"timeZone": "CDT",
"localTimeZoneTimestamp": "5:51 AM CDT 07/10/19",
"localTimestamp": "5:51 AM 07/10/19",
"etTimestamp": "6:51 AM ET 07/10/19",
"mdTimestamp": "2019-07-10T06:51:35-04:00",
"timestamp": "2019-07-10T05:51:35.365",
"chartingSymbol": "Future/US/XCBT/YM00",
"updatedTimestamp": "2019-07-10T07:01:36-04:00"
},
{
"deltaBarPosNeg": "deltaBar-neg",
"etTimestamp": "6:51 AM ET 07/10/19",
"chartingSymbol": "Future/US/XCME/ES00",
"pastCloses": [
{
"range": "P1Y",
"price": 2775.75
},
{
"range": "P3Y",
"price": 2145.75
}
]
}
]
}
},
"wsj_nav_na,us": {
"set": 1562754843231,
"ttl": 1800000,
"isStale": false
},
"mdc_cashprices_": {
"set": 1562756402351,
"ttl": 90000,
"data": {
"id": "{\"requestedCommodities\":\"all\",\"groupIntoMapBy\":\"type\",\"groupIntoSetsBy\":\"subType\"}",
"type": "mdc_cashprices",
"data": {
"instruments": [
{
"code": null,
"djShortName": "Gold-EnglehardFab",
"instrumentID": "511498",
"name": "Engelhard fabricated products",
"subType": "Gold, per troy oz",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "1500.72",
"previousDay": "1509.33",
"previousYear": "1351.58"
},
{
"code": null,
"djShortName": "Silver-EngelhrdFab",
"instrumentID": "511558",
"name": "Engelhard fabricated products",
"subType": "Silver, troy oz.",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "18.0600",
"previousDay": "18.0960",
"previousYear": "19.3200"
},
{
"code": null,
"djShortName": "SilverHandy&HarmB",
"instrumentID": "511560",
"name": "Handy & Harman base price",
"subType": "Silver, troy oz.",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "15.1070",
"previousDay": "15.0420",
"previousYear": "16.1000"
},
{
"code": null,
"djShortName": "Platinum-EngelhFab",
"instrumentID": "511554",
"name": "Platinum, Engelhard fabricated products",
"subType": "Other precious metals",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "912.0",
"previousDay": "914.0",
"previousYear": "946.0"
},
{
"code": null,
"djShortName": "Palladium-EngelInd",
"instrumentID": "511553",
"name": "Palladium, Engelhard industrial bullion",
"subType": "Other precious metals",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "1561.0",
"previousDay": "1573.0",
"previousYear": "955.0"
},
{
"code": null,
"djShortName": "Palladium-EngelFab",
"instrumentID": "511552",
"name": "Palladium, Engelhard fabricated products",
"subType": "Other precious metals",
"type": "Precious metals",
"ask": "-",
"bid": "-",
"last": "1661.0",
"previousDay": "1673.0",
"previousYear": "1055.0"
}
],
"timestamp": "7/09/19"
}
}
},
"consumer_corphat_corphat": {
"set": 1562756495973,
"ttl": 90000,
"data": {
"id": "corphat",
"type": "consumer_corphat",
"data": [
{
"ncLinks": [
{
"title": "Big Decisions",
"url": "http://www.bigdecisions.com/",
"nofollow": "false"
},
{
"title": "Business Spectator",
"url": "https://www.businessspectator.com.au/",
"nofollow": "false"
}
]
},
{
"djLinks": [
{
"title": "Barron's",
"url": "https://www.barrons.com",
"nofollow": "false"
},
{
"title": "BigCharts",
"url": "http://bigcharts.marketwatch.com",
"nofollow": "false"
}
]
}
]
},
"isStale": false
}
},
"currentState": {
"data": [
"0.0.3.0.0",
"0.0.3.1.0.0.2"
],
"components": {
"code___decoratedComponent___275181c7-8620-4df3-a008-d0cd9937db22___WSJTheme---WSJBase---WSJForms---WSJTables": {
"id": "275181c7-8620-4df3-a008-d0cd9937db22",
"decorators": [
"WSJTheme",
"WSJBase",
"WSJForms",
"WSJTables"
]
},
"code___decoratedComponent___c8882c9c-15d3-4d1f-9b0e-81b6f321365d___WSJTheme---WSJBase---WSJForms---WSJTables": {
"id": "c8882c9c-15d3-4d1f-9b0e-81b6f321365d",
"decorators": [
"WSJTheme",
"WSJBase",
"WSJForms",
"WSJTables"
]
}
}
}
}
I'm trying to get the values for "last", "previousDay" and "previousYear" from:
data > mdc_cashprices_ > data > instruments > (where djShortName = "Gold-EnglehardFab" and djShortName = "Silver-EngelhrdFab" and djShortName = "Platinum-EngelhFab" and djShortName = "Palladium-EngelFab")
I'm also trying to get the value for "timestamp" from:
data > mdc_cashprices_ > data > timestamp
In my current code I can only get to the 1st "data" tier.
Any help would be greatly appreciated.
Use something like https://jsonformatter.curiousconcept.com/ to view it more easily.
Then simply
Dim Something as JObject = JObject.Parse(<your JSON>)
Then get what you want with
Something1 = Something("data")("mdc_cashprices")("data")("data")("instruments")(0)("last") 'basically you just traverse the levels, you use the id/text for regular stuff (inside {}) and numbers for arrays (inside [])
Since it's an array you would probably want to loop it
For Each item In Something("data")("mdc_cashprices")("data")("data")("instruments")
Something2 = item("last")
Next
Btw there are some formatting issues here >>> mdc_cashprices_{\"req... <<< that _ and \ shouldn't be there"
Related
I need help with jq syntax on how to return the Gitlab job ID if it contains an artifact. The JSON output looks like this (removed a lot of unrelated info from it and added [...]):
[{
"id": 3219589880,
"status": "success",
"stage": "test",
"name": "job_with_no_artifact",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.119Z",
"started_at": "2022-10-24T18:21:25.986Z",
"finished_at": "2022-10-24T18:21:38.464Z",
"duration": 12.478682,
"queued_duration": 0.499786,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
"pipeline": {
"id": 123456789,
[...]
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts": [],
"runner": {
"id": 12270859,
[...]
},
"artifacts_expire_at": null,
"tag_list": []
}, {
"id": 3219589878,
"status": "success",
"stage": "test",
"name": "create_artifact_job_2",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.111Z",
"started_at": "2022-10-24T18:21:25.922Z",
"finished_at": "2022-10-24T18:21:39.090Z",
"duration": 13.168405,
"queued_duration": 0.464364,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
},
"pipeline": {
"id": 675641982,
[...],
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts_file": {
"filename": "artifacts.zip",
"size": 223
},
"artifacts": [{
"file_type": "archive",
"size": 223,
"filename": "artifacts.zip",
"file_format": "zip"
}, {
"file_type": "metadata",
"size": 153,
"filename": "metadata.gz",
"file_format": "gzip"
}],
"runner": {
"id": 12270845,
[...]
},
"artifacts_expire_at": "2022-10-25T18:21:35.859Z",
"tag_list": []
}, {
"id": 3219589876,
"status": "success",
"stage": "test",
"name": "create_artifact_job_1",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.103Z",
"started_at": "2022-10-24T18:21:25.503Z",
"finished_at": "2022-10-24T18:21:41.407Z",
"duration": 15.904028,
"queued_duration": 0.098837,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
},
"pipeline": {
"id": 123456789,
[...]
},
"web_url": "WEB_URL",
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts_file": {
"filename": "artifacts.zip",
"size": 217
},
"artifacts": [{
"file_type": "archive",
"size": 217,
"filename": "artifacts.zip",
"file_format": "zip"
}, {
"file_type": "metadata",
"size": 152,
"filename": "metadata.gz",
"file_format": "gzip"
}],
"runner": {
"id": 12270857,
},
"artifacts_expire_at": "2022-10-25T18:21:37.808Z",
"tag_list": []
}]
I've been trying to do either of the following using jQ:
Either:
Check if artifacts_file key exists in each iteration and if it does return the (job) id (so .[].id)
Check if artifacts array is empty in each iteration and if it is empty return the (job) id.
In both cases I'm able to do the first part but I am not sure how to return the .id key.
Related stackoverflow questions that I've been trying to utilize and adapt to my case:
jq - return array value if its length is not null
How to check for presence of 'key' in jq before iterating over the values
What I have so far: jq '[.[].artifacts[]|select(length > 0)] | .[]' which returns all the artifacts found (but it doesn't contain the .id of the job).
Checking the existence of a field using has:
.[] | select(has("artifacts_file")).id
3219589878
3219589876
Demo
Checking if a field is an empty array by comparing it to []:
.[] | select(.artifacts == []).id
3219589880
Demo
Hi i want to add some property in the give json string at the time of creating record
So this is my give Json String
{
"id": "123",
"type": "Red",
"bn": "TTY",
"isp": {
"object": "obj202",
"time": "19:30",
"providedBy": {
"object": "obj201"
}
},
"seed":35,
"tem": {
"value": 25,
"time": "20:20",
"uc": "CEL"
},
"loc": {
"value": {
"type": "p",
"coor": [
2.6,
9.6
]
}
}
}
IN this string i want to add type so response like this
{
"id": "123",
"type": "Red",
"bn": {
"type": "P",
"value": "TTY"
},
"isp": {
"type": "R",
"providedBy": {
"type": "R",
"object": "obj201"
},
"object": "obj202"
},
"seed": {
"type": "P",
"value": 85
},
"tem": {
"type": "P",
"value": 25,
"time": "20:20",
"uc": "CEL"
},
"location": {
"type": "GP",
"value": {
"type": "P",
"coor": [
2.6,
9.6
]
}
}}
so please guide me how to do this without using ObjectMapper class in the code
please provide the code
This question already has answers here:
How to use jq when the variable has reserved characters?
(3 answers)
Closed 6 months ago.
I have a JSON file that I am trying to query with jq. I am unable to retrieve the observations. I am trying to retieve each of the "observations using the following command and not able to get to the result:
cat sample3.json | jq .dataSets[0].series.0:0:0:0:0.observations.0[0]
I am able to retieve up to the series using:
cat sample3.json | jq .dataSets[0].series
But once I try to drill down further I am getting a compile error:
$ cat sample3.json | jq .dataSets[0].series.0:0:0:0:0
jq: error: syntax error, unexpected LITERAL, expecting end of file (Unix shell quoting issues?) at <top-level>, line 1:
.dataSets[0].series.0:0:0:0:0
jq: 1 compile error
I am not sure what I am doing wrong here....
The input file is:
{
"header": {
"id": "b8be2cd5-33bf-4687-9e81-eb032f6f8a71",
"test": false,
"prepared": "2022-09-01T13:30:57.013+02:00",
"sender": {
"id": "ECB"
}
},
"dataSets": [
{
"action": "Replace",
"validFrom": "2022-09-01T13:30:57.013+02:00",
"series": {
"0:0:0:0:0": {
"attributes": [
0,
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
0,
null,
0,
null,
0,
0,
0,
0
],
"observations": {
"0": [
1.4529,
0,
0,
null,
null
],
"1": [
1.4472,
0,
0,
null,
null
],
"2": [
1.4591,
0,
0,
null,
null
]
}
}
}
}
],
"structure": {
"links": [
{
"title": "Exchange Rates",
"rel": "dataflow",
"href": "https://sdw-wsrest.ecb.europa.eu:443/service/dataflow/ECB/EXR/1.0"
}
],
"name": "Exchange Rates",
"dimensions": {
"series": [
{
"id": "FREQ",
"name": "Frequency",
"values": [
{
"id": "D",
"name": "Daily"
}
]
},
{
"id": "CURRENCY",
"name": "Currency",
"values": [
{
"id": "AUD",
"name": "Australian dollar"
}
]
},
{
"id": "CURRENCY_DENOM",
"name": "Currency denominator",
"values": [
{
"id": "EUR",
"name": "Euro"
}
]
},
{
"id": "EXR_TYPE",
"name": "Exchange rate type",
"values": [
{
"id": "SP00",
"name": "Spot"
}
]
},
{
"id": "EXR_SUFFIX",
"name": "Series variation - EXR context",
"values": [
{
"id": "A",
"name": "Average"
}
]
}
],
"observation": [
{
"id": "TIME_PERIOD",
"name": "Time period or range",
"role": "time",
"values": [
{
"id": "2022-08-29",
"name": "2022-08-29",
"start": "2022-08-29T00:00:00.000+02:00",
"end": "2022-08-29T23:59:59.999+02:00"
},
{
"id": "2022-08-30",
"name": "2022-08-30",
"start": "2022-08-30T00:00:00.000+02:00",
"end": "2022-08-30T23:59:59.999+02:00"
},
{
"id": "2022-08-31",
"name": "2022-08-31",
"start": "2022-08-31T00:00:00.000+02:00",
"end": "2022-08-31T23:59:59.999+02:00"
}
]
}
]
},
"attributes": {
"series": [
{
"id": "TIME_FORMAT",
"name": "Time format code",
"values": [
{
"name": "P1D"
}
]
},
{
"id": "BREAKS",
"name": "Breaks",
"values": []
},
{
"id": "COLLECTION",
"name": "Collection indicator",
"values": [
{
"id": "A",
"name": "Average of observations through period"
}
]
},
{
"id": "COMPILING_ORG",
"name": "Compiling organisation",
"values": []
},
{
"id": "DISS_ORG",
"name": "Data dissemination organisation",
"values": []
},
{
"id": "DOM_SER_IDS",
"name": "Domestic series ids",
"values": []
},
{
"id": "PUBL_ECB",
"name": "Source publication (ECB only)",
"values": []
},
{
"id": "PUBL_MU",
"name": "Source publication (Euro area only)",
"values": []
},
{
"id": "PUBL_PUBLIC",
"name": "Source publication (public)",
"values": []
},
{
"id": "UNIT_INDEX_BASE",
"name": "Unit index base",
"values": []
},
{
"id": "COMPILATION",
"name": "Compilation",
"values": []
},
{
"id": "COVERAGE",
"name": "Coverage",
"values": []
},
{
"id": "DECIMALS",
"name": "Decimals",
"values": [
{
"id": "4",
"name": "Four"
}
]
},
{
"id": "NAT_TITLE",
"name": "National language title",
"values": []
},
{
"id": "SOURCE_AGENCY",
"name": "Source agency",
"values": [
{
"id": "4F0",
"name": "European Central Bank (ECB)"
}
]
},
{
"id": "SOURCE_PUB",
"name": "Publication source",
"values": []
},
{
"id": "TITLE",
"name": "Title",
"values": [
{
"name": "Australian dollar/Euro"
}
]
},
{
"id": "TITLE_COMPL",
"name": "Title complement",
"values": [
{
"name": "ECB reference exchange rate, Australian dollar/Euro, 2:15 pm (C.E.T.)"
}
]
},
{
"id": "UNIT",
"name": "Unit",
"values": [
{
"id": "AUD",
"name": "Australian dollar"
}
]
},
{
"id": "UNIT_MULT",
"name": "Unit multiplier",
"values": [
{
"id": "0",
"name": "Units"
}
]
}
],
"observation": [
{
"id": "OBS_STATUS",
"name": "Observation status",
"values": [
{
"id": "A",
"name": "Normal value"
}
]
},
{
"id": "OBS_CONF",
"name": "Observation confidentiality",
"values": [
{
"id": "F",
"name": "Free"
}
]
},
{
"id": "OBS_PRE_BREAK",
"name": "Pre-break observation value",
"values": []
},
{
"id": "OBS_COM",
"name": "Observation comment",
"values": []
}
]
}
}
}
The .foo syntax cannot be used if the key name has anything but alphanumeric characters or the underscore, or if the first character of the key name is numeric.
Assuming you are using a recent version of jq,
you can always use the form: ."foo", which is actually an abbreviation of the basic form, .["foo"].
So assuming you're using a sufficiently recent version of jq, your query could begin with:
.dataSets[0].series."0:0:0:0:0"
If you are presenting the jq query on a command line, then you may have to escape the double-quotes appropriately, e.g. in a bash shell, by enclosing the jq query in single-quotes.
I have a long list of json file . I want to extract the subdomain of harvard.edu which is in the variable host in "host": "ceonlineb2b.hms.harvard.edu using bash . I would be happy if anyone can help out .Below is only a snippet of json file.
{
"data": {
"total_items": 3,
"offset": 0,
"limit": 1,
"items": [
{
"name": "ceonlineb2b.hms.harvard.edu",
"alexa": null,
"cert_summary": null,
"dns_records": {
"A": [
"3.221.168.206",
"54.174.253.3"
],
"AAAA": null,
"CAA": null,
"CNAME": [
"hms-moodleb2b-prod.cabem.com"
],
"MX": null,
"NS": null,
"SOA": null,
"TXT": null,
"SPF": null,
"updated_at": "2021-05-14T23:12:43.332816923Z"
},
"hosts_enrichment": [
{
"ip": "3.221.168.206",
"as_num": 14618,
"as_org": "amazon-aes",
"isp": "amazon.com",
"city_name": "ashburn",
"country": "united states",
"country_iso_code": "us",
"location": {
"lat": 39.0481,
"lon": -77.4728
}
},
{
"ip": "54.174.253.3",
"as_num": 14618,
"as_org": "amazon-aes",
"isp": "amazon.com",
"city_name": "ashburn",
"country": "united states",
"country_iso_code": "us",
"location": {
"lat": 39.0481,
"lon": -77.4728
}
}
],
"http_extract": {
"cookies": [
{
"domain": "",
"expire": "0001-01-01T00:00:00Z",
"http_only": true,
"key": "MoodleSession",
"max_age": 0,
"path": "/",
"security": true,
"value": "tqhmqc4muk513sad1bmnl3kocj"
}
],
"description": "",
"emails": null,
"final_redirect_url": {
"full_uri": "https://ceonlineb2b.hms.harvard.edu/login/index.php",
"host": "ceonlineb2b.hms.harvard.edu",
"path": "/login/index.php"
},
"extracted_at": "2020-10-04T20:55:26.043777194Z",
"favicon_sha256": "",
"http_headers": [
{
"name": "date",
"value": "Sun, 04 Oct 2020 20:55:25 GMT"
},
{
"name": "content-type",
"value": "text/html; charset=utf-8"
},
{
"name": "server",
"value": "Apache/2.4.46 () OpenSSL/1.0.2k-fips"
},
{
"name": "x-powered-by",
"value": "PHP/7.2.24"
},
{
"name": "content-language",
"value": "en"
},
{
"name": "content-script-type",
"value": "text/javascript"
},
{
"name": "content-style-type",
"value": "text/css"
},
{
"name": "x-ua-compatible",
"value": "IE=edge"
},
{
"name": "cache-control",
"value": "private, pre-check=0, post-check=0, max-age=0, no-transform"
},
{
"name": "pragma",
"value": "no-cache"
},
{
"name": "expires",
"value": ""
},
{
"name": "accept-ranges",
"value": "none"
},
{
"name": "set-cookie",
"value": "MoodleSession=tqhmqc4muk513sad1bmnl3kocj; path=/; secure;HttpOnly;Secure;SameSite=None"
}
],
"http_status_code": 200,
"links": [
{
"anchor": "Forgotten your username or password?",
"url": "https://ceonlineb2b.hms.harvard.edu/login/forgot_password.php",
"url_host": "ceonlineb2b.hms.harvard.edu"
},
{
"anchor": "Privacy Statement",
"url": "/local/staticpage/view.php?page=privacy-statement",
"url_host": ""
},
{
"anchor": "Terms of Service",
"url": "/local/staticpage/view.php?page=terms-of-service",
"url_host": ""
},
{
"anchor": "Copyright Information",
"url": "/local/staticpage/view.php?page=copyright-information",
"url_host": ""
}
],
"meta_tags": [
{
"name": "keywords",
"value": "moodle, HMS Postgraduate Courses: Log in to the site"
},
{
"name": "format-detection",
"value": "telephone=no"
},
{
"name": "robots",
"value": "noindex"
},
{
"name": "viewport",
"value": "width=device-width, initial-scale=1.0"
}
],
"robots_txt": "",
"scripts": [
"https://ceonlineb2b.hms.harvard.edu/theme/yui_combo.php?rollup/3.17.2/yui-moodlesimple-min.js",
"https://ceonlineb2b.hms.harvard.edu/lib/javascript.php/1589465014/lib/javascript-static.js",
"https://ceonlineb2b.hms.harvard.edu/lib/javascript.php/1589465014/lib/requirejs/require.min.js",
"https://ceonlineb2b.hms.harvard.edu/theme/javascript.php/hms/1589465013/footer"
],
"styles": [
"https://ceonlineb2b.hms.harvard.edu/theme/yui_combo.php?rollup/3.17.2/yui-moodlesimple-min.css",
"https://ceonlineb2b.hms.harvard.edu/theme/styles.php/hms/1589465013_1/all"
],
"title": "HMS Postgraduate Courses: Log in to the site"
},
"is_CNAME": null,
"is_MX": null,
"is_NS": null,
"is_PTR": null,
"is_subdomain": true,
"name_without_suffix": "ceonlineb2b.hms.harvard",
"updated_at": "2021-05-16T10:25:01.59086376Z",
"user_scan_at": null,
"whois_parsed": null,
"security_score": {
"score": 100
},
"cve_list": null,
"technologies": [
{
"name": "Moodle",
"version": ""
},
{
"name": "RequireJS",
"version": ""
}
],
"trackers": null,
"organizations": null
}
]
}
}
For json parsing on bash, I recommend checking out jq. It's lightweight and versatile.
We can use the -r flag to output only values.
Output the fields of each object with the keys in sorted order.
--raw-output / -r:
The structure of the JSON you provided has the subdomain at .data.items[].http_extract.final_redirect_url.host
{
"data": {
"items": [
{
"http_extract": {
"final_redirect_url": {
"full_uri": "https://ceonlineb2b.hms.harvard.edu/login/index.php",
"host": "ceonlineb2b.hms.harvard.edu",
"path": "/login/index.php"
},
...
I've saved your json to a file, se.json
Example extracting full domain with jq
jq -r '.data.items[].http_extract.final_redirect_url.host' se.json
Output
ceonlineb2b.hms.harvard.edu
To extract the subdomain, just perform a search/replace using sub().
sub(regex; tostring) sub(regex; string; flags)
Emit the string obtained by replacing the first match of regex in the input string with tostring, after interpolation. tostring should be a jq string, and may contain references to named captures. The named captures are, in effect, presented as a JSON object (as constructed by capture) to tostring, so a reference to a captured variable named "x" would take the form: "(.x)".
Extracting subdomain using jq
jq -r '.data.items[].http_extract.final_redirect_url.host | sub(".hms.harvard.edu";"")' se.json
Output
ceonlineb2b
I am trying to convert a nested json file into csv. It's data from a darts API and the structure is always the same. Nevertheless I got some problems flattening and storing the values in a csv because of the nested structure.
json:
{
"summaries": [{
"sport_event": {
"id": "sr:sport_event:12967512",
"start_time": "2017-11-11T13:15:00+00:00",
"start_time_confirmed": true,
"sport_event_context": {
"sport": {
"id": "sr:sport:22",
"name": "Darts"
},
"category": {
"id": "sr:category:104",
"name": "International"
},
"competition": {
"id": "sr:competition:597",
"name": "Grand Slam of Darts"
},
"season": {
"id": "sr:season:47332",
"name": "Grand Slam of Darts 2017",
"start_date": "2017-11-11",
"end_date": "2017-11-20",
"year": "2017",
"competition_id": "sr:competition:597"
},
"stage": {
"order": 1,
"type": "league",
"phase": "stage_1",
"start_date": "2017-11-11",
"end_date": "2017-11-15",
"year": "2017"
},
"round": {
"number": 1
},
"groups": [{
"id": "sr:league:29766",
"name": "Grand Slam of Darts 2017, Group G",
"group_name": "G"
}]
},
"coverage": {
"live": true
},
"competitors": [{
"id": "sr:competitor:35936",
"name": "Smith, Michael",
"abbreviation": "SMI",
"qualifier": "home"
}, {
"id": "sr:competitor:83895",
"name": "Wilson, James",
"abbreviation": "WIL",
"qualifier": "away"
}]
},
"sport_event_status": {
"status": "closed",
"match_status": "ended",
"home_score": 5,
"away_score": 3,
"winner_id": "sr:competitor:35936"
}
}, {
"sport_event": {
"id": "sr:sport_event:12967508",
"start_time": "2017-11-11T13:40:00+00:00",
"start_time_confirmed": true,
"sport_event_context": {
"sport": {
"id": "sr:sport:22",
"name": "Darts"
},
"category": {
"id": "sr:category:104",
"name": "International"
},
"competition": {
"id": "sr:competition:597",
"name": "Grand Slam of Darts"
},
"season": {
"id": "sr:season:47332",
"name": "Grand Slam of Darts 2017",
"start_date": "2017-11-11",
"end_date": "2017-11-20",
"year": "2017",
"competition_id": "sr:competition:597"
},
"stage": {
"order": 1,
"type": "league",
"phase": "stage_1",
"start_date": "2017-11-11",
"end_date": "2017-11-15",
"year": "2017"
},
"round": {
"number": 1
},
"groups": [{
"id": "sr:league:29764",
"name": "Grand Slam of Darts 2017, Group F",
"group_name": "F"
}]
},
"coverage": {
"live": true
},
"competitors": [{
"id": "sr:competitor:70916",
"name": "Bunting, Stephen",
"abbreviation": "BUN",
"qualifier": "home"
}, {
"id": "sr:competitor:191262",
"name": "de Zwaan, Jeffrey",
"abbreviation": "DEZ",
"qualifier": "away"
}]
},
"sport_event_status": {
"status": "closed",
"match_status": "ended",
"home_score": 5,
"away_score": 4,
"winner_id": "sr:competitor:70916"
}
}
So for each sport_event I would like to store the variables:
"start_time"
from "season" the variable "name"
from "competitors" both "id" and "name"
from "sport_event_status" the "winner_id"
I have already tried to flatten the json file with this code:
import json
f = open(r'path of file.json')
data = json.load(f)
def flatten(data):
for key,value in data.items():
print (str(key)+'->'+str(value))
if type(value) == type(dict()):
flatten(value)
elif type(value) == type(list()):
for val in value:
if type(val) == type(str()):
pass
elif type(val) == type(list()):
pass
else:
flatten(val)
flatten(data)
print(data)
This actually prints out the following:
id->sr:season:47332
name->Grand Slam of Darts 2017
start_date->2017-11-11
end_date->2017-11-20
year->2017
competition_id->sr:competition:597
Now my question is how to store the values I mentioned above in a csv file.
Thanks in advance for your support.
Using jq, you basically just have to transcribe your specification, adding a bit of context and taking care of an embedded array:
.summaries[]
| .sport_event # Your specification:
| [.start_time, # start_time
.sport_event_context.season.name] # from "season" the variable "name"
+ [.competitors[] | .id, .name] # from "competitors" both "id" and "name"
+ [.sport_event_status.winner_id] # from "sport_event_status" the "winner_id"
| #csv
Invocation
E.g.
jq -rf program.jq my.json