I'm having a difficult time figuring out how to pull specific information from a json file.
So far I have this:
# Import json library
import json
# Read the stored record from the JSON database file
with open('jsondatabase.json', 'r') as db_file:
    record = json.load(db_file)

# Normalise the stored values to strings so they compare cleanly with input()
identifier = str(record['ID'])
name = str(record['name'])

# Ask the user for the values to check against the stored record
print("Please enter your numerical identifier and name: ")
user_id = input("Numerical identifier: ")
user_name = input("Name: ")

# Both fields must agree for the check to succeed
inputs_match = user_id == identifier and user_name == name
if not inputs_match:
    print("Your inputs did not match our data. Please try again.")
else:
    print("Your inputs matched. Congrats.")
And that works great for a simple JSON file like this:
{
"ID": "123",
"name": "Bobby"
}
But ideally I need to create a more complex JSON file and can't find deeper information on how to pull specific information from something like this:
{
"Parent": [
{
"Parent_1": [
{
"Name": "Bobby",
"ID": "123"
}
],
"Parent_2": [
{
"Name": "Linda",
"ID": "321"
}
]
}
]
}
Here is an example that you might be able to pick apart.
You could either:
Make a custom de-jsonify object_hook as shown below and do something with it. There is a good tutorial here.
Just gobble up the whole dictionary that you get without a custom de-jsonify and drill down into it and make a list or set of the results. (not shown)
Example:
import json
from collections import namedtuple
data = '''
{
"Parents":
[
{
"Name": "Bobby",
"ID": "123"
},
{
"Name": "Linda",
"ID": "321"
}
]
}
'''
# Lightweight record for one decoded parent entry: (name, numeric id).
Parent = namedtuple('Parent', ['name', 'id'])


def dejsonify(json_str: dict):
    """Convert a decoded JSON object into a ``Parent`` when it looks like one.

    Intended for use as the ``object_hook`` of ``json.loads``. Despite the
    parameter name, ``json_str`` is the already-decoded ``dict`` for one JSON
    object, not a string.

    Args:
        json_str: The dict decoded from a single JSON object.

    Returns:
        ``Parent(name, id)`` with ``id`` converted to ``int`` when the object
        has a non-empty "Name" and an "ID"; otherwise the dict unchanged.

    Raises:
        ValueError: If "ID" is present but is not a valid integer literal.
    """
    name = json_str.get('Name')
    raw_id = json_str.get('ID')
    # Pass through anything that is not a complete parent record instead of
    # crashing on int(None) when "ID" is missing or null.
    if not name or raw_id is None:
        return json_str
    return Parent(name, int(raw_id))
# Decode the document; dejsonify is passed as object_hook, so its return
# value is used in place of each decoded JSON object.
res = json.loads(data, object_hook=dejsonify)
print(res)
# For lookups by name/id, index the decoded parents in a dictionary keyed
# by the (name, id) pair.
all_parents = {(p.name, p.id) : p for p in res['Parents']}
# The key is a (name, id) tuple, with the id given here as an int.
lookup_from_input = ('Bobby', 123)
# .get() yields None rather than raising when the pair is not present.
print(f'found match: {all_parents.get(lookup_from_input)}')
Result:
{'Parents': [Parent(name='Bobby', id=123), Parent(name='Linda', id=321)]}
found match: Parent(name='Bobby', id=123)
I'm trying to parse the NIH grant API and am running into a complex layering issue. In the JSON output below, I've been able to navigate into the "results" section which contains all the fields I want, except some are layered within another dictionary. What I'm trying to do is get the JSON data within "full_study_section", "organization", and "project_num_split" to be in the same layer as "appl_id", "contact_pi_name", "fiscal_year", and so forth. This post was helpful but I'm not quite sure how to level the layers through iteration.
{
"meta":{
"limit":25,
"offset":0,
"properties":{},
"search_id":null,
"sort_field":"project_start_date",
"sort_order":"desc",
"sorted_by_relevance":false,
"total":78665
},
"results":[
{
"appl_id":10314644,
"contact_pi_name":"BROCATO, EMILY ROSE",
"fiscal_year":2021,
"full_study_section":{
"group_code":"32",
"name":"Special Emphasis Panel[ZAA1 GG (32)]",
"sra_designator_code":"GG",
"sra_flex_code":"",
"srg_code":"ZAA1",
"srg_flex":""
},
"organization":{
"city":null,
"country":null,
"dept_type":"PHARMACOLOGY",
"external_org_id":353201,
"fips_country_code":null,
"org_city":"RICHMOND",
"org_country":"UNITED STATES",
"org_duns":[
"105300446"
],
"org_fips":"US",
"org_ipf_code":"353201",
"org_name":"VIRGINIA COMMONWEALTH UNIVERSITY",
"org_state":"VA",
"org_state_name":null,
"org_zipcode":"232980568"
},
"project_end_date":null,
"project_num":"1F31AA029259-01A1",
"project_num_split":{
"activity_code":"F31",
"appl_type_code":"1",
"full_support_year":"01A1",
"ic_code":"AA",
"serial_num":"029259",
"suffix_code":"A1",
"support_year":"01"
},
"project_start_date":"2022-03-07T05:00:00Z",
"subproject_id":null
},
Code:
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
#print(response.status_code)
#print(response.text)
resdecode = json.loads(response.text)
#print(json.dumps(resdecode, sort_keys=True, indent=4, separators=(',', ':')))
data = resdecode["results"]
#print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ':')))
pns = resdecode["results"][0]["project_num_split"]
#print(json.dumps(pns, sort_keys=True, indent=4, separators=(',', ':')))
# for item in data:
# appl_id = item.get("appl_id")
# print(appl_id)
# Write one CSV row per project record. Iterate the "results" list (data):
# iterating resdecode itself only yields its top-level key names.
# The with-block closes the file; csv.writer objects have no close() method.
with open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = '') as csv_file:
    writerr = csv.writer(csv_file)
    for count, row in enumerate(data):
        if count == 0:
            # Header row comes from the first record's field names
            header = row.keys()
            writerr.writerow(header)
        writerr.writerow(row.values())
In order to move the items under full_study_section, organization and project_num_split to the same level as appl_id, contact_pi_name and fiscal_year, you will have to loop through each of the results, recreate the key/value pairs of those three dicts at the top level, and then remove the full_study_section, organization and project_num_split keys once done. The code below should work as you expect.
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
resdecode = json.loads(response.text)
data = resdecode["results"]
# Nested sections whose fields are lifted to the top level of each record.
nested_sections = ("full_study_section", "organization", "project_num_split")

for item in data:
    for section in nested_sections:
        # pop() with a default tolerates a missing section, and "or {}"
        # tolerates a section that is null in the response.
        item.update(item.pop(section, None) or {})

# Build the header from every record (first-seen order) so that records with
# differing keys still line up under the right column.
fieldnames = list(dict.fromkeys(key for row in data for key in row))

with open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = '') as f:
    # Nothing is written for an empty result list, not even a header.
    if data:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
You can move the items to the required level and remove the dict.
import json
import pprint
pp = pprint

# Load the saved API response; the with-block closes the file afterwards.
with open("test.json") as file:
    jsonData = json.load(file)

# Lift the fields of each nested section to the top level of every result,
# then drop the now-redundant section key. Sections are merged in this order,
# so on a key clash the later section wins.
for result in jsonData['results']:
    for section in ('full_study_section', 'project_num_split', 'organization'):
        # Tolerate results where a section is absent or null
        result.update(result.pop(section, None) or {})

pp.pprint(jsonData)
Output:
{u'meta': {u'limit': 25,
u'offset': 0,
u'properties': {},
u'search_id': None,
u'sort_field': u'project_start_date',
u'sort_order': u'desc',
u'sorted_by_relevance': False,
u'total': 78665},
u'results': [{u'activity_code': u'F31',
u'appl_id': 10314644,
u'appl_type_code': u'1',
u'city': None,
u'contact_pi_name': u'BROCATO, EMILY ROSE',
u'country': None,
u'dept_type': u'PHARMACOLOGY',
u'external_org_id': 353201,
u'fips_country_code': None,
u'fiscal_year': 2021,
u'full_support_year': u'01A1',
u'group_code': u'32',
u'ic_code': u'AA',
u'name': u'Special Emphasis Panel[ZAA1 GG (32)]',
u'org_city': u'RICHMOND',
u'org_country': u'UNITED STATES',
u'org_duns': [u'105300446'],
u'org_fips': u'US',
u'org_ipf_code': u'353201',
u'org_name': u'VIRGINIA COMMONWEALTH UNIVERSITY',
u'org_state': u'VA',
u'org_state_name': None,
u'org_zipcode': u'232980568',
u'project_end_date': None,
u'project_num': u'1F31AA029259-01A1',
u'project_start_date': u'2022-03-07T05:00:00Z',
u'serial_num': u'029259',
u'sra_designator_code': u'GG',
u'sra_flex_code': u'',
u'srg_code': u'ZAA1',
u'srg_flex': u'',
u'subproject_id': None,
u'suffix_code': u'A1',
u'support_year': u'01'}]}
I am trying to generate JSON paths automatically from a given JSON structure, but I am stuck on the programmatic part. Can someone please help with an idea for taking it further?
Below is the code I have so far.
def iterate_dict(dict_data, key, tmp_key):
    """Print every leaf of a decoded JSON document as ``<path> = <value>``.

    Paths are written as ``$id``, ``$categories[0].name``,
    ``$categories[0].leaders[0].ref[1]`` and so on: list elements get an
    ``[index]`` suffix, nested object keys are joined with ``.``, and the
    first key is attached directly to the root marker.

    Args:
        dict_data: The decoded JSON value to walk (normally a dict).
        key: The root marker that starts every path, normally ``'$'``.
        tmp_key: Path already walked below the root; ``''`` at the top level.
    """
    _print_paths(dict_data, key + tmp_key, at_root=not tmp_key)


def _print_paths(node, path, at_root=False):
    """Recursively print the path and value of each leaf below ``node``."""
    if isinstance(node, dict):
        # No separator directly after the bare root marker ("$id"), a dot
        # everywhere else ("$categories[0].name").
        separator = '' if at_root else '.'
        for name, child in node.items():
            _print_paths(child, '{}{}{}'.format(path, separator, name))
    elif isinstance(node, list):
        for index, child in enumerate(node):
            _print_paths(child, '{}[{}]'.format(path, index))
    else:
        print('{} = {}'.format(path, node))
import json
# Start at the root marker '$' with no path walked yet. The function defined
# for this is iterate_dict; iterate_dict_new does not exist.
iterate_dict(dict(json.loads(d_data)), '$', '')
consider the below json structure
{
"id": "1",
"categories": [
{
"name": "author",
"book": "fiction",
"leaders": [
{
"ref": ["wiki", "google"],
"athlete": {
"$ref": "some data"
},
"data": {
"$data": "some other data"
}
}
]
},
{
"name": "dummy name"
}
]
}
Expected output out of python script:
$id = 1
$categories[0].name = author
$categories[0].book = fiction
$categories[0].leaders[0].ref[0] = wiki
$categories[0].leaders[0].ref[1] = google
$categories[0].leaders[0].athlete.$ref = some data
$categories[0].leaders[0].data.$data = some other data
$categories[1].name = dummy name
Current output with above python script:
$.id 1
$$.categories.name author
$$.categories.book fiction
$$$.categories.leaders.ref ["wiki", "google"]
$$$$$.categories.leaders.athlete.$ref some data
$$$$$$.categories.leaders.athlete.data.$data some other data
$$.name dummy name
The following recursive function is similar to yours, but instead of just displaying a dictionary, it can also take a list. This means that if you passed in a dictionary where one of the values was a nested list, then the output would still be correct (printing things like dict.key[3][4] = element).
def disp_paths(it, p='$'):
    """Print a ``<path> = <value>`` line for every leaf in a nested structure.

    Dict keys are appended to the path as ``.key`` and list positions as
    ``[index]``, at any nesting depth, including lists that directly contain
    other lists or dicts. Empty dicts and lists produce no output.

    Args:
        it: The dict or list to walk.
        p: Path prefix for everything inside ``it``; ``'$'`` is the root.
    """
    if isinstance(it, dict):
        children = (('{}.{}'.format(p, k), v) for k, v in it.items())
    else:
        children = (('{}[{}]'.format(p, i), v) for i, v in enumerate(it))
    for path, v in children:
        if isinstance(v, (dict, list)):
            # The child's own type decides how its members extend the path
            disp_paths(v, path)
        else:
            print('{} = {}'.format(path, v))
which, when run with your dictionary (disp_paths(d)), gives the expected output of:
$.categories[0].leaders[0].athlete.$ref = some data
$.categories[0].leaders[0].data.$data = some other data
$.categories[0].leaders[0].ref[0] = wiki
$.categories[0].leaders[0].ref[1] = google
$.categories[0].book = fiction
$.categories[0].name = author
$.categories[1].name = dummy name
$.id = 1
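Note that the output above is unfortunately not in the original order. On Python versions before 3.7 that is unavoidable, as dictionaries there have no guaranteed order (they are just sets of key:value pairs); from Python 3.7 on, dictionaries preserve insertion order, so the paths are printed in document order.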
If you need help understanding my modifications, just drop a comment!
I've written a program which processes JSON objects. Now I want to verify whether I've missed something.
Is there a JSON example covering all allowed JSON structure combinations? Something like this:
{
"key1" : "value",
"key2" : 1,
"key3" : {"key1" : "value"},
"key4" : [
[
"string1",
"string2"
],
[
1,
2
],
...
],
"key5" : true,
"key6" : false,
"key7" : null,
...
}
As you can see at http://json.org/ on the right-hand side, the grammar of JSON isn't very difficult, but I've hit several exceptions because I forgot to handle some structure combinations that are possible. E.g. inside an array there can be "string, number, object, array, true, false, null", but my program couldn't handle arrays inside an array until I ran into an exception. So everything was fine until I got a valid JSON object with arrays inside an array.
I want to test my program with such a JSON object (which is what I'm looking for). After this test I want to feel certain that my program handles every possible valid JSON structure on earth without an exception.
I don't need nesting to depth 5 or so. I only need something nested to depth 2, or 3 at most, in which every base type contains every base type that is allowed inside it.
Have you thought of escaped characters and objects within an object?
{
"key1" : {
"key1" : "value",
"key2" : [
"String1",
"String2"
]
},
"key2" : "\"This is a quote\"",
"key3" : "This contains an escaped slash: \\",
"key4" : "This contains accent characters: \u00eb \u00ef"
}
Note: \u00eb and \u00ef are the characters ë and ï, respectively.
Choose a programming language that supports JSON.
Try to load your JSON; on failure, the exception's message is descriptive.
Example:
Python:
import json, sys;
# Parse the file named on the command line; a syntax error surfaces as an
# exception whose message describes the problem. The with-block closes the
# file even when parsing fails.
with open(sys.argv[1]) as json_file:
    json.load(json_file)
Generate:
import random, json, os, string
def json_null(depth=0):
    """Produce JSON ``null``; ``depth`` exists only for a uniform signature."""
    return None


def json_int(depth=0):
    """Produce a random integer between -999 and 999 inclusive."""
    return random.randint(-999, 999)


def json_float(depth=0):
    """Produce a random float drawn uniformly between -999 and 999."""
    return random.uniform(-999, 999)


def json_string(depth=0):
    """Produce 10 to 39 distinct printable characters in random order."""
    length = random.randrange(10, 40)
    chars = random.sample(string.printable, length)
    return ''.join(chars)


def json_bool(depth=0):
    """Produce ``True`` or ``False`` with equal probability."""
    return bool(random.randint(0, 1))


def json_list(depth):
    """Produce a list of up to 7 random values, each nested less than ``depth``.

    A ``depth`` of 0 gives an empty list.
    """
    if not depth:
        return []
    return [gen_json(random.randrange(depth)) for _ in range(random.randrange(8))]


def json_object(depth):
    """Produce a dict of up to 7 random string keys mapped to random values.

    A ``depth`` of 0 gives an empty dict.
    """
    result = {}
    if not depth:
        return result
    for _ in range(random.randrange(8)):
        # Generate the value before the key so random numbers are drawn in
        # the same order as a direct ``obj[key()] = value()`` assignment.
        value = gen_json(random.randrange(depth))
        result[json_string()] = value
    return result


def gen_json(depth=8):
    """Produce a random JSON-serialisable value.

    A non-zero ``depth`` yields a container (list or object); a ``depth`` of
    0 yields a scalar (null, int, float, string or bool).
    """
    if depth:
        makers = [json_list, json_object]
    else:
        makers = [json_null, json_int, json_float, json_string, json_bool]
    return random.choice(makers)(depth)
print(json.dumps(gen_json(), indent = 2))
I have the following code chunk:
// Triple-quoted so the JSON literal may span several lines.
def response = '''[{"id": "121","startTime": "2013-11-10T20:48:54Z", "reqId": 123456, "endTime": null, "numFiles" :null},
{"id": "123","startTime": "2013-11-29T21:45:00Z","reqId": 123458,"endTime": "2013-11-30T21:45:00Z", "numFiles" :null },
{"id": "121","startTime": "2013-11-8T20:48:54Z", "reqId": 123111, "endTime": null, "numFiles" :null}]'''
// Parse first: sort() with a closure is available on the parsed list of
// records, not on the raw String.
def reqRespAPI = new JsonSlurper().parseText(response)
// Comparing b against a orders the records by reqId, largest first.
def sortedResponse = reqRespAPI.sort { a,b -> b.reqId <=> a.reqId}
// Reading a property on the list collects that field from every record,
// so each value below is a list in sorted (descending reqId) order.
def id = sortedResponse.id
def stTime = sortedResponse.startTime
def eTime = sortedResponse.endTime
def rqId = sortedResponse.reqId
def numRec = sortedResponse.numFiles
...some other stuff here....
I am trying to sort by reqId (rqId) descending. Do I have to use a for loop? The current sortedResponse is throwing an exception:
groovy.lang.MissingMethodException: No signature of method: java.lang.String.sort() is applicable for argument types: (...Controller$_closure2_closure8) values: [....Controller$_closure2_closure8#5976ac5b]
I have also tried sort (new OrderBy(...)) but that also did not work...
Any help would be appreciated.
The issue seems to be that you're trying to sort the response String, rather than the collection of JSONObjects.
Try this?
// Parse the JSON text first, so that sort() is called on the parsed result
// rather than on the String.
def reqRespJSON = new JsonSlurper().parseText( response )
// b is compared against a, so larger reqId values come first (descending).
def sortedJSON = reqRespJSON.sort { a,b -> b.reqId <=> a.reqId}
// [0] selects the first record of the sorted result.
def id = sortedJSON[0].id
Note that sortedJSON is a List of Maps, so you have to specify which one you want the id from (using [0]).