Python- Issue parsing multi-layered API JSON into CSV

Python- Issue parsing multi-layered API JSON into CSV - json

I'm trying to parse the NIH grant API and am running into a complex layering issue. In the JSON output below, I've been able to navigate into the "results" section which contains all the fields I want, except some are layered within another dictionary. What I'm trying to do is get the JSON data within "full_study_section", "organization", and "project_num_split" to be in the same layer as "appl_id", "contact_pi_name", "fiscal_year", and so forth. This post was helpful but I'm not quite sure how to level the layers through iteration.
{
"meta":{
"limit":25,
"offset":0,
"properties":{},
"search_id":null,
"sort_field":"project_start_date",
"sort_order":"desc",
"sorted_by_relevance":false,
"total":78665
},
"results":[
{
"appl_id":10314644,
"contact_pi_name":"BROCATO, EMILY ROSE",
"fiscal_year":2021,
"full_study_section":{
"group_code":"32",
"name":"Special Emphasis Panel[ZAA1 GG (32)]",
"sra_designator_code":"GG",
"sra_flex_code":"",
"srg_code":"ZAA1",
"srg_flex":""
},
"organization":{
"city":null,
"country":null,
"dept_type":"PHARMACOLOGY",
"external_org_id":353201,
"fips_country_code":null,
"org_city":"RICHMOND",
"org_country":"UNITED STATES",
"org_duns":[
"105300446"
],
"org_fips":"US",
"org_ipf_code":"353201",
"org_name":"VIRGINIA COMMONWEALTH UNIVERSITY",
"org_state":"VA",
"org_state_name":null,
"org_zipcode":"232980568"
},
"project_end_date":null,
"project_num":"1F31AA029259-01A1",
"project_num_split":{
"activity_code":"F31",
"appl_type_code":"1",
"full_support_year":"01A1",
"ic_code":"AA",
"serial_num":"029259",
"suffix_code":"A1",
"support_year":"01"
},
"project_start_date":"2022-03-07T05:00:00Z",
"subproject_id":null
},
Code:
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
#print(response.status_code)
#print(response.text)
resdecode = json.loads(response.text)
#print(json.dumps(resdecode, sort_keys=True, indent=4, separators=(',', ':')))
data = resdecode["results"]
#print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ':')))
pns = resdecode["results"][0]["project_num_split"]
#print(json.dumps(pns, sort_keys=True, indent=4, separators=(',', ':')))
# for item in data:
# appl_id = item.get("appl_id")
# print(appl_id)
writerr = csv.writer(open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = ''))
count = 0
for row in resdecode:
if count == 0:
header = resdecode.keys()
writerr.writerow(header)
count += 1
writerr.writerow(row)
writerr.close()

To move the items under full_study_section, organization and project_num_split to the same level as appl_id, contact_pi_name and fiscal_year, loop through each of the results, copy the key/value pairs of those three dicts into the result itself, and then remove the full_study_section, organization and project_num_split keys. The code below should work as you expect.
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
resdecode = json.loads(response.text)
data = resdecode["results"]
# Nested objects whose fields are promoted to the top level of each record.
nested_keys = ["full_study_section", "organization", "project_num_split"]
for item in data:
    for nested_key in nested_keys:
        # pop() removes the nested dict from the record; `or {}` tolerates a
        # record where the key is missing or its value is null.
        item.update(item.pop(nested_key, None) or {})

# Collect column names from every record (first-seen order) so that a record
# with extra or missing keys cannot shift values under the wrong header.
fieldnames = list(dict.fromkeys(key for item in data for key in item))

with open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline='') as f:
    # DictWriter places each value by key; restval fills columns a record lacks.
    writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
    writer.writeheader()
    writer.writerows(data)

You can move the items to the required level and remove the dict.
import json
import pprint
pp = pprint
# Nested objects to merge into each record, in the order they are applied.
nested_keys = ('full_study_section', 'project_num_split', 'organization')
# The context manager closes the file even if parsing fails.
with open("test.json") as file:
    jsonData = json.load(file)
# Flatten every record in the results list, not only the first one.
for record in jsonData['results']:
    for nested_key in nested_keys:
        # pop() removes the nested dict; `or {}` tolerates a missing or null value.
        record.update(record.pop(nested_key, None) or {})
pp.pprint(jsonData)
Output:
{u'meta': {u'limit': 25,
u'offset': 0,
u'properties': {},
u'search_id': None,
u'sort_field': u'project_start_date',
u'sort_order': u'desc',
u'sorted_by_relevance': False,
u'total': 78665},
u'results': [{u'activity_code': u'F31',
u'appl_id': 10314644,
u'appl_type_code': u'1',
u'city': None,
u'contact_pi_name': u'BROCATO, EMILY ROSE',
u'country': None,
u'dept_type': u'PHARMACOLOGY',
u'external_org_id': 353201,
u'fips_country_code': None,
u'fiscal_year': 2021,
u'full_support_year': u'01A1',
u'group_code': u'32',
u'ic_code': u'AA',
u'name': u'Special Emphasis Panel[ZAA1 GG (32)]',
u'org_city': u'RICHMOND',
u'org_country': u'UNITED STATES',
u'org_duns': [u'105300446'],
u'org_fips': u'US',
u'org_ipf_code': u'353201',
u'org_name': u'VIRGINIA COMMONWEALTH UNIVERSITY',
u'org_state': u'VA',
u'org_state_name': None,
u'org_zipcode': u'232980568',
u'project_end_date': None,
u'project_num': u'1F31AA029259-01A1',
u'project_start_date': u'2022-03-07T05:00:00Z',
u'serial_num': u'029259',
u'sra_designator_code': u'GG',
u'sra_flex_code': u'',
u'srg_code': u'ZAA1',
u'srg_flex': u'',
u'subproject_id': None,
u'suffix_code': u'A1',
u'support_year': u'01'}]}

Related

Convert request.json to DataFrame

Hi, I'm trying to convert the JSON string that my Flask API receives from a POST request into a DataFrame. I receive the error: Expected object or value.
How can I convert the malformed json to correct form to fix this? Here is the code in my backend and I'm using jupyter to test
#app.expect(model)
def post(self):
try:
formData = request.json
formData = {"0": formData}
print(formData)
df_json = pipelineTransform(formData, headers_df)
df_predict = reorder(df_json, headers_df)
#data = [val for val in formData.values()]
predictVal = classifier.predict_proba(df_predict)
print(predictVal)
#types = { 0: "Iris Setosa", 1: "Iris Versicolour ", 2: "Iris Virginica"}
response = jsonify({
"statusCode": 200,
"status": "Prediction made",
"result": "Probability of Heart Disease: " + predictVal + "%"
})
response.headers.add('Access-Control-Allow-Origin', '*')
print(response)
return response
In jupyter:
json_str2 = '''{'Age': '62', 'Sex': 'M', 'Chestpain': 'ASY', 'RestingBP': '140', 'Cholesterol': '175', 'FastingBS': '0',
'RestingECG': 'Normal', 'MaxHR': '205', 'ExerciseAngina': 'N', 'Oldpeak': '0', 'ST_slope': 'Up'}'''
df = pd.read_json(json_str2, orient='columns')
the above string is what is printed from the print statement for formData

Convert to valid JSON string by replacing single quotes with double quotes:
json_str2 = json_str2.replace("'", "\"")
You don't have an index column in the data. So, you can use orient="index" and transpose it:
df = pd.read_json(json_str2, orient="index").transpose()
[Out]:
Age Sex Chestpain RestingBP Cholesterol FastingBS RestingECG MaxHR ExerciseAngina Oldpeak ST_slope
0 62 M ASY 140 175 0 Normal 205 N 0 Up

Pulling specific Parent/Child JSON data with Python

I'm having a difficult time figuring out how to pull specific information from a json file.
So far I have this:
# Import json library
import json
# Open json database file
with open('jsondatabase.json', 'r') as f:
data = json.load(f)
# assign variables from json data and convert to usable information
identifier = data['ID']
identifier = str(identifier)
name = data['name']
name = str(name)
# Collect data from user to compare with data in json file
print("Please enter your numerical identifier and name: ")
user_id = input("Numerical identifier: ")
user_name = input("Name: ")
if user_id == identifier and user_name == name:
print("Your inputs matched. Congrats.")
else:
print("Your inputs did not match our data. Please try again.")
And that works great for a simple JSON file like this:
{
"ID": "123",
"name": "Bobby"
}
But ideally I need to create a more complex JSON file and can't find deeper information on how to pull specific information from something like this:
{
"Parent": [
{
"Parent_1": [
{
"Name": "Bobby",
"ID": "123"
}
],
"Parent_2": [
{
"Name": "Linda",
"ID": "321"
}
]
}
]
}

Here is an example that you might be able to pick apart.
You could either:
Make a custom de-jsonify object_hook as shown below and do something with it. There is a good tutorial here.
Just gobble up the whole dictionary that you get without a custom de-jsonify and drill down into it and make a list or set of the results. (not shown)
Example:
import json
from collections import namedtuple
data = '''
{
"Parents":
[
{
"Name": "Bobby",
"ID": "123"
},
{
"Name": "Linda",
"ID": "321"
}
]
}
'''
Parent = namedtuple('Parent', ['name', 'id'])


def dejsonify(json_str: dict):
    """Convert a decoded JSON object into a ``Parent`` when it describes one.

    Intended as an ``object_hook`` for ``json.loads``, which passes each
    decoded JSON object in as a ``dict`` (despite the parameter name).

    Returns a ``Parent`` built from the ``"Name"`` and ``"ID"`` entries, with
    the ID converted to ``int``, when both keys are present. Any other dict is
    returned unchanged so enclosing containers still decode normally.

    Raises ``ValueError`` or ``TypeError`` if the ``"ID"`` value cannot be
    converted to an integer.
    """
    # Test for key presence rather than truthiness so an empty name is still
    # converted, and require "ID" so a partial record never reaches int(None).
    if "Name" in json_str and "ID" in json_str:
        return Parent(json_str["Name"], int(json_str["ID"]))
    return json_str
res = json.loads(data, object_hook=dejsonify)
print(res)
# then we can do whatever... if you need lookups by name/id,
# we could put the result into a dictionary
all_parents = {(p.name, p.id) : p for p in res['Parents']}
lookup_from_input = ('Bobby', 123)
print(f'found match: {all_parents.get(lookup_from_input)}')
Result:
{'Parents': [Parent(name='Bobby', id=123), Parent(name='Linda', id=321)]}
found match: Parent(name='Bobby', id=123)

Splitting Json array values to k/v and sequencing

So I managed to split the following data to k/v pairs
"tags": [
"category--Cola",
"sugar--3.000000",
"barcode--cola001",
"barcode--cola001_1",
"language--en",
"sku--cola_classic",
"sku--cola_cherry",
],
like so...
t = product['tags']
t_filtered = [k for k in t if '--' in k]
product['tags'] = dict(s.split('--') for s in t_filtered)
I want the output to be something like this
{
"category": [Cola],
"sugar":[3.0],
"barcode":[cola001,cola001_1],
"language":[en],
"sku": [cola_classic, cola_cherry],
}
so I tried this... (ref: https://docs.python.org/3/library/collections.html#collections.defaultdict)
product['tags'] = dict(s.split('--') for s in t_filtered)
s = product['tags']
d = {}
for k, v in s:
d.setdefault(k, []).append(v)
print(d)
but getting this error:
ValueError: too many values to unpack (expected 2)
Also, just to verify, s is a <class 'dict'>, so I can't figure out the issue.

Formatting and adding a nested dictionary to a JSON file in a specific format

My Python script is working and appends to my JSON file; however, I have tried to add a numbered entry identification with no success. Additionally, I am trying to get a specific output each time the calculations are iterated. Looking for detailed examples and guidance.
Current Python Script
import json
# Dictionary All-Calculations
def dict_calc(num1, num2):
add = str(float(num1)+float(num2))
sub = str(float(num1)-float(num2))
mul = str(float(num1)*float(num2))
div = str(float(num1)/float(num2))
calc_d = {"Add" : add, "Subtract" : sub, "Multiply" : mul, "Divide" : div}
return calc_d
# Yes or No
def y_n(answer):
if answer[:1] == 'y':
return True
if answer[:1] == 'n':
return False
# Main Dictionary
data_table = {}
while True:
num1 = input("\n Enter first number: ")
num2 = input("\n Enter second number: ")
data_table = dict_calc(num1, num2)
with open('dict_calc.json', 'a', encoding='utf-8') as f:
json.dump(data_table, f, ensure_ascii=True, indent=4)
answer = input("\n Run Again? (Y/N) ").lower().strip()
if y_n(answer) == True:
continue
else:
print("\n Thank You and Goodbye")
break
Current Output Example
{
"Add": "579.0",
"Subtract": "-333.0",
"Multiply": "56088.0",
"Divide": "0.26973684210526316"
}{
"Add": "1245.0",
"Subtract": "-333.0",
"Multiply": "359784.0",
"Divide": "0.5779467680608364"
}{
"Add": "1396.0",
"Subtract": "554.0",
"Multiply": "410475.0",
"Divide": "2.315914489311164"
}
Desired Output Example - I am trying to add the Entry plus number, which increases after each iteration. In addition, I am also trying to emulate this same output.
[
{
"Entry": "1",
"Add": "579.0",
"Subtract": "-333.0",
"Multiply": "56088.0",
"Divide": "0.26973684210526316"
},
{
"Entry": "2",
"Add": "1245.0",
"Subtract": "-333.0",
"Multiply": "359784.0",
"Divide": "0.5779467680608364"
},
{
"Entry": "3",
"Add": "1396.0",
"Subtract": "554.0",
"Multiply": "410475.0",
"Divide": "2.315914489311164"
}
]

JSON is a nested structure. You can't simply append more data to it. See JSON Lines format for that.
If using regular JSON format, you must read the whole JSON structure in, update it, then write it out fully again, or simply write it once the structure is complete.
Example:
import json
# Dictionary All-Calculations
def dict_calc(num1, num2, entry):
add = str(float(num1)+float(num2))
sub = str(float(num1)-float(num2))
mul = str(float(num1)*float(num2))
div = str(float(num1)/float(num2))
calc_d = {"Entry": str(entry), "Add" : add, "Subtract" : sub, "Multiply" : mul, "Divide" : div}
return calc_d
# Yes or No
def y_n(answer):
if answer[:1] == 'y':
return True
if answer[:1] == 'n':
return False
# Empty List that will hold dictionaries.
data_table = []
entry = 0 # for tracking entry numbers
while True:
num1 = input("\n Enter first number: ")
num2 = input("\n Enter second number: ")
# Count entry and add it to dictionary list.
entry += 1
data_table.append(dict_calc(num1, num2, entry))
answer = input("\n Run Again? (Y/N) ").lower().strip()
if y_n(answer) == True:
continue
else:
print("\n Thank You and Goodbye")
# Write the complete list of dictionaries in one operation.
with open('dict_calc.json', 'w', encoding='utf-8') as f:
json.dump(data_table, f, ensure_ascii=True, indent=4)
break
Output:
[
{
"Entry": "1",
"Add": "3.0",
"Subtract": "-1.0",
"Multiply": "2.0",
"Divide": "0.5"
},
{
"Entry": "2",
"Add": "8.0",
"Subtract": "-1.0",
"Multiply": "15.75",
"Divide": "0.7777777777777778"
},
{
"Entry": "3",
"Add": "13.399999999999999",
"Subtract": "-2.2",
"Multiply": "43.68",
"Divide": "0.717948717948718"
}
]

A few things you might need to change:
you need to change data_table type to list.
you need to append dict_calc function result to it.
Add a counter
Here is your code:
import json
# Dictionary All-Calculations
def dict_calc(counter, num1, num2):
add = str(float(num1)+float(num2))
sub = str(float(num1)-float(num2))
mul = str(float(num1)*float(num2))
div = str(float(num1)/float(num2))
calc_d = {"Entry": str(counter), "Add" : add, "Subtract" : sub, "Multiply" : mul, "Divide" : div}
return calc_d
# Yes or No
def y_n(answer):
if answer[:1] == 'y':
return True
if answer[:1] == 'n':
return False
# List of result dictionaries, one per calculation
data_table = []
counter = 1
while True:
    num1 = input("\n Enter first number: ")
    num2 = input("\n Enter second number: ")
    data_table.append(dict_calc(counter, num1, num2))
    counter += 1
    # Rewrite the file with the complete list each time. Opening in append
    # mode would add another full copy of the list after the previous one,
    # leaving several concatenated arrays that are not valid JSON.
    with open('dict_calc.json', 'w', encoding='utf-8') as f:
        json.dump(data_table, f, ensure_ascii=True, indent=4)
    answer = input("\n Run Again? (Y/N) ").lower().strip()
    # y_n gives True only for answers starting with 'y'; anything else ends the loop.
    if not y_n(answer):
        print("\n Thank You and Goodbye")
        break

GAE python27 return nested json

This seems such a simple task, yet it eludes me...
class ViewAllDogs(webapp2.RequestHandler):
    """ Returns an array of json objects representing all dogs. """
    def get(self):
        """Query the dogs and write them to the response as a JSON array."""
        query = Dog.query()
        results = query.fetch(limit = MAX_DOGS) # 100
        aList = []
        for match in results:
            # The 'arrival_date' key needs both quotes; without the opening
            # quote the dict literal is a syntax error.
            aList.append({'id': match.id, 'name': match.name,
                          'owner': match.owner, 'arrival_date': match.arrival_date})
            # Appended as a separate list element, so the departure details end
            # up next to the dog's entry rather than nested inside it.
            aList.append({'departure_history': {'departure_date': match.departure_date,
                                                'departed_dog': match.departed_dog}})
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(json.dumps(aList))
The above, my best attempt to date, gets me:
[
{
"arrival_date": null,
"id": "a link to self",
"owner": 354773,
"name": "Rover"
},
{
"departure_history": {
"departed_dog": "Jake",
"departure_date": 04/24/2017
}
},
# json array of objects continues...
]
What I'm trying to get is the departure_history nested:
[
{
"id": "a link to self...",
"owner": 354773,
"name": "Rover",
"departure_history": {
"departed_dog": "Jake",
"departure_date": 04/24/2017
},
"arrival_date": 04/25/2017,
},
# json array of objects continues...
]
I've tried a bunch of different combinations, looked at json docs, python27 docs, no joy, and burned way too many hours on this. I got this far with the many related SO posts on this topic. Thanks in advance.

You can simplify a little:
aList = []
for match in results:
aDog = {'id': match.id,
'name': match.name,
'owner': match.owner,
'arrival_date':match.arrival_date,
'departure_history': {
'departure_date': match.departure_date,
'departed_dog': match.departed_dog}
}
aList.append(aDog)

This seems a bit hackish, but it works. If you know a better way, by all means, let me know. Thanks.
class ViewAllDogs(webapp2.RequestHandler):
    """ Returns an array of json objects representing all dogs. """
    def get(self):
        """Write every dog as a JSON array, with its departure details nested."""
        query = Dog.query()
        results = query.fetch(limit = MAX_DOGS) # 100
        aList = []
        for match in results:
            aList.append({
                'id': match.id,
                'name': match.name,
                'owner': match.owner,
                'arrival_date': match.arrival_date,
                # Nested once, directly under the dog's own entry; wrapping it
                # in a second 'departure_history' dict would add an extra level.
                'departure_history': {
                    'departure_date': match.departure_date,
                    'departed_dog': match.departed_dog,
                },
            })
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(json.dumps(aList))

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Python- Issue parsing multi-layered API JSON into CSV - json

Related

Convert request.json to DataFrame

Pulling specific Parent/Child JSON data with Python

Splitting Json array values to k/v and sequencing

Formatting and adding a nested dictionary to a JSON file in a specific format

GAE python27 return nested json

Categories

Resources