Python: how to convert JSON that contains multiple arrays to a pandas DataFrame - json

Hey, I'm having trouble converting JSON to a DataFrame using pandas. Here is my solution:
import json
import pandas as pd
# Load the JSON document. The context manager closes the file handle as soon
# as parsing is done instead of leaving it open for the rest of the script.
with open('write.json') as f:
    data = json.load(f)
# orient='index' turns each top-level key into a row label; reset_index()
# then moves those labels into an ordinary column.
df = pd.DataFrame.from_dict(data, orient='index').reset_index()
print(df)
And here is the JSON file:
{"_id":"60b53d92ccb1483964da45f9","Avg_sm":[26.66953125,26.66953125,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.6647859922179,26.6647859922179,26.45263157894737,26.45263157894737],"Avg_st":[22.6517578125,22.6517578125,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65272373540856,22.65272373540856,22.694567062818336,22.694567062818336],"SensorCoordinates":[10.33363276545083,36.8434191667489],"SensorIdentifier":["CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC"],"count":24,"date":["25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","26-06-2021","26-06-2021","26-06-2021","26-06-2021"],"min_sm":[21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1],"sensorId":["60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a
21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285"],"status":[true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true]}

IIUC:
you can try:
df=pd.json_normalize(data).apply(pd.Series.explode,ignore_index=True)
OR
df = pd.DataFrame.from_dict(data,orient = 'index').T.apply(pd.Series.explode,ignore_index=True)

Related

How to convert data to pandas dataframe [duplicate]

REST API call response in bytes, how to convert the data in bytes to dataframe object
import requests
import pandas as pd
from io import StringIO
url ='url_path with_ending_&format=csv'
response =requests.get(url, auth=(user,password), allow_redirects=True)
result = str((response.content, 'utf-8'))
data = StringIO(result)
df = pd.DataFrame(data)
I solved the same issue with pandas.read_csv():
result = str(response.content, 'utf-8')
data = StringIO(result)
df = pd.read_csv(data)

JSON list items to dataframe

I am using an API that has changed its spec and now my JSON feed is slightly different.
BEFORE:
{"code":2000,"message":"SUCCESS","data":
{"1":
{"id":1,
"name":"Amanda",
"score":"57.36%",
"average":"53.47%"
}
}
}
Then, I used something to this effect:
import json
import pandas as pd
jsonfile = 'file.json'
# Parse the feed; the file is closed when the `with` block exits.
with open(jsonfile) as j:
    data = json.load(j)
# data["data"] maps an id string to one record dict; only the record dicts
# are needed, so the keys are dropped.
rows = list(data["data"].values())
df = pd.DataFrame(rows, columns=['id', 'name', 'score', 'average'])
Source AFTER:
{"status":"success","code":0,"data":
{"data":
[
{
"id":1,
"name":"Robert",
"score":"48.85%",
"average":"40.52%"
}
]
}
}
So I'm attempting to adjust using some of the resources:
Convert JSON list to pandas dataframe
JSON to pandas DataFrame
how to convert json data with list of list to dataframe using python pandas
What I've tried so far:
import json
import pandas as pd
from pandas import json_normalize
jsonfile = 'file.json'
# Parse the feed; the file is closed when the `with` block exits.
with open(jsonfile) as j:
    data = json.load(j)
# NOTE(review): the record path ['data'] points at the outer "data" object,
# which in the new feed is a dict rather than a list. The list of records
# sits one level deeper, so the path presumably needs to be
# ['data', 'data'] -- confirm against the real feed.
df = json_normalize(data, ['data'])
I've also tried:
df = pd.DataFrame.from_records(data)
I get the following:
TypeError: {....} for path data. Must be list or null.
What am I missing here?

how to read large json file on s3 to dataframe using sagemaker

I tried using the code:
from sagemaker import get_execution_role
import pandas as pd
bucket = 'xxx'
data_key = 'TV.json'
data_location = 's3://{}/{}'.format(bucket, data_key)
# With lines=True and a chunksize, read_json yields DataFrames chunk by chunk
# (here up to 1000 JSON lines each) instead of loading one large frame.
textfilereader = pd.read_json(data_location, lines=True, chunksize=1000)
dflist = []
for df in textfilereader:
    # Must use the same name as the list created above; Python names are
    # case-sensitive.
    dflist.append(df)
# Stitch the chunks back together into a single frame.
df = pd.concat(dflist, sort=False)
error:sequence item 0: expected str instance, bytes found

Pandas DataFrame: extracting a value from JSON returned as the content of a request

I want to extract a value from the JSON that is returned as the content of a request and store it in a pandas DataFrame.
import pandas as pd
import pandas as pd
import json
import requests
import ast
from pandas.io.json import json_normalize
df['response'] = df.URL.apply(lambda u: requests.get(u).content)
df.head()
b'{"error":0,"short":"http:\\/\\/192.168.42.72\\/ECyKY"}'
b'{"error":0,"short":"http:\\/\\/192.168.42.72\\/IsMgE"}'
When we use Python without Pandas, we can just use:
resp = requests.get(u)
y=resp.json()
print(y)
print(y['short'])
to store the short value as "http://192.168.42.72/ECyKY"
I spent hours trying to get it to work with pandas without luck. Any hints?
Instead of using requests.get(u).content directly, use requests.get(u).json(), then use Series.str.get to extract the value corresponding to the key short from the dictionary, and assign it to a new column short:
df['response'] = df['URL'].apply(lambda u: requests.get(u).json())
df['short'] = df['response'].str.get('short')
# print(df)
response short
0 {'error': 0, 'short': 'http://192.168.42.72/EC... http://192.168.42.72/ECyKY
1 {'error': 0, 'short': 'http://192.168.42.72/Is... http://192.168.42.72/IsMgE

I am having trouble converting my nested json into a dataframe. I am getting the json from an API and want it in a dataframe

This code is from Sportradar API. The API outputs the data as JSON or XML; below is my attempt at taking the JSON and making it into a dataframe.
import numpy as np
import pandas as pd
import http.client
import json
from pandas.io.json import json_normalize
# API call; note that the API key is embedded in the request path's query string.
conn = http.client.HTTPSConnection("api.sportradar.us")
# Request a single player's profile as JSON.
conn.request("GET", "/nfl/official/trial/v5/en/players/0acdcd3b-5442-4311-a139-ae7c506faf88/profile.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
# Alternative endpoints (game play-by-play and team full roster), kept for reference:
#conn.request("GET", "/nfl/official/trial/v5/en/games/b7aeb58f-7987-4202-bc41-3ad9a5b83fa4/pbp.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
#conn.request("GET", "/nfl/official/trial/v5/en/teams/0d855753-ea21-4953-89f9-0e20aff9eb73/full_roster.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
#conn.request("GET", "/nfl/official/trial/v5/en/games/030d37cf-b896-4f10-b16e-2a5120fef6cf/pbp.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
res = conn.getresponse()
# Read the response body, decode it as UTF-8 and parse it as JSON.
data = res.read()
data_dec = data.decode("utf-8")
json_data = json.loads(data_dec)
# Flatten the parsed (nested) JSON with json_normalize.
flat_data = json_normalize(json_data)
print(json_data)
# Two separate attempts at a DataFrame: `df` is built from the flattened
# data, `df2` directly from the parsed JSON with its top-level keys as the index.
df = pd.DataFrame.from_records(flat_data)
df2 = pd.DataFrame.from_dict(json_data, orient='index')
# Move index level 0 into a regular column, modifying df2 in place.
df2.reset_index(level=0, inplace=True)
# The closest thing to a dataframe I can get
df.head()
Why not make use of a publicly available and maintained Python wrapper?
See link.