Hey, I'm having trouble converting JSON to a DataFrame using pandas. Here is my attempt:
import json
import pandas as pd
# Read the JSON document. The context manager closes the file handle even if
# parsing fails; JSON text is UTF-8, so the encoding is stated explicitly
# instead of relying on the platform default.
with open('write.json', encoding='utf-8') as f:
    data = json.load(f)
# orient='index' turns every top-level key into a row; reset_index() then
# moves those keys out of the index into an ordinary column.
df = pd.DataFrame.from_dict(data,orient = 'index').reset_index()
print(df)
And here is the JSON file:
{"_id":"60b53d92ccb1483964da45f9","Avg_sm":[26.66953125,26.66953125,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.666666666666668,26.6647859922179,26.6647859922179,26.45263157894737,26.45263157894737],"Avg_st":[22.6517578125,22.6517578125,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65204678362573,22.65272373540856,22.65272373540856,22.694567062818336,22.694567062818336],"SensorCoordinates":[10.33363276545083,36.8434191667489],"SensorIdentifier":["CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC","CCCCCCCCCCCCCCCC"],"count":24,"date":["25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","25-06-2021","26-06-2021","26-06-2021","26-06-2021","26-06-2021"],"min_sm":[21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1,21.1],"sensorId":["60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285","60b54789a
21c170aecb25285","60b54789a21c170aecb25285","60b54789a21c170aecb25285"],"status":[true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true,true]}
If I understand correctly, you can try:
# Flatten the document into a single-row frame, then expand the list held in
# each column into one value per row (renumbering the rows from 0).
df = pd.json_normalize(data).apply(lambda col: col.explode(ignore_index=True))
OR
# Build one row per key, transpose so the keys become columns, then expand
# the list held in each column into one value per row.
wide = pd.DataFrame.from_dict(data, orient='index').T
df = wide.apply(lambda col: col.explode(ignore_index=True))
Pandas DataFrame: extracting a value from JSON that is returned as the content of a request.
import pandas as pd
import pandas as pd
import json
import requests
import ast
from pandas.io.json import json_normalize
# Request every URL in the column and keep the raw (bytes) body of each reply.
df['response'] = df['URL'].map(lambda link: requests.get(link).content)
df.head()
b'{"error":0,"short":"http:\\/\\/192.168.42.72\\/ECyKY"}'
b'{"error":0,"short":"http:\\/\\/192.168.42.72\\/IsMgE"}'
When we use Python without Pandas, we can just use:
# Fetch a single URL and decode its JSON body.
resp = requests.get(u)
y = resp.json()
# Show the whole payload first, then only the shortened link.
print(y, y['short'], sep='\n')
This stores the short value as "http://192.168.42.72/ECyKY".
I spent hours trying to get this to work with pandas without luck. Any hints?
Instead of using `requests.get(u).content` directly, use `requests.get(u).json()`; then use `Series.str.get` to extract the value corresponding to the key `short` from each dictionary, and assign it to a new column `short`:
# Decode each reply as JSON so the column holds parsed objects, not bytes.
df['response'] = df['URL'].map(lambda link: requests.get(link).json())
# .str.get performs an element-wise lookup of the 'short' key.
df['short'] = df['response'].str.get('short')
# print(df)
response short
0 {'error': 0, 'short': 'http://192.168.42.72/EC... http://192.168.42.72/ECyKY
1 {'error': 0, 'short': 'http://192.168.42.72/Is... http://192.168.42.72/IsMgE
This code is from Sportradar API. The API outputs the data as JSON or XML; below is my attempt at taking the JSON and making it into a dataframe.
import numpy as np
import pandas as pd
import http.client
import json
from pandas.io.json import json_normalize
#API Call including my key
# NOTE(review): the API key is embedded in the request path below; consider
# loading it from configuration rather than keeping it in the script.
conn = http.client.HTTPSConnection("api.sportradar.us")
try:
    conn.request("GET", "/nfl/official/trial/v5/en/players/0acdcd3b-5442-4311-a139-ae7c506faf88/profile.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
    #conn.request("GET", "/nfl/official/trial/v5/en/games/b7aeb58f-7987-4202-bc41-3ad9a5b83fa4/pbp.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
    #conn.request("GET", "/nfl/official/trial/v5/en/teams/0d855753-ea21-4953-89f9-0e20aff9eb73/full_roster.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
    #conn.request("GET", "/nfl/official/trial/v5/en/games/030d37cf-b896-4f10-b16e-2a5120fef6cf/pbp.json?api_key=99s3ewmn5rrdrd9r3v5wrfgd")
    res = conn.getresponse()
    data = res.read()
finally:
    # Release the socket even when the request or the read fails.
    conn.close()
data_dec = data.decode("utf-8")
json_data = json.loads(data_dec)
# Flatten nested objects into columns. The top-level pd.json_normalize is
# used because the pandas.io.json spelling is the deprecated entry point.
flat_data = pd.json_normalize(json_data)
print(json_data)
# json_normalize already returns a DataFrame, so it is used directly instead
# of being passed through DataFrame.from_records again.
df = flat_data
# Alternative view: one row per top-level key of the response.
df2 = pd.DataFrame.from_dict(json_data, orient='index')
df2.reset_index(level=0, inplace=True)
#The closest thing to a dataframe I can get
df.head()
Why not make use of a Python wrapper that is publicly available and maintained?
See link.
I have some code that parses JSON data from an API using urllib, as follows:
import pandas as pd
import json
import urllib.request
import os
import time
import csv
import datetime
# Send URL Request & Get JSON Data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())
# Select Data from result section
df = pd.DataFrame(data=data['result'])
# tickers = df['MarketName']
tickers= ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)
# Name of the extra column that records which market each row belongs to.
Market01 = "Market"
# Download the tick history of each market in turn and append it to all.csv.
for ticker in tickers:
    with urllib.request.urlopen("https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin") as URL:
        data = json.loads(URL.read().decode())
    df2 = pd.DataFrame(data=data['result'])
    df2[Market01] = ticker
    # mode='a' with header=False: every market is appended to the same file
    # without repeating the column header.
    df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
    print("done " + ticker)
Actually, it is not a request for only five currencies: there are 295 requests for 295 currencies, which take 5 minutes to write all the required data to the CSV file (a very long time).
I wonder if there is a way to send all the requests in parallel to reduce the time, while still saving the data to the CSV file as a DataFrame.
I searched many times and found the multiprocessing module, but I couldn't find a sample similar to my case.
Can anyone help me, please?
What about something like this?
import pandas as pd
import json
import urllib.request
import os
from urllib import parse
import csv
import datetime
from multiprocessing import Process, Pool
import time
# Send URL Request & Get JSON Data
with urllib.request.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries") as url:
    data = json.loads(url.read().decode())
# Select Data from result section
df = pd.DataFrame(data=data['result'])
# tickers = df['MarketName']
tickers= ["BTC-1ST", "BTC-2GIVE", "BTC-ABY", "BTC-ARDR", "BTC-WAVE"]
print(tickers)
def http_get(url):
    """Fetch *url* and return it together with the raw response body.

    Args:
        url: Address to request; passed unchanged to ``urlopen``.

    Returns:
        A dict with two keys: ``"url"`` (the address that was requested) and
        ``"data"`` (the undecoded response body as ``bytes``).

    Raises:
        urllib.error.URLError: If the request fails.
    """
    # The context manager closes the response once the body has been read;
    # the 5 second timeout keeps one stalled request from blocking a worker.
    with urllib.request.urlopen(url, timeout=5) as response:
        return {"url": url, "data": response.read()}
# One GetTicks request URL per market.
urls = [ "https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName=" + ticker + "&tickInterval=thirtyMin" for ticker in tickers ]

# Worker processes may re-import this module (always the case with the
# "spawn" start method), so the pool must only be created when the file is
# run as the main script.
if __name__ == "__main__":
    # The context manager shuts the pool down once map() has returned.
    with Pool(processes=5) as pool:
        results = pool.map(http_get, urls)

    # Name of the extra column that records which market each row belongs to.
    Market01 = "Market"
    for result in results:
        j = json.loads(result['data'].decode())
        # The tick rows live under the 'result' key of the reply; framing the
        # whole envelope would produce the wrong columns.
        df2 = pd.DataFrame(data=j['result'])
        # Recover the market name from the query string of the request URL.
        marketName = parse.parse_qs(parse.urlparse(result['url']).query)['marketName'][0]
        df2[Market01] = marketName
        # mode='a' with header=False: every market is appended to the same
        # file without repeating the column header.
        df2.to_csv('all.csv', encoding="utf-8-sig", index=False, mode='a', header=False)
        print("done " + marketName)