I am struggling to convert a JSON file to a CSV file. Any help would be appreciated. I am using Python 3.
Code
import json
import urllib.request
# Fetch Bitcoin market-chart data from the CoinGecko API and try to write the
# prices, market caps and total volumes to coingecko1.csv.
url = 'https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=1&interval=daily&sparkline=false'
req = urllib.request.Request(url)
##parsing response
# NOTE(review): myfile is opened here and never closed in this snippet.
myfile=open("coingecko1.csv","w",encoding="utf8")
headers="Prices,MrkCap,TolVol \n"
myfile.write(headers)
r = urllib.request.urlopen(req).read()
cont = json.loads(r.decode('utf-8'))
print (cont)#Just to check json result
# cont is a dict, so this loop visits its keys; `market` itself is never used.
for market in cont:
# Each value is a list of [timestamp, value] pairs (see the printed result).
prices =(cont["prices"])
market_caps = (cont["market_caps"])
total_volumes = (cont["total_volumes"])
# prices is still a list here (the other two operands are wrapped in str()),
# so `prices + ","` raises the TypeError shown in the traceback.
content= prices+","+str(market_caps)+","+str(total_volumes)+" \n"
myfile.write(content)
print("job complete")
Python Result
{'prices': [[1629331200000, 45015.46554608543], [1629361933000, 44618.52978218442]], 'market_caps': [[1629331200000, 847143004614.999], [1629361933000, 837151985590.3453]], 'total_volumes': [[1629331200000, 34668999387.83819], [1629361933000, 33367392889.386738]]}
Traceback (most recent call last):
File "ma1.py", line 22, in <module>
content= prices+","+str(market_caps)+","+str(total_volumes)+" \n"
TypeError: can only concatenate list (not "str") to list
CSV Result
CSV Result
Thank You
Your JSON is nested: each key holds a list of lists. To write it to CSV easily, you must flatten it first.
I've reformatted the code to dump the data to CSV; see below.
import csv
import json
import urllib.request
url = 'https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=1&interval=daily&sparkline=false'

# CSV column name for each series key in the API response.
COLUMN_NAMES = {
    'prices': 'Prices',
    'market_caps': 'MrkCap',
    'total_volumes': 'TolVol',
}


def flatten_market_chart(cont):
    """Group the [timestamp, value] pairs of every series by timestamp.

    Args:
        cont: dict mapping a series name (e.g. 'prices') to a list of
            [timestamp, value] pairs.

    Returns:
        dict mapping each timestamp to {series name: value}.
    """
    flatten_data = {}
    for key, pairs in cont.items():
        for pair in pairs:
            flatten_data.setdefault(pair[0], {})[key] = pair[1]
    return flatten_data


def write_market_csv(csvfile, flatten_data):
    """Write one CSV row per timestamp to the open text file *csvfile*.

    A timestamp that is missing from one of the series gets an empty cell
    for that column instead of aborting the whole export.
    """
    headers = ['Item', *COLUMN_NAMES.values()]
    writer = csv.DictWriter(csvfile, fieldnames=headers)
    writer.writeheader()
    for timestamp, series in flatten_data.items():
        row = {'Item': timestamp}
        # rename the columns as required
        for key, column in COLUMN_NAMES.items():
            row[column] = series.get(key, '')
        writer.writerow(row)


def main():
    """Download the market chart and dump it to coingecko1.csv."""
    req = urllib.request.Request(url)
    # the with-block closes the HTTP response once the body has been read
    with urllib.request.urlopen(req) as response:
        cont = json.loads(response.read().decode('utf-8'))

    # newline='' is required by the csv module; without it every row is
    # followed by a blank line on Windows
    with open('coingecko1.csv', 'w', encoding='utf-8', newline='') as csvfile:
        write_market_csv(csvfile, flatten_market_chart(cont))
    print("job complete")


if __name__ == "__main__":
    main()
I am trying to convert a JSON file to CSV format using Python. I am using the json.loads() function and then json_normalize() to flatten the objects. I was wondering if there is a better way of doing this.
This is the input file; here is one row from it:
{"ID": "02","Date": "2019-08-01","Total": 400,"QTY": 12,"Item": [{"NM": "0000000001","CD": "item_CD1","SRL": "25","Disc": [{"CD": "discount_CD1","Amount": 2}],"TxLns": {"TX": [{"TXNM": "000001-001","TXCD": "TX_CD1"}]}},{"NM": "0000000002","CD": "item_CD2","SRL": "26","Disc": [{"CD": "discount_CD2","Amount": 4}],"TxLns": {"TX": [{"TXNM": "000002-001","TXCD": "TX_CD2"}]}},{"NM": "0000000003","CD": "item_CD3","SRL": "27"}],"Cust": {"CustID": 10,"Email": "01#abc.com"},"Address": [{"FirstName": "firstname","LastName": "lastname","Address": "address"}]}
Code
import json

import pandas as pd

# Keys shared by an item and its discount / tax rows; used to join them back.
ITEM_KEYS = ['NM', 'ID']


def _concat_or_empty(frames, columns):
    """Concatenate *frames*, or return an empty frame that still has *columns*.

    Keeping the join columns on the empty frame lets the later merge succeed
    when no item of a record carries discounts or tax lines.
    """
    if frames:
        return pd.concat(frames, sort=False)
    return pd.DataFrame(columns=columns)


def flatten_record(json_obj):
    """Flatten one parsed JSON record into a DataFrame with one row per item.

    Args:
        json_obj: dict with at least the keys 'ID', 'Item' (list of dicts
            that each have 'NM') and 'Address' (list of dicts).

    Returns:
        DataFrame joining the record's top-level fields, each item, the
        item's 'Disc' and 'TxLns'/'TX' entries (left-joined, so items
        without them keep empty cells) and the record's addresses.

    Raises:
        KeyError: if 'ID', 'Item', 'Address' or an item's 'NM' is missing.
    """
    record_id = json_obj['ID']

    # Top-level fields; nested dicts become dotted columns, list columns go.
    data_main = pd.json_normalize(json_obj).drop(['Item', 'Address'], axis=1)

    data_item = pd.json_normalize(json_obj, 'Item', ['ID'])
    # The nested columns only exist when at least one item carries them.
    data_item = data_item.drop(['Disc', 'TxLns.TX'], axis=1, errors='ignore')

    disc_frames = []
    tx_frames = []
    for item in json_obj['Item']:
        # Build small dicts instead of writing NM/ID into the parsed input.
        keys = {'NM': item['NM'], 'ID': record_id}
        if 'Disc' in item:
            disc_frames.append(
                pd.json_normalize({**keys, 'Disc': item['Disc']}, 'Disc', ITEM_KEYS)
            )
        tx_lines = item.get('TxLns', {})
        if 'TX' in tx_lines:
            tx_frames.append(
                pd.json_normalize({**keys, 'TX': tx_lines['TX']}, 'TX', ITEM_KEYS)
            )

    data_disc = _concat_or_empty(disc_frames, ITEM_KEYS)
    data_tx = _concat_or_empty(tx_frames, ITEM_KEYS)

    merged = pd.merge(data_item, data_disc, on=ITEM_KEYS, how='left')
    merged = pd.merge(merged, data_tx, on=ITEM_KEYS, how='left')
    data_address = pd.json_normalize(json_obj, 'Address', ['ID'])
    merged = pd.merge(data_main, merged, on=['ID'])
    return pd.merge(merged, data_address, on=['ID'])


def convert_json_lines(src="sample.json", dst="data_merged.csv"):
    """Convert a file with one JSON record per line into a flat CSV file.

    Per-record frames are collected in a list and concatenated once;
    growing a DataFrame inside the loop copies all earlier rows on every
    iteration, and DataFrame.append no longer exists in pandas 2.x.

    Returns:
        The de-duplicated DataFrame that was written to *dst*.
    """
    frames = []
    with open(src) as f:
        for line in f:
            if not line.strip():
                # tolerate blank lines, e.g. a trailing newline at the end
                continue
            frames.append(flatten_record(json.loads(line)))

    data_final = pd.concat(frames, sort=False) if frames else pd.DataFrame()
    data_final = data_final.drop_duplicates(keep='first')
    data_final.to_csv(dst, index=False)
    return data_final


if __name__ == "__main__":
    convert_json_lines()
this is the output:
ID,Date,Total,QTY,Cust.CustID,Cust.Email,NM,CD_x,SRL,CD_y,Amount,TXNM,TXCD,FirstName,LastName,Address
02,2019-08-01,400,12,10,01#abc.com,0000000001,item_CD1,25,discount_CD1,2.0,000001-001,TX_CD1,firstname,lastname,address
02,2019-08-01,400,12,10,01#abc.com,0000000002,item_CD2,26,discount_CD2,4.0,000002-001,TX_CD2,firstname,lastname,address
02,2019-08-01,400,12,10,01#abc.com,0000000003,item_CD3,27,,,,,firstname,lastname,address
The code is working fine for now. By Better I mean:
Is it efficient in terms of time and space complexity? If this code has to process around 10K records in a file, is this the optimized solution?
I am trying to create a json file which reads data from test.xlsx. My sample code is below.
Instead of "WO-12345" and other values, I want that to be read from the excel sheet, like I want it to be read from a particular cell in excel.
import xlrd
from collections import OrderedDict
import simplejson as json
import json
# Build one work-order record and write it to data1.json as indented JSON.
data_list = []
data = OrderedDict()
data['workOrder'] = "WO-12345"
data['alternateStart'] = "2018-01-13T10:00:00Z"
data['mobileNumber'] = "(555) 555-5555"
data['officeNumber'] = "(555) 555-5554"
data['description'] = "Testing"
data['equipment'] = "Testing"
data_list.append(data)

# Only the single record `data` is written, not `data_list`.
# The with-block closes (and flushes) the file even if serialisation fails.
with open('data1.json', 'w') as jsonfile:
    json.dump(data, jsonfile, indent=3, sort_keys=False)
    jsonfile.write('\n')
If you want to read an Excel file, pandas provides pandas.read_excel; it returns a pandas.DataFrame, which has a to_json method.
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
This is my code to convert CSV into JSON.
import csv
import json
# Read the CSV file into a list of rows, turn every row into a dict keyed by
# the header row, and print the result as JSON.
with open('Documents/SampleCSVStory.csv', 'r') as f:
# NOTE(review): the output shown below has the whole header as one key
# ("Id,Name,Description"), so the rows are not being split on ';' - the file
# appears to be comma-separated; confirm the delimiter.
reader = csv.reader(f, delimiter=';')
data_list = list()
for row in reader:
data_list.append(row)
# data_list[0] is the header row; it is zipped with every row, itself included.
data = [dict(zip(data_list[0],row)) for row in data_list]
# Drop the first dict, which maps the header row onto itself.
data.pop(0)
s = json.dumps(data)
print (s)
but the output coming like this
[{"Id,Name,Description": "1,User 1,Python Developer"}
my expectation is
[{"Id": "1", "Name": "User 1", "Description": "Python Developer"}]
Can anyone help me with this, please?
import csv
import json
def csv_to_json(path, delimiter=','):
    """Return the rows of the CSV file at *path* as a JSON array of objects.

    The first row supplies the keys. The delimiter defaults to a comma
    because the output in the question ("Id,Name,Description" arriving as a
    single key) shows the sample file is comma-separated, not ';'-separated.
    """
    # newline='' lets the csv module handle line endings inside quoted fields
    with open(path, 'r', newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        return json.dumps(list(reader))


if __name__ == "__main__":
    # the serialised result has to be printed (or stored) to be of any use
    print(csv_to_json('Documents/SampleCSVStory.csv'))
How to create a json object/csv using below variables data:
out = [['core java'],['angular js']]
skills = 'Java'
Can someone please tell me how I can get a json object/csv as shown in expected output?
Expected Output:
Java
0 core java
1 angular js
try this example:
import json
# Build the payload as a dict literal and serialise it to a JSON string.
data = {'dynamic_col_name': 'dynamic_upcoming_value'}
json_data = json.dumps(data)
Use csv module:
With list:
import csv
# The first row is the header; the remaining rows are the values.
out = [['Java'], ['core java'], ['angular js']]
# newline='' keeps the csv module's own line endings from being translated
with open('some.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(out)
With dict:
import csv
# Values from the question; 'core java' matches its expected output.
out = [['core java'], ['angular js']]
skills = ['Java']
# newline='' keeps the csv module's own line endings from being translated
with open('names.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=skills)
    writer.writeheader()
    # Pair every cell with the header at the same position.
    rows = [{skills[idx]: cell for idx, cell in enumerate(row)} for row in out]
    writer.writerows(rows)