Writing Nested JSON Dictionary List To CSV - json

Issue
I'm trying to write the following nested list of dictionary which has another list of dictionary to csv. I tried multiple ways but I can not get it to properly write it:
Json Data
[
{
"Basic_Information_Source": [
{
"Image": "image1.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277274
}
],
"Basic_Information_Destination": [
{
"Image": "image1_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277539
}
],
"Values": [
{
"Value1": 75.05045463635267,
"Value2": 0.006097560975609756,
"Value3": 0.045083481733371615,
"Value4": 0.008639858263904898
}
]
},
{
"Basic_Information_Source": [
{
"Image": "image2.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1786254
}
],
"Basic_Information_Destination": [
{
"Image": "image2_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1782197
}
],
"Values": [
{
"Value1": 85.52662890580055,
"Value2": 0.0005464352720450282,
"Value3": 0.013496113910369758,
"Value4": 0.003800236380811839
}
]
}
]
Working Code
I tried to use the following code and it works, but it only saved the headers and then dumps all the underlying list as text in the csv file:
import json
import csv
def Convert_CSV():
ar_enc_file = open('analysis_results_enc.json','r')
json_data = json.load(ar_enc_file)
keys = json_data[0].keys()
with open('test.csv', 'w', encoding='utf8', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(json_data)
ar_enc_file.close()
Convert_CSV()
Working Output / Issue with it
The output writes the following header:
Basic_Information_Source
Basic_Information_Destination
Values
And then it dumps all other data inside each header as a list like this:
[{'Image': 'image1.png', 'Image_Format': 'PNG', 'Image_Mode': 'RGB', 'Image_Width': 574, 'Image_Height': 262, 'Image_Size': 277274}]
Expected Output / Sample
Trying to generate the above type of output for each dictionary in the array of dictionaries.
How do it properly write it?

I'm sure someone will come by with a much more elegant solution. That being said:
You have a few problems.
You have inconsistent entries with the fields you want to align.
Even if you pad your data you have intermediate lists that need flattened out.
Then you still have separated data that needs to be merged together.
DictWriter AFAIK expects it's data in the format of [{'column': 'entry'},{'column': 'entry'} so even if you do all the previous steps you're still not in the right format.
So let's get started.
For the first two parts we can combine.
def pad_list(lst, size, padding=None):
# we wouldn't have to make a copy but I prefer to
# avoid the possibility of getting bitten by mutability
_lst = lst[:]
for _ in range(len(lst), size):
_lst.append(padding)
return _lst
# this expects already parsed json data
def flatten(json_data):
lst = []
for dct in json_data:
# here we're just setting a max size of all dict entries
# this is in case the shorter entry is in the first iteration
max_size = 0
# we initialize a dict for each of the list entries
# this is in case you have inconsistent lengths between lists
flattened = dict()
for k, v in dct.items():
entries = list(next(iter(v), dict()).values())
flattened[k] = entries
max_size = max(len(entries), max_size)
# here we append the padded version of the keys for the dict
lst.append({k: pad_list(v, max_size) for k, v in flattened.items()})
return lst
So now we have a flattened, list of dicts whos values are lists of consistent length. Essentially:
[
{
"Basic_Information_Source": [
"image1.png",
"PNG",
"RGB",
574,
262,
277274
],
"Basic_Information_Destination": [
"image1_dst.png",
"PNG",
"RGB",
574,
262,
277539
],
"Values": [
75.05045463635267,
0.006097560975609756,
0.045083481733371615,
0.008639858263904898,
None,
None
]
}
]
But this list has multiple dicts that need to be merged, not just one.
So we need to merge.
# this should be self explanatory
def merge(flattened):
merged = dict()
for dct in flattened:
for k, v in dct.items():
if k not in merged:
merged[k] = []
merged[k].extend(v)
return merged
This gives us something close to this:
{
"Basic_Information_Source": [
"image1.png",
"PNG",
"RGB",
574,
262,
277274,
"image2.png",
"PNG",
"RGB",
1600,
1066,
1786254
],
"Basic_Information_Destination": [
"image1_dst.png",
"PNG",
"RGB",
574,
262,
277539,
"image2_dst.png",
"PNG",
"RGB",
1600,
1066,
1782197
],
"Values": [
75.05045463635267,
0.006097560975609756,
0.045083481733371615,
0.008639858263904898,
None,
None,
85.52662890580055,
0.0005464352720450282,
0.013496113910369758,
0.003800236380811839,
None,
None
]
}
But wait, we still need to format it for the writer.
Our data needs to be in the format of [{'column_1': 'entry', column_2: 'entry'},{'column_1': 'entry', column_2: 'entry'}
So we format:
def format_for_writer(merged):
formatted = []
for k, v in merged.items():
for i, item in enumerate(v):
# on the first pass this will append an empty dict
# on subsequent passes it will be ignored
# and add keys into the existing dict
if i >= len(formatted):
formatted.append(dict())
formatted[i][k] = item
return formatted
So finally, we have a nice clean formatted data structure we can just hand to our writer function.
def convert_csv(formatted):
keys = formatted[0].keys()
with open('test.csv', 'w', encoding='utf8', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(formatted)
Full code with json string:
import json
import csv
json_raw = """\
[
{
"Basic_Information_Source": [
{
"Image": "image1.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277274
}
],
"Basic_Information_Destination": [
{
"Image": "image1_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277539
}
],
"Values": [
{
"Value1": 75.05045463635267,
"Value2": 0.006097560975609756,
"Value3": 0.045083481733371615,
"Value4": 0.008639858263904898
}
]
},
{
"Basic_Information_Source": [
{
"Image": "image2.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1786254
}
],
"Basic_Information_Destination": [
{
"Image": "image2_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1782197
}
],
"Values": [
{
"Value1": 85.52662890580055,
"Value2": 0.0005464352720450282,
"Value3": 0.013496113910369758,
"Value4": 0.003800236380811839
}
]
}
]
"""
def pad_list(lst, size, padding=None):
_lst = lst[:]
for _ in range(len(lst), size):
_lst.append(padding)
return _lst
def flatten(json_data):
lst = []
for dct in json_data:
max_size = 0
flattened = dict()
for k, v in dct.items():
entries = list(next(iter(v), dict()).values())
flattened[k] = entries
max_size = max(len(entries), max_size)
lst.append({k: pad_list(v, max_size) for k, v in flattened.items()})
return lst
def merge(flattened):
merged = dict()
for dct in flattened:
for k, v in dct.items():
if k not in merged:
merged[k] = []
merged[k].extend(v)
return merged
def format_for_writer(merged):
formatted = []
for k, v in merged.items():
for i, item in enumerate(v):
if i >= len(formatted):
formatted.append(dict())
formatted[i][k] = item
return formatted
def convert_csv(formatted):
keys = formatted[0].keys()
with open('test.csv', 'w', encoding='utf8', newline='') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(formatted)
def main():
json_data = json.loads(json_raw)
flattened = flatten(json_data)
merged = merge(flattened)
formatted = format_for_writer(merged)
convert_csv(formatted)
if __name__ == '__main__':
main()

Related

pandas column to list for a json file

from a Dataframe, I want to have a JSON output file with one key having a list:
Expected output:
[
{
"model": "xx",
"id": 1,
"name": "xyz",
"categories": [1,2],
},
{
...
},
]
What I have:
[
{
"model": "xx",
"id": 1,
"name": "xyz",
"categories": "1,2",
},
{
...
},
]
The actual code is :
df = pd.read_excel('data_threated.xlsx')
result = df.reset_index(drop=True).to_json("output_json.json", orient='records')
parsed = json.dumps(result)
jsonfile = open("output_json.json", 'r')
data = json.load(jsonfile)
How can I achive this easily?
EDIT:
print(df['categories'].unique().tolist())
['1,2,3', 1, nan, '1,2,3,6', 9, 8, 11, 4, 5, 2, '1,2,3,4,5,6,7,8,9']
You can use:
df = pd.read_excel('data_threated.xlsx').reset_index(drop=True)
df['categories'] = df['categories'].apply(lambda x: [int(i) for i in x.split(',')] if isinstance(x, str) else '')
df.to_json('output.json', orient='records', indent=4)
Content of output.json
[
{
"model":"xx",
"id":1,
"name":"xyz",
"categories":[
1,
2
]
}
]
Note you can also use:
df['categories'] = pd.eval(df['categories'])

JSON to CSV - Go 4 level deep

I would like to extract only a small fraction of my JSON response in a .csv file. However, I need to go to 4 levels deep and I am currently only able to go to 3 level deep. My goal is to have a .csv with 3 columns campaign_id, campaign_name, cost_per_click and 3 lines for each of my campaigns.
Original JSON
{
"318429215527453": {
"conversion_events": {
"data": [
{
"id": "djdfhdf",
"name": "Total",
"cost": 328.14,
"metrics_breakdown": {
"data": [
{
"campaign_id": 2364,
"campaign_name": "uk",
"cost_per_click": 1345
},
{
"campaign_id": 7483,
"campaign_name": "fr",
"cost_per_click": 756
},
{
"campaign_id": 8374,
"campaign_name": "spain",
"cost_per_click": 545
},
{
"campaign_id": 2431,
"campaign_name": "ge",
"cost_per_click": 321
}
],
"paging": {
"cursors": {
"after": "MjUZD"
},
"next": "https://graph.facebook.com/v9.0/xxxx"
}
}
}
],
"summary": {
"count": 1,
"metric_date_range": {
"date_range": {
"begin_date": "2021-01-09T00:00:00+0100",
"end_date": "2021-02-08T00:00:00+0100",
"time_zone": "Europe/Paris"
},
"prior_period_date_range": {
"begin_date": "2020-12-10T00:00:00+0100",
"end_date": "2021-01-09T00:00:00+0100"
}
}
}
},
"id": "xxx"
}
}
reformated.py
import json
with open('campaigns.json') as json_file:
data = json.load(json_file)
reformated_json = data['318429215527453']['conversion_events']['data']
with open('data.json', 'w') as outfile:
json.dump(reformated_json, outfile)
I tried to add ['metrics_breakdown'] or another ['data'] at the end of reformated_json but I am getting TypeError: list indices must be integers or slices, not str.
{
"id": "djdfhdf",
"name": "Total",
"cost": 328.14,
"metrics_breakdown": {
"data": [
{
"campaign_id": 2364,
"campaign_name": "uk",
"cost_per_click": 1345,
},
{
"campaign_id": 7483,
"campaign_name": "fr",
"cost_per_click": 756,
},
{
"campaign_id": 8374,
"campaign_name": "spain",
"cost_per_click": 545,
},
{
"campaign_id": 2431,
"campaign_name": "ge",
"cost_per_click": 321,
},
],
"paging": {
"cursors": {
"after": "MjUZD"
},
"next": "https://graph.facebook.com/v9.0/xxxx"
}
}
}
]
import csv
import json
from typing import Dict, List, Union # typing for easy development
# read json function
def read_json(json_path: str) -> Union[Dict, List]:
with open(json_path, 'r') as file_io:
return json.load(file_io)
# write csv function
def write_csv(data: List[Dict], csv_path: str) -> None:
with open(csv_path, 'w') as file:
fieldnames = set().union(*data)
writer = csv.DictWriter(file, fieldnames=fieldnames,
lineterminator='\n')
writer.writeheader()
writer.writerows(data)
# parse campaigns using a comprehension
def parse_campaigns(data: Dict) -> List[Dict]:
return [row
for value in data.values() # first level (conversion events)
for root_data in value['conversion_events']['data'] # conversion events/data
for row in root_data['metrics_breakdown']['data']] # data/metrics_breakdown/data
json_data = read_json('./campaigns.json')
campaign_data = parse_campaigns(json_data)
write_csv(campaign_data, 'campaigns.csv')
campaigns.csv (I copied the data to multiple root dictionary objects):
cost_per_click,campaign_id,campaign_name
1345,2364,uk
756,7483,fr
545,8374,spain
321,2431,ge
1345,2364,uk
756,7483,fr
545,8374,spain
321,2431,ge
The first data subkey contains a single-element list. Dereference with [0] to get the element, then fetch the next layers of keys. Then a DictWriter can be used to write the CSV lines:
import json
import csv
with open('campaigns.json') as json_file:
data = json.load(json_file)
items = data['318429215527453']['conversion_events']['data'][0]['metrics_breakdown']['data']
with open('data.csv', 'w', newline='') as outfile:
w = csv.DictWriter(outfile,fieldnames=items[0].keys())
w.writeheader()
w.writerows(items)
Output:
campaign_id,campaign_name,cost_per_click
2364,uk,1345
7483,fr,756
8374,spain,545
2431,ge,321

Add a # to beginning of each key in Json Python2.7

I'm trying to add a "#" at the beginning to each key of a Json object (got it from RabbitMQ api calls)
here is my attempt :
#!/bin/python
# Libraries import
import requests
import json
import sys
import os
# Define URLs
overview="/api/overview"
nodes="/api/nodes"
queues="/api/queues"
# Get credentials from file
with open('/credentials') as json_file:
data = json.load(json_file)
user = data['user']
passwd = data['pass']
# Test which URL we want to call
if ''.join(sys.argv[1]) == "overview":
commande=overview
if ''.join(sys.argv[1]) == "queues":
commande=queues
if ''.join(sys.argv[1]) == "nodes":
commande=nodes
def append(mydict):
return dict(map(lambda (key, value): ("#"+str(key), value), mydict.items()))
def transform(multileveldict):
new = append(multileveldict)
for key, value in new.items():
if isinstance(value, dict):
new[key] = transform(value)
return new
def upper_keys(x):
if isinstance(x, list):
return [upper_keys(v) for v in x]
elif isinstance(x, dict):
return dict((k.upper(), upper_keys(v)) for k, v in x.iteritems())
else:
return x
# Main
response = requests.get("http://localhost:15672" + commande, auth=(user, passwd))
if(response.ok):
json_data = json.loads(response.content)
json = json.dumps(upper_keys(json_data), indent=4)
print(json)
Here is the JSON that I get in "response.content" :
[
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-01",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-02",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-03",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"MESSAGES_UNACKNOWLEDGED_RAM": 0,
"RECOVERABLE_SLAVES": null,
"CONSUMERS": 0,
"REDUCTIONS": 9700519,
"AUTO_DELETE": false,
"MESSAGE_BYTES_PAGED_OUT": 0,
"MESSAGE_BYTES_UNACKNOWLEDGED": 0,
"REDUCTIONS_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES": 0,
"MESSAGES_UNACKNOWLEDGED": 0,
"CONSUMER_UTILISATION": null,
"EXCLUSIVE": false,
"VHOST": "/",
"GARBAGE_COLLECTION": {
"MAX_HEAP_SIZE": 0,
"MIN_HEAP_SIZE": 233,
"FULLSWEEP_AFTER": 65535,
"MINOR_GCS": 15635,
"MIN_BIN_VHEAP_SIZE": 46422
},
"MESSAGES_DETAILS": {
"RATE": 0.0
},
"SLAVE_NODES": [
"rabbit#server567"
],
"MESSAGE_BYTES_PERSISTENT": 0,
"POLICY": "ha-all",
"MESSAGES_PAGED_OUT": 0,
"NODE": "rabbit#server566",
"HEAD_MESSAGE_TIMESTAMP": null,
"DURABLE": false,
"MESSAGES_READY_RAM": 0,
"STATE": "running",
"ARGUMENTS": {},
"EFFECTIVE_POLICY_DEFINITION": {
"HA-MODE": "all"
},
"MESSAGES_READY": 0,
"MESSAGES_RAM": 0,
"MESSAGE_BYTES_READY": 0,
"SINGLE_ACTIVE_CONSUMER_TAG": null,
"NAME": "test-04",
"MESSAGES_PERSISTENT": 0,
"BACKING_QUEUE_STATUS": {
"MIRROR_SENDERS": 0,
"Q1": 0,
"Q3": 0,
"Q2": 0,
"Q4": 0,
"AVG_ACK_EGRESS_RATE": 0.0,
"MIRROR_SEEN": 0,
"LEN": 0,
"TARGET_RAM_COUNT": "infinity",
"MODE": "default",
"NEXT_SEQ_ID": 0,
"DELTA": [
"delta",
"undefined",
0,
0,
"undefined"
],
"AVG_ACK_INGRESS_RATE": 0.0,
"AVG_EGRESS_RATE": 0.0,
"AVG_INGRESS_RATE": 0.0
},
"MESSAGES": 0,
"IDLE_SINCE": "2020-10-16 13:50:50",
"OPERATOR_POLICY": null,
"SYNCHRONISED_SLAVE_NODES": [
"rabbit#server567"
],
"MEMORY": 10556,
"EXCLUSIVE_CONSUMER_TAG": null,
"MESSAGES_READY_DETAILS": {
"RATE": 0.0
},
"TYPE": "classic",
"MESSAGES_UNACKNOWLEDGED_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES_RAM": 0
}
]
Here, I made every key in uppercase and can display it has JSON but can't find anything to add this "#" to the beginning of each key
PS : I'm new to Python development
Thank you very much
Since you mentioned that you have successfully converted every keys in a dictionary into upper case keys, why don't you reuse the method and change the part where you do upper case into prepending "#"
# the one you provided
def upper_keys(x):
if isinstance(x, list):
return [upper_keys(v) for v in x]
elif isinstance(x, dict):
return dict((k.upper(), upper_keys(v)) for k, v in x.iteritems())
else:
return x
# the modified method
def prepend_hash_keys(x):
if isinstance(x, list):
return [prepend_hash_keys(v) for v in x]
elif isinstance(x, dict):
# this part from k.upper() to "#" + k
return dict(("#" + k, prepend_hash_keys(v)) for k, v in x.iteritems())
else:
return x
Your transform function actually works fine (for Python 2), you just forgot to actually call it! Instead, you call only upper_keys, but not transform:
json = json.dumps(upper_keys(json_data), indent=4) # where's transform?
If you use both one after the other (order does not matter) it should work:
json = {"nested": {"dict": {"with": {"lowercase": "keys"}}}}
print(transform(upper_keys(json)))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}
However, both transform and upper_keys can be simplified a lot using dictionary comprehensions (also available in Python 2), and you can combine both in one function:
def transform_upper(d):
if isinstance(d, dict):
return {"#" + k.upper(): transform_upper(v) for k, v in d.items()}
else:
return d
print(transform_upper(json))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}
From the look of it you already tried something like that in append() function.
If you modify that a bit to have something like this, it may do what you are looking for:
mydict = {
'name':1,
'surname':2
}
def append(mydict):
new_dict = {}
for key, val in mydict.items():
new_dict['#'+key]=val
return new_dict
print(append(mydict))

Python: Combine multiple lists into one JSON array

I want to merge several lists into one JSON array.
These are my two lists:
address = ['address1','address2']
temp = ['temp1','temp2']
I combine both lists by the following call and create a JSON .
new_list = list(map(list, zip(address, temp)))
jsonify({
'data': new_list
})
This is my result for the call:
{
"data": [
[
"address1",
"temp1"
],
[
"address2",
"temp2"
]
]
}
However, I would like to receive the following issue. How do I do that and how can I insert the identifier address and hello.
{
"data": [
{
"address": "address1",
"temp": "temp1"
},
{
"address": "address2",
"temp": "temp2"
}
]
}
You can use a list-comprehension:
import json
address = ['address1','address2']
temp = ['temp1','temp2']
d = {'data': [{'address': a, 'temp': t} for a, t in zip(address, temp)]}
print( json.dumps(d, indent=4) )
Prints:
{
"data": [
{
"address": "address1",
"temp": "temp1"
},
{
"address": "address2",
"temp": "temp2"
}
]
}
You can just change your existing code like this. That lambda function will do the trick of converting it into a dict.
address = ['address1','address2']
temp = ['temp1','temp2']
new_list = list(map(lambda x : {'address': x[0], 'temp': x[1]}, zip(address, temp)))
jsonify({
'data': new_list
})

JSON to CSV conversion Linux terminal

I have the following example.json. How can I parse it to csv in order to get the mean value (between ** mean_value **).
I want something like in example.csv:
305152,277504,320512
[
{
"name": "stats",
"columns": [
"time",
"mean"
],
"points": [
[
1444038496000,
**305152**
],
[
1444038494000,
**277504**
],
[
1444038492000,
**320512**
]
]
}
]
In python it looks like this
import json
results = []
with open('example.json', 'r') as f:
content = json.loads(f.read())
for element in content:
results.append(','.join([str(y[1]) for y in element['points']]))
with open('example.csv', 'w') as f:
f.write('\n'.join(results))