Add a # to beginning of each key in Json Python2.7 - json

I'm trying to add a "#" at the beginning of each key of a JSON object (obtained from RabbitMQ API calls).
Here is my attempt:
#!/bin/python
# Libraries import
import requests
import json
import sys
import os
# Define URLs
overview="/api/overview"
nodes="/api/nodes"
queues="/api/queues"
# Get credentials from file
with open('/credentials') as json_file:
data = json.load(json_file)
user = data['user']
passwd = data['pass']
# Test which URL we want to call
# Map the command-line keyword to the API path defined above.
endpoints = {"overview": overview, "queues": queues, "nodes": nodes}
if len(sys.argv) < 2 or sys.argv[1] not in endpoints:
    # Fail early with a usage message; otherwise `commande` would stay
    # undefined and the request below would die with a NameError.
    sys.exit("usage: %s {overview|queues|nodes}" % sys.argv[0])
commande = endpoints[sys.argv[1]]
def append(mydict):
    """Return a copy of ``mydict`` whose keys are prefixed with "#".

    Keys are converted with ``str`` first. Only the top level is renamed;
    values are passed through unchanged.
    """
    # A dict comprehension replaces the tuple-unpacking lambda, which is
    # Python 2-only syntax (removed in Python 3 by PEP 3113).
    return {"#" + str(key): value for key, value in mydict.items()}
def transform(multileveldict):
    """Recursively prefix every dict key with "#" using ``append``.

    Lists are walked element by element, so a JSON array of objects (and
    objects nested inside arrays) is handled too. Any other value is
    returned unchanged.
    """
    if isinstance(multileveldict, list):
        return [transform(item) for item in multileveldict]
    if isinstance(multileveldict, dict):
        # Rename this level's keys first, then recurse into each value.
        return {key: transform(value)
                for key, value in append(multileveldict).items()}
    return multileveldict
def upper_keys(x):
    """Return ``x`` with every dict key upper-cased, recursing through lists and dicts.

    Values that are neither a list nor a dict are returned unchanged.
    """
    if isinstance(x, list):
        return [upper_keys(v) for v in x]
    if isinstance(x, dict):
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        return dict((k.upper(), upper_keys(v)) for k, v in x.items())
    return x
# Main
response = requests.get("http://localhost:15672" + commande, auth=(user, passwd))
if response.ok:
    json_data = json.loads(response.content)
    # Use a distinct name for the output text: assigning it to `json` would
    # rebind the json module name and break any later json.* call.
    output = json.dumps(upper_keys(json_data), indent=4)
    print(output)
Here is the JSON that I get in "response.content" :
[
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-01",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-02",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-03",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"MESSAGES_UNACKNOWLEDGED_RAM": 0,
"RECOVERABLE_SLAVES": null,
"CONSUMERS": 0,
"REDUCTIONS": 9700519,
"AUTO_DELETE": false,
"MESSAGE_BYTES_PAGED_OUT": 0,
"MESSAGE_BYTES_UNACKNOWLEDGED": 0,
"REDUCTIONS_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES": 0,
"MESSAGES_UNACKNOWLEDGED": 0,
"CONSUMER_UTILISATION": null,
"EXCLUSIVE": false,
"VHOST": "/",
"GARBAGE_COLLECTION": {
"MAX_HEAP_SIZE": 0,
"MIN_HEAP_SIZE": 233,
"FULLSWEEP_AFTER": 65535,
"MINOR_GCS": 15635,
"MIN_BIN_VHEAP_SIZE": 46422
},
"MESSAGES_DETAILS": {
"RATE": 0.0
},
"SLAVE_NODES": [
"rabbit#server567"
],
"MESSAGE_BYTES_PERSISTENT": 0,
"POLICY": "ha-all",
"MESSAGES_PAGED_OUT": 0,
"NODE": "rabbit#server566",
"HEAD_MESSAGE_TIMESTAMP": null,
"DURABLE": false,
"MESSAGES_READY_RAM": 0,
"STATE": "running",
"ARGUMENTS": {},
"EFFECTIVE_POLICY_DEFINITION": {
"HA-MODE": "all"
},
"MESSAGES_READY": 0,
"MESSAGES_RAM": 0,
"MESSAGE_BYTES_READY": 0,
"SINGLE_ACTIVE_CONSUMER_TAG": null,
"NAME": "test-04",
"MESSAGES_PERSISTENT": 0,
"BACKING_QUEUE_STATUS": {
"MIRROR_SENDERS": 0,
"Q1": 0,
"Q3": 0,
"Q2": 0,
"Q4": 0,
"AVG_ACK_EGRESS_RATE": 0.0,
"MIRROR_SEEN": 0,
"LEN": 0,
"TARGET_RAM_COUNT": "infinity",
"MODE": "default",
"NEXT_SEQ_ID": 0,
"DELTA": [
"delta",
"undefined",
0,
0,
"undefined"
],
"AVG_ACK_INGRESS_RATE": 0.0,
"AVG_EGRESS_RATE": 0.0,
"AVG_INGRESS_RATE": 0.0
},
"MESSAGES": 0,
"IDLE_SINCE": "2020-10-16 13:50:50",
"OPERATOR_POLICY": null,
"SYNCHRONISED_SLAVE_NODES": [
"rabbit#server567"
],
"MEMORY": 10556,
"EXCLUSIVE_CONSUMER_TAG": null,
"MESSAGES_READY_DETAILS": {
"RATE": 0.0
},
"TYPE": "classic",
"MESSAGES_UNACKNOWLEDGED_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES_RAM": 0
}
]
Here, I converted every key to uppercase and can display the result as JSON, but I can't find a way to add this "#" to the beginning of each key.
PS : I'm new to Python development
Thank you very much

Since you mentioned that you have successfully converted every key in a dictionary to upper case, why not reuse that method and change the part that upper-cases the key so that it prepends "#" instead?
# the one you provided
def upper_keys(x):
    """Return ``x`` with every dict key upper-cased, recursing through lists and dicts.

    Values that are neither a list nor a dict are returned unchanged.
    """
    if isinstance(x, list):
        return [upper_keys(v) for v in x]
    if isinstance(x, dict):
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        return dict((k.upper(), upper_keys(v)) for k, v in x.items())
    return x
# the modified method
def prepend_hash_keys(x):
    """Return ``x`` with "#" prepended to every dict key, recursing through lists and dicts.

    Keys are expected to be strings (as in decoded JSON). Values that are
    neither a list nor a dict are returned unchanged.
    """
    if isinstance(x, list):
        return [prepend_hash_keys(v) for v in x]
    if isinstance(x, dict):
        # this part from k.upper() to "#" + k
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        return dict(("#" + k, prepend_hash_keys(v)) for k, v in x.items())
    return x

Your transform function actually works fine (for Python 2), you just forgot to actually call it! Instead, you call only upper_keys, but not transform:
json = json.dumps(upper_keys(json_data), indent=4) # where's transform?
If you use both one after the other (order does not matter) it should work:
json = {"nested": {"dict": {"with": {"lowercase": "keys"}}}}
print(transform(upper_keys(json)))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}
However, both transform and upper_keys can be simplified a lot using dictionary comprehensions (also available in Python 2), and you can combine both in one function:
def transform_upper(d):
    """Return ``d`` with every dict key upper-cased and prefixed with "#".

    Recurses through nested dicts and lists, so a top-level JSON array of
    objects is transformed as well. Other values are returned unchanged.
    """
    if isinstance(d, dict):
        return {"#" + k.upper(): transform_upper(v) for k, v in d.items()}
    if isinstance(d, list):
        # Without this branch a list (and every dict inside it) would be
        # returned untouched.
        return [transform_upper(v) for v in d]
    return d
print(transform_upper(json))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}

From the look of it you already tried something like that in append() function.
If you modify that a bit to have something like this, it may do what you are looking for:
mydict = {
'name':1,
'surname':2
}
def append(mydict):
    """Return a new dict with "#" prepended to each top-level key of ``mydict``.

    Keys are converted with ``str`` so non-string keys do not raise a
    TypeError. Values are copied over unchanged; nested dicts are not
    descended into.
    """
    return {'#' + str(key): val for key, val in mydict.items()}
print(append(mydict))

Related

How to use json_normalize to create subcolumns in csv file

I need some help using json_normalize to create multiple sub-columns from the main columns. The code below generates an output file, but it puts everything in one column; I need separate columns with headings like moniker.config, moniker.type, moniker.key, moniker.keyParts for each row.
from pandas.io.json import json_normalize
import os
import pandas as pd
def json_normalize_recursive(base_column, data, df=pd.DataFrame()):
# Normalizes `data` into a DataFrame, then tries to expand each
# object-dtype column into further columns named
# "<base_column>.<col>.<sub-column>". Returns the resulting DataFrame.
# NOTE(review): the default `df=pd.DataFrame()` is built once, when the
# function is defined; in this body `df` is only rebound, never mutated.
if df.empty:
# NOTE(review): record_prefix is passed without a record_path — confirm
# against the pandas docs that it has any effect in that case.
df = json_normalize(data, record_prefix=base_column+'.')
# Only object-dtype columns are candidates for further expansion.
nested = df.select_dtypes(include='object')
for col in nested.columns:
try:
nested_df = json_normalize(nested[col].tolist())
nested_df.columns = [base_column+'.'+str(col)+'.'+str(c) for c in nested_df.columns]
# Swap the original column for its expanded columns.
df = pd.concat([df.drop(col, axis=1), nested_df], axis=1)
except ValueError:
# A column whose expansion raises ValueError is left in `df` as it was.
pass
return df
data = {
"errors":[
],
"data":[
{
"moniker":{
"config":"fx.ipv.london.eod",
"type":"fx.spot",
"key":"EUR/CZK",
"keyParts":[
"EUR",
"CZK"
],
"configType":"fx.ipv.london.eod/fx.spot",
"live":True
},
"queryMoniker":{
"config":"fx.ipv.london.eod",
"type":"EUR/CZK",
"key":"EUR/CZK",
"tag":{
"owner":"official",
"type":"fx.spot",
"key":"EUR/CZK",
"tag":{
"owner":"official",
"date":13434324400999,
"cutoff":"London",
"name":"ipv",
"live":True
},
"keyParts":[
"EUR",
"CZK"
],
"configType":"fx.ipv.london.eod/fx.spot",
"live":False
},
"instance":{
"data":"<FxSpot Currency1=\"EUR\"Currency2=\"CZK\" bid=\"24.14\" ask=\"24.147\"/>",
"unmarshalled":True,
"marshalled":True,
"format":"fx/xml/1",
"valid":True,
"sequence":1643434234234,
"instanceMoniker":{
"source":"viper.tagcopy",
"config":"fx.london.official.copy",
"keyParts":[
"EUR",
"CZK"
]
}
}
}
}
]
}
df = json_normalize_recursive('', data)
print(df)
cwd = os.getcwd()
filepath = os.path.join(cwd, 'Desktop', 'output.csv')
df.to_csv(filepath, index=False)
Desired output:
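Try using df = pd.json_normalize(data, 'data') (equivalently, pd.json_normalize(data['data'])); it flattens the nested 'moniker' and 'queryMoniker' dicts into dotted column names.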
You can try:
data = {
"errors": [],
"data": [
{
"moniker": {
"config": "fx.ipv.london.eod",
"type": "fx.spot",
"key": "EUR/CZK",
"keyParts": ["EUR", "CZK"],
"configType": "fx.ipv.london.eod/fx.spot",
"live": True,
},
"queryMoniker": {
"config": "fx.ipv.london.eod",
"type": "EUR/CZK",
"key": "EUR/CZK",
"tag": {
"owner": "official",
"type": "fx.spot",
"key": "EUR/CZK",
"tag": {
"owner": "official",
"date": 13434324400999,
"cutoff": "London",
"name": "ipv",
"live": True,
},
"keyParts": ["EUR", "CZK"],
"configType": "fx.ipv.london.eod/fx.spot",
"live": False,
},
"instance": {
"data": '<FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/>',
"unmarshalled": True,
"marshalled": True,
"format": "fx/xml/1",
"valid": True,
"sequence": 1643434234234,
"instanceMoniker": {
"source": "viper.tagcopy",
"config": "fx.london.official.copy",
"keyParts": ["EUR", "CZK"],
},
},
},
}
],
}
df = pd.DataFrame(data['data'])
# Expand one level of nested dicts at a time: each listed column is removed
# and its keys come back as "<column>.<key>" columns appended on the right.
for nested_col in ('moniker', 'queryMoniker', 'queryMoniker.tag', 'queryMoniker.instance'):
    expanded = df.pop(nested_col).apply(pd.Series).add_prefix(nested_col + '.')
    df = pd.concat([df, expanded], axis=1)
# One output row per element of the moniker.keyParts list.
df = df.explode('moniker.keyParts')
print(df)
Prints:
moniker.config moniker.type moniker.key moniker.keyParts moniker.configType moniker.live queryMoniker.config queryMoniker.type queryMoniker.key queryMoniker.tag.owner queryMoniker.tag.type queryMoniker.tag.key queryMoniker.tag.tag queryMoniker.tag.keyParts queryMoniker.tag.configType queryMoniker.tag.live queryMoniker.instance.data queryMoniker.instance.unmarshalled queryMoniker.instance.marshalled queryMoniker.instance.format queryMoniker.instance.valid queryMoniker.instance.sequence queryMoniker.instance.instanceMoniker
0 fx.ipv.london.eod fx.spot EUR/CZK EUR fx.ipv.london.eod/fx.spot True fx.ipv.london.eod EUR/CZK EUR/CZK official fx.spot EUR/CZK {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True} [EUR, CZK] fx.ipv.london.eod/fx.spot False <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/> True True fx/xml/1 True 1643434234234 {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}
0 fx.ipv.london.eod fx.spot EUR/CZK CZK fx.ipv.london.eod/fx.spot True fx.ipv.london.eod EUR/CZK EUR/CZK official fx.spot EUR/CZK {'owner': 'official', 'date': 13434324400999, 'cutoff': 'London', 'name': 'ipv', 'live': True} [EUR, CZK] fx.ipv.london.eod/fx.spot False <FxSpot Currency1="EUR"Currency2="CZK" bid="24.14" ask="24.147"/> True True fx/xml/1 True 1643434234234 {'source': 'viper.tagcopy', 'config': 'fx.london.official.copy', 'keyParts': ['EUR', 'CZK']}

How to read from a JSON with two keys

I have a json that I need to import and then return a certain value. The json has two keys, like
{
"NUM_High_Objects": {
"abseta_pt": {
"field1:[0.0,0.9]": {
"field2:[15,20]": {
"tagIso": 0.00012,
"value": 0.99
},
"field2:[20,25]": {
"tagIso": 0.00035,
"value": 0.98
}
},
"field1:[0.91,1.2]": {
"field2:[15,20]": {
"tagIso": 0.00013,
"value": 0.991
},
"field2:[20,25]": {
"tagIso": 0.00036,
"value": 0.975
}
},
"binning": [
{
"binning": [
0.0,
0.9,
1.2,
2.1,
2.4
],
"variable": "abseta"
},
{
"binning": [
15,
20,
25,
30,
40,
50,
60,
120
],
"variable": "pt"
}
]
}
},
What I need is to check whether a pair of values falls within the ranges given by "field1" and "field2", and return the corresponding "value".
I tried following "Search nested json / dict for multiple key values matching specified keys" but could not make it work.
I've tried something like
class checkJSON():
    """Load a JSON file once and look up entries in it by run number."""

    def __init__(self, filein):
        """Parse the JSON file ``filein`` and reset the good/bad counters."""
        self.good, self.bad = 0, 0
        print('inside json function : will use the JSON', filein)
        # The context manager closes the file even if parsing fails.
        with open(filein) as input_file:
            self.json_array = json.load(input_file)

    def checkJSON(self, LS, run):
        """Print every entry stored under ``str(run)`` and return False.

        A missing ``run`` key is ignored. ``self.bad`` is incremented on
        every call. NOTE(review): ``LS`` is accepted but not used yet, and
        no code path returns True.
        """
        try:
            LSlist = self.json_array[str(run)]
            for LSrange in LSlist:
                print(LSrange, run)
        except KeyError:
            pass
        self.bad += 1
        return False
CJ=''
CJ=checkJSON(filein='test.json')
isInJSON = CJ.checkJSON("0.5", "20")
print isInJSON
but this does not work as I am not sure how to loop inside the keys
If I am understanding your question correctly then the relevant portion of your JSON is:
{
"field1:[0.0,0.9]": {
"field2:[15,20]": {
"tagIso": 0.00012,
"value": 0.99
},
"field2:[20,25]": {
"tagIso": 0.00035,
"value": 0.98
}
},
"field1:[0.91,1.2]": {
"field2:[15,20]": {
"tagIso": 0.00013,
"value": 0.991
},
"field2:[20,25]": {
"tagIso": 0.00036,
"value": 0.975
}
},
"binning": [
{
"binning": [
0.0,
0.9,
1.2,
2.1,
2.4
],
"variable": "abseta"
},
{
"binning": [
15,
20,
25,
30,
40,
50,
60,
120
],
"variable": "pt"
}
]
}
Then the following code should do what you are trying to achieve. It doesn't look like you need to search for nested keys, you simply need to parse your field1[...] and field2[...]. The code below is a quick implementation of what I understand you are trying to achieve. It will return the value if the first parameter is in the range of a field1[...] and the second parameter is in the range of a field2[...]. Otherwise, it will return None.
import json
def check_json(jsondict, l1val, l2val):
    """Return the "value" whose two key ranges contain ``l1val`` and ``l2val``.

    ``jsondict`` maps keys such as "field1:[0.0,0.9]" to dicts keyed by
    "field2:[15,20]", whose values are dicts holding a "value" entry.
    Range bounds are inclusive. Top-level keys that do not contain "field"
    (e.g. "binning") are skipped. Returns None when no pair of ranges matches.
    """
    def parse_key(keystr):
        # The part after the colon ("[0.0,0.9]") is a valid JSON array, so
        # json.loads parses it without eval's arbitrary-code-execution risk.
        level, lrange = keystr.split(':', 1)
        return level, json.loads(lrange)

    for l1key, l2dict in jsondict.items():
        if 'field' not in l1key:
            continue
        _, l1range = parse_key(l1key)
        if not l1range[0] <= l1val <= l1range[1]:
            continue
        for l2key, vals in l2dict.items():
            _, l2range = parse_key(l2key)
            if l2range[0] <= l2val <= l2range[1]:
                return vals['value']
    return None
Here is a driver code to test the implementation above.
if __name__ == '__main__':
with open('data.json', 'r') as f:
myjson = json.load(f)
print(check_json(myjson, 0.5, 20))

Finding Values in Python json.loads Dictionary

I'm working with a REST API that returns data in the following format:
{
"id": "2902cbad6da44459ad05abd1305eed14",
"displayName": "",
"sourceHost": "dev01.test.lan",
"sourceIP": "192.168.145.1",
"messagesPerSecond": 0,
"messages": 2733,
"size": 292062,
"archiveSize": 0,
"dates": [
{
"date": 1624921200000,
"messages": 279,
"size": 29753,
"archiveSize": 0
},
{
"date": 1625007600000,
"messages": 401,
"size": 42902,
"archiveSize": 0
}
]
}
I'm using json.loads to successfully pull the data from the API, and I now need to search for a particular "date:" value and read the corresponding "messages", "size" and "archiveSize" values.
I'm trying to use the "if-in" method to find the value I'm interested in, for example:
response = requests.request("GET", apiQuery, headers=headers, data=payload)
json_response = json.loads(response.text)
test = 2733
if test in json_response.values():
print(f"Yes, value: '{test}' exist in dictionary")
else:
print(f"No, value: '{test}' does not exist in dictionary")
This works fine for any value in the top section of the JSON return, but it never finds any values in the "dates" sub-branches.
I have two questions, firstly, how do I find the target "date" value? Secondly, once I find that "sub-branch" what would be the best way to extract the three values I need?
Thanks.
from json import load
def list_dates_whose_message_count_equals(dates=None, message_count=0):
    """Return the entries of ``dates`` whose "messages" value equals ``message_count``.

    ``dates`` is an iterable of dicts. When it is None or empty, an empty
    list is returned instead of raising a TypeError on the ``None`` default.
    """
    if not dates:
        return []
    return [date for date in dates if date.get("messages") == message_count]
def main():
    """Load values.json and print the "dates" entries matching two sample message counts."""
    with open("values.json", "r") as fp:
        json_ = load(fp)
    for wanted in (279, 401):
        print(list_dates_whose_message_count_equals(json_["dates"], message_count=wanted))
if __name__ == "__main__":
main()
Returns this
[{'date': 1624921200000, 'messages': 279, 'size': 29753, 'archiveSize': 0}]
[{'date': 1625007600000, 'messages': 401, 'size': 42902, 'archiveSize': 0}]

Writing Nested JSON Dictionary List To CSV

Issue
I'm trying to write the following nested list of dictionary which has another list of dictionary to csv. I tried multiple ways but I can not get it to properly write it:
Json Data
[
{
"Basic_Information_Source": [
{
"Image": "image1.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277274
}
],
"Basic_Information_Destination": [
{
"Image": "image1_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277539
}
],
"Values": [
{
"Value1": 75.05045463635267,
"Value2": 0.006097560975609756,
"Value3": 0.045083481733371615,
"Value4": 0.008639858263904898
}
]
},
{
"Basic_Information_Source": [
{
"Image": "image2.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1786254
}
],
"Basic_Information_Destination": [
{
"Image": "image2_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1782197
}
],
"Values": [
{
"Value1": 85.52662890580055,
"Value2": 0.0005464352720450282,
"Value3": 0.013496113910369758,
"Value4": 0.003800236380811839
}
]
}
]
Working Code
I tried to use the following code and it works, but it only saved the headers and then dumps all the underlying list as text in the csv file:
import json
import csv
def Convert_CSV():
    """Write the records of analysis_results_enc.json to test.csv.

    The CSV header is taken from the keys of the first record. Nested
    values are written by DictWriter as their string representation.
    """
    # A context manager closes the input file even if loading or writing fails.
    with open('analysis_results_enc.json', 'r') as ar_enc_file:
        json_data = json.load(ar_enc_file)
    keys = json_data[0].keys()
    with open('test.csv', 'w', encoding='utf8', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(json_data)
Convert_CSV()
Working Output / Issue with it
The output writes the following header:
Basic_Information_Source
Basic_Information_Destination
Values
And then it dumps all other data inside each header as a list like this:
[{'Image': 'image1.png', 'Image_Format': 'PNG', 'Image_Mode': 'RGB', 'Image_Width': 574, 'Image_Height': 262, 'Image_Size': 277274}]
Expected Output / Sample
Trying to generate the above type of output for each dictionary in the array of dictionaries.
How do it properly write it?
I'm sure someone will come by with a much more elegant solution. That being said:
You have a few problems.
You have inconsistent entries with the fields you want to align.
Even if you pad your data you have intermediate lists that need flattened out.
Then you still have separated data that needs to be merged together.
DictWriter AFAIK expects its data in the format of [{'column': 'entry'}, {'column': 'entry'}], so even if you do all the previous steps you're still not in the right format.
So let's get started.
For the first two parts we can combine.
def pad_list(lst, size, padding=None):
    """Return a copy of ``lst`` extended with ``padding`` up to ``size`` items.

    A list already at least ``size`` long is copied unchanged. The input
    list is never modified.
    """
    shortfall = max(size - len(lst), 0)
    return lst + [padding] * shortfall
# this expects already parsed json data
def flatten(json_data):
    """Turn each record of already-parsed JSON into equal-length value lists.

    For every key of a record, only the values of the first dict in that
    key's list are kept (an empty list yields no values). Within a record
    the shorter lists are padded with None to the longest one.
    """
    result = []
    for record in json_data:
        columns = {
            name: list(next(iter(items), dict()).values())
            for name, items in record.items()
        }
        # Widest column of this record; 0 when the record has no keys.
        width = max(map(len, columns.values()), default=0)
        result.append({name: pad_list(vals, width) for name, vals in columns.items()})
    return result
So now we have a flattened list of dicts whose values are lists of consistent length. Essentially:
[
{
"Basic_Information_Source": [
"image1.png",
"PNG",
"RGB",
574,
262,
277274
],
"Basic_Information_Destination": [
"image1_dst.png",
"PNG",
"RGB",
574,
262,
277539
],
"Values": [
75.05045463635267,
0.006097560975609756,
0.045083481733371615,
0.008639858263904898,
None,
None
]
}
]
But this list has multiple dicts that need to be merged, not just one.
So we need to merge.
# this should be self explanatory
def merge(flattened):
    """Concatenate the per-key lists of several dicts into one dict of lists.

    Keys keep the order in which they are first seen; each key's values
    are joined in input order.
    """
    combined = {}
    for record in flattened:
        for name, values in record.items():
            combined.setdefault(name, []).extend(values)
    return combined
This gives us something close to this:
{
"Basic_Information_Source": [
"image1.png",
"PNG",
"RGB",
574,
262,
277274,
"image2.png",
"PNG",
"RGB",
1600,
1066,
1786254
],
"Basic_Information_Destination": [
"image1_dst.png",
"PNG",
"RGB",
574,
262,
277539,
"image2_dst.png",
"PNG",
"RGB",
1600,
1066,
1782197
],
"Values": [
75.05045463635267,
0.006097560975609756,
0.045083481733371615,
0.008639858263904898,
None,
None,
85.52662890580055,
0.0005464352720450282,
0.013496113910369758,
0.003800236380811839,
None,
None
]
}
But wait, we still need to format it for the writer.
Our data needs to be in the format of [{'column_1': 'entry', 'column_2': 'entry'}, {'column_1': 'entry', 'column_2': 'entry'}]
So we format:
def format_for_writer(merged):
    """Convert a dict of column lists into a list of row dicts.

    Row ``i`` maps each column name to that column's ``i``-th value. A
    column shorter than the longest one is simply absent from later rows.
    """
    rows = []
    for column, values in merged.items():
        # Grow the row list so there is one dict per value of this column.
        missing = len(values) - len(rows)
        if missing > 0:
            rows.extend({} for _ in range(missing))
        for row, value in zip(rows, values):
            row[column] = value
    return rows
So finally, we have a nice clean formatted data structure we can just hand to our writer function.
def convert_csv(formatted):
    """Write ``formatted`` (a list of row dicts) to test.csv with a header row.

    The header is taken from the keys of the first row.
    """
    header = formatted[0].keys()
    with open('test.csv', 'w', encoding='utf8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=header)
        writer.writeheader()
        writer.writerows(formatted)
Full code with json string:
import json
import csv
json_raw = """\
[
{
"Basic_Information_Source": [
{
"Image": "image1.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277274
}
],
"Basic_Information_Destination": [
{
"Image": "image1_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 574,
"Image_Height": 262,
"Image_Size": 277539
}
],
"Values": [
{
"Value1": 75.05045463635267,
"Value2": 0.006097560975609756,
"Value3": 0.045083481733371615,
"Value4": 0.008639858263904898
}
]
},
{
"Basic_Information_Source": [
{
"Image": "image2.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1786254
}
],
"Basic_Information_Destination": [
{
"Image": "image2_dst.png",
"Image_Format": "PNG",
"Image_Mode": "RGB",
"Image_Width": 1600,
"Image_Height": 1066,
"Image_Size": 1782197
}
],
"Values": [
{
"Value1": 85.52662890580055,
"Value2": 0.0005464352720450282,
"Value3": 0.013496113910369758,
"Value4": 0.003800236380811839
}
]
}
]
"""
def pad_list(lst, size, padding=None):
    """Return a copy of ``lst`` extended with ``padding`` up to ``size`` items.

    A list already at least ``size`` long is copied unchanged. The input
    list is never modified.
    """
    shortfall = max(size - len(lst), 0)
    return lst + [padding] * shortfall
def flatten(json_data):
    """Turn each record of already-parsed JSON into equal-length value lists.

    For every key of a record, only the values of the first dict in that
    key's list are kept (an empty list yields no values). Within a record
    the shorter lists are padded with None to the longest one.
    """
    result = []
    for record in json_data:
        columns = {
            name: list(next(iter(items), dict()).values())
            for name, items in record.items()
        }
        # Widest column of this record; 0 when the record has no keys.
        width = max(map(len, columns.values()), default=0)
        result.append({name: pad_list(vals, width) for name, vals in columns.items()})
    return result
def merge(flattened):
    """Concatenate the per-key lists of several dicts into one dict of lists.

    Keys keep the order in which they are first seen; each key's values
    are joined in input order.
    """
    combined = {}
    for record in flattened:
        for name, values in record.items():
            combined.setdefault(name, []).extend(values)
    return combined
def format_for_writer(merged):
    """Convert a dict of column lists into a list of row dicts.

    Row ``i`` maps each column name to that column's ``i``-th value. A
    column shorter than the longest one is simply absent from later rows.
    """
    rows = []
    for column, values in merged.items():
        # Grow the row list so there is one dict per value of this column.
        missing = len(values) - len(rows)
        if missing > 0:
            rows.extend({} for _ in range(missing))
        for row, value in zip(rows, values):
            row[column] = value
    return rows
def convert_csv(formatted):
    """Write ``formatted`` (a list of row dicts) to test.csv with a header row.

    The header is taken from the keys of the first row.
    """
    header = formatted[0].keys()
    with open('test.csv', 'w', encoding='utf8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=header)
        writer.writeheader()
        writer.writerows(formatted)
def main():
    """Run the whole pipeline: parse json_raw, flatten, merge, reshape, write test.csv."""
    rows = format_for_writer(merge(flatten(json.loads(json_raw))))
    convert_csv(rows)
if __name__ == '__main__':
main()

How to loop different types of nested JSON objects multiple times in the same message

Python noob here, again. I'm trying to create a Python script that auto-generates a JSON message containing multiple item records, using a for loop to generate them. The JSON message structure and cardinalities are as follows:
messageHeader[1]
-item [1-*]
--itemAttributesA [0-1]
--itemAttributesB [0-1]
--itemAttributesC [0-1]
--itemLocaton [1]
--itemRelationships [0-1]
I've had some really good help before for looping through the same object but for one record for example just the itemRelationships record. However as soon as I try to create one message with many items (i.e. 5) and a single instance of an itemAttribute, itemLocation and itemRelationships it does not work as I keep getting a key error. I've tried to define what a keyError is in relation to what I am trying to do but cannot link what I am doing wrong to the examples else where.
Here's my code as it stands:
import json
import random
data = {'messageID': random.randint(0, 2147483647), 'messageType': 'messageType'}
data['item'] = list()
itemAttributeType = input("Please selct what type of Attribute item has, either 'A', 'B' or 'C' :")
for x in range(0, 5):
# Each pass appends a base item holding only 'itemId' and 'itemType'.
data['item'].append({
'itemId': "I",
'itemType': "T"})
if itemAttributeType == "A":
# This bare lookup reads a key that the appended item dicts do not contain,
# so it raises KeyError before the append() below is reached.
data['item'][0]['itemAttributesA']
data['item'][0]['itemAttributesA'].append({
'attributeA': "ITA"})
elif itemAttributeType == "B":
data['item'][0]['itemAttributesB']
data['item'][0]['itemAttributesB'].append({
'attributeC': "ITB"})
else:
data['item'][0]['itemAttributesC']
data['item'][0]['itemAttributesC'].append({
'attributeC': "ITC"})
pass
# NOTE(review): index 0 always addresses the first item in the list,
# not the item appended for the current value of x.
data['item'][0]['itemLocation'] = {
'itemDetail': "ITC"}
itemRelation = input("Does the item have a relation: ")
# Any non-empty answer compares greater than '' and adds the relation.
if itemRelation > '':
data['item'][0]['itemRelations'] = {
'itemDetail': "relation"}
else:
pass
print(json.dumps(data, indent=4))
I have tried also tried this code which gives me better results:
import json
import random
data = {'messageID': random.randint(0, 2147483647), 'messageType': 'messageType'}
data['item'] = list()
itemAttributeType = input("Please selct what type of Attribute item has, either 'A', 'B' or 'C' :")
for x in range(0, 5):
# Each pass appends a base item holding only 'itemId' and 'itemType'.
data['item'].append({
'itemId': "I",
'itemType': "T"})
# Assigning the attribute dict directly creates the key, so no KeyError here.
# NOTE(review): every assignment below targets index 0, i.e. the first item,
# so only that item ever receives attributes, location and relations.
if itemAttributeType == "A":
data['item'][0]['itemAttributesA'] = {
'attributeA': "ITA"}
elif itemAttributeType == "B":
data['item'][0]['itemAttributesB'] = {
'attributeB': "ITB"}
else:
data['item'][0]['itemAttributesC'] = {
'attributeC': "ITC"}
pass
data['item'][0]['itemLocation'] = {
'itemDetail': "ITC"}
itemRelation = input("Does the item have a relation: ")
# Any non-empty answer compares greater than '' and adds the relation.
if itemRelation > '':
data['item'][0]['itemRelations'] = {
'itemDetail': "relation"}
else:
pass
print(json.dumps(data, indent=4))
This actually gives me a result but gives me messageHeader, item, itemAttributeA, itemLocation, itemRelations, and then four items records at the end as follows:
{
"messageID": 1926708779,
"messageType": "messageType",
"item": [
{
"itemId": "I",
"itemType": "T",
"itemAttributesA": {
"itemLocationType": "ITA"
},
"itemLocation": {
"itemDetail": "location"
},
"itemRelations": {
"itemDetail": "relation"
}
},
{
"itemId": "I",
"itemType": "T"
},
{
"itemId": "I",
"itemType": "T"
},
{
"itemId": "I",
"itemType": "T"
},
{
"itemId": "I",
"itemType": "T"
}
]
}
What I am trying to achieve is this output:
{
"messageID": 2018369867,
"messageType": "messageType",
"item": [{
"itemId": "I",
"itemType": "T",
"itemAttributesA": {
"attributeA": "ITA"
},
"itemLocation": {
"itemDetail": "Location"
},
"itemRelation": [{
"itemDetail": "D"
}]
}, {
"item": [{
"itemId": "I",
"itemType": "T",
"itemAttributesB": {
"attributeA": "ITB"
},
"itemLocation": {
"itemDetail": "Location"
},
"itemRelation": [{
"itemDetail": "D"
}]
}, {
"item": [{
"itemId": "I",
"itemType": "T",
"itemAttributesC": {
"attributeA": "ITC"
},
"itemLocation": {
"itemDetail": "Location"
},
"itemRelation": [{
"itemDetail": "D"
}]
}, {
"item": [{
"itemId": "I",
"itemType": "T",
"itemAttributesA": {
"attributeA": "ITA"
},
"itemLocation": {
"itemDetail": "Location"
},
"itemRelation": [{
"itemDetail": "D"
}]
},
{
"item": [{
"itemId": "I",
"itemType": "T",
"itemAttributesB": {
"attributeA": "ITB"
},
"itemLocation": {
"itemDetail": "Location"
},
"itemRelation": [{
"itemDetail": "D"
}]
}]
}
]
}]
}]
}]
}
I've been at this for the best part of a whole day trying to get it to work, butchering away at code, where am I going wrong, any help would be greatly appreciated
You're close. I think the parts you are missing are adding the dict to your current dict, and the indentation within your for loop.
import json
import random
data = {'messageID': random.randint(0, 2147483647), 'messageType': 'messageType'}
data['item'] = list()
itemAttributeType = input("Please selct what type of Attribute item has, either 'A', 'B' or 'C' :")
for x in range(0, 5):
    data['item'].append({
        'itemId': "I",
        'itemType': "T"})
    # Index with x (the item just appended), not 0, so every item gets
    # its own attributes, location and relation.
    if itemAttributeType == "A":
        # First you need to add `itemAttributesA` to your dict:
        data['item'][x]['itemAttributesA'] = dict()
        # You could also do both steps at once:
        # data['item'][x]['itemAttributesA'] = {'attributeA': "ITA"}
        data['item'][x]['itemAttributesA']['attributeA'] = "ITA"
    elif itemAttributeType == "B":
        data['item'][x]['itemAttributesB'] = dict()
        # The B attribute is stored under 'attributeB' (it was mislabelled
        # 'attributeC' before).
        data['item'][x]['itemAttributesB']['attributeB'] = "ITB"
    else:
        data['item'][x]['itemAttributesC'] = dict()
        data['item'][x]['itemAttributesC']['attributeC'] = "ITC"
    data['item'][x]['itemLocation'] = {'itemDetail': "ITC"}
    itemRelation = input("Does the item have a relation: ")
    # Any non-empty answer adds a relation to this item.
    if itemRelation:
        data['item'][x]['itemRelations'] = {'itemDetail': "relation"}
print(json.dumps(data, indent=4))
This code can also be shortened considerably if your example is close to what you truly desire:
import json
import random
data = {
    'messageID': random.randint(0, 2147483647),
    'messageType': 'messageType',
    'item': [],
}
itemAttributeType = input("Please selct what type of Attribute item has, either 'A', 'B' or 'C' :")
# The answer is used verbatim as the suffix of the attribute key names.
suffix = str(itemAttributeType)
for x in range(5):
    new_item = {'itemId': "I", 'itemType': "T"}
    new_item['itemAttributes' + suffix] = {'attribute' + suffix: "IT" + suffix}
    new_item['itemLocation'] = {'itemDetail': "ITC"}
    itemRelation = input("Does the item have a relation: ")
    # A non-empty answer is stored as the relation detail of this item.
    if itemRelation != '':
        new_item['itemRelations'] = {'itemDetail': itemRelation}
    data['item'].append(new_item)
print(json.dumps(data, indent=4))
Another note: if you want messageID to be truly unique, then you should probably look into a UUID; otherwise you may end up with message IDs that match.
import uuid
unique_id = str(uuid.uuid4())
print(unique_id)