Objective: match keys and override values in JSON elements across two JSON files.
Input:
The source JSON file looks like:
{"CLIENT_CODE":"client-d",
"ARM_SUBSCRIPTION_ID":"abced-edfgh-dk6dkk-97dke",
"location":"easteurope"}
The target JSON file looks like:
{"CLIENT_CODE":"dummy",
"ARM_SUBSCRIPTION_ID":"dummy",
"prefix":"orp",
"location":"westeurope",
"address_space":"10.0.0.0/16",
"aad_login_name":"abcd.onmicrosoft.com",
"aad_login_object_id":"dummy",
"aad_login_tenant_id":"dummy",
"bastion_allocation_method":"Static",
"bastion_sku_type":"premium",
"kv_sku_name":"premium",
"storage_account_tier":"Standard",
"storage_account_replication_type":"LRS",
"storage_account_kind":"StorageV2",
"sql_pool_sku_name":"DW100C",
"node_size_family":"MemoryOptimized"}
Expected output:
{"CLIENT_CODE":"client-d",
"ARM_SUBSCRIPTION_ID":"abced-edfgh-dk6dkk-97dke",
"prefix":"orp",
"location":"easteurope",
"address_space":"10.0.0.0/16",
"aad_login_name":"abcd.onmicrosoft.com",
"aad_login_object_id":"dummy",
"aad_login_tenant_id":"dummy",
"bastion_allocation_method":"Static",
"bastion_sku_type":"premium",
"kv_sku_name":"premium",
"storage_account_tier":"Standard",
"storage_account_replication_type":"LRS",
"storage_account_kind":"StorageV2"}
What I tried:
import json

with open(r"D:\ABTest\source.json", encoding='utf-8') as f:
    dataset1 = json.loads(f.read())
# print(dataset1)
with open(r"D:\ABTest\target.json", encoding='utf-8') as f:
    dataset2 = json.loads(f.read())
# print(dataset2)
if dataset1.keys() == dataset2.keys():
    dataset2.update(dataset1)
    print(dataset2)
But I am not getting any output.
Update 1: Now I am able to write the result to a third file, but I am not able to update the second file (target.json) in place.
import json

with open(r"D:\ABTest\source.json", encoding='utf-8') as f:
    d1 = json.loads(f.read())
with open(r"D:\ABTest\target.json", encoding='utf-8') as f:
    d2 = json.loads(f.read())
for key in d1.keys():
    if key in d2.keys():
        d2[key] = d1[key]
print(d2)
with open(r'D:\ABTest\combined.json', 'w') as f:
    json.dump(d2, f, ensure_ascii=False, indent=4)
Update 2:
I made it work; the working code is below.
I now have the following working. It may help someone looking at a similar issue:
import json

# input file for d1
with open(r"D:\ABTest\source.json", encoding='utf-8') as f:
    d1 = json.load(f)
# input file for d2
with open(r"D:\ABTest\target.json", encoding='utf-8') as f:
    d2 = json.load(f)
# output file (overwrites target.json in place)
with open(r'D:\ABTest\target.json', 'w') as f:
    # update values in d2 with values from d1
    for key in d2:
        try:
            # raises a KeyError if d1 doesn't have the key
            d2[key] = d1[key]
        except KeyError:
            pass
    json.dump(d2, f, ensure_ascii=False, indent=4)
print(d2)
Eliminating the if statement and replacing it with a try...except block makes the code more Pythonic, and for lookups that usually succeed it can also be faster.
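For comparison, a minimal sketch of the same merge without the explicit loop, using a dict comprehension to keep only the keys that already exist in the target; d1 and d2 are assumed to be the same source and target dicts loaded above:
# keep only the overlapping keys, then apply them in one call
d2.update({k: v for k, v in d1.items() if k in d2})
print(d2)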
Related
How can my Python code change all JSON values to 0?
Code:
str(ctx.author.id): score
This is the result I want:
str(ctx.author.id): 4
str(ctx.author.id): 3
str(ctx.author.id) can change at any time.
str(ctx.author.id) consists of 18 digits.
You want all JSON values to be 0? Then why are you giving 3 and 4 in the example?
Anyway, here is how you would do it:
import json

with open("filename.json") as f:
    data = json.load(f)
for i in data:
    data[i] = 0
with open("filename.json", "w") as f:
    json.dump(data, f, indent=4)
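As a side note, the loop over the keys can be replaced with a one-liner. A sketch of the same reset using dict.fromkeys, which builds a new dict mapping every existing key to 0:
import json

with open("filename.json") as f:
    data = json.load(f)
data = dict.fromkeys(data, 0)  # every existing key now maps to 0
with open("filename.json", "w") as f:
    json.dump(data, f, indent=4)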
So I'm making a command, !command.
When you type it in a chat, it will update the member's score in the database.
The database will look something like this:
{
    "person": "epikUbuntu",
    "score": "22"
}
How would I go about doing that?
Edit:
If I wasn't clear, I meant how would I go about doing the Python part of it?
JSON objects in Python work like dictionaries.
You can write a new dictionary and save it:
data = {"foo": "bar"}
with open("file.json", "w+") as fp:
json.dump(data, fp, sort_keys=True, indent=4) # kwargs for beautification
And you can load data from a file (this will be for updating the file):
with open("file.json", "r") as fp:
data = json.load(fp) # loading json contents into data variable - this will be a dict
data["foo"] = "baz" # updating values
data["bar"] = "foo" # writing new values
with open("file.json", "w+") as fp:
json.dump(data, fp, sort_keys=True, indent=4)
Discord.py example:
import os  # for checking the file exists
import json
import discord

def add_score(member: discord.Member, amount: int):
    if os.path.isfile("file.json"):
        with open("file.json", "r") as fp:
            data = json.load(fp)
        try:
            data[f"{member.id}"]["score"] += amount
        except KeyError:  # if the user isn't in the file, do the following
            data[f"{member.id}"] = {"score": amount}  # add other things you want to store
    else:
        data = {f"{member.id}": {"score": amount}}
    # saving the file outside of the if statements saves us having to write it twice
    with open("file.json", "w+") as fp:
        json.dump(data, fp, sort_keys=True, indent=4)  # kwargs for beautification
    # you can also return the new/updated score here if you want

def get_score(member: discord.Member):
    with open("file.json", "r") as fp:
        data = json.load(fp)
    return data[f"{member.id}"]["score"]

@bot.command()
async def cmd(ctx):
    # some code here
    add_score(ctx.author, 10)
    # 10 is just an example
    # you can use the random module if you want - random.randint(x, y)
    await ctx.send(f"You now have {get_score(ctx.author)} score!")
References:
Context managers
Dictionaries
Loading data from a JSON file
Writing to a JSON file
I am using the following code in Python 3 to convert ~30,000 JSON files to a CSV.
import json
from glob import glob

with open('out.csv', 'w') as f:
    for fname in glob("*.json"):  # reads all json from the current directory
        with open(fname) as j:
            f.write(str(json.load(j)))
            f.write('\n')
The JSON files contain timestamps and values, for example: {"1501005600":956170,"1501048800":970046,...
The output currently is [screenshot omitted: each file's whole dict written as one string].
How can I put each timestamp and value in their own respective cells so the output is [screenshot omitted]?
I have tried many approaches with csv.writer but I cannot figure this out.
UPDATE
with open('out.csv', 'w') as f:
    for fname in glob("*.json"):
        with open(fname) as j:
            values = json.load(j)
            for k, v in values.items():
                f.write("{},{},".format(str(k), str(v)))
Parsing is correct, but now all the .json files end up on one row.
A friend helped me get to the bottom of this; I hope it may help others.
with open('[insert].csv', 'w') as f:
    for fname in glob("*.json"):
        with open(fname) as j:
            values = json.load(j)
            for k, v in values.items():
                f.write("{},{},".format(str(k), str(v)))
            f.write('\n')
This is for Python 3.
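Since the question mentions csv.writer, here is a sketch of the same one-row-per-file output using the csv module instead of manual string formatting; it assumes each file is a flat {timestamp: value} object, as in the example above.
import csv
import json
from glob import glob

with open('out.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for fname in glob("*.json"):
        with open(fname) as j:
            values = json.load(j)
        row = []
        for k, v in values.items():
            row.extend([k, v])  # timestamp and value in adjacent cells
        writer.writerow(row)  # one row per file; the writer handles quoting and newlines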
How would you approach the following problem? (I did not find anything like this in other posts.)
I need to open/load 72 different .json files and assign each one of them to a variable, like this:
import json

with open('/Users/Data/netatmo_20171231_0000.json') as f:
    d1 = json.load(f)
with open('/Users/Data/netatmo_20171231_0010.json') as f:
    d2 = json.load(f)
with open('/Users/Data/netatmo_20171231_0020.json') as f:
    d3 = json.load(f)
with open('/Users/Data/netatmo_20171231_0030.json') as f:
    d4 = json.load(f)
with open('/Users/Data/netatmo_20171231_0040.json') as f:
    d5 = json.load(f)
with open('/Users/Data/netatmo_20171231_0050.json') as f:
    d6 = json.load(f)
with open('/Users/Data/netatmo_20171231_0100.json') as f:
    d7 = json.load(f)
with open('/Users/Data/netatmo_20171231_0110.json') as f:
    d8 = json.load(f)
with open('/Users/Data/netatmo_20171231_0120.json') as f:
    d9 = json.load(f)
with open('/Users/Data/netatmo_20171231_0130.json') as f:
    d10 = json.load(f)
But I don't want to repeat this 72 times (and I suspect it is inefficient anyway).
At the end I will create a pandas DataFrame, but first I need the JSONs in variables because I'm applying a function to them to flatten the data (these JSONs are deeply nested).
I also managed to join the JSON files, but the resulting JSON is 5 GB and my PC takes 12 hours to load it, so this is not an option.
Thanks, and kind regards.
First, find out where your bottlenecks are.
If it is the JSON decoding/encoding step, try switching to ultrajson (ujson).
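ujson mirrors the standard json module's load/loads/dump/dumps calls, so switching can be as small as changing the import. A minimal sketch, assuming ujson is installed (pip install ujson):
import ujson as json  # drop-in replacement for the common json calls

with open('/Users/Data/netatmo_20171231_0000.json') as f:
    d = json.load(f)  # parsed by the faster C implementation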
I have not tested it, but one way you could improve this is via multiple processes.
import os
import json
import pandas as pd
from multiprocessing import Pool

# wrap your json importer in a function that can be mapped
def read_json(pos_json):
    with open(pos_json) as f:
        return json.load(f)

def main():
    # set up your pool
    pool = Pool(processes=8)  # or whatever your hardware can support
    # get a list of file paths
    path_to_json = '/Users/Data/'
    file_list = [os.path.join(path_to_json, pos_json)
                 for pos_json in os.listdir(path_to_json)
                 if pos_json.endswith('.json')]
    # one parsed JSON document per file (don't shadow the built-in list)
    results = pool.map(read_json, file_list)

if __name__ == '__main__':
    main()
@OzkanSener Thanks again for the reply, and for the tip. As you said, first I needed to identify my bottleneck. The bottleneck was memory consumption, so the method you suggested did not help much. Instead I did the following:
with open('/Users/Data/netatmo_20171231_0000.json') as f:
    d = json.load(f)
data1 = [flatten(i) for i in d]
with open('/Users/Data/netatmo_20171231_0010.json') as f:
    d = json.load(f)
data2 = [flatten(i) for i in d]
with open('/Users/Data/netatmo_20171231_0020.json') as f:
    d = json.load(f)
data3 = [flatten(i) for i in d]
And so on, reusing the d variable instead of creating a new one each time.
At the end I create a single big list:
from itertools import chain

data = list(chain(data1, data2, data3))
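To avoid writing 72 near-identical blocks in the first place, here is a sketch of the same pattern as a loop; flatten is the poster's own function (not shown), and the glob pattern is an assumption based on the file names above.
import json
from glob import glob
from itertools import chain

chunks = []
for fname in sorted(glob('/Users/Data/netatmo_20171231_*.json')):
    with open(fname) as f:
        d = json.load(f)  # d is reused, so only one file is parsed at a time
    chunks.append([flatten(i) for i in d])  # flatten: the poster's helper

# equivalent to chain(data1, data2, ..., data72)
data = list(chain.from_iterable(chunks))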
I'm reading JSON records from a text file (one per line) and then creating an empty DataFrame. I want to add a new column 'id' to the empty DataFrame; 'id' comes from the JSON records in the text file.
The error message reads "Cannot set a frame with no defined index and a value that cannot be converted to a series". I tried to overcome this error by defining the DataFrame size upfront, which did not help. Any ideas?
import json
import pandas as pd

path = 'my/path'
mydata = []
myfile = open(path, "r")
for line in myfile:
    try:
        myline = json.loads(line)
        mydata.append(myline)
    except:
        continue
mydf = pd.DataFrame()
mydf['id'] = map(lambda myline: myline['id'], mydata)
I think it is better to use:
for line in myfile:
    try:
        # extract only the id into the list
        myline = json.loads(line)['id']
        mydata.append(myline)
    except:
        continue

print(mydata)
[10, 5]

# create the DataFrame with the constructor
mydf = pd.DataFrame({'id': mydata})
print(mydf)
   id
0  10
1   5
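If every line of the file is a valid JSON object, an alternative sketch is to let pandas do the parsing: pd.read_json with lines=True reads one object per line straight into a DataFrame. Note it has no equivalent of the try/except skip above, so it assumes no malformed lines.
import pandas as pd

# one JSON object per line -> one row per object
mydf = pd.read_json('my/path', lines=True)[['id']]
print(mydf)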