The input is a list and the output is in the form of a nested dictionary in a list - json

Input:
input_list=['1.exe','2.exe','3.exe','4.exe']
Output format:
out_dict=[{'name':'1.exe',
'children':[{'name':'2.exe',
'children':[{'name':'3.exe',
'children':[{'name':'4.exe'}]}]}]}]
The input is a list as shown above, and we have to obtain the output in the format shown in the lines above.
I tried using nested for loops but it isn't working. How can we implement JSON in this?

input_list=['1.exe','2.exe','3.exe','4.exe']
def split(data):
    """Turn a flat sequence into a chain of nested ``name``/``children`` dicts.

    ``['a', 'b', 'c']`` becomes
    ``[{'name': 'a', 'children': [{'name': 'b', 'children': [{'name': 'c'}]}]}]``.
    The last element carries no ``'children'`` key.

    Args:
        data: Sequence of names, outermost first.

    Returns:
        A one-element list holding the outermost dict, or ``data`` itself
        when it is empty (falsy).
    """
    if not data:
        return data

    # Build from the innermost element outwards. This avoids both the bare
    # ``except`` that used to terminate the recursion and the doubled
    # recursive call that made the original exponential in len(data).
    nested = []
    for name in reversed(data):
        node = {'name': name}
        if nested:
            node['children'] = nested
        nested = [node]
    return nested
print (split(input_list))
output:
[{'name': '1.exe', 'children':
[{'name': '2.exe', 'children':
[{'name': '3.exe', 'children':
[{'name': '4.exe'}]}]}]}]
Code which is a little easier to understand (with explanations):
input_list=['1.exe','2.exe','3.exe','4.exe']
def split(input_list):
    """Recursively nest a flat list into ``name``/``children`` dicts.

    Args:
        input_list: List of names, outermost first.

    Returns:
        ``input_list`` itself when it is empty; otherwise a one-element list
        whose dict holds the first name and, if more names follow, the
        nested remainder under ``'children'``.
    """
    if len(input_list) == 0:
        return input_list  # if there is no data return empty list

    first_value = input_list[0]  # first value
    # input_list[1:] is a list with all values except the first value.
    # Recurse exactly once and reuse the result: calling split() both in the
    # condition and in the dict doubles the work at every level.
    children = split(input_list[1:])
    if children != []:
        return [{'name': first_value, 'children': children}]
    # Last element: nothing left to nest below it.
    return [{'name': first_value}]
print (split(input_list))
output:
[{'name': '1.exe', 'children':
[{'name': '2.exe', 'children':
[{'name': '3.exe', 'children':
[{'name': '4.exe'}]}]}]}]
or:
input_list=['1.exe','2.exe','3.exe','4.exe']
def split(input_list):
    """Recursively nest a flat list into ``name``/``children`` dicts.

    Args:
        input_list: Iterable of names, outermost first.

    Returns:
        ``[]`` for empty input; otherwise a one-element list whose dict holds
        the first name and, if more names follow, the nested remainder under
        ``'children'``.
    """
    if not input_list:
        # Return an empty *list*. The previous ``{}`` compared unequal to
        # ``[]``, so the last element wrongly received ``'children': {}``.
        return []

    head, *tail = input_list  # This is a nicer way of doing head, tail = data[0], data[1:]
    children = split(tail)  # recurse once and reuse the result
    if children:
        return [{'name': head, 'children': children}]
    return [{'name': head}]
print (split(input_list))
Convert from Python to JSON:
import json
# a Python object (dict):
x = {
"name": "John",
"age": 30,
"city": "New York"
}
# convert into JSON:
y = json.dumps(x)
# the result is a JSON string:
print(y)
JSON is a syntax for storing and exchanging data. Convert from Python
to JSON If you have a Python object, you can convert it into a JSON
string by using the json.dumps() method.
import json
input_list=['1.exe','2.exe','3.exe','4.exe']
def split(input_list):
    """Nest a flat sequence into a single ``name``/``children`` dict chain.

    Unlike a list-wrapped variant, each ``'children'`` value here is one dict
    rather than a list of dicts, e.g. ``['a', 'b']`` becomes
    ``{'name': 'a', 'children': {'name': 'b'}}``.

    Args:
        input_list: Sequence of names, outermost first.

    Returns:
        The outermost dict, or ``input_list`` itself when it is empty.
    """
    if not input_list:
        return input_list

    # Build from the innermost name outwards. This replaces the bare
    # ``except`` used as loop terminator and the doubled recursive call.
    nested = None
    for name in reversed(input_list):
        node = {'name': name}
        if nested is not None:
            node['children'] = nested
        nested = node
    return nested
data = split(input_list)
print (json.dumps(data))

Related

Take Input Dynamically from user in Python Dictionary

I've created a Python Dictionary Structure as below:
import pprint
log_data = {
'Date':'',
'Prayers':{
'Fajr':'',
'Dhuhr/Jumu\'ah':'',
'Asr':'',
'Maghrib':'',
'Isha\'a':''
},
'Task List':[{
'Task':'',
'Timeline':'',
'Status':''
}],
'Meals':{
'Breakfast':{
'Menu':'',
'Place':'',
'Time':''
},
'Lunch':{
'Menu':'',
'Place':'',
'Time':''
},
'Evening Snacks':{
'Menu':'',
'Place':'',
'Time':''
},
'Dinner':{
'Menu':'',
'Place':'',
'Time':''
}
},
'Exercises':[{
'Exercise':'',
'Duration':''
}]
}
pprint.pprint(log_data)
As you can see, this is just a dictionary structure without data. I want to iterate over all the keys and take the value for each one from the user using input().
Then I would like to save this dictionary as json file.
Could you please help on how I can iterate over all keys and take input from user.
Thanks.
I searched but couldn't find the exact kind of help that I need.
For this kind of thing, one needs to use recursion.
This is not fancy, but will get the job done:
from copy import deepcopy
import json
import pprint
log_data = {
'Date':'',
'Prayers':{
'Fajr':'',
'Dhuhr/Jumu\'ah':'',
'Asr':'',
'Maghrib':'',
'Isha\'a':''
},
'Task List':[{
'Task':'',
'Timeline':'',
'Status':''
}],
# ...
}
def input_fields(substruct, path=""):
    """Interactively fill every leaf of a nested template with user input.

    Walks ``substruct`` in place. ``str``/``int`` values are replaced by
    whatever the user types, nested dicts are filled recursively, and a list
    is treated as "zero or more copies of its first element": the user is
    asked for one item after another until they decline to add more.

    Args:
        substruct: Dict template to fill; it is mutated.
        path: Dotted prefix shown in the prompts to locate the current field.

    Returns:
        The same ``substruct`` object, now holding the entered values.

    Raises:
        ValueError: If a list value is empty or its first element is not a
            dict.
    """
    print(f"Inputing values '{path}':")
    for fieldname, value in substruct.items():
        if isinstance(value, (str, int)):
            substruct[fieldname] = input(f"{path}.{fieldname}: ")
        elif isinstance(value, dict):
            input_fields(value, f"{path}.{fieldname}")
        elif isinstance(value, list):
            # Validate before touching the list so a rejected structure is
            # left exactly as the caller passed it.
            if not value or not isinstance(value[0], dict):
                raise ValueError("Not supported: A list should contain a dictionary-substructure")
            original = value[0]
            # Drop every template entry; only user-filled copies remain.
            value.clear()
            counter = 0
            while True:
                item = deepcopy(original)
                input_fields(item, f"{path}.{fieldname}.[{counter}]")
                value.append(item)
                answer = input(f"Enter one more {path}.{fieldname} item? (y/n) ")
                # startswith() tolerates an empty answer (plain Enter),
                # which indexing with [0] would turn into an IndexError.
                if not answer.strip().lower().startswith("y"):
                    break
                counter += 1
    return substruct
def main():
    """Collect values for a copy of ``log_data`` and save them as JSON.

    The template itself is left untouched because a deep copy is filled.
    The result is written to ``myfile.json`` in the current directory.
    """
    values = input_fields(deepcopy(log_data))
    # The context manager flushes and closes the file even if dumping fails.
    with open("myfile.json", "wt") as output_file:
        json.dump(values, output_file, indent=4)
if __name__ == "__main__":
main()

skipping Attribute error while importing twitter data into pandas

I have a file of almost 1 GB storing almost 0.2 million tweets. A file of this size inevitably contains some bad records. The errors are shown as
AttributeError: 'int' object has no attribute 'items'. This occurs when I try to run this code.
raw_data_path = input("Enter the path for raw data file: ")
tweet_data_path = raw_data_path
tweet_data = []
tweets_file = open(tweet_data_path, "r", encoding="utf-8")
for line in tweets_file:
try:
tweet = json.loads(line)
tweet_data.append(tweet)
except:
continue
tweet_data2 = [tweet for tweet in tweet_data if isinstance(tweet,
dict)]
from pandas.io.json import json_normalize
tweets = json_normalize(tweet_data2)[["text", "lang", "place.country",
"created_at", "coordinates",
"user.location", "id"]]
Can a solution be found where those lines where such error occurs can be skipped and continue for the rest of the lines.
The issue here is not with the lines in the data but with tweet_data itself. If you check your tweet_data, you will find one or more elements which are of 'int' datatype (json_normalize expects your tweet_data to be a dict or a list of dicts).
You may want to check your tweet data and remove values other than dictionaries.
I was able to reproduce with below example for json_normalize document:
Working Example:
from pandas.io.json import json_normalize
data = [{'state': 'Florida',
'shortname': 'FL',
'info': {
'governor': 'Rick Scott'
},
'counties': [{'name': 'Dade', 'population': 12345},
{'name': 'Broward', 'population': 40000},
{'name': 'Palm Beach', 'population': 60000}]},
{'state': 'Ohio',
'shortname': 'OH',
'info': {
'governor': 'John Kasich'
},
'counties': [{'name': 'Summit', 'population': 1234},
{'name': 'Cuyahoga', 'population': 1337}]},
]
json_normalize(data)
Output:
Displays the dataframe
Reproducing Error:
from pandas.io.json import json_normalize
data = [{'state': 'Florida',
'shortname': 'FL',
'info': {
'governor': 'Rick Scott'
},
'counties': [{'name': 'Dade', 'population': 12345},
{'name': 'Broward', 'population': 40000},
{'name': 'Palm Beach', 'population': 60000}]},
{'state': 'Ohio',
'shortname': 'OH',
'info': {
'governor': 'John Kasich'
},
'counties': [{'name': 'Summit', 'population': 1234},
{'name': 'Cuyahoga', 'population': 1337}]},
1 # *Added an integer to the list*
]
result = json_normalize(data)
Error:
AttributeError: 'int' object has no attribute 'items'
How to prune "tweet_data": Not needed, if you follow update below
Before normalising, run below:
tweet_data = [tweet for tweet in tweet_data if isinstance(tweet, dict)]
Update: (for the for loop)
for line in tweets_file:
try:
tweet = json.loads(line)
if isinstance(tweet, dict):
tweet_data.append(tweet)
except:
continue
The final form of code looks like this:
tweet_data_path = raw_data_path
tweet_data = []
# The context manager closes the file even if reading fails part-way.
with open(tweet_data_path, "r", encoding="utf-8") as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError: skip lines that are
            # not valid JSON without also swallowing KeyboardInterrupt etc.
            continue
        # Keep only JSON objects; valid non-dict values such as a bare
        # integer are what trigger the AttributeError in json_normalize.
        if isinstance(tweet, dict):
            tweet_data.append(tweet)
This removes the possibility of the attribute error that was preventing the import into a pandas dataframe.

Validating trello board API responses in Python unittest

I am writing a unittest that queries the trello board API and want to assert that a particular card exists.
The first attempt was using the /1/boards/[board_id]/lists request, which gives results like:
[{'cards': [
{'id': 'id1', 'name': 'item1'},
{'id': 'id2', 'name': 'item2'},
{'id': 'id3', 'name': 'item3'},
{'id': 'id4', 'name': 'item4'},
{'id': 'id5', 'name': 'item5'},
{'id': 'id6', 'name': 'item6'}],
'id': 'id7',
'name': 'ABC'},
{'cards': [], 'id': 'id8', 'name': 'DEF'},
{'cards': [], 'id': 'id9', 'name': 'GHI'}]
I want to assert that 'item6' is indeed in the above mentioned list. Loading the json and using assertTrue, like this:
element = [item for item in json_data if item['name'] == "item6"]
self.assertTrue(element)
but I receive an error: "TypeError: the JSON object must be str, bytes or bytearray, not 'list'".
Then discovered using the /1/boards/[board_id]/cards request gives a plain list of cards:
[
{'id': 'id1', 'name': 'item1'},
{'id': 'id2', 'name': 'item2'},
...
]
How should I write this unittest assertion?
The neatest option is to create a class that will equal the dict for the card you want to ensure is there, then use that in an assertion. For your example, with a list of cards returned over the api:
cards = board.get_cards()
self.assertIn(Card(name="item6"), cards)
Here's a reasonable implementation for the Card() helper class, it may look a little complex but is mostly straight forward:
class Card(object):
    """Class that matches a dict with card details from json api response.

    An instance compares equal to any dict whose ``"name"`` entry equals the
    expected name, so it can be used with ``assertIn`` against the list of
    card dicts returned by the API.
    """

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        if isinstance(other, dict):
            return other.get("name", None) == self.name
        # Let Python try the reflected comparison for non-dict operands.
        return NotImplemented

    def __repr__(self):
        # Shown in assertion failure messages. It must only use attributes
        # that exist, otherwise a failing assertIn raises AttributeError
        # instead of reporting the missing card.
        return "{}({!r})".format(self.__class__.__name__, self.name)
You could add more fields to validate as needed.
One question worth touching on at this point is whether the unit test should be making real api queries. Generally a unit test would have test data to just focus on the function you control, but perhaps this is really an integration test for your trello deployment using the unittest module?
import unittest
from urllib.request import urlopen
import json
class Basic(unittest.TestCase):
    """Check that a card named 'card6' exists on the Trello board."""

    url = 'https://api.trello.com/1/boards/[my_id]/cards?fields=id,name,idList,url&key=[my_key]&token=[my_token]'

    @classmethod
    def setUpClass(cls):
        # Query the API once per test class, when the tests run. Doing this
        # in the class body would fire the request at import time and turn a
        # network failure into an import error.
        with urlopen(cls.url) as response:
            cls.json_ob = json.loads(response.read())
        cls.el_list = [item for item in cls.json_ob if item['name'] == 'card6']

    def testBasic(self):
        # A non-empty list means at least one matching card was returned.
        self.assertTrue(self.el_list)
if __name__ == '__main__':
unittest.main()
So what I did wrong: I focused too much on the list itself which I got after using the following code:
import requests
from pprint import pprint
import json
url = "https://api.trello.com/1/boards/[my_id]/lists"
params = {"cards":"open","card_fields":"name","fields":"name","key":"[my_key]","token":"[my_token]"}
response = requests.get(url=url, params=params)
pprint(response.json())

scipy dendrogram to json for d3.js tree visualisation

I am trying to convert results of scipy hierarchical clustering into json for display in d3.js here an example
The following code produces a dendrogram with 6 branches.
import pandas as pd
import scipy.spatial
import scipy.cluster
d = {'employee' : ['A', 'B', 'C', 'D', 'E', 'F'],
'skillX': [2,8,3,6,8,10],
'skillY': [8,15,6,9,7,10]}
d1 = pd.DataFrame(d)
distMat = xPairWiseDist = scipy.spatial.distance.pdist(np.array(d1[['skillX', 'skillY']]), 'euclidean')
clusters = scipy.cluster.hierarchy.linkage(distMat, method='single')
dendo = scipy.cluster.hierarchy.dendrogram(clusters, labels = list(d1.employee), orientation = 'right')
dendo
my question
How can I represent the data in a json file in a format that d3.js understand
{'name': 'Root1',
 'children': [{'name': 'B'},
              {'name': 'E-D-F-C-A',
               'children': [{'name': 'C-A',
                             'children': [{'name': 'A'},
                                          {'name': 'C'}]
                            }
                           ]
              }
             ]
}
The embarrassing truth is that I do not know whether I can extract this information from the dendrogram or from the linkage matrix, nor how to do it.
I am thankful for any help I can get.
EDIT TO CLARIFY
So far, I have tried to use the totree method but have difficulties understanding its structure (yes, I read the documentation).
a = scipy.cluster.hierarchy.to_tree(clusters , rd=True)
for x in a[1]:
#print x.get_id()
if x.is_leaf() != True :
print x.get_left().get_id(), x.get_right().get_id(), x.get_count()
You can do this in three steps:
Recursively construct a nested dictionary that represents the tree returned by Scipy's to_tree method.
Iterate through the nested dictionary to label each internal node with the leaves in its subtree.
dump the resulting nested dictionary to JSON and load into d3.
Construct a nested dictionary representing the dendrogram
For the first step, it is important to call to_tree with rd=False so that the root of the dendrogram is returned. From that root, you can construct the nested dictionary as follows:
# Create a nested dictionary from the ClusterNode's returned by SciPy
def add_node(node, parent):
    """Append ``node`` and, recursively, its subtree to ``parent["children"]``.

    Args:
        node: Tree node exposing ``id``, ``left`` and ``right`` attributes
            (the ClusterNode objects returned by SciPy's ``to_tree``).
        parent: Dict with a ``"children"`` list; it is mutated in place.
    """
    # First create the new node and append it to its parent's children
    new_node = dict(node_id=node.id, children=[])
    parent["children"].append(new_node)

    # Recursively add the current node's children. Compare against None
    # explicitly so a child is never skipped merely for being falsy.
    for child in (node.left, node.right):
        if child is not None:
            add_node(child, new_node)
T = scipy.cluster.hierarchy.to_tree( clusters , rd=False )
d3Dendro = dict(children=[], name="Root1")
add_node( T, d3Dendro )
# Output: => {'name': 'Root1', 'children': [{'node_id': 10, 'children': [{'node_id': 1, 'children': []}, {'node_id': 9, 'children': [{'node_id': 6, 'children': [{'node_id': 0, 'children': []}, {'node_id': 2, 'children': []}]}, {'node_id': 8, 'children': [{'node_id': 5, 'children': []}, {'node_id': 7, 'children': [{'node_id': 3, 'children': []}, {'node_id': 4, 'children': []}]}]}]}]}]}
The basic idea is to start with a node not in the dendrogram that will serve as the root of the whole dendrogram. Then we recursively add left- and right-children to this dictionary until we reach the leaves. At this point, we do not have labels for the nodes, so I'm just labeling nodes by their clusterNode ID.
Label the dendrogram
Next, we need to use the node_ids to label the dendrogram. The comments should be enough explanation for how this works.
# Label each node with the names of each leaf in its subtree
def label_tree(n):
    """Replace ``node_id`` with a ``name`` on ``n`` and all its descendants.

    A leaf is named after its own label; an internal node is named after the
    sorted, ``"-"``-joined labels of every leaf below it. The tree is mutated
    in place.

    NOTE(review): relies on a module-level ``id2name`` mapping from leaf node
    id to label that is not defined in this snippet. Confirm it is built
    before this function is called.

    Args:
        n: Node dict with ``"node_id"`` and ``"children"`` keys.

    Returns:
        List of the leaf labels found in the subtree rooted at ``n``.
    """
    # If the node is a leaf, then we have its name
    if len(n["children"]) == 0:
        leaf_names = [id2name[n["node_id"]]]
    # If not, flatten all the leaves in the node's subtree
    else:
        # A plain loop instead of reduce(): reduce is not a builtin on
        # Python 3, and extend() avoids rebuilding the list for every child.
        leaf_names = []
        for child in n["children"]:
            leaf_names.extend(label_tree(child))

    # Delete the node id since we don't need it anymore and
    # it makes for cleaner JSON
    del n["node_id"]

    # Labeling convention: "-"-separated leaf names
    n["name"] = "-".join(sorted(map(str, leaf_names)))

    return leaf_names
label_tree( d3Dendro["children"][0] )
Dump to JSON and load into D3
Finally, after the dendrogram has been labeled, we just need to output it to JSON and load into D3. I'm just pasting the Python code to dump it to JSON here for completeness.
# Output to JSON; the context manager flushes and closes the file for us.
with open("d3-dendrogram.json", "w") as json_file:
    json.dump(d3Dendro, json_file, sort_keys=True, indent=4)
Output
I created Scipy and D3 versions of the dendrogram below. For the D3 version, I simply plugged the JSON file I output ('d3-dendrogram.json') into this Gist.
SciPy dendrogram
D3 dendrogram

Using JSON keys as attributes in nested JSON

I'm working with nested JSON-like data structures in Python 2.7 that I exchange with some foreign Perl code. I just want to 'work with' these nested structures of lists and dictionaries in a more pythonic way.
So if I have a structure like this...
a = {
'x': 4,
'y': [2, 3, { 'a': 55, 'b': 66 }],
}
...I want to be able to deal with it in a python script as if it was nested python classes/Structs, like this:
>>> aa = j2p(a) # <<- this is what I'm after.
>>> print aa.x
4
>>> aa.z = 99
>>> print a
{
'x': 4,
'y': [2, 3, { 'a': 55, 'b': 66 }],
'z': 99
}
>>> aa.y[2].b = 999
>>> print a
{
'x': 4,
'y': [2, 3, { 'a': 55, 'b': 999 }],
'z': 99
}
Thus aa is a proxy into the original structure. This is what I came up with so far, inspired by the excellent What is a metaclass in Python? question.
def j2p(x):
    """Wrap a JSON-style list or dict in a proxy; pass anything else through.

    The proxy writes through to the wrapped object, so changes made via the
    proxy are visible in the original structure:

    >>> a = { 'x':[5,8], 'y':5}
    >>> aa = j2p(a)
    >>> aa.y=7
    >>> print a
    {'x': [5, 8], 'y':7}
    >>> aa.x[1]=99
    >>> print a
    {'x': [5, 99], 'y':7}
    >>> aa.x[0] = {'g':5, 'h':9}
    >>> print a
    {'x': [ {'g':5, 'h':9} , 99], 'y':7}
    >>> print aa.x[0].g
    5
    """
    # Lists are checked first, then dicts; scalars fall through unchanged.
    if isinstance(x, list):
        return _list_proxy(x)
    if isinstance(x, dict):
        return _dict_proxy(x)
    return x
class _list_proxy(object):
def __init__(self, proxied_list):
object.__setattr__(self, 'data', proxied_list)
def __getitem__(self, a):
return j2p(object.__getattribute__(self, 'data').__getitem__(a))
def __setitem__(self, a, v):
return object.__getattribute__(self, 'data').__setitem__(a, v)
class _dict_proxy(_list_proxy):
    """Attribute-based proxy around a dict: ``proxy.key`` maps to ``data[key]``.

    Every attribute read and write is redirected to the wrapped dict, which
    is why the methods reach the dict through ``object.__getattribute__``
    rather than ``self.data``.
    """

    def __init__(self, proxied_dict):
        _list_proxy.__init__(self, proxied_dict)

    def __getattribute__(self, a):
        data = object.__getattribute__(self, 'data')
        try:
            value = data[a]
        except KeyError:
            # Attribute lookups must fail with AttributeError; a leaking
            # KeyError breaks hasattr(), getattr() defaults and isinstance().
            raise AttributeError(a)
        return j2p(value)

    def __setattr__(self, a, v):
        return object.__getattribute__(self, 'data').__setitem__(a, v)
def p2j(x):
    """Return the raw dict/list wrapped by a j2p proxy.

    Anything that is not a proxy created with j2p is returned unchanged.
    """
    is_proxy = isinstance(x, (_list_proxy, _dict_proxy))
    # Bypass the proxy's own attribute redirection to reach the wrapped data.
    return object.__getattribute__(x, 'data') if is_proxy else x
Now I wonder whether there is an elegant way of mapping a whole set of the __*__ special functions, like __iter__, __delitem__? so I don't need to unwrap things using p2j() just to iterate or do other pythonic stuff.
# today:
for i in p2j(aa.y):
print i
# would like to...
for i in aa.y:
print i
I think you're making this more complex than it needs to be. If I understand you correctly, all you should need to do is this:
import json
class Struct(dict):
    """Dict whose keys can also be read, set and deleted as attributes.

    Missing names raise ``AttributeError`` (not ``KeyError``) on attribute
    access so that ``hasattr``, ``getattr(obj, name, default)`` and
    ``copy.deepcopy`` behave as they do for ordinary objects.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name)
j = '{"y": [2, 3, {"a": 55, "b": 66}], "x": 4}'
aa = json.loads(j, object_hook=Struct)
for i in aa.y:
print(i)
When you load JSON, the object_hook parameter lets you specify a callable object to process objects that it loads. I've just used it to turn the dict into an object that allows attribute access to its keys. Docs
There is an attrdict library that does exactly that in a very safe manner, but if you want, a quick and dirty (possibly leaking memory) approach was given in this answer:
class AttrDict(dict):
    """Dict that doubles as its own attribute namespace."""

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Pointing __dict__ at the mapping itself makes every key readable
        # and writable as an attribute, and the other way round.
        self.__dict__ = self
j = '{"y": [2, 3, {"a": 55, "b": 66}], "x": 4}'
aa = json.loads(j, object_hook=AttrDict)
I found the answer: There is intentionally no way to automatically map the special methods in Python using __getattribute__. So to achieve what I want, I need to explicitly define all special methods, like __len__, one after the other.