Convert nested CSV to nested JSON using Pandas - json

I have a dataframe like this
org.iden.account,org.iden.id,adress.city,adress.country,person.name.fullname,person.gender,person.birthYear,subs.id,subs.subs1.birthday,subs.subs1.org.address.country,subs.subs1.org.address.strret1,subs.org.buyer.email.address,subs.org.buyer.phone.number
account123,id123,riga,latvia,laura,female,1990,subs123,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com|email2#sanoma.com,+371401234567
account123,id000,riga,latvia,laura,female,1990,subs456,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com,+371401234567
account123,id456,riga,latvia,laura,female,1990,subs789,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com,+371401234567
I need to convert this into nested JSON, where the nesting follows the column names split on the dot (.). For the first row, the expected result is:
{
"org": {
"iden": {
"account": "account123",
"id": "id123"
}
},
"address": {
"city": "riga",
"country": "latvia"
},
"person": {
"name": {
"fullname": "laura"
},
"gender": "female",
"birthYear": 1990
},
"subs": {
"id": "subs123",
"subs1": {
"birthday": "1990-12-14T00:00:00Z",
"org": {
"address": {
"country": "latvia",
"street1": "street 1"
}
}
},
"org": {
"buyer": {
"email": {
"address": "email1#myorg.com|email2#sanoma.com"
},
"phone": {
"number": "+371401234567"
}
}
}
}
}
And then of course all the records as a list. I have tried to use simple pandas .to_json() but it didn't help and I get the following which doesn't have the nested structure I need.
[{"org.iden.account":"account123","org.iden.id":"id123","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs123","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com|email2#sanoma.com","subs.org.buyer.phone.number":371401234567},{"org.iden.account":"account123","org.iden.id":"id000","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs456","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com","subs.org.buyer.phone.number":371407654321},{"org.iden.account":"account123","org.iden.id":"id456","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs789","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com","subs.org.buyer.phone.number":371407654321}]
Any help in this would be highly appreciated!

def df_to_json(row):
    """Turn one row with dotted labels into a nested dict.

    Every label in ``row.index`` is split on ``'.'``. Each part becomes one
    nesting level, and the value stored under the last part is ``row[label]``.

    Args:
        row: An object exposing ``.index`` (the dotted string labels) and
            ``row[label]`` lookup, e.g. the Series passed by
            ``df.apply(df_to_json, axis='columns')``.

    Returns:
        A nested dict mirroring the dotted structure of the labels.
    """
    tree = {}
    for item in row.index:
        node = tree
        for part in item.split('.'):
            # Step one level down, remembering the dict we came from.
            parent, node = node, node.setdefault(part, {})
        # ``part`` is now the last path component: overwrite the placeholder
        # dict created for it with the actual cell value.
        parent[part] = row[item]
    return tree
>>> df.apply(df_to_json, axis='columns').tolist()
[{'org': {'iden': {'account': 'account123', 'id': 'id123'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs123',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com|email2#sanoma.com'},
'phone': {'number': 371401234567}}}}},
{'org': {'iden': {'account': 'account123', 'id': 'id000'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs456',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com'},
'phone': {'number': 371401234567}}}}},
{'org': {'iden': {'account': 'account123', 'id': 'id456'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs789',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com'},
'phone': {'number': 371401234567}}}}}]

Assuming your json structure looks something like this
json_data = [
{
"org.iden.account": "account123",
"org.iden.id": "id123",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs123",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com|email2#sanoma.com",
"subs.org.buyer.phone.number": 371401234567
},
{
"org.iden.account": "account123",
"org.iden.id": "id000",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs456",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
},
{
"org.iden.account": "account123",
"org.iden.id": "id456",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs789",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
}
]
You could nest it on a dict by dict basis.
def nestify(unnested):
    """Expand a flat dict with dotted keys into nested dicts.

    Args:
        unnested: Mapping whose keys are dotted paths such as
            ``'org.iden.account'``.

    Returns:
        A new dict in which every path component but the last is a nested
        dict and the last component holds the original value.
    """
    nested = dict()
    for dotted_key, value in unnested.items():
        *parents, leaf = dotted_key.split('.')
        node = nested
        for name in parents:
            # Create intermediate levels only the first time they are seen.
            if name not in node:
                node[name] = dict()
            node = node[name]
        node[leaf] = value
    return nested
This function takes one of the unnested dicts, iterates through the keys and assigns the value to the final depth.
Commented version
def nestify(unnested):
    """Expand a flat dict with dotted keys into nested dicts.

    Args:
        unnested: Mapping whose keys are dotted paths such as
            ``'org.iden.account'``.

    Returns:
        A new nested dict built from the dotted keys.
    """
    # this will be our return value
    nested = dict()
    for k, v in unnested.items():
        # current_dict is the current dict we're operating on
        # gets reset to the base dict on each unnested key
        current_dict = nested
        parts = k.split('.')
        # only create dicts up to the final period
        # for example, current_dict is the base
        # and creates an empty dict under the org key
        # then current_dict is under the org key
        # and creates an empty dict under the iden key
        # then current_dict is under the iden key
        for i in parts[:-1]:
            # no reason to create an empty dict if it was
            # already created for a prior key
            if i not in current_dict:
                current_dict[i] = dict()
            current_dict = current_dict[i]
        # assign the value of the unnested dict
        # to each final current_dict
        # for example, the final part of the first key is "account"
        # so rather than assign an empty dict, assign it "account123"
        current_dict[parts[-1]] = v
    return nested
Then you can just call it on each element of the json_data list in a comprehension.
nested = [nestify(i) for i in json_data]
Full code:
json_data = [
{
"org.iden.account": "account123",
"org.iden.id": "id123",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs123",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com|email2#sanoma.com",
"subs.org.buyer.phone.number": 371401234567
},
{
"org.iden.account": "account123",
"org.iden.id": "id000",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs456",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
},
{
"org.iden.account": "account123",
"org.iden.id": "id456",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs789",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
}
]
def nestify(unnested):
    """Build a nested dict from a flat dict whose keys are dotted paths.

    Args:
        unnested: Mapping with keys like ``'subs.org.buyer.phone.number'``.

    Returns:
        A new dict where each dot in a key introduces one nesting level and
        the final path component maps to the original value.
    """
    nested = dict()
    for path, value in unnested.items():
        parts = path.split('.')
        last = parts[-1]
        level = nested
        # Walk (and create on demand) every level above the final component.
        for segment in parts[:-1]:
            if segment not in level:
                level[segment] = dict()
            level = level[segment]
        level[last] = value
    return nested
nested = [nestify(i) for i in json_data]
print(nested)
Output:
[
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id123'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs123',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com|email2#sanoma.com'
},
'phone': {
'number': 371401234567
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id000'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs456',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id456'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs789',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
}
]

Related

Remove empty elements from nested JSON

I have a nested json with an arbitrary depth level :
json_list = [
{
'class': 'Year 1',
'room': 'Yellow',
'students': [
{'name': 'James', 'sex': 'M', 'grades': {}},
]
},
{
'class': 'Year 2',
'info': {
'teachers': {
'math': 'Alan Turing',
'physics': []
}
},
'students': [
{ 'name': 'Tony', 'sex': 'M', 'age': ''},
{ 'name': 'Jacqueline', 'sex': 'F' },
],
'other': []
}
]
I want to remove any element whose value meets certain criteria.
For example:
values_to_drop = ({}, (), [], '', ' ')
filtered_json = clean_json(json_list, values_to_drop)
filtered_json
Expected Output of clean_json:
[
{
'class': 'Year 1',
'room': 'Yellow',
'students': [
{'name': 'James', 'sex': 'M'},
]
},
{
'class': 'Year 2',
'info': {
'teachers': {
'math': 'Alan Turing',
}
},
'students': [
{ 'name': 'Tony', 'sex': 'M'},
{ 'name': 'Jacqueline', 'sex': 'F'},
]
}
]
I thought of first converting the object to a string using json.dumps, then searching the string and replacing each value that meets the criteria with some kind of flag, so that it can be filtered out before reading the string back with json.loads. However, I couldn't figure it out, and I don't know if this is the way to go.
I managed to get the desired output by tweaking this answer a bit:
def clean_json(json_obj, values_to_drop):
    """Recursively remove unwanted values from a JSON-like structure.

    Dict entries and list items are dropped when their value is in
    ``values_to_drop``. The check is repeated after a child has been
    cleaned, so a container that only held unwanted values (and is
    therefore empty afterwards) is dropped as well, provided the empty
    container is itself listed in ``values_to_drop``.

    Args:
        json_obj: Any combination of dicts, lists and scalar values.
        values_to_drop: Container of values to remove, compared with ``in``
            (i.e. by equality), e.g. ``({}, (), [], '', ' ')``.

    Returns:
        A cleaned copy for dicts and lists; any other object is returned
        unchanged.
    """
    if isinstance(json_obj, dict):
        cleaned_dict = {}
        for key, value in json_obj.items():
            if value in values_to_drop:
                continue
            value = clean_json(value, values_to_drop)
            # Cleaning may have emptied the child; re-check before keeping it.
            if value not in values_to_drop:
                cleaned_dict[key] = value
        return cleaned_dict
    if isinstance(json_obj, list):
        cleaned_list = []
        for item in json_obj:
            if item in values_to_drop:
                continue
            item = clean_json(item, values_to_drop)
            # Same post-cleaning check as for dict values.
            if item not in values_to_drop:
                cleaned_list.append(item)
        return cleaned_list
    return json_obj

How do I get an element that is nested within a JSON dictionary?

I got a JSON response using the Spotify API and I'm trying to access the element called 'name' (one says '3 Doors Down' and the other starts with 'Bret Michaels') that seems to be inside the 'items' element but I can't seem to find the solution.
This is how I loaded the data:
search_results = requests.get(search_url + 'q=' + query + '&type=artist', headers=granted_headers).json()
Here is my JSON data:
{
'artists': {
'href': 'https://api.spotify.com/v1/search?query=3+doors+down&type=artist&offset=0&limit=20',
'items': [
{
'external_urls': {
'spotify': 'https://open.spotify.com/artist/2RTUTCvo6onsAnheUk3aL9'
},
'followers': {
'href': None,
'total': 2631330
},
'genres': [
'alternative metal',
'nu metal',
'pop rock',
'post-grunge'
],
'href': 'https://api.spotify.com/v1/artists/2RTUTCvo6onsAnheUk3aL9',
'id': '2RTUTCvo6onsAnheUk3aL9',
'images': [
{
'height': 640,
'url': 'https://i.scdn.co/image/ead4e883a59d30d8c157385aa531d3fe8e688fc0',
'width': 640
},
{
'height': 320,
'url': 'https://i.scdn.co/image/611a4fd8aaf2637c5894acf65f12e79d75926329',
'width': 320
},
{
'height': 160,
'url': 'https://i.scdn.co/image/f1a1a2c37f2f6d242b1ab7ae3f4d893bf5822095',
'width': 160
}
],
'name': '3 Doors Down',
'popularity': 72,
'type': 'artist',
'uri': 'spotify:artist:2RTUTCvo6onsAnheUk3aL9'
},
{
'external_urls': {
'spotify': 'https://open.spotify.com/artist/2kPbQDZvnasPcCuXbq6YQx'
},
'followers': {
'href': None,
'total': 156
},
'genres': [
],
'href': 'https://api.spotify.com/v1/artists/2kPbQDZvnasPcCuXbq6YQx',
'id': '2kPbQDZvnasPcCuXbq6YQx',
'images': [
],
'name': 'Bret Michaels (Featuring Brad Arnold of 3 Doors Down, Chris Cagle, Mark Wills)',
'popularity': 4,
'type': 'artist',
'uri': 'spotify:artist:2kPbQDZvnasPcCuXbq6YQx'
}
],
'limit': 20,
'next': None,
'offset': 0,
'previous': None,
'total': 4
}
}
# The artist entries live in the list stored at search_results['artists']['items'].
artists = search_results['artists']['items']  # This is a list
for artist in artists:  # artist is each object in the list
    print(artist['name'])
Output:
> '3 Doors Down'
> 'Bret Michaels (Featuring Brad Arnold of 3 Doors Down, Chris Cagle, Mark Wills)'

splitting JSON file using python

I have below big json file
{
"sections": [
{
"facts": [
{
"name": "Server",
"value": "<https://xxxxxxx:18443/collector/pipeline/v1_allagents>"
},
{
"name": "Environment",
"value": "dev"
},
{
"name": "Issue",
"value": "Server is [EDITED]"
}
]
},
{
"facts": [
{
"name": "Server",
"value": "<https://xxxxx:18443/collector/pipeline/customer-characterstics-v1>"
},
{
"name": "Environment",
"value": "dev"
},
{
"name": "Issue",
"value": "Server is [STOPPED]"
}
]
}
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxx:18443/collector/pipeline/soap-post-v1_relations>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u' status is [STOPPED]'}
]
},
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxxx.134:18443/collector/pipeline/characterstics-v1_allagents>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u' status is [EDITED]'}
]
},
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxxx:18443/collector/pipeline/ab23-8128b7c9fcf2>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u'status is [EDITED]'}
]
}
]
}
....
now I'm struggling to split above file as below and dump into another new files:
{
"text": "Status",
"themeColor": "#FF0000",
{
"sections": [
{
"facts": [
{
"name": "Server",
"value": "<https://xxxxxxx:18443/collector/pipeline/v1_allagents>"
},
{
"name": "Environment",
"value": "dev"
},
{
"name": "Issue",
"value": "Server is [EDITED]"
}
]
}
]
}
}
What I have been able to achieve so far is printing each tag under facts, but not in the format shown above.
So, I'm having trouble adding those extra header lines before the sections and then dumping the result to another file.
How should I approach this without using jq?
Each split file should have the same header, followed by exactly the same structure for the sections and facts keys.
edit:
As per Andrej's solution it works perfectly alright for one split at a time.
But how can I split the file into chunks of size n? Say my original big file contains 5 facts blocks and I want 2 facts blocks per file (n = 2).
It should then create 3 JSON files: the first two contain 2 facts blocks each, and the last one contains the single remaining block.
Then final output should be:
{'text': ' Status', 'themeColor': '#FF0000', 'sections':
[
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxx:18443/collector/pipeline/soap-post-v1>'},
{'name': 'Environment', 'value': u'dev'},
{'name': 'Issue', 'value': u' status is [STOPPED]'}
]
},
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxx:18443/collector/pipeline/be9694085a70>'},
{'name': 'Environment', 'value': u'dev'},
{'name': 'Issue', 'value': u' status is [STOPPED]'}
]
}
]
}
and
{'text': ' Status', 'themeColor': '#FF0000', 'sections':
[
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxx:18443/collector/pipeline/soap-post-v1_relations>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u' status is [STOPPED]'}
]
},
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxxx.134:18443/collector/pipeline/characterstics-v1_allagents>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u' status is [EDITED]'}
]
}
]}
Only one facts block from the original file is left over, so it gets its own JSON file:
{'text': ' Status', 'themeColor': '#FF0000', 'sections':
[
{'facts':
[
{'name': 'Server', 'value': u'<https://xxxxxxx:18443/collector/pipeline/ab23-8128b7c9fcf2>'},
{'name': 'Environment', 'value': u'dev'}, {'name': 'Issue', 'value': u'status is [EDITED]'}
]
}
]}
You can load the big JSON file into a dictionary using the json module, then treat the loaded data as an ordinary Python dict.
If your file contains the JSON from the question, then this example:
import json
# Load the whole input document; JSON text is read as UTF-8.
with open('YOUR_JSON_FILE.json', 'r', encoding='utf-8') as f_in:
    data = json.load(f_in)

# Write one numbered output file (starting at 1) per entry of data['sections'].
for i, fact in enumerate(data['sections'], 1):
    with open('data_out_{}.json'.format(i), 'w') as f_out:
        # NOTE: 'sections' holds the single section object itself, not a
        # one-element list; use [fact] if consumers expect a list.
        d = {
            'text': 'Status',
            'themeColor': '#FF0000',
            'sections': fact,
        }
        json.dump(d, f_out, indent=4)
This creates two files data_out_1.json and data_out_2.json containing:
{
"text": "Status",
"themeColor": "#FF0000",
"sections": {
"facts": [
{
"name": "Server",
"value": "<https://xxxxxxx:18443/collector/pipeline/v1_allagents>"
},
{
"name": "Environment",
"value": "dev"
},
{
"name": "Issue",
"value": "Server is [EDITED]"
}
]
}
}
and
{
"text": "Status",
"themeColor": "#FF0000",
"sections": {
"facts": [
{
"name": "Server",
"value": "<https://xxxxx:18443/collector/pipeline/customer-characterstics-v1>"
},
{
"name": "Environment",
"value": "dev"
},
{
"name": "Issue",
"value": "Server is [STOPPED]"
}
]
}
}
EDIT:
To chunk the JSON file, you can use this example:
import json
def chunk(lst, n):
    """Yield consecutive slices of ``lst`` containing at most ``n`` items.

    Args:
        lst: A sliceable sequence supporting ``len()``.
        n: Maximum number of items per slice (used as the ``range`` step).

    Yields:
        ``lst[i:i + n]`` for ``i = 0, n, 2n, ...``; the final slice may be
        shorter than ``n``.
    """
    for start in range(0, len(lst), n):
        yield lst[start:start + n]
# Load the whole input document; JSON text is read as UTF-8.
with open('YOUR_JSON_FILE.json', 'r', encoding='utf-8') as f_in:
    data = json.load(f_in)

# Write one numbered output file (starting at 1) per group of sections.
for i, fact in enumerate(chunk(data['sections'], 2), 1):  # <-- change 2 to your chunk size
    with open('data_out_{}.json'.format(i), 'w') as f_out:
        d = {
            'text': 'Status',
            'themeColor': '#FF0000',
            # ``fact`` is a list slice of up to chunk-size section objects.
            'sections': fact,
        }
        json.dump(d, f_out, indent=4)
import json
# Read the source document; JSON text is read as UTF-8.
with open('/tmp/json_response_output.json', encoding='utf-8') as datafile:
    datastore = json.load(datafile)

# Emit one file per section, numbered from 0, keeping every other top-level key.
for n, details in enumerate(datastore['sections']):
    # A shallow copy is enough: only the 'sections' key is replaced.
    split_json = datastore.copy()
    split_json['sections'] = [details]
    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # must be opened with an encoding that can represent them.
    with open(f'json_response_output_part{n}.json', 'w', encoding='utf-8') as f:
        json.dump(split_json, f, indent=4, ensure_ascii=False)

How to transform or map a json to a specific json format

I have a JSON payload which i want to transform or map to another JSON payload.
source = {
'customer': {
'first_name': 'Inigo',
'last_name': 'Montoya',
'Age': 24,
},
'address': {
'city': 'Sicily',
'country': 'Florin',
},
}
Output should be:
{"fullName": "Inigo Montoya", "city": "Sicily"}
Try like this:
import json
source = {
'customer': {
'first_name': 'Inigo',
'last_name': 'Montoya',
'Age': 24,
},
'address': {
'city': 'Sicily',
'country': 'Florin',
},
}
def make_res_payload(source):
    """Map the source payload to the target JSON string.

    Args:
        source: Dict with ``source['customer']['first_name']``,
            ``source['customer']['last_name']`` and
            ``source['address']['city']``.

    Returns:
        A JSON string with the keys ``fullName`` (first and last name joined
        by a space) and ``city``.

    Raises:
        KeyError: If any of the required keys is missing.
    """
    customer = source['customer']
    full_name = f"{customer['first_name']} {customer['last_name']}"
    city = source['address']['city']
    return json.dumps({'fullName': full_name, 'city': city})
res = make_res_payload(source)
print(res)
Outputting:
{"fullName": "Inigo Montoya", "city": "Sicily"}

How to return Nested TreeView JSON in Web API

I'm new to web API. I need some help to generate the JSON like following.
[
{
'id': 66,
'text': 'This is the first comment.',
'creator': {
'id': 52,
'display_name': 'Ben'
},
'respondsto': null,
'created_at': '2014-08-14T13:19:59.751Z',
'responses': [
{
'id': 71,
'text': 'This is a response to the first comment.',
'creator': {
'id': 14,
'display_name': 'Daniel',
},
'respondsto': 66,
'created_at': '2014-08-14T13:27:13.915Z',
'responses': [
{
'id': 87,
'text': 'This is a response to the response.',
'creator': {
'id': 52,
'display_name': 'Ben',
},
'respondsto': 71,
'created_at': '2014-08-14T13:27:38.046Z',
'responses': []
}
]
}
]
},
{
'id': 70,
'text': 'Đây là bình luận thứ hai.',
'creator': {
'id': 12,
'display_name': 'Nguyễn'
},
'respondsto': null,
'created_at': '2014-08-14T13:25:47.933Z',
'responses': []
}
];
My Intention is to give JSON Data for the Image.
I'm able to generate flat JSON data, but I am stuck on how to nest responses inside responses until an empty responses list is reached.
Any help would be appreciated.
UPDATE: I found the Answer