New to jq here.
How can I attach an object's key to each item in that key's array value, turning every array of strings into an array of objects? My input JSON has the following format:
{
"key1" : [
"key1item1",
"key1item2",
"key1item3",
"key1item4",
...
],
"key2" : [
"key2item1",
"key2item2",
...
]
}
What I want to achieve is this:
{
"key1" : [
{
'parent': 'key1',
'key': 'key1_key1item1',
'value': 'key1_item1',
},
{
'parent': 'key1',
'key': 'key1_key1item2',
'value': 'key1_item2',
}
{
'parent': 'key1',
'key': 'key1_key1item3',
'value': 'key1_item3',
}
],
"key2" : [
{
'parent': 'key2',
'key': 'key2_key2item1',
'value': 'key2_item1',
},
{
'parent': 'key2',
'key': 'key2_key2item2',
'value': 'key2_item2',
}
{
'parent': 'key2',
'key': 'key2_key2item3',
'value': 'key2_item3',
}
]
This should do it:
# Rewrite every top-level entry: each item of the entry's array becomes an
# object holding the parent key, a combined "<parent>_<item>" key, and the
# original item.
with_entries(
  # remember the entry's key; inside map() "." refers to the array item
  .key as $key
  | .value |= map(
      {
        parent: $key,
        # join parent and item with "_" to match the requested
        # "key1_key1item1" form; tostring keeps non-string items working
        key: ($key + "_" + tostring),
        value: .
      }
    )
)
I have a nested JSON structure of arbitrary depth:
# Sample input: a list of class records containing nested dicts and lists.
# Several values are deliberately empty ({}, [], '') at different depths.
json_list = [
{
'class': 'Year 1',
'room': 'Yellow',
'students': [
{'name': 'James', 'sex': 'M', 'grades': {}},
]
},
{
'class': 'Year 2',
'info': {
'teachers': {
'math': 'Alan Turing',
'physics': []
}
},
'students': [
{ 'name': 'Tony', 'sex': 'M', 'age': ''},
{ 'name': 'Jacqueline', 'sex': 'F' },
],
'other': []
}
]
I want to remove every element whose value meets certain criteria.
For example:
# Values to remove wherever they occur: empty dict, empty tuple, empty list,
# empty string, and a single-space string.
values_to_drop = ({}, (), [], '', ' ')
filtered_json = clean_json(json_list, values_to_drop)
# Bare expression: echoes the result when run in an interactive session.
filtered_json
Expected Output of clean_json:
[
{
'class': 'Year 1',
'room': 'Yellow',
'students': [
{'name': 'James', 'sex': 'M'},
]
},
{
'class': 'Year 2',
'info': {
'teachers': {
'math': 'Alan Turing',
}
},
'students': [
{ 'name': 'Tony', 'sex': 'M'},
{ 'name': 'Jacqueline', 'sex': 'F'},
]
}
]
I thought of first converting the object to a string with json.dumps, then replacing each value that meets the criteria with some kind of flag, filtering on that flag, and finally reading the result back with json.loads. However, I couldn't get this to work, and I don't know whether it is the right approach.
I managed to get the desired output by tweaking this answer a bit:
def clean_json(json_obj, values_to_drop):
    """Recursively remove unwanted values from a JSON-like structure.

    Dicts and lists are rebuilt without any entry/item that compares equal
    to a member of ``values_to_drop``. The check is made both before and
    after cleaning a child, so a container that only becomes droppable once
    its own contents were removed (e.g. ``{'a': ''}`` -> ``{}``) is removed
    as well. The input object is never mutated.

    Args:
        json_obj: Any value; only ``dict`` and ``list`` instances are
            traversed, everything else is returned unchanged.
        values_to_drop: Container of values to remove (tested with ``in``).

    Returns:
        A cleaned copy for dicts and lists, otherwise ``json_obj`` itself.
        The top-level object is always returned, even if it ends up empty.
    """
    if isinstance(json_obj, dict):
        cleaned_dict = {}
        for key, value in json_obj.items():
            if value in values_to_drop:
                continue
            value = clean_json(value, values_to_drop)
            # re-check: cleaning may have emptied a nested container
            if value not in values_to_drop:
                cleaned_dict[key] = value
        return cleaned_dict
    if isinstance(json_obj, list):
        cleaned_list = []
        for item in json_obj:
            if item in values_to_drop:
                continue
            item = clean_json(item, values_to_drop)
            # re-check: cleaning may have emptied a nested container
            if item not in values_to_drop:
                cleaned_list.append(item)
        return cleaned_list
    return json_obj
I have a dataframe like this
org.iden.account,org.iden.id,adress.city,adress.country,person.name.fullname,person.gender,person.birthYear,subs.id,subs.subs1.birthday,subs.subs1.org.address.country,subs.subs1.org.address.strret1,subs.org.buyer.email.address,subs.org.buyer.phone.number
account123,id123,riga,latvia,laura,female,1990,subs123,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com|email2#sanoma.com,+371401234567
account123,id000,riga,latvia,laura,female,1990,subs456,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com,+371401234567
account123,id456,riga,latvia,laura,female,1990,subs789,1990-12-14T00:00:00Z,latvia,street 1,email1#myorg.com,+371401234567
I need to convert this into nested JSON, where each dot (.) in a column name introduces a new nesting level. For the first row, the expected result is:
{
"org": {
"iden": {
"account": "account123",
"id": "id123"
}
},
"address": {
"city": "riga",
"country": "latvia"
},
"person": {
"name": {
"fullname": "laura"
},
"gender": "female",
"birthYear": 1990
},
"subs": {
"id": "subs123",
"subs1": {
"birthday": "1990-12-14T00:00:00Z",
"org": {
"address": {
"country": "latvia",
"street1": "street 1"
}
}
},
"org": {
"buyer": {
"email": {
"address": "email1#myorg.com|email2#sanoma.com"
},
"phone": {
"number": "+371401234567"
}
}
}
}
}
All the records should then be collected into a list. I tried plain pandas .to_json(), but it only produces the flat output below, which lacks the nested structure I need.
[{"org.iden.account":"account123","org.iden.id":"id123","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs123","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com|email2#sanoma.com","subs.org.buyer.phone.number":371401234567},{"org.iden.account":"account123","org.iden.id":"id000","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs456","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com","subs.org.buyer.phone.number":371407654321},{"org.iden.account":"account123","org.iden.id":"id456","adress.city":"riga","adress.country":"latvia","person.name.fullname":"laura","person.gender":"female","person.birthYear":1990,"subs.id":"subs789","subs.subs1.birthday":"1990-12-14T00:00:00Z","subs.subs1.org.address.country":"latvia","subs.subs1.org.address.strret1":"street 1","subs.org.buyer.email.address":"email1#myorg.com","subs.org.buyer.phone.number":371407654321}]
Any help in this would be highly appreciated!
def df_to_json(row):
    """Build a nested dict from one row whose labels are dot-separated paths.

    Each label in ``row.index`` is split on '.'; every segment becomes one
    nesting level and the row's value for that label is stored at the
    innermost level.
    """
    root = {}
    for column in row.index:
        parent = node = root
        for segment in column.split('.'):
            # remember where we came from, then step one level down,
            # creating a placeholder dict when the segment is new
            parent = node
            node = node.setdefault(segment, {})
        # the last segment is a leaf: replace its placeholder with the value
        parent[segment] = row[column]
    return root
>>> df.apply(df_to_json, axis='columns').tolist()
[{'org': {'iden': {'account': 'account123', 'id': 'id123'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs123',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com|email2#sanoma.com'},
'phone': {'number': 371401234567}}}}},
{'org': {'iden': {'account': 'account123', 'id': 'id000'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs456',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com'},
'phone': {'number': 371401234567}}}}},
{'org': {'iden': {'account': 'account123', 'id': 'id456'}},
'adress': {'city': 'riga', 'country': 'latvia'},
'person': {'name': {'fullname': 'laura'},
'gender': 'female',
'birthYear': 1990},
'subs': {'id': 'subs789',
'subs1': {'birthday': '1990-12-14T00:00:00Z',
'org': {'address': {'country': 'latvia', 'strret1': 'street 1'}}},
'org': {'buyer': {'email': {'address': 'email1#myorg.com'},
'phone': {'number': 371401234567}}}}}]
Assuming your json structure looks something like this
# Flat records: every key is a dot-separated path such as "org.iden.account".
json_data = [
{
"org.iden.account": "account123",
"org.iden.id": "id123",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs123",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com|email2#sanoma.com",
"subs.org.buyer.phone.number": 371401234567
},
{
"org.iden.account": "account123",
"org.iden.id": "id000",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs456",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
},
{
"org.iden.account": "account123",
"org.iden.id": "id456",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs789",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
}
]
You could nest it on a dict by dict basis.
def nestify(unnested):
    """Expand a flat dict with dot-separated keys into nested dicts.

    ``{"a.b": 1, "c": 2}`` becomes ``{"a": {"b": 1}, "c": 2}``.
    """
    result = {}
    for dotted_key, value in unnested.items():
        # all segments but the last name intermediate dicts
        *ancestors, leaf = dotted_key.split('.')
        node = result
        for name in ancestors:
            if name not in node:
                node[name] = {}
            node = node[name]
        node[leaf] = value
    return result
This function takes one of the unnested dicts, iterates over its keys, creates the intermediate dicts for each dotted path, and assigns the value at the final depth.
Commented version
def nestify(unnested):
    """Expand a flat dict with dot-separated keys into nested dicts."""
    # the nested structure that is built up and finally returned
    result = {}
    for dotted_key, value in unnested.items():
        # e.g. "org.iden.account" -> ["org", "iden", "account"]
        path = dotted_key.split('.')
        # the last segment names the slot that receives the value;
        # what remains in path are the intermediate levels to walk through
        leaf = path.pop()
        # every key starts its descent from the top-level dict again
        node = result
        for name in path:
            # create the intermediate dict only the first time it is needed;
            # later keys sharing the same prefix reuse it
            if name not in node:
                node[name] = {}
            # step down: first into "org", then into "iden", and so on
            node = node[name]
        # for "org.iden.account" this stores "account123" under "account"
        # inside result["org"]["iden"] rather than another empty dict
        node[leaf] = value
    return result
Then you can just call it on each element of the json_data list in a comprehension.
# Expand every flat record in turn, keeping the original order.
nested = list(map(nestify, json_data))
Full code:
# Flat records: every key is a dot-separated path such as "org.iden.account".
json_data = [
{
"org.iden.account": "account123",
"org.iden.id": "id123",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs123",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com|email2#sanoma.com",
"subs.org.buyer.phone.number": 371401234567
},
{
"org.iden.account": "account123",
"org.iden.id": "id000",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs456",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
},
{
"org.iden.account": "account123",
"org.iden.id": "id456",
"adress.city": "riga",
"adress.country": "latvia",
"person.name.fullname": "laura",
"person.gender": "female",
"person.birthYear": 1990,
"subs.id": "subs789",
"subs.subs1.birthday": "1990-12-14T00:00:00Z",
"subs.subs1.org.address.country": "latvia",
"subs.subs1.org.address.strret1": "street 1",
"subs.org.buyer.email.address": "email1#myorg.com",
"subs.org.buyer.phone.number": 371407654321
}
]
def nestify(unnested):
    """Turn a flat mapping with dotted keys into a tree of nested dicts.

    Each key is split on '.'; all segments except the last select (and, if
    missing, create) an intermediate dict, and the last segment receives
    the value.
    """
    nested = {}
    for flat_key, value in unnested.items():
        segments = flat_key.split('.')
        final = len(segments) - 1
        level = nested
        for depth, segment in enumerate(segments):
            if depth == final:
                # last segment: store the value instead of descending
                level[segment] = value
            else:
                if segment not in level:
                    level[segment] = {}
                level = level[segment]
    return nested
# Expand every flat record in turn, then show the nested result.
nested = list(map(nestify, json_data))
print(nested)
Output:
[
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id123'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs123',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com|email2#sanoma.com'
},
'phone': {
'number': 371401234567
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id000'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs456',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
},
{
'adress': {
'city': 'riga',
'country': 'latvia'
},
'org': {
'iden': {
'account': 'account123',
'id': 'id456'
}
},
'person': {
'birthYear': 1990,
'gender': 'female',
'name': {
'fullname': 'laura'
}
},
'subs': {
'id': 'subs789',
'org': {
'buyer': {
'email': {
'address': 'email1#myorg.com'
},
'phone': {
'number': 371407654321
}
}
},
'subs1': {
'birthday': '1990-12-14T00:00:00Z',
'org': {
'address': {
'country': 'latvia',
'strret1': 'street 1'
}
}
}
}
}
]