How to walk through JSON - json

I am currently working on a JSON walker: a function that takes, as parameters, the path I want to follow through a JSON object. I have created something like this:
from collections import abc
def walk(obj, *path):
    """
    Follow ``path`` key by key through nested mappings.

    Returns the value reached at the end of the path. As soon as the current
    value is not a mapping, or does not contain the next key, a message is
    printed and an empty dict is returned instead.
    """
    current = obj
    for step in path:
        reachable = isinstance(current, abc.Mapping) and step in current
        if not reachable:
            print(f"Couldn't walk path; {path}")
            return {}
        current = current[step]
    return current
# -------------------------------------------------------- #
json_value = {
"id": "dc932304-dde4-3517-8b76-58081cc9dd0d",
"Information": [{
"merch": {
"id": "8fb66657-b93d-5f2d-8fe7-a5e355f0f3a8",
"status": "ACTIVE"
},
"value": {
"country": "SE IS BEST"
},
"View": {
"id": "9aae10f4-1b75-481d-ac5f-b17bc46675bd"
}
}],
"collectionTermIds": [
],
"resourceType": "thread",
"rollup": {
"totalThreads": 1,
"threads": [
]
},
"collectionsv2": {
"groupedCollectionTermIds": {
},
"collectionTermIds": [
]
}
}
# -------------------------------------------------------- #
t = walk(json_value, "Information", 0)
print(t)
My current problem is that I am trying to get the first element of the list under "Information" by passing 0 to the walk function, as shown above. However, it fails and prints: Couldn't walk path; ('Information', 0)
How can I choose which list index to walk into by passing it as a parameter? For example, if I pass 1, it should print Couldn't walk path; ('Information', 1), but if I pass 0 it should return
{
"merch": {
"id": "8fb66657-b93d-5f2d-8fe7-a5e355f0f3a8",
"status": "ACTIVE"
},
"value": {
"country": "SE IS BEST"
},
"View": {
"id": "9aae10f4-1b75-481d-ac5f-b17bc46675bd"
}
}

This should work for any JSON object:
def walk(obj, *path):
    """
    Follow ``path`` through nested JSON-like data (dicts and lists).

    Each segment is used to index the value reached so far, so string keys
    step into objects and integer indexes step into arrays. Returns the value
    found at the end of the path; an empty path returns ``obj`` itself. If any
    step cannot be taken, a diagnostic is printed and an empty dict is
    returned.
    """
    current = obj
    for position, segment in enumerate(path):
        try:
            current = current[segment]
        except KeyError:
            reason = "key missing from object"
        except IndexError as e:
            reason = str(e)
        except TypeError:
            # Raised at any depth, e.g. when indexing a number or using a
            # string key on a list or str.
            reason = "object is not a mapping or array type."
        else:
            continue
        # Name the failing segment together with the one that would have
        # followed it, so the message shows where in the path we stopped.
        label = " -> ".join(repr(part) for part in path[position:position + 2])
        print(f"Couldn't walk path: {label}\n"
              f" obj={current!r}\n"
              f" reason={reason}")
        return {}
    return current
that could probably be optimized a bit, for example by removing the duplicate except blocks with a slight modification.
code for testing (using the json_value from above):
t = walk(json_value, 'Information', 1)
assert t == {}
t = walk(json_value, 'Information', 1, 'value', 'country')
assert t == {}
t = walk(json_value, 'Information', 0)
print(t)
# {'merch': {'id': '8fb66657-b93d-5f2d-8fe7-a5e355f0f3a8', 'status': 'ACTIVE'}, 'value': {'country': 'SE IS BEST'}, 'View': {'id': '9aae10f4-1b75-481d-ac5f-b17bc46675bd'}}
t = walk(json_value, 'Information', 0, 'value', 'country')
print(t)
# SE IS BEST
t = walk(json_value, 'collectionsv2', 'collectionTermIds')
print(t)
# []
t = walk(json_value, 'id')
print(t)
# dc932304-dde4-3517-8b76-58081cc9dd0d
t = walk(json_value, 'id', 'test')
assert t == {}
# error: wrong type

Related

Finding Values in Python json.loads Dictionary

I'm working with a REST API that returns data in the following format:
{
"id": "2902cbad6da44459ad05abd1305eed14",
"displayName": "",
"sourceHost": "dev01.test.lan",
"sourceIP": "192.168.145.1",
"messagesPerSecond": 0,
"messages": 2733,
"size": 292062,
"archiveSize": 0,
"dates": [
{
"date": 1624921200000,
"messages": 279,
"size": 29753,
"archiveSize": 0
},
{
"date": 1625007600000,
"messages": 401,
"size": 42902,
"archiveSize": 0
}
]
}
I'm using json.loads to successfully pull the data from the API, and I now need to search for a particular "date:" value and read the corresponding "messages", "size" and "archiveSize" values.
I'm trying to use the "if-in" method to find the value I'm interested in, for example:
response = requests.request("GET", apiQuery, headers=headers, data=payload)
json_response = json.loads(response.text)
test = 2733
if test in json_response.values():
print(f"Yes, value: '{test}' exist in dictionary")
else:
print(f"No, value: '{test}' does not exist in dictionary")
This works fine for any value in the top section of the JSON return, but it never finds any values in the "dates" sub-branches.
I have two questions, firstly, how do I find the target "date" value? Secondly, once I find that "sub-branch" what would be the best way to extract the three values I need?
Thanks.
from json import load
def list_dates_whose_message_count_equals(dates=None, message_count=0):
    """Return the entries of ``dates`` whose "messages" equals ``message_count``.

    ``dates`` is an iterable of dicts; entries without a "messages" key never
    match. Calling with ``dates=None`` (the default) returns an empty list
    instead of failing while iterating over ``None``.
    """
    if dates is None:
        return []
    return [entry for entry in dates if entry.get("messages") == message_count]
def main():
    """Load values.json and print its "dates" entries with 279, then 401, messages."""
    with open("values.json", "r") as fp:
        json_ = load(fp)
    for wanted in (279, 401):
        print(list_dates_whose_message_count_equals(json_["dates"], message_count=wanted))


if __name__ == "__main__":
    main()
Returns this
[{'date': 1624921200000, 'messages': 279, 'size': 29753, 'archiveSize': 0}]
[{'date': 1625007600000, 'messages': 401, 'size': 42902, 'archiveSize': 0}]

Filtering a json dictionary by values Python

I am trying to write a piece of code that filters the entries under RSI, MOM and MOM_RSI in a JSON file by Status. I want to keep the entries that have a Status of ACTIVE and get rid of the ones that have a Status of PAUSED. I have working code for it from the issue:link, but I want to make it cleaner by configuring the filter inside the filtered_data dictionary, and that attempt is not working. How would I be able to fix it?
Working:
def reading():
    """Load data.json and return only the entries whose Status is ACTIVE.

    The loaded object is treated as a mapping from filter name to a list of
    dicts that each carry a 'Status' key. Filter names with no ACTIVE entry
    are left out of the returned dict.
    """
    with open('data.json') as f:
        data = json.load(f)
    result = {}
    for filter_key, entries in data.items():
        active = [d for d in entries if d['Status'] == 'ACTIVE']
        # Only create the key when something survived the filter.
        if active:
            result[filter_key] = active
    # The filtered mapping has to be handed back; it was previously discarded.
    return result
Not Working Code:
def reading():
# Attempted rewrite of the ACTIVE filter as a comprehension; the notes below
# mark the steps that keep it from working.
with open('data.json') as f:
data = json.load(f)
# Collects every element of every list in data into a set. With the JSON file
# shown further down those elements are dicts, which cannot be put in a set.
required_names = {key for filter_key in data.keys() for key in data[filter_key]}
filtered_data = {
key: value
# data.keys() yields only the key strings, so unpacking each one into
# (key, value) does not produce the key's list of entries.
for key, value in data.keys()
# key is not an entry dict here, so key['Status'] does not read an entry's Status.
if key['Status'] in required_names
}
# Returns the unfiltered data; filtered_data is never used.
return data
reading()
Expected Output:
{
"RSI": [
{
"TradingPair": "BTCUSD",
"Status": "ACTIVE",
}
],
"MOM_RSI":[
{
"TradingPair": "BTCUSDT",
"Status": "ACTIVE",
}
]
}
JSON File:
{
"RSI": [
{
"TradingPair": "BTCUSD",
"Status": "ACTIVE",
}
],
"MOM":[
{
"TradingPair": "BCHUSDT",
"Status": "PAUSED",
}
],
"MOM_RSI":[
{
"TradingPair": "BTCUSDT",
"Status": "ACTIVE",
}
]
}
Using inline loops to filter should do the trick for you
# Replace each key's list with only its entries whose Status is ACTIVE.
for key in data.keys():
data[key] = [x for x in data[key] if x['Status'] == 'ACTIVE']
# in case of empty data, remove the key
data = {k: v for k, v in data.items() if v != []}

Add a # to beginning of each key in Json Python2.7

I'm trying to add a "#" at the beginning to each key of a Json object (got it from RabbitMQ api calls)
here is my attempt :
#!/bin/python
# Libraries import
import requests
import json
import sys
import os
# Define URLs
overview="/api/overview"
nodes="/api/nodes"
queues="/api/queues"
# Get credentials from file
with open('/credentials') as json_file:
data = json.load(json_file)
user = data['user']
passwd = data['pass']
# Test which URL we want to call
if ''.join(sys.argv[1]) == "overview":
commande=overview
if ''.join(sys.argv[1]) == "queues":
commande=queues
if ''.join(sys.argv[1]) == "nodes":
commande=nodes
def append(mydict):
    """Return a copy of ``mydict`` with "#" prepended to each top-level key.

    Keys are converted with ``str`` first; values are carried over unchanged.
    """
    # A comprehension replaces the tuple-unpacking lambda, which is Python 2
    # only syntax; this form runs on Python 2.7 and Python 3 alike.
    return {"#" + str(key): value for key, value in mydict.items()}


def transform(multileveldict):
    """Return a copy of nested JSON data with "#" prepended to every key.

    Dicts are re-keyed through ``append`` and then their values are
    transformed recursively. Lists are walked element by element so that
    dicts nested inside them are handled too. Any other value is returned
    unchanged.
    """
    if isinstance(multileveldict, list):
        return [transform(item) for item in multileveldict]
    if not isinstance(multileveldict, dict):
        return multileveldict
    new = append(multileveldict)
    # Only values are replaced, so the set of keys stays fixed while looping.
    for key, value in new.items():
        new[key] = transform(value)
    return new
def upper_keys(x):
    """Return a copy of nested JSON data with every dict key upper-cased.

    Lists are walked element by element, dict values are processed
    recursively, and any other value is returned unchanged.
    """
    if isinstance(x, list):
        return [upper_keys(v) for v in x]
    if isinstance(x, dict):
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        return {k.upper(): upper_keys(v) for k, v in x.items()}
    return x
# Main
response = requests.get("http://localhost:15672" + commande, auth=(user, passwd))
if response.ok:
    json_data = json.loads(response.content)
    # Use a separate name for the output: rebinding `json` here would replace
    # the imported module with a string for the rest of the script.
    formatted = json.dumps(upper_keys(json_data), indent=4)
    print(formatted)
Here is the JSON that I get in "response.content" :
[
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-01",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-02",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"NODE": "rabbit#server567",
"EXCLUSIVE": false,
"NAME": "test-03",
"SYNCHRONISED_SLAVE_NODES": [],
"SLAVE_NODES": [],
"AUTO_DELETE": false,
"VHOST": "/",
"ARGUMENTS": {},
"TYPE": "classic",
"DURABLE": false
},
{
"MESSAGES_UNACKNOWLEDGED_RAM": 0,
"RECOVERABLE_SLAVES": null,
"CONSUMERS": 0,
"REDUCTIONS": 9700519,
"AUTO_DELETE": false,
"MESSAGE_BYTES_PAGED_OUT": 0,
"MESSAGE_BYTES_UNACKNOWLEDGED": 0,
"REDUCTIONS_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES": 0,
"MESSAGES_UNACKNOWLEDGED": 0,
"CONSUMER_UTILISATION": null,
"EXCLUSIVE": false,
"VHOST": "/",
"GARBAGE_COLLECTION": {
"MAX_HEAP_SIZE": 0,
"MIN_HEAP_SIZE": 233,
"FULLSWEEP_AFTER": 65535,
"MINOR_GCS": 15635,
"MIN_BIN_VHEAP_SIZE": 46422
},
"MESSAGES_DETAILS": {
"RATE": 0.0
},
"SLAVE_NODES": [
"rabbit#server567"
],
"MESSAGE_BYTES_PERSISTENT": 0,
"POLICY": "ha-all",
"MESSAGES_PAGED_OUT": 0,
"NODE": "rabbit#server566",
"HEAD_MESSAGE_TIMESTAMP": null,
"DURABLE": false,
"MESSAGES_READY_RAM": 0,
"STATE": "running",
"ARGUMENTS": {},
"EFFECTIVE_POLICY_DEFINITION": {
"HA-MODE": "all"
},
"MESSAGES_READY": 0,
"MESSAGES_RAM": 0,
"MESSAGE_BYTES_READY": 0,
"SINGLE_ACTIVE_CONSUMER_TAG": null,
"NAME": "test-04",
"MESSAGES_PERSISTENT": 0,
"BACKING_QUEUE_STATUS": {
"MIRROR_SENDERS": 0,
"Q1": 0,
"Q3": 0,
"Q2": 0,
"Q4": 0,
"AVG_ACK_EGRESS_RATE": 0.0,
"MIRROR_SEEN": 0,
"LEN": 0,
"TARGET_RAM_COUNT": "infinity",
"MODE": "default",
"NEXT_SEQ_ID": 0,
"DELTA": [
"delta",
"undefined",
0,
0,
"undefined"
],
"AVG_ACK_INGRESS_RATE": 0.0,
"AVG_EGRESS_RATE": 0.0,
"AVG_INGRESS_RATE": 0.0
},
"MESSAGES": 0,
"IDLE_SINCE": "2020-10-16 13:50:50",
"OPERATOR_POLICY": null,
"SYNCHRONISED_SLAVE_NODES": [
"rabbit#server567"
],
"MEMORY": 10556,
"EXCLUSIVE_CONSUMER_TAG": null,
"MESSAGES_READY_DETAILS": {
"RATE": 0.0
},
"TYPE": "classic",
"MESSAGES_UNACKNOWLEDGED_DETAILS": {
"RATE": 0.0
},
"MESSAGE_BYTES_RAM": 0
}
]
Here, I made every key uppercase and can display the result as JSON, but I can't find a way to add this "#" to the beginning of each key.
PS : I'm new to Python development
Thank you very much
Since you mentioned that you have successfully converted every key in the dictionary to upper case, why don't you reuse that method and change the part that upper-cases the key so that it prepends "#" instead?
# the one you provided
def upper_keys(x):
    """Return a copy of nested JSON data with every dict key upper-cased.

    Lists are walked element by element, dict values are processed
    recursively, and any other value is returned unchanged.
    """
    if isinstance(x, list):
        return [upper_keys(v) for v in x]
    elif isinstance(x, dict):
        # items() works on Python 2 and 3 alike; iteritems() is Python 2 only.
        return dict((k.upper(), upper_keys(v)) for k, v in x.items())
    else:
        return x
# the modified method
def prepend_hash_keys(x):
    """Return a copy of nested JSON data with "#" prepended to every dict key.

    Lists are walked element by element, dict values are processed
    recursively, and any other value is returned unchanged.
    """
    if isinstance(x, list):
        return [prepend_hash_keys(v) for v in x]
    elif isinstance(x, dict):
        # this part from k.upper() to "#" + k
        # items() works on Python 2 and 3 alike; iteritems() is Python 2 only.
        return dict(("#" + k, prepend_hash_keys(v)) for k, v in x.items())
    else:
        return x
Your transform function actually works fine (for Python 2), you just forgot to actually call it! Instead, you call only upper_keys, but not transform:
json = json.dumps(upper_keys(json_data), indent=4) # where's transform?
If you use both one after the other (order does not matter) it should work:
json = {"nested": {"dict": {"with": {"lowercase": "keys"}}}}
print(transform(upper_keys(json)))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}
However, both transform and upper_keys can be simplified a lot using dictionary comprehensions (also available in Python 2), and you can combine both in one function:
def transform_upper(d):
    """Return a copy of nested JSON data with keys upper-cased and "#"-prefixed.

    Dict values are processed recursively and lists are walked element by
    element, so dicts inside arrays are converted as well. Any other value is
    returned unchanged.
    """
    if isinstance(d, dict):
        return {"#" + k.upper(): transform_upper(v) for k, v in d.items()}
    if isinstance(d, list):
        # The API response in the question is a list of dicts, so lists must
        # be descended into rather than returned as-is.
        return [transform_upper(v) for v in d]
    return d
print(transform_upper(json))
# {'#NESTED': {'#DICT': {'#WITH': {'#LOWERCASE': 'keys'}}}}
From the look of it you already tried something like that in append() function.
If you modify that a bit to have something like this, it may do what you are looking for:
mydict = {
'name':1,
'surname':2
}
def append(mydict):
    """Build a new dict whose keys are the keys of ``mydict`` prefixed with '#'."""
    return {'#' + name: item for name, item in mydict.items()}
print(append(mydict))

how to parse CSV to JSON from 2 CSV Files in Groovy

Please help with parse CSV to JSON from 2 CSV Files in groovy
For example :
CSV1:
testKey,status
Name001,PASS
Name002,PASS
Name003,FAIL
CSV2:
Kt,Pd
PT-01,Name001
PT-02,Name002
PT-03,Name003
PT-04,Name004
I want to input in "testlist" data from CSV2.val[1..-1],CSV1.val[1..-1]
Result should be like :
{
"testExecutionKey": "DEMO-303",
"info": {
"user": "admin"
},
"tests": [
{
"TestKey": "PT-01",
"status": "PASS"
},
{
"TestKey": "PT-02",
"status": "PASS"
},
{
"TestKey": "PT-03",
"status": "FAIL"
}
]
code without this modification (from only 1 csv):
import groovy.json.*
def kindaFile = '''
TestKey;Finished;user;status
Name001;PASS;
Name002;PASS;
'''.trim()
def keys
def testList = []
//parse CSV
kindaFile.splitEachLine( /;/ ){ parts ->
if( !keys )
keys = parts
else{
def test = [:]
parts.eachWithIndex{ val, ix -> test[ keys[ ix ] ] = val }
testList << test
}
}
def builder = new JsonBuilder()
def root = builder {
testExecutionKey 'DEMO-303'
info user: 'admin'
tests testList
}
println JsonOutput.prettyPrint(JsonOutput.toJson(root))
Your sample JSON doesn't match the CSV definition. It looks like you're using fields [1..-1] from CSV 1, as you stated, but fields [0..-2] from CSV 2. As you only have 2 fields in each CSV, that's the equivalent of csv1[1] and csv2[0]. The example below uses [0..-2]. Note that if you always have exactly two fields in your input files then the following code could be simplified a little. I've given a more generic solution that can cope with more fields.
Load both CSV files into lists
File csv1 = new File( 'one.csv')
File csv2 = new File( 'two.csv')
def lines1 = csv1.readLines()
def lines2 = csv2.readLines()
assert lines1.size() <= lines2.size()
Note the assert. That's there as I noticed you have 4 tests in CSV2 but only 3 in CSV1. To allow the code to work with your sample data, it iterates through CSV1 and adds the data from the corresponding line of CSV2.
Get the field names
// Field separator: a comma followed by any number of spaces.
fieldSep = /,[ ]*/
def fieldNames1 = lines1[0].split( fieldSep )
// NOTE(review): this reads the header of CSV1 (lines1), not CSV2, so the CSV2
// values are stored under CSV1's column names. Presumably intentional, since
// the printed result uses "testKey" rather than "Kt"; confirm before changing
// it to lines2[0].
def fieldNames2 = lines1[0].split( fieldSep )
Build the testList collection
// One map per CSV1 data line, combining fields taken from both files.
def testList = []
// [1..-1] skips the header line, so lineNo counts data lines starting at 0.
lines1[1..-1].eachWithIndex { csv1Line, lineNo ->
def mappedLine = [:]
def fieldsCsv1 = csv1Line.split( fieldSep )
// Every CSV1 field except the first; fldNo + 1 re-aligns with the header index.
fieldsCsv1[1..-1].eachWithIndex { value, fldNo ->
String name = fieldNames1[ fldNo + 1 ]
mappedLine[ name ] = value
}
// Lines are paired by position, not by value: lineNo + 1 skips CSV2's header line.
def fieldsCsv2 = lines2[lineNo + 1].split( fieldSep )
// Every CSV2 field except the last.
fieldsCsv2[0..-2].eachWithIndex { value, fldNo ->
String name = fieldNames2[ fldNo ]
mappedLine[ name ] = value
}
testList << mappedLine
}
Parsing
You can now parse the list of maps with your existing code. I've made a change to the way the JSON string is displayed though.
def builder = new JsonBuilder()
def root = builder {
testExecutionKey 'DEMO-303'
info user: 'admin'
tests testList
}
println builder.toPrettyString()
JSON Output
Running the above code, using your CSV1 and CSV 2 data, gives the JSON that you desire.
for CSV1:
testKey,status
Name001,PASS
Name002,PASS
Name003,FAIL
and CSV2:
Kt,Pd
PT-01,Name007
PT-02,Name001
PT-03,Name003
PT-05,Name002
PT-06,Name004
PT-07,Name006
result is:
{
"testExecutionKey": "DEMO-303",
"info": {
"user": "admin"
},
"tests": [
{
"status": "PASS",
"testKey": "PT-01"
},
{
"status": "PASS",
"testKey": "PT-02"
},
{
"status": "FAIL",
"testKey": "PT-03"
}
]
}
but I need exactly the same values for testKey (testKey from CSV1=Kt from CSV2)
{
"testExecutionKey": "DEMO-303",
"info": {
"user": "admin"
},
"tests": [
{
"testKey": "PT-02",
"status": "PASS"
},
{
"testKey": "PT-05",
"status": "PASS"
},
{
"testKey": "PT-03",
"status": "FAIL"
}
]
}

AttributeError: 'list' object has no attribute 'get'?

This is the script
def validate_record_schema(record):
    """Check the types of 'ManualAdded' and 'Location' under record['Payload'].

    Returns False when both are dicts that contain a 'Value' key; otherwise
    returns True only if 'ManualAdded' is a bool and 'Location' is a str.
    """
    payload = record.get('Payload', {})
    manual_flag = payload.get('ManualAdded', None)
    place = payload.get('Location', None)
    both_dicts = isinstance(manual_flag, dict) and isinstance(place, dict)
    if both_dicts and 'Value' in manual_flag and 'Value' in place:
        return False
    if not isinstance(manual_flag, bool):
        return False
    return isinstance(place, str)
print([validate_record_schema(r) for r in data])
This is json data
data = [{
"Id": "12",
"Type": "DevicePropertyChangedEvent",
"Payload": [{
"DeviceType": "producttype",
"DeviceId": 2,
"IsFast": false,
"Payload": {
"DeviceInstanceId": 2,
"IsResetNeeded": false,
"ProductType": "product",
"Product": {
"Family": "home"
},
"Device": {
"DeviceFirmwareUpdate": {
"DeviceUpdateStatus": null,
"DeviceUpdateInProgress": null,
"DeviceUpdateProgress": null,
"LastDeviceUpdateId": null
},
"ManualAdded": {
"value":false
},
"Name": {
"Value": "Jigital60asew",
"IsUnique": true
},
"State": null,
"Location": {
"value":"bangalore"
},
"Serial": null,
"Version": "2.0.1.100"
}
}
}]
}]
For the line manual_added= device.get('ManualAdded', None), I am getting the following error: AttributeError: 'list' object has no attribute 'get'.
Please have a look and help me to solve this issue.
Where am I making a mistake?
How can i fix this error?
Please help me to solve this issue
You are having problems tracking types as you traverse data. One trick is to add prints along the way for debug to see what is going on. For instance, that top "Payload" object is a list of dict, not a single dict. The list implies that you can have more than one device descriptor so I wrote a sample that checks all of them and returns False if it finds something wrong along the way. you will likely need to update this according to your validation rules, but this will get you started.
def validate_record_schema(record):
    """Validate that the 0 or more Payload dicts in record
    use proper types.

    Each entry of record['Payload'] that has an inner 'Payload' must provide
    Device.ManualAdded.value as a bool and Device.Location.value as a str.
    Entries without an inner 'Payload' are skipped. Returns True when every
    checked entry passes, and False on the first wrong type, missing key or
    unexpectedly shaped value.
    """
    try:
        for entry in record.get('Payload', []):
            payload = entry.get('Payload', None)
            if payload is None:
                # its okay to have device without payload?
                continue
            device = payload["Device"]
            if not isinstance(device["ManualAdded"]["value"], bool):
                return False
            if not isinstance(device["Location"]["value"], str):
                return False
    except KeyError:
        print("missing key")
        return False
    except (TypeError, AttributeError):
        # A list, None or scalar sits where a dict was expected (the same
        # kind of mismatch that produced the original AttributeError).
        print("unexpected type")
        return False
    return True
As the error suggests, you can't .get() on a list. To get the Location and ManualAdded field, you could use:
manual_added = record.get('Payload')[0].get('Payload').get('Device').get('ManualAdded')
location = record.get('Payload')[0].get('Payload').get('Device').get('Location')
So your function would become:
def validate_record_schema(record):
    """Check 'ManualAdded' and 'Location' of the first device in record['Payload'].

    Returns False when both are dicts that contain a 'Value' key; otherwise
    returns True only if 'ManualAdded' is a bool and 'Location' is a str.
    """
    # Resolve the shared part of the lookup chain once.
    device = record.get('Payload')[0].get('Payload').get('Device')
    manual_flag = device.get('ManualAdded')
    place = device.get('Location')
    both_dicts = isinstance(manual_flag, dict) and isinstance(place, dict)
    if both_dicts and 'Value' in manual_flag and 'Value' in place:
        return False
    return isinstance(manual_flag, bool) and isinstance(place, str)
Note that this would set location to
{
"value":"bangalore"
}
and manual_added to
{
"value":false
}