I've created a Python dictionary structure as below:
import pprint

log_data = {
    'Date': '',
    'Prayers': {
        'Fajr': '',
        'Dhuhr/Jumu\'ah': '',
        'Asr': '',
        'Maghrib': '',
        'Isha\'a': ''
    },
    'Task List': [{
        'Task': '',
        'Timeline': '',
        'Status': ''
    }],
    'Meals': {
        'Breakfast': {
            'Menu': '',
            'Place': '',
            'Time': ''
        },
        'Lunch': {
            'Menu': '',
            'Place': '',
            'Time': ''
        },
        'Evening Snacks': {
            'Menu': '',
            'Place': '',
            'Time': ''
        },
        'Dinner': {
            'Menu': '',
            'Place': '',
            'Time': ''
        }
    },
    'Exercises': [{
        'Exercise': '',
        'Duration': ''
    }]
}

pprint.pprint(log_data)
As you can see, this is just a dictionary structure without data. I want to iterate over all the keys and take the values as input from the user using input().
Then I would like to save this dictionary as a JSON file.
Could you please help with how I can iterate over all the keys and take input from the user?
Thanks.
I searched but couldn't find the exact kind of help that I need.
For this kind of thing, one needs to use recursion.
This is not fancy, but will get the job done:
from copy import deepcopy
import json
import pprint

log_data = {
    'Date': '',
    'Prayers': {
        'Fajr': '',
        'Dhuhr/Jumu\'ah': '',
        'Asr': '',
        'Maghrib': '',
        'Isha\'a': ''
    },
    'Task List': [{
        'Task': '',
        'Timeline': '',
        'Status': ''
    }],
    # ...
}
def input_fields(substruct, path=""):
    print(f"Inputting values '{path}':")
    for fieldname, value in substruct.items():
        if isinstance(value, (str, int)):
            substruct[fieldname] = input(f"{path}.{fieldname}: ")
        elif isinstance(value, dict):
            input_fields(value, f"{path}.{fieldname}")
        elif isinstance(value, list):
            original = value[0]
            value.pop()
            counter = 0
            if not isinstance(original, dict):
                raise ValueError("Not supported: a list should contain a dictionary substructure")
            while True:
                item = deepcopy(original)
                input_fields(item, f"{path}.{fieldname}.[{counter}]")
                value.append(item)
                # .startswith() avoids an IndexError when the user just presses Enter
                continue_ = input(f"Enter one more {path}.{fieldname} item? (y/n) ").lower().strip().startswith("y")
                if not continue_:
                    break
                counter += 1
    return substruct

def main():
    values = input_fields(deepcopy(log_data))
    # a context manager makes sure the file is flushed and closed
    with open("myfile.json", "wt") as f:
        json.dump(values, f, indent=4)

if __name__ == "__main__":
    main()
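For illustration, after a run with made-up answers the saved myfile.json would contain something like:
{
    "Date": "2024-05-01",
    "Prayers": {
        "Fajr": "on time",
        "Dhuhr/Jumu'ah": "on time",
        "Asr": "on time",
        "Maghrib": "on time",
        "Isha'a": "missed"
    },
    "Task List": [
        {
            "Task": "write report",
            "Timeline": "morning",
            "Status": "done"
        }
    ]
}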
I'm having trouble finding JSON elements in a nested JSON.
It seems that my code only finds elements at the root level; it doesn't search recursively.
import json
import pandas as pd

jsonString = '{"airplane": {"wings": {}, "wheels": {}, "cockpit": {}}}'
jsonObj = json.loads(jsonString)

data = ['airplane', 'wings', 'wheels', 'cockpit']
dfProp = pd.DataFrame(data, columns=['object'])

# find elements in JSON
for index, row in dfProp.iterrows():
    if row['object'] in jsonObj:
        print(row['object'] + ' ' + 'FOUND')
    else:
        print(row['object'] + ' ' + 'NOT FOUND')
I want to find all elements regardless of how many nesting levels there are in json files.
Can someone point me in the right direction?
If I understand you correctly, you want to check whether all values from the list data are found as keys somewhere in jsonObj:
import json

jsonString = '{"airplane": {"wings": {}, "wheels": {}, "cockpit": {}}}'
jsonObj = json.loads(jsonString)

data = ["airplane", "wings", "wheels", "cockpit"]

def find(o):
    # yield every key at any nesting level
    if isinstance(o, dict):
        for k, v in o.items():
            yield k
            yield from find(v)
    elif isinstance(o, list):
        for v in o:
            yield from find(v)

s = set(data).difference(find(jsonObj))
if not s:
    print("All values from data found in jsonObj")
else:
    print("Not all values from data found in jsonObj", s)
Prints:
All values from data found in jsonObj
I know I can use ruamel.yaml to load a file with tags in it. But when I want to dump without them I get an error. Simplified example:
from ruamel.yaml import YAML
from json import dumps
import sys
yaml = YAML()
data = yaml.load("""\
!mytag
a: 1
b: 2
c: 2022-05-01
""")

try:
    yaml2 = YAML(typ='safe', pure=True)
    yaml2.default_flow_style = True
    yaml2.dump(data, sys.stdout)
except Exception as e:
    print('exception dumping using yaml', e)

try:
    print(dumps(data))
except Exception as e:
    print('exception dumping using json', e)
exception dumping using yaml cannot represent an object: ordereddict([('a', 1), ('b', 2), ('c', datetime.date(2022, 5, 1))])
exception dumping using json Object of type date is not JSON serializable
I cannot change the load() without getting an error on the tag. How to get output with tags stripped (YAML or JSON)?
You get the error because neither the safe dumper (pure or not) nor JSON knows about the ruamel.yaml internal
types that preserve comments, tagging, block/flow-style, etc.
Dumping as YAML, you could register these types with alternate dump methods. As JSON this is more complex,
as AFAIK you can only convert the leaf nodes (i.e. the YAML scalars; you would e.g. be
able to use that to dump the datetime.date instance that is loaded as the value of key c).
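For the YAML route, a minimal sketch of such registration (my assumption of how you would wire it, only tested in spirit on simple data) tells the safe dumper to emit the round-trip container types as plain mappings and sequences; tagged scalars like !myscalar would still need representers of their own:
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap, CommentedSeq
import sys

yaml2 = YAML(typ='safe', pure=True)
# dump the round-trip container types as plain dict/list, dropping the tags
yaml2.representer.add_representer(
    CommentedMap, lambda r, d: r.represent_dict(dict(d)))
yaml2.representer.add_representer(
    CommentedSeq, lambda r, d: r.represent_list(list(d)))
yaml2.dump(data, sys.stdout)  # data as loaded in the question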
I have used YAML as a readable, editable and programmatically updatable config file with
a much faster loading JSON version of the data used if its file is not older than the corresponding YAML (if
it is older, the JSON gets created from the YAML); a sketch of that caching pattern follows after the example output below.
The thing to do in order to dump(s) is to recursively generate Python primitives that JSON understands.
The following does so, but there are other constructs besides datetime
instances that JSON doesn't allow, e.g. sequences or dicts used as keys
(which is allowed in YAML, but not in JSON). For keys that are
sequences, I concatenate the string representations of the elements:
import ruamel.yaml
from ruamel.yaml import YAML
import sys
import datetime
import json
from collections.abc import Mapping

yaml = YAML()
data = yaml.load("""\
!mytag
a: 1
b: 2
c: 2022-05-01
[d, e]: !myseq [42, 196]
{f: g, 18: y}: !myscalar x
""")

def json_dump(data, out, indent=None):
    def scalar(obj):
        if obj is None:
            return None
        if isinstance(obj, (datetime.date, datetime.datetime)):
            return str(obj)
        if isinstance(obj, ruamel.yaml.scalarbool.ScalarBoolean):
            return obj == 1
        if isinstance(obj, bool):
            return bool(obj)
        if isinstance(obj, int):
            return int(obj)
        if isinstance(obj, float):
            return float(obj)
        if isinstance(obj, tuple):
            return '_'.join([str(x) for x in obj])
        if isinstance(obj, Mapping):
            return '_'.join([f'{k}-{v}' for k, v in obj.items()])
        if not isinstance(obj, str):
            print('type', type(obj))
        return obj

    def prep(obj):
        if isinstance(obj, dict):
            return {scalar(k): prep(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [prep(elem) for elem in obj]
        if isinstance(obj, ruamel.yaml.comments.TaggedScalar):
            return prep(obj.value)
        return scalar(obj)

    res = prep(data)
    json.dump(res, out, indent=indent)

json_dump(data, sys.stdout, indent=2)
which gives:
{
  "a": 1,
  "b": 2,
  "c": "2022-05-01",
  "d_e": [
    42,
    196
  ],
  "f-g_18-y": "x"
}
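As an aside, the YAML-with-JSON-cache pattern mentioned earlier can be as simple as comparing file modification times. A minimal sketch, reusing the json_dump() defined above (the file names are my illustrative assumptions):
import os
import json
from ruamel.yaml import YAML

def load_config(yaml_path='config.yaml', json_path='config.json'):
    # regenerate the JSON cache when it is missing or older than the YAML source
    if (not os.path.exists(json_path)
            or os.path.getmtime(json_path) < os.path.getmtime(yaml_path)):
        with open(yaml_path) as fp:
            data = YAML().load(fp)
        with open(json_path, 'w') as out:
            json_dump(data, out, indent=2)
    with open(json_path) as fp:
        return json.load(fp)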
I'm traversing a directory tree, which contains directories and files. I know I could use os.walk for this, but this is just an example of what I'm doing, and the end result has to be recursive.
The function to get the data out is below:
def walkfn(dirname):
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        if os.path.isdir(path):
            print(name)
            walkfn(path)
        elif os.path.isfile(path):
            print(name)
Assuming we had a directory structure such as this:
testDir/
    a/
        1/
        2/
            testa2.txt
        testa.txt
    b/
        3/
            testb3.txt
        4/
The code above would return the following:
a
testa.txt
1
2
testa2.txt
b
4
3
testb3.txt
It's doing what I would expect at this point, and the values are all correct, but I'm trying to get this data into a JSON object. I've seen that I can add these into nested dictionaries, and then convert it to JSON, but I've failed miserably at getting them into nested dictionaries using this recursive method.
The JSON I'm expecting out would be something like:
{
    "test": {
        "b": {
            "4": {},
            "3": {
                "testb3.txt": null
            }
        },
        "a": {
            "testa.txt": null,
            "1": {},
            "2": {
                "testa2.txt": null
            }
        }
    }
}
You should pass json_data into your recursive function:
import os
from pprint import pprint
from typing import Dict

def walkfn(dirname: str, json_data: Dict = None):
    if not json_data:
        json_data = dict()
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        if os.path.isdir(path):
            json_data[name] = dict()
            json_data[name] = walkfn(path, json_data=json_data[name])
        elif os.path.isfile(path):
            json_data.update({name: None})
    return json_data

json_data = walkfn(dirname="your_dir_name")
pprint(json_data)
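To get the JSON object the question asks for, serialize the returned dictionary with the json module (the file name here is illustrative):
import json

print(json.dumps(json_data, indent=4))  # JSON text on stdout

with open("tree.json", "w") as f:       # or write it to a file
    json.dump(json_data, f, indent=4)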
I want to perform validation on my data. I have written the code using pandas_schema; instead of the pandas_schema definition, how can I pass a JSON file that contains all the validation rules and then apply it to the CSV file?
That is, which rule to apply to which column should be taken from the JSON file instead of the pandas_schema code, and the error file should be generated as before.
from decimal import Decimal, InvalidOperation

import pandas as pd
import pandas_schema
from pandas_schema import Column
from pandas_schema.validation import CustomElementValidation

def check_decimal(dec):
    try:
        Decimal(dec)
    except InvalidOperation:
        return False
    return True

def check_int(num):
    try:
        int(num)
    except ValueError:
        return False
    return True

def do_validation():
    # read the data
    data = pd.read_csv('data.csv')
    # define validation elements
    decimal_validation = [CustomElementValidation(lambda d: check_decimal(d), 'is not decimal')]
    int_validation = [CustomElementValidation(lambda i: check_int(i), 'is not integer')]
    null_validation = [CustomElementValidation(lambda d: d is None, 'this field cannot be null')]
    # define validation schema
    schema = pandas_schema.Schema([
        Column('dec1', decimal_validation + null_validation),
        Column('dec2', decimal_validation),
        Column('dec3', decimal_validation),
        Column('dec4', decimal_validation),
        Column('dec5', decimal_validation),
        Column('dec6', decimal_validation),
        Column('dec7', decimal_validation),
        Column('company_id', int_validation + null_validation),
        Column('currency_id', int_validation + null_validation),
        Column('country_id', int_validation + null_validation)])
    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)
    # save data
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
So, I don't know anything really about pandas_schema, but if you have columns and their validators in a JSON file like this:
{
    "dec1": ["decimal", "null"],
    "dec2": ["decimal"],
    "dec3": ["decimal"],
    "dec4": ["decimal"],
    "dec5": ["decimal"],
    "dec6": ["decimal"],
    "dec7": ["decimal"],
    "company_id": ["int", "null"],
    "currency_id": ["int", "null"],
    "country_id": ["int", "null"]
}
Then you can use a dict of validators and a list comprehension to generate your Column objects for the Schema:
import json

def check_decimal(dec):
    try:
        Decimal(dec)
    except InvalidOperation:
        return False
    return True

def check_int(num):
    try:
        int(num)
    except ValueError:
        return False
    return True

VALIDATORS = {
    'decimal': CustomElementValidation(lambda d: check_decimal(d), 'is not decimal'),
    'int': CustomElementValidation(lambda i: check_int(i), 'is not integer'),
    'null': CustomElementValidation(lambda d: d is None, 'this field cannot be null'),
}

def do_validation():
    # read the data
    data = pd.read_csv('data.csv')
    with open('my_json_schema.json', 'r') as my_json:
        json_schema = json.load(my_json)
    column_list = [Column(k, [VALIDATORS[v] for v in vals]) for k, vals in json_schema.items()]
    schema = pandas_schema.Schema(column_list)
    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)
    # save data
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
EDIT:
To use validators with arguments defined in the JSON, you are going to need to change both the JSON format and the code a bit. The following should work, but I can't test it myself.
{
    "dec1": [["decimal"], ["null"]],
    "dec2": [["decimal"], ["range", 0, 10]],
    "dec3": [["decimal"]],
    "dec4": [["decimal"]],
    "dec5": [["decimal"]],
    "dec6": [["decimal"]],
    "dec7": [["decimal"]],
    "company_id": [["int"], ["null"]],
    "currency_id": [["int"], ["null"]],
    "country_id": [["int"], ["null"]]
}
def get_validator(opts):
    VALIDATORS = {
        'decimal': (CustomElementValidation, [lambda d: check_decimal(d), 'is not decimal']),
        'int': (CustomElementValidation, [lambda i: check_int(i), 'is not integer']),
        'null': (CustomElementValidation, [lambda d: d is None, 'this field cannot be null']),
        'range': (InRangeValidation, []),
    }
    func, args = VALIDATORS[opts[0]]
    args.extend(opts[1:])
    return func(*args)

def do_validation():
    # read the data
    data = pd.read_csv('data.csv')
    with open('my_json_schema.json', 'r') as my_json:
        json_schema = json.load(my_json)
    column_list = [Column(k, [get_validator(v) for v in vals]) for k, vals in json_schema.items()]
    schema = pandas_schema.Schema(column_list)
    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)
    # save data
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
I have a Django model with a JSONField (django.contrib.postgres.fields.JSONField).
Is there any way that I can validate model data against a JSON schema file (pre-save)?
Something like my_field = JSONField(schema_file=my_schema_file)
I wrote a custom validator using jsonschema in order to do this.
project/validators.py
import django
from django.core.validators import BaseValidator
import jsonschema

class JSONSchemaValidator(BaseValidator):
    def compare(self, value, schema):
        try:
            jsonschema.validate(value, schema)
        except jsonschema.exceptions.ValidationError:
            raise django.core.exceptions.ValidationError(
                '%(value)s failed JSON schema check', params={'value': value}
            )
project/app/models.py
from django.db import models
from project.validators import JSONSchemaValidator

MY_JSON_FIELD_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-07/schema#',
    'type': 'object',
    'properties': {
        'my_key': {
            'type': 'string'
        }
    },
    'required': ['my_key']
}

class MyModel(models.Model):
    my_json_field = models.JSONField(
        default=dict,
        validators=[JSONSchemaValidator(limit_value=MY_JSON_FIELD_SCHEMA)]
    )
That's what the Model.clean() method is for (see docs). Example:
class MyData(models.Model):
    some_json = JSONField()
    ...

    def clean(self):
        if not is_my_schema(self.some_json):
            raise ValidationError('Invalid schema.')
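The answer leaves is_my_schema open; a minimal sketch of such a helper, assuming jsonschema and an illustrative schema, could be:
import jsonschema

MY_SCHEMA = {  # illustrative; substitute your own schema
    'type': 'object',
    'properties': {'my_key': {'type': 'string'}},
    'required': ['my_key'],
}

def is_my_schema(value):
    # True when value conforms to MY_SCHEMA, False otherwise
    try:
        jsonschema.validate(value, MY_SCHEMA)
        return True
    except jsonschema.exceptions.ValidationError:
        return False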
You could use cerberus to validate your data against a schema:
from cerberus import Validator

schema = {'name': {'type': 'string'}}
v = Validator(schema)
data = {'name': 'john doe'}
v.validate(data)  # returns True (if passed)
v.errors          # returns the error dict (or an empty dict in case of no errors)
It's pretty straightforward to use (also due to its good documentation -> validation rules: http://docs.python-cerberus.org/en/stable/validation-rules.html)
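Cerberus also handles nested documents; a small example (the schema contents are illustrative):
from cerberus import Validator

schema = {
    'name': {'type': 'string', 'required': True},
    'address': {
        'type': 'dict',
        'schema': {  # cerberus uses a nested 'schema' rule for dicts
            'city': {'type': 'string'},
            'zip': {'type': 'string', 'regex': r'\d{5}'},
        },
    },
}
v = Validator(schema)
print(v.validate({'name': 'john doe', 'address': {'city': 'Berlin', 'zip': '10115'}}))  # True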
I wrote a custom JSONField that extends models.JSONField and validates the attribute's value using jsonschema (Django 3.1, Python 3.7).
I didn't use the validators parameter for one reason: I want to let users define the schema dynamically. So I use a schema parameter, which should be:
None (by default): the field will behave like its parent class (no JSON schema validation support).
A dict object. This option is suitable for a small schema definition (for example: {"type": "string"});
A str object that describes a path to the file where the schema code is contained. This option is suitable for a big schema definition (to preserve the beauty of the model class definition code). For searching I use all enabled finders: django.contrib.staticfiles.finders.find().
A function that takes a model instance as an argument and returns a schema as a dict object. So you can build a schema based on the state of the given model instance. The function will be called every time validate() is called.
myapp/models/fields.py
import json

from jsonschema import validators as json_validators
from jsonschema import exceptions as json_exceptions

from django.contrib.staticfiles import finders
from django.core import checks, exceptions
from django.db import models
from django.utils.functional import cached_property

class SchemaMode:
    STATIC = 'static'
    DYNAMIC = 'dynamic'

class JSONField(models.JSONField):
    """
    A models.JSONField subclass that supports the JSON schema validation.
    """

    def __init__(self, *args, schema=None, **kwargs):
        if schema is not None:
            if not (isinstance(schema, (bool, dict, str)) or callable(schema)):
                raise ValueError('The "schema" parameter must be bool, dict, str, or callable object.')
            self.validate = self._validate
        else:
            self.__dict__['schema_mode'] = False
        self.schema = schema
        super().__init__(*args, **kwargs)

    def check(self, **kwargs):
        errors = super().check(**kwargs)
        if self.schema_mode == SchemaMode.STATIC:
            errors.extend(self._check_static_schema(**kwargs))
        return errors

    def _check_static_schema(self, **kwargs):
        try:
            schema = self.get_schema()
        except (TypeError, OSError):
            return [
                checks.Error(
                    f"The file '{self.schema}' cannot be found.",
                    hint="Make sure that 'STATICFILES_DIRS' and 'STATICFILES_FINDERS' settings "
                         "are configured correctly.",
                    obj=self,
                    id='myapp.E001',
                )
            ]
        except json.JSONDecodeError:
            return [
                checks.Error(
                    f"The file '{self.schema}' contains an invalid JSON data.",
                    obj=self,
                    id='myapp.E002'
                )
            ]
        validator_cls = json_validators.validator_for(schema)
        try:
            validator_cls.check_schema(schema)
        except json_exceptions.SchemaError:
            return [
                checks.Error(
                    f"{schema} must be a valid JSON Schema.",
                    obj=self,
                    id='myapp.E003'
                )
            ]
        else:
            return []

    def deconstruct(self):
        name, path, args, kwargs = super().deconstruct()
        if self.schema is not None:
            kwargs['schema'] = self.schema
        return name, path, args, kwargs

    @cached_property
    def schema_mode(self):
        if callable(self.schema):
            return SchemaMode.DYNAMIC
        return SchemaMode.STATIC

    @cached_property
    def _get_schema(self):
        if callable(self.schema):
            return self.schema
        elif isinstance(self.schema, str):
            with open(finders.find(self.schema)) as fp:
                schema = json.load(fp)
        else:
            schema = self.schema
        return lambda obj: schema

    def get_schema(self, obj=None):
        """
        Return schema data for this field.
        """
        return self._get_schema(obj)

    def _validate(self, value, model_instance):
        super(models.JSONField, self).validate(value, model_instance)
        schema = self.get_schema(model_instance)
        try:
            json_validators.validate(value, schema)
        except json_exceptions.ValidationError as e:
            raise exceptions.ValidationError(e.message, code='invalid')
Usage:
myapp/models/__init__.py
def schema(instance):
    schema = {}
    # Here is your code that uses the other
    # instance's fields to create a schema.
    return schema

class JSONSchemaModel(models.Model):
    dynamic = JSONField(schema=schema, default=dict)
    from_dict = JSONField(schema={'type': 'object'}, default=dict)
    # A static file: myapp/static/myapp/schema.json
    from_file = JSONField(schema='myapp/schema.json', default=dict)
Another solution using jsonschema, for simple cases:
import jsonschema
from django.core.exceptions import ValidationError
from django.db import models

class JSONValidatedField(models.JSONField):
    def __init__(self, *args, **kwargs):
        self.props = kwargs.pop('props')
        self.required_props = kwargs.pop('required_props', [])
        super().__init__(*args, **kwargs)

    def validate(self, value, model_instance):
        try:
            jsonschema.validate(
                value, {
                    '$schema': 'http://json-schema.org/draft-07/schema#',
                    'type': 'object',
                    'properties': self.props,
                    'required': self.required_props
                }
            )
        except jsonschema.exceptions.ValidationError:
            raise ValidationError(
                f'Value "{value}" failed schema validation.')

class SomeModel(models.Model):
    my_json_field = JSONValidatedField(
        props={
            'foo': {'type': 'string'},
            'bar': {'type': 'integer'}
        },
        required_props=['foo'])
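Note that, like all Django field validators, this validate() only runs during model validation (Model.full_clean() or form validation), not on save(). A quick illustration, assuming the SomeModel above:
m = SomeModel(my_json_field={'foo': 'hello', 'bar': 1})
m.full_clean()  # passes

m = SomeModel(my_json_field={'bar': 2})  # missing required 'foo'
m.full_clean()  # raises ValidationError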