JSON serialization using Marshmallow - skip None attributes - json

I am using Marshmallow to send instance of my Decision class to JSON. However, this will also dump the attributes which are None, e.g. my attribute score will translate to null in JSON. After that I am unable to read the JSON again using the same approach.
https://repl.it/repls/VoluminousMulticoloredFacts
The last line is where it currently fails. I need to either NOT dump None to JSON or skip null during loading:
import json
from marshmallow import Schema, fields, post_load
json_data = """{
"appid": "2309wfjwef",
"strategy": "First Strategy"
}"""
# Output class definition
class Decision(object):
    """Plain value object describing a decision for one application.

    Every attribute defaults to ``None`` so an instance can be built from a
    partial payload (for example one that carries no ``score``).
    """

    def __init__(self, appid=None, strategy=None, score=None):
        self.appid = appid
        self.strategy = strategy
        self.score = score
class DecisionSchema(Schema):
    """Marshmallow schema that (de)serializes ``Decision`` objects."""

    appid = fields.Str()
    strategy = fields.Str()
    score = fields.Int()

    # The decorator is what registers the hook; written as a plain comment the
    # method is never invoked and load() hands back a dict, not a Decision.
    @post_load
    def make_decision(self, data, **kwargs):
        """Build a ``Decision`` from the deserialized field values.

        ``**kwargs`` absorbs any extra keyword arguments the library passes
        to hooks, so the method works whether or not they are supplied.
        """
        return Decision(**data)
# Deserialization into object
dec_json = json.loads(json_data)
schema = DecisionSchema()
dec = schema.load(dec_json).data
print(dec.strategy)
# Dump results back to JSON
schema = DecisionSchema()
out = schema.dumps(dec)
print(out.data)
# Load back from dump
schema = DecisionSchema()
dec = schema.load(out).data
#print(dec.strategy) # returns error currently

An "official" answer from marshmallow development team can be found in this comment in the bugtracker:
Use a post_dump method.
from marshmallow import Schema, fields, post_dump
class BaseSchema(Schema):
    """Schema base class that drops unwanted values from dumped output.

    Subclasses may override ``SKIP_VALUES`` with any container supporting
    ``in`` to change which values are removed.
    """

    # Values that must not appear in the serialized result.
    SKIP_VALUES = {None}

    @post_dump
    def remove_skip_values(self, data, **kwargs):
        """Return ``data`` without the entries whose value is in SKIP_VALUES."""
        return {
            key: value for key, value in data.items()
            if not self._is_skipped(value)
        }

    def _is_skipped(self, value):
        """Tell whether ``value`` is one of the values to strip."""
        try:
            return value in self.SKIP_VALUES
        except TypeError:
            # Unhashable values (nested dicts/lists) cannot be members of a
            # set, so they are never skipped; without this guard the
            # membership test itself would raise.
            return False
class MySchema(BaseSchema):
    """Example schema with two untyped fields, inheriting the value filter."""

    foo = fields.Field()
    bar = fields.Field()
sch = MySchema()
sch.dump({'foo': 42, 'bar': None}).data # {'foo': 42}
As I point out in a further comment, there's a shortcoming: it will also remove None when the field's allow_none is True.

As I pointed out in my comment above this messes with the order if you use the
class Meta:
fields = (
'field1', 'field2'
)
ordered = True
To fix this I used this:
# Remove None fields
# Registering the hook requires the decorator form; as a comment it is inert.
@pre_dump
def remove_skip_values(self, data, **kwargs):
    """Drop ``None``-valued entries from ``data`` before it is serialized.

    ``data`` must offer ``.items()`` (i.e. be a mapping). ``**kwargs``
    absorbs any extra keyword arguments passed to the hook.
    """
    return {
        key: value for key, value in data.items()
        if value is not None
    }
This works for my dictionary of objects.

Related

How to convert simple JSON to DynamoDB JSON?

I have a simple JSON and want to convert it to DynamoDB JSON. Is there any easy way to do that?
If you mean JsonString to Dynamodb Map, you can use boto3.
Here is the example.
import boto3
import json
json_string = '{"key1": 1, "key2": "value"}'
json_obj = json.loads(json_string)
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table('test-table')
table.put_item(Item={'pk': 'pk-value', 'map': json_obj})
If you just want to replace the whole Map attribute, you can pass the parsed JSON in the same way as with put_item.
# Replace the whole 'map' attribute with a new document. The names below must
# match the ones used in json.loads() and in the update_item() call.
json_string2 = '{"key1": 2, "key2": "value2"}'
json_obj2 = json.loads(json_string2)
rsp = table.update_item(
    Key={'pk': 'pk-value'},
    AttributeUpdates={'map': {'Value': json_obj2, 'Action': 'PUT'}}
)
However, if you want to update only a specific nested attribute, you need to use UpdateExpression. For example, the code below updates only the key1 attribute of the map, setting it to the value bound to :val3 (here, the object parsed from nested_json_string).
nested_json_string = '{"nested": "key3"}'
nested_json_obj = json.loads(nested_json_string)
rsp = table.update_item(
Key={'pk': 'pk-value'},
UpdateExpression='SET #map.#key1 = :val3',
ExpressionAttributeNames={'#map': 'map', '#key1': 'key1'},
ExpressionAttributeValues={':val3': nested_json_obj}
)
I know this is an old question, but I came across it and the accepted answer didn't help me (it seems to suggest that you can feed boto3 with plain JSON, but it didn't work for me) and the library mentioned in the comments didn't help me either.
What did work for me was using the serializer/deserializer from boto3.dynamodb.types, basically as suggested by this answer on a very similar topic.
from boto3.dynamodb.types import TypeSerializer, TypeDeserializer
import json
serializer = TypeSerializer()
deserializer = TypeDeserializer()
# for building a DynamoDB JSON from a Python object
def serialize(item):
    """Build the DynamoDB-typed representation of a Python object.

    Objects that have a ``__dict__`` are serialized through ``vars(item)``;
    anything else is handed to the serializer unchanged.

    Returns:
        The contents of the ``'M'`` wrapper when the serialized value is a
        map, otherwise the serialized value itself.
    """
    serialized_item = serializer.serialize(vars(item) if hasattr(item, '__dict__') else item)
    # Non-map results used to fall back to the raw, unserialized ``item``;
    # return the serialized form so the output is always DynamoDB-typed.
    return serialized_item['M'] if 'M' in serialized_item else serialized_item
# for building a plain JSON from a DynamoDB JSON
def deserialize(dynamodb_json_string):
    """Convert a DynamoDB-typed item back into plain Python values.

    The argument is wrapped as the value of an ``'M'`` (map) entry before
    being handed to the deserializer.
    NOTE(review): despite the parameter name this is presumably a mapping of
    attribute names to typed values rather than a string - confirm.
    """
    return deserializer.deserialize({'M': dynamodb_json_string})
class MyItem:
    """Sample item with one string-like and one numeric-like attribute."""

    def __init__(self, some_string_value=None, some_numeric_value=None):
        self.my_key = some_string_value
        self.my_other_key = some_numeric_value

    def __str__(self):
        """Return the instance attributes rendered as a JSON object."""
        # json cannot encode the instance directly; ``default`` substitutes
        # the attribute dict for any object it does not know how to encode.
        return json.dumps(self, default=lambda x: x.__dict__)
if __name__ == '__main__':
my_classy_item = MyItem("my_string_value", 5)
my_string_item = json.loads('{"my_key": "my_string_value", "my_other_key" : 5}')
print(serialize(my_classy_item))
print(serialize(my_string_item))

validation of csv using json schema in python

I want to perform validation on the data. I have written the code using pandas_schema; instead of hard-coding the schema, how can I pass a JSON file that contains all the validation rules and then apply it to the CSV file?
That means to apply which rule on which column must be taken from the json file instead of the pandas schema and generate the error file.
def check_decimal(dec):
    """Return True when ``dec`` can be converted to ``Decimal``, else False."""
    try:
        Decimal(dec)
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation covers malformed strings; TypeError/ValueError
        # cover inputs such as None that Decimal rejects outright.
        return False
    return True
def check_int(num):
    """Return True when ``int(num)`` succeeds, else False."""
    try:
        int(num)
    except (ValueError, TypeError, OverflowError):
        # TypeError: None and other non-numeric objects;
        # OverflowError: infinite floats.
        return False
    return True
def do_validation():
    """Validate ``data.csv`` against the column rules and record the errors.

    Writes the validation errors to ``errors55.csv``.

    Returns:
        The input data with every row that produced an error removed.
    """
    # read the data
    data = pd.read_csv('data.csv')

    # define validation elements
    decimal_validation = [CustomElementValidation(check_decimal, 'is not decimal')]
    int_validation = [CustomElementValidation(check_int, 'is not integer')]
    # A validation callable returns True for cells that PASS, so the
    # "cannot be null" rule must accept non-null cells. pd.notna treats both
    # None and NaN as null.
    null_validation = [CustomElementValidation(lambda d: pd.notna(d), 'this field cannot be null')]

    # define validation schema
    schema = pandas_schema.Schema([
        Column('dec1', decimal_validation + null_validation),
        Column('dec2', decimal_validation),
        Column('dec3', decimal_validation),
        Column('dec4', decimal_validation),
        Column('dec5', decimal_validation),
        Column('dec6', decimal_validation),
        Column('dec7', decimal_validation),
        Column('company_id', int_validation + null_validation),
        Column('currency_id', int_validation + null_validation),
        Column('country_id', int_validation + null_validation)])

    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)

    # save the errors
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
    # Hand the cleaned frame back instead of silently discarding it.
    return data_clean
So, I don't know anything really about pandas_schema, but if you have columns and their validators in a json like this:
{
"dec1": ["decimal", "null"],
"dec2": ["decimal"],
"dec3": ["decimal"],
"dec4": ["decimal"],
"dec5": ["decimal"],
"dec6": ["decimal"],
"dec7": ["decimal"],
"company_id": ["int", "null"],
"currency_id": ["int", "null"],
"country_id": ["int", "null"]
}
Then you can use a dict of validators and a list comprehension to generate your Column objects for the Schema:
def check_decimal(dec):
    """Return True when ``dec`` can be converted to ``Decimal``, else False."""
    try:
        Decimal(dec)
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation covers malformed strings; TypeError/ValueError
        # cover inputs such as None that Decimal rejects outright.
        return False
    return True
def check_int(num):
    """Return True when ``int(num)`` succeeds, else False."""
    try:
        int(num)
    except (ValueError, TypeError, OverflowError):
        # TypeError: None and other non-numeric objects;
        # OverflowError: infinite floats.
        return False
    return True
# Maps the validator names used in the JSON schema file to validator objects.
VALIDATORS = {
    'decimal': CustomElementValidation(check_decimal, 'is not decimal'),
    'int': CustomElementValidation(check_int, 'is not integer'),
    # The callable returns True for cells that PASS, so the "cannot be null"
    # rule accepts non-null cells; pd.notna treats None and NaN as null.
    'null': CustomElementValidation(lambda d: pd.notna(d), 'this field cannot be null'),
}
def do_validation():
    """Validate ``data.csv`` using the column rules in ``my_json_schema.json``.

    The JSON file maps each column name to a list of validator names, which
    are looked up in ``VALIDATORS``. Validation errors are written to
    ``errors55.csv``.

    Returns:
        The input data with every row that produced an error removed.
    """
    # read the data
    data = pd.read_csv('data.csv')

    with open('my_json_schema.json', 'r', encoding='utf-8') as my_json:
        json_schema = json.load(my_json)

    # One Column per JSON entry, with its named validators resolved.
    column_list = [Column(k, [VALIDATORS[v] for v in vals]) for k, vals in json_schema.items()]
    schema = pandas_schema.Schema(column_list)

    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)

    # save the errors
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
    # Hand the cleaned frame back instead of silently discarding it.
    return data_clean
EDIT:
For using validators with arguments defined in the JSON you are going to need to change up both the JSON format and the code a bit. The following should work, but I can't test it myself.
{
"dec1": [["decimal"], ["null"]],
"dec2": [["decimal"], ["range", 0, 10]],
"dec3": [["decimal"]],
"dec4": [["decimal"]],
"dec5": [["decimal"]],
"dec6": [["decimal"]],
"dec7": [["decimal"]],
"company_id": [["int"], ["null"]],
"currency_id": [["int"], ["null"]],
"country_id": [["int"], ["null"]]
}
def get_validator(opts):
    """Build a validator from one JSON rule.

    Args:
        opts: A list whose first item is the validator name and whose
            remaining items are extra positional arguments for it,
            e.g. ``['range', 0, 10]``.

    Returns:
        The validator object created for the rule.

    Raises:
        KeyError: If the validator name is unknown.
    """
    VALIDATORS = {
        'decimal': (CustomElementValidation, [check_decimal, 'is not decimal']),
        'int': (CustomElementValidation, [check_int, 'is not integer']),
        # The callable returns True for cells that PASS, so the "cannot be
        # null" rule accepts non-null cells; pd.notna treats None/NaN as null.
        'null': (CustomElementValidation, [lambda d: pd.notna(d), 'this field cannot be null']),
        'range': (InRangeValidation, []),
    }
    func, args = VALIDATORS[opts[0]]
    # Build a fresh argument list rather than extending the stored one in
    # place, so the table stays reusable if it is ever hoisted out.
    return func(*args, *opts[1:])
def do_validation():
    """Validate ``data.csv`` using the parameterized rules in ``my_json_schema.json``.

    The JSON file maps each column name to a list of rules; each rule is
    turned into a validator by ``get_validator``. Validation errors are
    written to ``errors55.csv``.

    Returns:
        The input data with every row that produced an error removed.
    """
    # read the data
    data = pd.read_csv('data.csv')

    with open('my_json_schema.json', 'r', encoding='utf-8') as my_json:
        json_schema = json.load(my_json)

    # One Column per JSON entry, with a validator built for each rule.
    column_list = [Column(k, [get_validator(v) for v in vals]) for k, vals in json_schema.items()]
    schema = pandas_schema.Schema(column_list)

    # apply validation
    errors = schema.validate(data)
    errors_index_rows = [e.row for e in errors]
    data_clean = data.drop(index=errors_index_rows)

    # save the errors
    pd.DataFrame({'col': errors}).to_csv('errors55.csv')
    # Hand the cleaned frame back instead of silently discarding it.
    return data_clean

Serialization through the Django Rest Framework - how to deserialize?

I have an array of the objects and then try to serialize it using the following statement:
serializer = MovieWithDescriptionSerializer(movies, many=True)
data = serializer.data
The class and the serializer are as below:
class MovieWithDescription(object):
    """Simple container for a movie and its description.

    The class-level attributes act as defaults; ``rating`` and ``year`` are
    never set by ``__init__`` and keep those defaults unless assigned later.
    """

    id = 0
    name = ''
    description = ''
    rating = ''
    year = 0

    def __init__(self, uid, name, description):
        self.id = uid
        self.name = name
        self.description = description
class MovieWithDescriptionSerializer(serializers.Serializer):
    """Serializer exposing the id, name and description of a movie."""

    # NOTE(review): Meta.model / Meta.fields are presumably only honoured by
    # ModelSerializer; on a plain Serializer the declared fields below are
    # what drive the output - confirm.
    class Meta:
        model = MovieWithDescription
        fields = ('id', 'name', 'description')

    id = serializers.IntegerField()
    name = serializers.StringRelatedField()
    description = serializers.StringRelatedField()
The data is saved to session:
request.session['movies'] = data
And read on the other page:
movies = request.session['movies']
However when I tried to deserialize it I learned that the movies variable contains list. So it looks like I don't need to deserialize and just need to iterate through the list to process the data. What I'm doing wrong with this serialization? Is there any more simple way to serialize data than to use Django Rest Framework?
Answering your question from the comments section; also to clarify for whoever is migrating from a Java environment:
In Django Rest Framework; Serialization happens as follows:
Python Object --> serializer.data ==> Python Native Types --> Renderer Class ==> JSON
serializer.data is constructed by calling to_representation on each of the fields declared on the Serializer.
@property
def data(self):
    """Return the serialized representation, computing it on first access.

    The result is stored in ``self._data`` and reused afterwards. The
    source is, in order of preference: the instance (when there are no
    errors), the validated data (when there are no errors), or the initial
    data.
    """
    ...
    if not hasattr(self, '_data'):
        if self.instance is not None and not getattr(self, '_errors', None):
            self._data = self.to_representation(self.instance)
        elif hasattr(self, '_validated_data') and not getattr(self, '_errors', None):
            self._data = self.to_representation(self.validated_data)
        else:
            self._data = self.get_initial()
    return self._data
to_representation is supposed to take an object instance and return a dict of primitive types:
def to_representation(self, instance):
"""
Object instance -> Dict of primitive datatypes.
"""
...
So if serializer.data does not render to JSON, where does this magic happen?
It happens inside the Response object. When you construct a Response object with the data attribute (data here is a dict of primitive or a list of dict of primitives), the rendered_content method then defines how the data is rendered, sets the appropriate Content-Type headers, and so on.
@property
def rendered_content(self):
    """Render ``self.data`` with the negotiated renderer and return the body.

    Also sets the ``Content-Type`` header from the renderer's media type
    and charset unless ``self.content_type`` is already set, and removes
    the header again when the renderer produces an empty body.
    """
    renderer = getattr(self, 'accepted_renderer', None)
    accepted_media_type = getattr(self, 'accepted_media_type', None)
    context = getattr(self, 'renderer_context', None)

    assert renderer, ".accepted_renderer not set on Response"
    assert accepted_media_type, ".accepted_media_type not set on Response"
    assert context is not None, ".renderer_context not set on Response"
    context['response'] = self

    media_type = renderer.media_type
    charset = renderer.charset
    content_type = self.content_type

    if content_type is None and charset is not None:
        content_type = f"{media_type}; charset={charset}"
    elif content_type is None:
        content_type = media_type
    self['Content-Type'] = content_type

    ret = renderer.render(self.data, accepted_media_type, context)
    # Text output has to be encoded to bytes, which needs a charset.
    if isinstance(ret, str):
        assert charset, (
            'renderer returned unicode, and did not specify '
            'a charset value.'
        )
        return ret.encode(charset)

    if not ret:
        del self['Content-Type']

    return ret
This allows you to do something neat; you can define a single Django Rest Framework View, that can render XML, JSON, and many more -> you can find them under rest_framework.renderers.
For example, you can define an APIView that supports multiple render formats based on the query parameter (?format=json, ?format=xml, ?format=csv) as follows:
class ExampleAPIView(APIView):
renderer_classes = (JSONRenderer, XMLRenderer, CSVRenderer)
...
XML and CSV require additional package installations. Read more here: http://www.django-rest-framework.org/api-guide/renderers/#xml

django postgresql json field schema validation

I have a django model with a JSONField (django.contrib.postgres.fields.JSONField)
Is there any way that I can validate model data against a json schema file?
(pre-save)
Something like my_field = JSONField(schema_file=my_schema_file)
I wrote a custom validator using jsonschema in order to do this.
project/validators.py
import django
from django.core.validators import BaseValidator
import jsonschema
class JSONSchemaValidator(BaseValidator):
    """Validator that checks a value against a JSON schema.

    The schema is supplied as the validator's ``limit_value``.
    """

    def compare(self, value, schema):
        """Raise a Django ``ValidationError`` when ``value`` violates ``schema``.

        Returns ``None`` when the value conforms to the schema.
        """
        try:
            jsonschema.validate(value, schema)
        except jsonschema.exceptions.ValidationError as exc:
            # Keep the jsonschema error as the cause for easier debugging.
            raise django.core.exceptions.ValidationError(
                '%(value)s failed JSON schema check', params={'value': value}
            ) from exc
project/app/models.py
from django.db import models
from project.validators import JSONSchemaValidator
# JSON schema for my_json_field: an object with a required string 'my_key'.
MY_JSON_FIELD_SCHEMA = {
    # The dialect keyword is spelled '$schema'; a bare 'schema' key is not a
    # JSON Schema keyword and would not select the draft.
    '$schema': 'http://json-schema.org/draft-07/schema#',
    'type': 'object',
    'properties': {
        'my_key': {
            'type': 'string'
        }
    },
    'required': ['my_key']
}
class MyModel(models.Model):
    """Model whose JSON field is validated against MY_JSON_FIELD_SCHEMA."""

    my_json_field = models.JSONField(
        default=dict,
        validators=[JSONSchemaValidator(limit_value=MY_JSON_FIELD_SCHEMA)]
    )
That's what the Model.clean() method is for (see docs). Example:
class MyData(models.Model):
some_json = JSONField()
...
def clean(self):
if not is_my_schema(self.some_json):
raise ValidationError('Invalid schema.')
you could use cerberus to validate your data against a schema
# Minimal cerberus example: validate a dict against a one-field schema.
from cerberus import Validator
schema = {'name': {'type': 'string'}}
v = Validator(schema)
data = {'name': 'john doe'}
v.validate(data) # returns "True" (if passed)
v.errors # this would return the error dict (or an empty dict in case of no errors)
It's pretty straightforward to use (also thanks to its good documentation -> validation rules: http://docs.python-cerberus.org/en/stable/validation-rules.html)
I wrote a custom JSONField that extends models.JSONField and validates attribute's value by using jsonschema (Django 3.1, Python 3.7).
I didn't use the validators parameter for one reason: I want to let users define the schema dynamically. So I use a schema parameter, which should be one of:
None (by default): the field will behave like its parent class (no JSON schema validation support).
A dict object. This option is suitable for a small schema definition (for example: {"type": "string"});
A str object that describes a path to the file where the schema code is contained. This option is suitable for a big schema definition (to preserve the beauty of the model class definition code). For searching I use all enabled finders: django.contrib.staticfiles.finders.find().
A function that takes a model instance as an argument and returns a schema as dict object. So you can build a schema based on the state of the given model instance. The function will be called every time when the validate() is called.
myapp/models/fields.py
import json
from jsonschema import validators as json_validators
from jsonschema import exceptions as json_exceptions
from django.contrib.staticfiles import finders
from django.core import checks, exceptions
from django.db import models
from django.utils.functional import cached_property
class SchemaMode:
    """Names for the two ways a schema can be supplied."""

    STATIC = 'static'
    DYNAMIC = 'dynamic'
class JSONField(models.JSONField):
    """
    A models.JSONField subclass that supports the JSON schema validation.

    The ``schema`` argument may be ``None`` (no schema validation), a ``dict``
    or ``bool`` holding the schema itself, a ``str`` naming a static file
    that contains the schema as JSON, or a callable that receives the model
    instance and returns the schema.
    """

    def __init__(self, *args, schema=None, **kwargs):
        if schema is not None:
            if not (isinstance(schema, (bool, dict, str)) or callable(schema)):
                raise ValueError('The "schema" parameter must be bool, dict, str, or callable object.')
            # Swap in the schema-aware validation only when a schema is given.
            self.validate = self._validate
        else:
            # Pre-seed the cached_property slot so schema_mode is falsy and
            # check() skips the static-schema checks.
            self.__dict__['schema_mode'] = False
        self.schema = schema
        super().__init__(*args, **kwargs)

    def check(self, **kwargs):
        """Run the parent checks plus, for static schemas, the schema checks."""
        errors = super().check(**kwargs)
        if self.schema_mode == SchemaMode.STATIC:
            errors.extend(self._check_static_schema(**kwargs))
        return errors

    def _check_static_schema(self, **kwargs):
        """Return check errors for a schema that cannot be loaded or is invalid."""
        try:
            schema = self.get_schema()
        except (TypeError, OSError):
            return [
                checks.Error(
                    f"The file '{self.schema}' cannot be found.",
                    hint="Make sure that 'STATICFILES_DIRS' and 'STATICFILES_FINDERS' settings "
                         "are configured correctly.",
                    obj=self,
                    id='myapp.E001',
                )
            ]
        except json.JSONDecodeError:
            return [
                checks.Error(
                    f"The file '{self.schema}' contains an invalid JSON data.",
                    obj=self,
                    id='myapp.E002'
                )
            ]

        validator_cls = json_validators.validator_for(schema)

        try:
            validator_cls.check_schema(schema)
        except json_exceptions.SchemaError:
            return [
                checks.Error(
                    f"{schema} must be a valid JSON Schema.",
                    obj=self,
                    id='myapp.E003'
                )
            ]
        else:
            return []

    def deconstruct(self):
        """Include ``schema`` in the deconstructed kwargs when it is set."""
        name, path, args, kwargs = super().deconstruct()
        if self.schema is not None:
            kwargs['schema'] = self.schema
        return name, path, args, kwargs

    # These two must be real cached properties: check() compares schema_mode
    # as a value and get_schema() calls the object _get_schema evaluates to.
    @cached_property
    def schema_mode(self):
        """DYNAMIC for a callable schema, STATIC otherwise."""
        if callable(self.schema):
            return SchemaMode.DYNAMIC
        return SchemaMode.STATIC

    @cached_property
    def _get_schema(self):
        """Return a callable mapping a model instance to the schema."""
        if callable(self.schema):
            return self.schema
        elif isinstance(self.schema, str):
            # A string names a static file holding the schema as JSON.
            with open(finders.find(self.schema), encoding='utf-8') as fp:
                schema = json.load(fp)
        else:
            schema = self.schema
        return lambda obj: schema

    def get_schema(self, obj=None):
        """
        Return schema data for this field.
        """
        return self._get_schema(obj)

    def _validate(self, value, model_instance):
        """Run the base validation, then check ``value`` against the schema."""
        # NOTE(review): super(models.JSONField, self) starts the lookup after
        # models.JSONField, so that class's own validate() is bypassed -
        # confirm this is intentional.
        super(models.JSONField, self).validate(value, model_instance)
        schema = self.get_schema(model_instance)
        try:
            json_validators.validate(value, schema)
        except json_exceptions.ValidationError as e:
            raise exceptions.ValidationError(e.message, code='invalid') from e
Usage:
myapp/models/__init__.py
def schema(instance):
schema = {}
# Here is your code that uses the other
# instance's fields to create a schema.
return schema
class JSONSchemaModel(models.Model):
dynamic = JSONField(schema=schema, default=dict)
from_dict = JSONField(schema={'type': 'object'}, default=dict)
# A static file: myapp/static/myapp/schema.json
from_file = JSONField(schema='myapp/schema.json', default=dict)
Another solution using jsonschema for simple cases.
class JSONValidatedField(models.JSONField):
    """JSONField that validates its value as an object with given properties.

    Keyword Args:
        props: Mapping of property names to their JSON schema (required).
        required_props: List of property names that must be present
            (defaults to an empty list).
    """

    def __init__(self, *args, **kwargs):
        self.props = kwargs.pop('props')
        self.required_props = kwargs.pop('required_props', [])
        super().__init__(*args, **kwargs)

    def deconstruct(self):
        """Report the custom kwargs so the field can be re-created from them."""
        name, path, args, kwargs = super().deconstruct()
        kwargs['props'] = self.props
        if self.required_props:
            kwargs['required_props'] = self.required_props
        return name, path, args, kwargs

    def validate(self, value, model_instance):
        """Raise ``ValidationError`` when ``value`` does not match the schema."""
        # NOTE(review): the parent validate() is not called here, so only the
        # schema check runs - confirm that is intended.
        try:
            jsonschema.validate(
                value, {
                    # The dialect keyword is '$schema'; a bare 'schema' key is
                    # not a JSON Schema keyword.
                    '$schema': 'http://json-schema.org/draft-07/schema#',
                    'type': 'object',
                    'properties': self.props,
                    'required': self.required_props
                }
            )
        except jsonschema.exceptions.ValidationError as exc:
            raise ValidationError(
                f'Value "{value}" failed schema validation.') from exc
class SomeModel(models.Model):
my_json_field = JSONValidatedField(
props={
'foo': {'type': 'string'},
'bar': {'type': 'integer'}
},
required_props=['foo'])

Returning JSON from Flask

I was following this tutorial as a starting point to using models/controllers with python. Bear with me as I'm learning python.
I'm basically trying to get a json representation of my database table with key:value pairs as one would typically expect.
I can't seem to figure out the proper way of doing this.
My Model looks like this:
from app import db
import json
# define a base model for other database tables to inherit
class Base(db.Model):
__abstract__ = True
id = db.Column(db.Integer,primary_key=True)
date_created = db.Column(db.DateTime, default=db.func.current_timestamp())
date_modified = db.Column(db.DateTime, default=db.func.current_timestamp(), onupdate=db.func.current_timestamp())
# define a members model
class Member(Base):
__tablename__ = "members"
# member name
fname = db.Column(db.String(48), nullable=False)
sname = db.Column(db.String(48), nullable=False)
title = db.Column(db.String(90), nullable=True)
# new instance instantiation procedure
def __init__(self, fname, sname, title):
self.fname = fname
self.sname = sname
self.title = title
def __repr__(self):
# return '<Member %r>' % (self.fname)
# return '[{"id":self.id,"fname":self.fname,"sname":self.sname,"title":self.title}]'
return json.dumps(self.__dict__)
that will return an error: TypeError: datetime.datetime(2015, 2, 18, 11, 50, 1) is not JSON serializable
My Controller looks like the following:
# import flask dependencies
from flask import Blueprint, request, jsonify, Response
# import the database object from the main app module
from app import db
# import module models (i.e. Members)
from app.team_members.models import Member
# define the blueprint: 'member' set its url prefix: app.url/members
team_members = Blueprint('member',__name__,url_prefix='/members')
# set the route and accepted methods
# The route is only registered when this is an actual decorator.
@team_members.route('/list/', methods=['GET'])
def list():
    """Return all members as JSON, keyed 'member1', 'member2', ..."""
    members = Member.query.all()
    # Reuse the rows fetched above instead of running the query a second time.
    return jsonify({'member%d' % i: member for i, member in enumerate(members, start=1)})
    # resp = Response(response=members, status=200, mimetype="application/json")
    # return resp
I understand that jsonify will not return a list, but a dictionary for security reasons. And If I understand correctly, I need __repr__ to return a string.
Any help would be much appreciated.