Specializing JSON object encoding with python 3 - json

I'm having some troubles due to the changes to dict.values() and keys() in Python3.
My old code was something like this:
import json
class ComplexEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, complex):
return [obj.real, obj.imag]
return json.JSONEncoder.default(self, obj)
a = { '1' : 2 + 1j, '2' : 4 + 2j }
print(json.dumps(a.values(), cls=ComplexEncoder))
This on Python 3.3+ raise the exception:
TypeError: dict_values([(2+1j), (4+2j)]) is not JSON serializable
The easy workaround is to do list(a.values()), the problem for me is that I have a lot instances like that in the code. Is there a way to extend the ComplexEncoder in order to iterate over the
view?

You could encode the iterable as a list:
class IterEncoder(json.JSONEncoder):
def default(self, obj):
try:
return list(obj)
except TypeError:
return super().default(obj)
class ComplexEncoder(IterEncoder):
def default(self, obj):
if isinstance(obj, complex):
return [obj.real, obj.imag]
return super().default(obj)

Related

json.dumps not working with custom python object

I am not able to serialize a custom object in python 3 .
Below is the explanation and code
packages= []
data = {
"simplefield": "value1",
"complexfield": packages,
}
where packages is a list of custom object Library.
Library object is a class as below (I have also sub-classed json.JSONEncoder but it is not helping )
class Library(json.JSONEncoder):
def __init__(self, name):
print("calling Library constructor ")
self.name = name
def default(self, object):
print("calling default method ")
if isinstance(object, Library):
return {
"name": object.name
}
else:
raiseExceptions("Object is not instance of Library")
Now I am calling json.dumps(data) but it is throwing below exception.
TypeError: Object of type `Library` is not JSON serializable
It seems "calling default method" is not printed meaning, Library.default method is not called
Can anybody please help here ?
I have also referred to Serializing class instance to JSON but its is not helping much
Inheriting json.JSONEncoder will not make a class serializable. You should define your encoder separately and then provide the encoder instance in the json.dumps call.
See an example approach below
import json
from typing import Any
class Library:
def __init__(self, name):
print("calling Library constructor ")
self.name = name
def __repr__(self):
# I have just created a serialized version of the object
return '{"name": "%s"}' % self.name
# Create your custom encoder
class CustomEncoder(json.JSONEncoder):
def default(self, o: Any) -> Any:
# Encode differently for different types
return str(o) if isinstance(o, Library) else super().default(o)
packages = [Library("package1"), Library("package2")]
data = {
"simplefield": "value1",
"complexfield": packages,
}
#Use your encoder while serializing
print(json.dumps(data, cls=CustomEncoder))
Output was like:
{"simplefield": "value1", "complexfield": ["{\"name\": \"package1\"}", "{\"name\": \"package2\"}"]}
You can use the default parameter of json.dump:
def default(obj):
if isinstance(obj, Library):
return {"name": obj.name}
raise TypeError(f"Can't serialize {obj!r}.")
json.dumps(data, default=default)

How do I make a dict subclass json serializable?

I can represent the my Simple_Dict_Subclass and List_Subclass with json.dumps, but not Custom_Dict_Subclass. When json.dumps is called on List_Subclass its __iter__ method is called, so I reasoned that json.dumps would call a dictionary's items method. And items is called in Simple_Dict_Subclass but not Custom_Dict_Subclass. How can I make my Custom_Dict_Subclass json serializable like Simple_Dict_Subclass?
import json
class Custom_Dict_Subclass(dict):
def __init__(self):
self.data = {}
def __setitem__(self, key, value):
self.data[key] = value
def __getitem__(self, key):
return self.data[key]
def __str__(self):
return str(self.data)
def items(self):
print("'Items' called from Custom_Dict_Subclass")
yield from self.data.items()
class Simple_Dict_Subclass(dict):
def __setitem__(self, key, value):
super().__setitem__(key, value)
def __getitem__(self, key):
return super().__getitem__(key)
def __str__(self):
return super().__str__()
def items(self):
print("'Items' called from Simple_Dict_Subclass")
yield from super().items()
class List_Subclass(list):
def __init__(self):
self.data = []
def __setitem__(self, index, value):
self.data[index] = value
def __getitem__(self, index):
return self.data[index]
def __str__(self):
return str(self.data)
def __iter__(self):
yield from self.data
def append(self, value):
self.data.append(value)
d = Custom_Dict_Subclass()
d[0] = None
print(d) # Works
print(json.dumps(d)) # Does't work
d = Simple_Dict_Subclass()
d[0] = None
print(d) # Works
print(json.dumps(d)) # Works
l = List_Subclass()
l.append(None)
print(l) # Works
print(json.dumps(l)) # Works
Output:
{0: None} # Custom dict string working
{} # Custom dict json.dumps not working
{0: None} # Simple dict string working
'Items' called from Simple_Dict_Subclass
{"0": null} # Simple dict json.dumps working
[None] # List string working
[null] # List json.dumps working
Generally speaking, it is not safe to assume that json.dumps will
trigger the items method of the dictionary. This is how it is
implemented but you cannot rely on that.
In your case, the Custom_Dict_Subclass.items is never called because
(key, value) pairs are not added to the dict object but to its
data attribute.
To fix that you need to invoke the super methods in
Custom_Dict_Subclass:
class Custom_Dict_Subclass(dict):
def __init__(self):
dict.__init__(self)
self.data = {}
def __setitem__(self, key, value):
self.data[key] = value
super().__setitem__(key, value)
The object is dumped correctly, but of course, (key, value) will then
be stored twice: in the dict object and in its data attribute.
In that situation, it is better to define a sub class of
json.JSONEncoder to implement the translation of a
Custom_Dict_Subclass object to a json serialisable object and to
give this class as the keyword argument cls of json.dumps:
import json
class Custom_Dict_Subclass:
def __init__(self):
self.data = {}
def __setitem__(self, key, value):
self.data[key] = value
def __getitem__(self, key):
return self.data[key]
def __str__(self):
return str(self.data)
def items(self):
print("'Items' called from Custom_Dict_Subclass")
yield from self.data.items()
class CustomDictEncoder(json.JSONEncoder):
def default(self, obj):
"""called by json.dumps to translate an object obj to
a json serialisable data"""
if isinstance(obj, Custom_Dict_Subclass):
return obj.data
return json.JSONEncoder.default(self, obj)
d = Custom_Dict_Subclass()
d[0] = None
print(json.dumps(d, cls=CustomDictEncoder))

AttributeError: 'tuple' object has no attribute 'dumps'

I'm new to python, I try to serialize a list of custom object. this is the object I try to serialize:
test = [(deliveryRecipientObject){
deliveryType = "selected"
id = "gkfhgjhfjhgjghkj"
type = "list"
}]
After I read some post and tutorial I come up with this:
class deliverRecipientObject(object):
def __init__(self):
self.deliveryType = ""
self.id = ""
self.type = ""
class MyJsonEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, deliverRecipientObject):
return {}
return (MyJsonEncoder, self).dumps(obj)
then I run:
json.dumps(test, cls=MyJsonEncoder)
and then I got this error: AttributeError: 'tuple' object has no attribute 'dumps'
my goal is to read that as json and then I can flatten it and save it as csv
thank you
I think you may have intended (MyJsonEncoder, self).dumps to be super(MyJsonEncoder, self).dumps, though that would have been wrong too. Instead, you should be calling super().default (in python 3 you don't need to pass the arguments)
class MyJsonEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, deliverRecipientObject):
return {
"deliveryType": obj.deliveryType,
"id": obj.id,
"type": obj.type,
}
return super().default(obj)

JSON Serialization is empty when serialising from eulxml in python

I am working with eXistDB in python and leveraging the eulxml library to handle mapping from the xml in the database into custom objects. I want to then serialize these objects to json (for another application to consume) but I'm running into issues. jsonpickle doesn't work (it ends up returning all sorts of excess garbage and the value are the fields aren't actually encoded but rather their eulxml type) and the standard json.dumps() is simply giving me empty json (this was after trying to implement the solution detailed here). The problem seems to stem from the fact that the __dict__ values are not initialised __oninit__ (as they are mapped as class properties) so the __dict__ appears empty upon serialization. Here is some sample code:
Serializable Class Object
class Serializable(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# hack to fix _json.so make_encoder serialize properly
self.__setitem__('dummy', 1)
def _myattrs(self):
return [
(x, self._repr(getattr(self, x)))
for x in self.__dir__()
if x not in Serializable().__dir__()
]
def _repr(self, value):
if isinstance(value, (str, int, float, list, tuple, dict)):
return value
else:
return repr(value)
def __repr__(self):
return '<%s.%s object at %s>' % (
self.__class__.__module__,
self.__class__.__name__,
hex(id(self))
)
def keys(self):
return iter([x[0] for x in self._myattrs()])
def values(self):
return iter([x[1] for x in self._myattrs()])
def items(self):
return iter(self._myattrs())
Base Class
from eulxml import xmlmap
import inspect
import lxml
import json as JSON
from models.serializable import Serializable
class AlcalaBase(xmlmap.XmlObject,Serializable):
def toJSON(self):
return JSON.dumps(self, indent=4)
def to_json(self, skipBegin=False):
json = list()
if not skipBegin:
json.append('{')
json.append(str.format('"{0}": {{', self.ROOT_NAME))
for attr, value in inspect.getmembers(self):
if (attr.find("_") == -1
and attr.find("serialize") == -1
and attr.find("context") == -1
and attr.find("node") == -1
and attr.find("schema") == -1):
if type(value) is xmlmap.fields.NodeList:
if len(value) > 0:
json.append(str.format('"{0}": [', attr))
for v in value:
json.append(v.to_json())
json.append(",")
json = json[:-1]
json.append("]")
else:
json.append(str.format('"{0}": null', attr))
elif (type(value) is xmlmap.fields.StringField
or type(value) is str
or type(value) is lxml.etree._ElementUnicodeResult):
value = JSON.dumps(value)
json.append(str.format('"{0}": {1}', attr, value))
elif (type(value) is xmlmap.fields.IntegerField
or type(value) is int
or type(value) is float):
json.append(str.format('"{0}": {1}', attr, value))
elif value is None:
json.append(str.format('"{0}": null', attr))
elif type(value) is list:
if len(value) > 0:
json.append(str.format('"{0}": [', attr))
for x in value:
json.append(x)
json.append(",")
json = json[:-1]
json.append("]")
else:
json.append(str.format('"{0}": null', attr))
else:
json.append(value.to_json(skipBegin=True))
json.append(",")
json = json[:-1]
if not skipBegin:
json.append('}')
json.append('}')
return ''.join(json)
Sample Class that implements Base
from eulxml import xmlmap
from models.alcalaMonth import AlcalaMonth
from models.alcalaBase import AlcalaBase
class AlcalaPage(AlcalaBase):
ROOT_NAME = "page"
id = xmlmap.StringField('pageID')
year = xmlmap.IntegerField('content/#yearID')
months = xmlmap.NodeListField('content/month', AlcalaMonth)
The toJSON() method on the base is the method that is using the Serializable class and is returning empty json, e.g. "{}". The to_json() is my attempt to for a json-like implementation but that has it's own problems (for some reason it skips certain properties / child objects for no reason I can see but thats a thread for another day).
If I attempt to access myobj.keys or myobj.values (both of which are exposed via Serializable) I can see property names and values as I would expect but I have no idea why json.dumps() produces an empty json string.
Does anyone have any idea why I cannot get these objects to serialize to json?! I've been pulling my hair out for weeks with this. Any help would be greatly appreciated.
So after a lot of playing around, I was finally able to fix this with jsonpickle and it took only 3 lines of code:
def toJson(self):
jsonpickle.set_preferred_backend('simplejson')
return jsonpickle.encode(self, unpicklable=False)
I used simplejson to eliminate some of the additional object notation that was being added and the unpicklable property removed the rest (I'm not sure if this would work with the default json backend as I didn't test it).
Now when I call toJson() on any object that inherits from this base class, I get very nice json and it works brilliantly.

django postgresql json field schema validation

I have a django model with a JSONField (django.contrib.postgres.fields.JSONField)
Is there any way that I can validate model data against a json schema file?
(pre-save)
Something like my_field = JSONField(schema_file=my_schema_file)
I wrote a custom validator using jsonschema in order to do this.
project/validators.py
import django
from django.core.validators import BaseValidator
import jsonschema
class JSONSchemaValidator(BaseValidator):
def compare(self, value, schema):
try:
jsonschema.validate(value, schema)
except jsonschema.exceptions.ValidationError:
raise django.core.exceptions.ValidationError(
'%(value)s failed JSON schema check', params={'value': value}
)
project/app/models.py
from django.db import models
from project.validators import JSONSchemaValidator
MY_JSON_FIELD_SCHEMA = {
'schema': 'http://json-schema.org/draft-07/schema#',
'type': 'object',
'properties': {
'my_key': {
'type': 'string'
}
},
'required': ['my_key']
}
class MyModel(models.Model):
my_json_field = models.JSONField(
default=dict,
validators=[JSONSchemaValidator(limit_value=MY_JSON_FIELD_SCHEMA)]
)
That's what the Model.clean() method is for (see docs). Example:
class MyData(models.Model):
some_json = JSONField()
...
def clean(self):
if not is_my_schema(self.some_json):
raise ValidationError('Invalid schema.')
you could use cerberus to validate your data against a schema
from cerberus import Validator
schema = {'name': {'type': 'string'}}
v = Validator(schema)
data = {'name': 'john doe'}
v.validate(data) # returns "True" (if passed)
v.errors # this would return the error dict (or on empty dict in case of no errors)
it's pretty straightforward to use (also due to it's good documentation -> validation rules: http://docs.python-cerberus.org/en/stable/validation-rules.html)
I wrote a custom JSONField that extends models.JSONField and validates attribute's value by using jsonschema (Django 3.1, Python 3.7).
I didn't use the validators parameter for one reason: I want to let users define the schema dynamically.So I use a schema parameter, that should be:
None (by default): the field will behave like its parent class (no JSON schema validation support).
A dict object. This option is suitable for a small schema definition (for example: {"type": "string"});
A str object that describes a path to the file where the schema code is contained. This option is suitable for a big schema definition (to preserve the beauty of the model class definition code). For searching I use all enabled finders: django.contrib.staticfiles.finders.find().
A function that takes a model instance as an argument and returns a schema as dict object. So you can build a schema based on the state of the given model instance. The function will be called every time when the validate() is called.
myapp/models/fields.py
import json
from jsonschema import validators as json_validators
from jsonschema import exceptions as json_exceptions
from django.contrib.staticfiles import finders
from django.core import checks, exceptions
from django.db import models
from django.utils.functional import cached_property
class SchemaMode:
STATIC = 'static'
DYNAMIC = 'dynamic'
class JSONField(models.JSONField):
"""
A models.JSONField subclass that supports the JSON schema validation.
"""
def __init__(self, *args, schema=None, **kwargs):
if schema is not None:
if not(isinstance(schema, (bool, dict, str)) or callable(schema)):
raise ValueError('The "schema" parameter must be bool, dict, str, or callable object.')
self.validate = self._validate
else:
self.__dict__['schema_mode'] = False
self.schema = schema
super().__init__(*args, **kwargs)
def check(self, **kwargs):
errors = super().check(**kwargs)
if self.schema_mode == SchemaMode.STATIC:
errors.extend(self._check_static_schema(**kwargs))
return errors
def _check_static_schema(self, **kwargs):
try:
schema = self.get_schema()
except (TypeError, OSError):
return [
checks.Error(
f"The file '{self.schema}' cannot be found.",
hint="Make sure that 'STATICFILES_DIRS' and 'STATICFILES_FINDERS' settings "
"are configured correctly.",
obj=self,
id='myapp.E001',
)
]
except json.JSONDecodeError:
return [
checks.Error(
f"The file '{self.schema}' contains an invalid JSON data.",
obj=self,
id='myapp.E002'
)
]
validator_cls = json_validators.validator_for(schema)
try:
validator_cls.check_schema(schema)
except json_exceptions.SchemaError:
return [
checks.Error(
f"{schema} must be a valid JSON Schema.",
obj=self,
id='myapp.E003'
)
]
else:
return []
def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
if self.schema is not None:
kwargs['schema'] = self.schema
return name, path, args, kwargs
#cached_property
def schema_mode(self):
if callable(self.schema):
return SchemaMode.DYNAMIC
return SchemaMode.STATIC
#cached_property
def _get_schema(self):
if callable(self.schema):
return self.schema
elif isinstance(self.schema, str):
with open(finders.find(self.schema)) as fp:
schema = json.load(fp)
else:
schema = self.schema
return lambda obj: schema
def get_schema(self, obj=None):
"""
Return schema data for this field.
"""
return self._get_schema(obj)
def _validate(self, value, model_instance):
super(models.JSONField, self).validate(value, model_instance)
schema = self.get_schema(model_instance)
try:
json_validators.validate(value, schema)
except json_exceptions.ValidationError as e:
raise exceptions.ValidationError(e.message, code='invalid')
Usage:
myapp/models/__init__.py
def schema(instance):
schema = {}
# Here is your code that uses the other
# instance's fields to create a schema.
return schema
class JSONSchemaModel(models.Model):
dynamic = JSONField(schema=schema, default=dict)
from_dict = JSONField(schema={'type': 'object'}, default=dict)
# A static file: myapp/static/myapp/schema.json
from_file = JSONField(schema='myapp/schema.json', default=dict)
Another solution using jsonschema for simple cases.
class JSONValidatedField(models.JSONField):
def __init__(self, *args, **kwargs):
self.props = kwargs.pop('props')
self.required_props = kwargs.pop('required_props', [])
super().__init__(*args, **kwargs)
def validate(self, value, model_instance):
try:
jsonschema.validate(
value, {
'schema': 'http://json-schema.org/draft-07/schema#',
'type': 'object',
'properties': self.props,
'required': self.required_props
}
)
except jsonschema.exceptions.ValidationError:
raise ValidationError(
f'Value "{value}" failed schema validation.')
class SomeModel(models.Model):
my_json_field = JSONValidatedField(
props={
'foo': {'type': 'string'},
'bar': {'type': 'integer'}
},
required_props=['foo'])