Implementing MySQL "generated columns" on Django 1.8/1.9 - mysql

I discovered the new generated columns functionality of MySQL 5.7, and wanted to replace some properties of my models by those kind of columns. Here is a sample of a model:
class Ligne_commande(models.Model):
Quantite = models.IntegerField()
Prix = models.DecimalField(max_digits=8, decimal_places=3)
Discount = models.DecimalField(max_digits=5, decimal_places=3, blank=True, null=True)
#property
def Prix_net(self):
if self.Discount:
return (1 - self.Discount) * self.Prix
return self.Prix
#property
def Prix_total(self):
return self.Quantite * self.Prix_net
I defined generated field classes as subclasses of Django fields (e.g. GeneratedDecimalField as a subclass of DecimalField). This worked in a read-only context and Django migrations handles it correctly, except a detail : generated columns of MySQL does not support forward references and django migrations does not respect the order the fields are defined in a model, so the migration file must be edited to reorder operations.
After that, trying to create or modify an instance returned the mysql error : 'error totally whack'. I suppose Django tries to write generated field and MySQL doesn't like that. After taking a look to django code I realized that, at the lowest level, django uses the _meta.local_concrete_fields list and send it to MySQL. Removing the generated fields from this list fixed the problem.
I encountered another problem: during the modification of an instance, generated fields don't reflect the change that have been made to the fields from which they are computed. If generated fields are used during instance modification, as in my case, this is problematic. To fix that point, I created a "generated field descriptor".
Here is the final code of all of this.
Creation of generated fields in the model, replacing the properties defined above:
Prix_net = mk_generated_field(models.DecimalField, max_digits=8, decimal_places=3,
sql_expr='if(Discount is null, Prix, (1.0 - Discount) * Prix)',
pyfunc=lambda x: x.Prix if not x.Discount else (1 - x.Discount) * x.Prix)
Prix_total = mk_generated_field(models.DecimalField, max_digits=10, decimal_places=2,
sql_expr='Prix_net * Quantite',
pyfunc=lambda x: x.Prix_net * x.Quantite)
Function that creates generated fields. Classes are dynamically created for simplicity:
from django.db.models import fields
def mk_generated_field(field_klass, *args, sql_expr=None, pyfunc=None, **kwargs):
assert issubclass(field_klass, fields.Field)
assert sql_expr
generated_name = 'Generated' + field_klass.__name__
try:
generated_klass = globals()[generated_name]
except KeyError:
globals()[generated_name] = generated_klass = type(generated_name, (field_klass,), {})
def __init__(self, sql_expr, pyfunc=None, *args, **kwargs):
self.sql_expr = sql_expr
self.pyfunc = pyfunc
self.is_generated = True # mark the field
# null must be True otherwise migration will ask for a default value
kwargs.update(null=True, editable=False)
super(generated_klass, self).__init__(*args, **kwargs)
def db_type(self, connection):
assert connection.settings_dict['ENGINE'] == 'django.db.backends.mysql'
result = super(generated_klass, self).db_type(connection)
# double single '%' if any because it will clash with later Django format
sql_expr = re.sub('(?<!%)%(?!%)', '%%', self.sql_expr)
result += ' GENERATED ALWAYS AS (%s)' % sql_expr
return result
def deconstruct(self):
name, path, args, kwargs = super(generated_klass, self).deconstruct()
kwargs.update(sql_expr=self.sql_expr)
return name, path, args, kwargs
generated_klass.__init__ = __init__
generated_klass.db_type = db_type
generated_klass.deconstruct = deconstruct
return generated_klass(sql_expr, pyfunc, *args, **kwargs)
The function to register generated fields in a model. It must be called at django start-up, for example in the ready method of the AppConfig of the application.
from django.utils.datastructures import ImmutableList
def register_generated_fields(model):
local_concrete_fields = list(model._meta.local_concrete_fields[:])
generated_fields = []
for field in model._meta.fields:
if hasattr(field, 'is_generated'):
local_concrete_fields.remove(field)
generated_fields.append(field)
if field.pyfunc:
setattr(model, field.name, GeneratedFieldDescriptor(field.pyfunc))
if generated_fields:
model._meta.local_concrete_fields = ImmutableList(local_concrete_fields)
And the descriptor. Note that it is used only if a pyfunc is defined for the field.
class GeneratedFieldDescriptor(object):
attr_prefix = '_GFD_'
def __init__(self, pyfunc, name=None):
self.pyfunc = pyfunc
self.nickname = self.attr_prefix + (name or str(id(self)))
def __get__(self, instance, owner):
if instance is None:
return self
if hasattr(instance, self.nickname) and not instance.has_changed:
return getattr(instance, self.nickname)
return self.pyfunc(instance)
def __set__(self, instance, value):
setattr(instance, self.nickname, value)
def __delete__(self, instance):
delattr(instance, self.nickname)
Note the instance.has_changed that must tell if the instance is being modified. If found a solution for this here.
I have done extensive tests of my application and it works fine, but I am far from using all django functionalities. My question is: could this settings clash with some use cases of django ?

Related

Database model custom JSONField values persisting between test cases

I'm using a Django database model to store objects corresponding to a remote ticket service. In testing this model, I'm running several tests to make sure that log messages work correctly for the database model - I'm using Django-nose to run these tests.
The database model looks something like this, using the JSONField from this StackOverflow answer with a small modification to support lists as well as dicts:
class TicketEntity(django.db.models.Model)
tickets = JSONField(null=True, blank=True, default=[], serialize=True)
queued_ticket_data = JSONField(null=True, blank=True, default=[], serialize=True)
...
#classmethod
def create(cls, *args, **kwargs):
instance = cls(*args, **kwargs)
instance.save()
return instance
def queue_output_for_ticket(self, log_data):
self.queued_ticket_data += [log_data]
self.save()
def push_ticket(self):
self.tickets += [self.queued_ticket_data]
self.queued_ticket_data = []
self.save()
return True
The tests (in the order they appear to get run) look like this:
def test_ticket_entity_push_ticket(self):
entity = TicketEntity.create()
entity.queue_output_for_ticket("log output")
entity.queue_output_for_ticket("more log output")
entity.push_ticket()
self.assertEquals(entity.tickets, [[log_data, log_data_1]])
self.assertFalse(entity.queued_ticket_data)
def test_ticket_entity_queue_output_for_ticket(self):
entity = TicketEntity.create()
log_data = "Here's some output to log.\nHere's another line of output.\nAnd another."
entity.queue_output_for_ticket(log_data)
self.assertEquals(entity.queued_ticket_data, [log_data])
The first test passes, perfectly fine. The second test fails on that assert statement, because the entity.queued_ticket_data looks like this:
["log output",
"more log output",
"Here's some output to log.\nHere's another line of output.\nAnd another."]
Those first two elements are there at the very start of the test, immediately after we call TicketEntity.create(). They shouldn't be there - the new instance should be clean because we're creating it from scratch.
Similarly, tickets is pre-filled as well. The TicketEntity has other fields that are not JSONFields, and they do not seem to exhibit this problem.
Can someone shed some light on why this problem might be happening, and what kind of modification we'd need for a fix?

Django REST Errors when serializing model with ManyToManyField

I've created a class modeling a group of files within a software product build, on a Django server using the Django-REST package. The design is that the group of files (a Depot instance) should be able to be assigned to multiple Build instances (e.g. both the "alpha" and "beta" builds using the same exact audio file depot). However, at the time that the depot is created, it is being created as part of the creation of single Build on the client; it is only later that a utility script will allow an existing Depot to be added to other Builds.
It seemed natural to me that the Depot class should represent this relationship with a ManyToManyField. The problem is that the serializer does not seem to know what to do with this ManyToManyField. I've tried several workarounds, but each has its own error. I've tried having my DepotSerializer be either a rest_framework.serializers.Serializer or a rest_framework.serializers.ModelSerializer, but that seems largely unrelated to this problem.
Models.py:
class Depot(models.Model):
name = models.CharField(max_length=64)
builds = models.ManyToManyField(Build)
TYPE_EXECUTABLE = 0
TYPE_CORE = 1
TYPE_STREAMING = 2
depot_type = models.IntegerField(choices = (
(TYPE_EXECUTABLE, 'Executable'),
(TYPE_CORE, 'Core'),
(TYPE_STREAMING, 'Streaming'),
))
def __str__(self):
return self.name
Views.py:
class DepotCreate(mixins.CreateModelMixin,
generics.GenericAPIView):
serializer_class = DepotSerializer
queryset = Depot.objects.all()
def post(self, request, *args, **kwargs):
return self.create(request, *args, **kwargs)
Serializers.py version 1:
class DepotSerializer(serializers.ModelSerializer):
builds = serializers.PrimaryKeyRelatedField()
class Meta:
model = Depot
fields = ('id', 'name', 'builds', 'depot_type')
read_only_fields = ('id',)
def validate(self, attrs):
build = attrs['builds']
if build == None:
raise serializers.ValidationError("Build could not be found")
for depot in build.depot_set.all():
if depot.name == attrs['name']:
raise serializers.ValidationError("Build already contains a depot \"{}\"".format(depot.name))
return attrs
def restore_object(self, attrs, instance=None):
# existence of the build has already been validated
return Depot(**attrs)
This version results in the following error during the Depot init call:
Exception Type: TypeError
Exception Value:
'builds' is an invalid keyword argument for this function
Exception Location: /webapps/cdp_admin_django/lib/python3.4/site-packages/django/db/models/base.py in __init__, line 417
That appears to indicate that the Depot model cannot handle the 'builds' parameter despite the fact that it has a 'builds' ManyToManyField member.
Serializers.py 'restore_object' ver 2:
def restore_object(self, attrs, instance=None):
# existence of the build has already been validated
build = attrs['builds']
depotObj = Depot(name=attrs['name'], depot_type=attrs['depot_type'])
depotObj.builds.add(build)
return depotObj
This gave me the error:
Exception Type: ValueError
Exception Value:
"<Depot: depot_test4>" needs to have a value for field "depot" before this many-to-many relationship can be used.
Exception Location: /webapps/cdp_admin_django/lib/python3.4/site-packages/django/db/models/fields/related.py in __init__, line 524
After quite a bit of investigation, I found that ManyToMany relationships can give you trouble if you don't save the MYSQL entry before attempting to manipulate that field. Hence, restore_object ver 3:
def restore_object(self, attrs, instance=None):
# existence of the build has already been validated
build = attrs['builds']
depotObj = Depot(name=attrs['name'], depot_type=attrs['depot_type'])
depotObj.save()
depotObj.builds.add(build)
return depotObj
This does successfully create the table entry for this instance, but ends up throwing the following error:
Exception Type: IntegrityError
Exception Value:
(1062, "Duplicate entry '5' for key 'PRIMARY'")
Exception Location: /webapps/cdp_admin_django/lib/python3.4/site-packages/MySQLdb/connections.py in defaulterrorhandler, line 38
This error takes place during rest_framework/mixins.py call to serializer.save(force_insert=True). Which looks like it is supposed to force the creation of a new table entry, presumably disagreeing with my earlier call to Model.save.
Does anyone know the correct approach for a design like this? I feel like this can't be that unusual of a table structure.
EDIT 10/20/2014:
After the suggestion below, I experimented with writing a new ModelSerializer for one of my models; for the most part because of these types of order-of-operations problems, I'd backed off from using ModelSerializer and did all of my data-to-object field processing in views.py by reading serializer.data.
Having a PrimaryKeyRelatedField(many=True) in the ModelSerializer DID help. Notably, I was able to create a serializer instance with existent models and get the correct serializer.data. However, I still have the problem where restore_object can do everything except create a new model instance and pass down the ManyToManyField value. I still get "TypeError: '[PrimaryKeyRelatedField name]' is an invalid keyword argument for this function" if I pass the field to the model's init func. I still cannot save the model before the REST library does it itself. In addition, in this mode, the serializer populates serializer.data with the values of the Model, not the values provided in the data input. So if you do not use the PrimaryKeyRelatedField's attrs value in restore_object, it is discarded.
It appears that I need to override ModelSerializer.save to some kind of a pre-save, apply ManyToMany input, and a post-save, but I would need the attrs values so I can apply and modify the ManyToManyField at that time. I realize that the serializer does have the init_data field to see the original inputs, but in the case where the serializer is being used to deserialize a list of data into a list of new objects, I don't think there's a way to trace which serializer.init_data corresponds with which serializer.object.
In your serializer version 1, you do not have to add
builds = serializers.PrimaryKeyRelatedField()
as the model serializer will create this for you. In fact if you look at the exemple of the documentation (http://www.django-rest-framework.org/api-guide/relations/) you'll see that the PrimaryKeyRelatedField is applied when there is a FK 'to' the current model (not a M2M relation).
I would remove this from the serializer and see then what's going on.

sqlalchemy: how to block updates on a specific column

I have a declarative mapping:
class User(base):
username = Column(Unicode(30), unique=True)
How can I tell sqlalchemy that this attribute may not be modified?
The workaround I came up with is kindof hacky:
from werkzeug.utils import cached_property
# regular #property works, too
class User(base):
_username = Column('username', Unicode(30), unique=True)
#cached_property
def username(self):
return self._username
def __init__(self, username, **kw):
super(User,self).__init__(**kw)
self._username=username
Doing this on the database column permission level will not work because not all databases support that.
You can use the validates SQLAlchemy feature.
from sqlalchemy.orm import validates
...
class User(base):
...
#validates('username')
def validates_username(self, key, value):
if self.username: # Field already exists
raise ValueError('Username cannot be modified.')
return value
reference: https://docs.sqlalchemy.org/en/13/orm/mapped_attributes.html#simple-validators
I can suggest the following ways do protect column from modification:
First is using hook when any attribute is being set:
In case above all column in all tables of Base declarative will be hooked, so you need somehow to store information about whether column can be modified or not. For example you could inherit sqlalchemy.Column class to add some attribute to it and then check attribute in the hook.
class Column(sqlalchemy.Column):
def __init__(self, *args, **kwargs):
self.readonly = kwargs.pop("readonly", False)
super(Column, self).__init__(*args, **kwargs)
# noinspection PyUnusedLocal
#event.listens_for(Base, 'attribute_instrument')
def configure_listener(class_, key, inst):
"""This event is called whenever an attribute on a class is instrumented"""
if not hasattr(inst.property, 'columns'):
return
# noinspection PyUnusedLocal
#event.listens_for(inst, "set", retval=True)
def set_column_value(instance, value, oldvalue, initiator):
"""This event is called whenever a "set" occurs on that instrumented attribute"""
logging.info("%s: %s -> %s" % (inst.property.columns[0], oldvalue, value))
column = inst.property.columns[0]
# CHECK HERE ON CAN COLUMN BE MODIFIED IF NO RAISE ERROR
if not column.readonly:
raise RuntimeError("Column %s can't be changed!" % column.name)
return value
To hook concrete attributes you can do the next way (adding attribute to column not required):
# standard decorator style
#event.listens_for(SomeClass.some_attribute, 'set')
def receive_set(target, value, oldvalue, initiator):
"listen for the 'set' event"
# ... (event handling logic) ...
Here is guide about SQLAlchemy events.
Second way that I can suggest is using standard Python property or SQLAlchemy hybrid_property as you have shown in your question, but using this approach result in code growing.
P.S. I suppose that compact way is add attribute to column and hook all set event.
Slight correction to #AlexQueue answer.
#validates('username')
def validates_username(self, key, value):
if self.username and self.username != value: # Field already exists
raise ValueError('Username cannot be modified.')
return value

Coercion in SQLAlchemy from Column annotations

Good day everyone,
I have a file of strings corresponding to the fields of my SQLAlchemy object. Some fields are floats, some are ints, and some are strings.
I'd like to be able to coerce my string into the proper type by interrogating the column definition. Is this possible?
For instance:
class MyClass(Base):
...
my_field = Column(Float)
It feels like one should be able to say something like MyClass.my_field.column.type and either ask the type to coerce the string directly or write some conditions and int(x), float(x) as needed.
I wondered whether this would happen automatically if all the values were strings, but I received Oracle errors because the type was incorrect.
Currently I naively coerce -- if it's float()able, that's my value, else it's a string, and I trust that integral floats will become integers upon inserting because they are represented exactly. But the runtime value is wrong (e.g. 1.0 vs 1) and it just seems sloppy.
Thanks for your input!
SQLAlchemy 0.7.4
You can iterate over columns of the mapped Table:
for col in MyClass.__table__.columns:
print col, repr(col.type)
... so you can check the type of each field by its name like this:
def get_col_type(cls_, fld_):
for col in cls_.__table__.columns:
if col.name == fld_:
return col.type # this contains the instance of SA type
assert Float == type(get_col_type(MyClass, 'my_field'))
I would cache the results though if your file is large in order to save the for-loop on every row imported from the file.
Type coercion for sqlalchemy prior to committing to some database.
How can I verify Column data types in the SQLAlchemy ORM?
from sqlalchemy import (
Column,
Integer,
String,
DateTime,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import event
import datetime
Base = declarative_base()
type_coercion = {
Integer: int,
String: str,
DateTime: datetime.datetime,
}
# this event is called whenever an attribute
# on a class is instrumented
#event.listens_for(Base, 'attribute_instrument')
def configure_listener(class_, key, inst):
if not hasattr(inst.property, 'columns'):
return
# this event is called whenever a "set"
# occurs on that instrumented attribute
#event.listens_for(inst, "set", retval=True)
def set_(instance, value, oldvalue, initiator):
desired_type = type_coercion.get(inst.property.columns[0].type.__class__)
coerced_value = desired_type(value)
return coerced_value
class MyObject(Base):
__tablename__ = 'mytable'
id = Column(Integer, primary_key=True)
svalue = Column(String)
ivalue = Column(Integer)
dvalue = Column(DateTime)
x = MyObject(svalue=50)
assert isinstance(x.svalue, str)
I'm not sure if I'm reading this question correctly, but I would do something like:
class MyClass(Base):
some_float = Column(Float)
some_string = Column(String)
some_int = Column(Int)
...
def __init__(self, some_float, some_string, some_int, ...):
if isinstance(some_float, float):
self.some_float = somefloat
else:
try:
self.some_float = float(somefloat)
except:
# do something intelligent
if isinstance(some_string, string):
...
And I would repeat the checking process for each column. I would trust anything to do it "automatically". I also expect your file of strings to be well structured, otherwise something more complicated would have to be done.
Assuming your file is a CSV (I'm not good with file reads in python, so read this as pseudocode):
while not EOF:
thisline = readline('thisfile.csv', separator=',') # this line is an ordered list of strings
thisthing = MyClass(some_float=thisline[0], some_string=thisline[1]...)
DBSession.add(thisthing)

"Class already has a primary mapper defined" error with SQLAlchemy

Back in October 2010, I posted this question to the Sqlalchemy user list.
At the time, I just used the clear_mappers workaround mentioned in the message, and didn't try to figure out what the problem was. That was very naughty of me. Today I ran into this bug again, and decided to construct a minimal example, which appears below. Michael also addressed what is probably the same issue back in 2006. I decided to follow up here, to give Michael a break from my dumb questions.
So, the upshot appears to be that for a given class definition, you can't have more than one mapper defined. In my case I have the Pheno class declared in module scope (I assume that is top level scope here) and each time make_tables runs, it tries to define another mapper.
Mike wrote "Based on the description of the problem above, you need to ensure your Python classes are declared in the same scope as your mappers. The error message you're getting suggests that 'Pheno' is declared at the module level." That would take care of the problem, but how do I manage that, without altering my current structure? What other options do I have, if any? Apparently mapper doesn't have an option like "if the mapper is already defined, exit without doing anything", which would take care of it nicely. I guess I could define a wrapper function, but that would be pretty ugly.
from sqlalchemy import *
from sqlalchemy.orm import *
def make_pheno_table(meta, schema, name='pheno'):
pheno_table = Table(
name, meta,
Column('patientid', String(60), primary_key=True),
schema=schema,
)
return pheno_table
class Pheno(object):
def __init__(self, patientid):
self.patientid = patientid
def make_tables(schema):
from sqlalchemy import MetaData
meta = MetaData()
pheno_table = make_pheno_table(meta, schema)
mapper(Pheno, pheno_table)
table_dict = {'metadata': meta, 'pheno_table':pheno_table}
return table_dict
table_dict = make_tables('foo')
table_dict = make_tables('bar')
Error message follows. Tested with SQLAlchemy 0.6.3-3 on Debian squeeze.
$ python test.py
Traceback (most recent call last):
File "test.py", line 25, in <module>
table_dict = make_tables('bar')
File "test.py", line 20, in make_tables
mapper(Pheno, pheno_table)
File "/usr/lib/python2.6/dist-packages/sqlalchemy/orm/__init__.py", line 818, in mapper
return Mapper(class_, local_table, *args, **params)
File "/usr/lib/python2.6/dist-packages/sqlalchemy/orm/mapper.py", line 209, in __init__
self._configure_class_instrumentation()
File "/usr/lib/python2.6/dist-packages/sqlalchemy/orm/mapper.py", line 381, in _configure_class_instrumentation
self.class_)
sqlalchemy.exc.ArgumentError: Class '<class '__main__.Pheno'>' already has a primary mapper defined. Use non_primary=True to create a non primary Mapper. clear_mappers() will remove *all* current mappers from all classes.
EDIT: Per the documentation in SQLAlchemy: The mapper() API, I could replace mapper(Pheno, pheno_table) above with
from sqlalchemy.orm.exc import UnmappedClassError
try:
class_mapper(Pheno)
except UnmappedClassError:
mapper(Pheno, pheno_table)
If a mapper is not defined for Pheno, it throws an UnmappedClassError. This at least doesn't return an error in my test script, but I haven't checked if it actually works. Comments?
EDIT2: Per Denis's suggestion, the following works:
class Tables(object):
def make_tables(self, schema):
class Pheno(object):
def __init__(self, patientid):
self.patientid = patientid
from sqlalchemy import MetaData
from sqlalchemy.orm.exc import UnmappedClassError
meta = MetaData()
pheno_table = make_pheno_table(meta, schema)
mapper(Pheno, pheno_table)
table_dict = {'metadata': meta, 'pheno_table':pheno_table, 'Pheno':Pheno}
return table_dict
table_dict = Tables().make_tables('foo')
table_dict = Tables().make_tables('bar')
However, the superficially similar
# does not work
class Tables(object):
class Pheno(object):
def __init__(self, patientid):
self.patientid = patientid
def make_tables(self, schema):
from sqlalchemy import MetaData
from sqlalchemy.orm.exc import UnmappedClassError
meta = MetaData()
pheno_table = make_pheno_table(meta, schema)
mapper(self.Pheno, pheno_table)
table_dict = {'metadata': meta, 'pheno_table':pheno_table, 'Pheno':self.Pheno}
return table_dict
table_dict = Tables().make_tables('foo')
table_dict = Tables().make_tables('bar')
does not. I get the same error message as before.
I don't really understand the scoping issues well enough to say why.
Isn't the Pheno class in both cases in some kind of local scope?
You are trying to map the same class Pheno to 2 different tables. SQLAlchemy allows only one primary mapper for each class, so that it knows what table to use for session.query(Pheno). It's not clear what do you wish to get from your question, so I can't propose solution. There are 2 obvious options:
define separate class to map to second table,
create non-primary mapper for second table by passing non_primary=True parameter and pass it (the value returned by mapper() function) to session.query() instead of class.
Update: to define separate class for each table you can put its definition into the make_tables():
def make_tables(schema):
from sqlalchemy import MetaData
meta = MetaData()
pheno_table = make_pheno_table(meta, schema)
class Pheno(object):
def __init__(self, patientid):
self.patientid = patientid
mapper(Pheno, pheno_table)
table_dict = {'metadata': meta,
'pheno_class': Pheno,
'pheno_table':pheno_table}
return table_dict
maybe i didn't quite understand what you want, but this recipe create identical column in different __tablename__
class TBase(object):
"""Base class is a 'mixin'.
Guidelines for declarative mixins is at:
http://www.sqlalchemy.org/docs/orm/extensions/declarative.html#mixin-classes
"""
id = Column(Integer, primary_key=True)
data = Column(String(50))
def __repr__(self):
return "%s(data=%r)" % (
self.__class__.__name__, self.data
)
class T1Foo(TBase, Base):
__tablename__ = 't1'
class T2Foo(TBase, Base):
__tablename__ = 't2'
engine = create_engine('sqlite:///foo.db', echo=True)
Base.metadata.create_all(engine)
sess = sessionmaker(engine)()
sess.add_all([T1Foo(data='t1'), T1Foo(data='t2'), T2Foo(data='t3'),
T1Foo(data='t4')])
print sess.query(T1Foo).all()
print sess.query(T2Foo).all()
sess.commit()
info in example sqlalchemy