I have the following problem:
I have a hierarchy of classes using joined table inheritance:
class AdGroupModel(Base, AdwordsRequestMixin):
    __tablename__ = 'ad_groups'

    db_id = Column(BigInteger, primary_key=True)
    created_at = Column(DateTime(timezone=False), nullable=False,
                        default=datetime.datetime.now)  # pass the callable, not its result

    # ----RELATIONS-----
    # campaign MANY-to-ONE
    campaign_db_id = Column(BigInteger,
                            ForeignKey('campaigns.db_id', ondelete='CASCADE'),
                            nullable=True,
                            )
    # ads ONE-to-MANY
    ads = relationship("AdModel",
                       backref="ad_group",
                       lazy="subquery",
                       passive_deletes=True,
                       single_parent=True,
                       cascade="all, delete, delete-orphan")
    # keywords ONE-to-MANY
    criteria = relationship("AdGroupCriterionModel",
                            backref="ad_group",
                            lazy="subquery",
                            passive_deletes=True,
                            single_parent=True,
                            cascade="all, delete, delete-orphan")

    # Joined Table Inheritance
    type = Column(Unicode(50))
    __mapper_args__ = {
        'polymorphic_identity': 'ad_group',
        'polymorphic_on': type
    }
class AdGroupCriterionModel(Base, AdGroupDependenceMixin):
    __tablename__ = 'ad_group_criterion'

    db_id = Column(BigInteger, primary_key=True)
    destination_url = Column(Unicode, nullable=True)
    status = Column(Enum("PAUSED", "ACTIVE", "DELETED",
                         name='criterion_status'), default="ACTIVE")

    # ----RELATIONS---
    # ad_group ONE-to-MANY
    ad_group_db_id = Column(BigInteger, ForeignKey('ad_groups.db_id',
                            ondelete='CASCADE'), nullable=True)

    # Joined Table Inheritance
    criterion_sub_type = Column(Unicode(50))
    __mapper_args__ = {
        'polymorphic_on': criterion_sub_type
    }
class AdGroupKeywordModel(AdGroupCriterionModel):
    __tablename__ = 'ad_group_keyword'
    __mapper_args__ = {'polymorphic_identity': 'Keyword'}

    db_id = Column(Integer, ForeignKey('ad_group_criterion.db_id'), primary_key=True)
    text = Column(Unicode, nullable=False)
class AdGroupDependenceMixin(object):
    _aggad_id = Column(BigInteger, nullable=True)
    _agname = Column(Unicode, nullable=True)

    @hybrid_property
    def ad_group_GAD_id(self):
        if self.ad_group is None:
            res = self._aggad_id
        else:
            res = self.ad_group.GAD_id
        return res

    @ad_group_GAD_id.setter
    def ad_group_GAD_id(self, value):
        self._aggad_id = value
        if value is not None:
            self.ad_group = None

    @ad_group_GAD_id.expression
    def ad_group_GAD_id(cls):
        what = case([(cls._aggad_id != None, cls._aggad_id)], else_=AdGroupModel.GAD_id)
        return what.label('adgroupgadid_expression')

    @hybrid_property
    def ad_group_name(self):
        if self.ad_group is None:
            return self._agname
        else:
            return self.ad_group.name

    @ad_group_name.setter
    def ad_group_name(self, value):
        self._agname = value
        if value is not None:
            self.campaign = None

    @ad_group_name.expression
    def ad_group_name(cls):
        what = case([(cls._agname != None, cls._agname)], else_=AdGroupModel.name)
        return what.label('adgroupname_expression')
And I load the Keywords objects from the database with the following query:
all_objects1 = self.database.session.query(AdGroupKeywordModel).join(AdGroupModel)\
    .options(subqueryload('ad_group'))\
    .filter(AdGroupModel.GAD_id != None)\
    .limit(self.options.limit).all()
which returns objects of type AdGroupKeywordModel.
Unfortunately, every time I access an attribute of AdGroupKeywordModel that lives in the parent table (AdGroupCriterionModel), a query of this type is emitted:
sqlalchemy.engine.base.Engine
SELECT ad_group_criterion.destination_url AS ad_group_criterion_destination_url, ad_group_criterion.status AS ad_group_criterion_status, ad_group_criterion.ad_group_db_id AS ad_group_criterion_ad_group_db_id, ad_group_criterion.criterion_sub_type AS ad_group_criterion_criterion_sub_type, ad_group_keyword.text AS ad_group_keyword_text
FROM ad_group_criterion JOIN ad_group_keyword ON ad_group_criterion.db_id = ad_group_keyword.db_id
which is seriously hurting performance.
What I would like is for all the attributes of AdGroupKeywordModel that come from the parent class (and from the other classes named in the relationships) to be loaded with the initial query and cached for further use, so that accessing them causes no further SQL statements.
It seems that eager loading is only defined for relationships, not for hierarchies. Is it possible to get this behaviour in SQLAlchemy for hierarchies as well?
Thanks
What I see is: only AdGroupModel has a relationship with a lazy= setting (the keyword that configures eager loading for relationships), and the query only has a subqueryload('ad_group').
The only point at which ad_group or AdGroupModel touches AdGroupKeywordModel is AdGroupModel.criteria, which has AdGroupCriterionModel.ad_group as its backref. I'm not familiar with the subqueryload syntax, but if I wanted to eager-load AdGroupCriterionModel.ad_group, I'd define criteria like this:
criteria = relationship(
    "AdGroupCriterionModel", backref=backref("ad_group", lazy="subquery"),
    lazy="subquery", passive_deletes=True, single_parent=True,
    cascade="all, delete, delete-orphan")
The key is to define the right lazy for the backref as well.
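Regarding eager loading for hierarchies: it does exist, in the form of with_polymorphic(). A minimal sketch (my addition, assuming a reasonably recent SQLAlchemy; names follow the models above) that joins the subclass table into the initial query when querying the base class, so subtype columns are loaded up front:

from sqlalchemy.orm import with_polymorphic, subqueryload

# Query the base class, but JOIN the keyword subclass table immediately.
poly = with_polymorphic(AdGroupCriterionModel, [AdGroupKeywordModel])
criteria = (
    session.query(poly)
    .options(subqueryload(poly.ad_group))
    .all()
)

Note that when you query AdGroupKeywordModel directly, joined inheritance already joins the parent table into the SELECT, so per-attribute queries like the one shown usually indicate expired instances (e.g. after a commit with expire_on_commit=True) rather than missing eager loading.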
Related
Each of my mapped classes contains created_by and updated_by audit columns that I would like to set automatically on INSERT and UPDATE of the respective objects.
class User(Base):
    __tablename__ = 'user'
    id = Column(BigInteger, primary_key=True)
    name = Column(Text, nullable=False)
    ...

class Address(Base):
    __tablename__ = 'address'
    id = Column(BigInteger, primary_key=True)
    street = Column(Text, nullable=False)
    ...
    created_by = Column(BigInteger)  # references user.id
    updated_by = Column(BigInteger)  # references user.id
    ...
Is there a way to handle this centrally in SQLAlchemy? I looked at the events, but it appears they need to be set up for every single mapped class individually (note the SomeClass in the decorator):
@event.listens_for(SomeClass, 'before_insert')
def on_insert(mapper, connection, target):
    target.created_by = context["current_user"]  # I want to do this for all mapped classes, not just SomeClass

@event.listens_for(SomeClass, 'before_update')
def on_update(mapper, connection, target):
    target.updated_by = context["current_user"]  # I want to do this for all mapped classes, not just SomeClass
One solution here is to use the default parameter of the Column class provided by SQLAlchemy. You can pass a callable to both default (executed when the row is first created) and onupdate (executed whenever the row is updated).
def get_current_user():
    return context["user"].id

class Address(Base):
    __tablename__ = 'address'
    ...
    created_by = Column(BigInteger, default=get_current_user)
    updated_by = Column(BigInteger, default=get_current_user, onupdate=get_current_user)
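A usage sketch (hypothetical session; the callables run at flush time, so the current user must be available in the context at that moment):

address = Address(street='Main St 1')
session.add(address)
session.commit()   # INSERT runs the default: created_by is populated
address.street = 'Main St 2'
session.commit()   # UPDATE runs onupdate: updated_by is refreshed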
Managed to figure it out, though I'm somewhat concerned about using a dunder method, __subclasses__(), on declarative_base. If there is a better alternative, do suggest it.
def on_insert(mapper, connection, target):
    target.created_by = context["user"].id
    target.updated_at = datetime.utcnow()

def on_update(mapper, connection, target):
    target.updated_by = context["user"].id
    target.updated_at = datetime.utcnow()

Base.metadata.create_all()

mapped_classes = Base.__subclasses__()
for mapped_class in mapped_classes:
    event.listen(mapped_class, 'before_insert', on_insert)
    event.listen(mapped_class, 'before_update', on_update)
The context being referred to here is actually starlette-context
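A possible alternative to walking __subclasses__(): mapper events can, as far as I know, be attached to an unmapped superclass such as the declarative Base when propagate=True is passed, so the listener applies to all inheriting mappers (a sketch under that assumption):

from sqlalchemy import event

@event.listens_for(Base, 'before_insert', propagate=True)
def on_insert(mapper, connection, target):
    target.created_by = context["user"].id

@event.listens_for(Base, 'before_update', propagate=True)
def on_update(mapper, connection, target):
    target.updated_by = context["user"].id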
I’m trying to define 2 entities like this:
class User(Base):
    id = Column(Integer, primary_key=True)
    name = Column(String(256), index=True, unique=True)
    main_token_id = Column(ForeignKey('token.id'), nullable=False)
    main_token = relationship('Token', uselist=False)
    tokens = relationship('Token', back_populates="user", foreign_keys=['token.id'])

class Token(Base):
    id = Column(Integer, primary_key=True)
    user_id = Column(ForeignKey('user.id'), nullable=False)
    user: User = relationship("user", back_populates="tokens")
I want the user to have access to the collection of all his tokens, and I also want him to have a special, main token. I want to ensure that the user has exactly one main token, and I need the integrity provided by the foreign key. By both of them, actually.
I have read Cascading deletes in mutually dependent tables in SQLAlchemy but I don't feel it helps. I would like to have the integrity from both sides.
How can I make this work? If the design is flawed how can I rephrase this so that I may keep my integrity guarantees?
A kludge I have used to sort of solve this problem before is to create a column like precedence = Column(Integer, nullable=False) on tokens, then add a unique constraint like UniqueConstraint('user_id', 'precedence') and set that integer manually when you create the tokens. The token with precedence 0, or the lowest precedence, is the main token.
Here is an example. I'm sure some SQLAlchemy geniuses can perform the precedence swap without 3 updates, but I think in most cases that doesn't come up very often. There is a way to defer a unique constraint within a transaction, but I guess SQLite does not support that yet.
This relies on your application not moving the main token off precedence 0, i.e. there is no integrity check to prevent that.
from sqlalchemy import (
    create_engine,
    UnicodeText,
    Integer,
    String,
    ForeignKey,
    UniqueConstraint,
    update,
)
from sqlalchemy.schema import (
    Table,
    Column,
    MetaData,
)
from sqlalchemy.sql import select
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError

Base = declarative_base()

engine = create_engine("sqlite://", echo=False)

class User(Base):
    __tablename__ = 'users'
    id = Column(Integer, primary_key=True)
    name = Column(String(256), index=True, unique=True)
    tokens = relationship('Token', backref="user", cascade="all, delete-orphan", order_by='Token.precedence')
    main_token = relationship('Token', primaryjoin='and_(User.id == Token.user_id, Token.precedence == 0)', viewonly=True, uselist=False)

class Token(Base):
    __tablename__ = 'tokens'
    id = Column(Integer, primary_key=True)
    precedence = Column(Integer, nullable=False)
    user_id = Column(ForeignKey('users.id'), nullable=False)
    __table_args__ = (UniqueConstraint('precedence', 'user_id', name='tokens_user_precedence'),)

Base.metadata.create_all(engine)

with Session(engine) as session:
    user = User(name='tokenizer')
    session.add(user)
    main_token = Token(user=user, precedence=0)
    session.add(main_token)
    session.add(Token(user=user, precedence=1))
    session.commit()

    assert session.query(Token).first()
    assert session.query(User).first()
    assert session.query(User).first().tokens
    assert session.query(User).first().tokens[0] == main_token
    # This viewonly relationship seems to be working.
    assert session.query(User).first().main_token == main_token

    # We don't want this so don't do this, no integrity checks here!!
    main_token.precedence = 100
    session.commit()
    assert not session.query(User).first().main_token

    # Put it back now.
    main_token.precedence = 0
    session.commit()
    assert session.query(User).first().main_token

    # Now check tokens are cleared.
    session.delete(user)
    session.commit()
    assert not session.query(Token).all()
    assert not session.query(User).all()

with Session(engine) as session:
    # Try making 2 main tokens.
    user = User(name='tokenizer')
    session.add(user)
    main_token = Token(user=user, precedence=0)
    main_token2 = Token(user=user, precedence=0)
    session.add_all([main_token, main_token2])
    try:
        session.commit()
    except IntegrityError as e:
        pass
    else:
        assert False, 'Exception should have occurred.'

with Session(engine) as session:
    # Try swapping the tokens.
    user = User(name='tokenizer')
    session.add(user)
    main_token = Token(user=user, precedence=0)
    session.add(main_token)
    other_token = Token(user=user, precedence=1)
    session.add(other_token)
    session.commit()

    old_precedence = other_token.precedence
    main_token.precedence = -1
    session.flush()
    other_token.precedence = 0
    session.flush()
    main_token.precedence = old_precedence
    session.commit()
    assert user.tokens[0] == other_token
    assert user.tokens[1] == main_token
    assert user.main_token == other_token
    session.commit()
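On the deferred-constraint remark above: on a backend that supports deferrable constraints, e.g. PostgreSQL (my assumption here; SQLite does not support this), the swap could be done without the temporary -1 value by checking the constraint only at commit time:

__table_args__ = (
    UniqueConstraint('precedence', 'user_id',
                     name='tokens_user_precedence',
                     deferrable=True, initially='DEFERRED'),
)

# With the constraint deferred, a plain swap works in one transaction:
main_token.precedence, other_token.precedence = 1, 0
session.commit()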
Hi everyone. I found really strange behaviour in SQLAlchemy: when I try to insert a new instance of Device, after commit SQLAlchemy sets the type field to None. Here are my models:
class BaseTable(db.Model):
    __abstract__ = True

    id = db.Column(db.Integer, primary_key=True, unique=True,
                   index=True, autoincrement=True, nullable=False)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.date_created = dt.datetime.utcnow()

class Device(BaseTable):
    __tablename__ = "device"

    node_id = db.Column(db.ForeignKey('node.id', ondelete='CASCADE'), index=True, nullable=False)
    name = db.Column(db.Unicode(255), nullable=False)
    description = db.Column(db.Unicode, nullable=True)
    type = db.Column(db.ForeignKey('device_type.id', ondelete='SET NULL'), index=True, nullable=True)
    type_description = db.relationship("DeviceType", lazy='joined')

class DeviceType(BaseTable):
    __tablename__ = "device_type"

    name = db.Column(db.Unicode(255), unique=True, index=True, nullable=False)
    code = db.Column(db.Unicode(255), unique=True, index=True, nullable=False)
In the device_type table there are 6 entries with ids from 1 to 6.
I make a simple POST request to create a new device instance. Here is the controller code:
@accepts(schema=post_schema, api=api)
@responds(schema=main_schema, api=api)
def post(self):
    strer = ''
    result = None
    payload = api.payload
    try:
        entity = post_schema().load(payload)
        result = crud_service.post(entity, log, db_session.session)
    except Exception as ex:
        strer += str(ex)
    if result is None:
        return response500(log, strer)
    else:
        return result
In crud_service.post I have:
session.add(entity)
session.commit()
return entity
So, when I make the POST request, the payload has the device type = 5 (for example), and I'm sure that id exists in my device_type table. But after SQLAlchemy does session.commit(), the device's type is None. After that, if I make a PUT request and rewrite the type with
ent = session.query(model)\
    .filter(model.id == dict_entity['id'])\
    .update(ent_dict)
session.commit()
I get the type in my DB as expected.
So it seems that SQLAlchemy sets the type field of my device to None when going through session.add(), but not through query(...).update().
Can anyone explain this behaviour? Maybe there is a problem in my models?
It seems that I've found the problem.
I deleted type_description = db.relationship("DeviceType", lazy='joined') and now it works fine. I will look for another way to define the relationship field.
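For reference, here is a sketch of one way the eagerly-loaded relationship might be kept (my assumption, not tested against the original app): name the foreign key column explicitly, and make sure the schema load leaves type_description unset rather than assigning None to it, since assigning None to a relationship nulls the foreign key on flush:

class Device(BaseTable):
    __tablename__ = "device"

    type = db.Column(db.ForeignKey('device_type.id', ondelete='SET NULL'),
                     index=True, nullable=True)
    # Explicit FK for the relationship; if type_description is never
    # touched, the raw "type" id set from the payload is kept on flush.
    type_description = db.relationship("DeviceType",
                                       foreign_keys=[type],
                                       lazy='joined')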
I'm trying to work with the example in the SQLAlchemy docs: Simplifying Association Objects
What I'm struggling to understand is how I can access the special_key. Ultimately I'd like to be able to do something like this:
for user in users:
    for keyword in user.keywords:
        keyword.special_key
Here is the code from the example:
class User(Base):
    __tablename__ = 'user'
    id = Column(Integer, primary_key=True)
    name = Column(String(64))

    # association proxy of "user_keywords" collection
    # to "keyword" attribute
    keywords = association_proxy('user_keywords', 'keyword')

    def __init__(self, name):
        self.name = name

class UserKeyword(Base):
    __tablename__ = 'user_keyword'
    user_id = Column(Integer, ForeignKey('user.id'), primary_key=True)
    keyword_id = Column(Integer, ForeignKey('keyword.id'), primary_key=True)
    special_key = Column(String(50))

    # bidirectional attribute/collection of "user"/"user_keywords"
    user = relationship(User,
                        backref=backref("user_keywords",
                                        cascade="all, delete-orphan")
                        )
    # reference to the "Keyword" object
    keyword = relationship("Keyword")

    def __init__(self, keyword=None, user=None, special_key=None):
        self.user = user
        self.keyword = keyword
        self.special_key = special_key

class Keyword(Base):
    __tablename__ = 'keyword'
    id = Column(Integer, primary_key=True)
    keyword = Column('keyword', String(64))

    def __init__(self, keyword):
        self.keyword = keyword

    def __repr__(self):
        return 'Keyword(%s)' % repr(self.keyword)
Am I on the right track in following this pattern here?
My goal is essentially many-to-many with an extra column containing a boolean value.
This should work:
for user in users:
    for keyword in user.user_keywords:
        print(keyword.special_key)
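Note that the loop goes through the UserKeyword association objects directly rather than through the keywords proxy. If you want to reach special_key without the intermediate hop, you could also add a second association proxy on User (my addition, not part of the docs example):

from sqlalchemy.ext.associationproxy import association_proxy

class User(Base):
    __tablename__ = 'user'
    id = Column(Integer, primary_key=True)
    name = Column(String(64))
    keywords = association_proxy('user_keywords', 'keyword')
    # proxies user.special_keys[i] to user.user_keywords[i].special_key
    special_keys = association_proxy('user_keywords', 'special_key')

Then user.special_keys iterates the extra column values directly.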
I have a star-schema architected database that I want to represent in SQLAlchemy, and I'm wondering how this can best be done. Right now I have a lot of properties with custom join conditions, because the data is stored in different tables.
It would be nice if the dimensions could be re-used for different fact tables, but I haven't figured out how that can be done nicely.
A typical fact table in a star schema contains foreign key references to all dimension tables, so usually there wouldn't be any need for custom join conditions - they are determined automatically from foreign key references.
For example a star schema with two fact tables would look like:
Base = declarative_base()

class Store(Base):
    __tablename__ = 'store'
    id = Column('id', Integer, primary_key=True)
    name = Column('name', String(50), nullable=False)

class Product(Base):
    __tablename__ = 'product'
    id = Column('id', Integer, primary_key=True)
    name = Column('name', String(50), nullable=False)

class FactOne(Base):
    __tablename__ = 'sales_fact_one'
    store_id = Column('store_id', Integer, ForeignKey('store.id'), primary_key=True)
    product_id = Column('product_id', Integer, ForeignKey('product.id'), primary_key=True)
    units_sold = Column('units_sold', Integer, nullable=False)

    store = relation(Store)
    product = relation(Product)

class FactTwo(Base):
    __tablename__ = 'sales_fact_two'
    store_id = Column('store_id', Integer, ForeignKey('store.id'), primary_key=True)
    product_id = Column('product_id', Integer, ForeignKey('product.id'), primary_key=True)
    units_sold = Column('units_sold', Integer, nullable=False)

    store = relation(Store)
    product = relation(Product)
But suppose you want to reduce the boilerplate in any case. I'd create generators local to the dimension classes which configure themselves on a fact table:
class Store(Base):
    __tablename__ = 'store'
    id = Column('id', Integer, primary_key=True)
    name = Column('name', String(50), nullable=False)

    @classmethod
    def add_dimension(cls, target):
        target.store_id = Column('store_id', Integer, ForeignKey('store.id'), primary_key=True)
        target.store = relation(cls)
in which case usage would be like:
class FactOne(Base):
    ...

Store.add_dimension(FactOne)
But there's a problem with that. Assuming the dimension columns you're adding are primary key columns, the mapper configuration will fail, since a class needs to have its primary keys set up before the mapping is set up. So, assuming we're using declarative (which, as you'll see below, has a nice effect), to make this approach work we have to use the instrument_declarative() function instead of the standard metaclass:
meta = MetaData()
registry = {}

def register_cls(*cls):
    for c in cls:
        instrument_declarative(c, registry, meta)
So then we'd do something along the lines of:
class Store(object):
    # ...

class FactOne(object):
    __tablename__ = 'sales_fact_one'

Store.add_dimension(FactOne)
register_cls(Store, FactOne)
If you actually have a good reason for custom join conditions, as long as there's some pattern to how those conditions are created, you can generate that with your add_dimension():
class Store(object):
    ...

    @classmethod
    def add_dimension(cls, target):
        target.store_id = Column('store_id', Integer, ForeignKey('store.id'), primary_key=True)
        target.store = relation(cls, primaryjoin=target.store_id == cls.id)
But the final cool thing, if you're on 2.6, is to turn add_dimension into a class decorator. Here's an example with everything cleaned up:
from sqlalchemy import *
from sqlalchemy.ext.declarative import instrument_declarative
from sqlalchemy.orm import *

class BaseMeta(type):
    classes = set()

    def __init__(cls, classname, bases, dict_):
        type.__init__(cls, classname, bases, dict_)
        if 'metadata' not in dict_:
            BaseMeta.classes.add(cls)

class Base(object):
    __metaclass__ = BaseMeta
    metadata = MetaData()

    def __init__(self, **kw):
        for k in kw:
            setattr(self, k, kw[k])

    @classmethod
    def configure(cls, *klasses):
        registry = {}
        for c in BaseMeta.classes:
            instrument_declarative(c, registry, cls.metadata)

class Store(Base):
    __tablename__ = 'store'
    id = Column('id', Integer, primary_key=True)
    name = Column('name', String(50), nullable=False)

    @classmethod
    def dimension(cls, target):
        target.store_id = Column('store_id', Integer, ForeignKey('store.id'), primary_key=True)
        target.store = relation(cls)
        return target

class Product(Base):
    __tablename__ = 'product'
    id = Column('id', Integer, primary_key=True)
    name = Column('name', String(50), nullable=False)

    @classmethod
    def dimension(cls, target):
        target.product_id = Column('product_id', Integer, ForeignKey('product.id'), primary_key=True)
        target.product = relation(cls)
        return target

@Store.dimension
@Product.dimension
class FactOne(Base):
    __tablename__ = 'sales_fact_one'
    units_sold = Column('units_sold', Integer, nullable=False)

@Store.dimension
@Product.dimension
class FactTwo(Base):
    __tablename__ = 'sales_fact_two'
    units_sold = Column('units_sold', Integer, nullable=False)

Base.configure()
if __name__ == '__main__':
    engine = create_engine('sqlite://', echo=True)
    Base.metadata.create_all(engine)
    sess = sessionmaker(engine)()

    sess.add(FactOne(store=Store(name='s1'), product=Product(name='p1'), units_sold=27))
    sess.commit()
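A quick usage sketch (my addition, assuming the schema above): since both fact tables share the dimension classes, aggregating per dimension is a plain join:

# total units sold per store, from the first fact table
totals = (
    sess.query(Store.name, func.sum(FactOne.units_sold))
    .join(FactOne, FactOne.store_id == Store.id)
    .group_by(Store.name)
    .all()
)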