Trouble defining multiple self-referencing foreign keys in a table - sqlalchemy

I have some code here. I recently added this root_id parameter. The goal of that is to let me determine whether a File belongs to a particular Project without having to add a project_id FK into File (which would result in a model cycle.) Thus, I want to be able to compare Project.directory to File.root. If that is true, File belongs to Project.
However, the File.root attribute is not being autogenerated for File. My understanding is that defining a FK foo_id into table Foo implicit creates a foo attribute to which you can assign a Foo object. Then, upon session flush, foo_id is properly set to the id of the assigned object. In the snippet below that is clearly being done for Project.directory, but why not for File.root?
It definitely seems like it has to do with either 1) the fact that root_id is a self-referential FK or 2) the fact that there are several self-referential FKs in File and SQLAlchemy gets confused.
Things I've tried.
Trying to define a 'root' relationship() - I think this is wrong, this should not be represented by a join.
Trying to define a 'root' column_property() - allows read access to an already set root_id property, but when assigning to it, does not get reflected back to root_id
How can I do what I'm trying to do? Thanks!
from sqlalchemy import create_engine, Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship, scoped_session, sessionmaker, column_property
Base = declarative_base()
engine = create_engine('sqlite:///:memory:', echo=True)
Session = scoped_session(sessionmaker(bind=engine))
class Project(Base):
__tablename__ = 'projects'
id = Column(Integer, primary_key=True)
directory_id = Column(Integer, ForeignKey('files.id'))
class File(Base):
__tablename__ = 'files'
id = Column(Integer, primary_key=True)
path = Column(String)
parent_id = Column(Integer, ForeignKey('files.id'))
root_id = Column(Integer, ForeignKey('files.id'))
children = relationship('File', primaryjoin=id==parent_id, backref=backref('parent', remote_side=id), cascade='all')
Base.metadata.create_all(engine)
p = Project()
root = File()
root.path = ''
p.directory = root
f1 = File()
f1.path = 'test.txt'
f1.parent = root
f1.root = root
Session.add(f1)
Session.add(root)
Session.flush()
# do this otherwise f1 will be returned when calculating rf1
Session.expunge(f1)
rf1 = Session.query(File).filter(File.path == 'test.txt').one()
# this property does not exist
print rf1.root

My understanding is that defining a FK foo_id into table Foo implicit creates a foo attribute to which you can assign a Foo object.
No, it doesn't. In the snippet, it just looks like it is being done for Project.directory, but if you look at the SQL statements being echo'ed, there is no INSERT at all for the projects table.
So, for it to work, you need to add these two relationships:
class Project(Base):
...
directory = relationship('File', backref='projects')
class File(Base):
...
root = relationship('File', primaryjoin='File.id == File.root_id', remote_side=id)

Related

In SQLAlchemy, how should I specify that the relationship field is required?

I have a model that depends on some fields on another model. This fields should be present when the record is created, but I do not see a way to enforce that on the database:
class Study(db.Model):
id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
type = db.Column(Enum(StudyTypeChoices), nullable=False)
owner_id = db.Column(UUID(as_uuid=True), db.ForeignKey('owner.id'), nullable=False)
participants = db.relationship('Participant', lazy=True, cascade='save-update, merge, delete')
How can I make sure that 'participants' is provided when the Study record gets created (similar to what happens with the 'type' field)? I know I can put a wrapper around it to make sure of that, but I am wondering is there is a more neat way of doing it with sqlalchemy.
Edit: This is the definition of the Participant model
class Participant(UserBase):
id = db.Column(UUID(as_uuid=True), db.ForeignKey("user_base.id"), primary_key=True)
study_id = db.Column(UUID(as_uuid=True), db.ForeignKey('study.id'))
You can listen to before_flush events and prevent flushes containing studies without participants by raising an exception for instance.
#event.listens_for(Session, "before_flush")
def before_flush(session, flush_context, instances):
for instance in session.new: # might want to inspect session.dirty as well
if isinstance(instance, Study) and (
instance.participants is None or instance.participants == []
):
raise ValueError(
f"Study {instance} cannot have {instance.participants} participants."
)
This only checks for new studies, you might want to check in session.dirty as well for updated studies.
Full demo:
from sqlalchemy import Column, ForeignKey, Integer, create_engine, event
from sqlalchemy.orm import Session, declarative_base, relationship
Base = declarative_base()
class Study(Base):
__tablename__ = "study"
id = Column(Integer, primary_key=True)
participants = relationship("Participant", uselist=True, back_populates="study")
class Participant(Base):
__tablename__ = "participant"
id = Column(Integer, primary_key=True)
study_id = Column(Integer, ForeignKey("study.id"), nullable=True)
study = relationship("Study", back_populates="participants")
#event.listens_for(Session, "before_flush")
def before_flush(session, flush_context, instances):
for instance in session.new: # might want to inspect session.dirty as well
if isinstance(instance, Study) and (
instance.participants is None or instance.participants == []
):
raise ValueError(
f"Study {instance} cannot have {instance.participants} participants."
)
engine = create_engine("sqlite://", future=True, echo=True)
Base.metadata.create_all(engine)
s1 = Study()
p1_1 = Participant()
p1_2 = Participant()
s1.participants.extend([p1_1, p1_2])
s2 = Study()
with Session(bind=engine) as session:
session.add(s1)
session.commit() # OK
with Session(bind=engine) as session:
session.add(s2)
session.commit() # ValueError

How to combine declarative mappings from different packages?

before I start contriving a minimal example with a lot of sqlalchemy boilerplate stuff, maybe I can explain the concept theoretically.
I have a package "foo" that defines some tables in a database schema "foo_db" in the standard ORM manner:
class FooTable(Base):
__tablename__ = 'foo_data'
id = Column(Integer, primary_key=True)
The package is typically used stand-alone with its own database schema, "foo_db."
A separate package "bar" has its own schema "bar_db" with its own tables, but it also needs to use the tables of "foo" on "foo_db," and it has foreign keys into foo_db's tables (both schemas obviously are on the same server):
from foo.models import Base, FooTable
class BarTable(Base):
__tablename__ = 'bar_data'
id = Column(Integer, primary_key=True)
foo_id = Column(ForeignKey('foo_db.foo_data.id'))
foo = relationship(FooTable)
When I try to use this code I get errors like these:
(MySQLdb._exceptions.ProgrammingError) (1146, "Table 'bar_db.foo_data' doesn't exist")
The only way I found to get around this is to literally re-define FooTable in package bar:
class FooTable(Base):
__tablename__ = 'foo_data'
__table_args__ = {'schema': 'foo_db'}
id = Column(Integer, primary_key=True)
This is very obviously not how it should be done. Any suggestions?

List of VLAN IDs on ports and circuits, how to model relationships?

I am trying to save a list of VLAN IDs per network port and also per network circuit. The list itself is something like this:
class ListOfVlanIds(Base):
__tablename__ = 'listofvlanids'
id = Column(Integer, primary_key=True)
listofvlanids_name = Column('listofvlanids_name', String, nullable = True)
And I then have a Port
class Port(Base):
__tablename__ = 'ports'
id = Column(Integer, primary_key=True)
listofvlanids_id = Column('listofvlanids_id', ForeignKey('ListOfVlanIds.id'), nullable = True)
and a Circuit:
class Circuit(Base):
__tablename__ = 'circuits'
id = Column(Integer, primary_key=True)
listofvlanids_id = Column('listofvlanids_id', ForeignKey('ListOfVlanIds.id'), nullable = True)
Running code like this results (for me) in a sqlalchemy.exc.NoReferencedTableError error on the ForeignKey.
Looking for the error I read I should add a relationship back from the list. I haven't found a way (or an example) where I can build this from both Port and Circuit. What am I missing?
Creating a list table for Ports and Circuits just moves the problem downstream, since a VLAN ID is it's own table... I'd love to be able to use ORM, instead of having to write (a lot of) SQL by hand.
ForeignKey expects a table and column name, not model and attribute name, so it should be ForeignKey('listofvlanids.id').

Passive deletes in SQLAlchemy with a many-to-many relationship don't prevent DELETE from being issued for related object

I am trying to get SQLAlchemy to let my database's foreign keys "on delete cascade" do the cleanup on the association table between two objects. I have setup the cascade and passive_delete options on the relationship as seems appropriate from the docs. However, when a related object is loaded into the collection of a primary object and the primary object is deleted from the session, then SQLAlchemy issues a delete statement on the secondary table for the record relating the primary and secondary objects.
For example:
import logging
import sqlalchemy as sa
import sqlalchemy.ext.declarative as sadec
import sqlalchemy.orm as saorm
engine = sa.create_engine('sqlite:///')
engine.execute('PRAGMA foreign_keys=ON')
logging.basicConfig()
_logger = logging.getLogger('sqlalchemy.engine')
meta = sa.MetaData(bind=engine)
Base = sadec.declarative_base(metadata=meta)
sess = saorm.sessionmaker(bind=engine)
session = sess()
blog_tags_table = sa.Table(
'blog_tag_map',
meta,
sa.Column('blog_id', sa.Integer, sa.ForeignKey('blogs.id', ondelete='cascade')),
sa.Column('tag_id', sa.Integer, sa.ForeignKey('tags.id', ondelete='cascade')),
sa.UniqueConstraint('blog_id', 'tag_id', name='uc_blog_tag_map')
)
class Blog(Base):
__tablename__ = 'blogs'
id = sa.Column(sa.Integer, primary_key=True)
title = sa.Column(sa.String, nullable=False)
tags = saorm.relationship('Tag', secondary=blog_tags_table, passive_deletes=True,
cascade='save-update, merge, refresh-expire, expunge')
class Tag(Base):
__tablename__ = 'tags'
id = sa.Column(sa.Integer, primary_key=True)
label = sa.Column(sa.String, nullable=False)
meta.create_all(bind=engine)
blog = Blog(title='foo')
blog.tags.append(Tag(label='bar'))
session.add(blog)
session.commit()
# sanity check
assert session.query(Blog.id).count() == 1
assert session.query(Tag.id).count() == 1
assert session.query(blog_tags_table).count() == 1
_logger.setLevel(logging.INFO)
session.commit()
# make sure the tag is loaded into the collection
assert blog.tags[0]
session.delete(blog)
session.commit()
_logger.setLevel(logging.WARNING)
# confirm check
assert session.query(Blog.id).count() == 0
assert session.query(Tag.id).count() == 1
assert session.query(blog_tags_table).count() == 0
The above code will produce DELETE statements as follows:
DELETE FROM blog_tag_map WHERE
blog_tag_map.blog_id = ? AND blog_tag_map.tag_id = ?
DELETE FROM blogs WHERE blogs.id = ?
Is there a way to setup the relationship so that no DELETE statement for blog_tag_map is issued? I've also tried setting passive_deletes='all' with the same results.
Here, the “related object” is not being deleted. That would be “tags”. The blog_tags_table is not an object, it is a many-to-many table. Right now the passive_deletes='all' option is not supported for many-to-many, that is, a relationship that includes "secondary". This would be an acceptable feature add but would require development and testing efforts.
Applying viewonly=True to the relationship() would prevent any changes from affecting the many-to-many table. If the blog_tags_table is otherwise special, then you'd want to use the association object pattern to have finer grained control.

How to build backref with both associatition object and secondaryjoin?

I need some models for instance following:
Work - e.g. works of literature.
Worker - e.g. composer, translator or something similar has contribution to work.
Thus, a 'type' field is required to distinguish workers by division of work. As SQLAlchemy's documentation, this case can benifit from association object like following:
class Work(base):
id = Column(Integer, primary_key=True)
name = Column(String(50))
description = Column(Text)
class Worker(base):
id = Column(Integer, primary_key=True)
name = Column(String(50))
description = Column(Text)
class Assignment(base):
work_id = Column(Integer, Foreignkey('work.id'), primary_key=True)
worker_id = Column(Integer, Foreignkey('worker.id'), primary_key=True)
type = Column(SmallInteger, nullable=True)
Nonetheless, how to take advantage of backref and alternatvie join condition for building relation immediately to implement that each Work object can retrieve and modify corresponding Worker(s) via different attributions for distinction. For example:
work = session.query(Work).get(1)
work.name
>>> 'A Dream of The Red Mansions'
work.composers
>>> [<Worker('Xueqin Cao')>]
work.translators
>>> [<Worker('Xianyi Yang')>, <Worker('Naidie Dai')>]
Vice versa:
worker = session.query(Worker).get(1)
worker.name
>>> 'Xueqin Cao'
worker.composed
>>> [<Work('A Dream of The Red Mansions')>]
worker.translated
>>> []
Adding secondaryjoin directly without secondary specified seems not feasible, besides, SQLAlchemy's docs notes that:
When using the association object pattern, it is advisable that the association-mapped table not be used as the secondary argument on a relationship() elsewhere, unless that relationship() contains the option viewonly=True. SQLAlchemy otherwise may attempt to emit redundant INSERT and DELETE statements on the same table, if similar state is detected on the related attribute as well as the associated object.
Then, is there some way to build these relations elegantly and readily ?
There's three general ways to go here.
One is, do a "vanilla" setup where you have "work"/"workers" set up without distinguishing on "type" - then, use relationship() for "composer", "composed", "translator", "translated" by using "secondary" to Assignment.__table__ along with custom join conditions, as well as viewonly=True. So you'd do writes via the vanilla properties only. A disadvantage here is that there's no immediate synchronization between the "vanilla" and "specific" collections.
Another is, same with the "vanilla" setup, but just use plain Python descriptors to give "composer", "composed", "translator", "translated" views in memory, that is, [obj.worker for obj in self.workers if obj.type == 'composer']. This is the simplest way to go. Whatever you put in the "vanilla" collections shows right up in the "filtered" collection, the SQL is simple, and there's fewer SELECT statements in play (one per Worker/Work instead of N per Worker/Work).
Finally, the approach that's closest to what you're asking, with primary joins and backrefs, but note with the association object, the backrefs are between Work/Assignment and Assignment/Worker, but not between Work/Worker directly. This approach probably winds up using more SQL to get at the results but is the most complete, and also has the nifty feature that the "type" is written automatically. We're also using a "one way backref", as Assignment doesn't have a simple way of relating back outwards (there's ways to do it but it would be tedious). Using a Python function to automate creation of the relationships reduces the boilerplate, and note here I'm using a string for "type", this can be an integer if you add more arguments to the system:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.associationproxy import association_proxy
Base = declarative_base()
def _work_assignment(name):
assign_ = relationship("Assignment",
primaryjoin="and_(Assignment.work_id==Work.id, "
"Assignment.type=='%s')" % name,
back_populates="work", cascade="all, delete-orphan")
assoc = association_proxy("%s_assign" % name, "worker",
creator=lambda worker: Assignment(worker=worker, type=name))
return assoc, assign_
def _worker_assignment(name):
assign_ = relationship("Assignment",
primaryjoin="and_(Assignment.worker_id==Worker.id, "
"Assignment.type=='%s')" % name,
back_populates="worker", cascade="all, delete-orphan")
assoc = association_proxy("%s_assign" % name, "work",
creator=lambda work: Assignment(work=work, type=name))
return assoc, assign_
class Work(Base):
__tablename__ = 'work'
id = Column(Integer, primary_key=True)
name = Column(String(50))
description = Column(Text)
composers, composer_assign = _work_assignment("composer")
translators, translator_assign = _work_assignment("translator")
class Worker(Base):
__tablename__ = 'worker'
id = Column(Integer, primary_key=True)
name = Column(String(50))
description = Column(Text)
composed, composer_assign = _worker_assignment("composer")
translated, translator_assign = _worker_assignment("translator")
class Assignment(Base):
__tablename__ = 'assignment'
work_id = Column(Integer, ForeignKey('work.id'), primary_key=True)
worker_id = Column(Integer, ForeignKey('worker.id'), primary_key=True)
type = Column(String, nullable=False)
worker = relationship("Worker")
work = relationship("Work")
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
session = Session(e)
ww1, ww2, ww3 = Worker(name='Xueqin Cao'), Worker(name='Xianyi Yang'), Worker(name='Naidie Dai')
w1 = Work(name='A Dream of The Red Mansions')
w1.composers.append(ww1)
w1.translators.extend([ww2, ww3])
session.add(w1)
session.commit()
work = session.query(Work).get(1)
assert work.name == 'A Dream of The Red Mansions'
assert work.composers == [ww1]
assert work.translators == [ww2, ww3]
worker = session.query(Worker).get(ww1.id)
assert worker.name == 'Xueqin Cao'
assert worker.composed == [work]
assert worker.translated == []
worker.composed[:] = []
# either do this...
session.expire(work, ['composer_assign'])
# or this....basically need composer_assign to reload
# session.commit()
assert work.composers == []