Trouble with sqlalchemy join query - get the next date from child - sqlalchemy

Using the example below, I'm trying to write a single query that fetches my list of offices and, for each office, pulls the next upcoming visit from the child table.
class Office(db.Model):
    """An office that receives visits."""

    id = db.Column(db.Integer, primary_key=True)
    office_name = db.Column(db.String(100))
    # One-to-many to Visit, ordered newest first; lazy='select' loads the
    # collection with its own SELECT on first access. Also adds Visit.office.
    visits = db.relationship(
        'Visit',
        backref='office',
        lazy='select',
        order_by='desc(Visit.visit_date)',
    )
class Visit(db.Model):
    """A dated visit to a single office."""

    id = db.Column(db.Integer, primary_key=True)
    visit_date = db.Column(db.Date)
    # Owning office; references the primary key of the office table.
    office_id = db.Column(db.Integer, db.ForeignKey('office.id'))
I've been able to create a query in raw SQL that will return what i need:
SELECT * FROM office
LEFT OUTER JOIN ( SELECT office_id, visit_date FROM visit WHERE visit_date >= date('now')
GROUP BY office_id )
AS next_vis ON id = next_vis.office_id
But I haven't been able to convert the above to SQLAlchemy.
The closest I've got is this:
# Earliest upcoming visit per office. MIN() makes the GROUP BY well defined:
# a bare visit_date next to GROUP BY office_id is rejected by strict
# databases and yields an arbitrary row on lenient ones, and an ORDER BY
# applied before grouping does not change that.
next_vis = (
    db.session.query(
        Visit.office_id,
        db.func.min(Visit.visit_date).label('visit_date'),
    )
    .filter(Visit.visit_date >= datetime.utcnow().date())
    .group_by(Visit.office_id)
    .subquery()
)

# LEFT OUTER JOIN keeps offices without an upcoming visit (date is None).
# Each result row is an (Office, visit_date) pair.
offices = (
    db.session.query(Office, next_vis.c.visit_date)
    .outerjoin(next_vis, Office.id == next_vis.c.office_id)
    .order_by(Office.office_name.asc())
)
The only problem is that it returns a tuple of (office, date), but ideally I want it returned as a single object. Is that not possible?
Thanks!

If anyone is interested i went about this a slightly different way.
I switched from a join query to adding a column property in my Office model:
class Office(db.Model):
    """An office exposing its next upcoming visit date as ``next_vis``."""

    id = db.Column(db.Integer, primary_key=True)
    office_name = db.Column(db.String(100))
    visits = db.relationship(
        'Visit',
        backref='office',
        lazy='select',
        order_by='desc(Visit.visit_date)',
    )
    # Correlated scalar subquery embedded in the Office SELECT.
    # MIN() guarantees exactly one value per office; an ORDER BY without a
    # LIMIT could return several rows, which some databases reject when the
    # subquery is used as a scalar expression.
    # NOTE: Visit must already be defined when this class body executes.
    next_vis = column_property(
        select([db.func.min(Visit.visit_date)])
        .where(and_(
            Visit.office_id == id,
            Visit.visit_date >= db.func.current_date(),
        ))
        .correlate_except(Visit)
    )
Now when i do Office.query.all() i can do:
# Print the next upcoming visit date (or None) of every office.
for office in Office.query.all():
    print(office.next_vis)
If i've overlooked anything please let me know!
Thanks,

Related

Filter results by count of items in relationship

Let's say I have these two models :
class Client(db.Model):
    """A client that can own many invoices."""

    id = db.Column(db.Integer, primary_key=True)
    # One-to-many; the backref also adds ``Invoice.client``. The relationship
    # needs a foreign key on Invoice pointing at this table to be resolvable.
    invoices = db.relationship('Invoice', backref='client')
class Invoice(db.Model):
    """An invoice belonging to a client."""

    id = db.Column(db.Integer, primary_key=True)
    # Foreign key that Client.invoices uses to work out its join condition.
    client_id = db.Column(db.Integer, db.ForeignKey('client.id'))
I'd like to retrieve all Client with at least 1 Invoice and less than 20 Invoice.
I would be expecting it to work like this :
Client.query.join(Invoice).filter(and_(Invoice.count() > 1, Invoice.count() <= 20))
Or even this would be nice :
Client.query.join(Invoice).filter(and_(count_(Invoice) > 1, count_(Invoice) <= 20))
But of course, it can't be this simple. .count() can't work from there obviously and I can't find a count_() in sqlalchemy.func.
Thanks to coworkers and code lying around, we got it working:
# Clients owning between 1 and 20 invoices (both bounds inclusive).
# A condition on an aggregate belongs in HAVING, after the GROUP BY;
# func.count renders SQL COUNT(), and counting Invoice.id ignores the NULL
# row the outer join produces for clients without invoices.
client = (
    Client.query
    .outerjoin(Client.invoices)
    .group_by(Client.id)
    .having(func.count(Invoice.id).between(1, 20))
    .all()
)
I hope it helps someone!

Eager loading hierarchical children with explicit self-joins and contains_eager in SQLAlchemy

Given the following relationships:
- 1 MasterProduct parent -> many MasterProduct children
- 1 MasterProduct child -> many StoreProducts
- 1 StoreProduct -> 1 Store
I have defined the following declarative models in SQLAlchemy:
class MasterProduct(Base):
    """Self-referential product: one parent has many child products."""

    __tablename__ = 'master_products'

    id = Column(Integer, primary_key=True)
    # Id of the parent MasterProduct row.
    pid = Column(Integer, ForeignKey('master_products.id'))

    # Adjacency list: remote_side=[id] makes the 'parent' backref the
    # many-to-one direction of the self-referential relationship.
    children = relationship(
        'MasterProduct',
        join_depth=1,
        backref=backref('parent', remote_side=[id]),
    )
    store_products = relationship('StoreProduct', backref='master_product')
class StoreProduct(Base):
    """A master product as listed by one store at a given timestamp."""

    __tablename__ = 'store_products'

    id = Column(Integer, primary_key=True)
    mid = Column(Integer, ForeignKey('master_products.id'))  # master product
    sid = Column(Integer, ForeignKey('stores.id'))  # store
    timestamp = Column(DateTime)

    # uselist=False: exposed as a single Store object rather than a list.
    store = relationship('Store', uselist=False)
class Store(Base):
    """A store referenced by StoreProduct.sid."""

    __tablename__ = 'stores'

    id = Column(Integer, primary_key=True)
My goal is to replicate the following query in SQLAlchemy with eager loading:
SELECT *
FROM master_products mp_parent
INNER JOIN master_products mp_child ON mp_child.pid = mp_parent.id
INNER JOIN store_products sp1 ON sp1.mid = mp_child.id
LEFT JOIN store_products sp2
ON sp1.mid = sp2.mid AND sp1.sid = sp2.sid AND sp1.timestamp < sp2.timestamp
WHERE mp_parent.id = 6752 AND sp2.id IS NULL
The query selects all MasterProduct children for parent 6752 and all
corresponding store products grouped by most recent timestamp using a NULL
self-join (greatest-n-per-group). There are 82 store products returned from the
query, with 14 master product children.
I've tried the following to no avail:
mp_child = aliased(MasterProduct)
sp1 = aliased(StoreProduct)
sp2 = aliased(StoreProduct)

# Parent 6752 joined to its children and, per (child, store), only the most
# recent store product: sp2 matches any newer row for the same (mid, sid),
# so "sp2.id IS NULL" keeps the sp1 rows that have no newer counterpart.
q = (
    db.session.query(MasterProduct)
    .filter_by(id=6752)
    .join(mp_child, MasterProduct.children)
    .join(sp1, mp_child.store_products)
    .outerjoin(sp2, and_(sp1.mid == sp2.mid,
                         sp1.sid == sp2.sid,
                         sp1.timestamp < sp2.timestamp))
    .filter(sp2.id.is_(None))
    # Populate the collections from the explicit joins above instead of
    # letting the ORM emit its own eager-loading joins.
    .options(
        contains_eager(MasterProduct.children, alias=mp_child),
        contains_eager(MasterProduct.children, mp_child.store_products,
                       alias=sp1),
    )
)
>>> mp_parent = q.first() # the query below looks ok!
SELECT <all columns from master_products, master_products_1, and store_products_1>
FROM master_products INNER JOIN master_products AS master_products_1 ON master_products.id = master_products_1.pid INNER JOIN store_products AS store_products_1 ON master_products_1.id = store_products_1.mid LEFT OUTER JOIN store_products AS store_products_2 ON store_products_1.mid = store_products_2.mid AND store_products_1.sid = store_products_2.sid AND store_products_1.timestamp < store_products_2.timestamp
WHERE master_products.id = %s AND store_products_2.id IS NULL
LIMIT %s
>>> mp_parent.children # only *one* child is eagerly loaded (expected 14)
[<app.models.MasterProduct object at 0x2463850>]
>>> mp_parent.children[0].id # this is correct, 6762 is one of the children
6762L
>>> mp_parent.children[0].pid # this is correct
6752L
>>> mp_parent.children[0].store_products # only *one* store product is eagerly loaded (expected 7 for this child)
[<app.models.StoreProduct object at 0x24543d0>]
Taking a step back and simplifying the query to eagerly load just the children
also results in only 1 child being eagerly loaded instead of all 14:
mp_child = aliased(MasterProduct)

# Simplified variant: eagerly populate only MasterProduct.children from the
# explicit self-join. Parentheses replace the backslash continuations, one of
# which was missing before .options().
q = (
    db.session.query(MasterProduct)
    .filter_by(id=6752)
    .join(mp_child, MasterProduct.children)
    .options(contains_eager(MasterProduct.children, alias=mp_child))
)
However, when I use a joinedload, joinedload_all, or subqueryload, all
14 children are eagerly loaded, i.e.:
q = db.session.query(MasterProduct).filter_by(id=6752) \
.options(joinedload_all('children.store_products', innerjoin=True))
So the problem seems to be populating MasterProduct.children from the
explicit join using contains_eager.
Can anyone spot the error in my ways or help point me in the right direction?
OK what you might observe in the SQL is that there's a "LIMIT 1" coming out. That's because you're using first(). We can just compare the first two queries, the contains eager, and the joinedload:
join() + contains_eager():
SELECT master_products_1.id AS master_products_1_id, master_products_1.pid AS master_products_1_pid, master_products.id AS master_products_id, master_products.pid AS master_products_pid
FROM master_products JOIN master_products AS master_products_1 ON master_products.id = master_products_1.pid
WHERE master_products.id = ?
LIMIT ? OFFSET ?
joinedload():
SELECT anon_1.master_products_id AS anon_1_master_products_id, anon_1.master_products_pid AS anon_1_master_products_pid, master_products_1.id AS master_products_1_id, master_products_1.pid AS master_products_1_pid
FROM (SELECT master_products.id AS master_products_id, master_products.pid AS master_products_pid
FROM master_products
WHERE master_products.id = ?
LIMIT ? OFFSET ?) AS anon_1 JOIN master_products AS master_products_1 ON anon_1.master_products_id = master_products_1.pid
you can see the second query is quite different; because first() means a LIMIT is applied, joinedload() knows to wrap the "criteria" query in a subquery, apply the limit to that, then apply the JOIN afterwards. In the join+contains_eager case, the LIMIT is applied to the collection itself and you get the wrong number of rows.
Just changing the script at the bottom to this:
for q, query_label in queries:
mp_parent = q.all()[0]
I get the output it says you're expecting:
[explicit join with contains_eager] children=3, store_products=27
[joinedload] children=3, store_products=27
[joinedload_all] children=3, store_products=27
[subqueryload] children=3, store_products=27
[subqueryload_all] children=3, store_products=27
[explicit joins with contains_eager, filtered by left-join] children=3, store_products=9
(this is why getting a user-created example is so important)

sqlalchemy - select records by month in MySQL

when i have a table in MySQL:
create table t
(
id integer primary key,
time datetime not null,
value integer not null
)
and an mapping class:
class T(Base):
    """ORM mapping of table ``t``: an integer value recorded at a time."""

    __tablename__ = 't'

    id = Column(INTEGER, primary_key=True, nullable=False, unique=True)
    time = Column(DATETIME, nullable=False)
    value = Column(INTEGER, nullable=False)
How can I select all values that fall in a given month from this table using SQLAlchemy?
MySQL has the month function: select value from t where month(time) = 4
but SQLAlchemy has no month function.
Without loading all Ts into the session, one can use func to filter out non-April objects straight away in the database:
from sqlalchemy.sql import func
# func.MONTH(...) renders as the SQL function MONTH(), so the month filter
# is evaluated by the database rather than in Python.
qry = session.query(T).filter(func.MONTH(T.time) == 4)
for t in qry:
    print(t.value)
A very old question but a better answer is here:
from sqlalchemy import extract
session.query(T).filter(extract('month', T.time)==7).all()
This will return all the records in the database whose time falls in July.
If, for example, you want the records from every April irrespective of year or day, you can also filter in Python (note that this loads every T into the session):
# Client-side filter: every T row is fetched, only April ones are printed.
for t in session.query(T):
    if t.time.month == 4:
        print(t.value)

Django , Query to get certain values with a distinct name and the latest date and time

The situation is i have a database full of test results split into different test sets. Each test has a name, result , start time , start date , ... , what currently happens is over the week test sets can be run multiple times , giving multiple test results under a test set
Currently, when I want to get the latest result of each test under a test set, I query for the distinct test names, and then for each distinct name I query for that name and order by startDate and startTime to get the latest. This is a pain because performance degrades substantially when a test set has over 100 different tests.
What im trying to do is to perform what i want in one call of django.objects...
Here is the mysql to effectively represent what i want to achieve:
select testName,result,MAX(startDate),MAX(startTime),othertestinfo from testset where testset_id = 'UniqueID' group by testName;
Im having a hard time trying to figure this out in django , if its even possible.
Any help would be much appreciated.
Thanks
Update 23/1/12
Models for what i am using.
class testCase(models.Model):
    """One recorded execution of a named test within a test set."""

    id = models.AutoField(primary_key=True)
    testName = models.CharField(max_length=50)
    result = models.CharField(max_length=50)
    precision = models.CharField(max_length=10)
    fileLocation = models.CharField(max_length=150)
    # Referenced by name because the testset class is defined further down
    # the module; passing the class object here would raise NameError.
    # on_delete is spelled out: CASCADE was the implicit default in old
    # Django versions and the argument is mandatory from Django 2.0.
    testset_id = models.ForeignKey('testset', on_delete=models.CASCADE)
    machine = models.CharField(max_length=15)
    startDate = models.DateField()
    startTime = models.TimeField()
class testset(models.Model):
    """A test set, identified by a caller-supplied string id."""

    # String primary key; replaces Django's automatic integer id.
    testsetID = models.CharField(max_length=100, primary_key=True)
    testsetName = models.CharField(max_length=40)
    platformName = models.CharField(max_length=15)
    osName = models.CharField(max_length=15)
    executionName = models.CharField(max_length=40)
    version = models.CharField(max_length=10)
    software = models.CharField(max_length=20)
    runType = models.CharField(max_length=20)
You can give a try to the following:
# testName, startDate and startTime are fields of testCase (testset has none
# of them), so the aggregation has to run on testCase.
# values('testName') before annotate() groups by testName; the two maxima
# are computed independently and need not come from the same row.
# Chain .filter(testset_id=...) in front to restrict it to one test set.
t = testCase.objects.values('testName').annotate(Max('startDate'), Max('startTime'))
This would give you a list of value dictionaries, one per distinct testName, each containing the keys testName, startDate__max and startTime__max.
This fulfils the condition, but you may need to experiment further to get all the other columns.

How to set filter to get children in certain time period by eagerload_all() at SqlAlchemy

I have a table posts and it stores 3 types of post, Topic, Reply and Comment. Each one has its parent id.
# Single table inheritance
class Post(Base):
    """Base row of the ``posts`` table shared by Topic, Reply and Comment."""

    __tablename__ = 'posts'

    id = Column(Integer, primary_key=True)
    # Self-referential link to the parent post.
    parent_id = Column(Integer, ForeignKey('posts.id'))
    # One-character type tag; selects which subclass a row is loaded as.
    discriminator = Column(String(1))
    content = Column(UnicodeText)
    added_at = Column(DateTime)

    __mapper_args__ = {'polymorphic_on': discriminator}
class Topic(Post):
    """Post subtype stored with discriminator 't'; parent of replies."""

    replies = relation("Reply")

    __mapper_args__ = {'polymorphic_identity': 't'}
class Reply(Post):
    """Post subtype stored with discriminator 'r'; parent of comments."""

    comments = relation("Comment")

    __mapper_args__ = {'polymorphic_identity': 'r'}
class Comment(Post):
    """Post subtype stored with discriminator 'c'."""

    __mapper_args__ = {'polymorphic_identity': 'c'}
And I'm using eagerload_all() to get all the replies and comments belong to one topic:
session.query(Topic).options(eagerload_all('replies.comments')).get(topic_id)
My question is, if I want to get only replies and those replies' comments in certain time period, for example, this week, or this month. How should I use filter to achieve this?
Thank you
Using eagerload_all only changes when the children of a Topic are queried: immediately, rather than on the first access to its replies and/or comments. Either way, once you load the Topic object into the session, all its related children will be loaded as well. This gives you the first option:
Option-1: Filter in the python code instead of database:
Basically create a method on the Topic object similar to
class Topic(Post):
    ...

    def filter_replies(self, from_date, to_date):
        """Return the replies added between two bounds, both inclusive.

        The filtering happens in Python over ``self.replies``, so the whole
        collection is loaded first.
        """
        return [
            reply for reply in self.replies
            if from_date <= reply.added_at <= to_date
        ]
Then you can do similar code on Replies to filter Comments or any combination of those. You get the idea.
Option-2: Filter on the database level:
In order to achieve this you need not load the Topic object, but filter directly on the Reply/Comment. Following query returns all Reply for a given Topic with a date filter:
topic_id = 1
from_date = date(2010, 9, 5)
to_date = date(2010, 9, 15)

# Replies of one topic restricted to a date range, filtered by the database.
# NOTE(review): added_at is a DateTime column; a plain date as upper bound
# presumably compares as midnight and would drop replies made later on
# 15 Sept -- confirm whether the end day is meant to be included.
q = (
    session.query(Reply)
    .filter(Reply.parent_id == topic_id)
    .filter(Reply.added_at >= from_date)
    .filter(Reply.added_at <= to_date)
)
for r in q.all():
    print("Reply: ", r)
The version for the Comment is just a little bit more involved as you require an alias in order to overcome the SQL statement generation issue as all your objects are mapped to the same table name:
topic_id = 1
from_date = date(2010, 9, 5)
to_date = date(2010, 9, 15)

# Comment and Reply are mapped to the same posts table, so the Reply side of
# the self-join needs its own alias.
ralias = aliased(Reply)

# Comments in the date range whose parent reply belongs to the given topic.
# join() takes the target and the ON clause as two arguments; the older
# form that wrapped them in a single tuple is no longer supported.
q = (
    session.query(Comment)
    .join(ralias, Comment.parent_id == ralias.id)
    .filter(ralias.parent_id == topic_id)
    .filter(Comment.added_at >= from_date)
    .filter(Comment.added_at <= to_date)
)
for c in q:
    print("Comment: ", c)
Obviously you can create a function that would combine both pieces into a more comprehensive query.
To achieve "this week" or "this month" type queries, you can either convert these filters into a date range as shown above or use the expression.func functionality of SA.