Using the example below, I'm trying to write a single query that returns my list of offices together with each office's next upcoming visit from the child table.
class Office(db.Model):
    """An office, with its visits available through ``visits``."""

    id = db.Column(db.Integer, primary_key=True)
    office_name = db.Column(db.String(100))
    # Visits are ordered by visit_date descending; every Visit gets an
    # ``office`` back-reference.
    visits = db.relationship(
        'Visit',
        backref='office',
        lazy='select',
        order_by='desc(Visit.visit_date)',
    )
class Visit(db.Model):
    """A dated visit that belongs to one office via ``office_id``."""

    id = db.Column(db.Integer, primary_key=True)
    visit_date = db.Column(db.Date)
    office_id = db.Column(db.Integer, db.ForeignKey('office.id'))
I've been able to write a raw SQL query that returns what I need:
-- Every office with its earliest visit on or after today (NULL when none).
-- MIN() makes the chosen date deterministic; a bare visit_date under
-- GROUP BY would let the database pick an arbitrary row of each group.
SELECT *
FROM office
LEFT OUTER JOIN (
    SELECT office_id, MIN(visit_date) AS visit_date
    FROM visit
    WHERE visit_date >= date('now')
    GROUP BY office_id
) AS next_vis
    ON office.id = next_vis.office_id
But I haven't been able to convert the above to SQLAlchemy.
The closest I've got is this:
# Earliest visit on or after today, one row per office.  The MIN() aggregate
# makes the "next visit" deterministic: an ORDER BY combined with GROUP BY does
# not control which row of a group is kept.
next_vis = (
    db.session.query(
        Visit.office_id,
        db.func.min(Visit.visit_date).label('visit_date'),
    )
    .filter(Visit.visit_date >= datetime.utcnow().date())
    .group_by(Visit.office_id)
    .subquery()
)

# The outer join keeps offices that have no upcoming visit (date is NULL).
# Each result row is an (Office, visit_date) pair.
offices = (
    db.session.query(Office, next_vis.c.visit_date)
    .outerjoin(next_vis, Office.id == next_vis.c.office_id)
    .order_by(Office.office_name.asc())
)
The only problem is that it returns a tuple of (office, date), whereas ideally I'd like it returned as a single object. Is that not possible?
Thanks!
If anyone is interested, I went about this in a slightly different way.
I switched from a join query to adding a column property to my Office model:
class Office(db.Model):
    """Office model that exposes its next upcoming visit date as ``next_vis``."""

    id = db.Column(db.Integer, primary_key=True)
    office_name = db.Column(db.String(100))
    visits = db.relationship('Visit', backref='office', lazy='select',
                             order_by='desc(Visit.visit_date)')
    # Correlated subquery: the earliest visit_date on or after the current
    # date for this office.  MIN() guarantees a single value, whereas an
    # ordered SELECT without a LIMIT can yield several rows where a scalar is
    # expected.  ``id`` is the column defined above, not the builtin.
    # Visit must already be defined, because Visit.visit_date is evaluated
    # while this class body runs.
    next_vis = column_property(
        select([db.func.min(Visit.visit_date)])
        .where(and_(Visit.office_id == id,
                    Visit.visit_date >= db.func.current_date()))
        .correlate_except(Visit)
    )
Now when I call Office.query.all() I can do:
# Print the next-visit date of every office.
for office in Office.query.all():
    print(office.next_vis)
If I've overlooked anything, please let me know!
Thanks,
Related
Let's say I have these two models :
class Client(db.Model):
    """A client; its invoices are reachable through ``invoices``."""

    id = db.Column(db.Integer, primary_key=True)
    # Each Invoice gets a ``client`` back-reference.
    invoices = db.relationship('Invoice', backref='client')
class Invoice(db.Model):
    """An invoice that belongs to one client."""

    id = db.Column(db.Integer, primary_key=True)
    # Foreign key the Client.invoices relationship needs in order to join
    # the two tables.
    client_id = db.Column(db.Integer, db.ForeignKey('client.id'))
I'd like to retrieve all Clients that have at least 1 Invoice and fewer than 20 Invoices.
I would be expecting it to work like this :
# Wished-for syntax only: per the surrounding text, .count() cannot be used
# like this inside filter().
Client.query.join(Invoice).filter(and_(Invoice.count() > 1, Invoice.count() <= 20))
Or even this would be nice :
# Wished-for syntax only: per the surrounding text, no count_() helper was
# found in sqlalchemy.func.
Client.query.join(Invoice).filter(and_(count_(Invoice) > 1, count_(Invoice) <= 20))
But of course, it can't be this simple: .count() obviously can't work there, and I can't find a count_() in sqlalchemy.func.
Thanks to coworkers and code lying around, we got it working:
# Clients that have between 1 and 20 invoices (inclusive).
# The per-client invoice count is an aggregate, so it is tested in HAVING
# rather than WHERE.  and_() combines the two conditions and func.count()
# renders SQL COUNT(); func.and_ / func.count_ would render function calls
# named "and_" / "count_", which do not exist in SQL.
client = (
    Client.query
    .outerjoin(Client.invoices)
    .group_by(Client.id)
    .having(and_(func.count(Invoice.id) >= 1,
                 func.count(Invoice.id) <= 20))
    .all()
)
I hope it helps someone!
Given the following relationships:
- 1 MasterProduct parent -> many MasterProduct children
- 1 MasterProduct child -> many StoreProducts
- 1 StoreProduct -> 1 Store
I have defined the following declarative models in SQLAlchemy:
class MasterProduct(Base):
    """Self-referential product: a parent row has many child rows via ``pid``."""

    __tablename__ = 'master_products'

    id = Column(Integer, primary_key=True)
    pid = Column(Integer, ForeignKey('master_products.id'))  # id of the parent row
    # One-to-many onto the same table; remote_side=[id] on the backref makes
    # ``parent`` the many-to-one side.
    children = relationship(
        'MasterProduct',
        join_depth=1,
        backref=backref('parent', remote_side=[id]),
    )
    store_products = relationship('StoreProduct', backref='master_product')
class StoreProduct(Base):
    """A timestamped row linking a master product (``mid``) to a store (``sid``)."""

    __tablename__ = 'store_products'

    id = Column(Integer, primary_key=True)
    mid = Column(Integer, ForeignKey('master_products.id'))
    sid = Column(Integer, ForeignKey('stores.id'))
    timestamp = Column(DateTime)
    # uselist=False: ``store`` is a single Store object, not a list.
    store = relationship('Store', uselist=False)
class Store(Base):
    """A store, referenced by ``StoreProduct.sid``."""

    __tablename__ = 'stores'

    id = Column(Integer, primary_key=True)
My goal is to replicate the following query in SQLAlchemy with eager loading:
-- Children of parent 6752 and, for each (mid, sid) pair, only the
-- store_products row with no newer timestamp: sp2 looks for a newer row,
-- and "sp2.id IS NULL" keeps sp1 only when none exists.
SELECT *
FROM master_products AS mp_parent
    INNER JOIN master_products AS mp_child
        ON mp_child.pid = mp_parent.id
    INNER JOIN store_products AS sp1
        ON sp1.mid = mp_child.id
    LEFT JOIN store_products AS sp2
        ON sp1.mid = sp2.mid
       AND sp1.sid = sp2.sid
       AND sp1.timestamp < sp2.timestamp
WHERE mp_parent.id = 6752
  AND sp2.id IS NULL
The query selects all MasterProduct children for parent 6752 and all
corresponding store products grouped by most recent timestamp using a NULL
self-join (greatest-n-per-group). There are 82 store products returned from the
query, with 14 master product children.
I've tried the following to no avail:
mp_child = aliased(MasterProduct)
sp1 = aliased(StoreProduct)
sp2 = aliased(StoreProduct)

# sp2 matches any row for the same (mid, sid) with a later timestamp than sp1;
# keeping only rows where no such sp2 exists leaves the most recent sp1.
newer_row = and_(sp1.mid == sp2.mid,
                 sp1.sid == sp2.sid,
                 sp1.timestamp < sp2.timestamp)

# Populate children and their store_products from the explicit joins.
eager_children = contains_eager(MasterProduct.children, alias=mp_child)
eager_store_products = contains_eager(
    MasterProduct.children, mp_child.store_products, alias=sp1)

q = (
    db.session.query(MasterProduct)
    .filter_by(id=6752)
    .join(mp_child, MasterProduct.children)
    .join(sp1, mp_child.store_products)
    .outerjoin(sp2, newer_row)
    .filter(sp2.id == None)  # noqa: E711 -- renders as "IS NULL"
    .options(eager_children, eager_store_products)
)
>>> mp_parent = q.first() # the query below looks ok!
SELECT <all columns from master_products, master_products_1, and store_products_1>
FROM master_products INNER JOIN master_products AS master_products_1 ON master_products.id = master_products_1.pid INNER JOIN store_products AS store_products_1 ON master_products_1.id = store_products_1.mid LEFT OUTER JOIN store_products AS store_products_2 ON store_products_1.mid = store_products_2.mid AND store_products_1.sid = store_products_2.sid AND store_products_1.timestamp < store_products_2.timestamp
WHERE master_products.id = %s AND store_products_2.id IS NULL
LIMIT %s
>>> mp_parent.children # only *one* child is eagerly loaded (expected 14)
[<app.models.MasterProduct object at 0x2463850>]
>>> mp_parent.children[0].id # this is correct, 6762 is one of the children
6762L
>>> mp_parent.children[0].pid # this is correct
6752L
>>> mp_parent.children[0].store_products # only *one* store product is eagerly loaded (expected 7 for this child)
[<app.models.StoreProduct object at 0x24543d0>]
Taking a step back and simplifying the query to eagerly load just the children
also results in only 1 child being eagerly loaded instead of all 14:
mp_child = aliased(MasterProduct)

# Simplified case: eagerly populate only ``children`` from the explicit join.
# The chain is parenthesised so that every call belongs to one expression
# (the original lacked a continuation before .options()).
q = (
    db.session.query(MasterProduct)
    .filter_by(id=6752)
    .join(mp_child, MasterProduct.children)
    .options(contains_eager(MasterProduct.children, alias=mp_child))
)
However, when I use a joinedload, joinedload_all, or subqueryload, all
14 children are eagerly loaded, i.e.:
# Let SQLAlchemy build the joins itself: load children and their
# store_products eagerly along the 'children.store_products' path.
eager_path = joinedload_all('children.store_products', innerjoin=True)
q = (
    db.session.query(MasterProduct)
    .filter_by(id=6752)
    .options(eager_path)
)
So the problem seems to be populating MasterProduct.children from the
explicit join using contains_eager.
Can anyone spot the error in my ways or help point me in the right direction?
OK, what you might observe in the SQL is that there's a "LIMIT 1" coming out. That's because you're using first(). We can just compare the first two queries, the contains_eager() one and the joinedload() one:
join() + contains_eager():
SELECT master_products_1.id AS master_products_1_id, master_products_1.pid AS master_products_1_pid, master_products.id AS master_products_id, master_products.pid AS master_products_pid
FROM master_products JOIN master_products AS master_products_1 ON master_products.id = master_products_1.pid
WHERE master_products.id = ?
LIMIT ? OFFSET ?
joinedload():
SELECT anon_1.master_products_id AS anon_1_master_products_id, anon_1.master_products_pid AS anon_1_master_products_pid, master_products_1.id AS master_products_1_id, master_products_1.pid AS master_products_1_pid
FROM (SELECT master_products.id AS master_products_id, master_products.pid AS master_products_pid
FROM master_products
WHERE master_products.id = ?
LIMIT ? OFFSET ?) AS anon_1 JOIN master_products AS master_products_1 ON anon_1.master_products_id = master_products_1.pid
You can see the second query is quite different: because first() means a LIMIT is applied, joinedload() knows to wrap the "criteria" query in a subquery, apply the limit to that, and then apply the JOIN afterwards. In the join() + contains_eager() case, the LIMIT is applied to the joined rows that make up the collection itself, so you get the wrong number of rows.
Just changing the script at the bottom to this:
for q, query_label in queries:
    # all()[0] fetches the complete result and then takes the first entity,
    # so no LIMIT is applied to the joined rows (unlike first()).
    mp_parent = q.all()[0]
I get the output it says you're expecting:
[explicit join with contains_eager] children=3, store_products=27
[joinedload] children=3, store_products=27
[joinedload_all] children=3, store_products=27
[subqueryload] children=3, store_products=27
[subqueryload_all] children=3, store_products=27
[explicit joins with contains_eager, filtered by left-join] children=3, store_products=9
(this is why getting a user-created example is so important)
When I have a table in MySQL:
-- Table holding one timestamped integer value per row.
CREATE TABLE t (
    id    INTEGER  PRIMARY KEY,
    time  DATETIME NOT NULL,
    value INTEGER  NOT NULL
)
and a mapping class:
class T(Base):
    """ORM mapping for table ``t``: an id, a timestamp and an integer value."""

    __tablename__ = 't'

    id = Column(INTEGER, primary_key=True, nullable=False, unique=True)
    time = Column(DATETIME, nullable=False)
    value = Column(INTEGER, nullable=False)
How can I select all values whose time falls in a given month from this table using SQLAlchemy?
MySQL has the month function: select value from t where month(time) = 4
but SQLAlchemy has no month function.
Without loading all Ts into the session, one can use Functions to filter non April objects straight-away:
from sqlalchemy.sql import func

# func.MONTH renders a call to the database's MONTH() function, so the
# filtering is done in SQL and only matching rows are returned.
qry = session.query(T).filter(func.MONTH(T.time) == 4)
for t in qry:
    print(t.value)
A very old question but a better answer is here:
from sqlalchemy import extract

# Compare the month part of T.time with 7 (July).
in_july = extract('month', T.time) == 7
session.query(T).filter(in_july).all()
This will return all the records in the database whose time falls in July.
If for example you want the records from all April months irrespective of year or day:
# Filters in Python: every T row is loaded, then only April rows are printed.
for t in session.query(T):
    if t.time.month == 4:
        print(t.value)
The situation is i have a database full of test results split into different test sets. Each test has a name, result , start time , start date , ... , what currently happens is over the week test sets can be run multiple times , giving multiple test results under a test set
Currently, when I want the latest result of each test in a test set, I query for the distinct test names and then, for each distinct name, query for that name ordered by startDate and startTime to get the latest one. This is a pain, because performance degrades substantially when a test set has over 100 different tests.
What I'm trying to do is get what I want in a single Django ORM call (one objects... query).
Here is the MySQL query that effectively represents what I want to achieve:
-- Target query as described by the asker: one row per testName within a test set.
-- NOTE(review): MAX(startDate) and MAX(startTime) are aggregated independently,
-- so the pair need not come from the same row; result and othertestinfo are
-- neither aggregated nor listed in GROUP BY.
select testName,result,MAX(startDate),MAX(startTime),othertestinfo from testset where testset_id = 'UniqueID' group by testName;
I'm having a hard time figuring out how to do this in Django, if it's even possible.
Any help would be much appreciated.
Thanks
Update 23/1/12
Models for what i am using.
class testCase(models.Model):
    """One recorded result of a named test, linked to a test set."""

    id = models.AutoField(primary_key=True)
    testName = models.CharField(max_length=50)
    result = models.CharField(max_length=50)
    precision = models.CharField(max_length=10)
    fileLocation = models.CharField(max_length=150)
    # The target is given by name because ``testset`` is declared after this
    # class; passing the class object here would raise NameError.  on_delete
    # is spelled out explicitly (newer Django versions require it).
    testset_id = models.ForeignKey('testset', on_delete=models.CASCADE)
    machine = models.CharField(max_length=15)
    startDate = models.DateField()
    startTime = models.TimeField()
class testset(models.Model):
    """A test set, keyed by the string ``testsetID``."""

    testsetID = models.CharField(max_length=100, primary_key=True)
    testsetName = models.CharField(max_length=40)
    platformName = models.CharField(max_length=15)
    osName = models.CharField(max_length=15)
    executionName = models.CharField(max_length=40)
    version = models.CharField(max_length=10)
    software = models.CharField(max_length=20)
    runType = models.CharField(max_length=20)
You can give a try to the following:
# testName, startDate and startTime are fields of testCase (testset has none
# of them), so the grouping has to run on testCase.  values('testName')
# groups by test name; the two Max() aggregates are computed independently.
t = testCase.objects.values('testName').annotate(Max('startDate'), Max('startTime'))
This would give you a list of dictionaries, one per distinct testName, with the keys testName, startDate__max and startTime__max.
This satisfies the condition; you can experiment with it further to retrieve the remaining columns.
I have a table posts and it stores 3 types of post, Topic, Reply and Comment. Each one has its parent id.
# Single table inheritance
class Post(Base):
    """Base row of the ``posts`` table; subclasses are told apart by ``discriminator``."""

    __tablename__ = 'posts'

    id = Column(Integer, primary_key=True)
    parent_id = Column(Integer, ForeignKey('posts.id'))  # self-reference to the parent post
    discriminator = Column(String(1))
    content = Column(UnicodeText)
    added_at = Column(DateTime)

    __mapper_args__ = {'polymorphic_on': discriminator}
class Topic(Post):
    """Post subtype with discriminator 't'; related Reply rows are in ``replies``."""

    replies = relation("Reply")

    __mapper_args__ = {'polymorphic_identity': 't'}
class Reply(Post):
    """Post subtype with discriminator 'r'; related Comment rows are in ``comments``."""

    comments = relation("Comment")

    __mapper_args__ = {'polymorphic_identity': 'r'}
class Comment(Post):
    """Post subtype with discriminator 'c'."""

    __mapper_args__ = {'polymorphic_identity': 'c'}
And I'm using eagerload_all() to get all the replies and comments belong to one topic:
# Fetch one topic by primary key, eagerly loading its replies and each
# reply's comments along the 'replies.comments' path.
eager_tree = eagerload_all('replies.comments')
session.query(Topic).options(eager_tree).get(topic_id)
My question is, if I want to get only replies and those replies' comments in certain time period, for example, this week, or this month. How should I use filter to achieve this?
Thank you
eagerload_all only changes when the children of a Topic are queried: immediately, rather than on first access to its replies and/or comments. Because you load the Topic object into the session, all of its related children are loaded as well. This gives you the first option:
Option-1: Filter in the python code instead of database:
Basically create a method on the Topic object similar to
class Topic(Post):
    ...

    def filter_replies(self, from_date, to_date):
        """Return the replies whose ``added_at`` lies within the given range.

        The filtering is done in Python over ``self.replies``; both bounds
        are inclusive.

        :param from_date: lower bound compared against ``added_at``.
        :param to_date: upper bound compared against ``added_at``.
        :returns: list of matching replies, in ``self.replies`` order.
        """
        return [r for r in self.replies
                if from_date <= r.added_at <= to_date]
Then you can do similar code on Replies to filter Comments or any combination of those. You get the idea.
Option-2: Filter on the database level:
In order to achieve this you need not load the Topic object, but filter directly on the Reply/Comment. Following query returns all Reply for a given Topic with a date filter:
topic_id = 1
from_date = date(2010, 9, 5)
to_date = date(2010, 9, 15)

# Query Reply rows directly, restricted to one topic and a date range, so the
# filtering happens in the database.
# NOTE(review): added_at is a DateTime column; comparing it with a plain date
# presumably excludes rows later in the day on to_date -- confirm the bounds.
q = session.query(Reply)
q = q.filter(Reply.parent_id == topic_id)
q = q.filter(Reply.added_at >= from_date)
q = q.filter(Reply.added_at <= to_date)
for r in q.all():
    print("Reply: {0}".format(r))
The version for the Comment is just a little bit more involved as you require an alias in order to overcome the SQL statement generation issue as all your objects are mapped to the same table name:
topic_id = 1
from_date = date(2010, 9, 5)
to_date = date(2010, 9, 15)

# Comment and Reply are mapped to the same table, so the Reply side of the
# join is aliased to keep the two roles apart in the generated SQL.
ralias = aliased(Reply)
q = session.query(Comment)
# Comment -> its parent Reply; join target and ON clause are passed as a tuple.
q = q.join((ralias, Comment.parent_id == ralias.id))
# Only replies that belong to the requested topic.
q = q.filter(ralias.parent_id == topic_id)
q = q.filter(Comment.added_at >= from_date)
q = q.filter(Comment.added_at <= to_date)
for c in q:
    print("Comment: {0}".format(c))
Obviously you can create a function that combines both pieces into a more comprehensive query.
To support "this week" or "this month" queries, you can either convert those periods into a date range as shown above or use the expression.func functionality of SQLAlchemy.