How to filter the values of two fields as one? - django-filter

How do I aggregate the results of two fields and show them as one in django-filter?
Take, for example, a model like this:
class Animal(models.Model):
    # Django choices must be (value, label) pairs.
    LEGS_CHOICES = ((2, '2'), (4, '4'), (8, '8'))
    legs = models.PositiveSmallIntegerField(choices=LEGS_CHOICES)

class Dog(Animal):
    pass

class Spider(Animal):
    pass
My django-filter filter class:
class AnimalFilterSet(django_filters.FilterSet):
    legs = django_filters.MultipleChoiceFilter(choices=Animal.LEGS_CHOICES, widget=forms.CheckboxSelectMultiple())

    class Meta:
        model = Animal
        fields = ['legs']
I would like to filter these two models by the same field and display the results as one.
Using a queryset I can do it like this:
Animal.objects.filter(Q(dog__legs=4) | Q(spider__legs=4))

I wrote my own filter:
class MultiMultipleChoiceFilter(django_filters.Filter):
    """
    This filter performs an OR query on the selected options
    across the given fields.
    """
    field_class = forms.MultipleChoiceField

    def __init__(self, fields, *args, **kwargs):
        super(MultiMultipleChoiceFilter, self).__init__(*args, **kwargs)
        self.fields = fields

    def filter(self, qs, value):
        value = value or ()
        # If every choice is selected, filtering is a no-op.
        if len(value) == len(self.field.choices):
            return qs
        # OR together one Q object per (field, value) combination.
        q = Q()
        for v in value:
            for f in self.fields:
                q |= Q(**{f: v})
        return qs.filter(q).distinct()
Example of use; the lookups to match are passed as a list in the first argument:
class AnimalFilterSet(django_filters.FilterSet):
    legs = MultiMultipleChoiceFilter(['dog__legs', 'spider__legs'], choices=Animal.LEGS_CHOICES, widget=forms.CheckboxSelectMultiple())

    class Meta:
        model = Animal
        fields = ['legs']
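
For completeness, a minimal sketch of wiring this FilterSet into a view; the view name and template path are illustrative assumptions, not part of the original question:

from django.shortcuts import render

def animal_list(request):
    # Bind the GET parameters to the filterset; the filtered queryset is filterset.qs.
    filterset = AnimalFilterSet(request.GET, queryset=Animal.objects.all())
    return render(request, 'animals/list.html', {'filter': filterset})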

Related

SQLAlchemy 1.4 table query that includes first result of second table [duplicate]

I have a SQLAlchemy model with a one-to-many relationship between table x and table y. The record (if any) with the greatest id in table y where y.x_id = x.id is special. Classes X and Y map tables x and y.
I know how to define X.all_y (ordered by y.id). How do I define X.latest_y, equivalent to X.all_y[-1]?
The purely relational way to do it requires using a subquery to get the "latest" or "max" value, correlated to the parent, and then equating that with the members of the collection. It means you'll get the best results if you put an index on the column that determines "the latest":
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('sqlite:///:memory:', echo='debug')
m = MetaData()

parent = Table('parent', m,
    Column('id', Integer, primary_key=True)
)

child = Table('child', m,
    Column('id', Integer, primary_key=True),
    Column('parent_id', Integer, ForeignKey('parent.id')),
    Column('sortkey', String)
)

m.create_all(engine)

class Parent(object):
    def __init__(self, children):
        self.all_c = children

class Child(object):
    def __init__(self, sortkey):
        self.sortkey = sortkey

# Correlated scalar subquery: the greatest sortkey among this parent's children.
latest_c = select([func.max(child.c.sortkey)]).\
                where(child.c.parent_id == parent.c.id).\
                correlate(parent).\
                as_scalar()

mapper(Parent, parent, properties={
    'all_c': relation(Child),
    'latest_c': relation(Child,
                    primaryjoin=and_(
                        child.c.sortkey == latest_c,
                        child.c.parent_id == parent.c.id
                    ),
                    uselist=False
    )
})
mapper(Child, child)

session = sessionmaker(engine)()

p1, p2, p3 = Parent([Child('a'), Child('b'), Child('c')]), \
             Parent([Child('b'), Child('c')]), \
             Parent([Child('f'), Child('g'), Child('c')])

session.add_all([p1, p2, p3])
session.commit()

assert p1.latest_c.sortkey == 'c'
assert p2.latest_c.sortkey == 'c'
assert p3.latest_c.sortkey == 'g'
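
The answer stresses indexing the column that determines "the latest"; a minimal sketch of such an index follows (the index name is an illustrative assumption, and it should be defined before m.create_all(engine), or emitted afterwards with .create(engine)):

# Composite index so the correlated MAX(sortkey)-per-parent lookup stays cheap.
Index('ix_child_parent_sortkey', child.c.parent_id, child.c.sortkey)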
Alternatively, on some platforms you can use LIMIT, which can produce faster results since it avoids the aggregation and can join the collection item on its primary key:
latest_c = select([child.c.id]).\
                where(child.c.parent_id == parent.c.id).\
                order_by(child.c.sortkey.desc()).\
                limit(1).\
                correlate(parent).\
                as_scalar()

mapper(Parent, parent, properties={
    'all_c': relation(Child),
    'latest_c': relation(Child,
                    primaryjoin=and_(
                        child.c.id == latest_c,
                        child.c.parent_id == parent.c.id
                    ),
                    uselist=False
    )
})
Here is a version of @zzzeek's answer that uses declarative rather than imperative mapping, using declared_attr to insert the relationship into the parent's __mapper_args__.
import sqlalchemy as sa
from sqlalchemy import orm

Base = orm.declarative_base()

class Child(Base):
    __tablename__ = 'children'

    id = sa.Column(sa.Integer, primary_key=True)
    sortkey = sa.Column(sa.Integer, nullable=False)
    parent_id = sa.Column(sa.Integer, sa.ForeignKey('parents.id'))
    parent = orm.relationship('Parent', back_populates='children')

class Parent(Base):
    __tablename__ = 'parents'

    id = sa.Column(sa.Integer, primary_key=True)
    children = orm.relationship('Child', back_populates='parent')

    @orm.declared_attr
    def __mapper_args__(cls):
        children = Child.__table__
        # Correlated scalar subquery for the child with the greatest sortkey.
        most_recent_child = (
            sa.select(children.c.id)
            .where(children.c.parent_id == cls.id)
            .order_by(children.c.sortkey.desc())
            .limit(1)
            .correlate(cls.__table__)
            .scalar_subquery()
        )
        rel = orm.relation(
            Child,
            primaryjoin=sa.and_(
                Child.id == most_recent_child, Child.parent_id == cls.id
            ),
            uselist=False,
            viewonly=True,
        )
        return {'properties': {'latest_child': rel}}

# Build and test.
engine = sa.create_engine('sqlite://', echo=True, future=True)
Base.metadata.create_all(engine)
Session = orm.sessionmaker(engine, future=True)

with Session.begin() as s:
    children = [Child(sortkey=i) for i in range(1, 6)]
    parent = Parent(children=children)
    s.add(parent)

with Session() as s:
    w = s.scalars(sa.select(Parent)).first()
    assert w.latest_child.sortkey == 5, f'{w.latest_child.sortkey=}'
    assert len(w.children) == 5, f'{len(w.children)=}'

How to add extra fields in ValueQuerySet (Django)?

Basically, I want to convert the query_set to JSON. But I also want to add one more field, something like size = <some number>, which is not present in the query_set attributes (it is a computed attribute). Can you tell me how to do it?
query_set = PotholeCluster.objects.all().values('bearing', 'center_lat', 'center_lon', 'grid_id')
return JsonResponse(list(query_set), safe=False)
I tried the code below. It works, but I would like to know if there is a cleaner way to do this.
query_set = PotholeCluster.objects.all()
response_list = []
for pc in query_set:
    d = {}
    d['bearing'] = pc.get_bearing()
    d['center_lat'] = pc.center_lat
    d['center_lon'] = pc.center_lon
    d['grid_id'] = pc.grid_id
    d['size'] = pc.pothole_set.all().count()
    response_list.append(d)

serialized = json.dumps(response_list)
return HttpResponse(serialized, content_type='application/json')
class PotholeCluster(models.Model):
    center_lat = models.FloatField(default=0)
    center_lon = models.FloatField(default=0)
    snapped_lat = models.FloatField(default=0)
    snapped_lon = models.FloatField(default=0)
    size = models.IntegerField(default=-1)
    # avg speed in km/h
    speed = models.FloatField(default=-1)
    # in meters
    accuracy = models.FloatField(default=-1)
    # avg bearing in degrees
    bearing = models.FloatField(default=-1)
    grid = models.ForeignKey(
        Grid,
        on_delete=models.SET_NULL,
        null=True,
        blank=True
    )

    def __str__(self):
        raw_data = serialize('python', [self])
        output = json.dumps(raw_data[0]['fields'])
        return "pk = {}|{}".format(self.id, output)

    def get_bearing(self):
        if self.bearing != -1:
            return self.bearing
        potholes = self.pothole_set.all()
        bearings = [pothole.location.bearing for pothole in potholes]
        bearings.sort()
        i = 0
        if bearings[-1] >= 350:
            while bearings[-1] - bearings[i] >= 340:
                if bearings[i] <= 10:
                    bearings[i] += 360
                i += 1
        self.bearing = sum(bearings) / len(bearings) % 360
        self.save()
        return self.bearing

    def get_size(self):
        if self.size != -1:
            return self.size
        self.size = len(self.pothole_set.all())
        self.save()
        return self.size
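
A cleaner option for the count is to let the database compute it with annotate. A minimal sketch, assuming the reverse relation from Pothole uses the default query name (pothole, matching the pothole_set accessor above); the annotation is called num_potholes because the model already has a size field, which an annotation of the same name would clash with:

from django.db.models import Count

query_set = PotholeCluster.objects.annotate(
    num_potholes=Count('pothole')  # computed per cluster in SQL
).values('bearing', 'center_lat', 'center_lon', 'grid_id', 'num_potholes')
return JsonResponse(list(query_set), safe=False)

Note this returns the stored bearing field as-is; the lazy caching in get_bearing() would still have to run in Python as before.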

Scrapy MySQL pipeline: spider closed before pipeline finished

I use Scrapy to crawl a page which contains a list of items, and I save each item to a MySQL database.
But the problem is that the spider closes before all items are stored in MySQL; each run of the spider ends with a different stored count.
Could you please help me figure out how to solve this?
Below is my sample code:
Spider
import re

import requests
import scrapy
from scrapy.selector import Selector

class FutianSpider(scrapy.Spider):
    name = 'futian_soufang'
    allowed_domains = ["fang.com"]
    start_urls = []

    def __init__(self, category=None, *args, **kwargs):
        super(FutianSpider, self).__init__(*args, **kwargs)
        self.count = 0

    def closed(self, reason):
        print "*" * 20 + str(self.count)

    def start_requests(self):
        url = "http://fangjia.fang.com/pghouse-c0sz/a085-h321-i3{}/"
        # Fetch the first page synchronously just to read the total page count.
        response = requests.get(url.format(1))
        response.encoding = 'gb2312'
        strpages = Selector(text=response.text).xpath('//p[contains(@class, "pages")]/span[last()]/a/text()').extract()
        pages = int(strpages[0])
        for num in range(1, pages + 1):
            yield scrapy.Request(url.format(num), callback=self.parse_page)

    def parse_page(self, response):
        houses = response.xpath("//div[@class='list']//div[@class='house']")
        for house in houses:
            self.count += 1
            housespan_hyperlink = house.xpath(".//span[@class='housetitle']/a")
            house_title = housespan_hyperlink.xpath("text()").extract()[0].strip()
            house_link_rel = housespan_hyperlink.xpath("@href").extract()[0].strip()
            house_link = response.urljoin(house_link_rel)

            address = house.xpath(".//span[@class='pl5']/text()").extract()[0].strip()

            esf_keyword = u'二手房'  # "second-hand homes"
            esf_span = house.xpath(".//span[contains(text(),'%s')]" % esf_keyword)
            esf_number = esf_span.xpath("./a/text()").extract()[0].strip()
            esf_number = int(re.findall(r"\d+", esf_number)[0])
            esf_link = esf_span.xpath("./a/@href").extract()[0].strip()

            zf_hyperlink = house.xpath(".//span[@class='p110']/a")
            zf_number = zf_hyperlink.xpath("text()").extract()[0].strip()
            zf_number = int(re.findall(r"\d+", zf_number)[0])
            zf_link = zf_hyperlink.xpath("@href").extract()[0].strip()

            price = 0
            try:
                price = int(house.xpath(".//span[@class='price']/text()").extract()[0].strip())
            except (IndexError, ValueError):
                pass

            change = 0.0
            try:
                increase_span = house.xpath(".//span[contains(@class, 'nor')]")
                changetext = increase_span.xpath("text()").extract()[0].strip()
                change = float(changetext[:changetext.index('%')])
                if len(increase_span.css(".green-down")) > 0:
                    change *= -1
            except (IndexError, ValueError):
                pass

            print house_title, house_link, address, esf_number, esf_link, zf_number, zf_link, price, change

            item = XiaoquItem(
                title=house_title,
                url=house_link,
                address=address,
                esf_number=esf_number,
                esf_link=esf_link,
                zf_number=zf_number,
                zf_link=zf_link,
                price=price,
                change=change
            )
            yield item
Item:
from scrapy import Field, Item

class XiaoquItem(Item):
    title = Field()
    url = Field()
    address = Field()
    esf_number = Field()
    esf_link = Field()
    zf_number = Field()
    zf_link = Field()
    price = Field()
    change = Field()
Pipeline:
import datetime

from scrapy import log
from scrapy.utils.project import get_project_settings
from twisted.enterprise import adbapi

class MySQLPipeLine(object):
    def __init__(self):
        settings = get_project_settings()
        dbargs = settings.get('DB_CONNECT')
        db_server = settings.get('DB_SERVER')
        self.dbpool = adbapi.ConnectionPool(db_server, **dbargs)

    def close_spider(self, spider):
        self.dbpool.close()

    def process_item(self, item, spider):
        if isinstance(item, XiaoquItem):
            self._process_plot(item)
        elif isinstance(item, PlotMonthlyPriceItem):
            self._process_plot_price(item)
        return item

    def _process_plot(self, item):
        # Run the db query in a thread pool.
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self._handle_error, item)

    def _conditional_insert(self, conn, item):
        # Create the record if it doesn't exist; this block runs in its own thread.
        conn.execute("select * from houseplot where title = %s", (item["title"],))
        result = conn.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            conn.execute(
                "insert into houseplot(title, url, address, esf_number, esf_link, zf_number, zf_link, price, price_change, upsert_time) "
                "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (item["title"], item["url"], item["address"], int(item["esf_number"]), item["esf_link"],
                 item["zf_number"], item["zf_link"], item["price"], item["change"], datetime.datetime.now())
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def _handle_error(self, failure, item):
        log.err(failure)

    def _process_plot_price(self, item):
        query = self.dbpool.runInteraction(self._conditional_insert_price, item)
        query.addErrback(self._handle_error, item)

    def _conditional_insert_price(self, conn, item):
        # Create the record if it doesn't exist; this block runs in its own thread.
        conn.execute("select * from houseplot_monthly_price where title = %s and price_date = %s",
                     (item["title"], item["price_date"]))
        result = conn.fetchone()
        if result:
            log.msg("Price Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            conn.execute(
                "insert into houseplot_monthly_price(title, price_date, price) values (%s, %s, %s)",
                (item["title"], item["price_date"], item["price"])
            )
            log.msg("Price Item stored in db: %s" % item, level=log.DEBUG)

How do I insert a field having a default value in Slick

Given a mapping having a NOT NULL field str with a default value:
case class Tst(id: Option[Int] = None, ii: Int, str: String)

class Tsts(tag: Tag) extends Table[Tst](tag, "tsts") {
  def id = column[Option[Int]]("id", O.PrimaryKey, O.AutoInc)
  def ii = column[Int]("ii")
  def str = column[String]("str", O.Default("ddd"))

  def * = (id, ii, str) <> (Tst.tupled, Tst.unapply)
}
how do I insert an object, specifying the field value when I have it:
Tst(ii = 1, str = "aaa")
and skipping it when I don't:
Tst(ii = 1)
Yes, I know the last statement will not compile.
I tried using Option[String] and other things; it ends up either inserting null or failing with a "can't be null" error.
The compiler depends on you putting the parameters with default values at the end, like:
scala> case class TST(ii: Int, str: String = "aaa", id: Option[Int] = None)
defined class TST

scala> new TST(3)
res0: TST = TST(3,aaa,None)

Edit: Just realized I didn't answer completely:

scala> new TST(3, id = Some(1))
res1: TST = TST(3,aaa,Some(1))

scala> new TST(3, str = "bbb")
res2: TST = TST(3,bbb,None)

Adjacency list + Abstract Base Class Inheritance used in relationship

Following is an example of an adjacency list plus inheritance. This works as expected, but if I try to use it in another model, Mammut, as a relationship, it throws this error:
Traceback (most recent call last):
  File "bin/py", line 73, in <module>
    exec(compile(open(__file__).read(), __file__, "exec"))
  File "../adjacency_list.py", line 206, in <module>
    create_entries(IntTreeNode)
  File "../adjacency_list.py", line 170, in create_entries
    mut.nodes.append(node)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 304, in append
    attributes.instance_dict(self.instance), item, None)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 202, in append
    self.fire_append_event(state, dict_, value, initiator)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/dynamic.py", line 99, in fire_append_event
    value = fn(state, value, initiator or self._append_token)
  File "/home/xxx/.buildout/eggs/SQLAlchemy-0.9.8-py3.4-linux-x86_64.egg/sqlalchemy/orm/attributes.py", line 1164, in emit_backref_from_collection_append_event
    child_impl.append(
AttributeError: '_ProxyImpl' object has no attribute 'append'
The Code:
from sqlalchemy import (Column, ForeignKey, Integer, String, create_engine,
                        Float)
from sqlalchemy.orm import (Session, relationship, backref, joinedload_all)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declared_attr, AbstractConcreteBase

Base = declarative_base()

class Mammut(Base):
    __tablename__ = "mammut"

    id = Column(Integer, primary_key=True)
    nodes = relationship(
        'TreeNode',
        backref='mammut',
        lazy='dynamic',
        cascade="all, delete-orphan",
        # viewonly=True
    )

class TreeNode(AbstractConcreteBase, Base):
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)
    depth = Column(Integer, default=0)
    data_type = Column(String(50))

    @declared_attr
    def mammut_id(cls):
        return Column(Integer, ForeignKey('mammut.id'))

    @declared_attr
    def __tablename__(cls):
        return cls.__name__.lower()

    @declared_attr
    def __mapper_args__(cls):
        ret = {}
        if cls.__name__ != "TreeNode":
            ret = {'polymorphic_identity': cls.__name__,
                   'concrete': True,
                   # XXX redundant, makes sense only if we use one table
                   'polymorphic_on': cls.data_type}
        return ret

    @declared_attr
    def parent_id(cls):
        _fid = '%s.id' % cls.__name__.lower()
        return Column(Integer, ForeignKey(_fid))

    @declared_attr
    def children(cls):
        _fid = '%s.id' % cls.__name__
        return relationship(cls.__name__,
                            # cascade deletions
                            cascade="all, delete-orphan",
                            # many to one + adjacency list - remote_side
                            # is required to reference the 'remote'
                            # column in the join condition.
                            backref=backref("parent", remote_side=_fid),
                            # children will be represented as a dictionary
                            # on the "name" attribute.
                            collection_class=attribute_mapped_collection(
                                'name'),
                            )

    def get_path(self, field):
        if self.parent:
            return self.parent.get_path(field) + [getattr(self, field)]
        else:
            return [getattr(self, field)]

    @property
    def name_path(self):
        # XXX there is no way to query for it, except if we add a function
        # with a cte (recursive query) to our database; see
        # https://stackoverflow.com/questions/14487386/sqlalchemy-recursive-hybrid-property-in-a-tree-node
        return '/'.join(self.get_path(field='name'))

    def __init__(self, name, value=None, parent=None):
        self.name = name
        self.parent = parent
        self.depth = 0
        self.value = value
        if self.parent:
            self.depth = self.parent.depth + 1

    def __repr__(self):
        ret = "%s(name=%r, id=%r, parent_id=%r, value=%r, depth=%r, " \
              "name_path=%s data_type=%s)" % (
                  self.__class__.__name__,
                  self.name,
                  self.id,
                  self.parent_id,
                  self.value,
                  self.depth,
                  self.name_path,
                  self.data_type
              )
        return ret

    def dump(self, _indent=0):
        return " " * _indent + repr(self) + \
            "\n" + \
            "".join([
                c.dump(_indent + 1)
                for c in self.children.values()]
            )

class IntTreeNode(TreeNode):
    value = Column(Integer)

class FloatTreeNode(TreeNode):
    value = Column(Float)
    miau = Column(String(50), default='zuff')

    def __repr__(self):
        ret = "%s(name=%r, id=%r, parent_id=%r, value=%r, depth=%r, " \
              "name_path=%s data_type=%s miau=%s)" % (
                  self.__class__.__name__,
                  self.name,
                  self.id,
                  self.parent_id,
                  self.value,
                  self.depth,
                  self.name_path,
                  self.data_type,
                  self.miau
              )
        return ret

if __name__ == '__main__':
    engine = create_engine('sqlite:///', echo=True)

    def msg(msg, *args):
        msg = msg % args
        print("\n\n\n" + "-" * len(msg.split("\n")[0]))
        print(msg)
        print("-" * len(msg.split("\n")[0]))

    msg("Creating Tree Table:")
    Base.metadata.create_all(engine)
    session = Session(engine)

    def create_entries(Cls):
        node = Cls('rootnode', value=2)
        Cls('node1', parent=node)
        Cls('node3', parent=node)

        node2 = Cls('node2')
        Cls('subnode1', parent=node2)
        node.children['node2'] = node2
        Cls('subnode2', parent=node.children['node2'])

        msg("Created new tree structure:\n%s", node.dump())
        msg("flush + commit:")

        # XXX this throws the error
        mut = Mammut()
        mut.nodes.append(node)
        session.add(mut)

        session.add(node)
        session.commit()

        msg("Tree After Save:\n %s", node.dump())

        Cls('node4', parent=node)
        Cls('subnode3', parent=node.children['node4'])
        Cls('subnode4', parent=node.children['node4'])
        Cls('subsubnode1', parent=node.children['node4'].children['subnode3'])

        # remove node1 from the parent, which will trigger a delete
        # via the delete-orphan cascade.
        del node.children['node1']
        msg("Removed node1. flush + commit:")
        session.commit()

        msg("Tree after save:\n %s", node.dump())
        msg("Emptying out the session entirely, "
            "selecting tree on root, using eager loading to join four levels deep.")
        session.expunge_all()
        node = session.query(Cls).\
            options(joinedload_all("children", "children",
                                   "children", "children")).\
            filter(Cls.name == "rootnode").\
            first()
        msg("Full Tree:\n%s", node.dump())

        # msg("Marking root node as deleted, flush + commit:")
        # session.delete(node)
        # session.commit()

    create_entries(IntTreeNode)
    create_entries(FloatTreeNode)

    nodes = session.query(TreeNode).filter(
        TreeNode.name == "rootnode").all()
    for idx, n in enumerate(nodes):
        msg("Full (%s) Tree:\n%s" % (idx, n.dump()))
Concrete inheritance can be very difficult, and AbstractConcreteBase itself has bugs in 0.9 which get in the way of elaborate mappings like this being used.
Using 1.0 (not released; use git master), I can get the major elements going as follows:
from sqlalchemy import Column, String, Integer, create_engine, ForeignKey, Float
from sqlalchemy.orm import Session, relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.ext.declarative import declared_attr, AbstractConcreteBase

Base = declarative_base()

class Mammut(Base):
    __tablename__ = "mammut"

    id = Column(Integer, primary_key=True)
    nodes = relationship(
        'TreeNode',
        lazy='dynamic',
        back_populates='mammut',
    )

class TreeNode(AbstractConcreteBase, Base):
    id = Column(Integer, primary_key=True)
    name = Column(String)

    @declared_attr
    def __tablename__(cls):
        if cls.__name__ == 'TreeNode':
            return None
        else:
            return cls.__name__.lower()

    @declared_attr
    def __mapper_args__(cls):
        return {'polymorphic_identity': cls.__name__, 'concrete': True}

    @declared_attr
    def parent_id(cls):
        return Column(Integer, ForeignKey(cls.id))

    @declared_attr
    def mammut_id(cls):
        return Column(Integer, ForeignKey('mammut.id'))

    @declared_attr
    def mammut(cls):
        return relationship("Mammut", back_populates="nodes")

    @declared_attr
    def children(cls):
        return relationship(
            cls,
            back_populates="parent",
            collection_class=attribute_mapped_collection('name'),
        )

    @declared_attr
    def parent(cls):
        return relationship(
            cls, remote_side="%s.id" % cls.__name__,
            back_populates='children')

class IntTreeNode(TreeNode):
    value = Column(Integer)

class FloatTreeNode(TreeNode):
    value = Column(Float)
    miau = Column(String(50), default='zuff')

e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

session = Session(e)

root = IntTreeNode(name='root')
IntTreeNode(name='n1', parent=root)
n2 = IntTreeNode(name='n2', parent=root)
IntTreeNode(name='n2n1', parent=n2)

m1 = Mammut()
m1.nodes.append(n2)
m1.nodes.append(root)

session.add(root)
session.commit()

session.close()

root = session.query(TreeNode).filter_by(name='root').one()
print root.children
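
Since children is an attribute_mapped_collection keyed on name, the loaded tree reads like nested dicts; a small usage sketch under the mapping above:

# Dict-style lookup by each node's "name" key.
n2 = root.children['n2']
print n2.children['n2n1'].name  # -> n2n1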