I tried to make a minimal reproducible example of what I currently have: a QSortFilterProxyModel() to find text, plus a custom SortFilterProxyModel to show only the rows whose checkboxes are selected.
It looks like the two proxies conflict with each other and return the wrong row number. (There is also an option for adding a value via a dialog; that too goes to the wrong row with the custom proxy enabled, but works without it.)
import os

from PyQt5 import QtWidgets, uic  # assuming PyQt5, given uic.loadUi
from PyQt5.QtCore import QSortFilterProxyModel


class winConfigurator(QtWidgets.QMainWindow):
    def __init__(self, data=None):
        super(winConfigurator, self).__init__()
        uic.loadUi(os.path.join(os.path.dirname(__file__), 'winConfiguratorView.ui'), self)
        self.leSearch.returnPressed.connect(self.searchField)
        self.chMatchSelected.toggled.connect(self.showSelected)

    def readFile(self, filename):
        self.model = TableModel([headers, newRows])
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setSourceModel(self.model)

        # Proxy for 'Show Selected'
        self.custom_proxy_model = SortFilterProxyModel()
        self.custom_proxy_model.setSourceModel(self.proxy_model)
        self.tableView.setModel(self.custom_proxy_model)

    def searchField(self):
        self.proxy_model.setFilterFixedString(self.leSearch.text())

    def showSelected(self, state=None):
        self.custom_proxy_model.clearFilter()
        checkstate_items = self.model.checks.items()
        if state == True:
            self.custom_proxy_model.setFilterByCheckbox(checkstate_items)


class SortFilterProxyModel(QSortFilterProxyModel):
    def __init__(self, *args, **kwargs):
        QSortFilterProxyModel.__init__(self, *args, **kwargs)
        self.filters = {}

    def setFilterByCheckbox(self, checkstates={}):
        self.filters = checkstates
        self.invalidateFilter()

    def clearFilter(self):
        self.filters = {}
        self.invalidateFilter()

    def filterAcceptsRow(self, source_row, source_parent):
        """
        Check if the checkbox is checked and show this row.
        Slow: 7 seconds for 50k rows.
        """
        try:
            values = []
            if self.filters:
                for index, is_checked in self.filters:
                    if is_checked:
                        row = index.row()
                        model = self.sourceModel()
                        if hasattr(model, 'mapToSource'):
                            index = model.index(source_row, 0, source_parent)
                            if not index.parent().isValid():
                                modelIndex = model.mapToSource(index)
                                source_row = modelIndex.row()
                        if row == source_row:
                            values.append(index)
                return any(values)
            return True
        except Exception as e:
            # print(e)
            return True
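For reference, my understanding is that with two stacked proxies an index has to be mapped through each proxy in turn before its row number means anything in the source model. A hypothetical helper (not part of my code above) sketching that mapping:

def view_index_to_source_row(view, index):
    # Walk down the proxy chain (custom proxy -> text proxy -> model),
    # translating one level per mapToSource() call.
    model = view.model()
    while hasattr(model, 'mapToSource'):
        index = model.mapToSource(index)
        model = model.sourceModel()
    return index.row()  # row number in the original TableModel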
In a testing suite I have a fixture that drops all the tables in an engine and then creates them all afresh. After this fixture logic, my test case runs, using the newly created tables.
The fixture and the test case are run in the MainThread, while the database consumer is a web application server run in another thread.
However, I keep getting: sqlite3.OperationalError: no such table: ***
I've checked that they are using the same in-memory engine, but different connections (this is correct). And I've checked that the fixture does run before the consumer thread starts running.
What could be the possible cause?
My code is as below:
import os

import cherrypy
import pytest
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


class DAL:
    def __init__(self,
                 path="database",
                 filename=None,
                 conn_string=None,
                 echo=False):
        if filename is None and conn_string is None:
            conn_string = "sqlite:///:memory:"
        elif conn_string is not None:
            conn_string = conn_string
        else:
            conn_string = f'sqlite:///{os.path.abspath(path)}/{filename}'
        self.conn_string = conn_string
        engine = create_engine(conn_string, echo=echo)
        Session_Factory = sessionmaker(bind=engine)
        self.Session = sqlalchemy.orm.scoped_session(Session_Factory)

    def __str__(self):
        return f"<DAL>object: {self.conn_string}, at {hex(id(self))}"

    def get_a_dbsession(self):
        opened_db = self.Session()
        return opened_db

    def __enter__(self):
        return self.get_a_dbsession()

    def __exit__(self, exception_type, exception_value, exception_traceback):
        opened_db = self.Session()
        try:
            opened_db.commit()
        except:
            opened_db.rollback()
            raise
        else:
            opened_db.close()
            raise exception_type
        finally:
            self.Session.remove()

    def create_schema(self):
        SchemaBase.metadata.create_all(self.Session().connection().engine)
class SAEnginePlugin(cherrypy.process.plugins.SimplePlugin):
    def __init__(self, bus, dal):
        """
        The plugin is registered to the CherryPy engine.
        """
        cherrypy.process.plugins.SimplePlugin.__init__(self, bus)
        self.dal = dal

    def start(self):
        self.bus.subscribe("bind-session", self.bind)

    def stop(self):
        self.bus.unsubscribe("bind-session", self.bind)
        if self.dal:
            del self.dal

    def bind(self):
        """
        Whenever this plugin receives the 'bind-session' message, it applies
        this method and binds the received session to the engine.
        """
        # self.dal.Session.configure(bind=self.dal.engine)
        session = self.dal.get_a_dbsession()
        return session


class SATool(cherrypy.Tool):
    def __init__(self):
        """
        This tool binds a session to the engine each time
        a request starts and commits/rolls back whenever
        the request terminates.
        """
        cherrypy.Tool.__init__(self,
                               'on_start_resource',
                               self.bind_session,
                               priority=20)

    def _setup(self):
        cherrypy.Tool._setup(self)
        cherrypy.request.hooks.attach('on_end_resource',
                                      self.close_session,
                                      priority=80)

    def bind_session(self):
        """
        Attaches a session to the request's scope by requesting
        the SA plugin to bind a session to the SA engine.
        """
        session = cherrypy.engine.publish('bind-session').pop()
        cherrypy.request.db = session

    def close_session(self):
        """
        Commits the current transaction or rolls back if an error occurs.
        In all cases, the current session is unbound and therefore
        no longer usable.
        """
        if not hasattr(cherrypy.request, 'db'):
            return
        try:
            cherrypy.request.db.commit()
        except:
            cherrypy.request.db.rollback()
            raise
        finally:
            cherrypy.request.db.close()
            cherrypy.request.db = None


# Register the SQLAlchemy tool
cherrypy.tools.db = SATool()
class UnitServer:
    ...

    @cherrypy.expose
    @cherrypy.tools.json_in()
    def list_filtered_entries(self):
        ...
        queryOBJ = cherrypy.request.db.query(classmodel_obj)
        ...


############# main module code below #############

# mocking 'db':
dal = database.DAL()

# configure cherrypy:
SAEnginePlugin(cherrypy.engine, dal).subscribe()


@pytest.fixture(autouse=True)  # automatically run before every test case
def mocked_dal(request):
    # first, clean the database by dropping all tables:
    database.SchemaBase.metadata.drop_all(dal.Session().connection().engine)
    # second, create the schema from blank:
    dal.create_schema()
    # third, insert some dummy data record:
    ...
    db.commit()


class TestMyUnitServer(cherrypy.test.helper.CPWebCase):
    @staticmethod
    def setup_server():
        ...
        server_app = UnitServer()
        cherrypy.tree.mount(server_app, '', {'/': {'tools.db.on': True}})

    def test_list_filtered_entries_allentries(self):
        ...
        self.getPage('/list_filtered_entries',
                     headers=[("Accept", "application/json"),
                              ('Content-type', 'application/json'),
                              ('Content-Length',
                               str(len(json.dumps(query_params)))),
                              ("Connection", "keep-alive"),
                              ("Cache-Control", "max-age=0")],
                     body=serialized_query_params,
                     method="POST")
        self.assertStatus('200 OK')
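One suspicion: every new connection to sqlite:///:memory: opens its own private, empty database, so tables created on the fixture's connection would be invisible to the server thread's connection. A sketch of the usual workaround (SQLAlchemy's documented StaticPool recipe; not in my code above):

from sqlalchemy import create_engine
from sqlalchemy.pool import StaticPool

# Reuse one and the same DBAPI connection for every checkout, so the
# fixture thread and the CherryPy thread see the same in-memory database.
engine = create_engine(
    "sqlite://",
    connect_args={"check_same_thread": False},
    poolclass=StaticPool,
)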
I've written Graphene models for polymorphic entities represented in my database with SQLAlchemy.
The problem is simple:
I want to create an interface that reflects my SQLAlchemy models for Graphene but also either a) implements Node or b) does not conflict with Node and allows me to retrieve the model's ID without needing to add ... on Node {id} to the query string.
I have to exclude the ID field from my ORM-based interface or the field conflicts with the Node interface; but by excluding it, in order to get the ID you then need to add ... on Node { id }, which is ugly.
I created an SQLAlchemyInterface object that extends graphene.Interface. Many (but not all) of my models use this as well as Node as interfaces. The first problem was that this contains an ID field, and it conflicted with the Node interface.
I excluded the id field so it would not interfere with Node, but then found I could no longer query ID directly on my models and had to add ... on Node {id} to the query string.
I then decided to have this SQLAlchemyInterface extend Node. I don't love this approach, because I need to use another (named) Node interface for all of my models that don't necessarily need to implement SQLAlchemyInterface.
class SQLAlchemyInterface(Node):
    @classmethod
    def __init_subclass_with_meta__(
        cls,
        model=None,
        registry=None,
        only_fields=(),
        exclude_fields=(),
        connection_field_factory=default_connection_field_factory,
        _meta=None,
        **options
    ):
        _meta = SQLAlchemyInterfaceOptions(cls)
        _meta.name = f'{cls.__name__}Node'

        autoexclude_columns = exclude_autogenerated_sqla_columns(model=model)
        exclude_fields += autoexclude_columns

        assert is_mapped_class(model), (
            "You need to pass a valid SQLAlchemy Model in " '{}.Meta, received "{}".'
        ).format(cls.__name__, model)

        if not registry:
            registry = get_global_registry()

        assert isinstance(registry, Registry), (
            "The attribute registry in {} needs to be an instance of "
            'Registry, received "{}".'
        ).format(cls.__name__, registry)

        sqla_fields = yank_fields_from_attrs(
            construct_fields(
                model=model,
                registry=registry,
                only_fields=only_fields,
                exclude_fields=exclude_fields,
                connection_field_factory=connection_field_factory
            ),
            _as=Field
        )

        if not _meta:
            _meta = SQLAlchemyInterfaceOptions(cls)

        _meta.model = model
        _meta.registry = registry

        connection = Connection.create_type(
            "{}Connection".format(cls.__name__), node=cls)

        assert issubclass(connection, Connection), (
            "The connection must be a Connection. Received {}"
        ).format(connection.__name__)

        _meta.connection = connection

        if _meta.fields:
            _meta.fields.update(sqla_fields)
        else:
            _meta.fields = sqla_fields

        super(SQLAlchemyInterface, cls).__init_subclass_with_meta__(_meta=_meta, **options)

    @classmethod
    def Field(cls, *args, **kwargs):  # noqa: N802
        return NodeField(cls, *args, **kwargs)

    @classmethod
    def node_resolver(cls, only_type, root, info, id):
        return cls.get_node_from_global_id(info, id, only_type=only_type)

    @classmethod
    def get_node_from_global_id(cls, info, global_id, only_type=None):
        try:
            node: DeclarativeMeta = one_or_none(session=info.context.get('session'), model=cls._meta.model, id=global_id)
            return node
        except Exception:
            return None

    @staticmethod
    def from_global_id(global_id):
        return global_id

    @staticmethod
    def to_global_id(type, id):
        return id
Interface impls, Models + Query code examples:
class CustomNode(Node):
    class Meta:
        name = 'UuidNode'

    @staticmethod
    def to_global_id(type, id):
        return '{}:{}'.format(type, id)

    @staticmethod
    def get_node_from_global_id(info, global_id, only_type=None):
        type, id = global_id.split(':')
        if only_type:
            # We assure that the node type that we want to retrieve
            # is the same that was indicated in the field type
            assert type == only_type._meta.name, 'Received not compatible node.'
        if type == 'User':
            return one_or_none(session=info.context.get('session'), model=User, id=global_id)
        elif type == 'Well':
            return one_or_none(session=info.context.get('session'), model=Well, id=global_id)


class ControlledVocabulary(SQLAlchemyInterface):
    class Meta:
        name = 'ControlledVocabularyNode'
        model = BaseControlledVocabulary


class TrackedEntity(SQLAlchemyInterface):
    class Meta:
        name = 'TrackedEntityNode'
        model = TrackedEntityModel


class Request(SQLAlchemyObjectType):
    """Request node."""
    class Meta:
        model = RequestModel
        interfaces = (TrackedEntity,)


class User(SQLAlchemyObjectType):
    """User node."""
    class Meta:
        model = UserModel
        interfaces = (CustomNode,)


class CvFormFieldValueType(SQLAlchemyObjectType):
    class Meta:
        model = CvFormFieldValueTypeModel
        interfaces = (ControlledVocabulary,)
common_field_kwargs = {'id': graphene.UUID(required=False), 'label': graphene.String(required=False)}


class Query(graphene.ObjectType):
    """Query objects for GraphQL API."""

    node = CustomNode.Field()
    te_node = TrackedEntity.Field()
    cv_node = ControlledVocabulary.Field()

    # Non-Tracked Entities:
    users: List[User] = SQLAlchemyConnectionField(User)

    # Generic Query for any Tracked Entity:
    tracked_entities: List[TrackedEntity] = FilteredConnectionField(TrackedEntity, sort=None, filter=graphene.Argument(TrackedEntityInput))

    # Generic Query for any Controlled Vocabulary:
    cv: ControlledVocabulary = graphene.Field(ControlledVocabulary,
                                              controlled_vocabulary_type_id=graphene.UUID(required=False),
                                              base_entry_key=graphene.String(required=False),
                                              **common_field_kwargs)
    cvs: List[ControlledVocabulary] = FilteredConnectionField(ControlledVocabulary, sort=None, filter=graphene.Argument(CvInput))

    @staticmethod
    def resolve_with_filters(info: ResolveInfo, model: Type[SQLAlchemyObjectType], **kwargs):
        query = model.get_query(info)
        log.debug(kwargs)
        for filter_name, filter_value in kwargs.items():
            model_filter_column = getattr(model._meta.model, filter_name, None)
            log.debug(type(filter_value))
            if not model_filter_column:
                continue
            if isinstance(filter_value, SQLAlchemyInputObjectType):
                log.debug(True)
                filter_model = filter_value.sqla_model
                q = FilteredConnectionField.get_query(filter_model, info, sort=None, **kwargs)
                # noinspection PyArgumentList
                query = query.filter(model_filter_column == q.filter_by(**filter_value))
                log.info(query)
            else:
                query = query.filter(model_filter_column == filter_value)
        return query

    def resolve_tracked_entity(self, info: ResolveInfo, **kwargs):
        entity: TrackedEntity = Query.resolve_with_filters(info=info, model=BaseTrackedEntity, **kwargs).one()
        return entity

    def resolve_tracked_entities(self, info, **kwargs):
        query = Query.resolve_with_filters(info=info, model=BaseTrackedEntity, **kwargs)
        tes: List[BaseTrackedEntity] = query.all()
        return tes

    def resolve_cv(self, info, **kwargs):
        cv: List[BaseControlledVocabulary] = Query.resolve_with_filters(info=info, model=BaseControlledVocabulary, **kwargs).one()
        log.info(cv)
        return cv

    def resolve_cvs(self, info, **kwargs):
        cv: List[BaseControlledVocabulary] = Query.resolve_with_filters(info=info, model=BaseControlledVocabulary, **kwargs).all()
        return cv
schema:
schema = Schema(query=Query, types=[*tracked_members, *cv_members])
I would like SQLAlchemyInterface not to extend Node, and instead add Node back to the list of interfaces for TrackedEntity and ControlledVocabulary, while still being able to perform a query like this:
query queryTracked {
  trackedEntities {
    id
    (other fields)
    ... on Request {
      (request specific fields)
    }
  }
}
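A minimal sketch of the direction I have in mind, with hypothetical plain-graphene names (not the production code above):

import graphene

class TrackedEntityInterface(graphene.Interface):
    # Plain id field on the interface itself, so queries can select `id`
    # directly instead of through `... on Node { id }`.
    id = graphene.UUID(required=True)

class RequestType(graphene.ObjectType):
    class Meta:
        interfaces = (TrackedEntityInterface,)
    # Request-specific fields would go here; `id` resolves from the
    # model attribute of the same name by default.
    label = graphene.String()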
In Django I often assert the number of queries that should be made so that unit tests catch new N+1 query problems:
from django import db
from django.conf import settings
settings.DEBUG = True

class SendData(TestCase):
    def test_send(self):
        db.connection.queries = []
        event = Events.objects.all()[1:]
        s = str(event)  # QuerySet is lazy, force retrieval
        self.assertEquals(len(db.connection.queries), 2)
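(As an aside, Django's TestCase also ships an assertNumQueries context manager that does the same bookkeeping without touching connection.queries by hand:)

def test_send(self):
    # Fails unless exactly 2 queries run inside the block.
    with self.assertNumQueries(2):
        event = Events.objects.all()[1:]
        s = str(event)  # force evaluation of the lazy QuerySet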
In SQLAlchemy, tracing to STDOUT is enabled by setting the echo flag on the engine:
engine.echo = True
What is the best way to write tests that count the number of queries made by SQLAlchemy?
class SendData(TestCase):
    def test_send(self):
        event = session.query(Events).first()
        s = str(event)
        self.assertEquals( ... , 2)
I've created a context manager class for this purpose:
import sqlalchemy.event

class DBStatementCounter(object):
    """
    Use as a context manager to count the number of execute()'s performed
    against the given sqlalchemy connection.

    Usage:
        with DBStatementCounter(conn) as ctr:
            conn.execute("SELECT 1")
            conn.execute("SELECT 1")
        assert ctr.get_count() == 2
    """
    def __init__(self, conn):
        self.conn = conn
        self.count = 0
        # Will have to rely on this since sqlalchemy 0.8 does not support
        # removing event listeners
        self.do_count = False
        sqlalchemy.event.listen(conn, 'after_execute', self.callback)

    def __enter__(self):
        self.do_count = True
        return self

    def __exit__(self, *_):
        self.do_count = False

    def get_count(self):
        return self.count

    def callback(self, *_):
        if self.do_count:
            self.count += 1
Use SQLAlchemy Core events to log/track the queries executed (you can attach the listener from your unit tests so it doesn't impact performance in the actual application):
event.listen(engine, "before_cursor_execute", catch_queries)
Now write the function catch_queries; how best to do that depends on how you test. For example, you could define this function inside your test:
def test_something(self):
    stmts = []

    def catch_queries(conn, cursor, statement, parameters, context, executemany):
        stmts.append(statement)

    # Now attach it as a listener and work with the collected
    # statements after running your test
    event.listen(engine, "before_cursor_execute", catch_queries)
The above is just an inspiration. For extended cases you'd probably like to have a global cache of statements that you empty after each test. The reason is that prior to 0.9 (the current dev version) there is no API to remove event listeners, so the trick is to attach one global listener that appends to a global list, as sketched below.
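A sketch of that global-listener pattern (assuming a module-level engine; names are illustrative):

import sqlalchemy.event

statements = []  # global cache shared across the whole test run

def _record(conn, cursor, statement, parameters, context, executemany):
    statements.append(statement)

# Attach once; on SQLAlchemy < 0.9 listeners cannot be removed again.
sqlalchemy.event.listen(engine, "before_cursor_execute", _record)

# In each test's setUp or tearDown, just empty the cache:
#     del statements[:]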
What about the approach of using flask_sqlalchemy.get_debug_queries()? By the way, this is the methodology used internally by Flask Debug Toolbar; check its source.
from flask_sqlalchemy import get_debug_queries

def test_list_with_assuring_queries_count(app, client):
    with app.app_context():
        # here generating some test data
        for _ in range(10):
            notebook = create_test_scheduled_notebook_based_on_notebook_file(
                db.session, owner='testing_user',
                schedule={"kind": SCHEDULE_FREQUENCY_DAILY}
            )
        for _ in range(100):
            create_test_scheduled_notebook_run(db.session, notebook_id=notebook.id)

    with app.app_context():
        # after resetting the context, call the actual view we want to
        # assert the number of queries for
        client.get(url_for('notebooks.personal_notebooks'))
        assert len(get_debug_queries()) == 3
Keep in mind that to get a fresh context (and a fresh query count), you have to enter with app.app_context() immediately before the exact code you want to measure.
A slightly modified version of @omar-tarabai's solution that removes the event listener when exiting the context:
from sqlalchemy import event

class QueryCounter(object):
    """Context manager to count SQLAlchemy queries."""

    def __init__(self, connection):
        self.connection = connection.engine
        self.count = 0

    def __enter__(self):
        event.listen(self.connection, "before_cursor_execute", self.callback)
        return self

    def __exit__(self, *args, **kwargs):
        event.remove(self.connection, "before_cursor_execute", self.callback)

    def callback(self, *args, **kwargs):
        self.count += 1

Usage:

with QueryCounter(session.connection()) as counter:
    session.query(XXX).all()
    session.query(YYY).all()
print(counter.count)  # 2
I am new to Scrapy. I have this spider code:
class Example_spider(BaseSpider):
    name = "example"
    allowed_domains = ["www.example.com"]

    def start_requests(self):
        yield self.make_requests_from_url("http://www.example.com/bookstore/new")

    def parse(self, response):
        hxs = HtmlXPathSelector(response)
        urls = hxs.select('//div[@class="bookListingBookTitle"]/a/@href').extract()
        for i in urls:
            yield Request(urljoin("http://www.example.com/", i[1:]), callback=self.parse_url)

    def parse_url(self, response):
        hxs = HtmlXPathSelector(response)
        main = hxs.select('//div[@id="bookshelf-bg"]')
        items = []
        for i in main:
            item = Exampleitem()
            item['book_name'] = i.select('div[@class="slickwrap full"]/div[@id="bookstore_detail"]/div[@class="book_listing clearfix"]/div[@class="bookstore_right"]/div[@class="title_and_byline"]/p[@class="book_title"]/text()')[0].extract()
            item['price'] = i.select('div[@id="book-sidebar-modules"]/div[@class="add_to_cart_wrapper slickshadow"]/div[@class="panes"]/div[@class="pane clearfix"]/div[@class="inner"]/div[@class="add_to_cart 0"]/form/div[@class="line-item"]/div[@class="line-item-price"]/text()').extract()
            items.append(item)
        return items
And the pipeline code is:
class examplePipeline(object):
    def __init__(self):
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
                                            db='blurb',
                                            user='root',
                                            passwd='redhat',
                                            cursorclass=MySQLdb.cursors.DictCursor,
                                            charset='utf8',
                                            use_unicode=True
                                            )

    def process_item(self, spider, item):
        # run db query in thread pool
        assert isinstance(item, Exampleitem)
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        print "db connected-=========>"
        # create record if it doesn't exist
        tx.execute("select * from example_book_store where book_name = %s", (item['book_name'],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute("""INSERT INTO example_book_store (book_name, price)
                          VALUES (%s, %s)""",
                       (item['book_name'], item['price'])
                       )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
After running this I am getting the following error:
exceptions.NameError: global name 'Exampleitem' is not defined
I got the above error when I added the line below to the process_item method:
assert isinstance(item, Exampleitem)
and without adding this line I get:
exceptions.TypeError: 'Example_spider' object is not subscriptable
Can anyone make this code run and make sure that all the items are saved into the database?
Try the following code in your pipeline
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request

class MySQLStorePipeline(object):
    def __init__(self):
        self.conn = MySQLdb.connect('host', 'user', 'passwd',
                                    'dbname', charset="utf8",
                                    use_unicode=True)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        try:
            self.cursor.execute("""INSERT INTO example_book_store (book_name, price)
                                   VALUES (%s, %s)""",
                                (item['book_name'].encode('utf-8'),
                                 item['price'].encode('utf-8')))
            self.conn.commit()
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        return item
Your process_item method should be declared as def process_item(self, item, spider): instead of def process_item(self, spider, item): (you switched the arguments around).
This exception, exceptions.NameError: global name 'Exampleitem' is not defined, indicates you didn't import Exampleitem in your pipeline.
Try adding: from myspiders.myitems import Exampleitem (with the correct names/paths, of course).
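Putting both fixes together, the top of the pipeline would look roughly like this (module path illustrative):

from myspiders.myitems import Exampleitem  # adjust to your project layout

class examplePipeline(object):
    def process_item(self, item, spider):  # item first, then spider
        assert isinstance(item, Exampleitem)
        # ... queue the insert exactly as before ...
        return item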
I think this way is better and more concise:
# Item
class pictureItem(scrapy.Item):
    topic_id = scrapy.Field()
    url = scrapy.Field()

# SQL
self.save_picture = "insert into picture(`url`,`id`) values(%(url)s,%(id)s);"

# usage
cur.execute(self.save_picture, dict(item))
It's just like
cur.execute("insert into picture(`url`,`id`) values(%(url)s,%(id)s)" % {"url":someurl,"id":1})
Because (you can read more about Items in Scrapy):
The Field class is just an alias to the built-in dict class and doesn’t provide any extra functionality or attributes. In other words, Field objects are plain-old Python dicts.
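So converting an Item for the parameterized query is as simple as dict(item); a quick illustration (hypothetical values):

item = pictureItem(topic_id=1, url='http://example.com/img.jpg')
print(dict(item))  # -> {'topic_id': 1, 'url': 'http://example.com/img.jpg'}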