Multiprocessing, sqlAlchemy and scoped_sessions - sqlalchemy

I want to run multiple strategies in concurrent processes. I came up with something like this
import logging
import multiprocessing
import os
from sqlalchemy.orm import scoped_session, Session
from pyutil.sql.interfaces.symbols.symbol import Symbol
from pyutil.sql.session import get_one_or_create
class StratRunner(object):
def __init__(self, session_scope, logger=None):
assert isinstance(session_scope, scoped_session)
self.__session_scope = session_scope
self.__logger = logger or logging.getLogger(__name__)
# this function is the target for mp.Process
def _run(self, strategy):
self.__logger.debug("Pid {pid}".format(pid=os.getpid()))
symbols = self.symbols
self.__logger.info("Run strategy {s}".format(s=strategy))
configuration = strategy.configuration()
strategy.upsert(portfolio=configuration.portfolio, symbols=symbols, days=5)
def run_strategies(self):
# loop over all active strategies!
jobs = []
# we are in the main thread here...
for s in self.active_strategies:
# what shall I give to the Process? The strategy object, the strategy_id, a session instance, the session_scope...
job = multiprocessing.Process(target=self._run, kwargs={"strategy": s})
job.name = s.name
jobs.append(job)
run_jobs(jobs, logger=self.__logger)
#property
def symbols(self):
return {s.name: s for s in self.__session_scope().query(Symbol)}
#property
def active_strategies(self):
return self.__session_scope().query(Strategy).filter(Strategy.active == True).all()
I am aware of tons of documentation on this project but I am overwhelmed.
I loop over the rows of a table (The active_strategies). class Strategies(Base)... . I then hand over the strategy object to the _run method and update the strategy object within the very same method. Please feel free to shred my code.
I am in particular puzzled about what to give to the _run method? Shall I hand over the strategy object, the strategy ID, the session, the scoped_session, ... ?
I have now created a runner object:
import abc
import logging
import os
from sqlalchemy.orm import sessionmaker
class Runner(object):
__metaclass__ = abc.ABCMeta
def __init__(self, engine, logger=None):
self.__engine = engine
self._logger = logger or logging.getLogger(__name__)
self.__jobs = []
#property
def _session(self):
""" Create a fresh new session... """
self.__engine.dispose()
factory = sessionmaker(self.__engine)
return factory()
def _run_jobs(self):
self._logger.debug("PID main {pid}".format(pid=os.getpid()))
for job in self.jobs:
# all jobs get the trigge
self._logger.info("Job {j}".format(j=job.name))
job.start()
for job in self.jobs:
self._logger.info("Wait for job {j}".format(j=job.name))
job.join()
self._logger.info("Job {j} done".format(j=job.name))
#property
def jobs(self):
return self.__jobs
#abc.abstractmethod
def run(self):
""" Described in the child class """
In particular this class can provide a fresh session (via ._session). However, using this setup I see plenty of :
psycopg2.OperationalError: server closed the connection unexpectedly
| This probably means the server terminated abnormally
| before or while processing the request.

Related

connection in a different thread can't read tables created in another thread?

In a testing suite I have a fixture that drop all the tables in an engine, then start fresh and create all the tables. After this fixture logic, my test case runs, using the newly created table.
The fixture and the test case are run in the MainThread, while the database consumer is a web application server run in another thread.
However, I keep getting: sqlite3.OperationalError: no such table: ***
I've checked that they are using the same in-memory engine, but different connections(this is correct). And I've checked that the fixture does run before the consumer thread starts running.
What could be possible cause?
My code is as below:
import os
import pytest
import cherrypy
class DAL:
def __init__(self,
path="database",
filename=None,
conn_string=None,
echo=False):
if filename is None and conn_string is None:
conn_string = "sqlite:///:memory:"
elif conn_string is not None:
conn_string = conn_string
else:
conn_string = f'sqlite:///{os.path.abspath(path)}/{filename}'
self.conn_string = conn_string
engine = create_engine(conn_string, echo=echo)
Session_Factory = sessionmaker(bind=engine)
self.Session = sqlalchemy.orm.scoped_session(Session_Factory)
def __str__(self):
return f"<DAL>object: {self.conn_string}, at {hex(id(self))}"
def get_a_dbsession(self):
opened_db = self.Session()
return opened_db
def __enter__(self):
return self.get_a_dbsession()
def __exit__(self, exception_type, exception_value, exception_traceback):
opened_db = self.Session()
try:
opened_db.commit()
except:
opened_db.rollback()
raise
finally:
self.Session.remove()
else:
opened_db.close()
raise exception_type
def create_schema(self):
SchemaBase.metadata.create_all(self.Session().connection().engine)
class SAEnginePlugin(cherrypy.process.plugins.SimplePlugin):
def __init__(self, bus, dal):
"""
The plugin is registered to the CherryPy engine.
"""
cherrypy.process.plugins.SimplePlugin.__init__(self, bus)
self.dal = dal
def start(self):
self.bus.subscribe("bind-session", self.bind)
def stop(self):
self.bus.unsubscribe("bind-session", self.bind)
if self.dal:
del self.dal
def bind(self):
"""
Whenever this plugin receives the 'bind-session' message, it applies
this method and bind the received session to the engine.
"""
# self.dal.Session.configure(bind=self.dal.engine)
session = self.dal.get_a_dbsession()
return session
class SATool(cherrypy.Tool):
def __init__(self):
"""
This tool binds a session to the engine each time
a requests starts and commits/rollbacks whenever
the request terminates.
"""
cherrypy.Tool.__init__(self,
'on_start_resource',
self.bind_session,
priority=20)
def _setup(self):
cherrypy.Tool._setup(self)
cherrypy.request.hooks.attach('on_end_resource',
self.close_session,
priority=80)
def bind_session(self):
"""
Attaches a session to the request's scope by requesting
the SA plugin to bind a session to the SA engine.
"""
session = cherrypy.engine.publish('bind-session').pop()
cherrypy.request.db = session
def close_session(self):
"""
Commits the current transaction or rollbacks if an error occurs.
In all cases, the current session is unbound and therefore
not usable any longer.
"""
if not hasattr(cherrypy.request, 'db'):
return
try:
cherrypy.request.db.commit()
except:
cherrypy.request.db.rollback()
raise
finally:
cherrypy.request.db.close()
cherrypy.request.db = None
# Register the SQLAlchemy tool
cherrypy.tools.db = SATool()
class UnitServer:
...
#cherrypy.expose
#cherrypy.tools.json_in()
def list_filtered_entries(self):
...
queryOBJ = cherrypy.request.db.query(classmodel_obj)
...
############# main module code below ############:
# mocking 'db':
dal = database.DAL()
# configure cherrypy:
SAEnginePlugin(cherrypy.engine, dal).subscribe()
#pytest.fixture(autouse=True) # automatically run before every test case
def mocked_dal(request):
# first, clean the database by dropping all tables:
database.SchemaBase.metadata.drop_all(dal.Session().connection().engine)
# second, create the schema from blank:
dal.create_schema()
# third, insert some dummy data record:
...
db.commit()
class TestMyUnitServer(cherrypy.test.helper.CPWebCase):
#staticmethod
def setup_server():
...
server_app = UnitServer()
cherrypy.tree.mount(server_app, '', {'/': {'tools.db.on': True}})
def test_list_filtered_entries_allentries(self):
...
self.getPage('/list_filtered_entries',
headers=[("Accept", "application/json"),
('Content-type', 'application/json'),
('Content-Length',
str(len(json.dumps(query_params)))),
("Connection", "keep-alive"),
("Cache-Control", "max-age=0")],
body=serialized_query_params,
method="POST")
self.assertStatus('200 OK')

FastAPI unittesting not overriding get_db

I'm just trying to get FastAPI unittests working with SQLAlchemy, but I'm having trouble testing with objects created in the database. I have the following setup, as per the docs.
main.py
routes = [
APIRoute('/games/', views.games_list, name='index', response_class=HTMLResponse),
]
settings = Settings()
app = FastAPI(debug=settings.debug, routes=routes)
views.py
# Dependency
def get_db():
db = SessionLocal()
try:
yield db
finally:
db.close()
async def games_list(request: Request, db: Session = Depends(get_db)):
settings = Settings()
games = db.query(Game).all()
return settings.templates.TemplateResponse('games/list.jinja', {'request': request, 'games': games})
database.py
def prepare_database(settings):
engine = create_engine(settings.database_url)
Base.metadata.create_all(engine)
return engine
engine = prepare_database(delete_existing=False, settings=Settings())
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
conftest.py
#pytest.fixture
def db_conn():
session = SessionLocalTesting()
try:
yield session
finally:
session.close()
#pytest.fixture
def cli(db_conn):
def override_get_db():
session = SessionLocalTesting()
try:
yield session
finally:
session.close()
app.dependency_overrides[get_db] = override_get_db
with TestClient(app) as client:
yield client
test file
def test_games_list(cli, factory, db_conn):
factory.create_game()
# This will return the game I have created with my factory, definitely in the test db.
print(db_conn.query(Game.name).all())
r = cli.get('/games/')
assert 'DnD Game' in r.content.decode()
My issue is that I can't get the objects from the test db in view. If I print(db.bind.database.url ) in views the test database is not being used, so it's trying to get items from the real database, not the test one.
So it looks like get_db is not getting overridden, though I'm not sure why.
Thanks in advance
I think you forgot to specify the scope of the fixture when you should call a fixture
Create a DB connection and override DB dependency through #pytest.fixture there is a kind of scope available in pytest.fixture(scope='session') there module level as well and other lots off scope available in Type of scope
I have mentioned a link which helps you Use test DB intend of real
import pytest
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy_utils import create_database, drop_database
from app.main import app
from db.base import Base
from db.settings import get_database
from settings.config import (
TEST_DATABASE_URI as SQLALCHEMY_DATABASE_URL,
)
# SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(SQLALCHEMY_DATABASE_URL, pool_pre_ping=True)
def override_get_db():
"""" Override """
try:
db = Session(autocommit=False, autoflush=False, bind=engine)
yield db
finally:
db.close()
#pytest.fixture(scope="session", autouse=True)
def create_db():
""" creating db model in database """
create_database(SQLALCHEMY_DATABASE_URL)
print("\n" + "\x1b[6;30;42m" + "Creating test database." + "\x1b[0m")
Base.metadata.create_all(bind=engine)
app.dependency_overrides[get_database] = override_get_db
yield 1
drop_database(SQLALCHEMY_DATABASE_URL)
print("\n" + "\x1b[6;30;42m" + "Delete test database." + "\x1b[0m")
#pytest.fixture()
def get_db_session():
""" Getting session for db transaction """
session = Session(autocommit=False, autoflush=False, bind=engine)
yield session
session.close()
#pytest.fixture()
def client():
""" Getting testclient of app """
with TestClient(app) as client:
yield client
If you still face the problem or not let me know

How do I write SQLAlchemy test fixtures for FastAPI applications

I am writing a FastAPI application that uses a SQLAlchemy database. I have copied the example from the FastAPI documentation, simplifying the database schema for concisions' sake. The complete source is at the bottom of this post.
This works. I can run it with uvicorn sql_app.main:app and interact with the database via the Swagger docs. When it runs it creates a test.db in the working directory.
Now I want to add a unit test. Something like this.
from fastapi import status
from fastapi.testclient import TestClient
from pytest import fixture
from main import app
#fixture
def client() -> TestClient:
return TestClient(app)
def test_fast_sql(client: TestClient):
response = client.get("/users/")
assert response.status_code == status.HTTP_200_OK
assert response.json() == []
Using the source code below, this takes the test.db in the working directory as the database. Instead I want to create a new database for every unit test that is deleted at the end of the test.
I could put the global database.engine and database.SessionLocal inside an object that is created at runtime, like so:
class UserDatabase:
def __init__(self, directory: Path):
directory.mkdir(exist_ok=True, parents=True)
sqlalchemy_database_url = f"sqlite:///{directory}/store.db"
self.engine = create_engine(
sqlalchemy_database_url, connect_args={"check_same_thread": False}
)
self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
models.Base.metadata.create_all(bind=self.engine)
but I don't know how to make that work with main.get_db, since the Depends(get_db) logic ultimately assumes database.engine and database.SessionLocal are available globally.
I'm used to working with Flask, whose unit testing facilities handle all this for you. I don't know how to write it myself. Can someone show me the minimal changes I'd have to make in order to generate a new database for each unit test in this framework?
The complete source of the simplified FastAPI/SQLAlchemy app is as follows.
database.py
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
models.py
from sqlalchemy import Column, Integer, String
from database import Base
class User(Base):
__tablename__ = "users"
id = Column(Integer, primary_key=True, index=True)
name = Column(String)
age = Column(Integer)
schemas.py
from pydantic import BaseModel
class UserBase(BaseModel):
name: str
age: int
class UserCreate(UserBase):
pass
class User(UserBase):
id: int
class Config:
orm_mode = True
crud.py
from sqlalchemy.orm import Session
import schemas
import models
def get_user(db: Session, user_id: int):
return db.query(models.User).filter(models.User.id == user_id).first()
def get_users(db: Session, skip: int = 0, limit: int = 100):
return db.query(models.User).offset(skip).limit(limit).all()
def create_user(db: Session, user: schemas.UserCreate):
db_user = models.User(name=user.name, age=user.age)
db.add(db_user)
db.commit()
db.refresh(db_user)
return db_user
main.py
from typing import List
from fastapi import Depends, FastAPI, HTTPException
from sqlalchemy.orm import Session
import schemas
import models
import crud
from database import SessionLocal, engine
models.Base.metadata.create_all(bind=engine)
app = FastAPI()
# Dependency
def get_db():
try:
db = SessionLocal()
yield db
finally:
db.close()
#app.post("/users/", response_model=schemas.User)
def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
return crud.create_user(db=db, user=user)
#app.get("/users/", response_model=List[schemas.User])
def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
users = crud.get_users(db, skip=skip, limit=limit)
return users
#app.get("/users/{user_id}", response_model=schemas.User)
def read_user(user_id: int, db: Session = Depends(get_db)):
db_user = crud.get_user(db, user_id=user_id)
if db_user is None:
raise HTTPException(status_code=404, detail="User not found")
return db_user
You need to override your get_db dependency in your tests, see these docs.
Something like this for your fixture:
#fixture
def db_fixture() -> Session:
raise NotImplementError() # Make this return your temporary session
#fixture
def client(db_fixture) -> TestClient:
def _get_db_override():
return db_fixture
app.dependency_overrides[get_db] = _get_db_override
return TestClient(app)

Scrapy / Pipeline not inserting data to MySQL database

I'm making a pipeline in scrapy to store scraped data in a mysql database. When the spider is run in terminal it works perfectly. Even the pipeline is opened. However the data is not being sent to the database. Any help appreciated! :)
here's the pipeline code:
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
from tutorial.items import TutorialItem
class MySQLTest(object):
def __init__(self):
db = MySQLdb.connect(user='root', passwd='', host='localhost', db='python')
cursor = db.cursor()
def process_item(self, spider, item):
try:
cursor.execute("INSERT INTO info (venue, datez) VALUES (%s, %s)", (item['artist'], item['date']))
self.conn.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
return item
and heres the spider code
import scrapy # Import required libraries.
from scrapy.selector import HtmlXPathSelector # Allows for path detection in a websites code.
from scrapy.spider import BaseSpider # Used to create a simple spider to extract data.
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor # Needed for the extraction of href links in HTML to crawl further pages.
from scrapy.contrib.spiders import CrawlSpider # Needed to make the crawl spider.
from scrapy.contrib.spiders import Rule # Allows specified rules to affect what the link
import spotipy
import soundcloud
import mysql.connector
from tutorial.items import TutorialItem
class AllGigsSpider(CrawlSpider):
name = "allGigs" # Name of the Spider. In command promt, when in the correct folder, enter "scrapy crawl Allgigs".
allowed_domains = ["www.allgigs.co.uk"] # Allowed domains is a String NOT a URL.
start_urls = [
"http://www.allgigs.co.uk/whats_on/London/clubbing-1.html",
"http://www.allgigs.co.uk/whats_on/London/festivals-1.html",
"http://www.allgigs.co.uk/whats_on/London/comedy-1.html",
"http://www.allgigs.co.uk/whats_on/London/theatre_and_opera-1.html",
"http://www.allgigs.co.uk/whats_on/London/dance_and_ballet-1.html"
] # Specify the starting points for the web crawler.
rules = [
Rule(SgmlLinkExtractor(restrict_xpaths='//div[#class="more"]'), # Search the start URL's for
callback="parse_me",
follow=True),
]
def parse_me(self, response):
for info in response.xpath('//div[#class="entry vevent"]|//div[#class="resultbox"]'):
item = TutorialItem() # Extract items from the items folder.
item ['artist'] = info.xpath('.//span[#class="summary"]//text()').extract() # Extract artist information.
item ['date'] = info.xpath('.//span[#class="dates"]//text()').extract() # Extract date information.
#item ['endDate'] = info.xpath('.//abbr[#class="dtend"]//text()').extract() # Extract end date information.
#item ['startDate'] = info.xpath('.//abbr[#class="dtstart"]//text()').extract() # Extract start date information.
item ['genre'] = info.xpath('.//div[#class="header"]//text()').extract()
yield item # Retreive items in item.
client = soundcloud.Client(client_id='401c04a7271e93baee8633483510e263')
tracks = client.get('/tracks', limit=1, license='cc-by-sa', q= item['artist'])
for track in tracks:
print(tracks)
I believe the problem was in my settings.py file where i had missed a comma... yawn.
ITEM_PIPELINES = {
'tutorial.pipelines.MySQLTest': 300,
}

How to count sqlalchemy queries in unit tests

In Django I often assert the number of queries that should be made so that unit tests catch new N+1 query problems
from django import db
from django.conf import settings
settings.DEBUG=True
class SendData(TestCase):
def test_send(self):
db.connection.queries = []
event = Events.objects.all()[1:]
s = str(event) # QuerySet is lazy, force retrieval
self.assertEquals(len(db.connection.queries), 2)
In in SQLAlchemy tracing to STDOUT is enabled by setting the echo flag on
engine
engine.echo=True
What is the best way to write tests that count the number of queries made by SQLAlchemy?
class SendData(TestCase):
def test_send(self):
event = session.query(Events).first()
s = str(event)
self.assertEquals( ... , 2)
I've created a context manager class for this purpose:
class DBStatementCounter(object):
"""
Use as a context manager to count the number of execute()'s performed
against the given sqlalchemy connection.
Usage:
with DBStatementCounter(conn) as ctr:
conn.execute("SELECT 1")
conn.execute("SELECT 1")
assert ctr.get_count() == 2
"""
def __init__(self, conn):
self.conn = conn
self.count = 0
# Will have to rely on this since sqlalchemy 0.8 does not support
# removing event listeners
self.do_count = False
sqlalchemy.event.listen(conn, 'after_execute', self.callback)
def __enter__(self):
self.do_count = True
return self
def __exit__(self, *_):
self.do_count = False
def get_count(self):
return self.count
def callback(self, *_):
if self.do_count:
self.count += 1
Use SQLAlchemy Core Events to log/track queries executed (you can attach it from your unit tests so they don't impact your performance on the actual application:
event.listen(engine, "before_cursor_execute", catch_queries)
Now you write the function catch_queries, where the way depends on how you test. For example, you could define this function in your test statement:
def test_something(self):
stmts = []
def catch_queries(conn, cursor, statement, ...):
stmts.append(statement)
# Now attach it as a listener and work with the collected events after running your test
The above method is just an inspiration. For extended cases you'd probably like to have a global cache of events that you empty after each test. The reason is that prior to 0.9 (current dev) there is no API to remove event listeners. Thus make one global listener that accesses a global list.
what about the approach of using flask_sqlalchemy.get_debug_queries() btw. this is the methodology used by internal of Flask Debug Toolbar check its source
from flask_sqlalchemy import get_debug_queries
def test_list_with_assuring_queries_count(app, client):
with app.app_context():
# here generating some test data
for _ in range(10):
notebook = create_test_scheduled_notebook_based_on_notebook_file(
db.session, owner='testing_user',
schedule={"kind": SCHEDULE_FREQUENCY_DAILY}
)
for _ in range(100):
create_test_scheduled_notebook_run(db.session, notebook_id=notebook.id)
with app.app_context():
# after resetting the context call actual view we want asserNumOfQueries
client.get(url_for('notebooks.personal_notebooks'))
assert len(get_debug_queries()) == 3
keep in mind that for having reset context and count you have to call with app.app_context() before the exact stuff you want to measure.
Slightly modified version of #omar-tarabai's solution that removes the event listener when exiting the context:
from sqlalchemy import event
class QueryCounter(object):
"""Context manager to count SQLALchemy queries."""
def __init__(self, connection):
self.connection = connection.engine
self.count = 0
def __enter__(self):
event.listen(self.connection, "before_cursor_execute", self.callback)
return self
def __exit__(self, *args, **kwargs):
event.remove(self.connection, "before_cursor_execute", self.callback)
def callback(self, *args, **kwargs):
self.count += 1
Usage:
with QueryCounter(session.connection()) as counter:
session.query(XXX).all()
session.query(YYY).all()
print(counter.count) # 2