How to count sqlalchemy queries in unit tests - sqlalchemy

In Django I often assert the number of queries that should be made so that unit tests catch new N+1 query problems
from django import db
from django.conf import settings
settings.DEBUG=True
class SendData(TestCase):
def test_send(self):
db.connection.queries = []
event = Events.objects.all()[1:]
s = str(event) # QuerySet is lazy, force retrieval
self.assertEquals(len(db.connection.queries), 2)
In in SQLAlchemy tracing to STDOUT is enabled by setting the echo flag on
engine
engine.echo=True
What is the best way to write tests that count the number of queries made by SQLAlchemy?
class SendData(TestCase):
def test_send(self):
event = session.query(Events).first()
s = str(event)
self.assertEquals( ... , 2)

I've created a context manager class for this purpose:
class DBStatementCounter(object):
"""
Use as a context manager to count the number of execute()'s performed
against the given sqlalchemy connection.
Usage:
with DBStatementCounter(conn) as ctr:
conn.execute("SELECT 1")
conn.execute("SELECT 1")
assert ctr.get_count() == 2
"""
def __init__(self, conn):
self.conn = conn
self.count = 0
# Will have to rely on this since sqlalchemy 0.8 does not support
# removing event listeners
self.do_count = False
sqlalchemy.event.listen(conn, 'after_execute', self.callback)
def __enter__(self):
self.do_count = True
return self
def __exit__(self, *_):
self.do_count = False
def get_count(self):
return self.count
def callback(self, *_):
if self.do_count:
self.count += 1

Use SQLAlchemy Core Events to log/track queries executed (you can attach it from your unit tests so they don't impact your performance on the actual application:
event.listen(engine, "before_cursor_execute", catch_queries)
Now you write the function catch_queries, where the way depends on how you test. For example, you could define this function in your test statement:
def test_something(self):
stmts = []
def catch_queries(conn, cursor, statement, ...):
stmts.append(statement)
# Now attach it as a listener and work with the collected events after running your test
The above method is just an inspiration. For extended cases you'd probably like to have a global cache of events that you empty after each test. The reason is that prior to 0.9 (current dev) there is no API to remove event listeners. Thus make one global listener that accesses a global list.

what about the approach of using flask_sqlalchemy.get_debug_queries() btw. this is the methodology used by internal of Flask Debug Toolbar check its source
from flask_sqlalchemy import get_debug_queries
def test_list_with_assuring_queries_count(app, client):
with app.app_context():
# here generating some test data
for _ in range(10):
notebook = create_test_scheduled_notebook_based_on_notebook_file(
db.session, owner='testing_user',
schedule={"kind": SCHEDULE_FREQUENCY_DAILY}
)
for _ in range(100):
create_test_scheduled_notebook_run(db.session, notebook_id=notebook.id)
with app.app_context():
# after resetting the context call actual view we want asserNumOfQueries
client.get(url_for('notebooks.personal_notebooks'))
assert len(get_debug_queries()) == 3
keep in mind that for having reset context and count you have to call with app.app_context() before the exact stuff you want to measure.

Slightly modified version of #omar-tarabai's solution that removes the event listener when exiting the context:
from sqlalchemy import event
class QueryCounter(object):
"""Context manager to count SQLALchemy queries."""
def __init__(self, connection):
self.connection = connection.engine
self.count = 0
def __enter__(self):
event.listen(self.connection, "before_cursor_execute", self.callback)
return self
def __exit__(self, *args, **kwargs):
event.remove(self.connection, "before_cursor_execute", self.callback)
def callback(self, *args, **kwargs):
self.count += 1
Usage:
with QueryCounter(session.connection()) as counter:
session.query(XXX).all()
session.query(YYY).all()
print(counter.count) # 2

Related

Problem with PettingZoo and Stable-Baselines3 with a ParallelEnv

I am having trouble in making things work with a Custom ParallelEnv I wrote by using PettingZoo. I am using SuperSuit's ss.pettingzoo_env_to_vec_env_v1(env) as a wrapper to Vectorize the environment and make it work with Stable-Baseline3 and documented here.
You can find attached a summary of the most relevant part of the code:
from typing import Optional
from gym import spaces
import random
import numpy as np
from pettingzoo import ParallelEnv
from pettingzoo.utils.conversions import parallel_wrapper_fn
import supersuit as ss
from gym.utils import EzPickle, seeding
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
petting_zoo = env
class parallel_env(ParallelEnv, EzPickle):
metadata = {'render_modes': ['ansi'], "name": "PlayerEnv-Multi-v0"}
def __init__(self, n_agents: int = 20, new_step_api: bool = True) -> None:
EzPickle.__init__(
self,
n_agents,
new_step_api
)
self._episode_ended = False
self.n_agents = n_agents
self.possible_agents = [
f"player_{idx}" for idx in range(n_agents)]
self.agents = self.possible_agents[:]
self.agent_name_mapping = dict(
zip(self.possible_agents, list(range(len(self.possible_agents))))
)
self.observation_spaces = spaces.Dict(
{agent: spaces.Box(shape=(len(self.agents),),
dtype=np.float64, low=0.0, high=1.0) for agent in self.possible_agents}
)
self.action_spaces = spaces.Dict(
{agent: spaces.Discrete(4) for agent in self.possible_agents}
)
self.current_step = 0
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
def observation_space(self, agent):
return self.observation_spaces[agent]
def action_space(self, agent):
return self.action_spaces[agent]
def __calculate_observation(self, agent_id: int) -> np.ndarray:
return self.observation_space(agent_id).sample()
def __calculate_observations(self) -> np.ndarray:
observations = {
agent: self.__calculate_observation(
agent_id=agent)
for agent in self.agents
}
return observations
def observe(self, agent):
return self.__calculate_observation(agent_id=agent)
def step(self, actions):
if self._episode_ended:
return self.reset()
observations = self.__calculate_observations()
rewards = random.sample(range(100), self.n_agents)
self.current_step += 1
self._episode_ended = self.current_step >= 100
infos = {agent: {} for agent in self.agents}
dones = {agent: self._episode_ended for agent in self.agents}
rewards = {
self.agents[i]: rewards[i]
for i in range(len(self.agents))
}
if self._episode_ended:
self.agents = {} # To satisfy `set(par_env.agents) == live_agents`
return observations, rewards, dones, infos
def reset(self,
seed: Optional[int] = None,
return_info: bool = False,
options: Optional[dict] = None,):
self.agents = self.possible_agents[:]
self._episode_ended = False
self.current_step = 0
observations = self.__calculate_observations()
return observations
def render(self, mode="human"):
# TODO: IMPLEMENT
print("TO BE IMPLEMENTED")
def close(self):
pass
Unfortunately when I try to test with the following main procedure:
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.env_checker import check_env
from dummy_env import dummy
from pettingzoo.test import parallel_api_test
if __name__ == '__main__':
# Testing the parallel algorithm alone
env_parallel = dummy.parallel_env()
parallel_api_test(env_parallel) # This works!
# Testing the environment with the wrapper
env = dummy.petting_zoo()
# ERROR: AssertionError: The observation returned by the `reset()` method does not match the given observation space
check_env(env)
# Model initialization
model = PPO("MlpPolicy", env, verbose=1)
# ERROR: ValueError: could not broadcast input array from shape (20,20) into shape (20,)
model.learn(total_timesteps=10_000)
I get the following error:
AssertionError: The observation returned by the `reset()` method does not match the given observation space
If I skip check_env() I get the following one:
ValueError: could not broadcast input array from shape (20,20) into shape (20,)
It seems like that ss.pettingzoo_env_to_vec_env_v1(env) is capable of splitting the parallel environment in multiple vectorized ones, but not for the reset() function.
Does anyone know how to fix this problem?
Plese find the Github Repository to reproduce the problem.
You should double check the reset() function in PettingZoo. It will return None instead of an observation like GYM
Thanks to discussion I had in the issue section of the SuperSuit repository, I am able to post the solution to the problem. Thanks to jjshoots!
First of all it is necessary to have the latest SuperSuit version. In order to get that I needed to install Stable-Baseline3 using the instructions here to make it work with gym 0.24+.
After that, taking the code in the question as example, it is necessary to substitute
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
with
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
env_ = ss.concat_vec_envs_v1(env_, 1, base_class="stable_baselines3")
return env_
The outcomes are:
Outcome 1: leaving the line with check_env(env) I got an error AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
Outcome 2: removing the line with check_env(env), the agent starts training successfully!
In the end, I think that the argument base_class="stable_baselines3" made the difference.
Only the small problem on check_env remains to be reported, but I think it can be considered as trivial if the training works.

connection in a different thread can't read tables created in another thread?

In a testing suite I have a fixture that drop all the tables in an engine, then start fresh and create all the tables. After this fixture logic, my test case runs, using the newly created table.
The fixture and the test case are run in the MainThread, while the database consumer is a web application server run in another thread.
However, I keep getting: sqlite3.OperationalError: no such table: ***
I've checked that they are using the same in-memory engine, but different connections(this is correct). And I've checked that the fixture does run before the consumer thread starts running.
What could be possible cause?
My code is as below:
import os
import pytest
import cherrypy
class DAL:
def __init__(self,
path="database",
filename=None,
conn_string=None,
echo=False):
if filename is None and conn_string is None:
conn_string = "sqlite:///:memory:"
elif conn_string is not None:
conn_string = conn_string
else:
conn_string = f'sqlite:///{os.path.abspath(path)}/{filename}'
self.conn_string = conn_string
engine = create_engine(conn_string, echo=echo)
Session_Factory = sessionmaker(bind=engine)
self.Session = sqlalchemy.orm.scoped_session(Session_Factory)
def __str__(self):
return f"<DAL>object: {self.conn_string}, at {hex(id(self))}"
def get_a_dbsession(self):
opened_db = self.Session()
return opened_db
def __enter__(self):
return self.get_a_dbsession()
def __exit__(self, exception_type, exception_value, exception_traceback):
opened_db = self.Session()
try:
opened_db.commit()
except:
opened_db.rollback()
raise
finally:
self.Session.remove()
else:
opened_db.close()
raise exception_type
def create_schema(self):
SchemaBase.metadata.create_all(self.Session().connection().engine)
class SAEnginePlugin(cherrypy.process.plugins.SimplePlugin):
def __init__(self, bus, dal):
"""
The plugin is registered to the CherryPy engine.
"""
cherrypy.process.plugins.SimplePlugin.__init__(self, bus)
self.dal = dal
def start(self):
self.bus.subscribe("bind-session", self.bind)
def stop(self):
self.bus.unsubscribe("bind-session", self.bind)
if self.dal:
del self.dal
def bind(self):
"""
Whenever this plugin receives the 'bind-session' message, it applies
this method and bind the received session to the engine.
"""
# self.dal.Session.configure(bind=self.dal.engine)
session = self.dal.get_a_dbsession()
return session
class SATool(cherrypy.Tool):
def __init__(self):
"""
This tool binds a session to the engine each time
a requests starts and commits/rollbacks whenever
the request terminates.
"""
cherrypy.Tool.__init__(self,
'on_start_resource',
self.bind_session,
priority=20)
def _setup(self):
cherrypy.Tool._setup(self)
cherrypy.request.hooks.attach('on_end_resource',
self.close_session,
priority=80)
def bind_session(self):
"""
Attaches a session to the request's scope by requesting
the SA plugin to bind a session to the SA engine.
"""
session = cherrypy.engine.publish('bind-session').pop()
cherrypy.request.db = session
def close_session(self):
"""
Commits the current transaction or rollbacks if an error occurs.
In all cases, the current session is unbound and therefore
not usable any longer.
"""
if not hasattr(cherrypy.request, 'db'):
return
try:
cherrypy.request.db.commit()
except:
cherrypy.request.db.rollback()
raise
finally:
cherrypy.request.db.close()
cherrypy.request.db = None
# Register the SQLAlchemy tool
cherrypy.tools.db = SATool()
class UnitServer:
...
#cherrypy.expose
#cherrypy.tools.json_in()
def list_filtered_entries(self):
...
queryOBJ = cherrypy.request.db.query(classmodel_obj)
...
############# main module code below ############:
# mocking 'db':
dal = database.DAL()
# configure cherrypy:
SAEnginePlugin(cherrypy.engine, dal).subscribe()
#pytest.fixture(autouse=True) # automatically run before every test case
def mocked_dal(request):
# first, clean the database by dropping all tables:
database.SchemaBase.metadata.drop_all(dal.Session().connection().engine)
# second, create the schema from blank:
dal.create_schema()
# third, insert some dummy data record:
...
db.commit()
class TestMyUnitServer(cherrypy.test.helper.CPWebCase):
#staticmethod
def setup_server():
...
server_app = UnitServer()
cherrypy.tree.mount(server_app, '', {'/': {'tools.db.on': True}})
def test_list_filtered_entries_allentries(self):
...
self.getPage('/list_filtered_entries',
headers=[("Accept", "application/json"),
('Content-type', 'application/json'),
('Content-Length',
str(len(json.dumps(query_params)))),
("Connection", "keep-alive"),
("Cache-Control", "max-age=0")],
body=serialized_query_params,
method="POST")
self.assertStatus('200 OK')

Multiprocessing, sqlAlchemy and scoped_sessions

I want to run multiple strategies in concurrent processes. I came up with something like this
import logging
import multiprocessing
import os
from sqlalchemy.orm import scoped_session, Session
from pyutil.sql.interfaces.symbols.symbol import Symbol
from pyutil.sql.session import get_one_or_create
class StratRunner(object):
def __init__(self, session_scope, logger=None):
assert isinstance(session_scope, scoped_session)
self.__session_scope = session_scope
self.__logger = logger or logging.getLogger(__name__)
# this function is the target for mp.Process
def _run(self, strategy):
self.__logger.debug("Pid {pid}".format(pid=os.getpid()))
symbols = self.symbols
self.__logger.info("Run strategy {s}".format(s=strategy))
configuration = strategy.configuration()
strategy.upsert(portfolio=configuration.portfolio, symbols=symbols, days=5)
def run_strategies(self):
# loop over all active strategies!
jobs = []
# we are in the main thread here...
for s in self.active_strategies:
# what shall I give to the Process? The strategy object, the strategy_id, a session instance, the session_scope...
job = multiprocessing.Process(target=self._run, kwargs={"strategy": s})
job.name = s.name
jobs.append(job)
run_jobs(jobs, logger=self.__logger)
#property
def symbols(self):
return {s.name: s for s in self.__session_scope().query(Symbol)}
#property
def active_strategies(self):
return self.__session_scope().query(Strategy).filter(Strategy.active == True).all()
I am aware of tons of documentation on this project but I am overwhelmed.
I loop over the rows of a table (The active_strategies). class Strategies(Base)... . I then hand over the strategy object to the _run method and update the strategy object within the very same method. Please feel free to shred my code.
I am in particular puzzled about what to give to the _run method? Shall I hand over the strategy object, the strategy ID, the session, the scoped_session, ... ?
I have now created a runner object:
import abc
import logging
import os
from sqlalchemy.orm import sessionmaker
class Runner(object):
__metaclass__ = abc.ABCMeta
def __init__(self, engine, logger=None):
self.__engine = engine
self._logger = logger or logging.getLogger(__name__)
self.__jobs = []
#property
def _session(self):
""" Create a fresh new session... """
self.__engine.dispose()
factory = sessionmaker(self.__engine)
return factory()
def _run_jobs(self):
self._logger.debug("PID main {pid}".format(pid=os.getpid()))
for job in self.jobs:
# all jobs get the trigge
self._logger.info("Job {j}".format(j=job.name))
job.start()
for job in self.jobs:
self._logger.info("Wait for job {j}".format(j=job.name))
job.join()
self._logger.info("Job {j} done".format(j=job.name))
#property
def jobs(self):
return self.__jobs
#abc.abstractmethod
def run(self):
""" Described in the child class """
In particular this class can provide a fresh session (via ._session). However, using this setup I see plenty of :
psycopg2.OperationalError: server closed the connection unexpectedly
| This probably means the server terminated abnormally
| before or while processing the request.

Working with coroutines in Python Tornado Web Server

I am working on an autonomous car implementation for a web browser game with Python 2x. I use Tornado Web Server to run game on localhost and I post and receive data from game with JSON data format in the function called "FrameHandler" and also I determine what the act of car should be in "to_dict_faster()" function.
Here, my problem is that I can write data to text file which is hold in speed_data variable in specific time interval with help of a coroutine. However, I can't dump JSON data to function in this specific time interval because "FrameHandler" acts like While True and it always requests data to dump. What I am trying to do is sending desired acts as writing text file in specific time interval while not changing flow frame handler because it affects FPS of the game.
I am trying to figure out How can I do that for a long time any help would be great here:
#gen.coroutine
def sampler():
io_loop = tornado.ioloop.IOLoop.current()
start = time.time()
while True:
with open("Sampled_Speed.txt", "a") as text_file:
text_file.write("%d,%.2f\n" % (speed_data, ((time.time() - start))))
yield gen.Task(io_loop.add_timeout, io_loop.time() + period)
class MainHandler(tornado.web.RequestHandler):
def get(self):
self.redirect("/static/v2.curves.html")
class FrameHandler(tornado.web.RequestHandler):
def post(self):
global speed_data
data = json.loads(self.get_arguments("telemetry")[0])
ar = np.fromstring(base64.decodestring(self.request.body), dtype=np.uint8)
image = ar.reshape(hp.INPUT_SIZE, hp.INPUT_SIZE, hp.NUM_CHANNELS)
left, right, faster, slower = data["action"]
terminal, action, all_data, was_start = (
data["terminal"],
Action(left=left, right=right, faster=faster, slower=slower),
data["all_data"],
data["was_start"]
)
for i in range(len(all_data)):
data_dict=all_data[i]
speed_data = data_dict[u'speed']
position_data=data_dict[u'position']
result_action = agent.steps(image, 0.1, terminal, was_start, action, all_data)
if speed_data < 4000:
self.write(json.dumps(result_action.to_dict_faster()))
else:
self.write(json.dumps(result_action.to_dict_constant()))
def make_app():
return tornado.web.Application([
(r"/", MainHandler),
(r"/frame", FrameHandler),
(r"/static/(.*)", tornado.web.StaticFileHandler, {"path": static_path})
], debug=True)
if __name__ == "__main__":
app = make_app()
if "SERVER_PORT" in os.environ:
port = int(os.environ["SERVER_PORT"])
else:
port = 8880
print "LISTENING ON PORT: %d" % port
app.listen(port)
tornado.ioloop.IOLoop.current().run_sync(sampler)
tornado.ioloop.IOLoop.current().start()
You can move file writing to a different thread (using tornado's run_on_executor for example), so python interpreter will automatically switch from Sampler to main thread with FrameHandler on write. But you have to use thread-safe speed_data variable, I've used stdlib Queue.Queue as an example:
class Handler(tornado.web.RequestHandler):
#gen.coroutine
def get(self):
global speed_data
speed_data.put("REALLY BIG TEST DATA\n")
self.finish("OK")
class Sampler():
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
def __init__(self, queue):
self._q = queue
#run_on_executor
def write_sample(self):
with open("foobar.txt", "w") as f:
while True:
data = self._q.get()
f.write(data)
if __name__ == '__main__':
application = Application(
[("/status", Handler)]
)
server = HTTPServer(application)
server.listen(8888)
speed_data = Queue.Queue()
smp = Sampler(speed_data)
IOLoop.current().add_callback(smp.write_sample)
IOLoop.current().start()

WSGI application middleware to handle SQLAlchemy session

My WSGI application uses SQLAlchemy. I want to start session when request starts, commit it if it's dirty and request processing finished successfully, make rollback otherwise. So, I need to implement behavior of Django's TransactionMiddleware.
So, I suppose that I should create WSGI middleware and make following stuff:
Create and add DB session to environ on pre-processing.
Get DB session from environ and call commit() on post-processing, if no errors occurred.
Get DB session from environ and call rollback() on post-processing, if some errors occurred.
Step 1 is obvious for me:
class DbSessionMiddleware:
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
environ['db_session'] = create_session()
return self.app(environ, start_response)
Step 2 and 3 - not. I found the example of post-processing task:
class Caseless:
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
for chunk in self.app(environ, start_response):
yield chunk.lower()
It contains comment:
Note that the __call__ function is a Python generator, which is typical for this sort of “post-processing” task.
Could you please clarify how does it work and how can I solve my issue similarly.
Thanks,
Boris.
For step 1 I use SQLAlchemy scoped sessions:
engine = create_engine(settings.DB_URL, echo=settings.DEBUG, client_encoding='utf8')
Base = declarative_base()
sm = sessionmaker(bind=engine)
get_session = scoped_session(sm)
They return the same thread-local session for each get_session() call.
Step 2 and 3 for now is following:
class DbSessionMiddleware:
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
try:
db.get_session().begin_nested()
return self.app(environ, start_response)
except BaseException:
db.get_session().rollback()
raise
finally:
db.get_session().commit()
As you can see, I start nested transaction on session to be able to rollback even queries that were already committed in views.