how to pickle/dill only selected method stack - pickle

I want to send a lean object to another machine so that a selected method can be executed there.
I have a class like:
class ToBeSent:
    """Example object with two entry points that have different dependencies.

    ``serialize_me`` only needs ``use_data_info_common`` and
    ``use_data_info_SERIALIZE``; ``NOT_serialize_me`` additionally pulls in
    ``use_data_info_NOT_serialize``.
    """

    def __init__(self, data_info):
        # String that every helper prepends to its output.
        self.data_info = data_info

    def use_data_info_common(self, other_data):
        """Print ``data_info`` followed by *other_data*; used by both entry points."""
        print('using data info ' + self.data_info + other_data)

    def use_data_info_SERIALIZE(self, other_data):
        """Print ``data_info`` followed by *other_data*; used only by ``serialize_me``."""
        print('using data info ' + self.data_info + other_data)

    def use_data_info_NOT_serialize(self, other_data):
        """Stand-in for code with dependencies that must not be shipped."""
        # some other dependencies I don't want
        print("refers to bunch of methods and dependencies I don't want" + self.data_info + other_data)

    def serialize_me(self, other_data):
        """Entry point that should be sent to the remote machine."""
        self.use_data_info_common('other' + other_data)
        self.use_data_info_SERIALIZE('something')

    def NOT_serialize_me(self, other):
        """Entry point that must stay local together with its dependencies."""
        self.use_data_info_common(other)
        self.use_data_info_NOT_serialize(other)
I want to serialize only one method, serialize_me, with all its dependencies from this object (or super.object), but not NOT_serialize_me and its dependencies, because they are user-defined and I cannot run them on the target machine.
In case above I would like to automatically detect and serialize serialize_me and all dependencies which are:
use_data_info_SERIALIZE
use_data_info_common
self.data_info
and remove everything else:
NOT_serialize_me
use_data_info_NOT_serialize
I was experimenting with a proxy getattr trying to record all calls but it doesn't see any internal calls to common or other methods, and I was cutting too much from this class.

Related

Inserting to MySQL with mysql.connector - good practice/efficiency

I am working on a personal project and was wondering if my solution for inserting data to a MySQL database would be considered "pythonic" and efficient.
I have written a separate class for that, which will be called from an object which holds a dataframe. From there I am calling my save() function to write the dataframe to the database.
The script will be running once a day where I scrape some data from some websites and save it to my database. So it is important that it really runs through completely even when I have bad data or temporary connection issues (script and database run on different machines).
import mysql.connector
# custom logger
from myLog import logger
# custom class for formatting the data, a lot of potential errors are handled here
from myFormat import myFormat
# insert strings to mysql are stored and referenced here
import sqlStrings
class saveSQL:
def __init__(self):
self.frmt = myFormat()
self.host = 'XXX.XXX.XXX.XXX'
self.user = 'XXXXXXXX'
self.password = 'XXXXXXXX'
self.database = 'XXXXXXXX'
def save(self, payload, type):
match type:
case 'First':
return self.__first(payload)
case 'Second':
...
case _:
logger.error('Undefined Input for Type!')
def __first(self, payload):
try:
self.mydb = mysql.connector.connect(host=self.host,user=self.user,password=self.password,database=self.database)
mycursor = self.mydb.cursor()
except mysql.connector.Error as err:
logger.error('Couldn\'t establish connection to DB!')
try:
tmpList = payload.values.tolist()
except ValueError:
logger.error('Value error in converting dataframe to list: ' % payload)
try:
mycursor.executemany(sqlStrings.First, tmpList)
self.mydb.commit()
dbWrite = mycursor.rowcount
except mysql.connector.Error as err:
logger.error('Error in writing to database: %s' % err)
for ele in myList:
dbWrite = 0
try:
mycursor.execute(sqlStrings.First, ele)
self.mydb.commit()
dbWrite = dbWrite + mycursor.rowcount
except mysql.connector.Error as err:
logger.error('Error in writing to database: %s \n ele: %s' % [err,ele])
continue
pass
mycursor.close()
return dbWrite
Things I am wondering about:
Is the match case a good option to distinguish between writing to different tables depending on the data?
Are the different try/except blocks really necessary or are there easier ways of handling potential errors?
Do I really need the pass command at the end of the for-loop?

Working with coroutines in Python Tornado Web Server

I am working on an autonomous-car implementation for a web browser game, using Python 2.x. I use the Tornado web server to run the game on localhost; I post and receive JSON data from the game in the handler called "FrameHandler", and I determine what the car's action should be in the "to_dict_faster()" function.
My problem is this: with the help of a coroutine I can write the data held in the speed_data variable to a text file at a specific time interval. However, I can't send the JSON action data at that same interval, because "FrameHandler" acts like a "while True" loop and is constantly asked for data to dump. What I am trying to do is send the desired actions at a specific time interval, just as I write the text file, without changing the flow of the frame handler, because that affects the FPS of the game.
I have been trying to figure out how to do that for a long time; any help would be great. Here is the code:
@gen.coroutine
def sampler():
    """Append the current speed sample to ``Sampled_Speed.txt`` forever.

    Each line holds the module-level ``speed_data`` value and the seconds
    elapsed since the sampler started. Between samples the coroutine yields
    to the IOLoop for ``period`` seconds.
    """
    # NOTE(review): ``speed_data`` and ``period`` are module globals that are
    # not defined in this snippet; confirm both exist before the first sample.
    io_loop = tornado.ioloop.IOLoop.current()
    start = time.time()
    while True:
        with open("Sampled_Speed.txt", "a") as text_file:
            text_file.write("%d,%.2f\n" % (speed_data, time.time() - start))
        # Hand control back to the IOLoop until the timeout fires.
        yield gen.Task(io_loop.add_timeout, io_loop.time() + period)
class MainHandler(tornado.web.RequestHandler):
    """Redirect the site root to the static game page."""

    def get(self):
        self.redirect("/static/v2.curves.html")
class FrameHandler(tornado.web.RequestHandler):
    """Receive one game frame and answer with the agent's next action."""

    def post(self):
        """Decode the posted frame, update ``speed_data`` and write the action.

        The ``telemetry`` argument carries the JSON state and the request body
        carries the base64-encoded image. The reply is the faster-action dict
        while the last reported speed is below 4000, otherwise the
        constant-speed one.
        """
        global speed_data
        data = json.loads(self.get_arguments("telemetry")[0])
        ar = np.fromstring(base64.decodestring(self.request.body), dtype=np.uint8)
        image = ar.reshape(hp.INPUT_SIZE, hp.INPUT_SIZE, hp.NUM_CHANNELS)
        left, right, faster, slower = data["action"]
        terminal, action, all_data, was_start = (
            data["terminal"],
            Action(left=left, right=right, faster=faster, slower=slower),
            data["all_data"],
            data["was_start"]
        )
        # Only the speed of the last entry survives the loop; the sampler
        # reads it through the module-level ``speed_data``.
        for data_dict in all_data:
            speed_data = data_dict[u'speed']
        result_action = agent.steps(image, 0.1, terminal, was_start, action, all_data)
        if speed_data < 4000:
            self.write(json.dumps(result_action.to_dict_faster()))
        else:
            self.write(json.dumps(result_action.to_dict_constant()))
def make_app():
    """Build the Tornado application with the root, frame and static routes."""
    return tornado.web.Application([
        (r"/", MainHandler),
        (r"/frame", FrameHandler),
        (r"/static/(.*)", tornado.web.StaticFileHandler, {"path": static_path})
    ], debug=True)
if __name__ == "__main__":
    app = make_app()
    # Fall back to 8880 when SERVER_PORT is not set in the environment.
    if "SERVER_PORT" in os.environ:
        port = int(os.environ["SERVER_PORT"])
    else:
        port = 8880
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("LISTENING ON PORT: %d" % port)
    app.listen(port)
    # ``sampler`` never returns, so schedule it on the loop instead of waiting
    # for it with run_sync(); that way start() below is actually reached.
    tornado.ioloop.IOLoop.current().spawn_callback(sampler)
    tornado.ioloop.IOLoop.current().start()
You can move file writing to a different thread (using tornado's run_on_executor for example), so python interpreter will automatically switch from Sampler to main thread with FrameHandler on write. But you have to use thread-safe speed_data variable, I've used stdlib Queue.Queue as an example:
class Handler(tornado.web.RequestHandler):
    """Push a test payload onto the shared ``speed_data`` queue."""

    @gen.coroutine
    def get(self):
        # ``speed_data`` is only read here (the queue is mutated in place),
        # so no ``global`` declaration is needed.
        speed_data.put("REALLY BIG TEST DATA\n")
        self.finish("OK")
class Sampler(object):
    """Drain a queue into ``foobar.txt`` on a single worker thread."""

    # ``run_on_executor`` looks this attribute up on the instance.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)

    def __init__(self, queue):
        self._q = queue

    @run_on_executor
    def write_sample(self):
        """Block on the queue forever, writing every item to the file."""
        with open("foobar.txt", "w") as f:
            while True:
                data = self._q.get()
                f.write(data)
                # The loop never ends, so flush each item; otherwise it sits
                # in the buffer while the thread blocks on the next get().
                f.flush()
if __name__ == '__main__':
    application = Application(
        [("/status", Handler)]
    )
    server = HTTPServer(application)
    server.listen(8888)
    # Shared between the request handler (producer) and the sampler (consumer).
    speed_data = Queue.Queue()
    smp = Sampler(speed_data)
    # Start the writer once the loop is running, then serve requests.
    IOLoop.current().add_callback(smp.write_sample)
    IOLoop.current().start()

pytest: skip addfinalizer if exception in fixture

I have a function that should produce a report if the test function succeeds.
But I don't want to produce the report if there is an exception inside the test function.
I tried pytest.fixture and pytest.yield_fixture, but both always call the finalizers. How can I detect that an exception was raised in the test function?
test.py StatisticClass: start
FStatisticClass: stop
finalizer
content of test.py:
@pytest.mark.usefixtures("statistic_maker")
def test_dummy():
    """Always fail, to show what the fixture does when the test raises."""
    raise Exception()
content of conftest.py:
class StatisticClass(object):
    """Helper driven by the ``statistic_maker`` fixture; prints each step."""

    def __init__(self, req):
        # The pytest ``request`` object handed over by the fixture.
        self.req = req

    def start(self):
        """Announce the start of the measured section."""
        print("StatisticClass: start")

    def stop(self):
        """Announce the end of the measured section."""
        print("StatisticClass: stop")

    def if_not_exception(self):
        """
        I don't want to call this if Exception inside yield.
        Maybe, there is any info in request object?
        """
        print("finalizer")
@pytest.yield_fixture(scope="function")
def statistic_maker(request):
    """Start a ``StatisticClass`` before the test and stop it afterwards.

    ``if_not_exception`` is registered as a finalizer before the test runs,
    and ``stop`` is called once the test body has finished.
    """
    ds = StatisticClass(request)
    ds.start()
    request.addfinalizer(ds.if_not_exception)
    yield
    ds.stop()
P.S. I can't use a decorator, because I am using a fixture.

How to count sqlalchemy queries in unit tests

In Django I often assert the number of queries that should be made so that unit tests catch new N+1 query problems
from django import db
from django.conf import settings
settings.DEBUG=True
class SendData(TestCase):
    """Assert how many queries a piece of ORM code issues."""

    def test_send(self):
        # Start from an empty query log so only this test's queries count.
        db.connection.queries = []
        event = Events.objects.all()[1:]
        s = str(event) # QuerySet is lazy, force retrieval
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(db.connection.queries), 2)
In SQLAlchemy, tracing to STDOUT is enabled by setting the echo flag on the
engine:
# Turn on the engine's echo flag so its tracing goes to STDOUT.
engine.echo=True
What is the best way to write tests that count the number of queries made by SQLAlchemy?
class SendData(TestCase):
    """Skeleton of the desired SQLAlchemy query-count test."""

    def test_send(self):
        event = session.query(Events).first()
        s = str(event)
        # ``...`` stands for the yet-unknown "number of queries issued" value;
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual( ... , 2)
I've created a context manager class for this purpose:
class DBStatementCounter(object):
    """
    Use as a context manager to count the number of execute()'s performed
    against the given sqlalchemy connection.

    Usage:
        with DBStatementCounter(conn) as ctr:
            conn.execute("SELECT 1")
            conn.execute("SELECT 1")
        assert ctr.get_count() == 2
    """

    def __init__(self, conn):
        self.conn = conn
        self.count = 0
        # Will have to rely on this since sqlalchemy 0.8 does not support
        # removing event listeners
        self.do_count = False
        sqlalchemy.event.listen(conn, 'after_execute', self.callback)

    def __enter__(self):
        # Counting is active only inside the ``with`` block.
        self.do_count = True
        return self

    def __exit__(self, *_):
        self.do_count = False

    def get_count(self):
        """Return the number of executes seen while counting was enabled."""
        return self.count

    def callback(self, *_):
        """Event hook: bump the counter when counting is enabled."""
        if self.do_count:
            self.count += 1
Use SQLAlchemy Core Events to log/track the queries executed (you can attach the listener from your unit tests, so it doesn't impact the performance of the actual application):
# Register catch_queries for the engine's "before_cursor_execute" event.
event.listen(engine, "before_cursor_execute", catch_queries)
Now you write the function catch_queries, where the way depends on how you test. For example, you could define this function in your test statement:
def test_something(self):
    """Collect every SQL statement emitted while the test runs."""
    stmts = []

    # Parameter list of SQLAlchemy's ``before_cursor_execute`` event; a
    # literal ``...`` in a signature is not valid Python.
    def catch_queries(conn, cursor, statement, parameters, context, executemany):
        stmts.append(statement)
    # Now attach it as a listener and work with the collected events after running your test
The above method is just an inspiration. For extended cases you'd probably like to have a global cache of events that you empty after each test. The reason is that prior to 0.9 (current dev) there is no API to remove event listeners. Thus make one global listener that accesses a global list.
What about using flask_sqlalchemy.get_debug_queries()? By the way, this is the approach used internally by Flask Debug Toolbar; check its source.
from flask_sqlalchemy import get_debug_queries
def test_list_with_assuring_queries_count(app, client):
    """Check that the notebooks view issues exactly three queries."""
    with app.app_context():
        # here generating some test data
        # NOTE(review): the original indentation was lost; the run-creation
        # loop is assumed to be nested inside the notebook loop. Confirm.
        for _ in range(10):
            notebook = create_test_scheduled_notebook_based_on_notebook_file(
                db.session, owner='testing_user',
                schedule={"kind": SCHEDULE_FREQUENCY_DAILY}
            )
            for _ in range(100):
                create_test_scheduled_notebook_run(db.session, notebook_id=notebook.id)
    with app.app_context():
        # after resetting the context, call the actual view whose number of
        # queries we want to assert
        client.get(url_for('notebooks.personal_notebooks'))
        assert len(get_debug_queries()) == 3
Keep in mind that, to reset the context and the query count, you have to enter with app.app_context() right before the exact code you want to measure.
Slightly modified version of @omar-tarabai's solution that removes the event listener when exiting the context:
from sqlalchemy import event
class QueryCounter(object):
    """Context manager to count SQLALchemy queries."""

    def __init__(self, connection):
        # Despite the attribute name, this holds the connection's engine;
        # the listener is attached to that engine.
        self.connection = connection.engine
        self.count = 0

    def __enter__(self):
        event.listen(self.connection, "before_cursor_execute", self.callback)
        return self

    def __exit__(self, *args, **kwargs):
        # Detach the listener so nothing is counted outside the block.
        event.remove(self.connection, "before_cursor_execute", self.callback)

    def callback(self, *args, **kwargs):
        """Event hook: count one more cursor execution."""
        self.count += 1
Usage:
# Count the queries issued inside the block, then report the total.
with QueryCounter(session.connection()) as counter:
    session.query(XXX).all()
    session.query(YYY).all()

print(counter.count) # 2

WSGI application middleware to handle SQLAlchemy session

My WSGI application uses SQLAlchemy. I want to start session when request starts, commit it if it's dirty and request processing finished successfully, make rollback otherwise. So, I need to implement behavior of Django's TransactionMiddleware.
So, I suppose that I should create WSGI middleware and make following stuff:
Create and add DB session to environ on pre-processing.
Get DB session from environ and call commit() on post-processing, if no errors occurred.
Get DB session from environ and call rollback() on post-processing, if some errors occurred.
Step 1 is obvious for me:
class DbSessionMiddleware:
    """WSGI middleware that puts a fresh DB session into ``environ``."""

    def __init__(self, app):
        self.app = app

    def __call__(self, environ, start_response):
        # Pre-processing step: the wrapped app reads the session from here.
        environ['db_session'] = create_session()
        return self.app(environ, start_response)
Steps 2 and 3 are not. I found this example of a post-processing task:
class Caseless:
    """WSGI middleware that lower-cases every chunk of the response body."""

    def __init__(self, app):
        self.app = app

    def __call__(self, environ, start_response):
        """Yield the wrapped app's chunks, lower-cased.

        Because this is a generator, the wrapped app is only called when the
        server starts iterating over the response.
        """
        chunks = self.app(environ, start_response)
        try:
            for chunk in chunks:
                yield chunk.lower()
        finally:
            # WSGI requires that close() on the application's iterable is
            # called when the response is finished, if it has one.
            close = getattr(chunks, "close", None)
            if close is not None:
                close()
It contains comment:
Note that the __call__ function is a Python generator, which is typical for this sort of “post-processing” task.
Could you please clarify how it works and how I can solve my issue in a similar way?
Thanks,
Boris.
For step 1 I use SQLAlchemy scoped sessions:
# Engine built from the project settings; echo follows the DEBUG setting.
engine = create_engine(settings.DB_URL, echo=settings.DEBUG, client_encoding='utf8')
# Declarative base created for the model classes.
Base = declarative_base()
# Session factory bound to the engine above.
sm = sessionmaker(bind=engine)
# Scoped wrapper around the factory; call get_session() to obtain a session.
get_session = scoped_session(sm)
They return the same thread-local session for each get_session() call.
Steps 2 and 3 currently look like this:
class DbSessionMiddleware:
    """WSGI middleware that wraps each request in a nested transaction.

    The transaction is committed when the wrapped app returns normally and
    rolled back when it raises.
    """

    def __init__(self, app):
        self.app = app

    def __call__(self, environ, start_response):
        try:
            db.get_session().begin_nested()
            response = self.app(environ, start_response)
        except BaseException:
            db.get_session().rollback()
            raise
        # Commit only on success; a ``finally`` clause would also commit
        # right after the rollback above.
        # NOTE(review): if the app returns a lazy iterable, the body is
        # produced after this commit. Confirm views finish their DB work
        # before returning.
        db.get_session().commit()
        return response
As you can see, I start a nested transaction on the session so that I can roll back even queries that were already committed in views.