SQLAlchemy event loop closed - sqlalchemy

I was messing around with the SQLAlchemy ORM functionality.
I was able to make it work in my main app, but when I created a separate file test.py to test something, I kept getting "event loop is closed" errors:
Exception ignored in: <function Connection.__del__ at 0x7f7041c07310>
Traceback (most recent call last):
File "/home/krypt/Documents/Projects/app/env/lib/python3.9/site-packages/aiomysql/connection.py", line 1072, in __del__
File "/home/krypt/Documents/Projects/app/env/lib/python3.9/site-packages/aiomysql/connection.py", line 298, in close
File "/usr/lib/python3.9/asyncio/selector_events.py", line 700, in close
File "/usr/lib/python3.9/asyncio/base_events.py", line 746, in call_soon
File "/usr/lib/python3.9/asyncio/base_events.py", line 510, in _check_closed
RuntimeError: Event loop is closed
Here is the code for test.py:
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import Table, Column, Integer, String
from sqlalchemy.future import select
from sqlalchemy import delete
import asyncio

Base = declarative_base()

class Table(Base):
    __tablename__ = 'Table'
    id = Column(Integer, primary_key=True)
    string = Column(String(30))
    prefix = Column(String(1), default="!")

async def main():
    engine = create_async_engine("mariadb+aiomysql://user:password@127.0.0.1:3306/dbname")
    session = AsyncSession(engine)
    stmt = select(Table).where(Table.prefix == "!")
    res = await session.execute(stmt)
    row = res.scalars().first()
    print(row)

asyncio.run(main())

The problem seems to be that aiomysql is trying to close its connection after the event loop has closed. I could make the code in the question work by ensuring that the session was closed and the engine disposed.
async def main():
    engine = create_async_engine("mariadb+aiomysql://user:password@127.0.0.1:3306/dbname")
    async with AsyncSession(engine) as session:
        stmt = select(Table).where(Table.prefix == "!")
        res = await session.execute(stmt)
        row = res.scalars().first()
        print(row)
    await engine.dispose()
There's some discussion about this here (towards the end); explicitly closing and disposing is the recommended workaround to prevent the connection's __del__ method from executing after the event loop has closed.
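If anything inside main() can raise, the same cleanup can be made unconditional with try/finally; a minimal sketch of the pattern, reusing the question's placeholder URL and model:

async def main():
    engine = create_async_engine("mariadb+aiomysql://user:password@127.0.0.1:3306/dbname")
    try:
        async with AsyncSession(engine) as session:
            res = await session.execute(select(Table).where(Table.prefix == "!"))
            print(res.scalars().first())
    finally:
        # dispose while the event loop is still running, so aiomysql never
        # has to close connections from __del__ after the loop has shut down
        await engine.dispose()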

Related

Getting error with SQL query using Python

I am trying to fetch the list of SQL queries running for more than 3600 seconds and kill those IDs using Python. Below is the code:
import json
import mysql.connector
import pymysql

def main():
    # TODO implement
    connection = pymysql.connect(user='', password='',
                                 host='',
                                 port=3306,
                                 database='')
    cursor = connection.cursor()  # get the cursor
    # cursor.execute('SHOW PROCESSLIST;')
    # extracted_data = cursor.fetchall();
    # for i in extracted_data:
    #     print(i)
    with connection.cursor() as cursor:
        print(cursor.execute('SHOW PROCESSLIST'))
        for item in cursor.fetchall():
            if item.get('Time') > 3600 and item.get('command') == 'query':
                _id = item.get('Id')
                print('kill %s' % item)
                cursor.execute('kill %s', _id)
    connection.close()

main()
Below is the error I am getting:
"C:\drive c\pyfile\venv\Scripts\python.exe" "C:/drive c/pyfile/sqlnew2.py"
Traceback (most recent call last):
File "C:\drive c\pyfile\sqlnew2.py", line 23, in <module>
main()
File "C:\drive c\pyfile\sqlnew2.py", line 18, in main
if item.get('Time') > 3600 and item.get('command') == 'query':
AttributeError: 'tuple' object has no attribute 'get'
With the default cursor, .fetchall() returns a tuple of tuples, so each row is a tuple, not a dictionary. Therefore you should access the elements using numerical indexes, for example item[0], item[1], etc.
As an alternative, if you want to fetch the results as a dictionary, you can use a DictCursor.
First import it:
import pymysql.cursors
Then modify the cursor line like that:
with connection.cursor(pymysql.cursors.DictCursor) as cursor:
    ...
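Putting both fixes together, here is a minimal sketch of the kill loop, assuming the standard SHOW PROCESSLIST column names ('Id', 'Command', 'Time'); the connection credentials are placeholders:

import pymysql
import pymysql.cursors

connection = pymysql.connect(user='user', password='password', host='host',
                             port=3306, database='db',
                             cursorclass=pymysql.cursors.DictCursor)
with connection.cursor() as cursor:
    cursor.execute('SHOW PROCESSLIST')
    for item in cursor.fetchall():
        # DictCursor rows are keyed by the column names, which are
        # capitalized; a running statement has Command == 'Query'
        if item['Time'] > 3600 and item['Command'] == 'Query':
            print('killing %s' % item['Id'])
            cursor.execute('KILL %s', (item['Id'],))
connection.close()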

Snowflake Account must be specified error, but it is specified

I have the below code with the account, username, pw, etc., but I'm still seeing the following error:
raise error_class(
sqlalchemy.exc.ProgrammingError: (snowflake.connector.errors.ProgrammingError) 251001: Account must be specified
I've also tried changing the engine variable in my create_db_engine function as below, but I see the same error:
engine = snowflake.connector.connect(
    user='USER',
    password='PASSWORD',
    account='ACCOUNT',
    warehouse='WAREHOUSE',
    database='DATABASE',
    schema='SCHEMA'
)
Here is my code:
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas, pd_writer
from pandas import json_normalize
import requests

df = 'my_dataframe'

def create_db_engine(db_name, schema_name):
    engine = URL(
        account="ab12345.us-west-2.snowflakecomputing.com",
        user="my_user",
        password="my_pw",
        database="DB",
        schema="PUBLIC",
        warehouse="WH1",
        role="DEV"
    )
    return engine

def create_table(out_df, table_name, idx=False):
    url = create_db_engine(db_name="db", schema_name="skm")
    engine = create_engine(url)
    connection = engine.connect()
    try:
        out_df.to_sql(
            table_name, connection, if_exists="append", index=idx, method=pd_writer
        )
    except ConnectionError:
        print("Unable to connect to database!")
    finally:
        connection.close()
        engine.dispose()
    return True

print(df.head)
create_table(df, "reporting")
Given the Snowflake documentation for SQLAlchemy, your account parameter should not include snowflakecomputing.com.
So you should try ab12345.us-west-2, and the connector will append the domain part automatically for you.
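In other words, the fix is a one-line change to the URL() call; a sketch using the question's own placeholders:

def create_db_engine(db_name, schema_name):
    return URL(
        account="ab12345.us-west-2",  # no ".snowflakecomputing.com" suffix
        user="my_user",
        password="my_pw",
        database="DB",
        schema="PUBLIC",
        warehouse="WH1",
        role="DEV"
    )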

FastAPI unit testing not overriding get_db

I'm just trying to get FastAPI unit tests working with SQLAlchemy, but I'm having trouble testing with objects created in the database. I have the following setup, as per the docs.
main.py
routes = [
    APIRoute('/games/', views.games_list, name='index', response_class=HTMLResponse),
]

settings = Settings()
app = FastAPI(debug=settings.debug, routes=routes)
views.py
# Dependency
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

async def games_list(request: Request, db: Session = Depends(get_db)):
    settings = Settings()
    games = db.query(Game).all()
    return settings.templates.TemplateResponse('games/list.jinja', {'request': request, 'games': games})
database.py
def prepare_database(settings):
    engine = create_engine(settings.database_url)
    Base.metadata.create_all(engine)
    return engine

engine = prepare_database(delete_existing=False, settings=Settings())
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
conftest.py
@pytest.fixture
def db_conn():
    session = SessionLocalTesting()
    try:
        yield session
    finally:
        session.close()

@pytest.fixture
def cli(db_conn):
    def override_get_db():
        session = SessionLocalTesting()
        try:
            yield session
        finally:
            session.close()
    app.dependency_overrides[get_db] = override_get_db
    with TestClient(app) as client:
        yield client
test file
def test_games_list(cli, factory, db_conn):
    factory.create_game()
    # This will return the game I have created with my factory, definitely in the test db.
    print(db_conn.query(Game.name).all())
    r = cli.get('/games/')
    assert 'DnD Game' in r.content.decode()
My issue is that I can't get the objects from the test db in the view. If I print(db.bind.database.url) in the view, the test database is not being used, so it's trying to get items from the real database, not the test one.
So it looks like get_db is not getting overridden, though I'm not sure why.
Thanks in advance
I think you forgot to specify the scope of the fixture. When you create a DB connection and override the DB dependency through a @pytest.fixture, you can give the fixture a scope, e.g. pytest.fixture(scope='session'); there is a module-level scope as well, and lots of other scopes are available.
Here is an example that uses the test DB instead of the real one:
import pytest
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy_utils import create_database, drop_database
from app.main import app
from db.base import Base
from db.settings import get_database
from settings.config import (
    TEST_DATABASE_URI as SQLALCHEMY_DATABASE_URL,
)

# SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(SQLALCHEMY_DATABASE_URL, pool_pre_ping=True)

def override_get_db():
    """ Override """
    try:
        db = Session(autocommit=False, autoflush=False, bind=engine)
        yield db
    finally:
        db.close()

@pytest.fixture(scope="session", autouse=True)
def create_db():
    """ creating db model in database """
    create_database(SQLALCHEMY_DATABASE_URL)
    print("\n" + "\x1b[6;30;42m" + "Creating test database." + "\x1b[0m")
    Base.metadata.create_all(bind=engine)
    app.dependency_overrides[get_database] = override_get_db
    yield 1
    drop_database(SQLALCHEMY_DATABASE_URL)
    print("\n" + "\x1b[6;30;42m" + "Delete test database." + "\x1b[0m")

@pytest.fixture()
def get_db_session():
    """ Getting session for db transaction """
    session = Session(autocommit=False, autoflush=False, bind=engine)
    yield session
    session.close()

@pytest.fixture()
def client():
    """ Getting testclient of app """
    with TestClient(app) as client:
        yield client
Let me know if you still face the problem.
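One more thing worth double-checking in the original setup (an observation beyond the answer above): app.dependency_overrides is keyed by the exact function object passed to Depends, so the override must be registered against the get_db imported from views, not a same-named function defined elsewhere. A minimal sketch:

# conftest.py: the dict key must be the very object used in Depends(...)
from views import get_db
from main import app

def override_get_db():
    session = SessionLocalTesting()  # the test session factory from the question
    try:
        yield session
    finally:
        session.close()

app.dependency_overrides[get_db] = override_get_db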

Multiprocessing, SQLAlchemy and scoped_sessions

I want to run multiple strategies in concurrent processes. I came up with something like this:
import logging
import multiprocessing
import os
from sqlalchemy.orm import scoped_session, Session
from pyutil.sql.interfaces.symbols.symbol import Symbol
from pyutil.sql.session import get_one_or_create

class StratRunner(object):
    def __init__(self, session_scope, logger=None):
        assert isinstance(session_scope, scoped_session)
        self.__session_scope = session_scope
        self.__logger = logger or logging.getLogger(__name__)

    # this function is the target for mp.Process
    def _run(self, strategy):
        self.__logger.debug("Pid {pid}".format(pid=os.getpid()))
        symbols = self.symbols
        self.__logger.info("Run strategy {s}".format(s=strategy))
        configuration = strategy.configuration()
        strategy.upsert(portfolio=configuration.portfolio, symbols=symbols, days=5)

    def run_strategies(self):
        # loop over all active strategies!
        jobs = []
        # we are in the main thread here...
        for s in self.active_strategies:
            # what shall I give to the Process? The strategy object, the strategy_id, a session instance, the session_scope...
            job = multiprocessing.Process(target=self._run, kwargs={"strategy": s})
            job.name = s.name
            jobs.append(job)
        run_jobs(jobs, logger=self.__logger)

    @property
    def symbols(self):
        return {s.name: s for s in self.__session_scope().query(Symbol)}

    @property
    def active_strategies(self):
        return self.__session_scope().query(Strategy).filter(Strategy.active == True).all()
I am aware of tons of documentation on this topic, but I am overwhelmed.
I loop over the rows of a table (the active_strategies, a class Strategies(Base)...). I then hand the strategy object over to the _run method and update the strategy object within that very method. Please feel free to shred my code.
I am in particular puzzled about what to give to the _run method: shall I hand over the strategy object, the strategy ID, the session, the scoped_session, ...?
I have now created a runner object:
import abc
import logging
import os
from sqlalchemy.orm import sessionmaker

class Runner(object):
    __metaclass__ = abc.ABCMeta

    def __init__(self, engine, logger=None):
        self.__engine = engine
        self._logger = logger or logging.getLogger(__name__)
        self.__jobs = []

    @property
    def _session(self):
        """ Create a fresh new session... """
        self.__engine.dispose()
        factory = sessionmaker(self.__engine)
        return factory()

    def _run_jobs(self):
        self._logger.debug("PID main {pid}".format(pid=os.getpid()))
        for job in self.jobs:
            # all jobs get the trigger
            self._logger.info("Job {j}".format(j=job.name))
            job.start()
        for job in self.jobs:
            self._logger.info("Wait for job {j}".format(j=job.name))
            job.join()
            self._logger.info("Job {j} done".format(j=job.name))

    @property
    def jobs(self):
        return self.__jobs

    @abc.abstractmethod
    def run(self):
        """ Described in the child class """
In particular, this class can provide a fresh session (via ._session). However, using this setup I see plenty of:
psycopg2.OperationalError: server closed the connection unexpectedly
| This probably means the server terminated abnormally
| before or while processing the request.
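A common recommendation from the SQLAlchemy documentation for this multiprocessing situation is to pass plain primary keys to the child process and to create the engine and session inside it, because pooled connections must not cross a fork. A minimal sketch under that assumption (DB_URL, parent_session and the Strategy model are placeholders standing in for the question's setup):

import multiprocessing
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

DB_URL = "postgresql://user:password@localhost/dbname"  # placeholder

def run_strategy(strategy_id):
    # child-process target: everything is created after the fork,
    # so no pooled connection is shared with the parent
    engine = create_engine(DB_URL)
    session = sessionmaker(bind=engine)()
    try:
        strategy = session.query(Strategy).get(strategy_id)
        # ... do the actual upsert work on `strategy` here ...
        session.commit()
    finally:
        session.close()
        engine.dispose()

# in the parent, hand over only primitive ids, never live ORM objects
jobs = [multiprocessing.Process(target=run_strategy, args=(i,))
        for (i,) in parent_session.query(Strategy.id)]
for job in jobs:
    job.start()
for job in jobs:
    job.join()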

Pipeline doesn't write to MySQL but also gives no error

I've tried to implement this pipeline in my spider.
After installing the necessary dependencies, I am able to run the spider without any errors, but for some reason it doesn't write to my database.
I'm pretty sure something is going wrong when connecting to the database, because when I enter a wrong password, I still don't get any error.
When the spider has scraped all the data, it needs a few minutes before it starts dumping the stats.
2017-08-31 13:17:12 [scrapy] INFO: Closing spider (finished)
2017-08-31 13:17:12 [scrapy] INFO: Stored csv feed (27 items) in: test.csv
2017-08-31 13:24:46 [scrapy] INFO: Dumping Scrapy stats:
Pipeline:
import MySQLdb.cursors
from twisted.enterprise import adbapi
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.utils.project import get_project_settings
from scrapy import log

SETTINGS = {}
SETTINGS['DB_HOST'] = 'mysql.domain.com'
SETTINGS['DB_USER'] = 'username'
SETTINGS['DB_PASSWD'] = 'password'
SETTINGS['DB_PORT'] = 3306
SETTINGS['DB_DB'] = 'database_name'

class MySQLPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.stats)

    def __init__(self, stats):
        print "init"
        # Instantiate DB
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
            host=SETTINGS['DB_HOST'],
            user=SETTINGS['DB_USER'],
            passwd=SETTINGS['DB_PASSWD'],
            port=SETTINGS['DB_PORT'],
            db=SETTINGS['DB_DB'],
            charset='utf8',
            use_unicode=True,
            cursorclass=MySQLdb.cursors.DictCursor
        )
        self.stats = stats
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        print "close"
        """ Cleanup function, called after crawling has finished to close open
        objects.
        Close ConnectionPool. """
        self.dbpool.close()

    def process_item(self, item, spider):
        print "process"
        query = self.dbpool.runInteraction(self._insert_record, item)
        query.addErrback(self._handle_error)
        return item

    def _insert_record(self, tx, item):
        print "insert"
        result = tx.execute(
            " INSERT INTO matches(type,home,away,home_score,away_score) VALUES (soccer,"+item["home"]+","+item["away"]+","+item["score"].explode("-")[0]+","+item["score"].explode("-")[1]+")"
        )
        if result > 0:
            self.stats.inc_value('database/items_added')

    def _handle_error(self, e):
        print "error"
        log.err(e)
Spider:
import scrapy
import dateparser
from crawling.items import KNVBItem

class KNVBspider(scrapy.Spider):
    name = "knvb"
    start_urls = [
        'http://www.knvb.nl/competities/eredivisie/uitslagen',
    ]
    custom_settings = {
        'ITEM_PIPELINES': {
            'crawling.pipelines.MySQLPipeline': 301,
        }
    }

    def parse(self, response):
        # www.knvb.nl/competities/eredivisie/uitslagen
        for row in response.xpath('//div[@class="table"]'):
            for div in row.xpath('./div[@class="row"]'):
                match = KNVBItem()
                match['home'] = div.xpath('./div[@class="value home"]/div[@class="team"]/text()').extract_first()
                match['away'] = div.xpath('./div[@class="value away"]/div[@class="team"]/text()').extract_first()
                match['score'] = div.xpath('./div[@class="value center"]/text()').extract_first()
                match['date'] = dateparser.parse(div.xpath('./preceding-sibling::div[@class="header"]/span/span/text()').extract_first(), languages=['nl']).strftime("%d-%m-%Y")
                yield match
If there are better pipelines available to do what I'm trying to achieve that'd be welcome as well. Thanks!
Update:
With the link provided in the accepted answer I eventually got to this function that's working (and thus solved my problem):
def process_item(self, item, spider):
    print "process"
    query = self.dbpool.runInteraction(self._insert_record, item)
    query.addErrback(self._handle_error)
    query.addBoth(lambda _: item)
    return query
Take a look at this for how to use adbapi with MySQL for saving scraped items. Note the difference between your process_item and their process_item method implementations. While you return the item immediately, they return the Deferred object, which is the result of the runInteraction method and which returns the item upon its completion. I think this is the reason your _insert_record never gets called.
If you can see the insert in your output, that's already a good sign.
I'd rewrite the insert function this way:
def _insert_record(self, tx, item):
    print "insert"
    raw_sql = "INSERT INTO matches(type,home,away,home_score,away_score) VALUES ('%s', '%s', '%s', '%s', '%s')"
    # note: Python strings have .split, not .explode
    sql = raw_sql % ('soccer', item['home'], item['away'], item['score'].split('-')[0], item['score'].split('-')[1])
    print sql
    result = tx.execute(sql)
    if result > 0:
        self.stats.inc_value('database/items_added')
It allows you to debug the SQL you're using. In your version you're not wrapping the string values in quotes ('), which is a syntax error in MySQL.
I'm not sure about your last values (score), so I treated them as strings.
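As a side note (this goes beyond the original answer): interpolating values with % leaves the query open to SQL injection and breaks on values that contain quotes. MySQLdb cursors (and the adbapi transaction wrapper) accept a parameter tuple that escapes values for you, so a safer variant might look like this:

def _insert_record(self, tx, item):
    # the driver escapes each %s placeholder, unlike Python's % operator
    home_score, away_score = item['score'].split('-')
    tx.execute(
        "INSERT INTO matches(type, home, away, home_score, away_score) "
        "VALUES (%s, %s, %s, %s, %s)",
        ('soccer', item['home'], item['away'], home_score, away_score)
    )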