Airflow Connections & pandas.to_sql: don't work together? - sqlalchemy

In my DAG:
I need to use pandas .to_sql.
It works with a plain sqlalchemy engine, but that is not safe (the credentials are hard-coded).
When I use a connection defined in Airflow instead, I get an error — why?
from sqlalchemy import create_engine

# Works, but the credentials are hard-coded in the DAG source -- not safe!
# NOTE: the separator between user-info and host in a SQLAlchemy URL is
# '@', not '#'.
conn = create_engine('postgresql+psycopg2://login:pass@localhost:5432/de')

# Safer: keep the credentials in an Airflow connection, PG_PROJECT_CONN,
# and let the hook build the connectable.
dwh_hook = PostgresHook('PG_PROJECT_CONN')
# pandas only supports SQLAlchemy connectables (engine/connection), a
# database URI string, or a sqlite3 connection; the raw DBAPI connection
# from get_conn() triggers the "other DBAPI2 objects are not tested"
# warning.  Ask the hook for a SQLAlchemy engine instead.
engine = dwh_hook.get_sqlalchemy_engine()

# Both read_sql and to_sql accept the SQLAlchemy engine.
df_ = pd.read_sql('select * from stg.restaurants;', engine)
df_restaurants.to_sql('restaurants', engine, if_exists='append',
                      schema='stg', index=False, chunksize=100)

Related

How do you set a query timeout for MS SQL using SqlAlchemy with pymssql?

The pymssql.connect documentation lists a timeout argument, but how do you pipe that through from SqlAlchemy using a mssql+pymssql:// connection?
Through experimentation, it must be passed as a connection URL query parameter:
from sqlalchemy import create_engine

# The query timeout must be passed as a URL query parameter.  NOTE: the
# separator between the credentials and the host is '@' -- a '#' would be
# parsed as a URL fragment and break the connection string.
ms_url = f"mssql+pymssql://{username}:{password}@{host}:{port}?timeout=10"
ms_engine = create_engine(
    ms_url,
    pool_pre_ping=True,  # any other engine args
    connect_args={
        # other connect args go here -- but "timeout" does NOT work as a
        # connect arg; only login_timeout is honoured here
        "login_timeout": 3,
    },
)
connection = ms_engine.connect()
Alternatives did not work:
Passing connect_args={"timeout": 10} does not work, queries longer than 10s still run.
The sqlalchemy.engine.Engine.connect() call does not accept a timeout kwarg.

Snowflake Account must be specified error, but it is specified

I have the below code, I have the account, username, pw, etc, but I'm still seeing the below error:
raise error_class( sqlalchemy.exc.ProgrammingError:
(snowflake.connector.errors.ProgrammingError) 251001: Account must be
specified
I've also tried by changing the engine variable in my created_db_engine function like below, but I see the same error:
# Attempt from the question: a raw snowflake-connector DBAPI connection.
# NOTE(review): despite the variable name, this is not a SQLAlchemy engine;
# it cannot replace the create_engine() result used below.
engine = snowflake.connector.connect(
user='USER',
password='PASSWORD',
account='ACCOUNT',
warehouse='WAREHOUSE',
database='DATABASE',
schema='SCHEMA'
)
here is my code
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas, pd_writer
from pandas import json_normalize
import requests
# Placeholder from the question -- stands in for a real pandas DataFrame.
df = 'my_dataframe'
def create_db_engine(db_name, schema_name):
    """Build a Snowflake SQLAlchemy URL object.

    NOTE: ``account`` must NOT include ``.snowflakecomputing.com`` -- the
    connector appends the domain automatically.  Passing the full hostname
    is what raises "251001: Account must be specified".

    NOTE(review): db_name and schema_name are currently ignored and the
    database/schema are hard-coded -- confirm whether they should be used.
    """
    engine = URL(
        account="ab12345.us-west-2",  # no ".snowflakecomputing.com" suffix
        user="my_user",
        password="my_pw",
        database="DB",
        schema="PUBLIC",
        warehouse="WH1",
        role="DEV",
    )
    return engine
def create_table(out_df, table_name, idx=False):
    """Append ``out_df`` to ``table_name`` in Snowflake.

    Parameters:
        out_df: DataFrame to write.
        table_name: target table name.
        idx: write the DataFrame index as a column when True.

    Returns True (the original always did, even after a handled error).
    """
    url = create_db_engine(db_name="db", schema_name="skm")
    engine = create_engine(url)
    connection = engine.connect()
    try:
        out_df.to_sql(
            table_name, connection, if_exists="append", index=idx, method=pd_writer
        )
    except ConnectionError:
        # NOTE(review): SQLAlchemy/snowflake raise their own exception
        # types, not the builtin ConnectionError -- this handler is
        # unlikely ever to fire; consider sqlalchemy.exc.DBAPIError.
        print("Unable to connect to database!")
    finally:
        # Always release the connection and pool, even on failure.
        connection.close()
        engine.dispose()
    return True
# head is a method -- it must be called; without the parentheses only the
# bound-method repr is printed, not the data.
print(df.head())
create_table(df, "reporting")
Given the Snowflake documentation for SqlAlchemy, your account parameter should not include snowflakecomputing.com.
So you should try with ab12345.us-west-2 and connector will append the domain part automatically for you.

Error on converting dataframe to SQL - Pandas

I am getting this error: DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': Not all parameters were used in the SQL statement. when trying to convert my dataframe to sql
My connection variable:
# DBAPI connection from mysql-connector.  This is what pandas rejects:
# to_sql only supports a SQLAlchemy connectable or a sqlite3 connection,
# and with an unknown DBAPI object pandas falls back to sqlite behaviour
# (note the 'SELECT name FROM sqlite_master ... name=?' in the error),
# whose '?' paramstyle mysql-connector does not use -- hence
# "Not all parameters were used in the SQL statement".
con = mysql.connector.connect(
host="****",
port="****",
database="*****",
user="*****",
password="*****"
)
My try to convert it to sql:
# Fails with the raw DBAPI connection above -- pass a SQLAlchemy engine instead.
df.to_sql('menageiro2',con)
Note: I am using:
import pandas as pd
import sqlalchemy
import mysql.connector
The reference says con: sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection. You appear to be passing in a mysql connection instead of a SQLAlchemy engine (that you connected to MySQL):
# Build a SQLAlchemy engine for MySQL.  NOTE: the separator between the
# credentials and the host is '@' -- a '#' would be read as a URL fragment.
con = sqlalchemy.create_engine(
    'mysql+mysqlconnector://<user>:<password>@<host>:<port>/<default_db>...')

FastAPI unittesting not overriding get_db

I'm just trying to get FastAPI unittests working with SQLAlchemy, but I'm having trouble testing with objects created in the database. I have the following setup, as per the docs.
main.py
# main.py -- application wiring: a single HTML route for the games list.
routes = [
APIRoute('/games/', views.games_list, name='index', response_class=HTMLResponse),
]
settings = Settings()
app = FastAPI(debug=settings.debug, routes=routes)
views.py
# Dependency
# Dependency: yield a database session, guaranteeing it is closed afterwards.
def get_db():
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
# View: render the full list of games with the injected session.
# NOTE(review): db.query(...) is a synchronous call inside an async view --
# it blocks the event loop; confirm that is acceptable for this app.
async def games_list(request: Request, db: Session = Depends(get_db)):
settings = Settings()
games = db.query(Game).all()
return settings.templates.TemplateResponse('games/list.jinja', {'request': request, 'games': games})
database.py
def prepare_database(settings, delete_existing=False):
    """Create all tables and return the bound engine.

    Parameters:
        settings: object exposing ``database_url``.
        delete_existing: drop every table before re-creating when True.
            The module-level call below passes this keyword; the original
            signature did not accept it, which raised TypeError at import.
    """
    engine = create_engine(settings.database_url)
    if delete_existing:
        Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    return engine
# Build the engine and session factory at import time.  Pass only the
# arguments the prepare_database signature accepts -- an unexpected
# keyword argument raises TypeError before the app can even start.
engine = prepare_database(settings=Settings())
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
conftest.py
# The decorator must be '@pytest.fixture' -- with '#' it is just a comment,
# so db_conn is a plain function and pytest never treats it as a fixture
# (assumes 'import pytest' at the top of conftest.py -- standard).
@pytest.fixture
def db_conn():
    """Yield a testing session; always close it on teardown."""
    session = SessionLocalTesting()
    try:
        yield session
    finally:
        session.close()
# '@pytest.fixture', not '#pytest.fixture' -- otherwise cli is never
# registered as a fixture and the get_db override is never installed,
# which is exactly the symptom described (views hit the real database).
@pytest.fixture
def cli(db_conn):
    """TestClient with get_db overridden to use the testing session."""
    def override_get_db():
        session = SessionLocalTesting()
        try:
            yield session
        finally:
            session.close()

    app.dependency_overrides[get_db] = override_get_db
    with TestClient(app) as client:
        yield client
test file
# Exercises the games list view against the test database.
def test_games_list(cli, factory, db_conn):
factory.create_game()
# This will return the game I have created with my factory, definitely in the test db.
print(db_conn.query(Game.name).all())
r = cli.get('/games/')
assert 'DnD Game' in r.content.decode()
My issue is that I can't get the objects from the test db in view. If I print(db.bind.database.url ) in views the test database is not being used, so it's trying to get items from the real database, not the test one.
So it looks like get_db is not getting overridden, though I'm not sure why.
Thanks in advance
I think you forgot to specify the scope of the fixture.
Create the DB connection and override the DB dependency via @pytest.fixture; fixtures take a scope argument, e.g. pytest.fixture(scope='session'), and module-level and several other scopes are available too (see the pytest docs on fixture scopes).
Below is an example that makes the app use a test database instead of the real one:
import pytest
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy_utils import create_database, drop_database
from app.main import app
from db.base import Base
from db.settings import get_database
from settings.config import (
TEST_DATABASE_URI as SQLALCHEMY_DATABASE_URL,
)
# SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
# Engine bound to the dedicated test database.
engine = create_engine(SQLALCHEMY_DATABASE_URL, pool_pre_ping=True)


def override_get_db():
    """Dependency override: yield a session bound to the test engine."""
    # Create the session *before* the try block: if Session() itself
    # raised inside try, the finally clause would hit an unbound 'db'.
    db = Session(autocommit=False, autoflush=False, bind=engine)
    try:
        yield db
    finally:
        db.close()
# '@pytest.fixture', not '#pytest.fixture' -- otherwise this never runs,
# the schema is never created, and the dependency override is never set.
@pytest.fixture(scope="session", autouse=True)
def create_db():
    """Create the test database for the whole session; drop it afterwards."""
    create_database(SQLALCHEMY_DATABASE_URL)
    print("\n" + "\x1b[6;30;42m" + "Creating test database." + "\x1b[0m")
    Base.metadata.create_all(bind=engine)
    app.dependency_overrides[get_database] = override_get_db
    yield 1
    # Teardown: everything after the yield runs once the session ends.
    drop_database(SQLALCHEMY_DATABASE_URL)
    print("\n" + "\x1b[6;30;42m" + "Delete test database." + "\x1b[0m")
# '@pytest.fixture', not '#pytest.fixture'.
@pytest.fixture()
def get_db_session():
    """Yield a session for direct DB access inside a test."""
    session = Session(autocommit=False, autoflush=False, bind=engine)
    try:
        yield session
    finally:
        # Close even if the test body raised.
        session.close()
# '@pytest.fixture', not '#pytest.fixture'.
@pytest.fixture()
def client():
    """Yield a TestClient; the with-block runs app startup/shutdown events."""
    with TestClient(app) as client:
        yield client
Let me know whether this solves the problem.

Cannot RESTORE within a transaction; autocommit is on

I'm using sqlalchemy with pyodbc to restore a mssql ".bak" file. I've followed advice from previous posts regarding getting around transactions but it doesn't seem to change anything. Any help would be appreciated.
# Question code: runs RESTORE through a raw_connection() while toggling
# connection.autocommit around the statement.  Per the error below, the
# server still sees an open transaction ("Cannot perform a backup or
# restore operation within a transaction") -- the working fix is in the
# next snippet (autocommit passed via connect_args at engine creation).
from urllib.parse import quote_plus
from sqlalchemy import create_engine
params = quote_plus("Driver={SQL Server Native Client 11.0};"
"Server=Computer\SQLEXPRESS;"
"Database=master;"
"Trusted_Connection=yes;")
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)
connection = engine.raw_connection()
# Paths to the .bak source and the target data/log files.
db_path = r"C:\\Path\\to\\OutputDB.bak"
move_path = r"C:\\Path\\to\\backup\\db.mdf"
move_log_path = r"C:\\Path\\to\\backup\\db_Log.ldf"
sql_cmd = f"""
RESTORE DATABASE [db]
FROM DISK = N'{db_path}'
WITH FILE = 1,
MOVE N'db'
TO N'{move_path}',
MOVE N'test_log'
TO N'{move_log_path}',
RECOVERY,
NOUNLOAD,
REPLACE,
STATS = 5
"""
# Toggling autocommit on the already-open connection did not help (see
# the accepted fix below).
connection.autocommit = True
cursor = connection.cursor()
cursor.execute(sql_cmd)
# Drain all result sets so the RESTORE runs to completion.
while cursor.nextset():
pass
connection.autocommit = False
I get the below error message:
ProgrammingError: ('42000', '[42000] [Microsoft][SQL Server Native Client 11.0][SQL Server]Cannot perform a backup or restore operation within a transaction. (3021) (SQLExecDirectW); [42000] [Microsoft][SQL Server Native Client 11.0][SQL Server]RESTORE DATABASE is terminating abnormally. (3013)')
I managed to fix this by passing connect_args={'autocommit': True} to create_engine. Neither cursor.execute(sql_cmd).execution_options(autocommit=True) or connection.autocommit = True appeared to work.
# Working version: autocommit is enabled at connect time via
# connect_args={'autocommit': True}, so pyodbc never opens the implicit
# transaction that RESTORE forbids.
from urllib.parse import quote_plus
from sqlalchemy import create_engine
params = quote_plus("Driver={SQL Server Native Client 11.0};"
"Server=Computer\SQLEXPRESS;"
"Database=master;"
"Trusted_Connection=yes;")
# The key difference from the failing attempt: connect_args autocommit.
engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params, connect_args={'autocommit': True})
connection = engine.raw_connection()
# Paths to the .bak source and the target data/log files.
db_path = r"C:\\Path\\to\\OutputDB.bak"
move_path = r"C:\\Path\\to\\backup\\db.mdf"
move_log_path = r"C:\\Path\\to\\backup\\db_Log.ldf"
sql_cmd = f"""
RESTORE DATABASE [db]
FROM DISK = N'{db_path}'
WITH FILE = 1,
MOVE N'db'
TO N'{move_path}',
MOVE N'test_log'
TO N'{move_log_path}',
RECOVERY,
NOUNLOAD,
REPLACE,
STATS = 5
"""
cursor = connection.cursor()
cursor.execute(sql_cmd)
# Drain result sets so the RESTORE's progress messages are consumed
# and the operation completes.
while cursor.nextset():
pass