I started a project using PostgreSQL and SQLAlchemy. Since I'm not an experienced programmer (I just started using classes) and I'm also quite new to databases, I've noticed some workflows I don't really understand.
What I understand up till now about classes is the following workflow:
# filename.py
class ClassName:
    def __init__(self):
        # do something
        ...

    def some_function(self, var1, var2):
        # do something with parameters
        ...
---------------------------------------
# main.py
from filename import ClassName

par1 = ...
par2 = ...
a = ClassName()
b = a.some_function(par1, par2)
Now I am creating tables from classes:
# base.py
from sqlalchemy.orm import declarative_base

Base = declarative_base()

# tables.py
from base import Base
from sqlalchemy import Column
from sqlalchemy import Integer, String

class A(Base):
    __tablename__ = "a"

    a_id = Column(Integer, primary_key=True)
    a_column = Column(String(30))

class B(Base):
    __tablename__ = "b"

    b_id = Column(Integer, primary_key=True)
    b_column = Column(String(30))
and
import typing
from base import Base
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy import Table
from sqlalchemy.orm import sessionmaker
from tables import A, B

metadata_obj = MetaData()

def create_tables(engine):
    session = sessionmaker()
    session.configure(bind=engine)
    Base.metadata.create_all(bind=engine)
    a = Table("a", metadata_obj, autoload_with=engine)
    b = Table("b", metadata_obj, autoload_with=engine)
    return (a, b)  # not sure return is needed

if __name__ == "__main__":
    username = "username"
    password = "AtPasswordHere!"
    dbname = "dbname"
    url = "postgresql://" + username + ":" + password + "@localhost/" + dbname
    engine = create_engine(url, echo=True, future=True)
    a, b = create_tables(engine)
Everything works fine, in that it creates tables a and b in the database. The point I don't understand is the following:
Both my IDE (pyflakes) and LGTM complain that the classes imported from tables are "imported but not used". (EDIT: I understand why it complains, in the sense that this is not the normal class workflow. My question is more about why this is not the normal class workflow.)
Is this normal behavior for this use case? I only see examples that use the above workflow.
Are there better methods to achieve the same result (but without the warnings)?
If this is the normal behavior: is there an explanation for it? I haven't read one anywhere.
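For what it's worth, one way to get the same result without the warning (a sketch, not an official recommendation): skip the reflection step and use the imported classes directly, since every mapped class already exposes its underlying Table as __table__.

# Sketch: create the schema and return the Table objects through the
# mapped classes, so the `from tables import A, B` import is visibly used.
from base import Base
from sqlalchemy import create_engine
from tables import A, B

def create_tables(engine):
    Base.metadata.create_all(bind=engine)
    return A.__table__, B.__table__  # the same Table objects, no reflection

if __name__ == "__main__":
    url = "postgresql://username:password@localhost/dbname"
    engine = create_engine(url, echo=True, future=True)
    a, b = create_tables(engine)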
Related
I have a pyramid==1.10.4, SQLAlchemy==1.3.19 and pymssql==2.2.1 setup using the default QueuePool with {pool_size: 5, max_overflow: 40, pool_recycle: 3600}, but it does not close connections opened in excess of the configured pool_size. For example, at one point it opened 14 connections, and some of those connections have been idle for over 8 hours without being closed.
sqlalchemy.convert_unicode = true
sqlalchemy.max_overflow = 10
sqlalchemy.pool_recycle = 3600
sqlalchemy.pool_size = 5
sqlalchemy.strategy = plain
sqlalchemy.url = mssql+pymssql://username:password@db_host/db?login_timeout=2&timeout=5
from sqlalchemy import engine_from_config, Column
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.types import Integer, DateTime, String, Boolean, Text, Float, Unicode
from marshmallow_sqlalchemy import ModelSchema

DBSession = scoped_session(sessionmaker())
settings = config.get_settings()  # `config` is the Pyramid Configurator
engine = engine_from_config(settings, 'sqlalchemy.')
DBSession.configure(bind=engine)
Base = declarative_base()

class User(Base):
    __tablename__ = 'users'
    __table_args__ = {'extend_existing': True}

    user_id = Column('UserId', Integer, primary_key=True)
    first_name = Column('FirstName', Unicode(50), nullable=False)

class UserSchema(ModelSchema):
    class Meta:
        model = User

user = DBSession.query(User).filter_by(user_id=200).first()
schema = UserSchema(session=DBSession)
myuser = schema.dump(user)

with DBSession.begin():
    DBSession.add(myuser)
query = DBSession.query(User).filter_by(user_id=200).first()

# this is a pyramid hook that is fired once per http response
# https://docs.pylonsproject.org/projects/pyramid/en/latest/api/events.html#pyramid.events.NewResponse
config.add_subscriber(
    close_db_session,
    'pyramid.events.NewResponse'
)

def close_db_session(event=None):
    DBSession.close()
My expectation is that the pool will keep a maximum of pool_size (5) connections, can open more up to the max_overflow limit, and will close any connection checked out in excess of pool_size as soon as it is returned to the pool.
But I am not seeing the connections close. I even set pool_size to 1 to experiment, but it still keeps all the connections open, doing nothing.
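One way to see what the pool is actually doing (a sketch; `engine` is the engine built via engine_from_config above) is to use QueuePool's introspection methods:

# Sketch: QueuePool exposes status(), checkedin(), checkedout() and overflow().
# Overflow connections are closed only when they are checked back in, so a
# connection that stays open for hours was most likely never returned to the
# pool (e.g. a session that was never closed).
print(engine.pool.status())
print("checked out:", engine.pool.checkedout())
print("overflow:", engine.pool.overflow())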
from fastapi import Depends, FastAPI, HTTPException, Body, Request
from sqlalchemy import create_engine, Boolean, Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker, relationship
from sqlalchemy.inspection import inspect
from typing import List, Optional
from pydantic import BaseModel
import json
SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
app = FastAPI()
# sqlalchemy models
class RootModel(Base):
    __tablename__ = "root_table"

    id = Column(Integer, primary_key=True, index=True)
    someRootText = Column(String)
    subData = relationship("SubModel", back_populates="rootData")

class SubModel(Base):
    __tablename__ = "sub_table"

    id = Column(Integer, primary_key=True, index=True)
    someSubText = Column(String)
    root_id = Column(Integer, ForeignKey("root_table.id"))
    rootData = relationship("RootModel", back_populates="subData")

# pydantic models/schemas
class SchemaSubBase(BaseModel):
    someSubText: str

    class Config:
        orm_mode = True

class SchemaSub(SchemaSubBase):
    id: int
    root_id: int

    class Config:
        orm_mode = True

class SchemaRootBase(BaseModel):
    someRootText: str
    subData: List[SchemaSubBase] = []

    class Config:
        orm_mode = True

class SchemaRoot(SchemaRootBase):
    id: int

    class Config:
        orm_mode = True

class SchemaSimpleBase(BaseModel):
    someRootText: str

    class Config:
        orm_mode = True

class SchemaSimple(SchemaSimpleBase):
    id: int

    class Config:
        orm_mode = True
Base.metadata.create_all(bind=engine)
# database functions (CRUD)
def db_add_simple_data_pydantic(db: Session, root: SchemaRootBase):
    db_root = RootModel(**root.dict())
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root

def db_add_nested_data_pydantic_generic(db: Session, root: SchemaRootBase):
    # this fails:
    db_root = RootModel(**root.dict())
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root

def db_add_nested_data_pydantic(db: Session, root: SchemaRootBase):
    # start: hack: I have to manually generate the sqlalchemy model from the pydantic model
    root_dict = root.dict()
    sub_dicts = []
    # I have to remove the list from root_dict in order to fix the error from above
    for key in list(root_dict):
        if isinstance(root_dict[key], list):
            sub_dicts = root_dict[key]
            del root_dict[key]
    # now I can do it
    db_root = RootModel(**root_dict)
    for sub_dict in sub_dicts:
        db_root.subData.append(SubModel(**sub_dict))
    # end: hack
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root

def db_add_nested_data_nopydantic(db: Session, root):
    print(root)
    sub_dicts = root.pop("subData")
    print(sub_dicts)
    db_root = RootModel(**root)
    for sub_dict in sub_dicts:
        db_root.subData.append(SubModel(**sub_dict))
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    # problem
    """
    if I returned db_root now, the response would be this:
    {
        "someRootText": "string",
        "id": 24
    }
    and would not contain "subData".
    Therefore I have to do the following.
    Why?
    """
    from sqlalchemy.orm import joinedload
    db_root = (
        db.query(RootModel)
        .options(joinedload(RootModel.subData))
        .filter(RootModel.id == db_root.id)
        .all()
    )[0]
    return db_root
# Dependency
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

@app.post("/addNestedModel_pydantic_generic", response_model=SchemaRootBase)
def add_nested_data_pydantic_generic(root: SchemaRootBase, db: Session = Depends(get_db)):
    data = db_add_nested_data_pydantic_generic(db=db, root=root)
    return data

@app.post("/addSimpleModel_pydantic", response_model=SchemaSimpleBase)
def add_simple_data_pydantic(root: SchemaSimpleBase, db: Session = Depends(get_db)):
    data = db_add_simple_data_pydantic(db=db, root=root)
    return data

@app.post("/addNestedModel_nopydantic")
def add_nested_data_nopydantic(root=Body(...), db: Session = Depends(get_db)):
    data = db_add_nested_data_nopydantic(db=db, root=root)
    return data

@app.post("/addNestedModel_pydantic", response_model=SchemaRootBase)
def add_nested_data_pydantic(root: SchemaRootBase, db: Session = Depends(get_db)):
    data = db_add_nested_data_pydantic(db=db, root=root)
    return data
Description
My question is:
How do I create nested SQLAlchemy models from nested pydantic models (or python dicts) in a generic way and write them to the database in "one shot"?
My example model is called RootModel and has a list of submodels (SubModel) under the subData key.
See above for the pydantic and SQLAlchemy definitions.
Example:
The user provides a nested json string:
{
    "someRootText": "string",
    "subData": [
        {
            "someSubText": "string"
        }
    ]
}
Open the browser and call the endpoint /docs.
You can play around with all endpoints and POST the json string from above.
/addNestedModel_pydantic_generic
When you call the endpoint /addNestedModel_pydantic_generic it fails, because SQLAlchemy cannot create the nested model from the nested pydantic model directly: root.dict() leaves plain dicts in subData, while the relationship expects SubModel instances, so you get:
AttributeError: 'dict' object has no attribute '_sa_instance_state'
/addSimpleModel_pydantic
With a non-nested model it works.
The remaining endpoints show "hacks" that work around the nested-model problem.
/addNestedModel_pydantic
In this endpoint I generate the root model and append the submodels in a loop, in a non-generic way, using pydantic models.
/addNestedModel_nopydantic
In this endpoint I generate the root model and append the submodels in a loop, in a non-generic way, using python dicts.
My solutions are only hacks; I want a generic way to create nested SQLAlchemy models, either from pydantic (preferred) or from a python dict.
Environment
OS: Windows,
FastAPI Version : 0.61.1
Python version: Python 3.8.5
sqlalchemy: 1.3.19
pydantic : 1.6.1
I haven't found a nice built-in way to do this within pydantic/SQLAlchemy. How I solved it: I gave every nested pydantic model a Meta class containing the corresponding SQLAlchemy model. Like so:
from pydantic import BaseModel
from models import ChildDBModel, ParentDBModel

class ChildModel(BaseModel):
    some_attribute: str = 'value'

    class Meta:
        orm_model = ChildDBModel

class ParentModel(BaseModel):
    child: ChildModel
That allowed me to write a generic function that loops through the pydantic object and transforms submodels into SQLAlchemy models:
def is_pydantic(obj: object):
    """Checks whether an object is pydantic."""
    return type(obj).__class__.__name__ == "ModelMetaclass"

def parse_pydantic_schema(schema):
    """
    Iterates through pydantic schema and parses nested schemas
    to a dictionary containing SQLAlchemy models.
    Only works if nested schemas have specified the Meta.orm_model.
    """
    parsed_schema = dict(schema)
    for key, value in parsed_schema.items():
        try:
            if isinstance(value, list) and len(value):
                if is_pydantic(value[0]):
                    parsed_schema[key] = [
                        item.Meta.orm_model(**item.dict()) for item in value
                    ]
            else:
                if is_pydantic(value):
                    parsed_schema[key] = value.Meta.orm_model(**value.dict())
        except AttributeError:
            raise AttributeError("Found nested Pydantic model but Meta.orm_model was not specified.")
    return parsed_schema
The parse_pydantic_schema function returns a dictionary representation of the pydantic model where submodels are substituted by the corresponding SQLAlchemy model specified in Meta.orm_model. You can use this return value to create the parent SQLAlchemy model in one go:
parsed_schema = parse_pydantic_schema(parent_model) # parent_model is an instance of pydantic ParentModel
new_db_model = ParentDBModel(**parsed_schema)
# do your db actions/commit here
If you want you can even extend this to also automatically create the parent model, but that requires you to also specify the Meta.orm_model for all pydantic models.
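For illustration, a minimal sketch of that extension (it assumes every pydantic model involved, the parent included, defines Meta.orm_model):

# Sketch: convert any pydantic model, parent included, in one call.
def pydantic_to_orm(schema):
    return schema.Meta.orm_model(**parse_pydantic_schema(schema))

# usage: new_db_model = pydantic_to_orm(parent_model)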
Using validators is a lot simpler:
SQLAlchemy models.py:
from sqlalchemy import Column, Unicode
from sqlalchemy.orm import relationship, validates

class ChildModel(Base):
    __tablename__ = "Child"

    name: str = Column(Unicode(255), nullable=False, primary_key=True)

class ParentModel(Base):
    __tablename__ = "Parent"

    some_attribute: str = Column(Unicode(255))
    children = relationship("ChildModel", lazy="joined", cascade="all, delete-orphan")

    @validates("children")
    def adjust_children(self, _, value) -> ChildModel:
        """Instantiate a Child object if it is only a plain string."""
        if value and isinstance(value, str):
            return ChildModel(name=value)
        return value
Pydantic schema.py:
from typing import List
from pydantic import BaseModel, Field, validator

class Parent(BaseModel):
    """Model used for parents."""

    some_attribute: str
    children: List[str] = Field(example=["foo", "bar"], default=[])

    @validator("children", pre=True)
    def adjust_children(cls, children):
        """Convert to plain strings if they are Child objects."""
        if children and not isinstance(next(iter(children), None), str):
            return [child["name"] for child in children]
        return children
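A sketch of the round trip this enables (assuming the models above and an open session named `session`): pydantic normalizes incoming children to plain strings, and the SQLAlchemy validator turns each string back into a Child row, so the parent can be built in one shot:

# Sketch: the str -> ChildModel conversion happens inside @validates("children").
parent = Parent(some_attribute="attr", children=["foo", "bar"])
db_parent = ParentModel(**parent.dict())  # each string becomes a ChildModel
session.add(db_parent)
session.commit()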
Nice function @dann! For more than two levels of nesting you can use this recursive function:
def pydantic_to_sqlalchemy_model(schema):
    """
    Iterates through pydantic schema and parses nested schemas
    to a dictionary containing SQLAlchemy models.
    Only works if nested schemas have specified the Meta.orm_model.
    """
    parsed_schema = dict(schema)
    for key, value in parsed_schema.items():
        try:
            if isinstance(value, list) and len(value) and is_pydantic(value[0]):
                parsed_schema[key] = [
                    item.Meta.orm_model(**pydantic_to_sqlalchemy_model(item))
                    for item in value
                ]
            elif is_pydantic(value):
                parsed_schema[key] = value.Meta.orm_model(
                    **pydantic_to_sqlalchemy_model(value)
                )
        except AttributeError:
            raise AttributeError(
                f"Found nested Pydantic model in {schema.__class__} but Meta.orm_model was not specified."
            )
    return parsed_schema
Use it sparingly! If you have cyclical nesting it will loop forever.
And then call your data transformer like this:
def create_parent(db: Session, parent: Parent_pydantic_schema):
    db_parent = Parent_model(**pydantic_to_sqlalchemy_model(parent))
    db.add(db_parent)
    db.commit()
    return db_parent
I am writing a FastAPI application that uses a SQLAlchemy database. I have copied the example from the FastAPI documentation, simplifying the database schema for the sake of concision. The complete source is at the bottom of this post.
This works. I can run it with uvicorn sql_app.main:app and interact with the database via the Swagger docs. When it runs it creates a test.db in the working directory.
Now I want to add a unit test. Something like this.
from fastapi import status
from fastapi.testclient import TestClient
from pytest import fixture

from main import app

@fixture
def client() -> TestClient:
    return TestClient(app)

def test_fast_sql(client: TestClient):
    response = client.get("/users/")
    assert response.status_code == status.HTTP_200_OK
    assert response.json() == []
Using the source code below, this takes the test.db in the working directory as the database. Instead I want to create a new database for every unit test that is deleted at the end of the test.
I could put the global database.engine and database.SessionLocal inside an object that is created at runtime, like so:
class UserDatabase:
    def __init__(self, directory: Path):
        directory.mkdir(exist_ok=True, parents=True)
        sqlalchemy_database_url = f"sqlite:///{directory}/store.db"
        self.engine = create_engine(
            sqlalchemy_database_url, connect_args={"check_same_thread": False}
        )
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
        models.Base.metadata.create_all(bind=self.engine)
but I don't know how to make that work with main.get_db, since the Depends(get_db) logic ultimately assumes database.engine and database.SessionLocal are available globally.
I'm used to working with Flask, whose unit testing facilities handle all this for you. I don't know how to write it myself. Can someone show me the minimal changes I'd have to make in order to generate a new database for each unit test in this framework?
The complete source of the simplified FastAPI/SQLAlchemy app is as follows.
database.py
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
models.py
from sqlalchemy import Column, Integer, String
from database import Base

class User(Base):
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String)
    age = Column(Integer)
schemas.py
from pydantic import BaseModel

class UserBase(BaseModel):
    name: str
    age: int

class UserCreate(UserBase):
    pass

class User(UserBase):
    id: int

    class Config:
        orm_mode = True
crud.py
from sqlalchemy.orm import Session

import schemas
import models

def get_user(db: Session, user_id: int):
    return db.query(models.User).filter(models.User.id == user_id).first()

def get_users(db: Session, skip: int = 0, limit: int = 100):
    return db.query(models.User).offset(skip).limit(limit).all()

def create_user(db: Session, user: schemas.UserCreate):
    db_user = models.User(name=user.name, age=user.age)
    db.add(db_user)
    db.commit()
    db.refresh(db_user)
    return db_user
main.py
from typing import List

from fastapi import Depends, FastAPI, HTTPException
from sqlalchemy.orm import Session

import schemas
import models
import crud
from database import SessionLocal, engine

models.Base.metadata.create_all(bind=engine)

app = FastAPI()

# Dependency
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

@app.post("/users/", response_model=schemas.User)
def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
    return crud.create_user(db=db, user=user)

@app.get("/users/", response_model=List[schemas.User])
def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
    users = crud.get_users(db, skip=skip, limit=limit)
    return users

@app.get("/users/{user_id}", response_model=schemas.User)
def read_user(user_id: int, db: Session = Depends(get_db)):
    db_user = crud.get_user(db, user_id=user_id)
    if db_user is None:
        raise HTTPException(status_code=404, detail="User not found")
    return db_user
You need to override your get_db dependency in your tests, see these docs.
Something like this for your fixture:
@fixture
def db_fixture() -> Session:
    raise NotImplementedError()  # Make this return your temporary session

@fixture
def client(db_fixture) -> TestClient:
    def _get_db_override():
        return db_fixture

    app.dependency_overrides[get_db] = _get_db_override
    return TestClient(app)
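For example, a minimal db_fixture (a sketch, assuming the models module from the question): spin up a throwaway in-memory SQLite database per test and dispose of it afterwards. StaticPool keeps a single connection, so the in-memory database survives across requests:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool

import models

@fixture
def db_fixture() -> Session:
    # fresh in-memory database for every test
    engine = create_engine(
        "sqlite://",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    models.Base.metadata.create_all(bind=engine)
    TestingSession = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = TestingSession()
    try:
        yield db
    finally:
        db.close()
        engine.dispose()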
I get an error I don't understand when I do a session commit after a deletion like this (in a shell with the flask app context, or anywhere while running the app):
>>> from app.extensions import db
>>> from app.models.user import User
>>> user = User.query.all()[0]
>>> db.session.delete(user)
>>> db.session.commit()
File "/Users/hugo/Dropbox/lahey/api/.venv/lib/python3.6/site-packages/sqlalchemy/util/langhelpers.py", line 962, in module
    % (self._il_path, self._il_addtl))
ImportError: importlater.resolve_all() hasn't been called (this is sqlalchemy.orm strategy_options)
My model for the object I try to delete looks like this:
import datetime

from sqlalchemy_utils.types.password import PasswordType
from sqlalchemy_utils import force_auto_coercion

from app.extensions import db

# Setup coercion of passwords
force_auto_coercion()

class User(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(db.String(120), unique=True, nullable=False)
    password = db.Column(PasswordType(schemes=['pbkdf2_sha512']), nullable=False)
    name = db.Column(db.String(256))
    created_at = db.Column(db.DateTime, default=datetime.datetime.now)
    updated_at = db.Column(db.DateTime, onupdate=datetime.datetime.now)
Deleting objects of other models works fine. Could this somehow be because I'm using the PasswordType column from sqlalchemy_utils?
If you are using Flask, the docstring says you are configuring the Column incorrectly. Lazy configuration of the type with Flask config:
import flask
from sqlalchemy_utils import PasswordType, force_auto_coercion

force_auto_coercion()

class User(db.Model):
    __tablename__ = 'user'

    password = db.Column(
        PasswordType(
            # The returned dictionary is forwarded to the CryptContext
            onload=lambda **kwargs: dict(
                schemes=flask.current_app.config['PASSWORD_SCHEMES'],
                **kwargs
            ),
        ),
        unique=False,
        nullable=False,
    )
I've worked out what caused this. I've been using the package sqlalchemy_bulk_lazy_loader, which had a bug (strategy_options was not imported in a correct way). The issue is now fixed in the package.
See the full sqlalchemy mailing list thread for details.
I want to create a new instance of an SQLAlchemy object, so that fields are filled with default values, but I want to commit that to the database generating an UPDATE to a row that already exists with the same primary key, effectively resetting it to the default values. Is there any simple way to do that?
I have tried to do that and failed, because the SQLAlchemy session tracks the state of objects, so there is no easy way to make the session track a new object as a persistent one.
But you want to reset an object to its defaults, don't you? There is a simple way to do that:
from sqlalchemy.ext.declarative import declarative_base

class Base(object):
    def reset(self):
        for name, column in self.__class__.__table__.columns.items():
            if column.default is not None:
                setattr(self, name, column.default.execute())

Base = declarative_base(bind=engine, cls=Base)
This adds a reset method to all your model classes.
Here is the complete working example to fiddle with:
import os
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import functions
here = os.path.abspath(os.path.dirname(__file__))
engine = create_engine('sqlite:///%s/db.sqlite' % here, echo=True)
Session = sessionmaker(bind=engine)
class Base(object):
    def reset(self):
        for name, column in self.__class__.__table__.columns.items():
            if column.default is not None:
                setattr(self, name, column.default.execute())

Base = declarative_base(bind=engine, cls=Base)

class Thing(Base):
    __tablename__ = 'things'

    id = Column(Integer, primary_key=True)
    value = Column(String(255), default='default')
    ts1 = Column(DateTime, default=datetime.now)
    ts2 = Column(DateTime, default=functions.now())

    def __repr__(self):
        return '<Thing(id={0.id!r}, value={0.value!r}, ' \
               'ts1={0.ts1!r}, ts2={0.ts2!r})>'.format(self)
if __name__ == '__main__':
    Base.metadata.drop_all()
    Base.metadata.create_all()

    print("---------------------------------------")
    print("Create a new thing")
    print("---------------------------------------")
    session = Session()
    thing = Thing(
        value='some value',
        ts1=datetime(2014, 1, 1),
        ts2=datetime(2014, 2, 2),
    )
    session.add(thing)
    session.commit()
    session.close()

    print("---------------------------------------")
    print("Querying it from DB")
    print("---------------------------------------")
    session = Session()
    thing = session.query(Thing).filter(Thing.id == 1).one()
    print(thing)
    session.close()

    print("---------------------------------------")
    print("Reset it to default")
    print("---------------------------------------")
    session = Session()
    thing = session.query(Thing).filter(Thing.id == 1).one()
    thing.reset()
    session.commit()
    session.close()

    print("---------------------------------------")
    print("Querying it from DB")
    print("---------------------------------------")
    session = Session()
    thing = session.query(Thing).filter(Thing.id == 1).one()
    print(thing)
    session.close()
Is there any simple way to do that?
Upon further consideration, not really. The cleanest way will be to define your defaults in __init__. The constructor is never called when fetching objects from the DB, so it's perfectly safe. You can also use backend functions such as current_timestamp().
import sqlalchemy as sa
from sqlalchemy import Column

class MyObject(Base):
    __tablename__ = 'my_objects'

    id = Column(sa.Integer, primary_key=True)
    column1 = Column(sa.String)
    column2 = Column(sa.Integer)
    columnN = Column(sa.String)
    updated = Column(sa.DateTime)

    def __init__(self, **kwargs):
        kwargs.setdefault('column1', 'default value')
        kwargs.setdefault('column2', 123)
        kwargs.setdefault('columnN', None)
        kwargs.setdefault('updated', sa.func.current_timestamp())
        super(MyObject, self).__init__(**kwargs)

# Build a fresh object carrying the defaults, point it at the existing row's
# primary key, and merge: merge() copies this state onto the persistent row
# and emits an UPDATE.
default_obj = MyObject()
default_obj.id = old_id  # primary key of the row to reset
session.merge(default_obj)
session.commit()