Alembic autogenerate empty migration file - sqlalchemy

I'm trying to connect the alembic library to the databases and sqlalchemy libraries. As a guide, I'm following this example.
My project files:
db.py
from databases import Database
from sqlalchemy import MetaData, create_engine
DATABASE_URL = "postgresql://....@localhost:5432/db"
engine = create_engine(DATABASE_URL)
metadata = MetaData()
database = Database(DATABASE_URL)
models.py
from sqlalchemy import Table, Column, Integer, String, DateTime
from sqlalchemy.sql import func
from db import metadata
notes = Table(
    "notes",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("title", String(50)),
    Column("description", String(50)),
    Column("created_date", DateTime, default=func.now(), nullable=False),
)
env.py (alembic settings)
from db import DATABASE_URL, metadata
....
# add new
target_metadata = metadata
...
# change
def run_migrations_online():
    config.set_main_option('sqlalchemy.url', str(DATABASE_URL))
    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
When I run
alembic revision --autogenerate -m 'Add notes table'
the new file created at migrations/versions has this content:
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###
I suspect it may be related to the target_metadata = metadata variable. Everything seems to be set up according to the instructions, but the migration is generated empty.

If anyone has a similar problem: all you have to do is import the tables from models.py into the env.py file before the metadata object is used, so that they are registered on it.
env.py
...
from models import notes
from db import DATABASE_URL, metadata
...
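For reference, a minimal sketch of how the pieces fit together in run_migrations_online, under the same file layout as above. The model import is what actually populates metadata, and target_metadata is what autogenerate diffs the database against:
from alembic import context
from sqlalchemy import engine_from_config, pool

from models import notes  # noqa: F401 -- importing registers the table on metadata
from db import DATABASE_URL, metadata

config = context.config
target_metadata = metadata


def run_migrations_online():
    config.set_main_option("sqlalchemy.url", str(DATABASE_URL))
    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with connectable.connect() as connection:
        # Without target_metadata here, autogenerate produces empty migrations.
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()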

Related

Alembic attempts to recreate all tables in the Base class on every migration

In my env.py I have set my target_metadata to Base.metadata, which I import from models.py. I have a fresh database with a schema named basic in which I want to create the tables, and I set up my models.py like this:
from datetime import datetime
from sqlalchemy import Column, DateTime, Integer, MetaData, String
from sqlalchemy.orm import declarative_base
Base = declarative_base(metadata=MetaData(schema='basic'))
class User(Base):
    __tablename__ = 'user'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
    created_on = Column(DateTime, default=datetime.utcnow)
I run alembic revision --autogenerate -m "Create user model" and then alembic upgrade heads. Everything works as expected and I have the table user in my database under the schema basic.
Now I want to add a table country. I add it to my models.py which now looks like this:
from datetime import datetime
from sqlalchemy import Column, DateTime, Integer, MetaData, String
from sqlalchemy.orm import declarative_base
Base = declarative_base(metadata=MetaData(schema='basic'))
class User(Base):
    __tablename__ = 'user'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)
    created_on = Column(DateTime, default=datetime.utcnow)

class Country(Base):
    __tablename__ = 'country'
    id = Column(Integer, primary_key=True)
    country = Column(String, nullable=False)
    created_on = Column(DateTime, default=datetime.utcnow)
I run alembic revision --autogenerate -m "Create country model", which creates a new versions file that looks like this:
"""Create country model
Revision ID: 0eef32919b0d
Revises: 2da4668d1069
Create Date: 2023-01-19 15:39:08.778274
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '0eef32919b0d'
down_revision = '2da4668d1069'
branch_labels = None
depends_on = None
def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('country',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('country', sa.String(), nullable=False),
        sa.Column('created_on', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        schema='basic'
    )
    op.create_table('user',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('created_on', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        schema='basic'
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('user', schema='basic')
    op.drop_table('country', schema='basic')
    # ### end Alembic commands ###
Why does it also try to create the table user again? Running this gives an error that the object basic.user already exists. How can I fix this so that it looks at the current state of the database and only creates the table country?
Setting the option include_schemas=True (as suggested in this thread: Alembic - sqlalchemy does not detect existing tables) helps, but then it includes all schemas, and I only want it to be aware of this single schema.
I only want it to be aware of this single schema.
Then you also need to use include_name=, like so:
def run_migrations_online():
    # …
    def include_name(name, type_, parent_names):
        if type_ == "schema":
            # note this will not include the default schema
            return name in ["basic"]
        else:
            return True

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata,
            include_schemas=True,
            include_name=include_name
        )
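A side note, based on the Alembic documentation for include_name: with include_schemas=True, objects in the default schema are reported with a schema name of None, so a variant of the hook that keeps both the default schema and basic would be:
def include_name(name, type_, parent_names):
    if type_ == "schema":
        # None is how Alembic reports the default schema
        return name in [None, "basic"]
    return True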

How do I write SQLAlchemy test fixtures for FastAPI applications

I am writing a FastAPI application that uses a SQLAlchemy database. I have copied the example from the FastAPI documentation, simplifying the database schema for concision's sake. The complete source is at the bottom of this post.
This works. I can run it with uvicorn sql_app.main:app and interact with the database via the Swagger docs. When it runs it creates a test.db in the working directory.
Now I want to add a unit test. Something like this.
from fastapi import status
from fastapi.testclient import TestClient
from pytest import fixture
from main import app
@fixture
def client() -> TestClient:
    return TestClient(app)

def test_fast_sql(client: TestClient):
    response = client.get("/users/")
    assert response.status_code == status.HTTP_200_OK
    assert response.json() == []
Using the source code below, this takes the test.db in the working directory as the database. Instead I want to create a new database for every unit test that is deleted at the end of the test.
I could put the global database.engine and database.SessionLocal inside an object that is created at runtime, like so:
class UserDatabase:
    def __init__(self, directory: Path):
        directory.mkdir(exist_ok=True, parents=True)
        sqlalchemy_database_url = f"sqlite:///{directory}/store.db"
        self.engine = create_engine(
            sqlalchemy_database_url, connect_args={"check_same_thread": False}
        )
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
        models.Base.metadata.create_all(bind=self.engine)
but I don't know how to make that work with main.get_db, since the Depends(get_db) logic ultimately assumes database.engine and database.SessionLocal are available globally.
I'm used to working with Flask, whose unit testing facilities handle all this for you. I don't know how to write it myself. Can someone show me the minimal changes I'd have to make in order to generate a new database for each unit test in this framework?
The complete source of the simplified FastAPI/SQLAlchemy app is as follows.
database.py
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
models.py
from sqlalchemy import Column, Integer, String
from database import Base
class User(Base):
    __tablename__ = "users"
    id = Column(Integer, primary_key=True, index=True)
    name = Column(String)
    age = Column(Integer)
schemas.py
from pydantic import BaseModel
class UserBase(BaseModel):
    name: str
    age: int

class UserCreate(UserBase):
    pass

class User(UserBase):
    id: int

    class Config:
        orm_mode = True
crud.py
from sqlalchemy.orm import Session
import schemas
import models
def get_user(db: Session, user_id: int):
    return db.query(models.User).filter(models.User.id == user_id).first()

def get_users(db: Session, skip: int = 0, limit: int = 100):
    return db.query(models.User).offset(skip).limit(limit).all()

def create_user(db: Session, user: schemas.UserCreate):
    db_user = models.User(name=user.name, age=user.age)
    db.add(db_user)
    db.commit()
    db.refresh(db_user)
    return db_user
main.py
from typing import List
from fastapi import Depends, FastAPI, HTTPException
from sqlalchemy.orm import Session
import schemas
import models
import crud
from database import SessionLocal, engine
models.Base.metadata.create_all(bind=engine)
app = FastAPI()
# Dependency
def get_db():
    try:
        db = SessionLocal()
        yield db
    finally:
        db.close()

@app.post("/users/", response_model=schemas.User)
def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
    return crud.create_user(db=db, user=user)

@app.get("/users/", response_model=List[schemas.User])
def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
    users = crud.get_users(db, skip=skip, limit=limit)
    return users

@app.get("/users/{user_id}", response_model=schemas.User)
def read_user(user_id: int, db: Session = Depends(get_db)):
    db_user = crud.get_user(db, user_id=user_id)
    if db_user is None:
        raise HTTPException(status_code=404, detail="User not found")
    return db_user
You need to override your get_db dependency in your tests, see these docs.
Something like this for your fixture:
@fixture
def db_fixture() -> Session:
    raise NotImplementedError()  # Make this return your temporary session

@fixture
def client(db_fixture) -> TestClient:
    def _get_db_override():
        return db_fixture
    app.dependency_overrides[get_db] = _get_db_override
    return TestClient(app)
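To make that concrete, here is a sketch of a working pair of fixtures. It assumes the module layout from the question (models.py, main.py) and uses pytest's built-in tmp_path fixture to get a throwaway SQLite file per test:
from fastapi.testclient import TestClient
from pytest import fixture
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

import models
from main import app, get_db

@fixture
def db_fixture(tmp_path) -> Session:
    # One fresh SQLite file per test; pytest cleans tmp_path up afterwards.
    engine = create_engine(
        f"sqlite:///{tmp_path}/test.db",
        connect_args={"check_same_thread": False},
    )
    models.Base.metadata.create_all(bind=engine)
    TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = TestingSessionLocal()
    try:
        yield db
    finally:
        db.close()

@fixture
def client(db_fixture: Session) -> TestClient:
    def _get_db_override():
        return db_fixture
    app.dependency_overrides[get_db] = _get_db_override
    return TestClient(app)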

Multiprocessing, SQLAlchemy and scoped_sessions

I want to run multiple strategies in concurrent processes. I came up with something like this:
import logging
import multiprocessing
import os
from sqlalchemy.orm import scoped_session, Session
from pyutil.sql.interfaces.symbols.symbol import Symbol
from pyutil.sql.session import get_one_or_create
class StratRunner(object):
    def __init__(self, session_scope, logger=None):
        assert isinstance(session_scope, scoped_session)
        self.__session_scope = session_scope
        self.__logger = logger or logging.getLogger(__name__)

    # this function is the target for mp.Process
    def _run(self, strategy):
        self.__logger.debug("Pid {pid}".format(pid=os.getpid()))
        symbols = self.symbols
        self.__logger.info("Run strategy {s}".format(s=strategy))
        configuration = strategy.configuration()
        strategy.upsert(portfolio=configuration.portfolio, symbols=symbols, days=5)

    def run_strategies(self):
        # loop over all active strategies!
        jobs = []
        # we are in the main thread here...
        for s in self.active_strategies:
            # what shall I give to the Process? The strategy object, the strategy_id, a session instance, the session_scope...
            job = multiprocessing.Process(target=self._run, kwargs={"strategy": s})
            job.name = s.name
            jobs.append(job)
        run_jobs(jobs, logger=self.__logger)

    @property
    def symbols(self):
        return {s.name: s for s in self.__session_scope().query(Symbol)}

    @property
    def active_strategies(self):
        return self.__session_scope().query(Strategy).filter(Strategy.active == True).all()
I am aware of tons of documentation on this topic, but I am overwhelmed.
I loop over the rows of a table (the active_strategies; class Strategies(Base)...). I then hand the strategy object over to the _run method and update the strategy object within that very method. Please feel free to shred my code.
I am particularly puzzled about what to give to the _run method. Shall I hand over the strategy object, the strategy ID, the session, the scoped_session, ...?
I have now created a runner object:
import abc
import logging
import os
from sqlalchemy.orm import sessionmaker
class Runner(object):
    __metaclass__ = abc.ABCMeta

    def __init__(self, engine, logger=None):
        self.__engine = engine
        self._logger = logger or logging.getLogger(__name__)
        self.__jobs = []

    @property
    def _session(self):
        """ Create a fresh new session... """
        self.__engine.dispose()
        factory = sessionmaker(self.__engine)
        return factory()

    def _run_jobs(self):
        self._logger.debug("PID main {pid}".format(pid=os.getpid()))
        for job in self.jobs:
            # all jobs get the trigger
            self._logger.info("Job {j}".format(j=job.name))
            job.start()
        for job in self.jobs:
            self._logger.info("Wait for job {j}".format(j=job.name))
            job.join()
            self._logger.info("Job {j} done".format(j=job.name))

    @property
    def jobs(self):
        return self.__jobs

    @abc.abstractmethod
    def run(self):
        """ Described in the child class """
In particular, this class can provide a fresh session (via ._session). However, using this setup I see plenty of:
psycopg2.OperationalError: server closed the connection unexpectedly
| This probably means the server terminated abnormally
| before or while processing the request.
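That psycopg2 error is the classic symptom of a forked child reusing a connection it inherited from the parent. SQLAlchemy's guidance for multiprocessing is that each process needs its own engine (or the inherited pool must be disposed after the fork), and that children should receive plain identifiers rather than live ORM objects or sessions. A minimal sketch of that shape; the URL, table and column names here are illustrative, not taken from the code above:
import multiprocessing
from sqlalchemy import create_engine, text

# Assumption: replace with the real connection URL.
DATABASE_URL = "postgresql://user:password@localhost:5432/db"

def run_strategy(strategy_id: int) -> None:
    # Each child builds its own engine, so no socket inherited from
    # the parent is reused across the fork boundary.
    engine = create_engine(DATABASE_URL)
    with engine.connect() as conn:
        row = conn.execute(
            text("SELECT name FROM strategy WHERE id = :id"),
            {"id": strategy_id},
        ).one()
        print(f"running strategy {row.name}")
    engine.dispose()

if __name__ == "__main__":
    # Hand over only primary keys, never live ORM objects or sessions.
    jobs = [multiprocessing.Process(target=run_strategy, args=(sid,)) for sid in (1, 2, 3)]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()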

Django migrations changing choices value

A field was previously added to one of my MySQL tables:
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2017-09-14 00:49
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('my_app', '0102_previous_migrations'),
    ]
    operations = [
        migrations.AddField(
            model_name='my_client',
            name='my_team',
            field=models.CharField(choices=[('Unassigned', 'Unassigned'), ('ACT', 'ACT'), ('Korea', 'Korea'), ('National', 'National')], default='Unassigned', max_length=255, verbose_name='My Team'),
        ),
    ]
So users have a choice of the above selections in my UI, and the value is saved into the table my_client:
Unassigned
ACT
Korea
National
The changes have been deployed, and now a beginner like me would like to change them, i.e. remove Korea and add two new choices: NSW and SA.
How would I go about this? Do I need another migration, or will I just need to change those choices in the model?
My model currently looks like this:
class Client(MyAppModel):
    TEAM_CHOICES = (
        ('Unassigned', 'Unassigned'),
        ('ACT', 'ACT'),
        ('Korea', 'Korea'),
        ('National', 'National'),
    )
    DEFAULT_TEAM = 'Unassigned'
    my_team = models.CharField(verbose_name='MyTeam', null=False, max_length=255, choices=TEAM_CHOICES, default=DEFAULT_TEAM)
Update:
Based on the comments, I will need a migration too: an AlterField?
Do I also have to update the existing rows in the table my_client that currently have Korea, say to Unassigned, if I want to remove Korea as a choice? What can I use in the migration for that?
A high-level approach:
1. Create a migration which only adds the new choices.
2. Write a method that takes all rows with the old choice and gives them a proper new choice, run via migrations.RunPython.
3. Create a migration which removes the old choice.
I figured this out later; hope it helps other beginners like me.
I have to update the Client model:
class Client(MyAppModel):
    TEAM_CHOICES = (
        ('Unassigned', 'Unassigned'),
        ('National', 'National'),
        ('NSW', 'NSW'),
        ('SA', 'SA'),
    )
Then enter this command:
python manage.py makemigrations
A migration will be generated:
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    dependencies = [
        ('my_app', '0102_previous_migrations'),
    ]
    operations = [
        migrations.AlterField(
            model_name='my_client',
            name='my_team',
            field=models.CharField(choices=[('Unassigned', 'Unassigned'), ('ACT', 'ACT'), ('National', 'National'), ('NSW', 'NSW'), ('SA', 'SA')], default='Unassigned', max_length=255, verbose_name='My Team'),
        ),
    ]
Next, to update all the existing values in the table (as referred to here), enter this command to create an empty migration file:
python manage.py makemigrations --empty my_app
In the new migration file, put something like:
# -*- coding: utf-8 -*-
# Generated by Django 1.9.7 on 2017-10-26 06:36
from __future__ import unicode_literals
from django.db import migrations
def set_unassigned(apps, schema_editor):
    Client = apps.get_model('my_app', 'my_client')
    # Move any clients still on the removed choice back to the default.
    for client in Client.objects.filter(my_team='Korea'):
        client.my_team = 'Unassigned'
        client.save()

class Migration(migrations.Migration):
    dependencies = [
        ('my_app', '0104_client_team_update'),
    ]
    operations = [
        migrations.RunPython(set_unassigned),
    ]
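One refinement, in case a rollback ever matters: migrations.RunPython also accepts a reverse_code callable, and Django ships migrations.RunPython.noop for data migrations that should simply be skipped on downgrade. For example:
operations = [
    migrations.RunPython(set_unassigned, reverse_code=migrations.RunPython.noop),
]
Finally, apply everything with python manage.py migrate.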

Scrapy / Pipeline not inserting data to MySQL database

I'm making a pipeline in Scrapy to store scraped data in a MySQL database. When the spider is run in the terminal it works perfectly; even the pipeline is opened. However, the data is not being sent to the database. Any help appreciated! :)
Here's the pipeline code:
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
from tutorial.items import TutorialItem
class MySQLTest(object):
    def __init__(self):
        db = MySQLdb.connect(user='root', passwd='', host='localhost', db='python')
        cursor = db.cursor()

    def process_item(self, spider, item):
        try:
            cursor.execute("INSERT INTO info (venue, datez) VALUES (%s, %s)", (item['artist'], item['date']))
            self.conn.commit()
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        return item
And here's the spider code:
import scrapy # Import required libraries.
from scrapy.selector import HtmlXPathSelector # Allows for path detection in a websites code.
from scrapy.spider import BaseSpider # Used to create a simple spider to extract data.
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor # Needed for the extraction of href links in HTML to crawl further pages.
from scrapy.contrib.spiders import CrawlSpider # Needed to make the crawl spider.
from scrapy.contrib.spiders import Rule # Allows specified rules to affect what the link
import spotipy
import soundcloud
import mysql.connector
from tutorial.items import TutorialItem
class AllGigsSpider(CrawlSpider):
    name = "allGigs" # Name of the Spider. In the command prompt, when in the correct folder, enter "scrapy crawl allGigs".
    allowed_domains = ["www.allgigs.co.uk"] # Allowed domains is a String NOT a URL.
    start_urls = [
        "http://www.allgigs.co.uk/whats_on/London/clubbing-1.html",
        "http://www.allgigs.co.uk/whats_on/London/festivals-1.html",
        "http://www.allgigs.co.uk/whats_on/London/comedy-1.html",
        "http://www.allgigs.co.uk/whats_on/London/theatre_and_opera-1.html",
        "http://www.allgigs.co.uk/whats_on/London/dance_and_ballet-1.html"
    ] # Specify the starting points for the web crawler.
    rules = [
        Rule(SgmlLinkExtractor(restrict_xpaths='//div[@class="more"]'), # Search the start URLs for links.
            callback="parse_me",
            follow=True),
    ]

    def parse_me(self, response):
        for info in response.xpath('//div[@class="entry vevent"]|//div[@class="resultbox"]'):
            item = TutorialItem() # Extract items from the items folder.
            item['artist'] = info.xpath('.//span[@class="summary"]//text()').extract() # Extract artist information.
            item['date'] = info.xpath('.//span[@class="dates"]//text()').extract() # Extract date information.
            #item['endDate'] = info.xpath('.//abbr[@class="dtend"]//text()').extract() # Extract end date information.
            #item['startDate'] = info.xpath('.//abbr[@class="dtstart"]//text()').extract() # Extract start date information.
            item['genre'] = info.xpath('.//div[@class="header"]//text()').extract()
            yield item # Retrieve items in item.
        client = soundcloud.Client(client_id='401c04a7271e93baee8633483510e263')
        tracks = client.get('/tracks', limit=1, license='cc-by-sa', q=item['artist'])
        for track in tracks:
            print(tracks)
I believe the problem was in my settings.py file, where I had missed a comma... yawn.
ITEM_PIPELINES = {
    'tutorial.pipelines.MySQLTest': 300,
}
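For anyone else landing here: besides the settings, the pipeline as posted would also fail at runtime, because cursor is a local variable in __init__, self.conn is never assigned, and Scrapy calls process_item(item, spider) in that argument order. A corrected sketch, under the same connection assumptions as the question:
import MySQLdb

class MySQLTest(object):
    def __init__(self):
        # Keep the connection and cursor on self so process_item can use them.
        self.db = MySQLdb.connect(user='root', passwd='', host='localhost', db='python')
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):  # note the (item, spider) order
        try:
            self.cursor.execute(
                "INSERT INTO info (venue, datez) VALUES (%s, %s)",
                (item['artist'], item['date']),
            )
            self.db.commit()
        except MySQLdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
        return item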