SQLAlchemy session voes in unittest - sqlalchemy

I've just started using SQLAlchemy a few days ago and right now I'm stuck with a problem that I hope anyone can shed some light on before I loose all my hair.
When I run a unittest, see snippet below, only the first test in the sequence is passing. The test testPhysicalPrint works just fine, but testRecordingItem fails with NoResultFound exception - No row was found for one(). But if I remove testPhysicalPrint from the test class, then testRecordingItem works.
I assume that the problem has something to do with the session, but I can't really get a grip of it.
In case anyone wonders, the setup is as follows:
Python 3.1 (Ubuntu 10.04 package)
SQLAlchemy 0.7.2 (easy_install:ed)
PostgreSQL 8.4.8 (Ubuntu 10.04 package)
PsycoPG2 2.4.2 (easy_installed:ed)
Exemple test:
class TestSchema(unittest.TestCase):
test_items = [
# Some parent class products
PrintItem(key='p1', title='Possession', dimension='30x24'),
PrintItem(key='p2', title='Andrzej Żuławski - a director', dimension='22x14'),
DigitalItem(key='d1', title='Every Man His Own University', url='http://www.gutenberg.org/files/36955/36955-h/36955-h.htm'),
DigitalItem(key='d2', title='City Ballads', url='http://www.gutenberg.org/files/36954/36954-h/36954-h.htm'),
]
def testPrintItem(self):
item = self.session.query(PrintItem).filter(PrintItem.key == 'p1').one()
assert item.title == 'Possession', 'Title mismatch'
def testDigitalItem(self):
item2 = self.session.query(DigitalItem).filter(DigitalItem.key == 'd2').one()
assert item2.title == 'City Ballads', 'Title mismatch'
def setUp(self):
Base.metadata.create_all()
self.session = DBSession()
self.session.add_all(self.test_items)
self.session.commit()
def tearDown(self):
self.session.close()
Base.metadata.drop_all()
if __name__ == '__main__':
unittest.main()
UPDATE
Here is the working code snippet.
# -*- coding: utf-8 -*-
import time
import unittest
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
Base = declarative_base()
engine = create_engine('sqlite:///testdb', echo=False)
DBSession = sessionmaker(bind=engine)
class ItemMixin(object):
"""
Commons attributes for items, ie books, DVD:s...
"""
__tablename__ = 'testitems'
__table_args__ = {'extend_existing':True}
id = Column(Integer, autoincrement=True, primary_key=True)
key = Column(Unicode(16), unique=True, nullable=False)
title = Column(UnicodeText, default=None)
item_type = Column(Unicode(20), default=None)
__mapper_args__ = {'polymorphic_on': item_type}
def __init__(self, key, title=None):
self.key = key
self.title = title
class FooItem(Base, ItemMixin):
foo = Column(UnicodeText, default=None)
__mapper_args__ = {'polymorphic_identity':'foo'}
def __init__(self, foo=None, **kwargs):
ItemMixin.__init__(self, **kwargs)
self.foo = foo
class BarItem(Base, ItemMixin):
bar = Column(UnicodeText, default=None)
__mapper_args__ = {'polymorphic_identity':'bar'}
def __init__(self, bar=None, **kwargs):
ItemMixin.__init__(self, **kwargs)
self.bar = bar
# Tests
class TestSchema(unittest.TestCase):
# Class variables
is_setup = False
session = None
metadata = None
test_items = [
FooItem(key='f1', title='Possession', foo='Hello'),
FooItem(key='f2', title='Andrzej Żuławsk', foo='World'),
BarItem(key='b1', title='Wikipedia', bar='World'),
BarItem(key='b2', title='City Ballads', bar='Hello'),
]
def testFooItem(self):
print ('Test Foo Item')
item = self.__class__.session.query(FooItem).filter(FooItem.key == 'f1').first()
assert item.title == 'Possession', 'Title mismatch'
def testBarItem(self):
print ('Test Bar Item')
item = self.__class__.session.query(BarItem).filter(BarItem.key == 'b2').first()
assert item.title == 'City Ballads', 'Title mismatch'
def setUp(self):
if not self.__class__.is_setup:
self.__class__.session = DBSession()
self.metadata = Base.metadata
self.metadata.bind = engine
self.metadata.drop_all() # Drop table
self.metadata.create_all() # Create tables
self.__class__.session.add_all(self.test_items) # Add data
self.__class__.session.commit() # Commit
self.__class__.is_setup = True
def tearDown(self):
if self.__class__.is_setup:
self.__class__.session.close()
# Just for Python >=2.7 or >=3.2
#classmethod
def setUpClass(cls):
pass
#Just for Python >=2.7 or >=3.2
#classmethod
def tearDownClass(cls):
pass
if __name__ == '__main__':
unittest.main()

The most likely reason for this behavior is the fact that that data is not properly cleaned up between the tests. This explains why when you run only one test, it works.
setUp is called before every test, and tearDown - after.
Depending on what you would like to achieve, you have two options:
create data only once for all test.
In this case you if you had Python-2.7+ or Python-3.2+, you could use tearDownClass method. In your case you can handle it with a boolean class variable to prevent the code you have in setUp running more then once.
re-create data before every test
In this case you need to make sure that in the tearDown you delete all the data. This is what you are not doing right now, and I suspect that when the second test is ran, the call to one() fails not because it does not find an object, but because it finds more two objects matching the criteria.
Check the output of this code to understand the call sequence:
import unittest
class TestSchema(unittest.TestCase):
def testOne(self):
print '==testOne'
def testTwo(self):
print '==testTwo'
def setUp(self):
print '>>setUp'
def tearDown(self):
print '<<tearDown'
#classmethod
def setUpClass():
print '>>setUpClass'
#classmethod
def tearDownClass():
print '<<tearDownClass'
if __name__ == '__main__':
unittest.main()
Output:
>>setUp
==testOne
<<tearDown
>>setUp
==testTwo
<<tearDown

I have this as my tearDown method and it does work fine for my tests:
def tearDown (self):
"""Cleans up after each test case."""
sqlalchemy.orm.clear_mappers()

Related

connection in a different thread can't read tables created in another thread?

In a testing suite I have a fixture that drop all the tables in an engine, then start fresh and create all the tables. After this fixture logic, my test case runs, using the newly created table.
The fixture and the test case are run in the MainThread, while the database consumer is a web application server run in another thread.
However, I keep getting: sqlite3.OperationalError: no such table: ***
I've checked that they are using the same in-memory engine, but different connections(this is correct). And I've checked that the fixture does run before the consumer thread starts running.
What could be possible cause?
My code is as below:
import os
import pytest
import cherrypy
class DAL:
def __init__(self,
path="database",
filename=None,
conn_string=None,
echo=False):
if filename is None and conn_string is None:
conn_string = "sqlite:///:memory:"
elif conn_string is not None:
conn_string = conn_string
else:
conn_string = f'sqlite:///{os.path.abspath(path)}/{filename}'
self.conn_string = conn_string
engine = create_engine(conn_string, echo=echo)
Session_Factory = sessionmaker(bind=engine)
self.Session = sqlalchemy.orm.scoped_session(Session_Factory)
def __str__(self):
return f"<DAL>object: {self.conn_string}, at {hex(id(self))}"
def get_a_dbsession(self):
opened_db = self.Session()
return opened_db
def __enter__(self):
return self.get_a_dbsession()
def __exit__(self, exception_type, exception_value, exception_traceback):
opened_db = self.Session()
try:
opened_db.commit()
except:
opened_db.rollback()
raise
finally:
self.Session.remove()
else:
opened_db.close()
raise exception_type
def create_schema(self):
SchemaBase.metadata.create_all(self.Session().connection().engine)
class SAEnginePlugin(cherrypy.process.plugins.SimplePlugin):
def __init__(self, bus, dal):
"""
The plugin is registered to the CherryPy engine.
"""
cherrypy.process.plugins.SimplePlugin.__init__(self, bus)
self.dal = dal
def start(self):
self.bus.subscribe("bind-session", self.bind)
def stop(self):
self.bus.unsubscribe("bind-session", self.bind)
if self.dal:
del self.dal
def bind(self):
"""
Whenever this plugin receives the 'bind-session' message, it applies
this method and bind the received session to the engine.
"""
# self.dal.Session.configure(bind=self.dal.engine)
session = self.dal.get_a_dbsession()
return session
class SATool(cherrypy.Tool):
def __init__(self):
"""
This tool binds a session to the engine each time
a requests starts and commits/rollbacks whenever
the request terminates.
"""
cherrypy.Tool.__init__(self,
'on_start_resource',
self.bind_session,
priority=20)
def _setup(self):
cherrypy.Tool._setup(self)
cherrypy.request.hooks.attach('on_end_resource',
self.close_session,
priority=80)
def bind_session(self):
"""
Attaches a session to the request's scope by requesting
the SA plugin to bind a session to the SA engine.
"""
session = cherrypy.engine.publish('bind-session').pop()
cherrypy.request.db = session
def close_session(self):
"""
Commits the current transaction or rollbacks if an error occurs.
In all cases, the current session is unbound and therefore
not usable any longer.
"""
if not hasattr(cherrypy.request, 'db'):
return
try:
cherrypy.request.db.commit()
except:
cherrypy.request.db.rollback()
raise
finally:
cherrypy.request.db.close()
cherrypy.request.db = None
# Register the SQLAlchemy tool
cherrypy.tools.db = SATool()
class UnitServer:
...
#cherrypy.expose
#cherrypy.tools.json_in()
def list_filtered_entries(self):
...
queryOBJ = cherrypy.request.db.query(classmodel_obj)
...
############# main module code below ############:
# mocking 'db':
dal = database.DAL()
# configure cherrypy:
SAEnginePlugin(cherrypy.engine, dal).subscribe()
#pytest.fixture(autouse=True) # automatically run before every test case
def mocked_dal(request):
# first, clean the database by dropping all tables:
database.SchemaBase.metadata.drop_all(dal.Session().connection().engine)
# second, create the schema from blank:
dal.create_schema()
# third, insert some dummy data record:
...
db.commit()
class TestMyUnitServer(cherrypy.test.helper.CPWebCase):
#staticmethod
def setup_server():
...
server_app = UnitServer()
cherrypy.tree.mount(server_app, '', {'/': {'tools.db.on': True}})
def test_list_filtered_entries_allentries(self):
...
self.getPage('/list_filtered_entries',
headers=[("Accept", "application/json"),
('Content-type', 'application/json'),
('Content-Length',
str(len(json.dumps(query_params)))),
("Connection", "keep-alive"),
("Cache-Control", "max-age=0")],
body=serialized_query_params,
method="POST")
self.assertStatus('200 OK')

How do I write SQLAlchemy test fixtures for FastAPI applications

I am writing a FastAPI application that uses a SQLAlchemy database. I have copied the example from the FastAPI documentation, simplifying the database schema for concisions' sake. The complete source is at the bottom of this post.
This works. I can run it with uvicorn sql_app.main:app and interact with the database via the Swagger docs. When it runs it creates a test.db in the working directory.
Now I want to add a unit test. Something like this.
from fastapi import status
from fastapi.testclient import TestClient
from pytest import fixture
from main import app
#fixture
def client() -> TestClient:
return TestClient(app)
def test_fast_sql(client: TestClient):
response = client.get("/users/")
assert response.status_code == status.HTTP_200_OK
assert response.json() == []
Using the source code below, this takes the test.db in the working directory as the database. Instead I want to create a new database for every unit test that is deleted at the end of the test.
I could put the global database.engine and database.SessionLocal inside an object that is created at runtime, like so:
class UserDatabase:
def __init__(self, directory: Path):
directory.mkdir(exist_ok=True, parents=True)
sqlalchemy_database_url = f"sqlite:///{directory}/store.db"
self.engine = create_engine(
sqlalchemy_database_url, connect_args={"check_same_thread": False}
)
self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
models.Base.metadata.create_all(bind=self.engine)
but I don't know how to make that work with main.get_db, since the Depends(get_db) logic ultimately assumes database.engine and database.SessionLocal are available globally.
I'm used to working with Flask, whose unit testing facilities handle all this for you. I don't know how to write it myself. Can someone show me the minimal changes I'd have to make in order to generate a new database for each unit test in this framework?
The complete source of the simplified FastAPI/SQLAlchemy app is as follows.
database.py
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
models.py
from sqlalchemy import Column, Integer, String
from database import Base
class User(Base):
__tablename__ = "users"
id = Column(Integer, primary_key=True, index=True)
name = Column(String)
age = Column(Integer)
schemas.py
from pydantic import BaseModel
class UserBase(BaseModel):
name: str
age: int
class UserCreate(UserBase):
pass
class User(UserBase):
id: int
class Config:
orm_mode = True
crud.py
from sqlalchemy.orm import Session
import schemas
import models
def get_user(db: Session, user_id: int):
return db.query(models.User).filter(models.User.id == user_id).first()
def get_users(db: Session, skip: int = 0, limit: int = 100):
return db.query(models.User).offset(skip).limit(limit).all()
def create_user(db: Session, user: schemas.UserCreate):
db_user = models.User(name=user.name, age=user.age)
db.add(db_user)
db.commit()
db.refresh(db_user)
return db_user
main.py
from typing import List
from fastapi import Depends, FastAPI, HTTPException
from sqlalchemy.orm import Session
import schemas
import models
import crud
from database import SessionLocal, engine
models.Base.metadata.create_all(bind=engine)
app = FastAPI()
# Dependency
def get_db():
try:
db = SessionLocal()
yield db
finally:
db.close()
#app.post("/users/", response_model=schemas.User)
def create_user(user: schemas.UserCreate, db: Session = Depends(get_db)):
return crud.create_user(db=db, user=user)
#app.get("/users/", response_model=List[schemas.User])
def read_users(skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
users = crud.get_users(db, skip=skip, limit=limit)
return users
#app.get("/users/{user_id}", response_model=schemas.User)
def read_user(user_id: int, db: Session = Depends(get_db)):
db_user = crud.get_user(db, user_id=user_id)
if db_user is None:
raise HTTPException(status_code=404, detail="User not found")
return db_user
You need to override your get_db dependency in your tests, see these docs.
Something like this for your fixture:
#fixture
def db_fixture() -> Session:
raise NotImplementError() # Make this return your temporary session
#fixture
def client(db_fixture) -> TestClient:
def _get_db_override():
return db_fixture
app.dependency_overrides[get_db] = _get_db_override
return TestClient(app)

ID collision with Graphene-SQLAlchemy Interface class plus Node Interface

I've written Graphene models for polymorphic entities represented in my database w/SQLalchemy.
The problem is simple:
I want to create an interface that reflects my SQLAlchemy models for Graphene but also either a) implements Node or b) does not conflict with Node and allows me to retrieve the model's ID without needing to add ... on Node {id} to the query string.
have to exclude the ID field from my ORM-based interface or field conflicts with the Node interface, by doing so in order to get the ID then you need to add ...on Node { id }, which is ugly.
I created an SQLAlchemyInterface object that extends graphene.Interface. Many (but not all) of my models used this as well as Node as interfaces. The first problem was that this contains an ID field and it conflicted with the Node interface.
I excluded the id field to not interfere with Node, but then found I could not directly query ID on my models anymore, and had to add ... on Node {id} to the query string.
I then decided to have this SQLAlchemyInterface extend Node. I don't love this approach because I need to use another (named) Node interface for all of my models that don't necessarily need to implement SQLAlchemyInterface
class SQLAlchemyInterface(Node):
#classmethod
def __init_subclass_with_meta__(
cls,
model=None,
registry=None,
only_fields=(),
exclude_fields=(),
connection_field_factory=default_connection_field_factory,
_meta=None,
**options
):
_meta = SQLAlchemyInterfaceOptions(cls)
_meta.name = f'{cls.__name__}Node'
autoexclude_columns = exclude_autogenerated_sqla_columns(model=model)
exclude_fields += autoexclude_columns
assert is_mapped_class(model), (
"You need to pass a valid SQLAlchemy Model in " '{}.Meta, received "{}".'
).format(cls.__name__, model)
if not registry:
registry = get_global_registry()
assert isinstance(registry, Registry), (
"The attribute registry in {} needs to be an instance of "
'Registry, received "{}".'
).format(cls.__name__, registry)
sqla_fields = yank_fields_from_attrs(
construct_fields(
model=model,
registry=registry,
only_fields=only_fields,
exclude_fields=exclude_fields,
connection_field_factory=connection_field_factory
),
_as=Field
)
if not _meta:
_meta = SQLAlchemyInterfaceOptions(cls)
_meta.model = model
_meta.registry = registry
connection = Connection.create_type(
"{}Connection".format(cls.__name__), node=cls)
assert issubclass(connection, Connection), (
"The connection must be a Connection. Received {}"
).format(connection.__name__)
_meta.connection = connection
if _meta.fields:
_meta.fields.update(sqla_fields)
else:
_meta.fields = sqla_fields
super(SQLAlchemyInterface, cls).__init_subclass_with_meta__(_meta=_meta, **options)
#classmethod
def Field(cls, *args, **kwargs): # noqa: N802
return NodeField(cls, *args, **kwargs)
#classmethod
def node_resolver(cls, only_type, root, info, id):
return cls.get_node_from_global_id(info, id, only_type=only_type)
#classmethod
def get_node_from_global_id(cls, info, global_id, only_type=None):
try:
node: DeclarativeMeta = one_or_none(session=info.context.get('session'), model=cls._meta.model, id=global_id)
return node
except Exception:
return None
#staticmethod
def from_global_id(global_id):
return global_id
#staticmethod
def to_global_id(type, id):
return id
Interface impls, Models + Query code examples:
class CustomNode(Node):
class Meta:
name = 'UuidNode'
#staticmethod
def to_global_id(type, id):
return '{}:{}'.format(type, id)
#staticmethod
def get_node_from_global_id(info, global_id, only_type=None):
type, id = global_id.split(':')
if only_type:
# We assure that the node type that we want to retrieve
# is the same that was indicated in the field type
assert type == only_type._meta.name, 'Received not compatible node.'
if type == 'User':
return one_or_none(session=info.context.get('session'), model=User, id=global_id)
elif type == 'Well':
return one_or_none(session=info.context.get('session'), model=Well, id=global_id)
class ControlledVocabulary(SQLAlchemyInterface):
class Meta:
name = 'ControlledVocabularyNode'
model = BaseControlledVocabulary
class TrackedEntity(SQLAlchemyInterface):
class Meta:
name = 'TrackedEntityNode'
model = TrackedEntityModel
class Request(SQLAlchemyObjectType):
"""Request node."""
class Meta:
model = RequestModel
interfaces = (TrackedEntity,)
class User(SQLAlchemyObjectType):
"""User Node"""
class Meta:
model = UserModel
interfaces = (CustomNode,)
class CvFormFieldValueType(SQLAlchemyObjectType):
class Meta:
model = CvFormFieldValueTypeModel
interfaces = (ControlledVocabulary,)
common_field_kwargs = {'id': graphene.UUID(required=False), 'label': graphene.String(required=False)}
class Query(graphene.ObjectType):
"""Query objects for GraphQL API."""
node = CustomNode.Field()
te_node = TrackedEntity.Field()
cv_node = ControlledVocabulary.Field()
# Non-Tracked Entities:
users: List[User] = SQLAlchemyConnectionField(User)
# Generic Query for any Tracked Entity:
tracked_entities: List[TrackedEntity] = FilteredConnectionField(TrackedEntity, sort=None, filter=graphene.Argument(TrackedEntityInput))
# Generic Query for any Controlled Vocabulary:
cv: ControlledVocabulary = graphene.Field(ControlledVocabulary, controlled_vocabulary_type_id=graphene.UUID(required=False),
base_entry_key=graphene.String(required=False),
**common_field_kwargs)
cvs: List[ControlledVocabulary] = FilteredConnectionField(ControlledVocabulary, sort=None, filter=graphene.Argument(CvInput))
#staticmethod
def resolve_with_filters(info: ResolveInfo, model: Type[SQLAlchemyObjectType], **kwargs):
query = model.get_query(info)
log.debug(kwargs)
for filter_name, filter_value in kwargs.items():
model_filter_column = getattr(model._meta.model, filter_name, None)
log.debug(type(filter_value))
if not model_filter_column:
continue
if isinstance(filter_value, SQLAlchemyInputObjectType):
log.debug(True)
filter_model = filter_value.sqla_model
q = FilteredConnectionField.get_query(filter_model, info, sort=None, **kwargs)
# noinspection PyArgumentList
query = query.filter(model_filter_column == q.filter_by(**filter_value))
log.info(query)
else:
query = query.filter(model_filter_column == filter_value)
return query
def resolve_tracked_entity(self, info: ResolveInfo, **kwargs):
entity: TrackedEntity = Query.resolve_with_filters(info=info, model=BaseTrackedEntity, **kwargs).one()
return entity
def resolve_tracked_entities(self, info, **kwargs):
query = Query.resolve_with_filters(info=info, model=BaseTrackedEntity, **kwargs)
tes: List[BaseTrackedEntity] = query.all()
return tes
def resolve_cv(self, info, **kwargs):
cv: List[BaseControlledVocabulary] = Query.resolve_with_filters(info=info, model=BaseControlledVocabulary, **kwargs).one()
log.info(cv)
return cv
def resolve_cvs(self, info, **kwargs):
cv: List[BaseControlledVocabulary] = Query.resolve_with_filters(info=info, model=BaseControlledVocabulary, **kwargs).all()
return cv
schema:
schema = Schema(query=Query, types=[*tracked_members, *cv_members])
I would like to be able to not extend Node with SQLAlchemyInterface and rather add Node back to the list of interfaces for TrackedEntity and ControlledVocabulary but be able to perform a query like this:
query queryTracked {
trackedEntities{
id
(other fields)
... on Request {
(request specific fields)
}
}

SQLAlchemy: replacing object with a new one, following defaults

I want to create a new instance of an SQLAlchemy object, so that fields are filled with default values, but I want to commit that to the database generating an UPDATE to a row that already exists with the same primary key, effectively resetting it to the default values. Is there any simple way to do that?
I have tried to do that and failed, because SQLAlchemy session tracks state of objects. So there is no easy way to make session to track new object as persistent one.
But you want to reset object to default, do you? There is a simple way to do that:
from sqlalchemy.ext.declarative import declarative_base
class Base(object):
def reset(self):
for name, column in self.__class__.__table__.columns.items():
if column.default is not None:
setattr(self, name, column.default.execute())
Base = declarative_base(bind=engine, cls=Base)
This adds reset method to all your model classes.
Here is the complete working example to fiddle with:
import os
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import functions
here = os.path.abspath(os.path.dirname(__file__))
engine = create_engine('sqlite:///%s/db.sqlite' % here, echo=True)
Session = sessionmaker(bind=engine)
class Base(object):
def reset(self):
for name, column in self.__class__.__table__.columns.items():
if column.default is not None:
setattr(self, name, column.default.execute())
Base = declarative_base(bind=engine, cls=Base)
class Thing(Base):
__tablename__ = 'things'
id = Column(Integer, primary_key=True)
value = Column(String(255), default='default')
ts1 = Column(DateTime, default=datetime.now)
ts2 = Column(DateTime, default=functions.now())
def __repr__(self):
return '<Thing(id={0.id!r}, value={0.value!r}, ' \
'ts1={0.ts1!r}, ts2={0.ts2!r})>'.format(self)
if __name__ == '__main__':
Base.metadata.drop_all()
Base.metadata.create_all()
print("---------------------------------------")
print("Create a new thing")
print("---------------------------------------")
session = Session()
thing = Thing(
value='some value',
ts1=datetime(2014, 1, 1),
ts2=datetime(2014, 2, 2),
)
session.add(thing)
session.commit()
session.close()
print("---------------------------------------")
print("Quering it from DB")
print("---------------------------------------")
session = Session()
thing = session.query(Thing).filter(Thing.id == 1).one()
print(thing)
session.close()
print("---------------------------------------")
print("Reset it to default")
print("---------------------------------------")
session = Session()
thing = session.query(Thing).filter(Thing.id == 1).one()
thing.reset()
session.commit()
session.close()
print("---------------------------------------")
print("Quering it from DB")
print("---------------------------------------")
session = Session()
thing = session.query(Thing).filter(Thing.id == 1).one()
print(thing)
session.close()
Is there any simple way to do that?
Upon further consideration, not really. The cleanest way will be to define your defaults in __init__. The constructor is never called when fetching objects from the DB, so it's perfectly safe. You can also use backend functions such as current_timestamp().
class MyObject(Base):
id = Column(sa.Integer, primary_key=True)
column1 = Column(sa.String)
column2 = Column(sa.Integer)
columnN = Column(sa.String)
updated = Column(sa.DateTime)
def __init__(self, **kwargs):
kwargs.setdefault('column1', 'default value')
kwargs.setdefault('column2', 123)
kwargs.setdefault('columnN', None)
kwargs.setdefault('updated', sa.func.current_timestamp())
super(MyObject, self).__init__(**kwargs)
default_obj = MyObject()
default_obj.id = old_id
session.merge(default_obj)
session.commit()

Writing items to a MySQL database in Scrapy

I am new to Scrapy, I had the spider code
class Example_spider(BaseSpider):
name = "example"
allowed_domains = ["www.example.com"]
def start_requests(self):
yield self.make_requests_from_url("http://www.example.com/bookstore/new")
def parse(self, response):
hxs = HtmlXPathSelector(response)
urls = hxs.select('//div[#class="bookListingBookTitle"]/a/#href').extract()
for i in urls:
yield Request(urljoin("http://www.example.com/", i[1:]), callback=self.parse_url)
def parse_url(self, response):
hxs = HtmlXPathSelector(response)
main = hxs.select('//div[#id="bookshelf-bg"]')
items = []
for i in main:
item = Exampleitem()
item['book_name'] = i.select('div[#class="slickwrap full"]/div[#id="bookstore_detail"]/div[#class="book_listing clearfix"]/div[#class="bookstore_right"]/div[#class="title_and_byline"]/p[#class="book_title"]/text()')[0].extract()
item['price'] = i.select('div[#id="book-sidebar-modules"]/div[#class="add_to_cart_wrapper slickshadow"]/div[#class="panes"]/div[#class="pane clearfix"]/div[#class="inner"]/div[#class="add_to_cart 0"]/form/div[#class="line-item"]/div[#class="line-item-price"]/text()').extract()
items.append(item)
return items
And pipeline code is:
class examplePipeline(object):
def __init__(self):
self.dbpool = adbapi.ConnectionPool('MySQLdb',
db='blurb',
user='root',
passwd='redhat',
cursorclass=MySQLdb.cursors.DictCursor,
charset='utf8',
use_unicode=True
)
def process_item(self, spider, item):
# run db query in thread pool
assert isinstance(item, Exampleitem)
query = self.dbpool.runInteraction(self._conditional_insert, item)
query.addErrback(self.handle_error)
return item
def _conditional_insert(self, tx, item):
print "db connected-=========>"
# create record if doesn't exist.
tx.execute("select * from example_book_store where book_name = %s", (item['book_name']) )
result = tx.fetchone()
if result:
log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
else:
tx.execute("""INSERT INTO example_book_store (book_name,price)
VALUES (%s,%s)""",
(item['book_name'],item['price'])
)
log.msg("Item stored in db: %s" % item, level=log.DEBUG)
def handle_error(self, e):
log.err(e)
After running this I am getting the following error
exceptions.NameError: global name 'Exampleitem' is not defined
I got the above error when I added the below code in process_item method
assert isinstance(item, Exampleitem)
and without adding this line I am getting
**exceptions.TypeError: 'Example_spider' object is not subscriptable
Can anyone make this code run and make sure that all the items saved into database?
Try the following code in your pipeline
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
class MySQLStorePipeline(object):
def __init__(self):
self.conn = MySQLdb.connect('host', 'user', 'passwd',
'dbname', charset="utf8",
use_unicode=True)
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
try:
self.cursor.execute("""INSERT INTO example_book_store (book_name, price)
VALUES (%s, %s)""",
(item['book_name'].encode('utf-8'),
item['price'].encode('utf-8')))
self.conn.commit()
except MySQLdb.Error, e:
print "Error %d: %s" % (e.args[0], e.args[1])
return item
Your process_item method should be declared as: def process_item(self, item, spider): instead of def process_item(self, spider, item): -> you switched the arguments around.
This exception: exceptions.NameError: global name 'Exampleitem' is not defined indicates you didn't import the Exampleitem in your pipeline.
Try adding: from myspiders.myitems import Exampleitem (with correct names/paths ofcourse).
I think this way is better and more concise:
#Item
class pictureItem(scrapy.Item):
topic_id=scrapy.Field()
url=scrapy.Field()
#SQL
self.save_picture="insert into picture(`url`,`id`) values(%(url)s,%(id)s);"
#usage
cur.execute(self.save_picture,dict(item))
It's just like
cur.execute("insert into picture(`url`,`id`) values(%(url)s,%(id)s)" % {"url":someurl,"id":1})
Cause (you can read more about Items in Scrapy)
The Field class is just an alias to the built-in dict class and doesn’t provide any extra functionality or attributes. In other words, Field objects are plain-old Python dicts.