sqlalchemy.exc.NoInspectionAvailable: No inspection system is available for object of type <class 'sqlalchemy.ext.asyncio.engine.AsyncEngine'> - sqlalchemy

I followed this example, but with aiosqlite: https://docs.sqlalchemy.org/en/14/tutorial/metadata.html#table-reflection
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy import MetaData
from sqlalchemy import Table, Column, Integer, String

metadata = MetaData()
engine = create_async_engine(
    "sqlite+aiosqlite:////Users/rui/database.db",
    echo=True,
    future=True,
)

# Table Reflection (Loading existing table)
users = Table("users", metadata, autoload_with=engine)
Then I get this error:
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/deprecations.py", line 298, in warned
return fn(*args, **kwargs)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 597, in __new__
metadata._remove_table(name, schema)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py", line 70, in __exit__
compat.raise_(
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 211, in raise_
raise exception
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 592, in __new__
table._init(name, metadata, *args, **kw)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 667, in _init
self._autoload(
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 700, in _autoload
insp = inspection.inspect(autoload_with)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/inspection.py", line 71, in inspect
raise exc.NoInspectionAvailable(
sqlalchemy.exc.NoInspectionAvailable: No inspection system is available for object of type <class 'sqlalchemy.ext.asyncio.engine.AsyncEngine'>
I'm using Python 3.9.2, sqlalchemy==1.4.5 and aiosqlite==0.17.0.
Should I replace aiosqlite with another driver, or wait until asyncio support in SQLAlchemy matures?
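For what it's worth, the synchronous inspection system cannot be handed an AsyncEngine directly; in SQLAlchemy 1.4 the documented route is to run reflection inside AsyncConnection.run_sync(). A minimal sketch along those lines (same database URL as above):

import asyncio
from sqlalchemy import MetaData
from sqlalchemy.ext.asyncio import create_async_engine

metadata = MetaData()
engine = create_async_engine("sqlite+aiosqlite:////Users/rui/database.db")

async def reflect_users():
    # run_sync hands a synchronous Connection to the callable,
    # which the reflection machinery knows how to inspect.
    async with engine.connect() as conn:
        await conn.run_sync(metadata.reflect, only=["users"])
    return metadata.tables["users"]

users = asyncio.run(reflect_users())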

Related

getting error while writing data onto cloud bigTable through dataflow

I am using a 2nd gen Cloud Function to trigger a Dataflow job. The Dataflow template basically reads parquet files from Cloud Storage and loads the data into Bigtable.
Here are the code and package details:
import os
import datetime
import logging
from configparser import ConfigParser
import apache_beam as beam
from google.cloud.bigtable import Client
from google.cloud.bigtable.row import DirectRow
from apache_beam.options.pipeline_options import PipelineOptions
from google.cloud import bigtable
from google.cloud.bigtable import column_family
from google.cloud.bigtable import row_filters
from apache_beam.io.gcp.bigtableio import WriteToBigTable

logger = logging.getLogger()
logger.setLevel(logging.INFO)

config_object = ConfigParser()
config_object.read("config.ini")

project_id = config_object["uprn"]["project_id"]
instance_id = config_object["uprn"]["instance_id"]
table_id = config_object["uprn"]["table_id"]
column_family_id = config_object["uprn"]["column_family_id"]
# input_columns = config_object["uprn"]["input_columns"]

timestamp = datetime.datetime(1970, 1, 1)
logging.info("--Starting..")
# client = bigtable.Client(project=project_id, admin=True)
# instance = client.instance(instance_id)
# table = instance.table(table_id)

def big_table_load(ele):
    try:
        rows = []
        column_names = list(ele.keys())
        row_key = str(str(ele['uprn'])).encode()
        logging.info("--row_key " + str(row_key))
        row = DirectRow(row_key)
        for key in column_names:
            row.set_cell(
                column_family_id, key, str(ele[key]).encode('utf-8'), timestamp=timestamp
            )
        rows.append(row)
        return rows
    except Exception as e:
        logging.info("Error encountered for row_key " + str(row_key) + " with error message " + str(e))

def find_err_file():
    filename_err = user_options.efilename.get()
    return filename_err

class UserOptions(PipelineOptions):
    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument(
            '--input_location',
            default='gs://my-proj-dev-local-landing-zone/mock_data/*'
        )

pipeline_options = PipelineOptions()
user_options = pipeline_options.view_as(UserOptions)

def run():
    try:
        with beam.Pipeline(options=pipeline_options) as p:
            records = (p
                       | 'Read' >> beam.io.ReadFromParquet(user_options.input_location)
                       | 'Format Rows' >> beam.ParDo(big_table_load)
                       | WriteToBigTable(
                           project_id=project_id,
                           instance_id=instance_id,
                           table_id=table_id
                       ))
    except Exception as e:
        logging.info(e)
        raise e

if __name__ == '__main__':
    run()
requirements.txt:
google-cloud-bigtable==1.7.0
apache-beam[gcp]==2.39.0
Error processing instruction process_bundle-4225915941562411087-3. Original traceback is Traceback (most recent call last):
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 267, in _execute
    response = task()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 340, in
    lambda: self.create_worker().do_instruction(request), request)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 581, in do_instruction
    getattr(request, request_type), request.instruction_id)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 618, in process_bundle
    bundle_processor.process_bundle(instruction_id))
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 1001, in process_bundle
    op.finish()
  File "apache_beam/runners/worker/operations.py", line 736, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 738, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 739, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/common.py", line 1253, in apache_beam.runners.common.DoFnRunner.finish
  File "apache_beam/runners/common.py", line 1234, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 1281, in apache_beam.runners.common.DoFnRunner._reraise_augmented
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found' [while running 'WriteToBigTable/ParDo(_BigTableWriteFn)-ptransform-43']
There is a 'not_found' error: do the table and the column family you are writing to exist? Bigtable writes do not create them on the fly.
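As a first check, something along these lines (a sketch using the google-cloud-bigtable admin client, reusing the IDs from the question's config) would create the table and column family when they are missing:

from google.cloud import bigtable
from google.cloud.bigtable import column_family

client = bigtable.Client(project=project_id, admin=True)
instance = client.instance(instance_id)
table = instance.table(table_id)

if not table.exists():
    # The GC rule here is an assumption; pick one that fits your data.
    gc_rule = column_family.MaxVersionsGCRule(1)
    table.create(column_families={column_family_id: gc_rule})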

ValueError: arrays must all be same length - Parse the JSON into Pandas DataFrame

import requests
import json
import pandas as pd

data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()

with open(r"C:\path\file.json", 'w') as outfile:
    json.dump(json_data, outfile)

df = pd.read_json(r"C:\path\file.json")
When I try to parse the JSON data into a Pandas DataFrame, I get the error below:
ValueError: arrays must all be same length
Can anyone help me out with this?
Traceback (most recent call last):
File "c:/path/file.py", line 29, in <module>
df = pd.read_json(r"C:\path\file.json")
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 199, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 618, in read_json
result = json_reader.read()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 755, in read
obj = self._get_object_parser(self.data)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 777, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 886, in parse
self._parse_no_numpy()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 1118, in _parse_no_numpy
self.obj = DataFrame(
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 468, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 283, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 78, in arrays_to_mgr
index = extract_index(arrays)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 397, in extract_index
raise ValueError("arrays must all be same length")
ValueError: arrays must all be same length
A simpler approach is to skip the file round-trip and use json_normalize(). read_json fails here because the top-level GeoJSON keys (type, metadata, features, bbox) map to values of different lengths, so they cannot form DataFrame columns; the records you want live under features:
import requests
import json
import pandas as pd
data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()
pd.json_normalize(json_data["features"])
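json_normalize() flattens each feature's nested properties and geometry dicts into dot-separated columns, so specific fields can be selected directly. A short follow-up sketch (the column names assume the USGS GeoJSON schema):

df = pd.json_normalize(json_data["features"])
# "properties.mag" etc. are flattened from the nested "properties" dict.
quakes = df[["id", "properties.mag", "properties.place", "properties.time"]]
print(quakes.head())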

mySQL unicode decode error for emojis when the Django queryset is converted to a list

I am on Django 1.11, Python 3.6 and mysqlclient 1.4.6. I need to convert a Django queryset into a list. The objects in the queryset have a field with a unicode emoji value (see: field=u'✅ This is the field value'). That particular table and field in MySQL use utf8mb4_unicode_ci encoding.
When I run qs_list = list(qs), MySQL throws a UnicodeDecodeError, which I think is caused by the emoji in the field when the queryset is evaluated.
File encoding:
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
Queryset:
qs = Fake.objects.filter(..)
qs_list = list(qs)
Error:
<class 'UnicodeDecodeError'>
Exception: 'utf-8' codec can't decode byte 0xed in position 11: invalid continuation byte
File "/home/mysite/lib/python3.6/site-packages/django/db/models/query.py", line 250, in __iter__
self._fetch_all()
File "/home/mysite/lib/python3.6/site-packages/django/db/models/query.py", line 1121, in _fetch_all
self._result_cache = list(self._iterable_class(self))
File "/home/mysite/lib/python3.6/site-packages/django/db/models/query.py", line 53, in __iter__
results = compiler.execute_sql(chunked_fetch=self.chunked_fetch)
File "/home/mysite/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 899, in execute_sql
raise original_exception
File "/home/mysite/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 889, in execute_sql
cursor.execute(sql, params)
File "/home/mysite/lib/python3.6/site-packages/django/db/backends/utils.py", line 64, in execute
return self.cursor.execute(sql, params)
File "/home/mysite/lib/python3.6/site-packages/django/db/backends/mysql/base.py", line 101, in execute
return self.cursor.execute(query, args)
File "/home/mysite/lib/python3.6/site-packages/MySQLdb/cursors.py", line 209, in execute
res = self._query(query)
File "/home/mysite/lib/python3.6/site-packages/MySQLdb/cursors.py", line 317, in _query
self._post_get_result()
File "/home/mysite/lib/python3.6/site-packages/MySQLdb/cursors.py", line 352, in _post_get_result
self._rows = self._fetch_row(0)
File "/home/mysite/lib/python3.6/site-packages/MySQLdb/cursors.py", line 325, in _fetch_row
return self._result.fetch_row(size, self._fetch_type)
Is there a simple solution or a workaround for this? How can I get the list of objects out of the queryset without the evaluation failing?
Most probably this is related to the encoding; you may need to try other encodings, as suggested in this article.
Regarding the list of objects, try iterating over the result to pull out (for example) each name and store the values in a list:
qs = Fake.objects.filter(..)
qs_list = [x.name for x in qs]
where 'name' is the model field you want to collect.
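If the underlying cause is the connection charset rather than the table encoding, a common fix is to make the MySQL connection itself speak utf8mb4 so 4-byte emoji survive the round trip. A sketch for settings.py (database name and credentials are placeholders):

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'mydb',        # placeholder
        'USER': 'myuser',      # placeholder
        'PASSWORD': 'secret',  # placeholder
        'OPTIONS': {
            # Match the table's utf8mb4_unicode_ci collation.
            'charset': 'utf8mb4',
        },
    }
}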

Upload Pandas dataframe as a JSON object in Cloud Storage

I have been trying to upload a Pandas DataFrame as a JSON object to Cloud Storage using a Cloud Function. Following is my code -
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_file(source_file_name)
    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))

final_file = pd.concat([df, df_second], axis=0)
final_file.to_json('/tmp/abc.json')

with open('/tmp/abc.json', 'r') as file_obj:
    upload_blob('test-bucket', file_obj, 'abc.json')
I am getting the following error on the line blob.upload_from_file(source_file_name):
Deployment failure:
Function failed on loading user code. Error message: Code in file main.py can't be loaded.
Detailed stack trace: Traceback (most recent call last):
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 305, in check_or_load_user_function
    _function_handler.load_user_function()
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 184, in load_user_function
    spec.loader.exec_module(main)
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/user_code/main.py", line 6, in <module>
    import datalab.storage as gcs
  File "/env/local/lib/python3.7/site-packages/datalab/storage/__init__.py", line 16, in <module>
    from ._bucket import Bucket, Buckets
  File "/env/local/lib/python3.7/site-packages/datalab/storage/_bucket.py", line 21, in <module>
    import datalab.context
  File "/env/local/lib/python3.7/site-packages/datalab/context/__init__.py", line 15, in <module>
    from ._context import Context
  File "/env/local/lib/python3.7/site-packages/datalab/context/_context.py", line 20, in <module>
    from . import _project
  File "/env/local/lib/python3.7/site-packages/datalab/context/_project.py", line 18, in <module>
    import datalab.utils
  File "/env/local/lib/python3.7/site-packages/datalab/utils/__init__.py", line 15
    from ._async import async, async_function, async_method
                        ^
SyntaxError: invalid syntax
What could be causing this error?
You are passing a string to blob.upload_from_file(), but this method requires a file object. You probably want to use blob.upload_from_filename() instead; check the sample in the GCP docs. Note also that the stack trace itself shows the deployment failing on import datalab.storage: the datalab package tries to import a name called async, which is a reserved keyword in Python 3.7, so main.py cannot even be loaded. Removing the datalab import (the google.cloud.storage client is enough here) clears that deployment failure.
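A sketch of that suggestion, keeping the names from the question (pass the path and let the client library open the file):

from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a local file to the bucket by path."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)

# final_file is the concatenated DataFrame from the question
final_file.to_json('/tmp/abc.json')
upload_blob('test-bucket', '/tmp/abc.json', 'abc.json')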
Alternatively, you could get the file object, and keep using blob.upload_from_file(), but it's unnecessary extra lines.
with open('/tmp/abc.json', 'r') as file_obj:
    upload_blob('test-bucket', file_obj, 'abc.json')
Use a bucket object instead of a string, something like:
upload_blob(conn.get_bucket(mybucket), '/tmp/abc.json', 'abc.json')

SQLAlchemy session issues with celery

I have scheduled a few recurring tasks with Celery beat for our web app.
The app itself is built with the Pyramid web framework, using the ZopeTransactionExtension to manage sessions.
In Celery, I am using the app as a library and redefining the session in models with a function.
It works well, but once in a while it raises InvalidRequestError: This session is in 'prepared' state; no further SQL can be emitted within this transaction.
I am not sure what is wrong or why it raises these errors.
Sample code:
In tasks.py:
def initialize_async_session():
    import sqlalchemy
    from webapp.models import Base, set_dbsession, engine

    Session = sqlalchemy.orm.scoped_session(
        sqlalchemy.orm.sessionmaker(autocommit=True, autoflush=True)
    )
    Session.configure(bind=engine)
    session = Session()
    set_dbsession(session)
    Base.metadata.bind = engine
    return session

@celery.task
def rerun_scheduler():
    log.info("Starting pipeline scheduler")
    session = initialize_async_session()
    webapp.sheduledtask.service.check_for_updates(session)
    log.info("Ending pipeline scheduler")
In models.py in webapp:
DBSession = scoped_session(sessionmaker(bind=engine, expire_on_commit=False,
                                        extension=ZopeTransactionExtension()))

def set_dbsession(db_session=None):
    """
    This function sets the db session
    """
    global DBSession
    if db_session:
        DBSession = db_session
        log.info("session changed to {0}".format(db_session))
UPDATE:
traceback:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 551, in __bootstrap_inner
self.run()
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/edgem_common-0.0-py2.7.egg/common/utils.py", line 54, in new_function
result = f(*args, **kwargs)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/edgem_common-0.0-py2.7.egg/common/utils.py", line 100, in new_function
result = f(*args, **kwargs)
File "/home/ubuntu/modwsgi/env/mvc-service/webapp/webapp/data/mongo_service.py", line 1274, in run
self.table_params.set_task_status_as_finished()
File "/home/ubuntu/modwsgi/env/mvc-service/webapp/webapp/mem_objects.py", line 33, in set_task_status_as_finished
task = Task.get_by_id(self.task_id)
File "/home/ubuntu/modwsgi/env/mvc-service/webapp/webapp/models.py", line 162, in get_by_id
return DBSession.query(cls).filter(cls.id == obj_id).first()
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2156, in first
ret = list(self[0:1])
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2023, in __getitem__
return list(res)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2227, in __iter__
return self._execute_and_instances(context)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2240, in _execute_and_instances
close_with_result=True)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2231, in _connection_from_session
**kw)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 777, in connection
close_with_result=close_with_result)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 781, in _connection_for_bind
return self.transaction._connection_for_bind(engine)
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 289, in _connection_for_bind
self._assert_is_active()
File "/home/ubuntu/modwsgi/env/local/lib/python2.7/site-packages/SQLAlchemy-0.7.9-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 217, in _assert_is_active
"This Session's transaction has been rolled back "
InvalidRequestError: This Session's transaction has been rolled back by a nested rollback() call. To begin a new transaction, issue Session.rollback() first.
#########################################################################
[2013-05-30 14:32:57,782: WARNING/PoolWorker-3] Exception in thread Thread-4:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 552, in __bootstrap_inner
self.run()
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/edgem_common-0.0-py2.7.egg/common/utils.py", line 54, in new_function
result = f(*args, **kwargs)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/edgem_common-0.0-py2.7.egg/common/utils.py", line 100, in new_function
result = f(*args, **kwargs)
File "/home/ranjith/wksp/mvc-service/webapp/webapp/data/mongo_service.py", line 1274, in run
self.table_params.set_task_status_as_finished()
File "/home/ranjith/wksp/mvc-service/webapp/webapp/mem_objects.py", line 33, in set_task_status_as_finished
task = Task.get_by_id(self.task_id)
File "/home/ranjith/wksp/mvc-service/webapp/webapp/models.py", line 166, in get_by_id
return DBSession.query(cls).filter(cls.id == obj_id).first()
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2145, in first
ret = list(self[0:1])
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2012, in __getitem__
return list(res)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2216, in __iter__
return self._execute_and_instances(context)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2229, in _execute_and_instances
close_with_result=True)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2220, in _connection_from_session
**kw)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 798, in connection
close_with_result=close_with_result)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 802, in _connection_for_bind
return self.transaction._connection_for_bind(engine)
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 281, in _connection_for_bind
self._assert_active()
File "/home/ranjith/wksp/env/local/lib/python2.7/site-packages/SQLAlchemy-0.8.1-py2.7-linux-x86_64.egg/sqlalchemy/orm/session.py", line 181, in _assert_active
"This session is in 'prepared' state; no further "
InvalidRequestError: This session is in 'prepared' state; no further SQL can be emitted within this transaction.
I believe the problem is that you are attempting to reuse the web application's SQLAlchemy session in your Celery tasks.
The first thing I recommend is creating two separate scoped sessions, one for your Celery application and another for your web application. Next, make sure the Celery database session is configured only once, during Celery initialization. You can hook Celery's worker_init signal so the session is created at worker startup (http://hynek.me/articles/using-celery-with-pyramid/).
It is very important that your web application does not share a database session with your Celery application.
Something like this for your tasks.py file:
from celery import Celery
from celery.signals import worker_init
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

Session = scoped_session(
    sessionmaker(autocommit=True, autoflush=True))

@worker_init.connect
def initialize_session(**kwargs):
    some_engine = create_engine('database_url')
    Session.configure(bind=some_engine)

@celery.task
def rerun_scheduler():
    log.info("Starting pipeline scheduler")
    webapp.sheduledtask.service.check_for_updates(Session)
    log.info("Ending pipeline scheduler")
Cross-posting my answer to a very similar Stack Overflow question:
What's the proper way to use SQLAlchemy Sessions with Celery?
This solved the issue for me: SQLAlchemy pools connections by default in a non-threadsafe manner, and Celery forks worker processes by default, so one or the other needs to change. Turn off SQLAlchemy pooling (see the SQLAlchemy docs on NullPool):
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

engine = create_engine(
    SQLALCHEMY_DATABASE_URL, poolclass=NullPool
)
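An alternative sketch that keeps pooling (assuming a module-level engine and a placeholder URL): discard the connections a worker inherits across the fork by hooking Celery's worker_process_init signal, so each child process opens fresh connections on first use:

from celery.signals import worker_process_init
from sqlalchemy import create_engine

engine = create_engine('database_url')  # placeholder URL

@worker_process_init.connect
def reset_engine_connections(**kwargs):
    # Drop pooled connections inherited from the parent process;
    # the pool refills lazily with connections owned by this child.
    engine.dispose()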