How can I switch TPU version for TPU VM architechture?
When attempting to switch software version for TPU(TPU VM architechture switching from tpu-vm-tf-2.6.0-pod to tpu-vm-base) using instructions found here, I get Connection Refused exception with traceback:
Traceback (most recent call last):
File "/usr/lib/python3.8/urllib/request.py", line 1354, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/usr/lib/python3.8/http/client.py", line 1256, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1302, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1251, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib/python3.8/http/client.py", line 1011, in _send_output
self.send(msg)
File "/usr/lib/python3.8/http/client.py", line 951, in send
self.connect()
File "/usr/lib/python3.8/http/client.py", line 922, in connect
self.sock = self._create_connection(
File "/usr/lib/python3.8/socket.py", line 808, in create_connection
raise err
File "/usr/lib/python3.8/socket.py", line 796, in create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "switch.py", line 20, in <module>
c.configure_tpu_version(args.target_version, restart_type="ifNeeded")
File "/usr/local/lib/python3.8/dist-packages/cloud_tpu_client/client.py", line 391, in configure_tpu_version
for result in results:
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 619, in result_iterator
yield fs.pop().result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
File "/usr/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.8/dist-packages/cloud_tpu_client/client.py", line 375, in configure_worker
request.urlopen(req)
File "/usr/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/usr/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/usr/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/usr/lib/python3.8/urllib/request.py", line 1383, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/lib/python3.8/urllib/request.py", line 1357, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>
The command is run on cloud TPU VM with cloud-tpu-client version 0.10
When running the same command from my PC I get Connection timed out after a long pause with traceback:
Traceback (most recent call last):
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1350, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1281, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1327, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1276, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 1036, in _send_output
self.send(msg)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 976, in send
self.connect()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/http/client.py", line 948, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/socket.py", line 728, in create_connection
raise err
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/socket.py", line 716, in create_connection
sock.connect(sa)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "tpu_version.py", line 19, in <module>
c.configure_tpu_version(args.target_version, restart_type="ifNeeded")
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/site-packages/cloud_tpu_client/client.py", line 392, in configure_tpu_version
for result in results:
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 598, in result_iterator
yield fs.pop().result()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 435, in result
return self.__get_result()
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/site-packages/cloud_tpu_client/client.py", line 376, in configure_worker
request.urlopen(req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1378, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/nevus/anaconda3/envs/imageGen/lib/python3.7/urllib/request.py", line 1352, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 110] Connection timed out>
This feature is not supported by the TPU VM architecture.
Best way to change the TPU VM's version would be to delete it and recreate a new one with the desired version.
Related
I have error in the following code when I try to send a request with json data in it, is it because it only works synchronously, so does not work in fastapi which work asynchronously? If that is the case, what is the simplest way to send a request with json data in it?
import time
from fastapi import Request, FastAPI, BackgroundTasks
import multiprocessing as mp
import uvicorn
import requests
import json
def printmessage(job):
time.sleep(5)
print(job)
if __name__ == 'webhook_fastapi':
url = 'http://127.0.0.1:8000/webhook'
data = { 'text': 'hello'}
r = requests.post(url, data=json.dumps(data), headers={'Content-Type': 'application/json'})
print("Request Sent!")
app = FastAPI()
#app.post("/webhook")
async def webhook(request : Request, background_tasks: BackgroundTasks):
print("WEBHOOK RECEIVED")
job="doctor"
background_tasks.add_task(printmessage,job)
print('done')
return 'WEBHOOK RECEIVED'
if __name__ == '__main__':
print("PROGRAM LAUNCH...")
print("WEBHOOK RECEIVE READY...")
background_tasks = BackgroundTasks()
job="doctor"
background_tasks.add_task(printmessage,job)
uvicorn.run("webhook_fastapi:app", reload=False)
OUTPUT:
__main__
PROGRAM LAUNCH...
WEBHOOK RECEIVE READY...
webhook_fastapi
Traceback (most recent call last): File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connection.py",
line 169, in _new_conn
conn = connection.create_connection( File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\util\connection.py",
line 96, in create_connection
raise err File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\util\connection.py",
line 86, in create_connection
sock.connect(sa) ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused
it
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connectionpool.py",
line 699, in urlopen
httplib_response = self._make_request( File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connectionpool.py",
line 394, in _make_request
conn.request(method, url, **httplib_request_kw) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connection.py",
line 234, in request
super(HTTPConnection, self).request(method, url, body=body, headers=headers) File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\http\client.py",
line 1279, in request
self._send_request(method, url, body, headers, encode_chunked) File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\http\client.py",
line 1325, in _send_request
self.endheaders(body, encode_chunked=encode_chunked) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\http\client.py",
line 1274, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked) File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\http\client.py",
line 1034, in _send_output
self.send(msg) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\http\client.py",
line 974, in send
self.connect() File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connection.py",
line 200, in connect
conn = self._new_conn() File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connection.py",
line 181, in _new_conn
raise NewConnectionError( urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x0000013C0E494310>:
Failed to establish a new connection: [WinError 10061] No connection
could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py",
line 439, in send
resp = conn.urlopen( File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\connectionpool.py",
line 755, in urlopen
retries = retries.increment( File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\util\retry.py",
line 574, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause)) urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1',
port=8000): Max retries exceeded with url: /webhook (Caused by
NewConnectionError('<urllib3.connection.HTTPConnection object at
0x0000013C0E494310>: Failed to establish a new connection: [WinError
10061] No connection could be made because the target machine actively
refused it'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File
"c:\Users*\webhook_fastapi.py", line 40, in
uvicorn.run("webhook_fastapi:app", reload=False) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\uvicorn\main.py",
line 463, in run
server.run() File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\uvicorn\server.py",
line 60, in run
return asyncio.run(self.serve(sockets=sockets)) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\asyncio\runners.py",
line 44, in run
return loop.run_until_complete(main) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\asyncio\base_events.py",
line 642, in run_until_complete
return future.result() File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\uvicorn\server.py",
line 67, in serve
config.load() File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\uvicorn\config.py",
line 458, in load
self.loaded_app = import_from_string(self.app) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\uvicorn\importer.py",
line 21, in import_from_string
module = importlib.import_module(module_str) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\importlib_init_.py",
line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level) File "", line 1030, in _gcd_import File
"", line 1007, in _find_and_load File
"", line 986, in _find_and_load_unlocked
File "", line 680, in _load_unlocked
File "", line 850, in
exec_module File "", line 228, in
_call_with_frames_removed File "c:\Users*\webhook_fastapi.py", line 19, in
r = requests.post(url, data=json.dumps(data), headers={'Content-Type': 'application/json'}) File
"C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py",
line 117, in post
return request('post', url, data=data, json=json, **kwargs) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py",
line 61, in request
return session.request(method=method, url=url, **kwargs) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py",
line 542, in request
resp = self.send(prep, **send_kwargs) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py",
line 655, in send
r = adapter.send(request, kwargs) File "C:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py",
line 516, in send
raise ConnectionError(e, request=request) requests.exceptions.ConnectionError:
HTTPConnectionPool(host='127.0.0.1', port=8000): Max retries exceeded
with url: /webhook (Caused by
NewConnectionError('<urllib3.connection.HTTPConnection object at
0x0000013C0E494310>: Failed to establish a new connection: [WinError
10061] No connection could be made because the target machine actively
refused it')) PS C:\Users*>
There is no server running when you're trying to make your request. uvicorn is importing your code, and you're apparently trying to make a request while the code is being imported.
uvicorn can't launch the API and bind to the port before it has finished importing the code and the ASGI application has been set up and properly configured.
I have looked through similar posts on SO and they seem to be specific to using Docker environments and haven't been much helpful. Ours is a little different, we do run a docker image of Airflow hosted on Azure App Service but it connects to hosted Azure Database for PostgreSQL server (version 11).
Python = 3.8
Apache Airflow = 2.1.4
SQL Alchemy = 1.3.24
Executor = Local
The environment has been setup and it works fine for most cases. However, when we run DAGs that handle large amounts of data (typically several GB), we suddenly encounter Heartbeat issues. Now, I have tried setting values in Airflow Config for Keep Alives through sql_alchemy_connect_args variable, and also changing the variables web_server_master_timeout and web_server_worker_timeout to a higher value to no avail.
The ERROR:
{base_job.py:222} ERROR - LocalTaskJob heartbeat got an exception
Traceback (most recent call last):
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py”, line 2336, in _wrap_pool_connect
return fn()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 364, in connect
return _ConnectionFairy._checkout(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 778, in _checkout
fairy = _ConnectionRecord.checkout(pool)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 495, in checkout
rec = pool._do_get()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/impl.py”, line 241, in _do_get
return self._create_connection()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 309, in _create_connection
return _ConnectionRecord(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 440, in __init__
self.__connect(first_connect_check=True)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 661, in __connect
pool.logger.debug(“Error on connect(): %s”, e)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/util/langhelpers.py”, line 68, in __exit__
compat.raise_(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py”, line 182, in raise_
raise exception
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 656, in __connect
connection = pool._invoke_creator(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py”, line 114, in connect
return dialect.connect(*cargs, **cparams)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py”, line 508, in connect
return self.dbapi.connect(*cargs, **cparams)
File “/usr/local/lib/python3.8/site-packages/psycopg2/__init__.py”, line 122, in connect
conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
psycopg2.OperationalError: could not translate host name “<address>” to address: Temporary failure in name resolution
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File “/usr/local/lib/python3.8/site-packages/airflow/jobs/base_job.py”, line 194, in heartbeat
session.merge(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py”, line 2166, in merge
return self._merge(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py”, line 2244, in _merge
merged = self.query(mapper.class_).get(key[1])
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 1018, in get
return self._get_impl(ident, loading.load_on_pk_identity)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 1135, in _get_impl
return db_load_fn(self, primary_key_identity)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/loading.py”, line 286, in load_on_pk_identity
return q.one()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3490, in one
ret = self.one_or_none()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3459, in one_or_none
ret = list(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3535, in __iter__
return self._execute_and_instances(context)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3556, in _execute_and_instances
conn = self._get_bind_args(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3571, in _get_bind_args
return fn(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/query.py”, line 3550, in _connection_from_session
conn = self.session.connection(**kw)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py”, line 1142, in connection
return self._connection_for_bind(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py”, line 1150, in _connection_for_bind
return self.transaction._connection_for_bind(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/orm/session.py”, line 433, in _connection_for_bind
conn = bind._contextual_connect()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py”, line 2302, in _contextual_connect
self._wrap_pool_connect(self.pool.connect, None),
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py”, line 2339, in _wrap_pool_connect
Connection._handle_dbapi_exception_noconnection(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py”, line 1583, in _handle_dbapi_exception_noconnection
util.raise_(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py”, line 182, in raise_
raise exception
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/base.py”, line 2336, in _wrap_pool_connect
return fn()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 364, in connect
return _ConnectionFairy._checkout(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 778, in _checkout
fairy = _ConnectionRecord.checkout(pool)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 495, in checkout
rec = pool._do_get()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/impl.py”, line 241, in _do_get
return self._create_connection()
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 309, in _create_connection
return _ConnectionRecord(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 440, in __init__
self.__connect(first_connect_check=True)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 661, in __connect
pool.logger.debug(“Error on connect(): %s”, e)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/util/langhelpers.py”, line 68, in __exit__
compat.raise_(
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/util/compat.py”, line 182, in raise_
raise exception
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/pool/base.py”, line 656, in __connect
connection = pool._invoke_creator(self)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py”, line 114, in connect
return dialect.connect(*cargs, **cparams)
File “/usr/local/lib/python3.8/site-packages/sqlalchemy/engine/default.py”, line 508, in connect
return self.dbapi.connect(*cargs, **cparams)
File “/usr/local/lib/python3.8/site-packages/psycopg2/__init__.py”, line 122, in connect
conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) could not translate host name “<address>” to address: Temporary failure in name resolution
(Background on this error at: http://sqlalche.me/e/13/e3q8)
Could someone throw some light to help me navigate through this issue? I am at my wits end and I am not sure, if I am treading in the right direction debugging this.
I had the same issue with my deployment of airflow on a Kubernetes cluster. This is apparently due to a high number of simultaneous connections to the database.
I fixed it by enabling the pgBouncer as it is recommended in the official production guide.
# PgBouncer settings
pgbouncer:
# Enable PgBouncer
enabled: true
I am running a Flask application on local and production server. I have no issues with the local, I am facing 'MySQL connection not available' for every second database request on production server. After reloading or after performing rollback operation, it is getting executed, but the issue repeats. I tried major solutions available on the internet by changing, pool_recycle, 'wait_timeout' and other timeouts to same value of 1600, but didn't work. I finally destroyed the application and reinstalled everything on the server but the issue is still on. Please help in this, thank you.
My production MySQL config:
class ProductionConfig(Config):
SECRET_KEY = 'secret_key'
DEBUG=False
SQLALCHEMY_DATABASE_URI = "mysql+pymysql://{username}:
{password}#{hostname}/{databasename}".format(
username="myusername",
password="password",
hostname="myhostname",
databasename="mydatabase",
)
SQLALCHEMY_POOL_RECYCLE = 299
SQLALCHEMY_TRACK_MODIFICATIONS = False
Error Message:
Exception on /add_questions/1/2 [POST]
Traceback (most recent call last):
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 713, in _write_bytes
self._sock.sendall(data)
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1248, in _execute_context
cursor, statement, parameters, context
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line 590, in do_execute
cursor.execute(statement, parameters)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 516, in query
self._execute_command(COMMAND.COM_QUERY, sql)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 771, in _execute_command
self._write_bytes(packet)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 718, in _write_bytes
"MySQL server has gone away (%r)" % (e,))
pymysql.err.OperationalError: (2006, "MySQL server has gone away (ConnectionResetError(104, 'Connection reset by peer'))")
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/app.py", line 2447, in wsgi_app
response = self.full_dispatch_request()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/app.py", line 1952, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/app.py", line 1821, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/_compat.py", line 39, in reraise
raise value
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask_login/utils.py", line 270, in decorated_view
elif not current_user.is_authenticated:
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/werkzeug/local.py", line 347, in __getattr__
return getattr(self._get_current_object(), name)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/werkzeug/local.py", line 306, in _get_current_object
return self.__local()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask_login/utils.py", line 26, in <lambda>
current_user = LocalProxy(lambda: _get_user())
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask_login/utils.py", line 346, in _get_user
current_app.login_manager._load_user()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/flask_login/login_manager.py", line 318, in _load_user
user = self._user_callback(user_id)
File "/home/ulznrcvr/jaihindpro/application/auth/views.py", line 26, in load_user
return User.query.get(int(user_id))
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 1004, in get
return self._get_impl(ident, loading.load_on_pk_identity)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 1121, in _get_impl
return db_load_fn(self, primary_key_identity)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/loading.py", line 287, in load_on_pk_identity
return q.one()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3360, in one
ret = self.one_or_none()
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3329, in one_or_none
ret = list(self)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3405, in __iter__
return self._execute_and_instances(context)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/orm/query.py", line 3430, in _execute_and_instances
result = conn.execute(querycontext.statement, self._params)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 984, in execute
return meth(self, multiparams, params)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/sql/elements.py", line 293, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1103, in _execute_clauseelement
distilled_params,
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1288, in _execute_context
e, statement, parameters, cursor, context
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1482, in _handle_dbapi_exception
sqlalchemy_exception, with_traceback=exc_info[2], from_=e
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 178, in raise_
raise exception
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1248, in _execute_context
cursor, statement, parameters, context
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line 590, in do_execute
cursor.execute(statement, parameters)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 516, in query
self._execute_command(COMMAND.COM_QUERY, sql)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 771, in _execute_command
self._write_bytes(packet)
File "/home/ulznrcvr/virtualenv/jaihindpro/3.6/lib/python3.6/site-packages/pymysql/connections.py", line 718, in _write_bytes
"MySQL server has gone away (%r)" % (e,))
sqlalchemy.exc.OperationalError: (pymysql.err.OperationalError) (2006, "MySQL server has gone away (ConnectionResetError(104, 'Connection reset by peer'))")
[SQL: SELECT user.id AS user_id, user.student_id AS user_student_id, user.role_id AS user_role_id, user.course_id AS user_course_id, user.sub_course_id AS user_sub_course_id, user.batch_id AS user_batch_id, user.full_name AS user_full_name, user.email AS user_email, user.phone_number AS user_phone_number, user.password AS user_password, user.active AS user_active, user.confirmed AS user_confirmed, user.confirmed_date AS user_confirmed_date, user.subscribed AS user_subscribed, user.start_date AS user_start_date, user.end_date AS user_end_date, user.re_url AS user_re_url, user.online AS user_online
FROM user
WHERE user.id = %(param_1)s]
[parameters: {'param_1': 2}]
(Background on this error at: http://sqlalche.me/e/e3q8)
I figured it out myself, I flagged "pool_pre_ping" attribute to "True" in engine settings.
SQLALCHEMY_ENGINE_OPTIONS = {
"pool_pre_ping": True,
"pool_recycle": 300,
}
Ref: https://medium.com/#heyjcmc/controlling-the-flask-sqlalchemy-engine-a0f8fae15b47
I am using #transaction.atomic decorator
I am using python and django
and I got this error:
Internal Server Error: /create_project
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/django/db/backends/base/base.py", line 239, in _commit
return self.connection.commit()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 422, in commit
self._read_ok_packet()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 396, in _read_ok_packet
pkt = self._read_packet()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 656, in _read_packet
packet_header = self._read_bytes(4)
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 702, in _read_bytes
CR.CR_SERVER_LOST, "Lost connection to MySQL server during query")
pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.4/dist-packages/django/core/handlers/exception.py", line 35, in inner
response = get_response(request)
File "/usr/local/lib/python3.4/dist-packages/django/core/handlers/base.py", line 128, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/usr/local/lib/python3.4/dist-packages/django/core/handlers/base.py", line 126, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/python3.4/contextlib.py", line 30, in inner
return func(*args, **kwds)
File "/usr/local/lib/python3.4/dist-packages/django/db/transaction.py", line 212, in __exit__
connection.commit()
File "/usr/local/lib/python3.4/dist-packages/django/db/backends/base/base.py", line 261, in commit
self._commit()
File "/usr/local/lib/python3.4/dist-packages/django/db/backends/base/base.py", line 239, in _commit
return self.connection.commit()
File "/usr/local/lib/python3.4/dist-packages/django/db/utils.py", line 89, in __exit__
raise dj_exc_value.with_traceback(traceback) from exc_value
File "/usr/local/lib/python3.4/dist-packages/django/db/backends/base/base.py", line 239, in _commit
return self.connection.commit()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 422, in commit
self._read_ok_packet()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 396, in _read_ok_packet
pkt = self._read_packet()
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 656, in _read_packet
packet_header = self._read_bytes(4)
File "/usr/local/lib/python3.4/dist-packages/pymysql/connections.py", line 702, in _read_bytes
CR.CR_SERVER_LOST, "Lost connection to MySQL server during query")
django.db.utils.OperationalError: (2013, 'Lost connection to MySQL server during query')
What do I do??
UPD: I am using cloud 9 ide (c9.io) so can this be connected to performance issue since the c9 servers are not so robust?
Has anyone encountered "sorry, too many clients already" error on heartbeat phase while running a subdag?
[2017-11-22 08:21:23,747] {jobs.py:2136} ERROR - Exception while
trying to heartbeat! Sleeping for 5.0s Traceback (most recent call
last): File
"/usr/local/lib/python2.7/site-packages/airflow/jobs.py", line 2131,
in _execute
self.heartbeat() File "/usr/local/lib/python2.7/site-packages/airflow/jobs.py", line 180, in
heartbeat
job = session.query(BaseJob).filter(BaseJob.id == self.id).first() File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py",
line 2755, in first
ret = list(self[0:1]) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line
2547, in getitem
return list(res) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line
2855, in iter
return self._execute_and_instances(context) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line
2876, in _execute_and_instances
close_with_result=True) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line
2885, in _get_bind_args
**kw File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line
2867, in _connection_from_session
conn = self.session.connection(**kw) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py",
line 998, in connection
execution_options=execution_options) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py",
line 1003, in _connection_for_bind
engine, execution_options) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/orm/session.py",
line 403, in _connection_for_bind
conn = bind.contextual_connect() File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py",
line 2112, in contextual_connect
self._wrap_pool_connect(self.pool.connect, None), File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py",
line 2151, in _wrap_pool_connect
e, dialect, self) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py",
line 1465, in _handle_dbapi_exception_noconnection
exc_info File "/usr/local/lib/python2.7/site-packages/sqlalchemy/util/compat.py",
line 203, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/base.py",
line 2147, in _wrap_pool_connect
return fn() File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 387,
in connect
return _ConnectionFairy._checkout(self) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 766,
in _checkout
fairy = _ConnectionRecord.checkout(pool) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 516,
in checkout
rec = pool._do_get() File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line
1229, in _do_get
return self._create_connection() File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 333,
in _create_connection
return _ConnectionRecord(self) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 461,
in init
self.connect(first_connect_check=True) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/pool.py", line 651,
in __connect
connection = pool._invoke_creator(self) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/strategies.py",
line 105, in connect
return dialect.connect(*cargs, **cparams) File "/usr/local/lib/python2.7/site-packages/sqlalchemy/engine/default.py",
line 393, in connect
return self.dbapi.connect(*cargs, **cparams) File "/usr/local/lib/python2.7/site-packages/psycopg2/__init.py", line
130, in connect
conn = _connect(dsn, connection_factory=connection_factory, **kwasync) OperationalError: (psycopg2.OperationalError) FATAL: sorry, too many clients already
from airflow.cfg:
on airflow.cfg:
sql_alchemy_pool_size = 100
sql_alchemy_pool_recycle = 3600
Is there a known solution to this problem?
I'm using airflow 1.8.2 with LocalExecutor.