SQLAlchemy insert data dumped on the database A into the database B - mysql

What is the most effective way to insert data dumped from database A into database B? Normally I would use mysqldump for a task like this, but because of the complex query I had to take a different approach. At present I have the following inefficient solution:
from sqlalchemy import create_engine, Column, INTEGER, CHAR, VARCHAR
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
SessFactory = sessionmaker()
print('## Configure database connections')
# A SQLAlchemy URL separates the credentials from the host with '@'
# (dialect://user:password@host/database).
# Source database connection and a session bound to it.
db_one = create_engine('mysql://root:pwd1@127.0.0.1/db_one', echo=True).connect()
sess_one = SessFactory(bind=db_one)
# Target database connection and a session bound to it.
db_two = create_engine('mysql://root:pwd2@127.0.0.2/db_two', echo=True).connect()
sess_two = SessFactory(bind=db_two)
# Raw SQL used to dump the data: non-deleted rows of table_a joined to
# table_b on A.id = B.id_c.
dump_query = (
    'SELECT A.id, A.name, B.address'
    ' FROM table_a A JOIN table_b B'
    ' ON A.id = B.id_c'
    ' WHERE A.deleted = 0'
)
print('## Fetch data on db_one')
# Execute the dump query on the db_one connection and pull every row into memory.
data = db_one.execute(dump_query).fetchall()
## Declare table on db_two
class cstm_table(Base):
    """Declarative mapping of the ``cstm_table`` table created on db_two."""

    __tablename__ = 'cstm_table'

    # Integer primary key of the table itself.
    pk = Column(INTEGER, primary_key=True)
    # Required CHAR(36) identifier; presumably filled from A.id of the dump query.
    id = Column(CHAR(36), nullable=False)
    name = Column(VARCHAR(150), default=None)
    address = Column(VARCHAR(150), default=None)
print('## Recreate "cstm_table" on db_two')
# Drop the table if it exists, then create it again from the mapping.
cstm_table.__table__.drop(bind=db_two, checkfirst=True)
cstm_table.__table__.create(bind=db_two)
print('## Insert dumped data into the "cstm_table" on db_two')
# One INSERT statement is built and executed per dumped row.
for row in data:
    insert = cstm_table.__table__.insert().values(row)
    db_two.execute(insert)
This executes over 100K inserts sequentially (horrible).
I also tried:
with db_two.connect() as conn:
    with conn.begin() as trans:
        # Convert every fetched row to a plain dict keyed by column name.
        row_as_dict = [dict(row.items()) for row in data]
        try:
            # Passing a list of dicts executes the insert once for the whole batch.
            conn.execute(cstm_table.__table__.insert(), row_as_dict)
        except:
            # Undo the partial batch, then let the original error propagate.
            trans.rollback()
            raise
        else:
            trans.commit()
But then after inserting ~20 rows I get error:
OperationalError: (_mysql_exceptions.OperationalError) (2006, 'MySQL server has gone away')
The following also does the job, but I'm not so sure it's the most efficient:
# Build one mapped object per dumped row, then persist them through the session.
new_rows = [cstm_table(**dict(row.items())) for row in data]
sess_two.add_all(new_rows)
sess_two.flush()
sess_two.commit()

Related

How to execute multiple SQL commands at once in pd.read_sql_query?

Let me create a use case to discuss.
-- Sample database with a single `quote` table used by the examples below.
CREATE DATABASE sample;
USE sample;
CREATE TABLE quote (
`id` int(2) unsigned NOT NULL AUTO_INCREMENT,
`code` text ,
`date` date DEFAULT NULL,
`close` double DEFAULT NULL,
PRIMARY KEY (`id`)
) ;
-- Two sample rows for the same date.
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('epm', '20200824', 2.64);
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('dss', '20200824', 6.4);
It is simple to execute only one sql command with sqlalchemy.
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
# Credentials and host are separated by '@' in a SQLAlchemy URL.
engine = create_engine("mysql+pymysql://{}:{}@{}:3306".format(user,mysql_pass,mysql_ip))
# A single statement: read_sql_query returns its result set as a DataFrame.
cmd_one_line_sql = 'select * from sample.quote;'
df = pd.read_sql_query(cmd_one_line_sql,con = engine)
df
id code date close
0 1 epm 2020-08-24 2.64
1 2 dss 2020-08-24 6.40
I get the desired result. Now the command contains multiple SQL statements; for simplicity, it contains only two:
# Two statements in one string: select the database, then query the table.
cmd_multi_lines_sql = 'use sample;select * from quote;'
cmd_multi_lines_sql simply splits cmd_one_line_sql into two statements.
I rewrite the code snippet according to manual:
execute many sql commands with sqlalchemy
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
# Credentials and host are separated by '@' in a SQLAlchemy URL.
engine = create_engine("mysql+pymysql://{}:{}@{}:3306".format(user,mysql_pass,mysql_ip))
# Work on the raw DBAPI connection instead of going through SQLAlchemy.
connection = engine.raw_connection()
cmd_multi_lines_sql = 'use sample;select * from quote;'
try:
    cursor = connection.cursor()
    # Both statements are sent in a single execute() call.
    cursor.execute(cmd_multi_lines_sql)
    results_one = cursor.fetchall()
finally:
    # Always release the connection, even when execute() raises.
    connection.close()
Get the below error info:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 732, in _read_query_result
result.read()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 684, in _read_packet
packet.check_error()
File "/usr/local/lib/python3.5/dist-packages/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/usr/local/lib/python3.5/dist-packages/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'select * from quote' at line 1")
Another try:
vim /tmp/test.sql
use sample;
select * from quote;
#write the commands in `/tmp/test.sql`
# Read the whole script; the context manager closes the file afterwards.
with open('/tmp/test.sql','r') as f:
    cmd = f.read()
df = pd.read_sql_query(cmd, con = engine)
It outputs the same error info. How can I fix it?
After some research and asking on GitHub, the answer is clear:
you need to pass the required parameter through
connect_args=
With SQLAlchemy, the parameter to pass is
{"client_flag": MULTI_STATEMENTS}
So your Python code looks like this:
from sqlalchemy import create_engine
import pymysql
from pymysql.constants.CLIENT import MULTI_STATEMENTS
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
# Credentials and host are separated by '@'; connect_args forwards the
# MULTI_STATEMENTS client flag to PyMySQL.
engine = create_engine(
    "mysql+pymysql://{}:{}@{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip),
    connect_args={"client_flag": MULTI_STATEMENTS},
)
connection = engine.raw_connection()
try:
    cursor = connection.cursor()
    try:
        cursor.execute(cmd)
        # Result set of the first statement.
        results_one = cursor.fetchall()
        # Advance to the result set of the second statement.
        cursor.nextset()
        results_two = cursor.fetchall()
    finally:
        # Close the cursor even when a statement fails.
        cursor.close()
finally:
    connection.close()
But with this solution you need to know beforehand which queries you run.
If you want to be more flexible, with dynamic SQL statements:
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
# Credentials and host are separated by '@' in a SQLAlchemy URL.
engine = create_engine("mysql+pymysql://{}:{}@{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
# Run the statements one by one instead of relying on MULTI_STATEMENTS.
splitstring = cmd.split(";")
ges_resultset = []
try:
    cursor = connection.cursor()
    try:
        for cmdoneonly in splitstring:
            cursor.execute(cmdoneonly)
            results = cursor.fetchall()
            # Collect the result set of every statement, in order.
            ges_resultset.append(results)
    finally:
        # Close the cursor even when a statement fails.
        cursor.close()
finally:
    connection.close()
This way you can check every single command and decide how Python should react to it:
SELECT needs its result set fetched;
INSERT, DELETE and CREATE do not (there are more, but you get the gist).
The issues you face are:
You need to pass the MULTI_STATEMENTS flag to PyMySQL, and
read_sql_query assumes that the first result set contains the data for the DataFrame, and that may not be true for an anonymous code block.
You can create your own PyMySQL connection and retrieve the data like this:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
# Connection settings; client_flag enables several statements per execute().
conn_info = {
    "host": "localhost",
    "port": 3307,
    "user": "root",
    "password": "toot",
    "database": "mydb",
    "client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
"""
crsr.execute(sql)
# Upper bound on how many result sets are inspected; guards against an endless loop.
num_tries = 5
result = None
for i in range(num_tries):
    result = crsr.fetchall()
    if result:
        # First non-empty result set found: this is the data for the DataFrame.
        break
    # Empty result set (e.g. from CREATE/INSERT): move on to the next one.
    crsr.nextset()
if not result:
    print(f"(no result found after {num_tries} attempts)")
else:
    # Column names come from the cursor description of the current result set.
    df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
    print(df)
"""console output:
id txt
0 1 foo
1 2 ΟΠΑ!
"""
(Edit) Additional notes:
Note 1: As mentioned in another answer, you can use the connect_args argument to SQLAlchemy's create_engine method to pass the MULTI_STATEMENTS flag. If you need a SQLAlchemy Engine object for other things (e.g., for to_sql) then that might be preferable to creating your own PyMySQL connection directly.
Note 2: num_tries can be arbitrarily large; it is simply a way of avoiding an endless loop. If we need to skip the first n empty result sets then we need to call nextset that many times regardless, and once we've found the non-empty result set we break out of the loop.
@Gord Thompson, I made a small improvement to set num_tries automatically:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
# Connection settings; client_flag enables several statements per execute().
conn_info = {
    "host": "localhost",
    "port": 3306,
    "user": "root",
    "password": "your mysql passwd",
    "client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
crsr.execute(sql)
# Derive the number of statements from the ';' separators. Strip first: the
# script ends with a newline, so a plain endswith(';') would never match and
# the count would be one too high.
stripped_sql = sql.strip()
num_tries = stripped_sql.count(';') if stripped_sql.endswith(';') else stripped_sql.count(';') + 1
for i in range(num_tries):
    result = crsr.fetchall()
    if result:
        # Print every non-empty result set, not just the first one.
        df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
        print(df)
    crsr.nextset()
@nbk: when the cmd contains many SQL statements, executing your code may raise an error such as:
pymysql.err.InternalError: (1065, 'Query was empty')
Here is a small improvement based on your code:
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = 'localhost'
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
# Credentials and host are separated by '@' in a SQLAlchemy URL.
engine = create_engine("mysql+pymysql://{}:{}@{}:3306".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
splitstring = sql.split(";")
try:
    cursor = connection.cursor()
    try:
        for cmdoneonly in splitstring:
            # Skip the blank fragment after the final ';' to avoid error 1065.
            if cmdoneonly.strip():
                cursor.execute(cmdoneonly)
                results = cursor.fetchall()
                if results :
                    df = pd.DataFrame(results, columns=[x[0] for x in cursor.description])
                    print(df)
    finally:
        # Close the cursor even when a statement fails.
        cursor.close()
finally:
    connection.close()
It is necessary to add the check if cmdoneonly.strip(): to avoid the 1065 'Query was empty' error.
The statement df = pd.DataFrame(results, columns=[x[0] for x in cursor.description]) is a wonderful one, learned from @Gord Thompson.

load data in sql table if table exists but it is empty

I am writing a python script which connects to the SQL database. It creates databases based on the folder paths and tables based on the files present in those folders in corresponding databases.
My following code is doing everything fine but I want to optimize it in a way that it loads the data into tables only if the tables are empty.
The problem with the following code is this: every time I run it, it creates a table if the table is not present and moves on if the table already exists, but when it comes to loading data, it loads the data into the tables again on every run, starting from file 1.
I want to tweak it so that it loads data into a table only if the data is not already present in it. If the data is present, the code should move on.
I tried something like CREATE TABLE IF NOT EXISTS, but without success.
# Connection settings passed to mysql.connector.connect() below.
hostname = 'hostname'
username = 'usrname'
password = '12345'
database = 'd1'
# Kept as a string because it is concatenated into the status message below.
portname = '12345'
from mysql.connector.constants import ClientFlag
import pathlib
import sys
import os
import mysql.connector
import subprocess
from subprocess import *
import time
from termcolor import colored
print(colored('\nConnecting SQL database using host = '+hostname+' , username = '+username+' , port = ' +portname+ ' , database = ' +database+'.','cyan',attrs=['reverse','blink']))
print('\n')
# LOCAL_FILES is required for the LOAD DATA LOCAL INFILE statements below.
myConnection = mysql.connector.connect(user=username, passwd=password,host=hostname,port=portname,database=database,client_flags=[ClientFlag.LOCAL_FILES])
myCursor =myConnection.cursor()
rootDir35 = '/mnt/Wdrive/pc35/SK/E13'
filenames35 = os.listdir(rootDir35)
root35 = pathlib.Path(rootDir35)
# Every directory under the root that directly contains at least one file.
non_empty_dirs35 = {str(p35.parent) for p35 in root35.rglob('*') if p35.is_file()}
#35
try:
    print(colored('**** Starting Performing SQL-Queries for pc35 **** \n','green',attrs=['reverse','blink'] ))
    for f35 in non_empty_dirs35:
        # Derive the database name from the directory path. The replacements
        # are chained: with `a or b` the second replace was never evaluated,
        # so '-' was left in the name.
        dB35 = f35.replace('/','_').replace('-','_')
        for dirName35, subdirList35, fileList35 in os.walk(rootDir35):
            if dirName35 == f35:
                p1135= 'Current Working Directory is: %s' %f35+ ' '
                print(colored(p1135,'cyan'))
                createDB35='CREATE DATABASE IF NOT EXISTS %s' %dB35
                myCursor.execute(createDB35)
                p135='Database of pc35 Created : %s' %dB35
                print(colored(p135,'cyan'))
                useDB35='use %s' %dB35
                myCursor.execute(useDB35)
                myConnection.commit()
                p235= 'Database in use : %s' %dB35
                print(colored(p235,'cyan'))
                print(' ')
                for fname35 in fileList35:
                    completePath35 ='%s' %dirName35+'/%s'%fname35
                    # Table name is the file name up to its first '.'.
                    tblname35 = os.path.basename(fname35).split('.')[0]
                    # Files whose base name contains '-' are skipped.
                    if '-' not in tblname35:
                        if '.' not in tblname35:
                            sql35= 'CREATE TABLE if not exists %s (Datum varchar(50), Uhrzeit varchar(13), UpsACT_V varchar(6), UpsPRE_V varchar(6), IpsACT_A varchar(6), IpsPRE_A varchar(6), PpsACT_W varchar(6), PpsPRE_W varchar(10), UelACT_V varchar(6), UelPRE_V varchar(6), IelACT_A varchar(8), IelPRE_A varchar(8), PelACT_W varchar(8), PelPRE_W varchar(8), Qlad_Ah varchar(10), Qlast_Ah varchar(10))' %(tblname35)
                            myCursor.execute(sql35)
                            myConnection.commit()
                            test35 = 'The Table %s ' %tblname35+ 'in database %s '%dB35+'is created'
                            print(colored(test35,'yellow'))
                            # The file is loaded on every run, whether or not
                            # the table already holds data.
                            loadData35= "LOAD DATA LOCAL INFILE '%s' " %completePath35 + "INTO TABLE %s" %tblname35
                            myCursor.execute(loadData35)
                            myConnection.commit()
                            p335='Data loaded from file %s ' %fname35
                            p435=' into table %s ' %tblname35
                            p535 = p335 + p435
                            print(colored(p535,'green'))
                            print(' ')
    print(colored('**** SQL-Queries for pc35 successfully executed **** \n','green',attrs=['reverse','blink']))
except Exception as err35:
    # Report the failure together with its cause instead of hiding it.
    print(' ')
    print(colored('**** SQL queries for pc35 doesnot executed. Please refer to the report or user manual for more details ****','red',attrs=['reverse','blink']))
    print(colored('Reason: %s' % err35,'red'))
    print(' ')
What I want is something like "load data if it does not already exist in the table".
What do you think: is this possible, and what should I do to achieve it?

table not updating while using functions in python

I made a Python program which takes arguments from a function call to update a table. The arguments are passed successfully, but the table is not updated.
`
import mysql.connector
# Connect to the local `student` database and open a cursor on it.
mydb = mysql.connector.connect(host="localhost",user='root',passwd="",database='student')
print(mydb)
mycursor = mydb.cursor()
# NOTE(review): this creates `testtable`, but the UPDATE statements below target `form` - confirm which table is intended.
mycursor.execute("create TABLE if not exists testtable ( num INT NOT NULL AUTO_INCREMENT,issue varchar(30), status varchar(30),PRIMARY KEY (num))")
def dev(y,z):
    """Set ``status`` to ``y`` on the row of ``form`` whose ``num`` equals ``z``.

    Uses the module-level ``mycursor`` and ``mydb``: executes the
    parameterized UPDATE, commits it, and prints the affected row count.
    """
    values=(y,z)
    print(values)
    print(mydb)
    # Placeholders are filled in by the driver from ``values``.
    sql = "UPDATE form SET status = %s WHERE num = %s"
    mycursor.execute(sql,values)
    mydb.commit()
    print(mycursor.rowcount, "record(s) affected")
# The same UPDATE executed directly at module level, for row 1.
values=('goog',1)
sql = "UPDATE form SET status = %s WHERE num = %s"
mycursor.execute(sql,values)
mydb.commit()
print(mycursor.rowcount, "record(s) affected")
# And through the function, for row 2.
dev('goog',2)
A similar query outside the function works properly.
For some reason mycursor.execute() won't execute inside the function.

Trying to load records from CSV to mysql

import csv
import MySQLdb
conn = MySQLdb.connect('localhost','tekno','poop','media')
cursor = conn.cursor()
# Python 2 `file()` builtin: opens the CSV for reading.
txt = csv.reader(file('movies.csv'))
for row in txt:
    # NOTE(review): presumably a row with fewer than three fields (e.g. a blank
    # line) is what triggers "not enough arguments for format string" - confirm.
    cursor.execute('insert into shows_and_tv(watched_on,title,score_rating)' 'values ("%s","%s","%s")',row)
conn.close()
When I run this I get
TypeError: not enough arguments for format string
but the number of values matches.
The CSV is formatted like
dd-mm-yyyy,string,tinyint
which matches the fields in the database.
I do not have a MySQL database to play with, so I did what you need in SQLite instead. It should be quite easy to adapt this to your needs.
import csv
import sqlite3
from collections import namedtuple
conn = sqlite3.connect('statictest.db')
c = conn.cursor()
c.execute('''CREATE TABLE if not exists movies (ID INTEGER PRIMARY KEY AUTOINCREMENT, 'watched_on','title','score_rating')''')
# Named record so each CSV row is passed to the INSERT with labelled fields.
record = namedtuple('record',['watched_on','title','score_rating'])
SQL ='''
INSERT INTO movies ("watched_on","title","score_rating") VALUES (?,?,?)
'''
# `csv_file` rather than `file`, which shadows the Python 2 builtin.
with open('statictest.csv', 'r') as csv_file:
    read_data = csv.reader(csv_file)
    for row in read_data:
        # Fails loudly if a row does not have exactly three fields.
        watched_on, title, score_rating = row
        data = (record(watched_on, title, score_rating))
        c.execute(SQL, data)
# Commit once after all rows are inserted, then release the connection.
conn.commit()
conn.close()

How to select the MIN() of an ORM attribute and a literal datetime?

How do I get the MIN() of a datetime column and a literal datetime in SQL Alchemy v0.6.4? I want to clamp a datetime result to a specific range.
I've been using sqlalchemy.func.min(column, literal_datetime). This seems to work fine in SQLite but not at all with MySQL, which no doubt means I'm going about this wrong. The error from MySQL is:
sqlalchemy.exc.ProgrammingError: (ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ' '2011-06-14 12:30:00') AS min_1 \nFROM example' at line 1") 'SELECT min(example.my_datetime, %s) AS min_1 \nFROM example' (datetime.datetime(2011, 6, 14, 12, 30),)
How can I clamp datetime results in a portable way?
(Do I really have to do it in my application?)
Here's what I've been using to explore the problem - works fine as presented here (using a memory-based SQLite DB), doesn't work with MySQL:
#!/usr/bin/env python
import random
from datetime import datetime
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy.ext.declarative import declarative_base
orm_base = declarative_base()
class MyClass( orm_base ):
    """Declarative mapping of the ``example`` table used to reproduce the problem."""

    __tablename__ = "example"

    pri_key = sa.Column(sa.Integer(), primary_key=True)
    # Indexed datetime column that the min() queries operate on.
    my_datetime = sa.Column(sa.DateTime(), index=True)
# In-memory SQLite database; echo=True logs every emitted SQL statement.
engine = sa.create_engine("sqlite:///:memory:", echo=True)
Session = orm.sessionmaker( bind=engine )
orm_base.metadata.bind = engine
orm_base.metadata.create_all()
# Create test-data
session = Session()
# Fixed seed so the generated datetimes are reproducible.
random.seed(1234567980)
for i in range(100):
    month = random.randint(1, 12)
    # Days are capped at 28 so every month yields a valid date.
    day = random.randint(1, 28)
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    my_instance = MyClass()
    my_instance.my_datetime = datetime(2011, month, day, hour, minute)
    session.add( my_instance )
session.commit()
session.close()
# Problem starts here
session = Session()
# Literal datetime that the column values are compared against.
literal_datetime = datetime(2011, 06, 14, 12, 30)
# Single-argument min(): the aggregate over the column.
print session.query( sa.func.min(MyClass.my_datetime) ).one() # OK
# Two-argument min(): rendered as min(column, literal), which MySQL rejects with error 1064 (see the traceback quoted above).
print session.query( sa.func.min(MyClass.my_datetime, literal_datetime) ).all() # Error in MySQL
I'm not an SQL expert but database portability is important to me, so any tips on how to avoid pitfalls like this in the future are welcome.
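Use the sqlalchemy.sql.expression.case expression, which is supported on both MySQL and SQLite. For example: case([(MyClass.my_datetime < literal_datetime, MyClass.my_datetime)], else_=literal_datetime).
The min/max functions of SQLite support non-aggregate operation on multiple values. This is not supported on MySQL, though.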