CSV Insert per line to MySQL python

CSV Insert per line to MySQL python - mysql

20170325_225012,ctcc01,voice,639128342574,639464810386,cap_timeout,6004,639180007006,2,0,null
20170325_235012,ctcc01,voice,639128342554,639464520384,cap_timeout,6004,639180007006,2,0,null
20170325_245012,ctcc01,voice,639128342174,639464820327,cap_timeout,6004,639180007006,2,0,null
Sample text data.csv file above:
Steps needed to complete:
Process the csv file
Each line should be inserted to MySQL Column. Column1, Column2, Column3 ... Column11
This is my Code so far.
import csv
import re
f = open('data.csv')
csv_f = csv.reader(f)
writer = csv.writer(f)
cdr = []
for row in csv_f:
cdr.append("Some auto increment id")
cdr.append(re.sub(r'_.*$', "", row[0]))
cdr.append(row[1])
cdr.append(row[2])
cdr.append(row[3])
cdr.append(row[4])
cdr.append(row[5])
cdr.append(row[6])
cdr.append(row[7])
cdr.append(row[8])
cdr.append(row[9])
cdr.append(row[10])
print cdr
with open('output.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerows(cdr)
I was able to output it on the terminal the way i want it but it did make it in one list :). Somehow i don't know how can i split and insert it on the mysql.
['Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'cap_timeout', '6004', '639180007006', '2', '0', 'null', 'Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'no_subs', '6004', '639180007006', '2', '0', 'null', 'Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'cap_timeout', '6004', '639180007006', '2', '0', 'null']

No. You and you need to use MySql.db.connect, and insert and commit.
Basically, you'll find your answer in a similar question here
The code should be:
# open file, and define a csv reader and writer - you've done that correctly
import csv
import re
f = open('data.csv')
csv_f = csv.reader(f)
writer = csv.writer(f)
vals = []
# open and connect to database
dbname = 'mydb' # or whatever your database is named
tablename = 'mytable' # or whatever table you wish to insert into
hostname = 'localhost' # or whatever your mysql db hostname is
username = 'root' # or whatever your username for mysql db is
pw = '' # or whatever your password is for that user
mydb = MySQLdb.connect(host=hostname, user=username, passwd=pw, db=dbname)
cursor = mydb.cursor()
# for each row create an 'INSERT INTO' execution-string
auto = 0 # auto-incrementing
exec_string = ""
rowid = ""
for row in csv_f:
# INSERT INTO mytable(Column, Column1,Column2, ... Column12)
# VALUES(auto, rowid, row(1), row(2)...
# execstr header:
exec_string = "INSERT INTO " + tablename + "(Column, "
for i in range(1,11): # columns
exec_string += "Column" + i + (", " if (i<11))
# ...it may be a mistake in the question and you need Column0
# ...in which case the end of the exec_string line should read + "("
# ...and the for should be in range(0,11): ...
# execstr values:
exec_string += ") Values("
for _ in range(12):
exec_string += "%S"
exec_string += ")" # close values function
vals = []
auto += 1
rowid = re.sub(r'_.*$', "", row[0])
vals.append(auto)
vals.append(rowid)
for i in range(2,12) # count to 12 starting with 2
vals.append(row[i])
# and execute it!
cursor.execute(exec_string, vals)
# commit and close the connection to the database.
mydb.commit()
cursor.close()

Related

Unable to populate multiple results for the invoice system using MySQL query

I'm trying to build an invoice generator GUI app using tkinter and reportlab. Currently, it's reading an excel file showing some results on treeview. However, to generate an invoice, I am creating a pdf file to show all the data for the invoice but unable to populate multiple results in the pdf file from MySQL table.
Here is my complete code:
import tkinter as tk
from tkinter import ttk
import pandas as pd
from tkinter import filedialog
import mysql.connector
import os
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
def browse_file():
# Open the file browser dialog
file_path = filedialog.askopenfilename()
# Read the Excel file into a DataFrame
df = pd.read_excel("mpo.xlsx", sheet_name='OrderDetails')
df = df.fillna('NULL')
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Insert each row into the MySQL database
for index, row in df.iterrows():
cursor.execute("INSERT INTO perorder (mp_task_nr, batch_id, dropoff_sequence, id_user_payout_type, task_status, city, fleet, id_user, Name, Vendor, order_date, UID, KM, total_amount, Remarks) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )", (row["mp_task_nr"], row["batch_id"], row["dropoff_sequence"], row["id_user_payout_type"], row["task_status"], row["city"], row["fleet"], row["id_user"], row["Name"], row["Vendor"], row["order_date"], row["UID"], row["KM"], row["total_amount"], row["Remarks"]))
# Commit the changes and close the cursor and connection
conn.commit()
cursor.close()
conn.close()
def populate_treeview():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement
cursor.execute("SELECT id_user, Vendor, KM FROM perorder")
# Fetch the results
results = cursor.fetchall()
# Add the results to the Treeview widget
for result in results:
treeview.insert("", tk.END, values=result)
# Close the cursor and connection
cursor.close()
conn.close()
def generateInvoice():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
# Fetch the results
results = cursor.fetchall()
# Create the PDF file
pdf_file = "invoice.pdf"
doc = SimpleDocTemplate(pdf_file, pagesize=letter)
# Create the table data
table_data = []
table_data.append(['City', 'Qantity', 'Rate', 'Amount', 'Taxable Value','Amount + 5% VAT', 'Grand Total'])
for result in results:
table_data.append([result[0], result[1], result[2], result[3], result[4], result[5], result[6]])
# Create the table
table = Table(table_data)
# Set the table style
table.setStyle(TableStyle([
('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
('BOX', (0,0), (-1,-1), 0.25, colors.black)
]))
# Build the document
doc.build([table])
# Create the root window
root = tk.Tk()
root.title("Megnatico Invoice System")
root.geometry("800x800")
# Create the treeview
treeview = ttk.Treeview(root)
treeview.pack(side="left", fill="both", expand=True)
treeview["columns"] = ("id_user", "Vendor", "KM")
treeview.column("id_user", width=150, minwidth=150, stretch=tk.NO)
treeview.column("Vendor", width=150, minwidth=150, stretch=tk.NO)
treeview.column("KM", width=150, minwidth=150, stretch=tk.NO)
treeview.heading("id_user", text="User ID")
treeview.heading("Vendor", text="Vendor")
treeview.heading("KM", text="KM")
treeview["displaycolumns"] = ("id_user", "Vendor", "KM")
# Create the buttons
read_excel_button = tk.Button(root, text="Read Excel", command=browse_file)
invoice_per_order_button = tk.Button(root, text="Invoice per Order", command=populate_treeview)
save_pdf_button = tk.Button(root, text="Generate Invoice", command=generateInvoice)
close_button = tk.Button(root, text="Close")
# Place the buttons in a frame and pack the frame to the right of the root window
button_frame = tk.Frame(root)
button_frame.pack(side="right", fill="both")
read_excel_button.pack(padx=10, pady=5)
invoice_per_order_button.pack(padx=10, pady=5)
save_pdf_button.pack(padx=10, pady=5)
close_button.pack(padx=10, pady=5)
# Run the Tkinter event loop
root.mainloop()
From the above code, the following code is populating invoice result:
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
Example of data I have
There are multiple cities AUH, DXB but my current SQL query only generating for DXB. I need it should work for AUH and other KM values too such as 1 to 3, 4 to 5 and 6 to 7.
Current Invoice Result
Example of an expected invoice
If you have any further questions, please ask.

insert values into mysql table in python

To insert values into mysql table in python. Below is the code extracting mongodb collection data and inserting it into mysql table in python.
def insert():
client=MongoClient('mongodb://localhost:27017')
db=client['mydb'] #database
coll=db['data'] #collection
mongo_docs = coll.find({},{'_id':0}) #mongo cursor removed '_id' in projection
fieldnames = list(mongo_docs[0].keys()) #all the column names in the dataset
for record in mongo_docs:
values = list(record.values()) #all the values in the dataset
#print(values)
connection=mysql.connector.connect(host="localhost",user="root",database="mydb",password="passwd")
cursor1=connection.cursor()
connection.commit() #mysql connection
count=0
for i in fieldnames:
count=count+1
qmark=[]
a=0
while a<count:
qmark.append('%s')
a=a+1
q=','.join(tuple(qmark))
query="INSERT INTO ndata VALUES ('%s')"%(q)
cursor1.executemany("INSERT INTO ndata VALUES (%s)" %(q),(values))
This code throws an error:
ProgrammingError: Could not process parameters: int(82506), it must be of type list, tuple or dict
The values in the dataset are like this:
[82506, '1945-12-31', 0, '', 29.44444444, 17.22222222, 23.33333333, 0, '', 45, 12, 31, 0, '', '', 85, 63, 74, 0, '', '', '', '', '', '', '', '', '', '', '', '']
which has empty strings inside it.
q in the code produces %s, generates %s which equal to number of columns in the dataset. Here 31 columns in the dataset so there are 31 of (%s,%s,%s.....) in q
The same code with when executed with
cursor.execute("INSERT INTO ndata VALUES (%s)" %(q),(values))
in place of cursor.executemany() runs without any errors but it does not insert any values into the table in mysql.
What changes should i make to insert multiple rows of values at once ?
or how could i insert it row by row?

I can't test it but I think you create values in wrong way.
If it works for execute() then values has only one row of data but executemany() expects list with many rows of data.
And this may suggest that you create values in wrong way.
You should create list values = [] before for-loop and you should use values.append(...) instead of values = ... to add new row to list (instead of keeping only one row in variable).
# --- before loop ---
values = []
# --- loop ---
for record in mongo_docs:
row = list(record.values())
values.append(row)
# --- after loop ---
print(values)
BTW:
Shorter
count = len(fieldnames)
and
qmark = ['%s'] * count
q = ','.join(qmark)
def insert():
client = MongoClient('mongodb://localhost:27017') # PEP8: spaces around `=`
db = mongo_client['mydb']
collection = db['data']
mongo_docs = collection.find({},{'_id':0})
fieldnames = list(mongo_docs[0].keys())
values = []
for record in mongo_docs:
row = list(record.values())
values.append(row)
connection = mysql.connector.connect(host="localhost", user="root", database="mydb", password="passwd") # PEP8: spaces after `,`
cursor = connection.cursor()
count = len(fieldnames)
qmark = ['%s'] * count
q = ','.join(qmark) # no need `tuple()`
query = f"INSERT INTO ndata VALUES ({q})" # modern `f-string` instead of very old `%`
cursor.executemany(query, values)
connection.commit() # after `executemany('INSERT ...')`
PEP 8 -- Style Guide for Python Code

How to execute multiple SQL commands at once in pd.read_sql_query?

Let me create an use case to discuss on.
CREATE DATABASE sample;
USE sample;
CREATE TABLE quote (
`id` int(2) unsigned NOT NULL AUTO_INCREMENT,
`code` text ,
`date` date DEFAULT NULL,
`close` double DEFAULT NULL,
PRIMARY KEY (`id`)
) ;
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('epm', '20200824', 2.64);
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('dss', '20200824', 6.4);
It is simple to execute only one sql command with sqlalchemy.
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
cmd_one_line_sql = 'select * from sample.quote;'
df = pd.read_sql_query(cmd_one_line_sql,con = engine)
df
id code date close
0 1 epm 2020-08-24 2.64
1 2 dss 2020-08-24 6.40
I get the desired result,now the cmd contains multiple sql commands,for simplicity,it contains only two lines
cmd_multi_lines_sql = 'use sample;select * from quote;'
The cmd_multi_lines_sql just split cmd_one_line_sql as two.
I rewrite the code snippet according to manual:
execute many sql commands with sqlalchemy
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
cmd_multi_lines_sql = 'use sample;select * from quote;'
try:
cursor = connection.cursor()
cursor.execute(cmd_multi_lines_sql)
results_one = cursor.fetchall()
finally:
connection.close()
Get the below error info:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 732, in _read_query_result
result.read()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 684, in _read_packet
packet.check_error()
File "/usr/local/lib/python3.5/dist-packages/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/usr/local/lib/python3.5/dist-packages/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'select * from quote' at line 1")
Another try:
vim /tmp/test.sql
use sample;
select * from quote;
#write the commands in `/tmp/test.sql`
f = open('/tmp/test.sql','r')
cmd = f.read()
df = pd.read_sql_query(cmd, con = engine)
It output the same error info.How to fix it?

After some research and asking at github
the answer is obvious
you need to pass the needed parameters with
connect_args=
And the parameter since sqlalchemy
is
{"client_flag": MULTI_STATEMENTS}
So your python code locks like his
from sqlalchemy import create_engine
import pymysql
from pymysql.constants.CLIENT import MULTI_STATEMENTS
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip),connect_args={"client_flag": MULTI_STATEMENTS})
connection = engine.raw_connection()
try:
cursor = connection.cursor()
cursor.execute(cmd)
results_one = cursor.fetchall()
cursor.nextset()
results_two = cursor.fetchall()
cursor.close()
finally:
connection.close()
But with this solution you need to know before hand which queries you run.
If you want to be more flexible, with dynamic sql statements
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
splitstring = cmd.split(";")
ges_resultset = []
try:
cursor = connection.cursor()
for cmdoneonly in splitstring:
cursor.execute(cmdoneonly)
results = cursor.fetchall()
ges_resultset.append(results)
cursor.close()
finally:
connection.close()
Where you can check every single command and know how python can react to it
SELECT need to get the result set
INSERT DELETE CREATE you don't(there are more, but you get the gist)

The issues you face are:
You need to pass the MULTI_STATEMENTS flag to PyMySQL, and
read_sql_query assumes that the first result set contains the data for the DataFrame, and that may not be true for an anonymous code block.
You can create your own PyMySQL connection and retrieve the data like this:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
conn_info = {
"host": "localhost",
"port": 3307,
"user": "root",
"password": "toot",
"database": "mydb",
"client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
"""
crsr.execute(sql)
num_tries = 5
result = None
for i in range(num_tries):
result = crsr.fetchall()
if result:
break
crsr.nextset()
if not result:
print(f"(no result found after {num_tries} attempts)")
else:
df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
print(df)
"""console output:
id txt
0 1 foo
1 2 ΟΠΑ!
"""
(Edit) Additional notes:
Note 1: As mentioned in another answer, you can use the connect_args argument to SQLAlchemy's create_engine method to pass the MULTI_STATEMENTS flag. If you need a SQLAlchemy Engine object for other things (e.g., for to_sql) then that might be preferable to creating your own PyMySQL connection directly.
Note 2: num_tries can be arbitrarily large; it is simply a way of avoiding an endless loop. If we need to skip the first n empty result sets then we need to call nextset that many times regardless, and once we've found the non-empty result set we break out of the loop.

#Gord Thompson,i make a little improvement to set num_tries automatically:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
conn_info = {
"host": "localhost",
"port": 3306,
"user": "root",
"password": "your mysql passwd",
"client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
crsr.execute(sql)
num_tries = sql.count(';') if sql.endswith(';') else sql.count(';') + 1
for i in range(num_tries):
result = crsr.fetchall()
if result:
df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
print(df)
crsr.nextset()
#nbk:when the cmd contain many sql statements,to execute your code may encounter such issue as :
pymysql.err.InternalError: (1065, 'Query was empty')
Make a little improvement based on your code:
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = 'localhost'
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
splitstring = sql.split(";")
try:
cursor = connection.cursor()
for cmdoneonly in splitstring:
if cmdoneonly.strip():
cursor.execute(cmdoneonly)
results = cursor.fetchall()
if results :
df = pd.DataFrame(results, columns=[x[0] for x in cursor.description])
print(df)
cursor.close()
finally:
connection.close()
it is necessary to add a determine statement if cmdoneonly.strip(): to avoid 1065:Query was empty error.
it is a wonderful statement df = pd.DataFrame(results, columns=[x[0] for x in cursor.description]) learning from #Gord Thompson.

Converting a column in a CSV file to NULL

I have a CSV file that has data that looks like this:
54861,54850,Doe,John,NULL,-87.1181407064,30.3773576858
54862,54851,Doe,Linda,Lee,None,None
The last two columns are longitude and latitude
I'm using mysql.connector to insert into the database. It handles the first record, okay but, because the last two columns are floats, it croaks when it gets to the second record with the values set to "None".
I tried programmatically to set the values to NULL but it won't accept that either.
I've tried a couple of different things but can't figure it out.
This has to be done in Python.
Here is the code:
import sys
import mysql.connector
import csv
import os
from mysql.connector import Error
from mysql.connector import errorcode
#Specify the import file
try:
inputCSV = 'geocoded_test.csv'
#Open the file and give it a handle
csvFile = open(inputCSV, 'r')
#Create a reader object for the input file
reader = csv.reader(csvFile, delimiter = ',')
except IOError as e:
print("The input file ", inputCSV, " was not found", e)
exit()
try:
mydb = mysql.connector.connect(host='localhost',
database='wordpress',
user='wp_user',
password='XXXXXXXX!'
)
mycursor = mydb.cursor()
except mysql.connector.Error as error:
print( "Failed to connect to database: {}".format(error))
exit()
try:
record_count = 0
for row in reader:
contact_id,address_id,last_name, first_name, middle_name, longitude, latitude = row
print(row)
# It is here that I want to convert to NULL.
if longitude == "None":
longitude = -1.0
if latitude == "None":
latitude = -1.0
#Update single record now
mycursor.execute("""
update civicrm_address
set
geo_code_1 = %s,
geo_code_2 = %s
where
id = %s
and
location_type_id = %s
""",
(latitude, longitude, address_id, 6)
)
mydb.commit()
print(mycursor.rowcount)
record_count +=1
print("Record", record_count, " updated successfully")
finally:
print(record_count, " records updated")
#closing database connection.
if(mydb.is_connected()):
mydb.close()
print("connection is closed")

One option would be to use LOAD DATA with custom logic which catches the None string values and then converts them to NULL:
LOAD DATA LOCAL INFILE 'your_file.csv'
INTO TABLE yourTable
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
(col1, col2, last, first, middle, #lat, #lng)
SET lat = CASE WHEN #lat = 'None'
THEN NULL
ELSE CAST(#lat AS DECIMAL(10,8)) END,
SET lng = CASE WHEN #lng = 'None'
THEN NULL
ELSE CAST(#lng AS DECIMAL(10,8)) END;
I assume above that your latitude column is called lat, and your longitude column lng. I just dummy placeholder names for the other columns, but you would need to use the actual column names to make the above load work.

Bulk update MySql with python

I have to update millions of row into MySQL. I am currently using for loop to execute query. To make the update faster I want to use executemany() of Python MySQL Connector, so that I can update in batches using single query for each batch.

I don't think mysqldb has a way of handling multiple UPDATE queries at one time.
But you can use an INSERT query with ON DUPLICATE KEY UPDATE condition at the end.
I written the following example for ease of use and readability.
import MySQLdb
def update_many(data_list=None, mysql_table=None):
"""
Updates a mysql table with the data provided. If the key is not unique, the
data will be inserted into the table.
The dictionaries must have all the same keys due to how the query is built.
Param:
data_list (List):
A list of dictionaries where the keys are the mysql table
column names, and the values are the update values
mysql_table (String):
The mysql table to be updated.
"""
# Connection and Cursor
conn = MySQLdb.connect('localhost', 'jeff', 'atwood', 'stackoverflow')
cur = conn.cursor()
query = ""
values = []
for data_dict in data_list:
if not query:
columns = ', '.join('`{0}`'.format(k) for k in data_dict)
duplicates = ', '.join('{0}=VALUES({0})'.format(k) for k in data_dict)
place_holders = ', '.join('%s'.format(k) for k in data_dict)
query = "INSERT INTO {0} ({1}) VALUES ({2})".format(mysql_table, columns, place_holders)
query = "{0} ON DUPLICATE KEY UPDATE {1}".format(query, duplicates)
v = data_dict.values()
values.append(v)
try:
cur.executemany(query, values)
except MySQLdb.Error, e:
try:
print"MySQL Error [%d]: %s" % (e.args[0], e.args[1])
except IndexError:
print "MySQL Error: %s" % str(e)
conn.rollback()
return False
conn.commit()
cur.close()
conn.close()
Explanation of one liners
columns = ', '.join('`{}`'.format(k) for k in data_dict)
is the same as
column_list = []
for k in data_dict:
column_list.append(k)
columns = ", ".join(columns)
Here's an example of usage
test_data_list = []
test_data_list.append( {'id' : 1, 'name' : 'Marco', 'articles' : 1 } )
test_data_list.append( {'id' : 2, 'name' : 'Keshaw', 'articles' : 8 } )
test_data_list.append( {'id' : 3, 'name' : 'Wes', 'articles' : 0 } )
update_many(data_list=test_data_list, mysql_table='writers')
Query output
INSERT INTO writers (`articles`, `id`, `name`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE articles=VALUES(articles), id=VALUES(id), name=VALUES(name)
Values output
[[1, 1, 'Marco'], [8, 2, 'Keshaw'], [0, 3, 'Wes']]

Maybe this can help
How to update multiple rows with single MySQL query in python?
cur.executemany("UPDATE Writers SET Name = %s WHERE Id = %s ",
[("new_value" , "3"),("new_value" , "6")])
conn.commit()

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

CSV Insert per line to MySQL python - mysql

Related

Unable to populate multiple results for the invoice system using MySQL query

insert values into mysql table in python

How to execute multiple SQL commands at once in pd.read_sql_query?

Converting a column in a CSV file to NULL

Bulk update MySql with python

Categories

Resources