Trying to load records from CSV to mysql - mysql

import csv
import MySQLdb
conn = MySQLdb.connect('localhost','tekno','poop','media')
cursor = conn.cursor()
txt = csv.reader(file('movies.csv'))
for row in txt:
cursor.execute('insert into shows_and_tv(watched_on,title,score_rating)' 'values ("%s","%s","%s")',row)
conn.close()
when I run this I get
TypeError: not enough arguments for format string
but it matches up
the csv is formatted like
dd-mm-yyyy,string,tinyint
which watches the fields in the database

I do not have a mysql database to play with. So I did what you need but in sqlite. It should be quite easy to adapt this to your needs.
import csv
import sqlite3
from collections import namedtuple
conn = sqlite3.connect('statictest.db')
c = conn.cursor()
c.execute('''CREATE TABLE if not exists movies (ID INTEGER PRIMARY KEY AUTOINCREMENT, 'watched_on','title','score_rating')''')
record = namedtuple('record',['watched_on','title','score_rating'])
SQL ='''
INSERT INTO movies ("watched_on","title","score_rating") VALUES (?,?,?)
'''
with open('statictest.csv', 'r') as file:
read_data = csv.reader(file)
for row in read_data:
watched_on, title, score_rating = row
data = (record(watched_on, title, score_rating))
c.execute(SQL, data)
conn.commit()

Related

Unable to populate multiple results for the invoice system using MySQL query

I'm trying to build an invoice generator GUI app using tkinter and reportlab. Currently, it's reading an excel file showing some results on treeview. However, to generate an invoice, I am creating a pdf file to show all the data for the invoice but unable to populate multiple results in the pdf file from MySQL table.
Here is my complete code:
import tkinter as tk
from tkinter import ttk
import pandas as pd
from tkinter import filedialog
import mysql.connector
import os
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
def browse_file():
# Open the file browser dialog
file_path = filedialog.askopenfilename()
# Read the Excel file into a DataFrame
df = pd.read_excel("mpo.xlsx", sheet_name='OrderDetails')
df = df.fillna('NULL')
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Insert each row into the MySQL database
for index, row in df.iterrows():
cursor.execute("INSERT INTO perorder (mp_task_nr, batch_id, dropoff_sequence, id_user_payout_type, task_status, city, fleet, id_user, Name, Vendor, order_date, UID, KM, total_amount, Remarks) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )", (row["mp_task_nr"], row["batch_id"], row["dropoff_sequence"], row["id_user_payout_type"], row["task_status"], row["city"], row["fleet"], row["id_user"], row["Name"], row["Vendor"], row["order_date"], row["UID"], row["KM"], row["total_amount"], row["Remarks"]))
# Commit the changes and close the cursor and connection
conn.commit()
cursor.close()
conn.close()
def populate_treeview():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement
cursor.execute("SELECT id_user, Vendor, KM FROM perorder")
# Fetch the results
results = cursor.fetchall()
# Add the results to the Treeview widget
for result in results:
treeview.insert("", tk.END, values=result)
# Close the cursor and connection
cursor.close()
conn.close()
def generateInvoice():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
# Fetch the results
results = cursor.fetchall()
# Create the PDF file
pdf_file = "invoice.pdf"
doc = SimpleDocTemplate(pdf_file, pagesize=letter)
# Create the table data
table_data = []
table_data.append(['City', 'Qantity', 'Rate', 'Amount', 'Taxable Value','Amount + 5% VAT', 'Grand Total'])
for result in results:
table_data.append([result[0], result[1], result[2], result[3], result[4], result[5], result[6]])
# Create the table
table = Table(table_data)
# Set the table style
table.setStyle(TableStyle([
('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
('BOX', (0,0), (-1,-1), 0.25, colors.black)
]))
# Build the document
doc.build([table])
# Create the root window
root = tk.Tk()
root.title("Megnatico Invoice System")
root.geometry("800x800")
# Create the treeview
treeview = ttk.Treeview(root)
treeview.pack(side="left", fill="both", expand=True)
treeview["columns"] = ("id_user", "Vendor", "KM")
treeview.column("id_user", width=150, minwidth=150, stretch=tk.NO)
treeview.column("Vendor", width=150, minwidth=150, stretch=tk.NO)
treeview.column("KM", width=150, minwidth=150, stretch=tk.NO)
treeview.heading("id_user", text="User ID")
treeview.heading("Vendor", text="Vendor")
treeview.heading("KM", text="KM")
treeview["displaycolumns"] = ("id_user", "Vendor", "KM")
# Create the buttons
read_excel_button = tk.Button(root, text="Read Excel", command=browse_file)
invoice_per_order_button = tk.Button(root, text="Invoice per Order", command=populate_treeview)
save_pdf_button = tk.Button(root, text="Generate Invoice", command=generateInvoice)
close_button = tk.Button(root, text="Close")
# Place the buttons in a frame and pack the frame to the right of the root window
button_frame = tk.Frame(root)
button_frame.pack(side="right", fill="both")
read_excel_button.pack(padx=10, pady=5)
invoice_per_order_button.pack(padx=10, pady=5)
save_pdf_button.pack(padx=10, pady=5)
close_button.pack(padx=10, pady=5)
# Run the Tkinter event loop
root.mainloop()
From the above code, the following code is populating invoice result:
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
Example of data I have
There are multiple cities AUH, DXB but my current SQL query only generating for DXB. I need it should work for AUH and other KM values too such as 1 to 3, 4 to 5 and 6 to 7.
Current Invoice Result
Example of an expected invoice
If you have any further questions, please ask.

How to execute multiple SQL commands at once in pd.read_sql_query?

Let me create an use case to discuss on.
CREATE DATABASE sample;
USE sample;
CREATE TABLE quote (
`id` int(2) unsigned NOT NULL AUTO_INCREMENT,
`code` text ,
`date` date DEFAULT NULL,
`close` double DEFAULT NULL,
PRIMARY KEY (`id`)
) ;
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('epm', '20200824', 2.64);
INSERT INTO quote (`code`, `date`, `close`)
VALUES ('dss', '20200824', 6.4);
It is simple to execute only one sql command with sqlalchemy.
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
cmd_one_line_sql = 'select * from sample.quote;'
df = pd.read_sql_query(cmd_one_line_sql,con = engine)
df
id code date close
0 1 epm 2020-08-24 2.64
1 2 dss 2020-08-24 6.40
I get the desired result,now the cmd contains multiple sql commands,for simplicity,it contains only two lines
cmd_multi_lines_sql = 'use sample;select * from quote;'
The cmd_multi_lines_sql just split cmd_one_line_sql as two.
I rewrite the code snippet according to manual:
execute many sql commands with sqlalchemy
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = '127.0.0.1'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
cmd_multi_lines_sql = 'use sample;select * from quote;'
try:
cursor = connection.cursor()
cursor.execute(cmd_multi_lines_sql)
results_one = cursor.fetchall()
finally:
connection.close()
Get the below error info:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/usr/local/lib/python3.5/dist-packages/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 732, in _read_query_result
result.read()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "/usr/local/lib/python3.5/dist-packages/pymysql/connections.py", line 684, in _read_packet
packet.check_error()
File "/usr/local/lib/python3.5/dist-packages/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/usr/local/lib/python3.5/dist-packages/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'select * from quote' at line 1")
Another try:
vim /tmp/test.sql
use sample;
select * from quote;
#write the commands in `/tmp/test.sql`
f = open('/tmp/test.sql','r')
cmd = f.read()
df = pd.read_sql_query(cmd, con = engine)
It output the same error info.How to fix it?
After some research and asking at github
the answer is obvious
you need to pass the needed parameters with
connect_args=
And the parameter since sqlalchemy
is
{"client_flag": MULTI_STATEMENTS}
So your python code locks like his
from sqlalchemy import create_engine
import pymysql
from pymysql.constants.CLIENT import MULTI_STATEMENTS
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip),connect_args={"client_flag": MULTI_STATEMENTS})
connection = engine.raw_connection()
try:
cursor = connection.cursor()
cursor.execute(cmd)
results_one = cursor.fetchall()
cursor.nextset()
results_two = cursor.fetchall()
cursor.close()
finally:
connection.close()
But with this solution you need to know before hand which queries you run.
If you want to be more flexible, with dynamic sql statements
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'testpassword'
mysql_ip = 'localhost'
cmd = 'SELECT * FROM table1;SELECT * FROM test'
engine = create_engine("mysql+pymysql://{}:{}#{}:3306/testdb1?charset=utf8".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
splitstring = cmd.split(";")
ges_resultset = []
try:
cursor = connection.cursor()
for cmdoneonly in splitstring:
cursor.execute(cmdoneonly)
results = cursor.fetchall()
ges_resultset.append(results)
cursor.close()
finally:
connection.close()
Where you can check every single command and know how python can react to it
SELECT need to get the result set
INSERT DELETE CREATE you don't(there are more, but you get the gist)
The issues you face are:
You need to pass the MULTI_STATEMENTS flag to PyMySQL, and
read_sql_query assumes that the first result set contains the data for the DataFrame, and that may not be true for an anonymous code block.
You can create your own PyMySQL connection and retrieve the data like this:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
conn_info = {
"host": "localhost",
"port": 3307,
"user": "root",
"password": "toot",
"database": "mydb",
"client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
"""
crsr.execute(sql)
num_tries = 5
result = None
for i in range(num_tries):
result = crsr.fetchall()
if result:
break
crsr.nextset()
if not result:
print(f"(no result found after {num_tries} attempts)")
else:
df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
print(df)
"""console output:
id txt
0 1 foo
1 2 ΟΠΑ!
"""
(Edit) Additional notes:
Note 1: As mentioned in another answer, you can use the connect_args argument to SQLAlchemy's create_engine method to pass the MULTI_STATEMENTS flag. If you need a SQLAlchemy Engine object for other things (e.g., for to_sql) then that might be preferable to creating your own PyMySQL connection directly.
Note 2: num_tries can be arbitrarily large; it is simply a way of avoiding an endless loop. If we need to skip the first n empty result sets then we need to call nextset that many times regardless, and once we've found the non-empty result set we break out of the loop.
#Gord Thompson,i make a little improvement to set num_tries automatically:
import pandas as pd
import pymysql
from pymysql.constants import CLIENT
conn_info = {
"host": "localhost",
"port": 3306,
"user": "root",
"password": "your mysql passwd",
"client_flag": CLIENT.MULTI_STATEMENTS,
}
cnxn = pymysql.connect(**conn_info)
crsr = cnxn.cursor()
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
crsr.execute(sql)
num_tries = sql.count(';') if sql.endswith(';') else sql.count(';') + 1
for i in range(num_tries):
result = crsr.fetchall()
if result:
df = pd.DataFrame(result, columns=[x[0] for x in crsr.description])
print(df)
crsr.nextset()
#nbk:when the cmd contain many sql statements,to execute your code may encounter such issue as :
pymysql.err.InternalError: (1065, 'Query was empty')
Make a little improvement based on your code:
import pandas as pd
from sqlalchemy import create_engine
user = 'root'
mysql_pass = 'your mysql passwd'
mysql_ip = 'localhost'
sql = """\
create database sample;
USE sample;
CREATE TEMPORARY TABLE tmp (id int primary key, txt varchar(20))
ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO tmp (id, txt) VALUES (1, 'foo'), (2, 'ΟΠΑ!');
SELECT id, txt FROM tmp;
SELECT txt FROM tmp;
"""
engine = create_engine("mysql+pymysql://{}:{}#{}:3306".format(user,mysql_pass,mysql_ip))
connection = engine.raw_connection()
splitstring = sql.split(";")
try:
cursor = connection.cursor()
for cmdoneonly in splitstring:
if cmdoneonly.strip():
cursor.execute(cmdoneonly)
results = cursor.fetchall()
if results :
df = pd.DataFrame(results, columns=[x[0] for x in cursor.description])
print(df)
cursor.close()
finally:
connection.close()
it is necessary to add a determine statement if cmdoneonly.strip(): to avoid 1065:Query was empty error.
it is a wonderful statement df = pd.DataFrame(results, columns=[x[0] for x in cursor.description]) learning from #Gord Thompson.

Converting a column in a CSV file to NULL

I have a CSV file that has data that looks like this:
54861,54850,Doe,John,NULL,-87.1181407064,30.3773576858
54862,54851,Doe,Linda,Lee,None,None
The last two columns are longitude and latitude
I'm using mysql.connector to insert into the database. It handles the first record, okay but, because the last two columns are floats, it croaks when it gets to the second record with the values set to "None".
I tried programmatically to set the values to NULL but it won't accept that either.
I've tried a couple of different things but can't figure it out.
This has to be done in Python.
Here is the code:
import sys
import mysql.connector
import csv
import os
from mysql.connector import Error
from mysql.connector import errorcode
#Specify the import file
try:
inputCSV = 'geocoded_test.csv'
#Open the file and give it a handle
csvFile = open(inputCSV, 'r')
#Create a reader object for the input file
reader = csv.reader(csvFile, delimiter = ',')
except IOError as e:
print("The input file ", inputCSV, " was not found", e)
exit()
try:
mydb = mysql.connector.connect(host='localhost',
database='wordpress',
user='wp_user',
password='XXXXXXXX!'
)
mycursor = mydb.cursor()
except mysql.connector.Error as error:
print( "Failed to connect to database: {}".format(error))
exit()
try:
record_count = 0
for row in reader:
contact_id,address_id,last_name, first_name, middle_name, longitude, latitude = row
print(row)
# It is here that I want to convert to NULL.
if longitude == "None":
longitude = -1.0
if latitude == "None":
latitude = -1.0
#Update single record now
mycursor.execute("""
update civicrm_address
set
geo_code_1 = %s,
geo_code_2 = %s
where
id = %s
and
location_type_id = %s
""",
(latitude, longitude, address_id, 6)
)
mydb.commit()
print(mycursor.rowcount)
record_count +=1
print("Record", record_count, " updated successfully")
finally:
print(record_count, " records updated")
#closing database connection.
if(mydb.is_connected()):
mydb.close()
print("connection is closed")
One option would be to use LOAD DATA with custom logic which catches the None string values and then converts them to NULL:
LOAD DATA LOCAL INFILE 'your_file.csv'
INTO TABLE yourTable
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
(col1, col2, last, first, middle, #lat, #lng)
SET lat = CASE WHEN #lat = 'None'
THEN NULL
ELSE CAST(#lat AS DECIMAL(10,8)) END,
SET lng = CASE WHEN #lng = 'None'
THEN NULL
ELSE CAST(#lng AS DECIMAL(10,8)) END;
I assume above that your latitude column is called lat, and your longitude column lng. I just dummy placeholder names for the other columns, but you would need to use the actual column names to make the above load work.

How to fix the query errors

I want to put randomised (will later be substituted with real numbers) numbers and a timestamp in a mySQL database.
I am running the SQL Server on my PC, if I copy & paste the comand in the SQL Terminal it works, but with python it generates errors
import pymysql
import random
import time
import datetime
def dynamic_data_entry():
date = "10.07.19"
Temperatur = str(random.randint(0, 100))
Feuchtigkeit = str(random.randint(20, 80))
Operation = 'INSERT INTO messwerte_sensor1 (Time, Temperatur, Luftfeuchtigkeit) VALUES (%s, %s, %s);' , (date, Temperatur, Feuchtigkeit)
db = pymysql.connect("127.0.0.1","root","Root","tests")
cursor = db.cursor()
cursor.execute(Operation)
data = cursor.fetchone
print(data)
db.close()
dynamic_data_entry()
The problem is with your date format. In mysql, standar date format is aaaa-mm-dd, sou you will need to change it. Also, i modify your code to use prepared statements:
import pymysql
import random
import time
import datetime
def dynamic_data_entry():
date = "2019-07-10"
Temperatur = str(random.randint(0, 100))
Feuchtigkeit = str(random.randint(20, 80))
Operation = 'INSERT INTO messwerte_sensor1 (Time, Temperatur, Luftfeuchtigkeit) VALUES (%s, %s, %s);'
db = pymysql.connect("127.0.0.1","root","Root","tests")
cursor = db.cursor()
cursor.execute(Operation,(date, Temperatur, Feuchtigkeit))
data = cursor.fetchone()
print(data)
db.close()
dynamic_data_entry()
I thin you need quotes around each %s, like "%s"

SQLAlchemy insert data dumped on the database A into the database B

What is the most effective way to insert data dumped on the database A into the database B? Normally I would use mysqldump for the task like this, but because of the complex query I had to take a different approach. At present I have the following inefficient solution:
from sqlalchemy import create_engine, Column, INTEGER, CHAR, VARCHAR
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
SessFactory = sessionmaker()
print('## Configure database connections')
db_one = create_engine('mysql://root:pwd1#127.0.0.1/db_one', echo=True).connect()
sess_one = SessFactory(bind=db_one)
db_two = create_engine('mysql://root:pwd2#127.0.0.2/db_two', echo=True).connect()
sess_two = SessFactory(bind=db_two)
## Declare query to dump data
dump_query = (
'SELECT A.id, A.name, B.address '
'FROM table_a A JOIN table_b B '
'ON A.id = B.id_c WHERE '
'A.deleted = 0'
)
print('## Fetch data on db_one')
data = db_one.execute(dump_query).fetchall()
## Declare table on db_two
class cstm_table(Base):
__tablename__ = 'cstm_table'
pk = Column(INTEGER, primary_key=True)
id = Column(CHAR(36), nullable=False)
name = Column(VARCHAR(150), default=None)
address = Column(VARCHAR(150), default=None)
print('## Recreate "cstm_table" on db_two')
cstm_table.__table__.drop(bind=db_two, checkfirst=True)
cstm_table.__table__.create(bind=db_two)
print('## Insert dumped data into the "cstm_table" on db_two')
for row in data:
insert = cstm_table.__table__.insert().values(row)
db_two.execute(insert)
This execute sequentially over a 100K inserts (horrible).
I also tried:
with db_two.connect() as conn:
with conn.begin() as trans:
row_as_dict = [dict(row.items()) for row in data]
try:
conn.execute(cstm_table.__table__.insert(), row_as_dict)
except:
trans.rollback()
raise
else:
trans.commit()
But then after inserting ~20 rows I get error:
OperationalError: (_mysql_exceptions.OperationalError) (2006, 'MySQL server has gone away')
The following also does the job, but I'm not so sure it's the most efficient:
sess_two.add_all([cstm_table(**dict(row.items())) for row in data])
sess_two.flush()
sess_two.commit()