Automated insertion of CSV files into a MySQL table

I am trying to insert each row from about 2000 CSV files into a MySQL table. With the following code, I have inserted only one row from just one file. How can I automate the code so that it inserts all rows from each file? The insertions need to be done just once.
import pymysql.cursors

connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO `TrainsS` (`No.`, `Name`, `Zone`, `From`, `Delay`, `ETA`, `Location`, `To`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        cursor.execute(sql, ('03', 'P Exp', 'SF', 'HWH', 'none', 'no arr today', 'n/a', 'ND'))
    connection.commit()
finally:
    connection.close()

Try an approach like the following.
Put all your .csv files in one folder, walk that folder with os.walk(folder_location) to collect the location of every .csv file, then open them one by one and insert their rows into the required MySQL table:
import os
import warnings

import pymysql.cursors

warnings.simplefilter("ignore")

cwd = os.getcwd()
connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Collect the full paths of all .csv files under the current directory.
files_csv = []
for subdir, dirs, files in os.walk(cwd):
    files_csv += [os.path.join(subdir, fi) for fi in files if fi.endswith(".csv")]
print(files_csv)

for csv_path in files_csv:
    with open(csv_path) as f:
        # Split each line on commas; each x below is one row of the CSV file.
        lis = [line.strip().split(',') for line in f]
    for i, x in enumerate(lis):
        # print("line{0} = {1}".format(i, x))
        # Here x contains the row data and you can access it individually as x[0], x[1], etc.
        # Use your MySQL INSERT command here and insert the row x (see the Update below).
        with connection.cursor() as cursor:
            sql = "INSERT INTO `TrainsS` (`No.`, `Name`, `Zone`, `From`, `Delay`, `ETA`, `Location`, `To`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
            cursor.execute(sql, x)  # x must hold exactly 8 values, one per placeholder
        connection.commit()
Update -
Building the values to pass to cursor.execute() from the row x:
values = []
for i in range(len(columns)):   # columns = the table's column names, so len(columns) == 8 here
    values.append(x[i])         # x[i] is the value of the i-th field in the current row
command = "INSERT INTO `TrainsS` (`No.`, `Name`, `Zone`, `From`, `Delay`, `ETA`, `Location`, `To`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
cursor.execute(command, values)  # passing the values as parameters avoids hand-quoting and SQL injection
# Then commit using connection.commit()
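If the files are well-formed CSV, a slightly more compact variant (a sketch, not part of the original answer) is to parse each file with the csv module and insert all of its rows in one call with executemany(); the table and column names are taken from the question, and files_csv comes from the code above.
import csv

sql = ("INSERT INTO `TrainsS` (`No.`, `Name`, `Zone`, `From`, `Delay`, `ETA`, `Location`, `To`) "
       "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")

for csv_path in files_csv:
    with open(csv_path, newline='') as f:
        rows = list(csv.reader(f))      # each row is a list of 8 fields; skip a header row here if there is one
    with connection.cursor() as cursor:
        cursor.executemany(sql, rows)   # one round trip per file instead of one per row
    connection.commit()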

The same row-by-row pattern with psycopg2 (PostgreSQL here; the structure is identical for a MySQL driver):
import csv
import time

import psycopg2

conn = psycopg2.connect(
    host="localhost",
    database="postgres",
    user="postgres",
    password="postgres"
)
cur = conn.cursor()

start = time.time()
with open('combined_category_data_100 copy.csv', 'r') as file:
    reader = csv.reader(file)
    header = next(reader)                     # consume the header row
    ncol = len(header)                        # number of columns in the file
    placeholders = ','.join(['%s'] * ncol)    # one %s per column
    for row in reader:
        cur.execute("INSERT INTO data VALUES ({})".format(placeholders), row)
    conn.commit()
    print("data entered successfully")
end = time.time()
print(f"time taken is {end - start}")
cur.close()
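Since this answer is timing the load, it is worth noting that psycopg2 can also stream the whole CSV through COPY, which is usually much faster than row-by-row INSERTs. A minimal sketch, assuming the same file and a table named data whose columns match the CSV:
import psycopg2

conn = psycopg2.connect(host="localhost", database="postgres",
                        user="postgres", password="postgres")
cur = conn.cursor()
with open('combined_category_data_100 copy.csv', 'r') as f:
    # HEADER tells COPY to skip the first line of the file.
    cur.copy_expert("COPY data FROM STDIN WITH CSV HEADER", f)
conn.commit()
cur.close()
conn.close()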

Related

Error loading a delimited file into MySQL using Airflow (error code 2068)

I have Airflow installed on Ubuntu under WSL on Windows.
I am trying to load a delimited file stored on my C drive into a MySQL database using the code below:
import datetime
import logging
import os
import csv
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.hooks.mysql_hook import MySqlHook

def bulk_load_sql(table_name, **kwargs):
    local_filepath = 'some c drive path'
    conn = MySqlHook(conn_name_attr='mysql_default')
    conn.bulk_load(table_name, local_filepath)
    return table_name

dag = DAG(
    "dag_name",
    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
    schedule_interval=None)

t1 = PythonOperator(
    task_id='csv_to_stgtbl',
    provide_context=True,
    python_callable=bulk_load_sql,
    op_kwargs={'table_name': 'mysqltablnm'},
    dag=dag
)
It gives the following exception:
MySQLdb._exceptions.OperationalError: (2068, 'LOAD DATA LOCAL INFILE file request rejected due to restrictions on access.')
I have checked the following setting on MySQL and it's ON:
SHOW GLOBAL VARIABLES LIKE 'local_infile'
Could someone please provide some pointers on how to fix it?
Is there any other way I can load a delimited file into MySQL using Airflow?
For now, I have implemented a workaround as follows:
def load_staging():
    mysqlHook = MySqlHook(conn_name_attr='mysql_default')
    #cursor = conn.cursor()
    conn = mysqlHook.get_conn()
    cursor = conn.cursor()
    csv_data = csv.reader(open('c drive file path'))
    header = next(csv_data)
    logging.info('Importing the CSV Files')
    for row in csv_data:
        #print(row)
        cursor.execute("INSERT INTO table_name (col1,col2,col3) VALUES (%s, %s, %s)",
                       row)
    conn.commit()
    cursor.close()

t1 = PythonOperator(
    task_id='csv_to_stgtbl',
    python_callable=load_staging,
    dag=dag
)
However, it would have been great if the LOAD DATA LOCAL INFILE would have worked.
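One pointer on the original error: 2068 usually means the client side of the connection has not enabled LOCAL INFILE, even though the server variable local_infile is ON. With MySQLdb/mysqlclient this is the local_infile connect flag (some Airflow versions let you set it through the MySQL connection's extras, e.g. {"local_infile": true}, but check your version). A minimal sketch outside Airflow, with placeholder credentials and path and the table name from the question:
import MySQLdb

# Placeholder credentials and path; local_infile=1 enables LOAD DATA LOCAL INFILE
# on the client side (the server's local_infile variable must be ON as well).
conn = MySQLdb.connect(host="localhost", user="user", passwd="password",
                       db="mydb", local_infile=1)
cur = conn.cursor()
cur.execute("""
    LOAD DATA LOCAL INFILE '/path/to/file.csv'
    INTO TABLE mysqltablnm
    FIELDS TERMINATED BY ','
    IGNORE 1 LINES
""")
conn.commit()
conn.close()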

Empty data in SQL table

I am learning Tkinter and have made a program (just for practice) that takes user input in an Entry field and saves what the user has entered to a MySQL database when a submit button is clicked.
Code:
from tkinter import *
import tkinter
import mysql.connector
import random

DB = mysql.connector.connect(
    host = "localhost",
    user = "Lincoln",
    password = "lincoln110904#",
    database = "test"
)
cursor = DB.cursor()

gui2 = Tk()
gui2.title("Airline Ticket Booking System")
gui2.iconbitmap("C:/Users/keepa/OneDrive/Desktop/icon.ico")
gui2.maxsize(width=1000, height=70)
gui2.minsize(width=1000, height=700)

data = Entry(gui2, text = 'food name', textvariable="data_var")
data.pack()
data_var = tkinter.StringVar(data).get()

user_id = random.randint(1128, 9721)

def submit_it():
    sql = "INSERT INTO user_time(user_id, time) VALUES(%s, %s)"
    values = (user_id, str(data_var))
    cursor.execute(sql, values)
    DB.commit()

submit = Button(gui2, text = 'submit', command=submit_it)
submit.pack()

gui2.mainloop()
Output in MySQL database:
Can anyone please help me understand why the data in the time column is empty/blank?
You need to:
- use a reference to a StringVar instead of a string for the textvariable option
- get the input content when it is needed instead of getting it right after the entry is created:
...
data_var = tkinter.StringVar()
data = tkinter.Entry(gui2, text='food name', textvariable=data_var)
...

def submit_it():
    sql = "INSERT INTO user_time (user_id, time) VALUES (%s, %s)"
    # Get the input data from the Entry here
    values = (user_id, data_var.get())
    cursor.execute(sql, values)
    DB.commit()
Also note that wildcard imports (from tkinter import *) are not recommended.
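A minimal sketch of the explicit-import style, reusing the names from the question:
import tkinter as tk

gui2 = tk.Tk()
data_var = tk.StringVar()
data = tk.Entry(gui2, textvariable=data_var)  # tk.Entry instead of a bare Entry from a wildcard import
data.pack()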

Incorrect number of parameters in prepared statement

I'm having a heck of a time getting the mysql.connector module to work. I'd really like to find some accurate documentation on it. By hit and miss, I have arrived here.
Traceback (most recent call last):
  File "update_civicrm_address.py", line 80, in <module>
    cursor.execute(mysql_select_query, address_id)
  File "/home/ubuntu/.local/lib/python3.6/site-packages/mysql/connector/cursor.py", line 1210, in execute
    msg="Incorrect number of arguments " \
mysql.connector.errors.ProgrammingError: 1210: Incorrect number of arguments executing prepared statement
Here is the program (it's a bit messy because I have tried so many things to get it to work). Aside from the fact that the update is not working at all, what is causing the error? There is only one parameter and it is accounted for.
import sys
import mysql.connector
import csv
import os
from mysql.connector import Error
from mysql.connector import errorcode

# Specify the import file
try:
    inputCSV = 'geocoded_rhode_island_export.csv'
    # Open the file and give it a handle
    csvFile = open(inputCSV, 'r')
    # Create a reader object for the input file
    reader = csv.reader(csvFile, delimiter = ',')
except IOError as e:
    print("The input file ", inputCSV, " was not found", e)
    exit()

try:
    conn = mysql.connector.connect(host='localhost',
                                   database='wordpress',
                                   user='wp_user',
                                   password='secret!',
                                   use_pure=True)
    cursor = conn.cursor(prepared=True)
except mysql.connector.Error as error:
    print("Failed to connect to database: {}".format(error))
    exit()

try:
    record_count = 0
    for row in reader:
        contact_id, address_id, last_name, first_name, middle_name, longitude, latitude = row
        print(row)
        # Update single record now
        print(address_id)
        cursor.execute(
            """
            update civicrm_address
            set
                geo_code_1 = %s,
                geo_code_2 = %s
            where
                id = %s
            and
                location_type_id = %s
            """,
            (longitude, latitude, address_id, 6)
        )
        conn.commit
        print(cursor.rowcount)
        print("Record updated successfully")

        mysql_select_query = """
            select
                id,
                geo_code_1,
                geo_code_2
            from
                civicrm_address
            where
                id = %s
            """
        input = (address_id)
        cursor.execute(mysql_select_query, address_id)
        record = cursor.fetchone()
        print(record)
        record_count = record_count + 1
finally:
    print(record_count, " records updated")
    # closing database connection.
    if(conn.is_connected()):
        conn.close()
        print("connection is closed")
There is an error in the code:
conn.commit
should be
conn.commit()
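In other words, without the parentheses the commit never runs, so the UPDATEs stay uncommitted:
conn.commit    # only references the bound method; nothing is committed
conn.commit()  # actually commits the pending UPDATE statements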

How to export an SQL query to Excel with column names

The code below accepts an SQL file and exports the data to Excel using xlsxwriter, but I cannot seem to get it to export the column names as well (a header row). I can get it to print the column names to the screen, but not to add them as the first row of the Excel file.
I did notice that if I switch to DictCursor instead of SSCursor, the Excel file instead gets filled with just the column names repeated many times.
import MySQLdb
import sys
from xlsxwriter.workbook import Workbook
import MySQLdb.cursors as cursors

reload(sys)
sys.setdefaultencoding('ISO-8859-1')

sql_file = sys.argv[1]
target_file = sys.argv[2]
target_sheet = sys.argv[3]

if sys.version_info[0] == 2:  # Not named on 2.6
    access = 'wb'
    kwargs = {}
else:
    access = 'wt'
    kwargs = {'newline': ''}

workbook = Workbook(target_file)
worksheet = workbook.add_worksheet(target_sheet)

conn = MySQLdb.connect(host="localhost",
                       user="username",
                       passwd="password",
                       db="database",
                       compress=1
                       )

with open(sql_file, 'r') as sql_script:
    sql_string = sql_script.read()

cur = conn.cursor(cursorclass=MySQLdb.cursors.SSCursor)
#print sql_string
cur.execute(sql_string)
results = cur.fetchall()
print([i[0] for i in cur.description])
cur.close()
conn.close()

for r, row in enumerate(results):
    for c, col in enumerate(row):
        worksheet.write(r, c, col)

workbook.close()
You can get the column names and write them in the first row, then proceed with writing the row values.
cols = [i[0] for i in cur.description]
for c, col in enumerate(cols):
    worksheet.write(0, c, col)
For writing the row values:
for r, row in enumerate(results):
    for c, col in enumerate(row):
        worksheet.write(r + 1, c, col)
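xlsxwriter also provides write_row(), so the same thing can be expressed a little more compactly (a sketch reusing cols and results from above):
# Header goes in row 0, data starts at row 1.
worksheet.write_row(0, 0, cols)
for r, row in enumerate(results):
    worksheet.write_row(r + 1, 0, row)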

Question marks while fetching data from phpMyAdmin in Python 3

Hello, I have Windows 10 in Greek. I want to fetch some data (the path of a file) from MySQL. The problem is that when I retrieve the path, instead of the folder name I get ??????. A sample of the code is the following:
import pymysql as MySQLdb

dbid = "client"
password = "pass"
database = "clients"
serverip = "192.168.168.150"

db = None
cur = None
try:
    db = MySQLdb.connect(host=serverip, user=dbid, passwd=password, db=database, connect_timeout=20)
    cur = db.cursor()
except MySQLdb.Error as e:
    print("Cannot connect with the Data Base : ", e)

query1 = "SELECT path FROM `instructions` WHERE clientmac ='%s'" % (str(5555))
try:
    row = None
    cur.execute(query1)
    db.commit()
    row = cur.fetchall()
except MySQLdb.Error as e:
    print("Problem while Executing query :", query1, "error : ", e)

print(row)
db.close()
cur.close()
Python 3 uses Unicode, so I think the problem is not with Python but with phpMyAdmin. The path field is “utf8_general_ci”. Everything looks fine, so why, instead of
'C:\\Users\\Γιαννης\\Documents\\Arduino', am I getting 'C:\\Users\\???????\\Documents\\Arduino'?
I found the answer. I had to add some additional code before executing the query. I post the code below.
import pymysql as MySQLdb

dbid = "client"
password = "pass"
database = "clients"
serverip = "192.168.168.150"
socketip = "192.168.168.18"

db = None
cur = None
try:
    db = MySQLdb.connect(host=serverip, user=dbid, passwd=password, db=database, use_unicode=True, connect_timeout=20)
    cur = db.cursor()
except MySQLdb.Error as e:
    print("Cannot connect with the Data Base : ", e)

query1 = "SELECT path FROM `instructions` WHERE clientmac ='%s'" % (str(5555))
try:
    row = None
    db.set_charset('utf8')
    cur.execute('SET NAMES utf8;')
    cur.execute('SET CHARACTER SET utf8;')
    cur.execute('SET character_set_connection=utf8;')
    cur.execute(query1)
    db.commit()
    row = cur.fetchall()
except MySQLdb.Error as e:
    print("Problem while Executing query :", query1, "error : ", e)

print(row)
db.close()
cur.close()
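For what it's worth, pymysql can also be told the character set directly in connect(), which should make the extra SET statements unnecessary; a sketch under that assumption, with the same connection details as above:
# charset='utf8' sets the connection character set at connect time.
db = MySQLdb.connect(host=serverip, user=dbid, passwd=password, db=database,
                     charset='utf8', use_unicode=True, connect_timeout=20)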