How to export an SQL query to Excel with column names - MySQL

The code below accepts an SQL file and exports the data to Excel using XlsxWriter, but I cannot seem to get it to export the column names as well (as a header). I can get it to print the column names to the screen, but not to add them as the first row of the Excel file.
I did notice that if I change to DictCursor instead of SSCursor, the Excel file is instead filled with just the column names repeated many times.
import MySQLdb
import sys
from xlsxwriter.workbook import Workbook
import MySQLdb.cursors as cursors

reload(sys)
sys.setdefaultencoding('ISO-8859-1')

sql_file = sys.argv[1]
target_file = sys.argv[2]
target_sheet = sys.argv[3]

if sys.version_info[0] == 2:  # Not named on 2.6
    access = 'wb'
    kwargs = {}
else:
    access = 'wt'
    kwargs = {'newline': ''}

workbook = Workbook(target_file)
worksheet = workbook.add_worksheet(target_sheet)

conn = MySQLdb.connect(host="localhost",
                       user="username",
                       passwd="password",
                       db="database",
                       compress=1)

with open(sql_file, 'r') as sql_script:
    sql_string = sql_script.read()

cur = conn.cursor(cursorclass=MySQLdb.cursors.SSCursor)
# print sql_string
cur.execute(sql_string)
results = cur.fetchall()
print([i[0] for i in cur.description])
cur.close()
conn.close()

for r, row in enumerate(results):
    for c, col in enumerate(row):
        worksheet.write(r, c, col)

workbook.close()

You can get the column names and write them in the first row, then proceed with writing the row values.
cols = [i[0] for i in cur.description]
for c, col in enumerate(cols):
    worksheet.write(0, c, col)
For writing the row values:
for r, row in enumerate(results):
    for c, col in enumerate(row):
        worksheet.write(r + 1, c, col)
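Putting both pieces together, XlsxWriter's write_row can also write an entire header or data row in one call. A minimal sketch, assuming the same cur, results, and worksheet objects as above:

# Header from the cursor metadata (read cur.description before cur.close())
header = [col[0] for col in cur.description]
worksheet.write_row(0, 0, header)

# Data rows, offset by one to leave room for the header
for r, row in enumerate(results, start=1):
    worksheet.write_row(r, 0, row)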

Related

CSV read into MySQLdb failing

I am having a problem reading my CSV file into the MySQL database. I have tried a number of solutions, but the errors keep changing and the code isn't working. This same code worked with another CSV file, so I'm thinking I might be doing something wrong with this one?
Here is my code
from database_access import *
from builtins import bytes, int, str
import codecs
import csv
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import re
import cgi
import MySQLdb
import chardet
# from database_access import *
import MySQLdb
import simplejson

if __name__ == '__main__':
    with open("SIMRA.csv", 'r') as file:
        reader = csv.reader(file)
        # reader = csv.reader(text)
        next(reader, None)
        print("project running")
        # print(row[7])
        # rowlist = []
        all_links = []
        all_project_ids = []
        for row in reader:
            if row[7] != "" and row[16] != "":
                country = row[2]
                city = row[8]
                description = row[11] + '' + row[12]
                title = row[7].replace("'", "''")
                link = row[16]
                # date_start = row[9]
                # print a check here
                print(title, description, country, city, link)
                db = MySQLdb.connect(host, username, password, database, charset='utf8')
                cursor = db.cursor()
                new_project = True
                proj_check = "SELECT * from Projects where ProjectName like '%" + title + "%'"
                # proj_check = "SELECT * from Projects where ProjectName like %s", (title,)
                # cur.execute("SELECT * FROM records WHERE email LIKE %s", (search,))
                cursor.execute(proj_check)
                num_rows = cursor.rowcount
                if num_rows != 0:
                    new_project = False
                url_compare = "SELECT * from Projects where ProjectWebpage like '" + link + "'"
                # url_compare = "SELECT * from Projects where ProjectWebpage like %s", (link,)
                cursor.execute(url_compare)
                num_rows = cursor.rowcount
                if num_rows != 0:
                    new_project = False
                if new_project:
                    project_insert = "Insert into Projects (ProjectName,ProjectWebpage,FirstDataSource,DataSources_idDataSources) VALUES (%s,%s,%s,%s)"
                    cursor.execute(project_insert, (title, link, 'SIMRA', 5))
                    projectid = cursor.lastrowid
                    print(projectid)
                    # ashoka_projectids.append(projectid)
                    db.commit()
                    ins_desc = "Insert into AdditionalProjectData (FieldName,Value,Projects_idProjects,DateObtained) VALUES (%s,%s,%s,NOW())"
                    cursor.executemany(ins_desc, ("Description", description, str(projectid)))
                    db.commit()
                    ins_location = "Insert into ProjectLocation (Type,Country,City,Projects_idProjects) VALUES (%s,%s,%s,%s)"
                    cursor.execute(ins_location, ("Main", country, city, str(projectid)))
                    db.commit()
                else:
                    print('Project already exists!')
                    print(title)
                all_links.append(link)
    # print out SIMRA's links to a file for crawling later
    with open('simra_links', 'w', newline='') as f:
        write = csv.writer(f)
        for row in all_links:
            columns = [c.strip() for c in row.strip(', ').split(',')]
            write.writerow(columns)
When I ran this, I got the following error:
File "/usr/lib/python3.8/codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa3 in position 898: invalid start byte
I did some research and tried handling the encoding error by adding different forms of encoding, as seen here - UnicodeDecodeError: 'utf8' codec can't decode byte 0xa5 in position 0: invalid start byte, and Python MySQLdb TypeError: not all arguments converted during string formatting. I added this to the csv open parameter:
with open("SIMRA.csv", 'r', encoding="cp437", errors='ignore') as file:
Running the code with these different encoding options came up with a different error:
MySQLdb._exceptions.ProgrammingError: not all arguments converted during bytes formatting
Further research suggested using tuples or lists to address this problem, so I added these to the 'select' queries in the code, as suggested here - Python MySQLdb TypeError: not all arguments converted during string formatting and in the Python SQL documentation here - PythonMySqldb
So the select query became:
proj_check = "SELECT * from Projects where ProjectName like %s", (title,)
cursor.execute(proj_check)
num_rows = cursor.rowcount
if num_rows != 0:
    new_project = False
url_compare = "SELECT * from Projects where ProjectWebpage like %s", (link,)
cursor.execute(url_compare)
num_rows = cursor.rowcount
if num_rows != 0:
    new_project = False
When I ran the code, I got this AssertionError and I have no idea what to do anymore.
File "/home/ros/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 205, in execute
assert isinstance(query, (bytes, bytearray))
AssertionError
I have run out of ideas. It might be that I'm missing something small, but I can't figure it out now, as I've been battling with this for two days.
Can anyone help point out what I'm missing? It will be greatly appreciated. This code ran perfectly with another CSV file. I am running this with Python 3.8, by the way.
I have solved this now. I had to use a different encoding with the original code, and this solved the problem. So I changed the csv open parameter to:
with open("SIMRA.csv",'r', encoding="ISO-8859-1") as file:
reader = csv.reader(file)
Were you expecting £? You need to specify what the encoding of the file is. It may be "latin1". See the syntax of LOAD DATA for how to specify CHARACTER SET latin1.
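As an aside, the AssertionError from the second attempt happens because the trailing , (title,) turns proj_check into a (query, params) tuple; execute() expects the SQL string and the parameters as separate arguments. A minimal sketch of the parameterized form, reusing the names from the question:

proj_check = "SELECT * from Projects where ProjectName like %s"
# Pass the parameters separately; the LIKE wildcards go inside the value
cursor.execute(proj_check, ('%' + title + '%',))
num_rows = cursor.rowcount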

Single CSV output with data in different columns

I have a number of CSV files with data in the first three columns only. I want to copy the data from each CSV file and paste it into one single CSV file in column order. For example, data from the first CSV file goes into columns 1, 2, and 3 of the output file; data from the 2nd CSV goes into columns 4, 5, and 6 of the same output file; and so on. Any help would be highly appreciated. Thanks.
I have tried the following code, but it appends every file's rows into the same three columns instead of placing each file in new columns.
import glob
import pandas as pd
import time
import numpy as np

start = time.time()
Filename = 'Combined_Data.csv'
extension = 'csv'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
for i in range(len(all_filenames)):
    data = pd.read_csv(all_filenames[i], skiprows=23)
    data = data.rename({'G1': 'CH1', 'G2': 'CH2', 'Dis': 'CH3'}, axis=1)
    data = data[['CH1', 'CH2', 'CH3']]
    data = data.apply(pd.to_numeric, errors='coerce')
    print(all_filenames[i])
    if i == 0:
        data.to_csv(Filename, sep=',', index=False, header=True, mode='a')
    else:
        data.to_csv(Filename, sep=',', index=False, header=False, mode='a')
end = time.time()
print((end - start), 'Seconds(Execution Time)')
If you don't need to write your own code for this, I'd recommend GoCSV's zip command; it can also handle the CSVs having different numbers of rows.
I have three CSV files:
file1.csv
Dig1,Dig2,Dig3
1,2,3
4,5,6
7,8,9
file2.csv
Letter1,Letter2,Letter3
a,b,c
d,e,f
and file3.csv
RomNum1,RomNum2,RomNum3
I,II,III
When I run gocsv zip file2.csv file1.csv file3.csv I get:
Letter1,Letter2,Letter3,Dig1,Dig2,Dig3,RomNum1,RomNum2,RomNum3
a,b,c,1,2,3,I,II,III
d,e,f,4,5,6,,,
,,,7,8,9,,,
GoCSV is pre-built for a number of different OSes.
Here's how to do it with Python's csv module, using the same three input files as above.
The more-memory-intensive option
This accumulates the final CSV one file at a time, expanding a list that represents the final CSV with each new input CSV.
#!/usr/bin/env python3
import csv
import sys

csv_files = [
    'file2.csv',
    'file1.csv',
    'file3.csv',
]

all = []
for csv_file in csv_files:
    with open(csv_file) as f:
        reader = csv.reader(f)
        rows = list(reader)

    len_all = len(all)

    # First file, initialize all and continue (skip)
    if len_all == 0:
        all = rows
        continue

    # The number of columns in all so far
    len_cols = len(all[0])

    # Extend all with the new rows
    for i, row in enumerate(rows):
        # Check to make sure all has as many rows as this file
        if i >= len_all:
            all.append([''] * len_cols)
        all[i].extend(row)

# Finally, pad all rows on the right
len_cols = len(all[0])
for i in range(len(all)):
    len_row = len(all[i])
    if len_row < len_cols:
        col_diff = len_cols - len_row
        all[i].extend([''] * col_diff)

writer = csv.writer(sys.stdout)
writer.writerows(all)
The streaming option
This reads and writes one row at a time.
(This is basically a Python port of the Go code behind GoCSV's zip, from above.)
import csv
import sys

fnames = [
    'file2.csv',
    'file1.csv',
    'file3.csv',
]
num_files = len(fnames)

readers = [csv.reader(open(x)) for x in fnames]

# Collect "header" lines; each header defines the number
# of columns for its file
headers = []
num_cols = 0
offsets = [0]
for reader in readers:
    header = next(reader)
    headers.append(header)
    num_cols += len(header)
    offsets.append(num_cols)

writer = csv.writer(sys.stdout)

# With all headers counted, every row must have this many columns
shell_row = [''] * num_cols

for i, header in enumerate(headers):
    start = offsets[i]
    end = offsets[i + 1]
    shell_row[start:end] = header

# Write headers
writer.writerow(shell_row)

# Expect that not all CSVs have the same number of rows; some will "finish" ahead of others
file_is_complete = [False] * num_files
num_complete = 0

# Loop a row at a time...
while True:
    # ... for each CSV
    for i, reader in enumerate(readers):
        if file_is_complete[i]:
            continue

        start = offsets[i]
        end = offsets[i + 1]

        try:
            row = next(reader)
            # Put this row in its place in the main row
            shell_row[start:end] = row
        except StopIteration:
            file_is_complete[i] = True
            num_complete += 1
        except:
            raise

    if num_complete == num_files:
        break

    # Done iterating CSVs (for this row), write it
    writer.writerow(shell_row)

    # Reset for next main row
    shell_row = [''] * num_cols
For either, I get:
Letter1,Letter2,Letter3,Dig1,Dig2,Dig3,RomNum1,RomNum2,RomNum3
a,b,c,1,2,3,I,II,III
d,e,f,4,5,6,,,
,,,7,8,9,,,
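Since the question already uses pandas, the same side-by-side layout can also be had with pd.concat along axis=1. A sketch, assuming the same skiprows/rename handling as the question's code:

import glob
import pandas as pd

frames = []
for idx, fname in enumerate(sorted(glob.glob('*.csv'))):
    df = pd.read_csv(fname, skiprows=23)
    df = df.rename({'G1': 'CH1', 'G2': 'CH2', 'Dis': 'CH3'}, axis=1)
    df = df[['CH1', 'CH2', 'CH3']]
    df = df.apply(pd.to_numeric, errors='coerce')
    # Suffix the column names so each file keeps distinct columns
    df = df.add_suffix('_{}'.format(idx + 1))
    frames.append(df.reset_index(drop=True))

# axis=1 places each file's columns side by side;
# shorter files are padded with NaN automatically
pd.concat(frames, axis=1).to_csv('Combined_Data.csv', index=False)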

How to retrieve an image from a database using Python

I want to retrieve an image from the database using Python, but I have a problem when I execute this code:
import mysql.connector
import io
from PIL import Image

connection = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="pfe_altran",
    database="testes",
)
cursor = connection.cursor()
sql1 = "SELECT * FROM pfe WHERE id = 1"
cursor.execute(sql1)
data2 = cursor.fetchall()
file_like2 = io.BytesIO(data2[0][0])
img1 = Image.open(file_like2)
img1.show()
cursor.close()
connection.close()
and I get this error:
file_like2 = io.BytesIO(data2[0][0]) TypeError: a bytes-like object is required, not 'int'
cursor.fetchall() returns a list of rows to your variable, so you need to handle it with a loop:
for row in data2:
    file_like2 = io.BytesIO(row[0])  # assuming row[0] contains the byte form of your image
You can use cursor.fetchmany(size=1) or cursor.fetchone() if you know that your query will return only a single row, or if you know that you only need one row; this way you can manipulate it directly and not use the loop.
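The TypeError in the question also suggests that column 0 of SELECT * is the integer id, not the image BLOB. Selecting the BLOB column explicitly avoids guessing at the column order. A minimal sketch, assuming a hypothetical image column holding the bytes:

import io
import mysql.connector
from PIL import Image

connection = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="pfe_altran",
    database="testes",
)
cursor = connection.cursor()

# "image" is an assumed column name for the BLOB
cursor.execute("SELECT image FROM pfe WHERE id = 1")
row = cursor.fetchone()

if row is not None:
    Image.open(io.BytesIO(row[0])).show()

cursor.close()
connection.close()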

Automated insertion of CSV files into a MySQL table

I am trying to insert each row from about 2000 CSV files into a MySQL table. With the following code, I have inserted only one row from just one file. How can I automate the code so that it inserts all rows from each file? The insertions need to be done just once.
import pymysql.cursors

connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
        cursor.execute(sql, ('03', 'P Exp', 'SF', 'HWH', 'none', 'no arr today', 'n/a', 'ND'))
    connection.commit()
finally:
    connection.close()
How about this code? To run it, put all your .csv files in one folder, os.walk(folder_location) that folder to get the locations of all the .csv files, and then open them one by one and insert the rows into the required (MySQL) DB.
import pandas as pd
import os
import subprocess
import warnings
import pymysql.cursors  # needed for the connection below

warnings.simplefilter("ignore")

cwd = os.getcwd()
connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

files_csv = []
for subdir, dir, file in os.walk(cwd):
    files_csv += [fi for fi in file if fi.endswith(".csv")]
print(files_csv)

for i in range(len(files_csv)):
    with open(os.path.join(cwd, files_csv[i])) as f:
        lis = [line.split() for line in f]
        for i, x in enumerate(lis):
            # print("line{0} = {1}".format(i, x))
            # HERE x contains the row data and you can access it individually using x[0], x[1], etc.
            # USE YOUR MySQL INSERTION commands here and insert the x row here.
            with connection.cursor() as cursor:
                sql = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
                cursor.execute(sql, (#CONVERTED VALUES FROM x))
    connection.commit()
Update -
getting the values for (#CONVERTED VALUES FROM x):
values = ""
for i in range(len(columns)):
values = values + x[i] + "," # Here x[i] gives a record data in ith row. Here i'm just appending the all values to be inserted in the sql table.
values = values[:-1] # Removing the last extra comma.
command = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (" + str(values) + ")"
cursor.execute(command)
#Then commit using connection.commit()
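A parameterized variant of the same insert avoids hand-building the VALUES string (and the quoting problems that come with it). A sketch, assuming x holds exactly the eight column values in table order:

sql = ("INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) "
       "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)")
# The driver quotes and escapes each value in the tuple
cursor.execute(sql, tuple(x))
connection.commit()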
import psycopg2
import time
import csv

conn = psycopg2.connect(
    host="localhost",
    database="postgres",
    user="postgres",
    password="postgres"
)
cur = conn.cursor()

start = time.time()
with open('combined_category_data_100 copy.csv', 'r') as file:
    reader = csv.reader(file)
    ncol = len(next(reader))  # number of columns, taken from the first row
    next(reader)
    for row in reader:
        # Build one %s placeholder per column so the INSERT matches the CSV width
        placeholders = ','.join(['%s'] * ncol)
        cur.execute("insert into data values ({})".format(placeholders), row)
        conn.commit()
print("data entered successfully")
end = time.time()
print(f" time taken is {end - start}")
cur.close()
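To cover all ~2000 files in one pass with the question's original pymysql setup, each file can be batched with executemany. A sketch, assuming every CSV is comma-separated with its columns already in table order and no header row:

import csv
import glob
import pymysql.cursors

connection = pymysql.connect(host='localhost', user='s', password='n9',
                             db='si', charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
sql = ("INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) "
       "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)")
try:
    with connection.cursor() as cursor:
        for path in glob.glob('*.csv'):
            with open(path, newline='') as f:
                rows = [row for row in csv.reader(f) if row]
            # One batched INSERT per file
            cursor.executemany(sql, rows)
    connection.commit()
finally:
    connection.close()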

How to import CSV file data into Aerospike without using the Aerospike loader? Is there an alternative to the Aerospike loader?

I want to import the following CSV file data into Aerospike and fire a simple select query to display the data, using Python as a client, e.g.
policyID,statecode,county,eq_site_limit,hu_site_limit,fl_site_limit,fr_site_limit,tiv_2011,tiv_2012,eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible,point_latitude,point_longitude,line,construction,point_granularity
119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1
448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3
query = client.query('test', 'csvfile')
query.select('policyID', 'statecode')
You could try using the Python csv module along with the Aerospike Python client:
https://docs.python.org/2/library/csv.html
http://www.aerospike.com/docs/client/python/
And do something similar to the following:
import aerospike
import sys
import csv

# Connect to the cluster first (host/port assumed)
config = {'hosts': [('127.0.0.1', 3000)]}
client = aerospike.client(config).connect()

rec = {}
csvfile = open('aerospike.csv', "rb")
reader = csv.reader(csvfile)
rownum = 0
for row in reader:
    # Save first row with headers
    if rownum == 0:
        header = row
    else:
        colnum = 0
        for col in row:
            # print(rownum, header[colnum], col)
            rec[header[colnum]] = col
            colnum += 1
    rownum += 1
    # print(rownum, rec)
    if rec:
        client.put(('test', 'demo', str(rownum)), rec)
        rec = {}
csvfile.close()
Note: You may need to check the size of your headers and make sure they do not exceed the 14-character bin-name limit; otherwise you could get the following:
error: (21L, 'A bin name should not exceed 14 characters limit', 'src/main/conversions.c', 500)
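Once the rows are in, the question's select can be run with the client's query API. A minimal sketch, assuming the records were written to namespace test, set demo as in the put call above and the cluster runs locally:

import aerospike

config = {'hosts': [('127.0.0.1', 3000)]}  # assumed cluster address
client = aerospike.client(config).connect()

query = client.query('test', 'demo')
query.select('policyID', 'statecode')

def show(record):
    key, metadata, bins = record
    print(bins)

# foreach runs the query and calls back once per record
query.foreach(show)
client.close()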
As far as I am aware, there is no pre-built tool other than the loader that allows you to import CSV. You could, perhaps, build one using the existing client tools.