2017-05-12 is inserted in to MySQL as 2017-06-12 - couldn't figure out with debugging...pasted the key parts of code
Windows platform; Used PyMySQL; If the issue isnt resolved, i will probably bulk update the table to reduce the month by one...i guess that should be easy...
exceptions = []
flag = 'N'
def obtain_list_of_db_tickers():
"""
Obtains a list of the ticker symbols in the database.
"""
with con:
cur = con.cursor()
cur.execute("SELECT id, ticker FROM symbol")
data = cur.fetchall()
list1,list2 = map(list,zip(*data))
uniqtickers = list(set(list2))
rangelist = list(range(1,len(uniqtickers)+1))
newtickers = list(zip(rangelist,uniqtickers))
return [(d[0], d[1]) for d in newtickers]
def get_daily_historic_data_yahoo(
ticker, start_date=(2017,5,12),
end_date=datetime.date.today().timetuple()[0:3]
) :
""""
Obtains data from Yahoo Finance returns and a list of tuples
ticker: Yahoo Finance ticker symbol eg: GOOG
start_date: Start date in (YYYY,M,D) format
End_date: End date in (YYYY,M,D) format
"""
# Construct the Yahoo URL with the correct integer query parameters
# for start and end dates. Note that some parameters are zero-based!
ticker_tup = (
ticker, start_date[1]-1, start_date[2],
start_date[0], end_date[1]-1, end_date[2],
end_date[0]
)
yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
yahoo_url = yahoo_url % ticker_tup
# Try connecting to Yahoo Finance and obtaining the data
# On failure, print an error message.
global flag
try:
yf_data = pd.read_csv(yahoo_url)
yf_data['Date'] = yf_data['Date'].apply(lambda x:
datetime.datetime.strptime(x, '%Y-%m-%d'))
prices = yf_data.set_index(['Date'])[['Open', 'High', 'Low', 'Close',
'Volume', 'Adj Close']].to_records().tolist()
except Exception as e:
print("Could not download Yahoo data: %s" %e)
flag = 'Y'
exceptions.append(ticker)
return None
return prices
def insert_daily_data_into_db(
data_vendor_id, symbol_id, ticker, daily_data
):
"""
Takes a list of tuples of daily data and adds it to the
MySQL database. Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data(with adj_close and volume)
"""
# Create the time now
now = datetime.datetime.utcnow()
#Amend the data to include the vendor ID and symbol IDprices
daily_data = [
(data_vendor_id, symbol_id, ticker, d[0], now, now,
d[1], d[2], d[3], d[4], d[5], d[6])
for d in daily_data
]
# Create the insert strings
column_str = """data_vendor_id, symbol_id, ticker, price_date, created_date,
last_updated_date, open_price, high_price, low_price,
close_price, volume, adj_close_price"""
insert_str = ("%s, " *12)[:-2]
final_str = "INSERT INTO daily_price (%s) VALUES (%s)" %\
(column_str, insert_str)
# Using the MySQL connection, carry out an INSERT INTO for every symbol
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
if __name__ == "__main__":
# This ignores the warnings regarding Data Truncation
# from the Yahoo precision to Decimal(19,4) datatypes
warnings.filterwarnings('ignore')
# Loop over the tickers and insert the daily historical
# data into the database
tickers = obtain_list_of_db_tickers()
lentickers = len(tickers)
for i, t in enumerate(tickers):
print(
"Adding data for %s: %s out of %s" %
(t[1], i+1, lentickers)
)
yf_data = get_daily_historic_data_yahoo(t[1])
if (flag == 'N'):
insert_daily_data_into_db('1', t[0], t[1], yf_data)
elif (flag == 'Y'):
flag = 'N'
Related
I'm trying to build an invoice generator GUI app using tkinter and reportlab. Currently, it's reading an excel file showing some results on treeview. However, to generate an invoice, I am creating a pdf file to show all the data for the invoice but unable to populate multiple results in the pdf file from MySQL table.
Here is my complete code:
import tkinter as tk
from tkinter import ttk
import pandas as pd
from tkinter import filedialog
import mysql.connector
import os
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
def browse_file():
# Open the file browser dialog
file_path = filedialog.askopenfilename()
# Read the Excel file into a DataFrame
df = pd.read_excel("mpo.xlsx", sheet_name='OrderDetails')
df = df.fillna('NULL')
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Insert each row into the MySQL database
for index, row in df.iterrows():
cursor.execute("INSERT INTO perorder (mp_task_nr, batch_id, dropoff_sequence, id_user_payout_type, task_status, city, fleet, id_user, Name, Vendor, order_date, UID, KM, total_amount, Remarks) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )", (row["mp_task_nr"], row["batch_id"], row["dropoff_sequence"], row["id_user_payout_type"], row["task_status"], row["city"], row["fleet"], row["id_user"], row["Name"], row["Vendor"], row["order_date"], row["UID"], row["KM"], row["total_amount"], row["Remarks"]))
# Commit the changes and close the cursor and connection
conn.commit()
cursor.close()
conn.close()
def populate_treeview():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement
cursor.execute("SELECT id_user, Vendor, KM FROM perorder")
# Fetch the results
results = cursor.fetchall()
# Add the results to the Treeview widget
for result in results:
treeview.insert("", tk.END, values=result)
# Close the cursor and connection
cursor.close()
conn.close()
def generateInvoice():
# Connect to the MySQL database
conn = mysql.connector.connect(user='root', password='', host='localhost', database='magnetico')
cursor = conn.cursor()
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
# Fetch the results
results = cursor.fetchall()
# Create the PDF file
pdf_file = "invoice.pdf"
doc = SimpleDocTemplate(pdf_file, pagesize=letter)
# Create the table data
table_data = []
table_data.append(['City', 'Qantity', 'Rate', 'Amount', 'Taxable Value','Amount + 5% VAT', 'Grand Total'])
for result in results:
table_data.append([result[0], result[1], result[2], result[3], result[4], result[5], result[6]])
# Create the table
table = Table(table_data)
# Set the table style
table.setStyle(TableStyle([
('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
('BOX', (0,0), (-1,-1), 0.25, colors.black)
]))
# Build the document
doc.build([table])
# Create the root window
root = tk.Tk()
root.title("Megnatico Invoice System")
root.geometry("800x800")
# Create the treeview
treeview = ttk.Treeview(root)
treeview.pack(side="left", fill="both", expand=True)
treeview["columns"] = ("id_user", "Vendor", "KM")
treeview.column("id_user", width=150, minwidth=150, stretch=tk.NO)
treeview.column("Vendor", width=150, minwidth=150, stretch=tk.NO)
treeview.column("KM", width=150, minwidth=150, stretch=tk.NO)
treeview.heading("id_user", text="User ID")
treeview.heading("Vendor", text="Vendor")
treeview.heading("KM", text="KM")
treeview["displaycolumns"] = ("id_user", "Vendor", "KM")
# Create the buttons
read_excel_button = tk.Button(root, text="Read Excel", command=browse_file)
invoice_per_order_button = tk.Button(root, text="Invoice per Order", command=populate_treeview)
save_pdf_button = tk.Button(root, text="Generate Invoice", command=generateInvoice)
close_button = tk.Button(root, text="Close")
# Place the buttons in a frame and pack the frame to the right of the root window
button_frame = tk.Frame(root)
button_frame.pack(side="right", fill="both")
read_excel_button.pack(padx=10, pady=5)
invoice_per_order_button.pack(padx=10, pady=5)
save_pdf_button.pack(padx=10, pady=5)
close_button.pack(padx=10, pady=5)
# Run the Tkinter event loop
root.mainloop()
From the above code, the following code is populating invoice result:
# Execute a SELECT statement to retrieve the invoice data
cursor.execute("SELECT city, count(KM), total_amount, total_amount*count(KM), total_amount*count(KM), round(total_amount*count(KM)*0.05,2), total_amount*count(KM)*0.05+(total_amount*count(KM)) FROM perorder where city like '%XB' and KM between 1 and 3")
Example of data I have
There are multiple cities AUH, DXB but my current SQL query only generating for DXB. I need it should work for AUH and other KM values too such as 1 to 3, 4 to 5 and 6 to 7.
Current Invoice Result
Example of an expected invoice
If you have any further questions, please ask.
[Introduction]
I am currently creating a web application in Python 3.7.4 over CGI. (the web server does not support wsgi)
The application is a simple survey were users answer questions into a carousel form.
Answers are written in the DB (MySql) according to the respondentID.
No problems until now. Everything is working fine.
However, I have been asked to insert a logic into the survey and display results according to it.
[Objective]
Lets say I have 30 questions and users can answer 0, 1 or 2 for each of them.
Answers are processed only when the user complete the survey.
At completion, data are stored as integer inside DB as si001, si002, si003,....si030
If there are values inside the DB, I would like to count how many of each possible answer have been recorded. For example, how many 0s, how many 1s, how many 2s.
The results will be recorded inside the DB in different columns. (simaru, sisankaku, sibatsu)
What I would like to do is something similar to the code below. (code is obviously wrong)
I know the SELECT status from the DB can be summarized in one cursor.execute only, but for the time being (logic details are yet to be completed) I would like to left it as it is.
[CODE]
#app.route('/results1', methods=['GET', 'POST'])
def results1():
# Check survey completion status
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT * FROM private_survey WHERE privateid = %s', (session['username'],))
account = cursor.fetchone()
if account['surveystatus'] == 'Available' or account['surveystatus'] == 'Started':
# Check survey status, if available redirect to noresults
return redirect(url_for('noresults'))
else:
# Survey already completed, show results1
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT si001, si002, si003, si004, si005, si006, si007, si008, si009, si010, si011, si012, si013, si014, si015, si016, si017, si018, si019, si020, si021, si022, si023, si024, si025, si026, si027, si028, si029, si030 FROM private_survey INNER JOIN private_survey_answers ON private_survey.surveyid=private_survey_answers.surveyid WHERE private_survey.privateid=%s', (session['username'],))
answer1 = cursor.fetchone()
maru = 0
sankaku = 0
batsu = 0
simaru = 0
sisankaku = 0
sibatsu = 0
for x in answer1:
if x == 2:
simaru = maru + 1
if x == 1:
sisankaku = sankaku + 1
if x == 0:
sibatsu = batsu + 1
cursor.execute('UPDATE private_survey_answers INNER JOIN private_survey ON private_survey.surveyid=private_survey_answers.surveyid SET simaru = %s, sisankaku = %s, sibatsu = %s WHERE private_survey.privateid = %s', (simaru, sisankaku, sibatsu, session['username'],))
mysql.connection.commit()
return render_template('results1.html', answer1=answer1, account=account)
Any help or idea would be very appreciated.
I solved the problem using the count function.
The solution I found is both able to display the count in real time and write it in the DB for CSV download, etc..
#app.route('/results1', methods=['GET', 'POST'])
def results1():
# Check survey completion status
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT * FROM private_survey WHERE privateid = %s', (session['username'],))
account = cursor.fetchone()
if account['surveystatus'] == 'Available' or account['surveystatus'] == '開始':
# Check survey status, if available redirect to noresults
return redirect(url_for('noresults'))
else:
# Survey already completed, show results1
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT si001, si002, si003, si004, si005, si006, si007, si008, si009, si010, si011, si012, si013, si014, si015, si016, si017, si018, si019, si020, si021, si022, si023, si024, si025, si026, si027, si028, si029, si030 FROM private_survey INNER JOIN private_survey_answers ON private_survey.surveyid=private_survey_answers.surveyid WHERE private_survey.privateid=%s', (session['username'],))
answer1 = cursor.fetchone()
cnt = [answer1['si001'], answer1['si002'], answer1['si003'], answer1['si004'], answer1['si005']]
cnt.count(2)
cnt.count(1)
cnt.count(0)
cursor.execute('UPDATE private_survey_answers INNER JOIN private_survey ON private_survey.surveyid=private_survey_answers.surveyid SET simaru = %s, sisankaku = %s, sibatsu = %s WHERE private_survey.privateid = %s', (cnt.count(2), cnt.count(1), cnt.count(0), session['username'],))
mysql.connection.commit()
return render_template('results1.html', answer1=answer1, account=account, cnt=cnt)
I have a CSV file that has data that looks like this:
54861,54850,Doe,John,NULL,-87.1181407064,30.3773576858
54862,54851,Doe,Linda,Lee,None,None
The last two columns are longitude and latitude
I'm using mysql.connector to insert into the database. It handles the first record, okay but, because the last two columns are floats, it croaks when it gets to the second record with the values set to "None".
I tried programmatically to set the values to NULL but it won't accept that either.
I've tried a couple of different things but can't figure it out.
This has to be done in Python.
Here is the code:
import sys
import mysql.connector
import csv
import os
from mysql.connector import Error
from mysql.connector import errorcode
#Specify the import file
try:
inputCSV = 'geocoded_test.csv'
#Open the file and give it a handle
csvFile = open(inputCSV, 'r')
#Create a reader object for the input file
reader = csv.reader(csvFile, delimiter = ',')
except IOError as e:
print("The input file ", inputCSV, " was not found", e)
exit()
try:
mydb = mysql.connector.connect(host='localhost',
database='wordpress',
user='wp_user',
password='XXXXXXXX!'
)
mycursor = mydb.cursor()
except mysql.connector.Error as error:
print( "Failed to connect to database: {}".format(error))
exit()
try:
record_count = 0
for row in reader:
contact_id,address_id,last_name, first_name, middle_name, longitude, latitude = row
print(row)
# It is here that I want to convert to NULL.
if longitude == "None":
longitude = -1.0
if latitude == "None":
latitude = -1.0
#Update single record now
mycursor.execute("""
update civicrm_address
set
geo_code_1 = %s,
geo_code_2 = %s
where
id = %s
and
location_type_id = %s
""",
(latitude, longitude, address_id, 6)
)
mydb.commit()
print(mycursor.rowcount)
record_count +=1
print("Record", record_count, " updated successfully")
finally:
print(record_count, " records updated")
#closing database connection.
if(mydb.is_connected()):
mydb.close()
print("connection is closed")
One option would be to use LOAD DATA with custom logic which catches the None string values and then converts them to NULL:
LOAD DATA LOCAL INFILE 'your_file.csv'
INTO TABLE yourTable
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\r\n'
(col1, col2, last, first, middle, #lat, #lng)
SET lat = CASE WHEN #lat = 'None'
THEN NULL
ELSE CAST(#lat AS DECIMAL(10,8)) END,
SET lng = CASE WHEN #lng = 'None'
THEN NULL
ELSE CAST(#lng AS DECIMAL(10,8)) END;
I assume above that your latitude column is called lat, and your longitude column lng. I just dummy placeholder names for the other columns, but you would need to use the actual column names to make the above load work.
20170325_225012,ctcc01,voice,639128342574,639464810386,cap_timeout,6004,639180007006,2,0,null
20170325_235012,ctcc01,voice,639128342554,639464520384,cap_timeout,6004,639180007006,2,0,null
20170325_245012,ctcc01,voice,639128342174,639464820327,cap_timeout,6004,639180007006,2,0,null
Sample text data.csv file above:
Steps needed to complete:
Process the csv file
Each line should be inserted to MySQL Column. Column1, Column2, Column3 ... Column11
This is my Code so far.
import csv
import re
f = open('data.csv')
csv_f = csv.reader(f)
writer = csv.writer(f)
cdr = []
for row in csv_f:
cdr.append("Some auto increment id")
cdr.append(re.sub(r'_.*$', "", row[0]))
cdr.append(row[1])
cdr.append(row[2])
cdr.append(row[3])
cdr.append(row[4])
cdr.append(row[5])
cdr.append(row[6])
cdr.append(row[7])
cdr.append(row[8])
cdr.append(row[9])
cdr.append(row[10])
print cdr
with open('output.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerows(cdr)
I was able to output it on the terminal the way i want it but it did make it in one list :). Somehow i don't know how can i split and insert it on the mysql.
['Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'cap_timeout', '6004', '639180007006', '2', '0', 'null', 'Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'no_subs', '6004', '639180007006', '2', '0', 'null', 'Some auto increment id', '20170325', 'ctcc01', 'voice', '639128342574', '639464820387', 'cap_timeout', '6004', '639180007006', '2', '0', 'null']
No. You and you need to use MySql.db.connect, and insert and commit.
Basically, you'll find your answer in a similar question here
The code should be:
# open file, and define a csv reader and writer - you've done that correctly
import csv
import re
f = open('data.csv')
csv_f = csv.reader(f)
writer = csv.writer(f)
vals = []
# open and connect to database
dbname = 'mydb' # or whatever your database is named
tablename = 'mytable' # or whatever table you wish to insert into
hostname = 'localhost' # or whatever your mysql db hostname is
username = 'root' # or whatever your username for mysql db is
pw = '' # or whatever your password is for that user
mydb = MySQLdb.connect(host=hostname, user=username, passwd=pw, db=dbname)
cursor = mydb.cursor()
# for each row create an 'INSERT INTO' execution-string
auto = 0 # auto-incrementing
exec_string = ""
rowid = ""
for row in csv_f:
# INSERT INTO mytable(Column, Column1,Column2, ... Column12)
# VALUES(auto, rowid, row(1), row(2)...
# execstr header:
exec_string = "INSERT INTO " + tablename + "(Column, "
for i in range(1,11): # columns
exec_string += "Column" + i + (", " if (i<11))
# ...it may be a mistake in the question and you need Column0
# ...in which case the end of the exec_string line should read + "("
# ...and the for should be in range(0,11): ...
# execstr values:
exec_string += ") Values("
for _ in range(12):
exec_string += "%S"
exec_string += ")" # close values function
vals = []
auto += 1
rowid = re.sub(r'_.*$', "", row[0])
vals.append(auto)
vals.append(rowid)
for i in range(2,12) # count to 12 starting with 2
vals.append(row[i])
# and execute it!
cursor.execute(exec_string, vals)
# commit and close the connection to the database.
mydb.commit()
cursor.close()
I have a table named passive than contains a list of timestamped events per user. I want to fill the attribute duration, which correspond to the time between the current row's event and the next event done by this user.
I tried the following query:
UPDATE passive as passive1
SET passive1.duration = (
SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) )
FROM passive as passive2
WHERE passive1.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
);
This returns the error message Error 1093 - You can't specify target table for update in FROM.
In order to circumvent this limitation, I tried to follow the structure given in https://stackoverflow.com/a/45498/395857, which uses a nested subquery in the FROM clause to create an implicit temporary table, so that it doesn't count as the same table we're updating:
UPDATE passive
SET passive.duration = (
SELECT *
FROM (SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive.event_time))
FROM passive, passive as passive2
WHERE passive.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
)
AS X
);
However, the passive table in the nested subquery doesn't refer to the same passive as in the main query. Because of that, all rows have the same passive.duration value. How can I refer to the main query's passive in the nested subquery? (or maybe are there some alternative ways to structure such a query?)
Try Like this....
UPDATE passive as passive1
SET passive1.duration = (
SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) )
FROM (SELECT * from passive) Passive2
WHERE passive1.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
)
;
We can use a Python script to circumvent the issue:
'''
We need an index on user_id, timestamp to speed up
'''
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Download it at http://sourceforge.net/projects/mysql-python/?source=dlp
# Tutorials: http://mysql-python.sourceforge.net/MySQLdb.html
# http://zetcode.com/db/mysqlpython/
import MySQLdb as mdb
import datetime, random
def main():
start = datetime.datetime.now()
db=MySQLdb.connect(user="root",passwd="password",db="db_name")
db2=MySQLdb.connect(user="root",passwd="password",db="db_name")
cursor = db.cursor()
cursor2 = db2.cursor()
cursor.execute("SELECT observed_event_id, user_id, observed_event_timestamp FROM observed_events ORDER BY observed_event_timestamp ASC")
count = 0
for row in cursor:
count += 1
timestamp = row[2]
user_id = row[1]
primary_key = row[0]
sql = 'SELECT observed_event_timestamp FROM observed_events WHERE observed_event_timestamp > "%s" AND user_id = "%s" ORDER BY observed_event_timestamp ASC LIMIT 1' % (timestamp, user_id)
cursor2.execute(sql)
duration = 0
for row2 in cursor2:
duration = (row2[0] - timestamp).total_seconds()
if (duration > (60*60)):
duration = 0
break
cursor2.execute("UPDATE observed_events SET observed_event_duration=%s WHERE observed_event_id = %s" % (duration, primary_key))
if count % 1000 == 0:
db2.commit()
print "Percent done: " + str(float(count) / cursor.rowcount * 100) + "%" + " in " + str((datetime.datetime.now() - start).total_seconds()) + " seconds."
db.close()
db2.close()
diff = (datetime.datetime.now() - start).total_seconds()
print 'finished in %s seconds' % diff
if __name__ == "__main__":
main()