Python3 compare data from DB and write the answer back to DB - mysql

[Introduction]
I am currently creating a web application in Python 3.7.4 over CGI. (the web server does not support wsgi)
The application is a simple survey were users answer questions into a carousel form.
Answers are written in the DB (MySql) according to the respondentID.
No problems until now. Everything is working fine.
However, I have been asked to insert a logic into the survey and display results according to it.
[Objective]
Lets say I have 30 questions and users can answer 0, 1 or 2 for each of them.
Answers are processed only when the user complete the survey.
At completion, data are stored as integer inside DB as si001, si002, si003,....si030
If there are values inside the DB, I would like to count how many of each possible answer have been recorded. For example, how many 0s, how many 1s, how many 2s.
The results will be recorded inside the DB in different columns. (simaru, sisankaku, sibatsu)
What I would like to do is something similar to the code below. (code is obviously wrong)
I know the SELECT status from the DB can be summarized in one cursor.execute only, but for the time being (logic details are yet to be completed) I would like to left it as it is.
[CODE]
#app.route('/results1', methods=['GET', 'POST'])
def results1():
# Check survey completion status
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT * FROM private_survey WHERE privateid = %s', (session['username'],))
account = cursor.fetchone()
if account['surveystatus'] == 'Available' or account['surveystatus'] == 'Started':
# Check survey status, if available redirect to noresults
return redirect(url_for('noresults'))
else:
# Survey already completed, show results1
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT si001, si002, si003, si004, si005, si006, si007, si008, si009, si010, si011, si012, si013, si014, si015, si016, si017, si018, si019, si020, si021, si022, si023, si024, si025, si026, si027, si028, si029, si030 FROM private_survey INNER JOIN private_survey_answers ON private_survey.surveyid=private_survey_answers.surveyid WHERE private_survey.privateid=%s', (session['username'],))
answer1 = cursor.fetchone()
maru = 0
sankaku = 0
batsu = 0
simaru = 0
sisankaku = 0
sibatsu = 0
for x in answer1:
if x == 2:
simaru = maru + 1
if x == 1:
sisankaku = sankaku + 1
if x == 0:
sibatsu = batsu + 1
cursor.execute('UPDATE private_survey_answers INNER JOIN private_survey ON private_survey.surveyid=private_survey_answers.surveyid SET simaru = %s, sisankaku = %s, sibatsu = %s WHERE private_survey.privateid = %s', (simaru, sisankaku, sibatsu, session['username'],))
mysql.connection.commit()
return render_template('results1.html', answer1=answer1, account=account)
Any help or idea would be very appreciated.

I solved the problem using the count function.
The solution I found is both able to display the count in real time and write it in the DB for CSV download, etc..
#app.route('/results1', methods=['GET', 'POST'])
def results1():
# Check survey completion status
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT * FROM private_survey WHERE privateid = %s', (session['username'],))
account = cursor.fetchone()
if account['surveystatus'] == 'Available' or account['surveystatus'] == '開始':
# Check survey status, if available redirect to noresults
return redirect(url_for('noresults'))
else:
# Survey already completed, show results1
cursor = mysql.connection.cursor(MySQLdb.cursors.DictCursor)
cursor.execute('SELECT si001, si002, si003, si004, si005, si006, si007, si008, si009, si010, si011, si012, si013, si014, si015, si016, si017, si018, si019, si020, si021, si022, si023, si024, si025, si026, si027, si028, si029, si030 FROM private_survey INNER JOIN private_survey_answers ON private_survey.surveyid=private_survey_answers.surveyid WHERE private_survey.privateid=%s', (session['username'],))
answer1 = cursor.fetchone()
cnt = [answer1['si001'], answer1['si002'], answer1['si003'], answer1['si004'], answer1['si005']]
cnt.count(2)
cnt.count(1)
cnt.count(0)
cursor.execute('UPDATE private_survey_answers INNER JOIN private_survey ON private_survey.surveyid=private_survey_answers.surveyid SET simaru = %s, sisankaku = %s, sibatsu = %s WHERE private_survey.privateid = %s', (cnt.count(2), cnt.count(1), cnt.count(0), session['username'],))
mysql.connection.commit()
return render_template('results1.html', answer1=answer1, account=account, cnt=cnt)

Related

mysql update current data

I'd like to increase/decrease upVote column value by 1 depending on isUpVote boolean value. I get this error when I try the code below. postID and upVote values are all integer.
MySQLdb._exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'upVote': 0}' - 1 WHERE postID = '2'' at line 1")
#app.route('/posts/upvote', methods=['GET', 'PATCH'])
def upvote_post():
if request.method == 'PATCH':
data = request.get_json()
print(data)
postID = data['postID']
isUpVote = data['isUpVote']
cur = mysql.connection.cursor()
cur.execute("SELECT upVote FROM posts WHERE postID = '{}'".format(postID))
current_upVote = cur.fetchone()
if isUpVote:
cur.execute("UPDATE posts SET upVote = '{0}' + 1 WHERE postID = '{1}'".format(current_upVote, postID))
else:
cur.execute("UPDATE posts SET upVote = '{0}' - 1 WHERE postID = '{1}'".format(current_upVote, postID))
How should I fix the error?
And I'd like to know if there's a simpler way to update the upVote value in one line without defining current_upVote variable.
You don't need the select Query at all, you can simple add or subtract 1 of the current value
#app.route('/posts/upvote', methods=['GET', 'PATCH'])
def upvote_post():
if request.method == 'PATCH':
data = request.get_json()
print(data)
postID = data['postID']
isUpVote = data['isUpVote']
cur = mysql.connection.cursor()
if isUpVote:
cur.execute("UPDATE posts SET upVote = upVote + 1 WHERE postID = '{0}'".format( postID))
else:
cur.execute("UPDATE posts SET upVote = upVote - 1 WHERE postID = '{0}'".format( postID))
current_upvote is a dictionary containing the row that you fetched. You need to extract the value from it.
You can also do the arithmetic in Python, then just do a single query to update.
You should use parameters rather than string formatting to substitute into queries.
cur.execute("SELECT upVote FROM posts WHERE postID = %s", (postID,))
current_upVote = cur.fetchone()
if isUpvote:
vote = current_upVote['upVote'] + 1
else:
vote = current_upVote['upVote'] - 1
cur.execute("UPDATE posts SET upVote = %s WHERE postID = %s", (vote, postID))
However, the best way to do it is nbk's answer. I only posted this to explain why your code didn't work, and also to show the proper use of parameters.

Insert multiple values in MySQL with Python from an array

I am trying to insert data from my array into MySQL.
To my big surprise there were not many examples on how to do it if you perform a for-loop for your array, every example that I have found was from an already existing array list.
Thanks to Adrian below, we noticed that I need tuples for my list.
Updated code
connection = mysql.connector.connect(
host='localhost',
database='test',
user='root',
password='pass'
)
query = "INSERT INTO blue (created, published, publisher) VALUES (%s, %s, %s)"
array = []
# The idea here is to get all table rows in the page so you can group the values into rows that are going to be added to MySQL
tr = soup.find_all('tr')
for table_row in tr:
row_data = table_row.find_all('td')
insert_row = []
for data in row_data:
data = re.sub('<[^>]*>', '', str(data))
insert_row.append(data)
array.append(tuple(insert_row))
print(array)
cursor = connection.cursor()
cursor.executemany(query, array)
cursor.commit()
Getting close but at the moment I receive the following
IndexError: Tuple index out of range
mysql.connector.errors.ProgrammingError: Not enough parameters for the SQL statement
Thanks in advance!
I think you are mixing two ways of solving the problem...
One way is using the executemany method as described in the documentation
query = "INSERT INTO blues (created, published, publisher) VALUES (%s, %s, %s)"
array = []
# The idea here is to get all table rows in the page so you
# can group the values into rows that are going to be added to MySQL
tr = soup.find_all('tr')
for table_row in tr:
row_data = table_row.find_all('td')
insert_row = [None, None, None]
for idx in range(len(row_data)):
if row_data[idx] and idx < 3:
data = re.sub('<[^>]*>', '', str(row_data[idx]))
if data:
insert_row[idx] = data
array.append(tuple(insert_row))
cursor = connection.cursor()
cursor.executemany(query, array)
cursor.commit()
Another way is to build the query yourself...
query = "INSERT INTO blues (created, published, publisher) VALUES "
array = []
# The idea here is to get all table rows in the page so you can group the values into rows that are going to be added to MySQL
tr = soup.find_all('tr')
for table_row in tr:
row_data = table_row.find_all('td')
insert_row = []
for data in row_data:
data = re.sub('<[^>]*>', '', str(data))
insert_row.append(data)
array.append(tuple(insert_row))
values = []
for item in array:
row = [None, None, None]
for idx in range(len(item)):
row[idx] = item[idx]
values.append(str(tuple(row)))
query += ",".join(values)
cursor = connection.cursor()
cursor.execute(query)
cursor.commit()
Hope this helps...

START TRANSACTION with Python 3.6 mysql-connector-python returns no results

I'm using Python 3.6, mysql-connector-python 8.0.11 and 8.0.11
MySQL Community Server - GPL. The table in question is using the innoDB engine.
When using the MySQL Workbench I can enter:
USE test; START TRANSACTION; SELECT * FROM tasks WHERE task_status != 1 LIMIT 1 FOR UPDATE;
And it provides 1 record as expected:
When I use a script using python3 (from the same machine - same access, etc):
* SQL QRY: START TRANSACTION; SELECT * FROM test WHERE task_status != 1 LIMIT 1 FOR UPDATE;
* SQL RES: No result set to fetch from.
This is debug output from my script. If I change the Query to normal SELECT, I do get output.
* SQL QRY: SELECT * FROM test WHERE task_status != 1 LIMIT 1;
* SQL RES: [(1, 0, 'TASK0001')]
I know SELECT * isn't the way to go but just trying to get some response for now.
I'm trying to allow multiple worker scripts to pick up a task without the workers taking the same task:
Do a select and row lock the task so other workers 'SELECT' query doesn't show them,
Set the task status to 'being processed' and unlock the record.
This is my first venture into locking so this is new ground. I'm able to do normal queries and populate tables etc so have some experience but not with locking.
TABLE creation:
create table test
(
id int auto_increment
primary key,
task_status int not null,
task_ref varchar(16) not null
);
Questions:
Is this the correct mindset? I.e. is there a more pythonic/mysql way to do this?
Is there a specific way I need to initiate the mysql connection? Why would it work using the MySQL workbench but not via script? I've tried using direct mysql and this works too - so I think it is the python connector that may need setting up correctly as it is the only component not working.
Currently I'm using 'autocommit=1' on the connector and 'buffered=True' on the cursor. I know you can set 'autocommit=0' in the SQL before the 'START TRANSACTION' so understand for the locking I may need to do this, but for all other transactions I would prefer to keep autocommit on. Is this OK and/or doable?
CODE:
#!/usr/bin/env python
import mysql.connector
import pprint
conn = mysql.connector.connect(user='testuser',
password='testpass',
host='127.0.0.1',
database='test_db',
autocommit=True)
dbc = conn.cursor(buffered=True)
qry = "START TRANSACTION; SELECT * FROM 'test' WHERE task_status != 1 LIMIT 1 ON UPDATE;"
sql_select = dbc.execute(qry)
try:
output = dbc.fetchall()
except mysql.connector.Error as e:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(e))
exit()
else:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(output))
Many Thanks,
Frank
So after playing around a bit, I worked out (by trial and error) that the proper way to do this is to just put 'FOR UPDATE' at the end of the normal query:
Full code is below (including option to add dummy records for testing):
#!/usr/bin/env python
import mysql.connector
import pprint
import os
conn = mysql.connector.connect(user='testuser',
password='testpass',
host='127.0.0.1',
database='test_db',
autocommit=True)
dbc = conn.cursor(buffered=True)
worker_pid = os.getpid()
all_done = False
create = False
if create:
items = []
for i in range(10000):
items.append([0, 'TASK%04d' % i])
dbc.executemany('INSERT INTO test (task_status, task_ref) VALUES (%s, %s)', tuple(items))
conn.commit()
conn.close
exit()
while all_done is False:
print(all_done)
qry = (
"SELECT id FROM test WHERE task_status != 1 LIMIT 1 FOR UPDATE;"
)
sql_select = dbc.execute(qry)
try:
output = dbc.fetchall()
except mysql.connector.Error as e:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(e))
exit()
else:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(output))
if len(output) == 0:
print("All Done = Yes")
all_done = True
continue
else:
print("Not Done yet!")
if len(output) > 0:
test_id = output[0][0]
print("WORKER {0} FOUND: '{1}'".format(worker_pid, test_id))
qry = "UPDATE test SET task_status = %s, task_ref = %s WHERE id = %s;"
sql_select = dbc.execute(qry, tuple([1, worker_pid, test_id]))
conn.commit()
try:
output = dbc.fetchall()
except mysql.connector.Error as e:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(e))
else:
print(" * SQL QRY: {0}".format(qry))
print(" * SQL RES: {0}".format(output))
print(all_done)
Hope this helps someone else save some time as there are a lot of places with different info but searches for python3, mysql-connector and transactions didn't get me anything.
Good Luck,
Frank

weird error - increasing month by one - Python+MySQL

2017-05-12 is inserted in to MySQL as 2017-06-12 - couldn't figure out with debugging...pasted the key parts of code
Windows platform; Used PyMySQL; If the issue isnt resolved, i will probably bulk update the table to reduce the month by one...i guess that should be easy...
exceptions = []
flag = 'N'
def obtain_list_of_db_tickers():
"""
Obtains a list of the ticker symbols in the database.
"""
with con:
cur = con.cursor()
cur.execute("SELECT id, ticker FROM symbol")
data = cur.fetchall()
list1,list2 = map(list,zip(*data))
uniqtickers = list(set(list2))
rangelist = list(range(1,len(uniqtickers)+1))
newtickers = list(zip(rangelist,uniqtickers))
return [(d[0], d[1]) for d in newtickers]
def get_daily_historic_data_yahoo(
ticker, start_date=(2017,5,12),
end_date=datetime.date.today().timetuple()[0:3]
) :
""""
Obtains data from Yahoo Finance returns and a list of tuples
ticker: Yahoo Finance ticker symbol eg: GOOG
start_date: Start date in (YYYY,M,D) format
End_date: End date in (YYYY,M,D) format
"""
# Construct the Yahoo URL with the correct integer query parameters
# for start and end dates. Note that some parameters are zero-based!
ticker_tup = (
ticker, start_date[1]-1, start_date[2],
start_date[0], end_date[1]-1, end_date[2],
end_date[0]
)
yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
yahoo_url = yahoo_url % ticker_tup
# Try connecting to Yahoo Finance and obtaining the data
# On failure, print an error message.
global flag
try:
yf_data = pd.read_csv(yahoo_url)
yf_data['Date'] = yf_data['Date'].apply(lambda x:
datetime.datetime.strptime(x, '%Y-%m-%d'))
prices = yf_data.set_index(['Date'])[['Open', 'High', 'Low', 'Close',
'Volume', 'Adj Close']].to_records().tolist()
except Exception as e:
print("Could not download Yahoo data: %s" %e)
flag = 'Y'
exceptions.append(ticker)
return None
return prices
def insert_daily_data_into_db(
data_vendor_id, symbol_id, ticker, daily_data
):
"""
Takes a list of tuples of daily data and adds it to the
MySQL database. Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data(with adj_close and volume)
"""
# Create the time now
now = datetime.datetime.utcnow()
#Amend the data to include the vendor ID and symbol IDprices
daily_data = [
(data_vendor_id, symbol_id, ticker, d[0], now, now,
d[1], d[2], d[3], d[4], d[5], d[6])
for d in daily_data
]
# Create the insert strings
column_str = """data_vendor_id, symbol_id, ticker, price_date, created_date,
last_updated_date, open_price, high_price, low_price,
close_price, volume, adj_close_price"""
insert_str = ("%s, " *12)[:-2]
final_str = "INSERT INTO daily_price (%s) VALUES (%s)" %\
(column_str, insert_str)
# Using the MySQL connection, carry out an INSERT INTO for every symbol
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
if __name__ == "__main__":
# This ignores the warnings regarding Data Truncation
# from the Yahoo precision to Decimal(19,4) datatypes
warnings.filterwarnings('ignore')
# Loop over the tickers and insert the daily historical
# data into the database
tickers = obtain_list_of_db_tickers()
lentickers = len(tickers)
for i, t in enumerate(tickers):
print(
"Adding data for %s: %s out of %s" %
(t[1], i+1, lentickers)
)
yf_data = get_daily_historic_data_yahoo(t[1])
if (flag == 'N'):
insert_daily_data_into_db('1', t[0], t[1], yf_data)
elif (flag == 'Y'):
flag = 'N'

How can I refer to the main query's table in a nested subquery?

I have a table named passive than contains a list of timestamped events per user. I want to fill the attribute duration, which correspond to the time between the current row's event and the next event done by this user.
I tried the following query:
UPDATE passive as passive1
SET passive1.duration = (
SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) )
FROM passive as passive2
WHERE passive1.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
);
This returns the error message Error 1093 - You can't specify target table for update in FROM.
In order to circumvent this limitation, I tried to follow the structure given in https://stackoverflow.com/a/45498/395857, which uses a nested subquery in the FROM clause to create an implicit temporary table, so that it doesn't count as the same table we're updating:
UPDATE passive
SET passive.duration = (
SELECT *
FROM (SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive.event_time))
FROM passive, passive as passive2
WHERE passive.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
)
AS X
);
However, the passive table in the nested subquery doesn't refer to the same passive as in the main query. Because of that, all rows have the same passive.duration value. How can I refer to the main query's passive in the nested subquery? (or maybe are there some alternative ways to structure such a query?)
Try Like this....
UPDATE passive as passive1
SET passive1.duration = (
SELECT min(UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) )
FROM (SELECT * from passive) Passive2
WHERE passive1.user_id = passive2.user_id
AND UNIX_TIMESTAMP(passive2.event_time) - UNIX_TIMESTAMP(passive1.event_time) > 0
)
;
We can use a Python script to circumvent the issue:
'''
We need an index on user_id, timestamp to speed up
'''
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Download it at http://sourceforge.net/projects/mysql-python/?source=dlp
# Tutorials: http://mysql-python.sourceforge.net/MySQLdb.html
# http://zetcode.com/db/mysqlpython/
import MySQLdb as mdb
import datetime, random
def main():
start = datetime.datetime.now()
db=MySQLdb.connect(user="root",passwd="password",db="db_name")
db2=MySQLdb.connect(user="root",passwd="password",db="db_name")
cursor = db.cursor()
cursor2 = db2.cursor()
cursor.execute("SELECT observed_event_id, user_id, observed_event_timestamp FROM observed_events ORDER BY observed_event_timestamp ASC")
count = 0
for row in cursor:
count += 1
timestamp = row[2]
user_id = row[1]
primary_key = row[0]
sql = 'SELECT observed_event_timestamp FROM observed_events WHERE observed_event_timestamp > "%s" AND user_id = "%s" ORDER BY observed_event_timestamp ASC LIMIT 1' % (timestamp, user_id)
cursor2.execute(sql)
duration = 0
for row2 in cursor2:
duration = (row2[0] - timestamp).total_seconds()
if (duration > (60*60)):
duration = 0
break
cursor2.execute("UPDATE observed_events SET observed_event_duration=%s WHERE observed_event_id = %s" % (duration, primary_key))
if count % 1000 == 0:
db2.commit()
print "Percent done: " + str(float(count) / cursor.rowcount * 100) + "%" + " in " + str((datetime.datetime.now() - start).total_seconds()) + " seconds."
db.close()
db2.close()
diff = (datetime.datetime.now() - start).total_seconds()
print 'finished in %s seconds' % diff
if __name__ == "__main__":
main()