Inserting to MySQL with mysql.connector - good practice/efficiency

I am working on a personal project and was wondering if my solution for inserting data into a MySQL database would be considered "pythonic" and efficient.
I have written a separate class for that, which is called from an object that holds a dataframe. From there I call my save() function to write the dataframe to the database.
The script runs once a day: I scrape some data from a few websites and save it to my database. It is therefore important that it really runs through completely, even when I hit bad data or temporary connection issues (the script and the database run on different machines).
import mysql.connector
# custom logger
from myLog import logger
# custom class for formatting the data; a lot of potential errors are handled here
from myFormat import myFormat
# insert strings to MySQL are stored and referenced here
import sqlStrings

class saveSQL:

    def __init__(self):
        self.frmt = myFormat()
        self.host = 'XXX.XXX.XXX.XXX'
        self.user = 'XXXXXXXX'
        self.password = 'XXXXXXXX'
        self.database = 'XXXXXXXX'

    def save(self, payload, type):
        match type:
            case 'First':
                return self.__first(payload)
            case 'Second':
                ...
            case _:
                logger.error('Undefined Input for Type!')

    def __first(self, payload):
        try:
            self.mydb = mysql.connector.connect(host=self.host, user=self.user, password=self.password, database=self.database)
            mycursor = self.mydb.cursor()
        except mysql.connector.Error as err:
            logger.error('Couldn\'t establish connection to DB!')
        try:
            tmpList = payload.values.tolist()
        except ValueError:
            logger.error('Value error in converting dataframe to list: %s' % payload)
        try:
            mycursor.executemany(sqlStrings.First, tmpList)
            self.mydb.commit()
            dbWrite = mycursor.rowcount
        except mysql.connector.Error as err:
            logger.error('Error in writing to database: %s' % err)
            # fall back to row-by-row inserts so one bad row doesn't lose the whole batch
            dbWrite = 0
            for ele in tmpList:
                try:
                    mycursor.execute(sqlStrings.First, ele)
                    self.mydb.commit()
                    dbWrite = dbWrite + mycursor.rowcount
                except mysql.connector.Error as err:
                    logger.error('Error in writing to database: %s \n ele: %s' % (err, ele))
                    continue
            pass
        mycursor.close()
        return dbWrite
Things I am wondering about:
Is match/case a good option for distinguishing between writing to different tables depending on the data?
Are the different try/except blocks really necessary, or is there an easier way of handling potential errors? (One flatter arrangement is sketched below.)
Do I really need the pass statement at the end of the for loop?
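For comparison, here is a minimal sketch of a flatter arrangement, assuming hypothetical names cfg (the connection dict), sql (the INSERT string) and rows (a list of parameter tuples). contextlib.closing guarantees the connection and cursor are released even on errors, without nesting every step in its own try/except:

from contextlib import closing

import mysql.connector

def save_rows(cfg, sql, rows):
    # closing() calls .close() on the way out, even if executemany() raises;
    # a single try/except around the call site can then log any failure
    with closing(mysql.connector.connect(**cfg)) as conn, closing(conn.cursor()) as cur:
        cur.executemany(sql, rows)
        conn.commit()
        return cur.rowcount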

Related

How to implement a context manager for MySQL, either with mysql.connector or PyMySQL

Trying to implement a context manager for a MySQL connection, I get an error message.
I have used the mysql.connector module (including its connection options) to connect to a database, and also the PyMySQL module, always getting the same result.
import pymysql

class MySQLConnector:
    def __init__(self, con_dict):
        self.cnx = None
        self.con_dict = con_dict

    def __enter__(self):
        self.cnx = pymysql.connect(**self.con_dict)
        return self.cnx

    def __exit__(self):
        self.cnx.close()

class ReceiveBroke(QDialog):  # QDialog assumed to come from PyQt (e.g. PyQt5.QtWidgets)
    def __init__(self, db, config):
        super().__init__()
        with MySQLConnector(config) as self.cnx:
            cur = self.cnx.cursor()
            qry = "SELECT * FROM horses"
            cur.execute(qry)
            result = cur.fetchall()
            print(result)
        self.setUI()
        self.conTest()

    def conTest(self):
        if self.cnx.open:
            print("y")
I hope to obtain a working context manager that closes the database connection when the with block finishes.
Result: an error message, always after executing the last line within the with block: "TypeError: __exit__ takes one positional argument but four were given", at which point the program crashes.
Hope you found the error already. If not, I think the error message is quite clear:
TypeError: __exit__ takes one positional argument but four were given
The __exit__ method needs four arguments: self, type, value and traceback. The last three relate to any exception that may happen inside the with block.
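A minimal corrected sketch of the context manager, where only the __exit__ signature changes:

import pymysql

class MySQLConnector:
    def __init__(self, con_dict):
        self.cnx = None
        self.con_dict = con_dict

    def __enter__(self):
        self.cnx = pymysql.connect(**self.con_dict)
        return self.cnx

    def __exit__(self, exc_type, exc_value, traceback):
        # accept the three exception details the with statement always passes in
        self.cnx.close()
        # returning None (falsy) lets any exception propagate normally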

Python script DB connection as Pool not working, but simple connection is working

I am writing a script in Python 3 that listens to a tunnel and saves and updates data in MySQL depending on the message received.
I ran into weird behavior: I made a simple connection to MySQL using the pymysql module and everything worked fine, but after some time this simple connection closes.
So I decided to implement a pooled connection to MySQL, and here the problem arises. No errors occur, but the issue is the following:
My cursor = yield self._pool.execute(query, list(filters.values()))
The cursor result is a tornado_mysql.pools.Pool object at 0x0000019DE5D71F98,
and the script stalls there, not doing anything more.
If I remove yield from the cursor line, it passes that line and the next line throws an error:
response = yield c.fetchall()
AttributeError: 'Future' object has no attribute 'fetchall'
How can I fix the MySQL pool connection to work properly?
What I tried:
I used a few modules for pool connections; all run into the same issue.
I went back to a simple connection with pymysql and it worked again.
Below is my code:
Python script file:
import pika
from model import SyncModel

_model = SyncModel(conf, _server_id)

@coroutine
def main():
    credentials = pika.PlainCredentials('user', 'password')
    try:
        cp = pika.ConnectionParameters(
            host='127.0.0.1',
            port=5671,
            credentials=credentials,
            ssl=False,
        )
        connection = pika.BlockingConnection(cp)
        channel = connection.channel()

        @coroutine
        def callback(ch, method, properties, body):
            if 'messageType' in properties.headers:
                message_type = properties.headers['messageType']
                if message_type in allowed_message_types:
                    result = ptoto_file._reflection.ParseMessage(descriptors[message_type], body)
                    if result:
                        result = protobuf_to_dict(result)
                        if message_type == 'MyMessage':
                            yield _model.message_event(data=result)
                else:
                    print('Message type not in allowed list = ' + str(message_type))
                    print('continue listening...')

        channel.basic_consume(callback, queue='queue', no_ack=True)
        print(' [*] Waiting for messages. To exit press CTRL+C')
        channel.start_consuming()
    except Exception as e:
        print('Could not connect to host 127.0.0.1 on port 5671')
        print(str(e))

if __name__ == '__main__':
    main()
SyncModel:
from tornado_mysql import pools
from tornado.gen import coroutine, Return
from tornado_mysql.cursors import DictCursor

class SyncModel(object):
    def __init__(self, conf, server_id):
        self.conf = conf
        servers = [i for i in conf.mysql.servers]
        for s in servers:
            if s['server_id'] == server_id:
                # s holds all data: host, user, port, autocommit, charset, db, password
                s['cursorclass'] = DictCursor
                self._pool = pools.Pool(s, max_idle_connections=1, max_recycle_sec=3)

    @coroutine
    def message_event(self, data):
        table_name = 'table_name'
        query = ''
        data = data['message']
        filters = {
            'id': data['id']
        }
        # here the connection fails as described above
        response = yield self.query_select(table_name, self._pool, filters=filters)

    @coroutine
    def query_select(self, table_name, _pool, filters=None):
        if filters is None:
            filters = {}
        combined_filters = ['`%s` = %%s' % i for i in filters.keys()]
        where = 'WHERE ' + ' AND '.join(combined_filters) if combined_filters else ''
        query = """SELECT * FROM `%s` %s""" % (table_name, where)
        c = self._pool.execute(query, list(filters.values()))
        response = yield c.fetchall()
        raise Return({response})
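For reference, tornado_mysql's documented usage is that Pool.execute returns a Future that resolves to a cursor, so the coroutine has to yield the execute call first and then read rows from the resolved cursor synchronously. A sketch of query_select in that style (untested against the poster's setup):

@coroutine
def query_select(self, table_name, _pool, filters=None):
    if filters is None:
        filters = {}
    combined_filters = ['`%s` = %%s' % i for i in filters.keys()]
    where = 'WHERE ' + ' AND '.join(combined_filters) if combined_filters else ''
    query = 'SELECT * FROM `%s` %s' % (table_name, where)
    cur = yield self._pool.execute(query, list(filters.values()))  # yield the Future to get a cursor
    raise Return(cur.fetchall())  # fetchall() on the resolved cursor is a plain call, not yielded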
All the code was working with just a simple connection to the database; after I started to use a pool, the example is not working anymore. I will appreciate any help with this issue.
This is a stand-alone script.
The pool connection was not working, so I switched back to pymysql, double-checking the connection.
I would like to post the answer that worked; only this solution worked for me:
before connecting to MySQL, check whether the connection is open, and if not, reconnect:
if not self.mysql.open:
    self.mysql.ping(reconnect=True)
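As a standalone illustration of that pattern (the conn name is hypothetical), pymysql connections expose .open and .ping(reconnect=True):

import pymysql

def ensure_open(conn):
    # re-establish a dropped connection before issuing queries;
    # ping(reconnect=True) transparently reconnects if the link is gone
    if not conn.open:
        conn.ping(reconnect=True)
    return conn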

How to return keywords and values in a JSON object using MySQL and Python

I have code that fetches data from a MySQL database; it returns the values but not the keywords. I need the keywords so I can make calls later in JavaScript.
Instead of the keywords, which are firstname, lastname, username and mail, it returns 0, 1, 2, 3.
0: "Luka"
1: "Tubic"
2: "Tubex"
3: "test#mail.com"
It should return
"firstname": "Luka"
"lastname":"Tubic"
"username":"Tubex"
"mail":"test#mail.com"
This is the Python script:
from flask import Flask
from flask import jsonify, request, redirect, url_for
import mysql.connector
import random
import string
import smtplib

app = Flask(__name__, static_folder='www')

def _connect():
    connection = mysql.connector.connect(host='localhost',
                                         user='root',
                                         password='root',
                                         auth_plugin='mysql_native_password',
                                         database='User')
    connection.commit()
    return connection

@app.route('/admin/users', methods=['GET', 'POST', 'OPTION'])
def dummyadmin():
    if request.method == 'GET':
        connection = _connect()
        c = connection.cursor(buffered=True)
        query = 'SELECT first_name,last_name,username,email from user.user'
        c.execute(query)
        users = c.fetchall()
        if users is None:
            return None
        connection.commit()
        c.close()
        print(users)
        return jsonify(users)
    if request.method == 'POST':
        connection = _connect()
        c = connection.cursor(buffered=True)

@app.after_request
def after_request(response):
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
    response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
    return response

if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0')
The solution is documented: use a DictCursor.
c = connection.cursor(dictionary=True, buffered=True)
NB: by default a DB-API cursor yields tuples, which are indeed indexed by position, not by column name. This is a pretty standard and natural representation of relational data, and it is less costly than dicts.
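Applied to the GET branch above, the change is a single line, and jsonify then emits objects keyed by column name (a sketch):

c = connection.cursor(dictionary=True, buffered=True)
c.execute('SELECT first_name, last_name, username, email FROM user.user')
users = c.fetchall()   # list of dicts, e.g. [{'first_name': 'Luka', ...}]
return jsonify(users)  # JSON objects instead of positional arrays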
MySQL has a function called JSON_OBJECT which takes a list of key/value arguments and returns a JSON object, as below. So in your case, instead of fetching the columns separately, you can consider getting a JSON object from the DB itself:
SELECT json_object('first_name',first_name,'last_name',last_name,'username',username,'email',email) as jsonString from user.user;
Output format:
{"first_name": "John", "last_name": "Edial", "username": "john.edial", "email": "john@wddil.com"}

Pipeline doesn't write to MySQL but also gives no error

I've tried to implement this pipeline in my spider.
After installing the necessary dependencies I am able to run the spider without any errors, but for some reason it doesn't write to my database.
I'm pretty sure something is going wrong with connecting to the database: when I enter a wrong password, I still don't get any error.
When the spider has scraped all the data, it needs a few minutes before it starts dumping the stats.
2017-08-31 13:17:12 [scrapy] INFO: Closing spider (finished)
2017-08-31 13:17:12 [scrapy] INFO: Stored csv feed (27 items) in: test.csv
2017-08-31 13:24:46 [scrapy] INFO: Dumping Scrapy stats:
Pipeline:
import MySQLdb.cursors
from twisted.enterprise import adbapi
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.utils.project import get_project_settings
from scrapy import log

SETTINGS = {}
SETTINGS['DB_HOST'] = 'mysql.domain.com'
SETTINGS['DB_USER'] = 'username'
SETTINGS['DB_PASSWD'] = 'password'
SETTINGS['DB_PORT'] = 3306
SETTINGS['DB_DB'] = 'database_name'

class MySQLPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.stats)

    def __init__(self, stats):
        print "init"
        # Instantiate DB
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
            host=SETTINGS['DB_HOST'],
            user=SETTINGS['DB_USER'],
            passwd=SETTINGS['DB_PASSWD'],
            port=SETTINGS['DB_PORT'],
            db=SETTINGS['DB_DB'],
            charset='utf8',
            use_unicode=True,
            cursorclass=MySQLdb.cursors.DictCursor
        )
        self.stats = stats
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        print "close"
        """ Cleanup function, called after crawling has finished, to close open
        objects.
        Close ConnectionPool. """
        self.dbpool.close()

    def process_item(self, item, spider):
        print "process"
        query = self.dbpool.runInteraction(self._insert_record, item)
        query.addErrback(self._handle_error)
        return item

    def _insert_record(self, tx, item):
        print "insert"
        result = tx.execute(
            " INSERT INTO matches(type,home,away,home_score,away_score) VALUES (soccer," + item["home"] + "," + item["away"] + "," + item["score"].explode("-")[0] + "," + item["score"].explode("-")[1] + ")"
        )
        if result > 0:
            self.stats.inc_value('database/items_added')

    def _handle_error(self, e):
        print "error"
        log.err(e)
Spider:
import scrapy
import dateparser
from crawling.items import KNVBItem

class KNVBspider(scrapy.Spider):
    name = "knvb"
    start_urls = [
        'http://www.knvb.nl/competities/eredivisie/uitslagen',
    ]
    custom_settings = {
        'ITEM_PIPELINES': {
            'crawling.pipelines.MySQLPipeline': 301,
        }
    }

    def parse(self, response):
        # www.knvb.nl/competities/eredivisie/uitslagen
        for row in response.xpath('//div[@class="table"]'):
            for div in row.xpath('./div[@class="row"]'):
                match = KNVBItem()
                match['home'] = div.xpath('./div[@class="value home"]/div[@class="team"]/text()').extract_first()
                match['away'] = div.xpath('./div[@class="value away"]/div[@class="team"]/text()').extract_first()
                match['score'] = div.xpath('./div[@class="value center"]/text()').extract_first()
                match['date'] = dateparser.parse(div.xpath('./preceding-sibling::div[@class="header"]/span/span/text()').extract_first(), languages=['nl']).strftime("%d-%m-%Y")
                yield match
If there are better pipelines available to do what I'm trying to achieve, that'd be welcome as well. Thanks!
Update:
With the link provided in the accepted answer I eventually got to this function that works (and thus solved my problem):
def process_item(self, item, spider):
    print "process"
    query = self.dbpool.runInteraction(self._insert_record, item)
    query.addErrback(self._handle_error)
    query.addBoth(lambda _: item)
    return query
Take a look at this for how to use adbapi with MySQL for saving scraped items. Note the difference between your process_item and their process_item method implementation. While you return the item immediately, they return the Deferred object, which is the result of the runInteraction method and which returns the item upon its completion. I think this is the reason your _insert_record never gets called.
If you can see the insert in your output, that's already a good sign.
I'd rewrite the insert function this way:
def _insert_record(self, tx, item):
    print "insert"
    raw_sql = "INSERT INTO matches(type,home,away,home_score,away_score) VALUES ('%s', '%s', '%s', '%s', '%s')"
    # note: Python strings have no explode(); split('-') does the equivalent job
    sql = raw_sql % ('soccer', item['home'], item['away'], item['score'].split('-')[0], item['score'].split('-')[1])
    print sql
    result = tx.execute(sql)
    if result > 0:
        self.stats.inc_value('database/items_added')
It allows you to debug the SQL you're using. In your version you're not wrapping the values in ', which is a syntax error in MySQL.
I'm not sure about your last values (the score), so I treated them as strings.
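As a design note, interpolating values into the SQL string is fragile and open to injection once debugging is done; MySQLdb's execute() accepts %s placeholders with a parameter tuple, so a safer sketch (keeping the thread's Python 2 style) would be:

def _insert_record(self, tx, item):
    home_score, away_score = item['score'].split('-')
    # placeholders let the driver quote and escape every value itself
    tx.execute(
        "INSERT INTO matches (type, home, away, home_score, away_score) "
        "VALUES (%s, %s, %s, %s, %s)",
        ('soccer', item['home'], item['away'], home_score.strip(), away_score.strip())
    )
    self.stats.inc_value('database/items_added')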

MySQL connection pooling in separate DB class - howto?

I'm writing an application where I've moved all the MySQL connection setup and teardown into a class, initializing it within individual function calls with a with statement.
Now that the development is all done, I'm optimizing and would like to set up connection pooling, but I can't for the life of me figure out how: if I initialize the pool when I set up the object in __enter__, won't that set up a new pool for each object?
If I put the pool setup in the global scope of the module, then how do I ensure I set up the pool before I start creating DB objects?
My DB code looks somewhat like this:
# Setting up details for connecting to a local MariaDB/MySQL instance
# replace with suitable code/module when porting to cloud/production
import sys
import mysql.connector

"""Module for abstracting database connectivity
Import this module and then call run_query(), run_query_vals() or run_query_no_return() """

__all__ = ['UseDatabase', 'CredentialsError', 'ConnectionError', 'SQLError']

class ConnectionError(Exception):
    pass

class CredentialsError(Exception):
    pass

class SQLError(Exception):
    pass

dbconfig = {'host': '127.0.0.1', 'user': 'statdev', 'password': 'statdev', 'database': 'stat', }
# Just so we remember. This also doubles as default server details while doing unit testing.

class UseDatabase:
    # myconfig = {'host': '127.0.0.1', 'user': 'statdev', 'password': 'statdev', 'database': 'stat', }
    config = None

    def __init__(self, config: dict):
        self.config = config

    def __enter__(self) -> 'self':
        try:
            self.conn = mysql.connector.connect(**self.config)
            self.cursor = self.conn.cursor(dictionary=True)
            return self
        except mysql.connector.InterfaceError as err:
            print('Can\'t connect to Database - is it available? \nError: ', str(err))
            raise ConnectionError(err)
        except mysql.connector.ProgrammingError as err:
            print('Invalid credentials - please check ID/Password. \nError: ', str(err))
            raise CredentialsError(err)
        except mysql.connector.IntegrityError as err:
            print("Error: {}".format(err))
        except Exception as err:
            print('Something else went wrong:', str(err))
            return err

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.conn.commit()
        self.cursor.close()
        self.conn.close()
        if exc_type is mysql.connector.errors.ProgrammingError:
            print('Error in SQL Code - please check the query. \nError: ', str(exc_type))
            raise SQLError(exc_value)
        elif exc_type:
            print('Something else went wrong\n', str(exc_type))
            raise exc_type(exc_value)

    def run_query(self, query_str) -> 'cursor':
        """query function that takes """
        self.cursor.execute(query_str, None)
        return self.cursor

    def run_query_vals(self, query_str, tupleval) -> 'cursor':
        # print("\n\n %s " % query_str)
        self.cursor.execute(query_str, tupleval)
        return self.cursor

    def run_query_no_return(self, query_str) -> 'cursor':
        """query function that takes """
        self.cursor.execute(query_str)
        return self.cursor

def test():
    # dbconfig = {'host': '127.0.0.1', 'user': 'statdev', 'password': 'statdev', 'database': 'stat', }
    with UseDatabase(dbconfig) as db:
        # result = db.run_query("Select NULL from dual")
        result = db.run_query_vals('Select NULL from dual', None)
        res = result.fetchone()
        if res == {'NULL': None}:
            print("DB Module Test was successful! \n"
                  "Queries return values in dictionaries."
                  "\nTest query \'Select NULL from dual\' returned result: %s" % str(res))

if __name__ == '__main__':
    test()
This has worked for me, but I am not sure it's a perfect solution: for example, trying to do multiple inserts via a for loop results in a 'Failed getting connection; pool exhausted' error. I did not have this problem when I was using a function-based (non-class-based) connection pool. Anyway, to avoid this problem I simply use cursor.executemany in one go.
Hope this helps someone!
import logging

from mysql.connector.pooling import MySQLConnectionPool
from mysql.connector.errors import ProgrammingError, InterfaceError
from settings import config

logger = logging.getLogger(__name__)  # assumed: the snippet's logger is set up elsewhere
# ConnectionError and CredentialsError are assumed to be the custom exceptions
# defined elsewhere in the project (as in the question's module above).

# Database connection pool
dbconfig = config.dbconfig
dbconfig_pool = config.dbconfig_pool

# The following is my 'class DBasePool' content:
class DBasePool:
    def __init__(self, dbconfig, dbconfig_pool):
        self.dbconfig = dbconfig
        self.pool_name = dbconfig_pool['pool_name']
        self.pool_size = dbconfig_pool['pool_size']
        try:
            self.cnxpool = self.create_pool(pool_name=self.pool_name, pool_size=self.pool_size)
            self.cnx = self.cnxpool.get_connection()
            self.cursor = self.cnx.cursor(buffered=True)
        except InterfaceError as e:
            logger.error(e)
            raise ConnectionError(e)
        except ProgrammingError as e:
            logger.error(e)
            raise CredentialsError(e)
        except Exception as e:
            logger.error(e)
            raise

    def create_pool(self, pool_name, pool_size):
        return MySQLConnectionPool(pool_name=pool_name, pool_size=pool_size, **self.dbconfig)

    def close(self, cnx, cursor):
        cursor.close()
        cnx.close()

    def execute(self, sql, data=None):
        # Get a connection from the connection pool instead of creating a new one
        cnx = self.cnxpool.get_connection()
        cursor = cnx.cursor(buffered=True)
        cursor.execute(sql, data)
        if cursor.rowcount:
            cnx.commit()
            rowcount = cursor.rowcount
            self.close(cnx, cursor)
            return rowcount
        else:
            print('Could not insert record(s): {}, {}'.format(sql, data))
            # return the connection to the pool in this branch too
            self.close(cnx, cursor)
            return 0
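To address the ordering question from the post: building the pool once at module level means it exists by the time any other module imports it, since Python runs a module's top-level code on first import and caches the result. A hedged usage sketch (the table name is hypothetical):

# module-level singleton: created once on first import, shared by all importers,
# so the pool is guaranteed to exist before any DB objects are used
db = DBasePool(dbconfig, dbconfig_pool)

# later, from any module that imports this one:
rows = db.execute('INSERT INTO stat.events (name) VALUES (%s)', ('test',))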