Error when converting DataFrame to SQL - pandas - MySQL

I am getting this error when trying to convert my DataFrame to SQL:
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': Not all parameters were used in the SQL statement.
My connection variable:
con = mysql.connector.connect(
    host="****",
    port="****",
    database="*****",
    user="*****",
    password="*****"
)
My attempt to convert it to SQL:
df.to_sql('menageiro2',con)
Note: I am using:
import pandas as pd
import sqlalchemy
import mysql.connector

The reference says con: sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection. You appear to be passing in a mysql.connector connection instead of a SQLAlchemy engine (that you connected to MySQL):
con = sqlalchemy.create_engine(
    'mysql+mysqlconnector://<user>:<password>@<host>:<port>/<default_db>...')
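For reference, a minimal sketch of the fixed flow; the credentials, host, and the small sample DataFrame are placeholders, not values from the question:
import pandas as pd
import sqlalchemy

df = pd.DataFrame({'a': [1, 2, 3]})  # stand-in for the question's DataFrame

# A SQLAlchemy engine for MySQL via mysql-connector-python (placeholder URI).
engine = sqlalchemy.create_engine(
    'mysql+mysqlconnector://user:password@host:3306/my_database')

# pandas accepts the engine directly and emits MySQL-compatible SQL,
# so the sqlite_master probe from the error message goes away.
df.to_sql('menageiro2', engine, if_exists='replace', index=False)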

Related

Airflow Connections & pandas.to_sql: don't they work together?

In my DAG I need to use pandas' .to_sql.
It works with SQLAlchemy, but that is not safe (the credentials are hard-coded).
It errors with a connection taken from Airflow. Why?
from sqlalchemy import create_engine
# This works, but it is not safe (credentials hard-coded in the DAG):
conn = create_engine('postgresql+psycopg2://login:pass@localhost:5432/de')
# Safer: use a connection defined in Airflow, PG_PROJECT_CONN
dwh_hook = PostgresHook('PG_PROJECT_CONN')
conn = dwh_hook.get_conn()
# Works OK:
df_ = pd.read_sql('select * from stg.restaurants;', conn)
# Does not work: WARNING - /usr/local/lib/python3.8/dist-packages/pandas/io/sql.py:761
# UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI
# or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested, please consider using SQLAlchemy
df_restaurants.to_sql('restaurants', conn, if_exists='append', schema='stg', index=False, chunksize=100)
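One way to keep the credentials in Airflow and still hand pandas a SQLAlchemy connectable is to build the engine from the hook itself. A minimal sketch, assuming the connection id PG_PROJECT_CONN from above and an Airflow version whose PostgresHook (via DbApiHook) provides get_sqlalchemy_engine(); the import path may differ between Airflow versions:
import pandas as pd
from airflow.providers.postgres.hooks.postgres import PostgresHook

dwh_hook = PostgresHook('PG_PROJECT_CONN')
# Build a SQLAlchemy engine from the stored connection, so no password
# lives in the DAG file and pandas stops warning about raw DBAPI2 objects.
engine = dwh_hook.get_sqlalchemy_engine()

df_restaurants = pd.read_sql('select * from stg.restaurants;', engine)
df_restaurants.to_sql('restaurants', engine, if_exists='append',
                      schema='stg', index=False, chunksize=100)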

Error loading delimited file into MySQL using Airflow (Error code 2068)

I have Airflow installed on Ubuntu as WSL on Windows.
I am trying to load a delimited file that is stored on my C drive into a MySQL database using the code below:
import logging
import os
import csv
import datetime
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.hooks.mysql_hook import MySqlHook

def bulk_load_sql(table_name, **kwargs):
    local_filepath = 'some c drive path'
    conn = MySqlHook(conn_name_attr='mysql_default')
    conn.bulk_load(table_name, local_filepath)
    return table_name

dag = DAG(
    "dag_name",
    start_date=datetime.datetime.now() - datetime.timedelta(days=1),
    schedule_interval=None)

t1 = PythonOperator(
    task_id='csv_to_stgtbl',
    provide_context=True,
    python_callable=bulk_load_sql,
    op_kwargs={'table_name': 'mysqltablnm'},
    dag=dag
)
It gives the following exception:
MySQLdb._exceptions.OperationalError: (2068, 'LOAD DATA LOCAL INFILE file request rejected due to restrictions on access.')
I have checked the following setting on MySQL and it's ON:
SHOW GLOBAL VARIABLES LIKE 'local_infile'
Could someone please provide some pointers as to how to fix it?
Is there any other way I can load a delimited file into MySQL using Airflow?
For now, I have implemented a work around as follows:
def load_staging():
    mysqlHook = MySqlHook(conn_name_attr='mysql_default')
    conn = mysqlHook.get_conn()
    cursor = conn.cursor()
    csv_data = csv.reader(open('c drive file path'))
    header = next(csv_data)
    logging.info('Importing the CSV Files')
    for row in csv_data:
        cursor.execute("INSERT INTO table_name (col1,col2,col3) VALUES (%s, %s, %s)",
                       row)
    conn.commit()
    cursor.close()

t1 = PythonOperator(
    task_id='csv_to_stgtbl',
    python_callable=load_staging,
    dag=dag
)
However, it would have been great if LOAD DATA LOCAL INFILE had worked.
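Error 2068 is usually raised because the client side refused the local-infile request even though the server variable is ON; with Airflow's MySqlHook the client flag can be enabled through the connection's Extra field. A minimal sketch, assuming a mysql_default connection whose Extra JSON you can edit in the Airflow UI and a MySqlHook version that reads local_infile from the connection extras:
# In the Airflow UI, set the mysql_default connection's Extra field to:
#   {"local_infile": true}
# get_conn() then opens the client with local_infile enabled, so bulk_load
# can issue LOAD DATA LOCAL INFILE against the staging table.
from airflow.hooks.mysql_hook import MySqlHook

def bulk_load_sql(table_name, **kwargs):
    local_filepath = 'some c drive path'  # same placeholder path as above
    hook = MySqlHook(mysql_conn_id='mysql_default')
    hook.bulk_load(table_name, local_filepath)
    return table_name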

MySQL export to GCP Cloud Storage

I have MySQL running on-prem and would like to migrate it to MySQL running on Cloud SQL (GCP). I first want to export tables to Cloud Storage as JSON files and then from there move them to MySQL (Cloud SQL) & BigQuery.
Now I wonder how I should do this - export each table as JSON, or just dump the whole database to Cloud Storage? (We might need to change schemas for some tables, which is why I'm thinking of doing it one by one.)
Is there any way to do it with Python pandas?
I found this --> Pandas Dataframe to Cloud Storage Bucket
but I don't understand how to connect this to my GCP Cloud Storage, and how to run mycursor.execute("SELECT * FROM table") for all my tables.
EDIT 1:
So I came up with this, but it works only for the selected schema + table. How can I do this for all tables in the schema?
#!/usr/bin/env python3
import mysql.connector
import pandas as pd
from google.cloud import storage
from google.oauth2 import service_account
import os
import csv
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/home/python2/key.json"
#export GOOGLE_APPLICATION_CREDENTIALS="/home/python2/key.json"
#credentials = storage.Client.from_service_account_json('/home/python2/key.json')
#credentials = service_account.Credentials.from_service_account_file('key.json')
mydb = mysql.connector.connect(
    host="localhost", user="root", passwd="pass_word", database="test")
mycursor = mydb.cursor(named_tuple=True)
mycursor.execute("SELECT * FROM test")
myresult = mycursor.fetchall()
df = pd.DataFrame(data=myresult)
storage_client = storage.Client()
bucket = storage_client.get_bucket("my-buckets-1234567")
blob = bucket.blob("file.json")
df = pd.DataFrame(data=myresult).to_json(orient='records')
#df = pd.DataFrame(data=myresult).to_csv(sep=";", index=False, quotechar='"', quoting=csv.QUOTE_ALL, encoding="UTF-8")
blob.upload_from_string(data=df)
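To cover every table in the schema rather than just test, one option is to list the table names first and write one JSON object per table. A minimal sketch continuing from the variables already defined above (mycursor, bucket, pd); the test/ object prefix is just illustrative:
# Export each base table of the connected schema as its own JSON blob.
mycursor.execute("SHOW TABLES")
tables = [row[0] for row in mycursor.fetchall()]

for table in tables:
    mycursor.execute(f"SELECT * FROM `{table}`")
    payload = pd.DataFrame(data=mycursor.fetchall()).to_json(orient='records')
    bucket.blob(f"test/{table}.json").upload_from_string(data=payload)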

Snowflake Account must be specified error, but it is specified

I have the code below; I have the account, username, password, etc., but I'm still seeing this error:
sqlalchemy.exc.ProgrammingError: (snowflake.connector.errors.ProgrammingError)
251001: Account must be specified
I've also tried changing the engine variable in my create_db_engine function as below, but I see the same error:
engine = snowflake.connector.connect(
    user='USER',
    password='PASSWORD',
    account='ACCOUNT',
    warehouse='WAREHOUSE',
    database='DATABASE',
    schema='SCHEMA'
)
Here is my code:
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas, pd_writer
from pandas import json_normalize
import requests

df = 'my_dataframe'

def create_db_engine(db_name, schema_name):
    engine = URL(
        account="ab12345.us-west-2.snowflakecomputing.com",
        user="my_user",
        password="my_pw",
        database="DB",
        schema="PUBLIC",
        warehouse="WH1",
        role="DEV"
    )
    return engine

def create_table(out_df, table_name, idx=False):
    url = create_db_engine(db_name="db", schema_name="skm")
    engine = create_engine(url)
    connection = engine.connect()
    try:
        out_df.to_sql(
            table_name, connection, if_exists="append", index=idx, method=pd_writer
        )
    except ConnectionError:
        print("Unable to connect to database!")
    finally:
        connection.close()
        engine.dispose()
    return True

print(df.head)
create_table(df, "reporting")
Given the Snowflake documentation for SQLAlchemy, your account parameter should not include snowflakecomputing.com.
So you should try with ab12345.us-west-2, and the connector will append the domain part automatically for you.
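A minimal sketch of the corrected create_db_engine, keeping the question's placeholder credentials and only trimming the account identifier:
def create_db_engine(db_name, schema_name):
    # The account is just the identifier plus region; the connector
    # appends ".snowflakecomputing.com" itself.
    return URL(
        account="ab12345.us-west-2",
        user="my_user",
        password="my_pw",
        database="DB",
        schema="PUBLIC",
        warehouse="WH1",
        role="DEV"
    )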

How to download data from a MySQL connection

After making a connection with the MySQLdb library, I'd like to download all the tables of the database from the connection to my local machine (transforming them into pandas DataFrames).
Here's my code:
import MySQLdb
import pandas as pd
conn = MySQLdb.connect(host='host' , user='datbase', passwd='password', db='databases' )
cursor = conn.cursor()
query = cursor.execute(' SELECT * FROM pro ')
df = pd.read_sql(query , conn)
row = cursor.fetchone()
conn.close()
I finally got the connection working, so I can run queries. But I'd like to use these SQL tables as pandas DataFrames. How can I do it?
Just use
query = 'SELECT * FROM pro'
df = pd.read_sql(query, conn)
and df should already be your desired DataFrame.
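If you want every table in the database rather than just pro, one option is to ask MySQL for the table list and read each one into its own DataFrame. A minimal sketch reusing the open conn from the question (run it before conn.close()); the dataframes dict is just illustrative:
cursor = conn.cursor()
cursor.execute("SHOW TABLES")
tables = [row[0] for row in cursor.fetchall()]

# One DataFrame per table, keyed by table name.
dataframes = {table: pd.read_sql(f"SELECT * FROM `{table}`", conn)
              for table in tables}

dataframes['pro'].head()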