Insert API data (JSON) directly into a PostgreSQL database with Python

main.py
import json, urllib.request, requests
import psycopg2
from psycopg2.extras import execute_values

# Retrieve Json Data from Within API
url = "https://datahead.herokuapp.com/api/employeers/"
response = urllib.request.urlopen(url)
data = json.loads(response.read())

# ***** connect to the db *******
try:
    conn = psycopg2.connect("dbname='datahead' user='postgres' host='localhost' password='datahead'")
except:
    print("I am unable to connect to the database")

# cursor
cur = conn.cursor()

fields = [
    'id',        # integer
    'name',      # varchar
    'log_date',  # date
    'log_time',  # timestamp
    'login',     # timestamp
    'logout'     # timestamp
]

for item in data:
    my_data = [item[field] for field in fields]
    insert_query = "INSERT INTO employee VALUES (%s, %s, %s, %s, %s, %s)"
    cur.execute(insert_query, tuple(my_data))

conn.commit()

# close the cursor
cur.close()
# close the connection
conn.close()
Please see my API data format at https://datahead.herokuapp.com/api/employeers/
Do I need to change the timestamp type here, and if so, which data type should I use?

I think login_time should be of type time without time zone in PostgreSQL.
But you would probably be better off using timestamp with time zone to represent date and time together.
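As a minimal sketch, a schema using timestamp with time zone (timestamptz) and a batched insert could look like the following. The column names follow the fields list in main.py above; the exact types, sizes, and PRIMARY KEY constraint are assumptions about your data, not something confirmed by your API. The fields list and the data variable are the ones built in main.py.

import psycopg2
from psycopg2.extras import execute_values

# Hypothetical table definition: timestamptz = timestamp with time zone.
create_table = """
CREATE TABLE IF NOT EXISTS employee (
    id       integer PRIMARY KEY,
    name     varchar(100),
    log_date date,
    log_time timestamptz,
    login    timestamptz,
    logout   timestamptz
);
"""

conn = psycopg2.connect("dbname='datahead' user='postgres' host='localhost' password='datahead'")
cur = conn.cursor()
cur.execute(create_table)

# Batch insert with the execute_values helper that main.py already imports,
# instead of one execute() per row. fields and data come from main.py above.
rows = [tuple(item[field] for field in fields) for item in data]
execute_values(
    cur,
    "INSERT INTO employee (id, name, log_date, log_time, login, logout) VALUES %s",
    rows,
)
conn.commit()
cur.close()
conn.close()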

Related

Empty data in SQL table

I am learning Tkinter and have made a program (just for practice) that takes user input in an Entry field and saves what the user has entered into a MySQL database when the submit button is clicked.
Code:
from tkinter import *
import tkinter
import mysql.connector
from tkinter import *
import mysql.connector
import random

DB = mysql.connector.connect(
    host="localhost",
    user="Lincoln",
    password="lincoln110904#",
    database="test"
)
cursor = DB.cursor()

gui2 = Tk()
gui2.title("Airline Ticket Booking System")
gui2.iconbitmap("C:/Users/keepa/OneDrive/Desktop/icon.ico")
gui2.maxsize(width=1000, height=70)
gui2.minsize(width=1000, height=700)

data = Entry(gui2, text='food name', textvariable="data_var")
data.pack()
data_var = tkinter.StringVar(data).get()

user_id = random.randint(1128, 9721)

def submit_it():
    sql = "INSERT INTO user_time(user_id, time) VALUES(%s, %s)"
    values = (user_id, str(data_var))
    cursor.execute(sql, values)
    DB.commit()

submit = Button(gui2, text='submit', command=submit_it)
submit.pack()
gui2.mainloop()
Output in MySQL database:
Can anyone please help me understand why the data in the time column is empty/blank?
You need to:
pass a reference to a StringVar, not a plain string, as the textvariable option
get the input content when it is needed, instead of getting it right after the Entry is created:
...
data_var = tkinter.StringVar()
data = tkinter.Entry(gui2, text='food name', textvariable=data_var)
...

def submit_it():
    sql = "INSERT INTO user_time (user_id, time) VALUES (%s, %s)"
    # get the input data from the Entry here
    values = (user_id, data_var.get())
    cursor.execute(sql, values)
    DB.commit()
Also note that wildcard import is not recommended.
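Putting the two changes together, a minimal self-contained sketch could look like the code below. The connection details and the user_time table are taken from the question and assumed to exist; this is an illustration, not the only way to structure it.

import random
import tkinter
import mysql.connector

# connection details copied from the question
DB = mysql.connector.connect(host="localhost", user="Lincoln",
                             password="lincoln110904#", database="test")
cursor = DB.cursor()

gui2 = tkinter.Tk()
gui2.title("Airline Ticket Booking System")

data_var = tkinter.StringVar()                       # keep a reference to the StringVar
entry = tkinter.Entry(gui2, textvariable=data_var)   # pass the StringVar itself, not a string
entry.pack()

user_id = random.randint(1128, 9721)

def submit_it():
    # read the Entry content at click time, not at widget-creation time
    values = (user_id, data_var.get())
    cursor.execute("INSERT INTO user_time (user_id, time) VALUES (%s, %s)", values)
    DB.commit()

tkinter.Button(gui2, text="submit", command=submit_it).pack()
gui2.mainloop()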

FastAPI not running all the functions to return the right values from database

I am trying to make a Twitter points program. Basically, you get points based on the number of likes, retweets and replies your post with a specified hashtag gets. I made an API to get these points from a database, but FastAPI is not running all the functions specified to return the correct values.
API code:
DATABASE_URL = "mysql+mysqlconnector://root:password@localhost:3306/twitterdb"
database = Database(DATABASE_URL)
metadata_obj = MetaData()

engine = create_engine(
    DATABASE_URL, connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
metadata = sqlalchemy.MetaData()
Base = declarative_base()

user_points = sqlalchemy.Table(
    "points",
    metadata_obj,
    sqlalchemy.Column("username", sqlalchemy.String,),
    sqlalchemy.Column("rt_points", sqlalchemy.Integer,),
    sqlalchemy.Column("reply_points", sqlalchemy.Integer),
    sqlalchemy.Column("like_points", sqlalchemy.Integer),
    sqlalchemy.Column("total_points", sqlalchemy.Integer)
)
engine = sqlalchemy.create_engine(
    DATABASE_URL
)
metadata.create_all(engine)

app = FastAPI()

@app.on_event("startup")
async def connect():
    await database.connect()

@app.on_event("shutdown")
async def shutdown():
    await database.disconnect()

class UserName(BaseModel):
    rt_points: int
    reply_points: int
    like_points: int
    total_points: int

@app.get('/userdata/', response_model=UserName)
async def get_points(user: str):
    username = user
    metrics.clear()
    tweets_list = tweet_id(username)
    tweets_list.get_tweet_ids(str(username))
    metrics.main()
    summing = summer(username)
    summing.sum_fun(str(username))
    query = user_points.select().where(user_points.c.username == username)
    user = await database.fetch_one(query)
    return {**user}

if __name__ == "__main__":
    uvicorn.run("main:app", reload=True, host="127.0.0.1", port=5000, log_level="info")
code for metrics.py:
ids = []

class tweet_id:
    def __init__(self, name):
        self.name = name

    def get_tweet_ids(self, name):
        try:
            connection = mysql.connector.connect(host='localhost',
                                                 database='twitterdb',
                                                 user='root',
                                                 password='password')
            cursor = connection.cursor()
            query = "truncate table twitterdb.points"
            query1 = "truncate table twitterdb.Metrics"
            sql_select_query = """SELECT tweetid FROM twitterdb.StreamData WHERE username = %s"""
            # set variable in query
            cursor.execute(query)
            cursor.execute(query1)
            cursor.execute(sql_select_query, (name,))
            # fetch result
            record = cursor.fetchall()
            for row in record:
                ids.append(int(row[0]))
        except mysql.connector.Error as error:
            print("Failed to get record from MySQL table: {}".format(error))
        finally:
            if connection.is_connected():
                cursor.close()
                connection.close()

def create_url():
    tweet_fields = "tweet.fields=public_metrics"
    converted_list = [str(element) for element in ids]
    id_list = ",".join(converted_list)
    url = "https://api.twitter.com/2/tweets?ids={}&{}".format(id_list, tweet_fields)
    return url

#curl 'https://api.twitter.com/2/tweets?ids=1459764778088337413&tweet.fields=public_metrics&expansions=attachments.media_keys&media.fields=public_metrics' --header 'Authorization: Bearer $Bearer

def bearer_oauth(r):
    """
    Method required by bearer token authentication.
    """
    r.headers["Authorization"] = f"Bearer {bearer_token}"
    return r

def connect_to_endpoint(url):
    response = requests.request("GET", url, auth=bearer_oauth)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {} {}".format(
                response.status_code, response.text, ids
            )
        )
        return url
    return response.json()

def main():
    def append_to_database(json_response):
        # Loop through each tweet
        for tweet in json_response['data']:
            # Tweet ID
            tweetid = tweet['id']
            # Tweet metrics
            retweet_count = tweet['public_metrics']['retweet_count']
            reply_count = tweet['public_metrics']['reply_count']
            like_count = tweet['public_metrics']['like_count']
            quote_count = tweet['public_metrics']['quote_count']
            connect(tweetid, retweet_count, reply_count, like_count, quote_count)

    def connect(tweetid, retweet_count, reply_count, like_count, quote_count):
        """
        connect to MySQL database and insert twitter data
        """
        try:
            con = mysql.connector.connect(host='localhost',
                                          database='twitterdb', user='root', password='passsword', charset='utf8')
            if con.is_connected():
                """
                Insert twitter data
                """
                cursor = con.cursor(buffered=True)
                # twitter, golf
                delete_previous_data_query = "truncate table Metrics"
                query = "INSERT INTO Metrics (tweetid,retweet_count,reply_count,like_count,quote_count) VALUES (%s, %s, %s, %s, %s)"
                cursor.execute(delete_previous_data_query)
                cursor.execute(query, (tweetid, retweet_count, reply_count, like_count, quote_count))
                con.commit()
        except Error as e:
            print(e)
        cursor.close()
        con.close()
        return

    url = create_url()
    json_response = connect_to_endpoint(url)
    append_to_database(json_response)

# Function to calculate sum of points and display it
class summer:
    def __init__(self, name):
        self.name = name

    def sum_fun(self, name):
        try:
            con = mysql.connector.connect(host='localhost',
                                          database='twitterdb', user='root', password='password', charset='utf8')
            if con.is_connected():
                cursor = con.cursor(buffered=True)

                def create_points_table():
                    query = ("INSERT INTO twitterdb.points(username, rt_points,reply_points,like_points,total_points) (SELECT %s, SUM(quote_count + retweet_count) * 150, SUM(reply_count) * 50, SUM(like_count) * 10, SUM(quote_count + retweet_count) * 150 + SUM(reply_count) * 50 + SUM(like_count) * 10 FROM twitterdb.Metrics)")
                    cursor.execute(query, (name,))
                    con.commit()

                create_points_table()
        except Error as e:
            print(e)
        cursor.close()
        con.close()

def clear():
    """
    connect to MySQL database and insert twitter data
    """
    try:
        con = mysql.connector.connect(host='localhost',
                                      database='twitterdb', user='root', password='password', charset='utf8')
        if con.is_connected():
            cursor = con.cursor(buffered=True)
            clear_points = ("truncate table twitterdb.points")
            cursor.execute(clear_points)
    except Error as e:
        print(e)
    cursor.close()
    con.close()
    return
What happens here is that there is a database named twitterdb with the tables StreamData, Metrics, and points.
StreamData contains the tweet IDs and usernames of the posts that were tweeted with the specified hashtag, and it is built with the Streaming API.
The issue is this: suppose I have the usernames mark and ramon in the StreamData table. When I input the username mark via the API, it returns the correct points for mark, but if I then enter something like mark1 or any random value, it returns the points for mark again. Likewise, if I enter ramon it gives the right points for ramon, but if I then enter random values again, I get the same points as ramon.
Furthermore, the first time the API is started, entering a random value returns the error raised in the connect_to_endpoint function.
The code logic here is as follows:
We enter a username via the API, and the get_tweet_ids function looks for that username in the StreamData table, selects all the tweet IDs corresponding to that username, and saves them to a list, ids. This list of IDs is sent to the Twitter metrics API endpoint, and the required values from the response are saved to the Metrics table.
Then sum_fun is called to select the sums of likes, RTs, and replies from the Metrics table, multiply them by the specified points, and save the result to the points table along with the username.
Finally, the API returns the values in the points table that match the username.
How can I get it to stop returning values for random data? If invalid data is given, it should raise the exception in the connect_to_endpoint function, but instead it just returns whatever value was previously in the points table.
I tried multiple approaches, like clearing the points values before all the other functions run, and returning only the values corresponding to the username in the points table, but neither worked. When I checked the points table after running with random values, it contained the random value but with the points of the previous valid username.
NOTE: The points table is a temporary table, and values are assigned only when an API call is made.
I am a complete beginner at all this, and this is more of a pet project I have been working on, so please help out. Any and all help and guidance regarding my logic and design, and a fix for this, will be much appreciated. Thanks.
If the code that you have provided for metrics.py is correct, your problem should come from how you declare the variable ids.
In your code you have declared it as a global, so it will not be reset on every function call or class instance creation.
What you should do is declare it in get_tweet_ids():
class tweet_id:
    def __init__(self, name):
        self.name = name

    def get_tweet_ids(self, name):
        ids = []  # modification here
        try:
            connection = mysql.connector.connect(host='localhost',
                                                 database='twitterdb',
                                                 user='root',
                                                 password='password')
            cursor = connection.cursor()
            query = "truncate table twitterdb.points"
            query1 = "truncate table twitterdb.Metrics"
            sql_select_query = """SELECT tweetid FROM twitterdb.StreamData WHERE username = %s"""
            # set variable in query
            cursor.execute(query)
            cursor.execute(query1)
            cursor.execute(sql_select_query, (name,))
            # fetch result
            record = cursor.fetchall()
            for row in record:
                ids.append(int(row[0]))
            return ids  # modification here
        except mysql.connector.Error as error:
            print("Failed to get record from MySQL table: {}".format(error))
        finally:
            if connection.is_connected():
                cursor.close()
                connection.close()
With this you will have a new instance of ids on every get_tweet_ids call.
You will have to change the rest of your code to use this return value, as sketched below.
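For example, a sketch of how the rest of metrics.py and the FastAPI route could consume the returned list. The empty-list check is an extra suggestion beyond the original answer, aimed at the stale-points symptom described in the question, and it assumes HTTPException is imported from fastapi (it is not shown in the original code).

def create_url(ids):
    # build the ids query parameter from the list returned by get_tweet_ids
    tweet_fields = "tweet.fields=public_metrics"
    id_list = ",".join(str(element) for element in ids)
    return "https://api.twitter.com/2/tweets?ids={}&{}".format(id_list, tweet_fields)

def main(ids):
    # append_to_database and connect stay as defined in the question
    url = create_url(ids)
    json_response = connect_to_endpoint(url)
    append_to_database(json_response)

# and inside the FastAPI route:
from fastapi import HTTPException  # assumption: not shown in the original code

ids = tweets_list.get_tweet_ids(str(username))
if not ids:
    # no tweets stored for this username, so fail instead of reusing stale points
    raise HTTPException(status_code=404, detail="no tweets found for this username")
metrics.main(ids)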

'list' object not callable when INSERT into database

I'm trying to populate a database using a query API. There are multiple lists within the JSON payload, but I can't seem to get it right.
When I print, I get: 'list' object is not callable.
The program runs fine and my data comes back, but the SQL statement trips up what I am trying to do. Do I need to declare a dictionary?
import json
import urllib.request
import sqlite3
import urllib.parse

connection = sqlite3.connect("database.db")
cursor = connection.cursor()

# API endpoint
API_KEY = ''
API_ENDPOINT = "https://api.newsfilter.io/public/actions?token={}".format(API_KEY)
print(API_ENDPOINT)

# Define the filter parameters
queryString = "symbols: FRSX AND publishedAt:[2021-03-20 TO 2021-04-02]"
#{}".format(mydict)
payload = {
    "type": "filterArticles",
    "queryString": queryString,
    "from": 0,
    "size": 200
}

# Format your payload to JSON bytes
jsondata = json.dumps(payload)
jsondataasbytes = jsondata.encode('utf-8')

# Instantiate the request
req = urllib.request.Request(API_ENDPOINT)
# Set the correct HTTP header: Content-Type = application/json
req.add_header('Content-Type', 'application/json; charset=utf-8')
# Set the correct length of your request
req.add_header('Content-Length', len(jsondataasbytes))

# Send the request to the API
response = urllib.request.urlopen(req, jsondataasbytes)
# Read the response
res_body = response.read()
# Transform the response into JSON
assets = json.loads(res_body.decode("utf-8"))

articles = assets["articles"]
for asset in articles:
    #print(asset)
    try:
        cursor.execute("""
            INSERT INTO news_SEC (id, title, description, url, imageurl, publishedAt, source, symbol, cik)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (articles('id'), articles('title'), articles('description'), articles('url'), articles('imageurl'), articles('publishedAT'), articles('source'), articles('symbol'), articles('cik')))
    except Exception as e:
        print(e)
        print(asset)
connection.commit()
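The 'list' object is not callable message comes from articles('id') and the similar calls: articles is a list, and calling it with parentheses is not valid. Each article is the dict asset bound by the loop, and its values are read with square brackets. A sketch of the corrected loop follows; the key names are copied from the question's code (including the publishedAT spelling) and may need checking against the actual JSON payload.

for asset in articles:
    try:
        cursor.execute("""
            INSERT INTO news_SEC (id, title, description, url, imageurl, publishedAt, source, symbol, cik)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (asset['id'], asset['title'], asset['description'], asset['url'],
                  asset['imageurl'], asset['publishedAT'], asset['source'],
                  asset['symbol'], asset['cik']))
    except Exception as e:
        print(e)
        print(asset)
connection.commit()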

Automated insertion of csv files into mysql table

I am trying to insert each row from about 2000 CSV files into a MySQL table. With the following code, I have inserted only one row from just one file. How can I automate the code so that it inserts all rows from each file? The insertions need to be done just once.
import pymysql.cursors

connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sql = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (%s,%s,%s,%s,%s,%s,%s, %s)"
        cursor.execute(sql, ('03', 'P Exp', 'SF', 'HWH', 'none', 'no arr today', 'n/a', 'ND'))
        connection.commit()
finally:
    connection.close()
How about this code?
To run it, put all your .csv files in one folder; os.walk(folder_location) walks that folder to get the locations of all the .csv files, and then each one is opened and its rows are inserted into the required MySQL database.
import pandas as pd
import os
import subprocess
import warnings
import pymysql.cursors

warnings.simplefilter("ignore")
cwd = os.getcwd()

connection = pymysql.connect(host='localhost',
                             user='s',
                             password='n9',
                             db='si',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# collect the locations of all .csv files under the current directory
files_csv = []
for subdir, dir, file in os.walk(cwd):
    files_csv += [fi for fi in file if fi.endswith(".csv")]
print(files_csv)

for i in range(len(files_csv)):
    with open(os.path.join(cwd, files_csv[i])) as f:
        lis = [line.split() for line in f]
        for i, x in enumerate(lis):
            # print("line{0} = {1}".format(i, x))
            # HERE x contains the row data and you can access it individually using x[0], x[1], etc.
            # USE YOUR MySQL INSERTION commands here and insert the x row here.
            with connection.cursor() as cursor:
                sql = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (%s,%s,%s,%s,%s,%s,%s, %s)"
                cursor.execute(sql, tuple(x))  # converted values from x (see the update below)
            connection.commit()
Update:
getting the values for the "converted values from x" placeholder:
values = ""
for i in range(len(columns)):
    values = values + x[i] + ","  # x[i] gives the data in the ith column; append all values to be inserted into the SQL table.
values = values[:-1]  # Removing the last extra comma.
command = "INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) VALUES (" + str(values) + ")"
cursor.execute(command)
# Then commit using connection.commit()
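An alternative sketch that avoids building the SQL string by hand is to keep the %s placeholders and batch the rows with executemany. This assumes every split row in lis has exactly the eight fields the table expects.

insert_sql = ("INSERT INTO `TrainsS` (`No.`, `Name`,`Zone`,`From`,`Delay`,`ETA`,`Location`,`To`) "
              "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")

with connection.cursor() as cursor:
    # lis is the list of already-split rows built in the loop above
    cursor.executemany(insert_sql, [tuple(x) for x in lis])
connection.commit()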
import psycopg2
import time
import csv

conn = psycopg2.connect(
    host="localhost",
    database="postgres",
    user="postgres",
    password="postgres"
)
cur = conn.cursor()
start = time.time()

with open('combined_category_data_100 copy.csv', 'r') as file:
    reader = csv.reader(file)
    ncol = len(next(reader))                # read the header row to count the columns
    placeholders = ",".join(["%s"] * ncol)  # one %s per column
    for row in reader:
        cur.execute("insert into data values ({})".format(placeholders), row)
    conn.commit()

print("data entered successfully")
end = time.time()
print(f" time taken is {end - start}")
cur.close()
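Since this answer times the load, it may also be worth noting, as a sketch using the same table and file name, that psycopg2 can bulk-load a CSV with copy_expert, which is usually much faster than one INSERT per row.

import psycopg2

conn = psycopg2.connect(host="localhost", database="postgres",
                        user="postgres", password="postgres")
cur = conn.cursor()

with open('combined_category_data_100 copy.csv', 'r') as f:
    # COPY streams the whole file to the server in one round trip; HEADER skips the first row
    cur.copy_expert("COPY data FROM STDIN WITH (FORMAT csv, HEADER true)", f)

conn.commit()
cur.close()
conn.close()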

scrapy and mysql

I am trying to get Scrapy to insert crawled data into MySQL. My code crawls fine and collects the data in the buffer, and it does not error, but the database is never updated. No luck, no errors.
pipeline.py
from twisted.enterprise import adbapi
import datetime
import MySQLdb.cursors

class SQLStorePipeline(object):
    def __init__(self):
        self.dbpool = adbapi.ConnectionPool('MySQLdb', db='craigs',
                user='bra', passwd='boobs', cursorclass=MySQLdb.cursors.DictCursor,
                charset='utf8', use_unicode=True)

    def process_item(self, items, spider):
        # run db query in thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, items)
        query.addErrback(self.handle_error)
        return items

    def _conditional_insert(self, tx, items):
        # create record if doesn't exist.
        # all this block run on it's own thread
        tx.execute("select * from scraped where link = %s", (items['link'][0], ))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % items, level=log.DEBUG)
        else:
            tx.execute(
                "insert into scraped (posting_id, email, location, text, title) "
                "values (%s, %s, %s, %s, %s)",
                (items['posting_id'][0],
                 items['email'][1],
                 items['location'][2],
                 items['text'][3],
                 items['title'][4],
                )
            )
            log.msg("Item stored in db: %s" % items, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
crawl code
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from craigs.items import CraigsItem

class MySpider(CrawlSpider):
    name = "craigs"
    f = open("urls.txt")
    start_urls = [url.strip() for url in f.readlines()]
    f.close()
    rules = [Rule(SgmlLinkExtractor(restrict_xpaths=('/html/body/blockquote[3]/p/a',)), follow=True, callback='parse_profile')]

    def parse_profile(self, response):
        items = []
        img = CraigsItem()
        hxs = HtmlXPathSelector(response)
        img['title'] = hxs.select('//h2[contains(@class, "postingtitle")]/text()').extract()
        img['posting_id'] = hxs.select('//html/body/article/section/section[2]/div/p/text()').extract()
        items.append(img)
        return items[0]
        return img[0]
settings.py
BOT_NAME = 'craigs'
BOT_VERSION = '1.0'
SPIDER_MODULES = ['craigs.spiders']
NEWSPIDER_MODULE = 'craigs.spiders'
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
The reason the pipeline code is not being called at all is that it hasn't been activated. Activation is done by adding a new section to settings.py, as described on the Item Pipelines page in the documentation, e.g.
ITEM_PIPELINES = [
    'craigs.pipeline.SQLStorePipeline',
]
Additionally, your parse_profile function should just return img. You'd only add an items list to return if a single response page would result in multiple items.
Activate the pipeline in settings and use yield instead of return.
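For instance, a sketch of the callback from the question rewritten to yield the item:

def parse_profile(self, response):
    img = CraigsItem()
    hxs = HtmlXPathSelector(response)
    img['title'] = hxs.select('//h2[contains(@class, "postingtitle")]/text()').extract()
    img['posting_id'] = hxs.select('//html/body/article/section/section[2]/div/p/text()').extract()
    yield img  # yield the item so the pipeline receives it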
You should COMMIT the current transaction, which makes the changes permanent.
So after
tx.execute(
    "insert into scraped (posting_id, email, location, text, title) "
    "values (%s, %s, %s, %s, %s)",
    (items['posting_id'][0],
     items['email'][1],
     items['location'][2],
     items['text'][3],
     items['title'][4],
    )
)
you have to call
db.commit()
where db is something like
db = MySQLdb.connect(host="localhost", user="root", passwd="1234", db="database_name")
Please try it.