I'm trying to compare a date from a JSON extract and a date in a CSV file.
Even though I print both dates and their types, and they are the same, the comparison always says they are different when they aren't.
I've already tried many things but nothing worked.
The line doing the comparison is:
if (item_in["Data_Hora"].encode('utf-8')).strip <> Last_Date[1].strip:
Here's the full code:
import json
import requests
import csv
import os
import itertools
def get_last_row(In_file):
    # Return the last row of the CSV file
    with open(In_file, 'rb') as f:
        reader = csv.reader(f)
        lastline = reader.next()
        for line in reader:
            lastline = line
            print type(line)
        return lastline
params = {
    'api_key': 'tz-XmMtuYEVeeRjIdk6cqW1z',
}
r = requests.get(
    'https://www.parsehub.com/api/v2/projects/tw5xOi-cVrMG-_vAZC_cX1QX/last_ready_run/data',
    params=params)
json_object_in = r.content
data_in = json.loads(json_object_in)
for item_in in data_in["Estacao_Pluviometrica"]:
    if item_in["Regiao"] != "Santa Felicidade":  # Santa Felicidade has two stations; it is excluded from the process until I can tell them apart
        # Change the path below to the directory where the files will be stored: "{directory}/%s.csv"
        Path = "/tmp/csv2/%s.csv" % item_in["Regiao"]
        if os.path.isfile(Path):  # Check if a file for this region already exists
            OutFile = open(Path, 'a+')
            Last_Date = get_last_row(Path)
            # Check if the last date is equal to the current one - Error is here - the comparison is always different even though the data is the same.
            if (item_in["Data_Hora"].encode('utf-8')).strip <> Last_Date[1].strip:
                print "Nova entrada para %s" % item_in["Regiao"]
                fieldnames = ["Regiao", "Data e Hora", "Chuva (mm)"]
                CSVFile = csv.DictWriter(OutFile, fieldnames=fieldnames, delimiter=',', lineterminator='\n')
                CSVFile.writerow({"Regiao": item_in["Regiao"].encode('latin-1'), "Data e Hora": item_in["Data_Hora"], "Chuva (mm)": item_in["Chuva_mm"]})
            else:  # When the entry is the same as before
                print "Entrada repetida para %s" % item_in["Regiao"]
            OutFile.close()
        else:  # If no file exists yet, create a new one
            print "Criando novo arquivo para regiao %s em %s." % (item_in["Regiao"], Path)
            OutFile = open(Path, 'w')
            fieldnames = ["Regiao", "Data e Hora", "Chuva (mm)"]
            CSVFile = csv.DictWriter(OutFile, fieldnames=fieldnames, delimiter=',', lineterminator='\n')
            CSVFile.writeheader()
            CSVFile.writerow({"Regiao": item_in["Regiao"].encode('latin-1'), "Data e Hora": item_in["Data_Hora"], "Chuva (mm)": item_in["Chuva_mm"]})
            OutFile.close()
I'm using Python 2.7.10 and IDLE
Thanks.
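For context on the flagged line: `strip` is referenced without parentheses, so the comparison is between two bound-method objects rather than between strings, and methods of two distinct strings never compare equal. A minimal Python 2 sketch of the difference (illustrative values only):

a = " 01/08/2015 10:00 "
b = "01/08/2015 10:00"

# Without parentheses, .strip is a bound-method object, not a string:
print a.strip             # <built-in method strip of str object at 0x...>
print a.strip != b.strip  # True: method objects of two distinct strings always differ

# Calling the methods compares the stripped text itself:
print a.strip() != b.strip()  # False: the stripped strings match

If that is the issue, calling the methods, i.e. `.strip() != .strip()`, should make the comparison meaningful (`!=` also replaces the legacy `<>` operator).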
I have a JSON file whose contents I show in a treeview. Currently I have a function that selects an item and deletes it, but it only deletes it in the treeview; the JSON file is not updated.
My question is how to access the JSON in this function so the selected item is deleted there as well.
This function deletes the item in the treeview:
def borrar_select():
    borrar1 = json_tree.selection()[0]
    json_tree.delete(borrar1)
I tried opening the JSON to read it and write it back inside this function:
def borrar_select():
    with open('prueba1.json', "r") as f:
        data = json.load(f)
        for record in data['Clientes']:
            borrar1 = json_tree.selection()[0]
            json_tree.delete(borrar1)
    with open('prueba1.json', "w") as f:
        json.dump(record, f, indent=4)
Actually, it deletes the selected row in the treeview but in the console I get the following error.
PS C:\Users\*\Desktop\Tkinter> & E:/Prog/Python3/Python311/python.exe c:/Users/*/Desktop/Tkinter/test1.py
Exception in Tkinter callback
Traceback (most recent call last):
  File "E:\Prog\Python3\Python311\Lib\tkinter\__init__.py", line 1948, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "c:\Users\*\Desktop\Tkinter\test1.py", line 102, in borrar_select
    borrar1 = json_tree.selection()[0]
              ~~~~~~~~~~~~~~~~~~~~~^^^
IndexError: tuple index out of range
Consequently, the modification is not saved in the json either.
I started using Python very recently, so I would appreciate it if someone could give me a hand solving this.
This is an example of how it works:
import json
from tkinter import ttk
from tkinter import *
import tkinter as tk
ventana = tk.Tk()
ventana.title("Test")
ventana.geometry("1000x600")
frame1 = tk.Frame(ventana, bg="green", height=300, width=700)
frame1.grid(row=1, column=0)
frame2 = tk.Frame(ventana, bg="yellow", height=300, width=700)
frame2.grid(row=2, column=0)
frame_entry = tk.Frame(frame2)  # Frame for the entry widgets
frame_entry.pack(pady=20)
tree_frame = tk.Frame(frame1)  # Frame for the tree
tree_frame.pack(pady=20)
# Treeview style
style = ttk.Style()
style.theme_use("clam")
style.configure("Treeview", background="#c7c7c7", foreground="black", rowheight=25,fieldbackground="#a1a1a1")
style.map("Treeview", background=[('selected','green')])
tree_scroll = Scrollbar(tree_frame)  # Scrollbar for the tree
tree_scroll.pack(side=RIGHT, fill=Y)
# Treeview list
json_tree = ttk.Treeview(tree_frame, yscrollcommand=tree_scroll.set)
json_tree.pack()
#config scroll
tree_scroll.config(command=json_tree.yview)
# Define columns
json_tree['column'] = ("Logo", "Name", "Last Name", "Something")
# Column format
json_tree.column("#0", width=0, minwidth=0)  # Ghost column
json_tree.column("Logo", anchor="w", width=120)
json_tree.column("Name", anchor="w", width=120)
json_tree.column("Last Name", anchor="w", width=120)
json_tree.column("Something", anchor="w", width=120)
# Headings
json_tree.heading("#0", text="", anchor="w")  # Ghost column
json_tree.heading("Logo", text="Logo", anchor="w")
json_tree.heading("Name", text="Name", anchor="w")
json_tree.heading("Last Name", text="Last Name", anchor="w")
json_tree.heading("Something", text="Something", anchor="w")
# Row colors
json_tree.tag_configure('par', background="#fff")
json_tree.tag_configure('inpar', background="#d6d6d6")
# Open and read the JSON to access the properties of each object
with open('prueba1.json', "r") as f:
    data = json.load(f)
count = 0
for record in data['Clientes']:
    if count % 2 == 0:
        json_tree.insert(parent='', index="end", iid=count, text="", values=(record['Logo'], record['Name'], record['Last Name'], record['Something']), tags=('par',))
    else:
        json_tree.insert(parent='', index="end", iid=count, text="", values=(record['Logo'], record['Name'], record['Last Name'], record['Something']), tags=('inpar',))
    count += 1
# Entry widgets
l1 = Label( frame_entry, text="Logo")
l1.grid(row=0, column=0)
logo_lb = Entry( frame_entry)
logo_lb.grid(row=1, column=0)
l2 = Label( frame_entry, text="Name")
l2.grid(row=0, column=1)
name_lb = Entry(frame_entry)
name_lb.grid(row=1, column=1)
l3 = Label( frame_entry, text="Last Name")
l3.grid(row=0, column=2)
lastname_lb = Entry(frame_entry)
lastname_lb.grid(row=1, column=2)
l4 = Label( frame_entry, text="Something")
l4.grid(row=0, column=3,)
something_lb = Entry(frame_entry)
something_lb.grid(row=1, column=3)
# Button functions
def borrar_select():
    with open('prueba1.json', "r") as f:
        data = json.load(f)
        for record in data['Clientes']:
            borrar1 = json_tree.selection()[0]
            json_tree.delete(borrar1)
    with open('prueba1.json', "w") as f:
        json.dump(record, f, indent=4)
    # Clear the entry boxes
    logo_lb.delete(0, END)
    name_lb.delete(0, END)
    lastname_lb.delete(0, END)
    something_lb.delete(0, END)
borrar_btn = tk.Button(frame2, text="Delete", command=borrar_select)
borrar_btn.pack(side=RIGHT, ipadx=30, pady=10)
def select_record():
    # Clear the entry boxes
    logo_lb.delete(0, END)
    name_lb.delete(0, END)
    lastname_lb.delete(0, END)
    something_lb.delete(0, END)
    selected = json_tree.focus()  # iid of the selected record in the tree
    values = json_tree.item(selected, 'values')  # values of the selected record
    logo_lb.insert(0, values[0])
    name_lb.insert(0, values[1])
    lastname_lb.insert(0, values[2])
    something_lb.insert(0, values[3])
select_btn = tk.Button(frame2, text="Select", command=select_record)
select_btn.pack(side=LEFT, ipadx=30,)
ventana.mainloop()
{
    "Clientes": [
        {
            "Logo": "C:/Users/*/Desktop/Tkinter/logos/selavalacarita.png",
            "Name": "2",
            "Last Name": "3",
            "Something": "4"
        },
        {
            "Logo": "C:/Users/*/Desktop/Tkinter/logos/selavalacarita.png",
            "Name": "1",
            "Last Name": "4",
            "Something": "7"
        }
    ]
}
The below code inside borrar_select():
with open('prueba1.json', "r") as f:
    data = json.load(f)
    for record in data['Clientes']:
        borrar1 = json_tree.selection()[0]
        json_tree.delete(borrar1)
will loop through all the records in the JSON file and, on each iteration, try to delete the first selected item in the treeview. If no item is selected, it raises an exception because json_tree.selection() returns an empty tuple. Even if one item is selected, an exception is still raised on the second iteration because the selected item has already been deleted.
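A short sketch of those two failure modes, assuming the json_tree widget from the question:

# 1) Nothing selected: selection() returns an empty tuple, so
#    indexing it raises IndexError: tuple index out of range
selections = json_tree.selection()  # ()
# selections[0]                     # would raise the IndexError above

# 2) One item selected, two records in the file: the first loop
#    iteration deletes the selected item, so on the second iteration
#    selection() is empty again and [0] raises the same error.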
You can simply remove the selected items using json_tree.delete(...) and then save the remaining items to file:
# Button functions
def borrar_select():
    selections = json_tree.selection()
    if selections:
        # Remove the selected items from the treeview
        json_tree.delete(*selections)
        # Get the remaining records from the treeview as a list of dictionaries
        fields = ["Logo", "Name", "Last Name", "Something"]
        records = [dict(zip(fields, json_tree.item(iid, "values"))) for iid in json_tree.get_children()]
        # Save the list of dictionaries to file
        with open('prueba1.json', 'w') as f:
            json.dump({'Clientes': records}, f, indent=4)
    # Are the lines below necessary?
    # Clear the entry boxes
    logo_lb.delete(0, END)
    name_lb.delete(0, END)
    lastname_lb.delete(0, END)
    something_lb.delete(0, END)
The code:
# Import the required module
import tabula

# Read a PDF file
# The total page count can change; all pages must be taken (to be generic)
df = tabula.read_pdf("C:/Users/Desktop/abstract/abstract.pdf", encoding='cp1252', pages='all')
# Convert the PDF into CSV
df1 = df.to_csv('C:/Users/Desktop/abstract.pdf')
print(df1)
Hello friends, I have a monthly account statement in PDF. I want to get the name and period information as text, and save the date, description, amount, and gift information as CSV and read it back.
I tried tabula's read_pdf but couldn't get a file the way I wanted. In addition, there are special characters in the PDF file, and these are decoded incorrectly (ğ, ü, ç, etc.).
How can I get the format I want? That is, I can't reach the İşlem tarihi, Açıklama, Tutar, and Bankomat Para columns in the CSV file. Or is there a better way to convert the PDF to a CSV file?
original version (2 pages):
original version end of the page:
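As an aside before the fitz-based solution below: tabula-py's read_pdf returns a list of DataFrames by default (one per detected table), so .to_csv cannot be called on its result directly, and the output path in the snippet above ends in .pdf rather than .csv. A hedged sketch of how that attempt might be repaired (paths and encoding are assumptions; utf-8 may also behave better for the Turkish characters than cp1252):

import tabula
import pandas as pd

# read_pdf returns a list of DataFrames, one per table it detects
tables = tabula.read_pdf("C:/Users/Desktop/abstract/abstract.pdf",
                         pages='all', encoding='utf-8')

# Stack the detected tables and write an actual .csv file
df = pd.concat(tables, ignore_index=True)
df.to_csv("C:/Users/Desktop/abstract/abstract.csv", index=False)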
The code:
# Libraries
import pandas as pd
import fitz
import io
import os  # needed for the os.path.isfile check below

def set_texts(pdf_files: list):
    # Read each PDF with fitz (PyMuPDF) and return its pages as one block of text
    print("starting to text process")
    for pdf_file in pdf_files:
        with fitz.open(pdf_file) as doc:
            text = ""
            for page in doc:
                new_text = page.get_text()
                text += new_text
    return text
file_names = [r"C:/Users/Desktop/abstract/Alışveris_Özeti.pdf"]
text = set_texts(file_names)
buffer = io.StringIO(text)
new_text = ""
flag = False
i = 0
for line in buffer.readlines():
# print(line)
if "Bankomat Para Bilgileriniz" in line:
flag = False
elif "Bankomat Para (TL)" in line:
flag = True
elif "Vakıfbank" in line:
flag = False
elif flag:
new_text += line
elif "Sayın" in line :
name=(line.lstrip("Sayın ")).replace(",","")
print(name)
buffer = io.StringIO(new_text)
text_list = buffer.readlines()
# /n correction
a = list(map(lambda x: x.replace('\n', ''), text_list))
# Converting 4 spaces to single space
b = list(map(lambda x: x.replace(' ', ' '), a))
# card vocabulay
c = list(map(lambda x: x.replace('BANKOMAT KART ', 'BANKOMAT KART'), b))
# undesired words
stopwords = ['BANKOMAT KART','İŞLEMİ', 'ALIŞVERİŞ EKSTRESİ', 'Dekont yerine kullanılmaz. Uyuşmazlık halinde Banka kayıtları esas alınacaktır', 'www.vakifbank.com.tr I 0850 222 0 724', 'Türkiye Vakıflar Bankası T.A.O. Büyük Mükellefler V.D. 9220034970 Sicil Numarası: 776444','Saray Mahallesi Dr. Adnan Büyükdeniz Caddesi No :7 / A-B Ümraniye /İSTANBUL Mersis: 0922003497000017','Saray Mahallesi Dr. Adnan Büyükdeniz Caddesi No :7 / A-B Ümraniye /İSTANBUL Mersis: 0922003497000017 Sf 2 \\ 3 ']
d = list(filter(lambda w: w not in stopwords, c))
e = list(map(lambda x: x.replace('CÜZDANDAN HESABA TRANSFER ', 'CÜZDANDAN HESABA TRANSFER İŞLEMİ'), d))
# Align the list elements by 4 in the appropriate order according to the 4 columns of the df
z=[]
for i in range(int(len(e)/4)):
y=((e[i*4:i*4+4]))
z.append(y)
df = pd.DataFrame( z,columns=['ISLEM TARIHI', 'ACIKLAMA','TUTAR', "BANKOMAT PARA"])
# creating csv file
is_file=os.path.isfile('C:/Users/Desktop/abstract/Alışveris_Özeti.csv')
if is_file==False:
df.to_csv("Alışveris_Özeti.csv", index=False)
print('CSV file has created...')
else:
print("CSV file already exists.")
I started using MySQL today. I am building my own level system for a Discord bot (I use discord.py) and I cannot retrieve the number I need from my database.
import discord
import random
from discord import client
from discord.ext import commands
import mysql.connector
from discord.utils import get
from random import choice
token = 'token'
client = commands.Bot(command_prefix='°')
levelsystem_db = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="pass",
    database="userlevels",
    auth_plugin="mysql_native_password"
)
@client.event
async def on_ready():
    print('Bot online')
    print(levelsystem_db)

@client.event
async def on_message(message):
    if message.author.bot:
        return
    xp = generateXP()
    print(f"{message.author.name} ha ricevuto {str(xp)} xp")
    cursor = levelsystem_db.cursor()
    cursor.execute(f"SELECT user_xp FROM users WHERE client_id = {str(message.author.id)}")
    result = cursor.fetchall()
    print(result)
    print(len(result))
    if (len(result) == 0):
        print("L'utente non è stato aggiunto al database.")
        cursor.execute(f"INSERT INTO users VALUES({str(message.author.id)}, {str(xp)}, 0)")
        levelsystem_db.commit()
        print("Aggiunta completata")
        await level_up(cursor, xp, message.author, message)
    else:
        newXP = result[0][0] + xp
        print(f"Gli xp di {message.author.name} sono aggiornati a {newXP}")
        cursor.execute(f"UPDATE users SET user_xp = {str(newXP)} WHERE client_id = {str(message.author.id)}")
        levelsystem_db.commit()
        print(f"Aggiornamento degli xp di {message.author.name} completato.")
        await level_up(cursor, newXP, message.author, message)

def generateXP():
    return random.randint(5, 10)

async def level_up(cursor, NewXP, user, message):
    cursor.execute(f"SELECT user_level FROM users WHERE client_id = {str(message.author.id)}")
    lvl_start = cursor.fetchall()
    lvl_end = int(NewXP ** (1/4))
    print(str(lvl_start))
    print(str(lvl_end))
    if (str(lvl_start) < str(lvl_end)):
        await message.channel.send(f"{user.mention} è salito al livello {lvl_end}")
        print(f"Il livello di {message.author.name} si sta aggiornando al livello {lvl_end}")
        cursor.execute(f"UPDATE users SET user_level = {str(lvl_end)} WHERE client_id = {str(message.author.id)}")
        levelsystem_db.commit()
        print(f"Aggiornamento del livello di {message.author.name} completato.")
    else:
        print("Non è abbastanza!")
The part that gives me problems is this:
cursor.execute(f"SELECT user_level FROM users WHERE client_id = {str(message.author.id)}")
lvl_start = cursor.fetchall()
lvl_end = int(NewXP ** (1/4))
print(str(lvl_start))
print(str(lvl_end))
I would like the lvl_start variable to give me back the integer instead of the list.
I should get 0 from print(str(lvl_start)), not [(0,)].
I don't know if I made myself clear, but I would like to solve this problem. Is there a way?
In my experience with the MySQL connector, fetchall() returns a list of tuples: one tuple per row, with one element per selected column. A single-column SELECT therefore yields 1-tuples, so a one-row result looks like [(0,)]. To get the plain value, use the nested indexing lvl_start[0][0].
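A minimal sketch of that indexing, assuming the cursor and message objects from the question (the placeholder-style query is a general safety improvement over f-string interpolation, not what the original code used):

cursor.execute("SELECT user_level FROM users WHERE client_id = %s",
               (str(message.author.id),))
rows = cursor.fetchall()   # e.g. [(0,)]: a list holding one 1-tuple
lvl_start = rows[0][0]     # 0: first row, first column

# fetchone() is an alternative that returns a single tuple, or None:
# row = cursor.fetchone()
# lvl_start = row[0] if row else 0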
I'm extracting data from Twitter and putting it into a CSV. How do I separate it into different columns (userName, text) instead of having everything crammed together?
This is for Python 3.6.
import csv
import tweepy
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
search = tweepy.Cursor(api.search, q="#XXXXX", lang="es").items(5)
tweet = [[item.user.name.encode("utf-8"), item.text.encode("utf-8")] for item in search]
print(tweet)
with open('Hashtag_tweets.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["userName", "text"])
    writer.writerows(tweet)
I expect the output of
LINE OF EXCEL:
"b'Irene Cuesta',b'RT #deustoEmprende: \xc2\xa1Tenemos nueva cr\xc3\xb3nica! Descubre c\xc3\xb3mo transcurrieron las sesiones sobre #gestiontiempo en #masterdual #emprendimientoen\xe2\x80\xa6'
to be
LINE 1 COLUMN 1:
Irene Cuesta
LINE 1 COLUMN 2:
RT #deustoEmprende: \xc2\xa1Tenemos nueva cr\xc3\xb3nica! Descubre c\xc3\xb3mo transcurrieron las sesiones sobre #gestiontiempo en #masterdual #emprendimientoen\xe2\x80\xa6
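For what it's worth, the b'...' prefixes in that Excel line point at the likely cause: the values were encoded to bytes, and Python 3's csv writer stringifies bytes as their b'...' repr. Since the file is already opened with encoding='utf-8', dropping the manual .encode() calls should yield two clean columns. A sketch, assuming a fresh search cursor as in the question:

# Keep the values as str; open(..., encoding='utf-8') already handles
# the encoding when the csv module writes the file
tweet = [[item.user.name, item.text] for item in search]

with open('Hashtag_tweets.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["userName", "text"])
    writer.writerows(tweet)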
I am trying to parse all posts from a Russian website (http://games4you.ucoz.ua/news/). I am using Python 2.7.9 and BeautifulSoup 4, working in PyCharm. I've tried a lot of things to make it work, but I still get this instead of Russian text: '\u0421\u0442\u0440\u0430\u0442\u0435\u0433\u0456\u0457'
This is my code:
# Parsing information from games4you.ucoz.ua
# -*- coding: utf-8 -*-
import re
import csv
import urllib
from bs4 import BeautifulSoup
BASE_URL = "http://games4you.ucoz.ua/news/"
def get_html(url):
    response = urllib.urlopen(url)
    return response.read()

def get_page_count(html):
    soup = BeautifulSoup(html)
    paggination = soup.find('div', class_='catPages1')
    return int(paggination.find_all('a')[-2].text)

def save(games, path):
    # with open(path, 'w') as csvfile:
    #     writer = csv.writer(csvfile)
    #
    #     writer.writerow(('Title', 'Category', 'Date', 'Time'))
    #
    #     writer.writerows(
    #         (game['title'], ', '.join(game['category']), game['date'], game['time']) for game in games
    #     )
    with open(path, 'w+') as f:
        f.write(str(games).encode("UTF-8"))
def parse(html):
    soup = BeautifulSoup(html)
    # Getting the <div> that contains all posts on the page
    all_entries = soup.find('div', id='allEntries')
    # Getting all of the posts (every table represents one post)
    tables = all_entries.find_all('table', class_='eBlock')
    # Creating a list of dictionaries for game information
    games = []
    for table in tables:
        try:
            # Getting the game title
            game_title = table.tr.td.a.text
            game_post_body = table.find('div', class_='eMessage')
            # Getting the game description
            game_description = game_post_body.p.text.split('....')[0] + '.'
            game_details = table.find('div', class_='eDetails')
            # Getting the game category
            game_category = game_details.a.text
            game_post_details = game_details.text
        except:
            print 'Some error'
            continue
        # Getting the post views count
        post_views = game_post_details[:game_post_details.find('function')].split()[-2]
        # Getting the post date
        post_date = game_details.span.text
        # Getting the post time
        post_time = game_details.span['title']
        # print 'Game title: ', game_title, '\n'
        # print 'Views: ', post_views, '\n'
        # print 'Game category: ', game_category, '\n'
        # print 'Game description: ', '\n', game_description, '\n'
        # print 'Post date: ', post_date, '\n'
        # print 'Post time: ', post_time, '\n'
        games.append({
            'title': game_title,
            'category': game_category,
            'description': game_description,
            'date': post_date,
            'time': post_time
        })
    return games
def main():
    total_pages = get_page_count(get_html(BASE_URL))
    print('Total found %d pages...' % total_pages)
    games = []
    for page in range(1, total_pages + 1):
        # Multiply before dividing: / on two ints truncates in Python 2,
        # so page / total_pages * 100 would always print 0%
        print('Parsing %d%% (%d/%d)' % (page * 100 / total_pages, page, total_pages))
        games.extend(parse(get_html(BASE_URL + "?page%d" % page)))
    print('Saving...')
    save(games, 'games.txt')

main()
In Python 2:
>>> import HTMLParser
>>> s = 'Ell &eacute;s la v&iacute;ctima que expia els nostres pecats, i no tan sols els nostres, sin&oacute; els del m&oacute;n sencer.'
>>> print HTMLParser.HTMLParser().unescape(s)
Ell és la víctima que expia els nostres pecats, i no tan sols els nostres, sinó els del món sencer.
In Python 3:
>>> import html
>>> html.unescape(s)
'Ell és la víctima que expia els nostres pecats, i no tan sols els nostres, sinó els del món sencer.'
Your example, '\u0421\u0442\u0440\u0430\u0442\u0435\u0433\u0456\u0457', is already that text; the escapes are just the repr of the Unicode string for
'Стратегії'
For "normal" utf-8 file writing(reading) use
import codecs
f = codecs.open(filename, 'w', 'utf-8')
hope this helps
Yes, I did it! I guess I messed up the encoding/decoding and was mixing charsets. All I had to do was convert the data I get from BeautifulSoup from Unicode to UTF-8, like this:
game_title = game_title.encode("utf-8")
game_category = game_category.encode("utf-8")
game_description = game_description.encode("utf-8")
post_date = post_date.encode("utf-8")
post_time = post_time.encode("utf-8")
Nothing else was needed. This is the resulting code that worked for me:
# Parsing information from games4you.ucoz.ua
import csv
import urllib
from bs4 import BeautifulSoup
BASE_URL = "http://games4you.ucoz.ua/news/"
def get_html(url):
    response = urllib.urlopen(url)
    return response.read()

def get_page_count(html):
    soup = BeautifulSoup(html)
    paggination = soup.find('div', class_='catPages1')
    return int(paggination.find_all('a')[-2].text)

def save(games, path):
    with open(path, 'w+') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(('Title', 'Category', 'Date', 'Time'))
        writer.writerows(
            (game['title'], game['category'], game['date'], game['time']) for game in games
        )
def parse(html):
    soup = BeautifulSoup(html)
    # Getting the <div> that contains all posts on the page
    all_entries = soup.find('div', id='allEntries')
    # Getting all of the posts (every table represents one post)
    tables = all_entries.find_all('table', class_='eBlock')
    # Creating a list of dictionaries for game information
    games = []
    for table in tables:
        try:
            # Getting the game title
            game_title = table.tr.td.a.text
            game_post_body = table.find('div', class_='eMessage')
            # Getting the game description
            game_description = game_post_body.p.text.split('....')[0] + '.'
            game_details = table.find('div', class_='eDetails')
            # Getting the game category
            game_category = game_details.a.text
            game_post_details = game_details.text
        except:
            print 'Some error'
            continue
        # Getting the post views count
        post_views = game_post_details[:game_post_details.find('function')].split()[-2]
        # Getting the post date
        post_date = game_details.span.text
        # Getting the post time
        post_time = game_details.span['title']
        # Converting all data from Unicode to UTF-8
        game_title = game_title.encode("utf-8")
        game_category = game_category.encode("utf-8")
        game_description = game_description.encode("utf-8")
        post_date = post_date.encode("utf-8")
        post_time = post_time.encode("utf-8")
        # Writing data to the list
        games.append({
            'title': game_title,
            'category': game_category,
            'description': game_description,
            'date': post_date,
            'time': post_time
        })
    return games
def main():
    total_pages = get_page_count(get_html(BASE_URL))
    print('Total found %d pages...' % total_pages)
    games = []
    for page in range(1, total_pages + 1):
        # Multiply before dividing: integer division would otherwise report 0%
        print('Parsing %d%% (%d/%d)' % (page * 100 / total_pages, page, total_pages))
        games.extend(parse(get_html(BASE_URL + "?page%d" % page)))
    print('Saving...')
    save(games, 'games.csv')

main()