Writing prints in a txt file - python-itertools

new some help here. I need the printed text in a .txt file instead of showing in run.
Cant seem to figure this thing out...
import random
import string
import itertools
def randomString(stringLength=3):
letters = string.ascii_lowercase
return ''.join(random.choice(letters) for i in range(stringLength))
num = 10
for i in itertools.repeat(num):
print(randomString(3) + str(random.randint(10, 99)) + (randomString(3)))

Related

How to scrape only texts from specific HTML elements?

I have a problem with selecting the appropriate items from the list.
For example - I want to omit "1." then the first "5" (as in the example)
Additionally, I would like to write a condition that the letter "W" should be changed to "WIN".
import re
from selenium import webdriver
from bs4 import BeautifulSoup as BS2
from time import sleep
driver = webdriver.Chrome()
driver.get("https://www.flashscore.pl/druzyna/ajax/8UOvIwnb/tabela/")
sleep(10)
page = driver.page_source
soup = BS2(page,'html.parser')
content = soup.find('div',{'class':'ui-table__body'})
content_list = content.find_all('span',{"table__cell table__cell--value"})
res = []
for i in content:
line = i.text.split()[0]
if re.search('Ajax', line):
res.append(line)
print(res)
results
['1.Ajax550016:315?WWWWW']
I need
Ajax;5;5;0;16;3;W;W;W;W;W
I would recommend to select your elements more specific:
for e in soup.select('.ui-table__row'):
Iterate the ResultSet and decompose() unwanted tag:
e.select_one('.wld--tbd').decompose()
Extract texts with stripped_strings and join() them to your expected string:
data.append(';'.join(e.stripped_strings))
Example
Also making some replacements, based on dict just to demonstrate how this would work, not knowing R or P.
...
soup = BS2(page,'html.parser')
data = []
for e in soup.select('.ui-table__row'):
e.select_one('.wld--tbd').decompose()
e.select_one('.tableCellRank').decompose()
e.select_one('.table__cell--points').decompose()
e.select_one('.table__cell--score').string = ';'.join(e.select_one('.table__cell--score').text.split(':'))
pattern = {'W':'WIN','R':'RRR','P':'PPP'}
data.append(';'.join([pattern.get(i,i) for i in e.stripped_strings]))
data
To only get result for Ajax:
data = []
for e in soup.select('.ui-table__row:-soup-contains("Ajax")'):
e.select_one('.wld--tbd').decompose()
e.select_one('.tableCellRank').decompose()
e.select_one('.table__cell--points').decompose()
e.select_one('.table__cell--score').string = ';'.join(e.select_one('.table__cell--score').text.split(':'))
pattern = {'W':'WIN','R':'RRR','P':'PPP'}
data.append(';'.join([pattern.get(i,i) for i in e.stripped_strings]))
data
Output
Based on actually data it may differ from questions example.
['Ajax;6;6;0;0;21;3;WIN;WIN;WIN;WIN;WIN']
you had the right start by using bs4 to find the table div, but then you gave up and just tried to use re to extract from the text. as you can see that's not going to work. Here is a simple way to hack and get what you want. I keep grabinn divs from the table div you find, and the grab the text of the next eight divs after finding Ajax. then I do some dirty string manipulation thing because the WWWWW is all in the same toplevel div.
import re
from selenium import webdriver
from bs4 import BeautifulSoup as BS2
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
#driver = webdriver.Chrome()
driver.get("https://www.flashscore.pl/druzyna/ajax/8UOvIwnb/tabela/")
driver.implicitly_wait(10)
page = driver.page_source
soup = BS2(page,'html.parser')
content = soup.find('div',{'class':'ui-table__body'})
content_list = content.find_all('span',{"table__cell table__cell--value"})
res = []
found = 0
for i in content.find('div'):
line = i.text.split()[0]
if re.search('Ajax', line):
found = 8
if found:
found -= 1
res.append(line)
# change field 5 into separate values and skip field 6
res = res[:4] +res[5].split(':') + res[7:]
# break the last field into separate values and drop the first '?'
res = res[:-1] + [ i for i in res[-1]][1:]
print(";".join(res))
returns
Ajax;5;5;0;16;3;W;W;W;W;W
This works, but it is very brittle, and will break as soon as the website changes their content. you should put in a lot of error checking. I also replaced the sleep with a wait call, and added chromedrivermamager, which allows me to use selenium with chrome.

Bokeh: Slider is not updating results on Hbar plot

I wrote the following code for using a slider to filter and update values on a Hbar plot in Bokeh.
The plot (as shown in the picture) outputs correctly, but when I move the slider nothing happens.
I'd greatly appreciate any feedback.
import pandas as pd
from bokeh.core.properties import value
from IPython.display import display, HTML
from bokeh.plotting import figure, show
from bokeh.layouts import row, column, gridplot
from bokeh.io import output_notebook, save, curdoc
from bokeh.models import ColumnDataSource, HoverTool, DatetimeTickFormatter, FactorRange, DataTable, TableColumn, DateFormatter
from bokeh.models.widgets import Panel, Tabs, Slider
import matplotlib.pyplot as plt
xls=pd.ExcelFile(path)
test_data=pd.read_excel(xls, 'test_data')
display(test_data)
AREA counts
A 500
B 100
C 70
D 50
E 40
F 20
G 10
H 2
def myplot(doc):
source = ColumnDataSource(pd.DataFrame(data=test_data))
area_list=source.data['AREA'].tolist()[::-1]
# Creating the Bar Chart
p = figure(y_range=area_list ,plot_height=500, plot_width=500, title="Total counts per area",
x_axis_label='counts', y_axis_label='AREA')
p.hbar(y='AREA', right='counts', height=1,
line_color="black", fill_color='red',line_width=1,
source=source)
def update_plot(attr, old, new):
Number_of_counts = slider.value
new_data = test_data.loc[test_data['counts'] >=Number_of_counts]
source = ColumnDataSource(data=new_data)
# Make a slider object: slider
slider = Slider(start=1, end=100, step=1, value=1, title='counts')
# Attach the callback to the 'value' property of slider
slider.on_change('value', update_plot)
doc.add_root(column(slider, p))
show(myplot)
You're replacing the value of the source variable, but the old source is still there, being used by all the created models.
Instead of recreating the source, try to reassign the data attribute of the old source:
# source = ColumnDataSource(data=new_data)
source.data = ColumnDataSource.from_df(new_data)

pandas dataframe to_html formatters - can't display image

Im trying to add an up and down arrow to a pandas data frame with to_html for an email report.
I'm using a lambda function to input an up and down arrow onto column values in my data frame, I know the image html works ok becasue I can put it in the body of the email and it works fine but when I use this function and the pandas formatter it outputs like in the below image (ps. i know the CID is different to whats in my function, I was just tesing something)
Any one have any idea why? Or anyone have a better way to do it?
call:
worst_20_accounts.to_html(index=False,formatters={'Last Run Rank Difference': lambda x: check_html_val_for_arrow(x)}))
function:
def check_html_val_for_arrow(x):
try:
if x > 0:
return str(x) + ' <img src="cid:image7">'
elif x < 0:
return str(x) + ' <img src="cid:image8">'
else:
return str(x)
except:
return str(x)
escape=False
By default, the pandas.DataFrame.to_html method escapes any html in the dataframe's values.
my_img_snippet = (
"<img src='https://www.pnglot.com/pngfile/detail/"
"208-2086079_right-green-arrow-right-green-png-arrow.png'>"
)
df = pd.DataFrame([[my_img_snippet]])
Then
from IPython.display import HTML
HTML(df.to_html(escape=False))
style
You can let the styler object handle the rendering
df.style

Create footer in word docx by creating a footer.xml file in the word - folder of the docx.zip via python?

No idea how I can do this considering the (randomly?) generated rsids in the xml code, anyone has a solution?
from docx.text.run import Run
from docx import Document
doc = Document('/Users/cezi/Desktop/ME.docx')
p = doc.sections[0].footer.paragraphs[0]
for run in p.runs:
if ' ' in run.text:
new_run_element = p._element._new_r()
run._element.addprevious(new_run_element)
new_run = Run(new_run_element, run._parent)
new_run.text = "left"
new_run.add_tab()
new_run.add_text("Page")
p.add_run().add_tab()
p.add_run("right")
doc.save("HOW.docx")

How do I rerender HTML PyQt4

I have managed to use suggested code in order to render HTML from a webpage and then parse, find and use the text as wanted. I'm using PyQt4. However, the webpage I am interested in is updated frequently and I want to rerender the page and check the updated HTML for new info.
I thus have a loop in my pythonscript so that I sort of start all over again. However, this makes the program crash. I have searched the net and found out that this is to be expected, but I have not found any suggestion on how to do it correctly. It must be simple, I guess?
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Render (QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
r = Render(url)
html = r.frame.toHtml()
S,o when I hit r=Render(url) the second time, it crashes. S,o I am looking for something like r = Rerender(url).
As you might guess, I am not much of a programmer, and I usually get by by stealing code I barely understand. But this is the first time I can't find an answer, so I thought I should ask a question myself.
I hope my question is clear enough and that someone has the answer.
Simple demo (adapt to taste):
import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit
class WebPage(QtWebKit.QWebPage):
def __init__(self, url):
super(WebPage, self).__init__()
self.url = url
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
self.refresh()
def refresh(self):
self.mainFrame().load(QtCore.QUrl(self.url))
def handleLoadFinished(self):
print('Loaded:', self.mainFrame().url().toString())
# do stuff with html ...
print('Reloading in 3 seconds...\n')
QtCore.QTimer.singleShot(2000, self.refresh)
if __name__ == '__main__':
signal.signal(signal.SIGINT, signal.SIG_DFL)
app = QtGui.QApplication(sys.argv)
webpage = WebPage('http://en.wikipedia.org/')
print('Press Ctrl+C to quit\n')
sys.exit(app.exec_())