How to change style of hyperlinks in existing document, especially font size and name - python-docx

I would like to stick with python-docx library. But if there are other ways would love to hear them.
Currently i'm using this snippet for changing style in document, but links are no affected. Should I recreate them?
from docx import Document
from docx.shared import Pt
document = Document("existing file.docx")
for paragraph in document.paragraphs:
# paragraph.style = document.styles['Normal']
for run in paragraph.runs:
run.font.name = 'Arial'
run.font.size = Pt(10)
Snippet is from this answer.

from docx.oxml.shared import qn
This function is copy-paste from here. Because paragraph.runs is not returning Run instances for hyperlinks childrens.
def getParagraphRuns(paragraph):
def _get(node, parent):
for child in node:
if child.tag == qn('w:r'):
yield Run(child, parent)
if child.tag == qn('w:hyperlink'):
yield from _get(child, parent)
return list(_get(paragraph._element, paragraph))
If you want to get sequence of Run instances only for hyperlinks use this function instead.
def getHyperlinksRuns(paragraph):
def _get(node, parent):
for child in node:
if child.tag == qn('w:hyperlink'):
yield from returnRun(child, parent)
def returnRun(node, parent):
for child in node:
if child.tag == qn('w:r'):
yield Run(child, parent)
return list(_get(paragraph._element, paragraph))
and voila
for p in document.paragraphs:
runs = getParagraphRuns(p)
# runs = getHyperlinksRuns(p)
for run in runs:
run.font.name = 'Arial'
run.font.size = Pt(10)

Related

How to scrape only texts from specific HTML elements?

I have a problem with selecting the appropriate items from the list.
For example - I want to omit "1." then the first "5" (as in the example)
Additionally, I would like to write a condition that the letter "W" should be changed to "WIN".
import re
from selenium import webdriver
from bs4 import BeautifulSoup as BS2
from time import sleep
driver = webdriver.Chrome()
driver.get("https://www.flashscore.pl/druzyna/ajax/8UOvIwnb/tabela/")
sleep(10)
page = driver.page_source
soup = BS2(page,'html.parser')
content = soup.find('div',{'class':'ui-table__body'})
content_list = content.find_all('span',{"table__cell table__cell--value"})
res = []
for i in content:
line = i.text.split()[0]
if re.search('Ajax', line):
res.append(line)
print(res)
results
['1.Ajax550016:315?WWWWW']
I need
Ajax;5;5;0;16;3;W;W;W;W;W
I would recommend to select your elements more specific:
for e in soup.select('.ui-table__row'):
Iterate the ResultSet and decompose() unwanted tag:
e.select_one('.wld--tbd').decompose()
Extract texts with stripped_strings and join() them to your expected string:
data.append(';'.join(e.stripped_strings))
Example
Also making some replacements, based on dict just to demonstrate how this would work, not knowing R or P.
...
soup = BS2(page,'html.parser')
data = []
for e in soup.select('.ui-table__row'):
e.select_one('.wld--tbd').decompose()
e.select_one('.tableCellRank').decompose()
e.select_one('.table__cell--points').decompose()
e.select_one('.table__cell--score').string = ';'.join(e.select_one('.table__cell--score').text.split(':'))
pattern = {'W':'WIN','R':'RRR','P':'PPP'}
data.append(';'.join([pattern.get(i,i) for i in e.stripped_strings]))
data
To only get result for Ajax:
data = []
for e in soup.select('.ui-table__row:-soup-contains("Ajax")'):
e.select_one('.wld--tbd').decompose()
e.select_one('.tableCellRank').decompose()
e.select_one('.table__cell--points').decompose()
e.select_one('.table__cell--score').string = ';'.join(e.select_one('.table__cell--score').text.split(':'))
pattern = {'W':'WIN','R':'RRR','P':'PPP'}
data.append(';'.join([pattern.get(i,i) for i in e.stripped_strings]))
data
Output
Based on actually data it may differ from questions example.
['Ajax;6;6;0;0;21;3;WIN;WIN;WIN;WIN;WIN']
you had the right start by using bs4 to find the table div, but then you gave up and just tried to use re to extract from the text. as you can see that's not going to work. Here is a simple way to hack and get what you want. I keep grabinn divs from the table div you find, and the grab the text of the next eight divs after finding Ajax. then I do some dirty string manipulation thing because the WWWWW is all in the same toplevel div.
import re
from selenium import webdriver
from bs4 import BeautifulSoup as BS2
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
#driver = webdriver.Chrome()
driver.get("https://www.flashscore.pl/druzyna/ajax/8UOvIwnb/tabela/")
driver.implicitly_wait(10)
page = driver.page_source
soup = BS2(page,'html.parser')
content = soup.find('div',{'class':'ui-table__body'})
content_list = content.find_all('span',{"table__cell table__cell--value"})
res = []
found = 0
for i in content.find('div'):
line = i.text.split()[0]
if re.search('Ajax', line):
found = 8
if found:
found -= 1
res.append(line)
# change field 5 into separate values and skip field 6
res = res[:4] +res[5].split(':') + res[7:]
# break the last field into separate values and drop the first '?'
res = res[:-1] + [ i for i in res[-1]][1:]
print(";".join(res))
returns
Ajax;5;5;0;16;3;W;W;W;W;W
This works, but it is very brittle, and will break as soon as the website changes their content. you should put in a lot of error checking. I also replaced the sleep with a wait call, and added chromedrivermamager, which allows me to use selenium with chrome.

In Gtk, how to make a window smaller when creating

I am trying to display both an image and a box with an Entry widget. I can do that, but the window is so large that the widget at the bottom is mostly out of view. I have tried several calls to set the window's size or unmaximize it, but they seem to have no effect. I determined that the problem only occurs when the image is large, but still wonder how to display a large image in a resizable window or, for that matter, to make any changes to the window's geometry from code. All the function call I tried seem to have no effect.
Here is my code:
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
from gi.repository import GdkPixbuf
from urllib.request import urlopen
class Display(object):
def __init__(self):
self.window = Gtk.Window()
self.window.connect('destroy', self.destroy)
self.window.set_border_width(10)
# a box underneath would be added every time you do
# vbox.pack_start(new_widget)
vbox = Gtk.VBox()
self.image = Gtk.Image()
response = urlopen('http://1.bp.blogspot.com/-e-rzcjuCpk8/T3H-mSry7PI/AAAAAAAAOrc/Z3XrqSQNrSA/s1600/rubberDuck.jpg').read()
pbuf = GdkPixbuf.PixbufLoader()
pbuf.write(response)
pbuf.close()
self.image.set_from_pixbuf(pbuf.get_pixbuf())
self.window.add(vbox)
vbox.pack_start(self.image, False, False, 0)
self.entry = Gtk.Entry()
vbox.pack_start(self.entry, True,True, 0)
self.image.show()
self.window.show_all()
def main(self):
Gtk.main()
def destroy(self, widget, data=None):
Gtk.main_quit()
a=Display()
a.main()
Most of the posted information seems to pertain to Gtk2 rather than Gtk3, but there is a solution: to use a pix buf loader and set the size:
from gi.repository import Gtk, Gdk, GdkPixbuf
#more stuff
path = config['DEFAULT']['datasets']+'working.png'
with open(path,'rb') as f:
pixels = f.read()
loader = GdkPixbuf.PixbufLoader()
loader.set_size(400,400)
loader.write(pixels)
pb = GdkPixbuf.Pixbuf.new_from_file(path)
self.main_image.set_from_pixbuf(loader.get_pixbuf())
loader.close()

how to get mouse position in

I am a new pyqtgraph users,try to "Embedding widgets inside PyQt applications"following the instructions in http://www.pyqtgraph.org/documentation/how_to_use.html. in my example I promote Graphics view to PlotWidget, then save as "test2.ui", also follow the "crosshair/mouse interaction" example,my code:
import sys
import numpy
from PyQt5 import QtCore, QtGui,uic,QtWidgets
from PyQt5.QtWidgets import *
import pyqtgraph as pg
import os
hw,QtBaseClass=uic.loadUiType("test.ui")
def gaussian(A, B, x):
return A * numpy.exp(-(x / (2. * B)) ** 2.)
class MyApp(QtWidgets.QMainWindow, hw):
def __init__(self):
super().__init__()
self.setupUi(self)
winSize=self.size()
self.view.resize(winSize.width(),winSize.height())
x = numpy.linspace(-5., 5., 10000)
y =gaussian(5.,0.2, x)
self.p=self.view.plot(x,y)
proxy = pg.SignalProxy(self.view.scene().sigMouseMoved, rateLimit=60, slot=self.mouseMoved)
self.view.enableAutoRange("xy", True)
def mouseMoved(evt):
print("mouseTest")
mousePoint = self.p.vb.mapSceneToView(evt[0])
label.setText(
"<span style='font-size: 14pt; color: white'> x = %0.2f, <span style='color: white'> y = %0.2f</span>" % (
mousePoint.x(), mousePoint.y()))
if __name__ == "__main__":
app = QtWidgets.QApplication(sys.argv)
window = MyApp()
window.show()
sys.exit(app.exec_())
it seems not get the mouse move event;
after change
proxy = pg.SignalProxy(self.view.scene().sigMouseMoved, rateLimit=60, slot=self.mouseMoved)
to
self.view.scene().sigMouseMoved.connect(self.mouseMoved),
output"MouseTest",but program imediatly crash.
can any one give me some help
Two things:
Re: Crashing
It seems as if you haven't placed a label in the GUI to modify, perhaps your code is seeing this and kicks it back to you. If you're using qtDesigner, it is likely defined as self.label, and in my GUI, I was required to use self.label to reference it.
Re: mouseMoved function
I was just struggling with a similar issue of it not working. I was able to get mine to work by changing the evt[0] to simply evt, something I think they moved to from pyqt4 to pyqt5.
Here's an example of what I was able to get to work:
..........setup code above... IN THE setupUi function:
..........setup code above...
Plotted = self.plot
vLine = pg.InfiniteLine(angle=90, movable=False)
hLine = pg.InfiniteLine(angle=0, movable=False)
Plotted.addItem(vLine, ignoreBounds=True)
Plotted.addItem(hLine, ignoreBounds=True)
Plotted.setMouseTracking(True)
Plotted.scene().sigMouseMoved.connect(self.mouseMoved)
def mouseMoved(self,evt):
pos = evt
if self.plot.sceneBoundingRect().contains(pos):
mousePoint = self.plot.plotItem.vb.mapSceneToView(pos)
self.label.setText("<span style='font-size: 15pt'>X=%0.1f, <span style='color: black'>Y=%0.1f</span>" % (mousePoint.x(),mousePoint.y()))
self.plot.plotItem.vLine.setPos(mousePoint.x())
self.plot.plotItem.hLine.setPos(mousePoint.y()
...the if__name__ =="__main__": function .....
In my case, I did not pass the proxy statement, and instead just went for the sigMouseMoved since it already passes the information the proxy would. I think this was in the example in pyqt5 (and commented out) because it was the change. However, the comment didn't specifically state this.

IPython notebook widgets using interactive

I'm having trouble creating widgets in a Jupyter notebook that update when other widget values are changed. This is the code I've been playing around with:
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
from IPython.display import display
def func(arg1,arg2):
print arg1
print arg2
choice = widgets.ToggleButtons(description='Choice:',options=['A','B'])
display(choice)
metric = widgets.Dropdown(options=['mercury','venus','earth'],description='Planets:')
text = widgets.Text(description='Text:')
a = interactive(func,
arg1=metric,
arg2=text,
__manual=True)
def update(*args):
if choice.value == 'A':
metric = widgets.Dropdown(options=['mercury','venus','earth'],description='Planets:')
text = widgets.Text(description='Text:')
a.children = (metric,text)
else:
metric = widgets.Dropdown(options=['monday','tuesday','wednesday'],description='Days:')
text2 = widgets.Textarea(description='Text2:')
a.children = (metric,text2)
choice.observe(update,'value')
display(a)
The resulting widgets metric and text do change based whether A or B is selected, but the problem is that the "Run func" button goes away as soon as I change to B. I've tried adding the __manual attribute immediately before display(a), adding it within update, and several other places. How do I change the children of the widget box without overwriting the fact that I want to manually run the function?

How do I rerender HTML PyQt4

I have managed to use suggested code in order to render HTML from a webpage and then parse, find and use the text as wanted. I'm using PyQt4. However, the webpage I am interested in is updated frequently and I want to rerender the page and check the updated HTML for new info.
I thus have a loop in my pythonscript so that I sort of start all over again. However, this makes the program crash. I have searched the net and found out that this is to be expected, but I have not found any suggestion on how to do it correctly. It must be simple, I guess?
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Render (QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
r = Render(url)
html = r.frame.toHtml()
S,o when I hit r=Render(url) the second time, it crashes. S,o I am looking for something like r = Rerender(url).
As you might guess, I am not much of a programmer, and I usually get by by stealing code I barely understand. But this is the first time I can't find an answer, so I thought I should ask a question myself.
I hope my question is clear enough and that someone has the answer.
Simple demo (adapt to taste):
import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit
class WebPage(QtWebKit.QWebPage):
def __init__(self, url):
super(WebPage, self).__init__()
self.url = url
self.mainFrame().loadFinished.connect(self.handleLoadFinished)
self.refresh()
def refresh(self):
self.mainFrame().load(QtCore.QUrl(self.url))
def handleLoadFinished(self):
print('Loaded:', self.mainFrame().url().toString())
# do stuff with html ...
print('Reloading in 3 seconds...\n')
QtCore.QTimer.singleShot(2000, self.refresh)
if __name__ == '__main__':
signal.signal(signal.SIGINT, signal.SIG_DFL)
app = QtGui.QApplication(sys.argv)
webpage = WebPage('http://en.wikipedia.org/')
print('Press Ctrl+C to quit\n')
sys.exit(app.exec_())