Python 2's webbrowser is not working in repl.it - google-chrome

I am using repl.it, and also the webbrowser module to open a link using webbrowser.open(<link to open>)
But, my link doesn't work. Can you help me with this?
Here is my code:
import webbrowser
webbrowser.open("https://www.daffodilday.com.au/get-involved/register-your-school/", 2)
And a not-so-quick overview of the webbrowser module:
import os, sys, shlex, stat, subprocess, time
__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"]
class Error(Exception):
pass
_browsers = {}
_tryorder = []
def register(name, klass, instance=None, update_tryorder=1):
_browsers[name.lower()] = [klass, instance]
if update_tryorder > 0:
_tryorder.append(name)
elif update_tryorder < 0:
_tryorder.insert(0, name)
def get(using=None):
if using is not None:
alternatives = [using]
else:
alternatives = _tryorder
for browser in alternatives:
if '%s' in browser:
browser = shlex.split(browser)
if browser[-1] == '&':
return BackgroundBrowser(browser[:-1])
else:
return GenericBrowser(browser)
else:
try:
command = _browsers[browser.lower()]
except KeyError:
command = _synthesize(browser)
if command[1] is not None:
return command[1]
elif command[0] is not None:
return command[0]()
raise Error("could not locate runnable browser")
def open(url, new=0, autoraise=True):
for name in _tryorder:
browser = get(name)
if browser.open(url, new, autoraise):
return True
return False
def open_new(url):
return open(url, 1)
def open_new_tab(url):
return open(url, 2)
def _synthesize(browser, update_tryorder=1):
cmd = browser.split()[0]
if not _iscommand(cmd):
return [None, None]
name = os.path.basename(cmd)
try:
command = _browsers[name.lower()]
except KeyError:
return [None, None]
# now attempt to clone to fit the new name:
controller = command[1]
if controller and name.lower() == controller.basename:
import copy
controller = copy.copy(controller)
controller.name = browser
controller.basename = os.path.basename(browser)
register(browser, None, controller, update_tryorder)
return [None, controller]
return [None, None]
if sys.platform[:3] == "win":
def _isexecutable(cmd):
cmd = cmd.lower()
if os.path.isfile(cmd) and cmd.endswith((".exe", ".bat")):
return True
for ext in ".exe", ".bat":
if os.path.isfile(cmd + ext):
return True
return False
else:
def _isexecutable(cmd):
if os.path.isfile(cmd):
mode = os.stat(cmd)[stat.ST_MODE]
if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH:
return True
return False
def _iscommand(cmd):
if _isexecutable(cmd):
return True
path = os.environ.get("PATH")
if not path:
return False
for d in path.split(os.pathsep):
exe = os.path.join(d, cmd)
if _isexecutable(exe):
return True
return False
class BaseBrowser(object):
"""Parent class for all browsers. Do not use directly."""
args = ['%s']
def __init__(self, name=""):
self.name = name
self.basename = name
def open(self, url, new=0, autoraise=True):
raise NotImplementedError
def open_new(self, url):
return self.open(url, 1)
def open_new_tab(self, url):
return self.open(url, 2)
class GenericBrowser(BaseBrowser):
def __init__(self, name):
if isinstance(name, basestring):
self.name = name
self.args = ["%s"]
else:
self.name = name[0]
self.args = name[1:]
self.basename = os.path.basename(self.name)
def open(self, url, new=0, autoraise=True):
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
if sys.platform[:3] == 'win':
p = subprocess.Popen(cmdline)
else:
p = subprocess.Popen(cmdline, close_fds=True)
return not p.wait()
except OSError:
return False
class BackgroundBrowser(GenericBrowser):
def open(self, url, new=0, autoraise=True):
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
if sys.platform[:3] == 'win':
p = subprocess.Popen(cmdline)
else:
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
p = subprocess.Popen(cmdline, close_fds=True, preexec_fn=setsid)
return (p.poll() is None)
except OSError:
return False
class UnixBrowser(BaseBrowser):
raise_opts = None
remote_args = ['%action', '%s']
remote_action = None
remote_action_newwin = None
remote_action_newtab = None
background = False
redirect_stdout = True
def _invoke(self, args, remote, autoraise):
raise_opt = []
if remote and self.raise_opts:
# use autoraise argument only for remote invocation
autoraise = int(autoraise)
opt = self.raise_opts[autoraise]
if opt: raise_opt = [opt]
cmdline = [self.name] + raise_opt + args
if remote or self.background:
inout = file(os.devnull, "r+")
else:
inout = None
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
p = subprocess.Popen(cmdline, close_fds=True, stdin=inout,
stdout=(self.redirect_stdout and inout or None),
stderr=inout, preexec_fn=setsid)
if remote:
time.sleep(1)
rc = p.poll()
if rc is None:
time.sleep(4)
rc = p.poll()
if rc is None:
return True
return not rc
elif self.background:
if p.poll() is None:
return True
else:
return False
else:
return not p.wait()
def open(self, url, new=0, autoraise=True):
if new == 0:
action = self.remote_action
elif new == 1:
action = self.remote_action_newwin
elif new == 2:
if self.remote_action_newtab is None:
action = self.remote_action_newwin
else:
action = self.remote_action_newtab
else:
raise Error("Bad 'new' parameter to open(); " +
"expected 0, 1, or 2, got %s" % new)
args = [arg.replace("%s", url).replace("%action", action)
for arg in self.remote_args]
success = self._invoke(args, True, autoraise)
if not success:
args = [arg.replace("%s", url) for arg in self.args]
return self._invoke(args, False, False)
else:
return True
class Mozilla(UnixBrowser):
raise_opts = ["-noraise", "-raise"]
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-tab"
background = True
Netscape = Mozilla
class Galeon(UnixBrowser):
raise_opts = ["-noraise", ""]
remote_args = ['%action', '%s']
remote_action = "-n"
remote_action_newwin = "-w"
background = True
class Chrome(UnixBrowser):
remote_args = ['%action', '%s']
remote_action = ""
remote_action_newwin = "--new-window"
remote_action_newtab = ""
background = True
Chromium = Chrome
class Opera(UnixBrowser):
raise_opts = ["-noraise", ""]
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-page"
background = True
class Elinks(UnixBrowser):
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-tab"
background = False
redirect_stdout = False
class Konqueror(BaseBrowser):
def open(self, url, new=0, autoraise=True):
if new == 2:
action = "newTab"
else:
action = "openURL"
devnull = file(os.devnull, "r+")
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
try:
p = subprocess.Popen(["kfmclient", action, url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull)
except OSError:
pass
else:
p.wait()
return True
try:
p = subprocess.Popen(["konqueror", "--silent", url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull,
preexec_fn=setsid)
except OSError:
pass
else:
if p.poll() is None:
return True
try:
p = subprocess.Popen(["kfm", "-d", url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull,
preexec_fn=setsid)
except OSError:
return False
else:
return (p.poll() is None)
class Grail(BaseBrowser):
def _find_grail_rc(self):
import glob
import pwd
import socket
import tempfile
tempdir = os.path.join(tempfile.gettempdir(),
".grail-unix")
user = pwd.getpwuid(os.getuid())[0]
filename = os.path.join(tempdir, user + "-*")
maybes = glob.glob(filename)
if not maybes:
return None
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
for fn in maybes:
# need to PING each one until we find one that's live
try:
s.connect(fn)
except socket.error:
# no good; attempt to clean it out, but don't fail:
try:
os.unlink(fn)
except IOError:
pass
else:
return s
def _remote(self, action):
s = self._find_grail_rc()
if not s:
return 0
s.send(action)
s.close()
return 1
def open(self, url, new=0, autoraise=True):
if new:
ok = self._remote("LOADNEW " + url)
else:
ok = self._remote("LOAD " + url)
return ok
def register_X_browsers():
if _iscommand("xdg-open"):
register("xdg-open", None, BackgroundBrowser("xdg-open"))
if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gvfs-open"):
register("gvfs-open", None, BackgroundBrowser("gvfs-open"))
if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gnome-open"):
register("gnome-open", None, BackgroundBrowser("gnome-open"))
if "KDE_FULL_SESSION" in os.environ and _iscommand("kfmclient"):
register("kfmclient", Konqueror, Konqueror("kfmclient"))
if _iscommand("x-www-browser"):
register("x-www-browser", None, BackgroundBrowser("x-www-browser"))
for browser in ("mozilla-firefox", "firefox",
"mozilla-firebird", "firebird",
"iceweasel", "iceape",
"seamonkey", "mozilla", "netscape"):
if _iscommand(browser):
register(browser, None, Mozilla(browser))
if _iscommand("kfm"):
register("kfm", Konqueror, Konqueror("kfm"))
elif _iscommand("konqueror"):
register("konqueror", Konqueror, Konqueror("konqueror"))
for browser in ("galeon", "epiphany"):
if _iscommand(browser):
register(browser, None, Galeon(browser))
if _iscommand("skipstone"):
register("skipstone", None, BackgroundBrowser("skipstone"))
for browser in ("google-chrome", "chrome", "chromium", "chromium-browser"):
if _iscommand(browser):
register(browser, None, Chrome(browser))
if _iscommand("opera"):
register("opera", None, Opera("opera")).
if _iscommand("mosaic"):
register("mosaic", None, BackgroundBrowser("mosaic")
if _iscommand("grail"):
register("grail", Grail, None)
if os.environ.get("DISPLAY"):
register_X_browsers()
if os.environ.get("TERM"):
if _iscommand("www-browser"):
register("www-browser", None, GenericBrowser("www-browser"))
if _iscommand("links"):
register("links", None, GenericBrowser("links"))
if _iscommand("elinks"):
register("elinks", None, Elinks("elinks"))
if _iscommand("lynx"):
register("lynx", None, GenericBrowser("lynx"))
if _iscommand("w3m"):
register("w3m", None, GenericBrowser("w3m"))
if sys.platform[:3] == "win":
class WindowsDefault(BaseBrowser):
def open(self, url, new=0, autoraise=True):
try:
os.startfile(url)
except WindowsError:
return False
else:
return True
_tryorder = []
_browsers = {}
register("windows-default", WindowsDefault)
iexplore = os.path.join(os.environ.get("PROGRAMFILES", "C:\\Program Files"),
"Internet Explorer\\IEXPLORE.EXE")
for browser in ("firefox", "firebird", "seamonkey", "mozilla",
"netscape", "opera", iexplore):
if _iscommand(browser):
register(browser, None, BackgroundBrowser(browser))
if sys.platform == 'darwin':
class MacOSX(BaseBrowser):
def __init__(self, name):
self.name = name
def open(self, url, new=0, autoraise=True):
assert "'" not in url
if not ':' in url:
url = 'file:'+url
new = int(bool(new))
if self.name == "default":
script = 'open location "%s"' % url.replace('"', '%22')
else:
if self.name == "OmniWeb":
toWindow = ""
else:
toWindow = "toWindow %d" % (new - 1)
cmd = 'OpenURL "%s"' % url.replace('"', '%22')
script = '''tell application "%s"
activate
%s %s
end tell''' % (self.name, cmd, toWindow)
osapipe = os.popen("osascript", "w")
if osapipe is None:
return False
osapipe.write(script)
rc = osapipe.close()
return not rc
class MacOSXOSAScript(BaseBrowser):
def __init__(self, name):
self._name = name
def open(self, url, new=0, autoraise=True):
if self._name == 'default':
script = 'open location "%s"' % url.replace('"', '%22')
script = '''
tell application "%s"
activate
open location "%s"
end
'''%(self._name, url.replace('"', '%22'))
osapipe = os.popen("osascript", "w")
if osapipe is None:
return False
osapipe.write(script)
rc = osapipe.close()
return not rc
register("safari", None, MacOSXOSAScript('safari'), -1)
register("firefox", None, MacOSXOSAScript('firefox'), -1)
register("MacOSX", None, MacOSXOSAScript('default'), -1)
if sys.platform[:3] == "os2" and _iscommand("netscape"):
_tryorder = []
_browsers = {}
register("os2netscape", None,
GenericBrowser(["start", "netscape", "%s"]), -1)
if "BROWSER" in os.environ:
_userchoices = os.environ["BROWSER"].split(os.pathsep)
_userchoices.reverse()
for cmdline in _userchoices:
if cmdline != '':
cmd = _synthesize(cmdline, -1)
if cmd[1] is None:
register(cmdline, None, GenericBrowser(cmdline), -1)
cmdline = None
del cmdline
del _userchoices
def main():
import getopt
usage = """Usage: %s [-n | -t] url
-n: open new window
-t: open new tab""" % sys.argv[0]
try:
opts, args = getopt.getopt(sys.argv[1:], 'ntd')
except getopt.error, msg:
print >>sys.stderr, msg
print >>sys.stderr, usage
sys.exit(1)
new_win = 0
for o, a in opts:
if o == '-n': new_win = 1
elif o == '-t': new_win = 2
if len(args) != 1:
print >>sys.stderr, usage
sys.exit(1)
url = args[0]
open(url, new_win)
print "\a"
if __name__ == "__main__":
main()
Can you help me? I mostly know about everything in Python, but still sometimes need quite quaintly a lot of effort.

You can simplify the answer like this:
import webbrowser
webbrowser.open(*website*)
Of course, replace * website * with the site and no need to write the 2.
Then tell me if it works.
And by the way, some online python processor like pythonanywhere do not allow opening all websites.
Instead, they only allow some specific websites.

Related

Unable to use method of a class in different class-missing 2 required positional arguments

I have two python classes:- One class(CloudLink) is responsible for sending JSON events to the app and another(ReadData) is responsible for building the JSON data.
The ReadData class will be using the CloudLink methods to send the JSON data to the App. But I'm getting error _buildJSONdata() missing 1 required positional argument: 'Data'.
ReadData class
from pyspark.sql import SparkSession
import functools
from pyspark.sql import DataFrame
from pyspark.sql.functions import explode
from cosmosconnect import azurecosmos
class ReadData:
#exception(logger)
def __init__(self):
self.spark_session = (
SparkSession.builder
.appName("readData")
.getOrCreate()
)
mssparkutils.fs.unmount('/mnt/test')
logger.info("Drive unmounted")
mssparkutils.fs.mount(
'abfss://abc#transl.dfs.core.windows.net/',
'/mnt/test',
{'linkedService': "linkCosmos"}
)
logger.info("Mounted Successfully")
self.input_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/input_path"
)
self.output_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/output_path"
)
'''
Reading the schema from csv file
'''
#exception(logger)
def readConfig(self):
try:
logger.info(f"Reading the Config present in {self.input_directory} ")
dfConfig = self.spark_session.read.option("multiline","true") \
.json(self.input_directory)
#for f in dfConfig.select("Entity","Query","Business_Rule").collect():
dfConfig=dfConfig.select(explode('Input').alias('Input_Data'))\
.select('Input_Data.Validation_Type','Input_Data.Entity','Input_Data.Query','Input_Data.Business_Rule')
for f in dfConfig.rdd.toLocalIterator():
#for index, f in dfConfig.toPandas().iterrows():
self.Validation_Type=f[0]
self.container=f[1]
self.query=f[2]
self.rule=f[3]
self.readCosmos(self)
except:
raise ValueError("")
#exception(logger)
def readCosmos(self,*params):
#from cosmosconnect import azurecosmos
#a=[]
linkedService='fg'
df=azurecosmos.cosmosConnect(linkedService,self.query,self.container)
df.cache()
if len(df.head(1)) >0:
outputpath=self.output_directory+'/'+self.container
df.coalesce(1).write.mode('overwrite').parquet(outputpath)
Status="Validation Failure"
Data= {"Validation_Type":[],"Status":[],"Container":[],"Business_Rule":[]}
Data["Validation_Type"].append(self.Validation_Type)
Data["Status"].append(Status)
Data["Container"].append(self.container)
Data["Business_Rule"].append(self.rule)
CloudLink._buildJSONdata(Data)
if __name__ == "__main__":
p = ReadData()
p.readConfig()
CloudLink class
import json
import datetime
import hashlib
import json
import sys
import traceback
import adal
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import logging
from functools import wraps
import sys
def create_logger():
#create a logger object
#logger = logging.getLogger()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logfile = logging.FileHandler('exc_logger.log')
#logfile = logging.StreamHandler(sys.stdout)
fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
formatter = logging.Formatter(fmt)
logfile.setFormatter(formatter)
logger.addHandler(logfile)
return logger
logger = create_logger()
def exception(logger):
def decorator(func):
#wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except:
issue = "exception in "+func.__name__+"\n"
issue = issue+"-------------------------\
------------------------------------------------\n"
logger.exception(issue)
raise
return wrapper
return decorator
class CloudLink(object):
_token = None
_instance = None
http = None
cloudclient = TokenLibrary.getSecret("xxxx", "rtrt")
clientid = TokenLibrary.getSecret("xxxx", "tyty")
clientcredentials = TokenLibrary.getSecret("xxxx", "abcabc")
authority_url = TokenLibrary.getSecret("xxxx", "abab")
cloudtest = TokenLibrary.getSecret("xxxx", "yyyy")
#staticmethod
def getInstance():
if not CloudLink._instance:
CloudLink._instance = CloudLink()
return CloudLink._instance
def __init__(self):
retry_strategy = Retry(
total=3,
backoff_factor=0,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["HEAD", "GET", "OPTIONS"],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
self.http = requests.Session()
self.http.mount("https://", adapter)
self.http.mount("http://", adapter)
print("Inside init")
def parseJSON(self, t):
try:
eventData = json.loads(t)
logger.info(f"Sending {eventData} to cloud")
self.sendToCloud(eventData)
except ValueError as e:
print("Error: %s Please validate JSON in https://www.jsonschemavalidator.net/"% e)
return None # or: raise
def sendToCloud(self, eventData):
cloudData = {"eventData": eventData, "metadata": self._buildMetadata()}
logger.info(f"Raising alert with data=({cloudData}")
response = self.http.post(
self.cloudtest, headers=self._buildHeaders(), json=cloudData
)
logger.info(f"cloud alert response={response}")
if response.status_code == 202 or response.status_code == 200:
logger.info("Mail sent to Cloud")
else:
raise Exception(f"Cloud reporting failed with Error {response}")
def _buildJSONdata(self,Data):
if len(Data) == 0:
raise Exception("JSON is empty")
else:
t = json.dumps(self.Data)
self.parseJSON(t)
def _buildMetadata(self):
return {
"messageType": "Send Email",
"messageVersion": "0.0.1",
"sender": "Send Email",
}
def _buildHeaders(self):
self._refreshADToken()
headers = {
"Authorization": "Bearer {}".format(self._token["accessToken"]),
"Content-type": "application/json",
"Accept": "text/plain",
}
return headers
def _refreshADToken(self):
def shouldRenew(token):
"""Returns True if the token should be renewed"""
expiresOn = datetime.datetime.strptime(
token["expiresOn"], "%Y-%m-%d %H:%M:%S.%f"
)
now = datetime.datetime.now()
return (expiresOn - now) < datetime.timedelta(minutes=5)
if not self._token or shouldRenew(self._token):
logger.info("Renewing credentials for Alerting")
result = None
try:
context = adal.AuthenticationContext(CloudLink.authority_url)
result = context.acquire_token_with_client_credentials(CloudLink.cloudclient, CloudLink.clientid,CloudLink.clientcredentials)
except Exception as e:
error = "Failed to renew client credentials."
logger.info(error)
raise
if result and "accessToken" in result:
self._token = result
else:
logger.error(
"Failed to acquire bearer token. accessToken not found in result object on renewing credentials."
)
raise Exception("Could not acquire a bearer token")

No data display after QTableView freeze column?

QTableView has been instantiated, use QSqlTableModel in PYQT5 to read MySQL database, and use setmodel() to display correctly in QTableView. Freeze the first row and first column using FreezeTableView but no data is displayed in the instantiated tableView. It needs show() to display the data in a new window. How can I display the data in the instantiated tableView instead of in a new window? Thanks!
Ui_untitled.py
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(978, 828)
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.treeView_1 = QtWidgets.QTreeView(self.centralwidget)
self.treeView_1.setEnabled(False)
self.treeView_1.setGeometry(QtCore.QRect(10, 11, 191, 771))
self.treeView_1.setObjectName("treeView_1") #Omitted below
mainwindown.py
from ui_untitled import Ui_MainWindow
class MainForm(QMainWindow, Ui_MainWindow):
def __init__(self, pareng=None):
super(MainForm, self).__init__(pareng)
self.setupUi(self)
def on_treeView_clicked(self, index):
name = index.data()
self.tableModel_1 = QSqlTableModel()
self.tableModel_1.setTable("name")
self.tableModel_1.setEditStrategy(QSqlTableModel.OnRowChange)
self.tableModel_1.setFilter(self.tr("name_no = %s"%name))
self.tableModel_1.select()
self.tableView_1 = QTableView()
#self.tableView_1.setModel(self.tableModel_1)
self.tableView_1 = FreezeTableView(self.tableModel_1)
#self.tableView_1.show() #No data displayed after cancel
class FreezeTableView(QTableView, QAbstractSlider):
def __init__(self, model):
super(FreezeTableView, self).__init__()
self.model = model
self.frozenTableView = QTableView(self)
self.horizontalView = QTableView(self)
self.up = True
self.init()
def init(self):
self.setModel(self.model)
#print(self.model.rowCount(), self.model.ColumnCount())
self.frozenTableInit()
self.horizontalViewInit()
self.horizontalHeader().sectionResized.connect(self.updateSectionWidth)
self.verticalHeader().sectionResized.connect(self.updateSectionHeight)
self.verticalScrollBar().valueChanged.connect(self.vConnectFV)
self.frozenTableView.verticalScrollBar().valueChanged.connect(self.fVConnectV)
self.horizontalScrollBar().valueChanged.connect(self.hConnectH)
def vConnectFV(self, a0: int):
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.stackUnder(self.horizontalView)
self.frozenTableView.verticalScrollBar().setValue(a0)
def fVConnectV(self, a0: int):
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.stackUnder(self.horizontalView)
self.verticalScrollBar().setValue(a0)
def hConnectH(self, a0: int):
self.viewport().stackUnder(self.horizontalView)
self.horizontalView.stackUnder(self.frozenTableView)
self.horizontalView.horizontalScrollBar().setValue(a0)
def frozenTableInit(self):
self.frozenTableView.setModel(self.model)
self.frozenTableView.verticalHeader().hide()
self.frozenTableView.setFocusPolicy(Qt.NoFocus)
self.frozenTableView.horizontalHeader().setFixedHeight(self.horizontalHeader().height())
self.frozenTableView.horizontalHeader().setSectionResizeMode(QHeaderView.Fixed)
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.setStyleSheet('QTableView {'
'border: none;'
'background-color: #8EDE21;'
'selection-background-color: #999}')
self.frozenTableView.setSelectionModel(self.selectionModel())
[self.frozenTableView.setColumnHidden(col, True) for col in range(1, self.model.columnCount())]
self.frozenTableView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.frozenTableView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.frozenTableView.show()
self.updateFrozenTableGeometry()
self.frozenTableView.setVerticalScrollMode(self.ScrollPerPixel)
self.setVerticalScrollMode(self.ScrollPerPixel)
self.setHorizontalScrollMode(self.ScrollPerPixel)
def horizontalViewInit(self):
self.horizontalView.setModel(self.model)
self.horizontalView.horizontalHeader().hide()
self.horizontalView.setFocusPolicy(Qt.NoFocus)
self.horizontalView.verticalHeader().setFixedWidth(self.verticalHeader().width())
self.horizontalView.verticalHeader().setSectionResizeMode(QHeaderView.Fixed)
self.frozenTableView.stackUnder(self.horizontalView)
self.horizontalView.setStyleSheet('QTableView { border: none;'
'background-color: #8EDE21;'
'selection-background-color: #999}')
self.horizontalView.setSelectionModel(self.selectionModel())
[self.horizontalView.setRowHidden(row, True) for row in range(1, self.model.rowCount())]
self.horizontalView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.horizontalView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.horizontalView.show()
self.updateFrozenTableGeometry()
self.horizontalView.setHorizontalScrollMode(self.ScrollPerPixel)
def updateFrozenTableGeometry(self):
self.frozenTableView.setGeometry(self.verticalHeader().width() + self.frameWidth(),
self.frameWidth(), self.columnWidth(0),
self.viewport().height() + self.horizontalHeader().height())
self.horizontalView.setGeometry(self.frameWidth(), self.frameWidth() + self.horizontalHeader().height(),
self.viewport().width() + self.verticalHeader().width(), self.rowHeight(0))
def updateSectionWidth(self, logicalIndex, oldSize, newSize):
self.horizontalView.setColumnWidth(logicalIndex, newSize)
if not logicalIndex:
self.frozenTableView.setColumnWidth(0, newSize)
self.updateFrozenTableGeometry()
def updateSectionHeight(self, logicalIndex, oldSize, newSize):
self.frozenTableView.setRowHeight(logicalIndex, newSize)
if not logicalIndex:
self.horizontalView.setRowHeight(0, newSize)
self.updateFrozenTableGeometry()
def resizeEvent(self, e: QtGui.QResizeEvent) -> None:
QTableView.resizeEvent(self, e)
self.updateFrozenTableGeometry()
def scrollTo(self, index: QtCore.QModelIndex, hint: QAbstractItemView.ScrollHint = ...) -> None:
print(index.row, index.column, self.model.data(index))
if index.column() > 0 or index.row() > 0:
QTableView.scrollTo(self, index, hint)
def moveCursor(self, cursorAction: QAbstractItemView.CursorAction,
modifiers: typing.Union[QtCore.Qt.KeyboardModifiers,
QtCore.Qt.KeyboardModifier]) -> QtCore.QModelIndex:
current = QTableView.moveCursor(self, cursorAction, modifiers)
if cursorAction == QAbstractItemView.MoveLeft and current.column() > 0 \
and self.visualRect(current).topLeft().x() < self.frozenTableView.columnWidth(0):
newValue = self.verticalScrollBar().value() + self.visualRect(current).topLeft().x() \
- self.frozenTableView.columnWidth(0)
self.horizontalScrollBar().setValue(newValue)
if cursorAction == QAbstractItemView.MoveUp and current.row() > 0 \
and self.visualRect(current).topLeft().y() < self.horizontalView.rowHeight(0):
newValue = self.horizontalScrollBar().value() + self.visualRect(current).topLeft().y() \
- self.horizontalView.rowHeight(0)
self.verticalScrollBar().setValue(newValue)
return current
'from ui_untitled import Ui_MainWindow
class MainForm(QMainWindow, Ui_MainWindow):
def __init__(self, pareng=None):
super(MainForm, self).__init__(pareng)
self.setupUi(self)
def on_treeView_clicked(self, index): #treeView双击显示Qmodelidx
name = index.data()
self.tableModel_1 = QSqlTableModel()
self.tableModel_1.setTable("name")
self.tableModel_1.setEditStrategy(QSqlTableModel.OnRowChange)
self.tableModel_1.setFilter(self.tr("name_no = %s"%name))
self.tableModel_1.select()
self.tableView_1 = QTableView()
#self.tableView_1.setModel(self.tableModel_1)
self.tableView_1 = FreezeTableView(self.tableModel_1)
#self.tableView_1.show() #No data displayed after cancel
class FreezeTableView(QTableView, QAbstractSlider):
def __init__(self, model):
super(FreezeTableView, self).__init__()
self.model = model
self.frozenTableView = QTableView(self)
self.horizontalView = QTableView(self)
self.up = True
self.init()
def init(self):
self.setModel(self.model)
#print(self.model.rowCount(), self.model.ColumnCount())
self.frozenTableInit()
self.horizontalViewInit()
self.horizontalHeader().sectionResized.connect(self.updateSectionWidth)
self.verticalHeader().sectionResized.connect(self.updateSectionHeight)
self.verticalScrollBar().valueChanged.connect(self.vConnectFV)
self.frozenTableView.verticalScrollBar().valueChanged.connect(self.fVConnectV)
self.horizontalScrollBar().valueChanged.connect(self.hConnectH)
def vConnectFV(self, a0: int):
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.stackUnder(self.horizontalView)
self.frozenTableView.verticalScrollBar().setValue(a0)
def fVConnectV(self, a0: int):
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.stackUnder(self.horizontalView)
self.verticalScrollBar().setValue(a0)
def hConnectH(self, a0: int):
self.viewport().stackUnder(self.horizontalView)
self.horizontalView.stackUnder(self.frozenTableView)
self.horizontalView.horizontalScrollBar().setValue(a0)
def frozenTableInit(self):
self.frozenTableView.setModel(self.model)
self.frozenTableView.verticalHeader().hide()
self.frozenTableView.setFocusPolicy(Qt.NoFocus)
self.frozenTableView.horizontalHeader().setFixedHeight(self.horizontalHeader().height())
self.frozenTableView.horizontalHeader().setSectionResizeMode(QHeaderView.Fixed)
self.viewport().stackUnder(self.frozenTableView)
self.frozenTableView.setStyleSheet('QTableView {'
'border: none;'
'background-color: #8EDE21;'
'selection-background-color: #999}')
self.frozenTableView.setSelectionModel(self.selectionModel())
[self.frozenTableView.setColumnHidden(col, True) for col in range(1, self.model.columnCount())]
self.frozenTableView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.frozenTableView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.frozenTableView.show()
self.updateFrozenTableGeometry()
self.frozenTableView.setVerticalScrollMode(self.ScrollPerPixel)
self.setVerticalScrollMode(self.ScrollPerPixel)
self.setHorizontalScrollMode(self.ScrollPerPixel)
def horizontalViewInit(self):
self.horizontalView.setModel(self.model)
self.horizontalView.horizontalHeader().hide()
self.horizontalView.setFocusPolicy(Qt.NoFocus)
self.horizontalView.verticalHeader().setFixedWidth(self.verticalHeader().width())
self.horizontalView.verticalHeader().setSectionResizeMode(QHeaderView.Fixed)
self.frozenTableView.stackUnder(self.horizontalView)
self.horizontalView.setStyleSheet('QTableView { border: none;'
'background-color: #8EDE21;'
'selection-background-color: #999}')
self.horizontalView.setSelectionModel(self.selectionModel())
[self.horizontalView.setRowHidden(row, True) for row in range(1, self.model.rowCount())]
self.horizontalView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.horizontalView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
self.horizontalView.show()
self.updateFrozenTableGeometry()
self.horizontalView.setHorizontalScrollMode(self.ScrollPerPixel)
def updateFrozenTableGeometry(self):
self.frozenTableView.setGeometry(self.verticalHeader().width() + self.frameWidth(),
self.frameWidth(), self.columnWidth(0),
self.viewport().height() + self.horizontalHeader().height())
self.horizontalView.setGeometry(self.frameWidth(), self.frameWidth() + self.horizontalHeader().height(),
self.viewport().width() + self.verticalHeader().width(), self.rowHeight(0))
def updateSectionWidth(self, logicalIndex, oldSize, newSize):
self.horizontalView.setColumnWidth(logicalIndex, newSize)
if not logicalIndex:
self.frozenTableView.setColumnWidth(0, newSize)
self.updateFrozenTableGeometry()
def updateSectionHeight(self, logicalIndex, oldSize, newSize):
self.frozenTableView.setRowHeight(logicalIndex, newSize)
if not logicalIndex:
self.horizontalView.setRowHeight(0, newSize)
self.updateFrozenTableGeometry()
def resizeEvent(self, e: QtGui.QResizeEvent) -> None:
QTableView.resizeEvent(self, e)
self.updateFrozenTableGeometry()
def scrollTo(self, index: QtCore.QModelIndex, hint: QAbstractItemView.ScrollHint = ...) -> None:
print(index.row, index.column, self.model.data(index))
if index.column() > 0 or index.row() > 0:
QTableView.scrollTo(self, index, hint)
def moveCursor(self, cursorAction: QAbstractItemView.CursorAction,
modifiers: typing.Union[QtCore.Qt.KeyboardModifiers,
QtCore.Qt.KeyboardModifier]) -> QtCore.QModelIndex:
current = QTableView.moveCursor(self, cursorAction, modifiers)
if cursorAction == QAbstractItemView.MoveLeft and current.column() > 0 \
and self.visualRect(current).topLeft().x() < self.frozenTableView.columnWidth(0):
newValue = self.verticalScrollBar().value() + self.visualRect(current).topLeft().x() \
- self.frozenTableView.columnWidth(0)
self.horizontalScrollBar().setValue(newValue)
if cursorAction == QAbstractItemView.MoveUp and current.row() > 0 \
and self.visualRect(current).topLeft().y() < self.horizontalView.rowHeight(0):
newValue = self.horizontalScrollBar().value() + self.visualRect(current).topLeft().y() \
- self.horizontalView.rowHeight(0)
self.verticalScrollBar().setValue(newValue)
return current

why is my code skipping my index page?

Whenever I go to the login site (0.0.0.0:5000/), the page immediately reverts to 0.0.0.0:5000/admin. Any ideas on why this is happening?
running through yocto on intel edison. editing via putty
#app.errorhandler(404)
#app.errorhandler(500)
def errorpage(e):
return render_template('404.html')
def login_required(f):
#wraps(f)
def wrap(*args, **kwargs):
if 'logged_in' in session:
return f(*args, **kwargs)
else:
flash('please login first.')
return redirect (url_for('index'))
return wrap
#app.route('/', methods=['GET','POST'])
def index():
error = None
if request.method == 'POST':
if request.form['username'] != 'admin' or request.form['password'] != 'password':
error = 'invalid attempt.'
else:
session['logged_in'] = True
return redirect(url_for('admin'))
return render_template('index.html', error = error)
#app.route('/logout')
def logout():
session.pop('logged_in', None)
return render_template('/logout.html')
#app.route('/admin',methods=['GET','POST'])
#login_required
def admin():
It looks like as soon as anyone hits the page with a GET, it's set as logged_in = True and redirects to /admin
#app.route('/', methods=['GET','POST'])
def index():
error = None
if request.method == 'POST':
if request.form['username'] != 'admin' or request.form['password'] != 'password':
error = 'invalid attempt.'
else: # this is always triggered for GETs, even if not logged in!
session['logged_in'] = True
return redirect(url_for('admin'))
return render_template('index.html', error = error)
Instead, you should be checking that they're already logged in, and set the logged_in property in the POST block instead, after credentials have been checked.
Actually, it just looks like your indenting is off on your else block:
#app.route('/', methods=['GET','POST'])
def index():
error = None
if request.method == 'POST':
if request.form['username'] != 'admin' or request.form['password'] != 'password':
error = 'invalid'
else: # this indenting should work now
session['logged_in'] = True
return redirect(url_for('admin'))
return render_template('index.html', error = error)

Serialize Gtk TreeStore / ListStore using JSON

I made a new example which shows much better what I am trying to do. The new example gives the following ouput. Is there a way that the data can go into the respective store key (the {} brackets)?
{
"copy": [
[
[
5.0,
8.0,
9.0
]
],
[
[
4.0,
0.0,
1.0
]
]
],
"name": "dataset1",
"sets": [
{
"store": {},
"type": "vector"
},
{
"store": {},
"type": "vector"
}
]
}
New example
from gi.repository import Gtk
import json
import random
class Vector(object):
def __init__(self, data):
self.store = Gtk.ListStore(float, float, float)
self.store.append([data[0], data[1], data[2]])
self.type = "vector"
def return_data(self):
store_data = []
def iterate_over_data(model, path, itr):
row = model[path]
store_data.append([row[0], row[1], row[2]])
self.store.foreach(iterate_over_data)
return store_data
class DataSet(object):
def __init__(self, name):
self.name = name
self.sets = []
def add_vector(self):
data = [random.randint(0,9) for x in range(3)]
self.sets.append(Vector(data))
def to_json(self):
self.copy = []
for s in self.sets:
self.copy.append(s.return_data())
return json.dumps(self, default=lambda o: o.__dict__,
sort_keys=True, indent=4)
obj1 = DataSet("dataset1")
for x in range(2):
obj1.add_vector()
print(obj1.to_json())
Old example
I am currently figuring out how to serialize a Gtk ListStore that is nested in a Gtk TreeStore. I got a small example to work, but am not sure if this approach will scale for programs that have more data attached (For example the layer object could hold a color or a date of creation). Is there maybe another way to to this?
My current approach is to gather the data in list and dictionary form myself and then just create the JSON-dump. I have the feeling that this would be rather difficult to maintain if I need to attach 25 values to each layer-object.
from gi.repository import Gtk, Gdk
import json
import random
class LayerTreeView(Gtk.TreeView):
def __init__(self, store):
Gtk.TreeView.__init__(self, store)
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Name", renderer, text=0)
self.append_column(column)
class DataTreeView(Gtk.TreeView):
def __init__(self, store):
Gtk.TreeView.__init__(self, store)
self.store = store
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Data", renderer, text=0)
self.append_column(column)
class MainWindow(Gtk.Window):
def __init__(self):
Gtk.Window.__init__(self, title="TreeView Serialize")
self.connect("delete-event", Gtk.main_quit)
self.set_border_width(10)
self.set_default_size(400, 300)
vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6, expand=True)
self.add(vbox)
self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
button = Gtk.Button("Cut")
button.connect("clicked", self.on_cut_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_COPY)
button.connect("clicked", self.on_copy_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_PASTE)
button.connect("clicked", self.on_paste_clicked)
hbox.pack_start(button, True, True, 0)
vbox.add(hbox)
self.layer_store = Gtk.TreeStore(str, object, object)
self.layer_view = LayerTreeView(self.layer_store)
self.layer_sw = Gtk.ScrolledWindow()
self.data_sw = Gtk.ScrolledWindow()
self.layer_sw.add(self.layer_view)
treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6, expand=True)
treebox.pack_start(self.layer_sw, True, True, 0)
treebox.pack_start(self.data_sw, True, True, 0)
vbox.add(treebox)
self.select = self.layer_view.get_selection()
self.select.connect("changed", self.on_selection_changed)
self.add_test_data()
def add_test_data(self):
for x in range(3):
data_store = Gtk.ListStore(str)
data_view = DataTreeView(data_store)
for y in range(5):
data_store.append([str(y+x)])
self.layer_store.append(None, ["Data {}".format(x), data_store, data_view])
def on_selection_changed(self, selection):
"""
When layer is switched load respective data
"""
model, treeiter = selection.get_selected()
if treeiter != None:
data_view = model[treeiter][2]
child = self.data_sw.get_child()
if child != None:
self.data_sw.remove(self.data_sw.get_child())
self.data_sw.add(data_view)
self.show_all()
def on_cut_clicked(self, button):
pass
def on_copy_clicked(self, button):
copy_list = ["safe-to-paste"]
data_dict = {}
for row in self.layer_store:
name = row[0]
data_obj = row[1]
value_list = []
for datarow in data_obj:
value = datarow[0]
value_list.append(value)
data_dict[name] = value_list
copy_list.append(data_dict)
data = json.dumps(copy_list)
self.clipboard.set_text(data, -1)
def on_paste_clicked(self, button):
paste_str = self.clipboard.wait_for_text()
try:
parse = json.loads(paste_str)
json_str = True
except:
json_str = False
if json_str is False:
return
keyword = parse[0]
if keyword != "safe-to-paste":
return
data_dict = parse[1]
for x in data_dict:
data_list = data_dict[x]
data_store = Gtk.ListStore(str)
data_view = DataTreeView(data_store)
for y in data_list:
data_store.append([str(y)])
self.layer_store.append(None, [x, data_store, data_view])
win = MainWindow()
win.show_all()
Gtk.main()
I have an improved version of your code with dict comprehension and #staticmethod that makes the signal callbacks more readable and shorter. Nevertheless, this does not really solve your problem as it still generates the json manually. If the ListStore gets more complex, it would probably be better to let the DataListStore class generate its own json with a corresponding method.
from gi.repository import Gtk, Gdk
import json
class LayerTreeView(Gtk.TreeView):
def __init__(self, store):
Gtk.TreeView.__init__(self, store)
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Name", renderer, text=0)
self.append_column(column)
class DataTreeView(Gtk.TreeView):
def __init__(self):
Gtk.TreeView.__init__(self)
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Data", renderer, text=0)
self.append_column(column)
class DataListStore(Gtk.ListStore):
#staticmethod
def from_json(*args, values=[]):
store = DataListStore(*args)
for value in values:
store.append((value,))
return store
class MainWindow(Gtk.Window):
def __init__(self):
Gtk.Window.__init__(self, title="TreeView Serialize")
self.connect("delete-event", Gtk.main_quit)
self.set_border_width(10)
self.set_default_size(400, 300)
vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6, expand=True)
self.add(vbox)
self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
button = Gtk.Button("Cut")
button.connect("clicked", self.on_cut_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_COPY)
button.connect("clicked", self.on_copy_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_PASTE)
button.connect("clicked", self.on_paste_clicked)
hbox.pack_start(button, True, True, 0)
vbox.add(hbox)
self.layer_store = Gtk.TreeStore(str, object)
self.layer_view = LayerTreeView(self.layer_store)
self.data_view = DataTreeView()
layer_sw = Gtk.ScrolledWindow()
layer_sw.add(self.layer_view)
data_sw = Gtk.ScrolledWindow()
data_sw.add(self.data_view)
treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6, expand=True)
treebox.pack_start(layer_sw, True, True, 0)
treebox.pack_start(data_sw, True, True, 0)
vbox.add(treebox)
select = self.layer_view.get_selection()
select.connect("changed", self.on_selection_changed)
self.add_test_data()
def add_test_data(self):
for x in range(3):
data_list = [str(y+x) for y in range(5)]
self.layer_store.append(None, ["Data {}".format(x), data_list])
def on_selection_changed(self, selection):
"""
When layer is switched load respective data
"""
model, treeiter = selection.get_selected()
if treeiter != None:
self.data_view.set_model(
DataListStore.from_json(str, values=model[treeiter][1])
)
def on_cut_clicked(self, button):
pass
def on_copy_clicked(self, button):
copy_list = [
'safe-to-paste',
{row[0]: row[1] for row in self.layer_store},
]
data = json.dumps(copy_list)
self.clipboard.set_text(data, -1)
def on_paste_clicked(self, button):
paste_str = self.clipboard.wait_for_text()
try:
parse = json.loads(paste_str)
except:
return
if parse[0] != "safe-to-paste":
return
data_dict = parse[1]
for x in data_dict:
self.layer_store.append(None, [x, data_dict[x]])
win = MainWindow()
win.show_all()
Gtk.main()

scrapy unhandled exception

I am using scrapy 0.16.2 version on linux. I'm running:
scrapy crawl mycrawlspider -s JOBDIR=/mnt/mycrawlspider
I'm getting this error which blocks scrapy (hangs and doesn't finish automatically, only ^C stops it)
2012-11-20 15:04:51+0000 [-] Unhandled Error Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/scrapy/commands/crawl.py", line 45, in run
self.crawler.start() File "/usr/lib/python2.7/site-packages/scrapy/crawler.py", line 80, in start
reactor.run(installSignalHandlers=False) # blocking call File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1169, in run
self.mainLoop() File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1178, in mainLoop
self.runUntilCurrent() --- <exception caught here> --- File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 800, in runUntilCurrent
call.func(*call.args, **call.kw) File "/usr/lib/python2.7/site-packages/scrapy/utils/reactor.py", line 41, in __call__
return self._func(*self._a, **self._kw) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 116, in
_next_request
self.crawl(request, spider) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 172, in crawl
self.schedule(request, spider) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 176, in schedule
return self.slots[spider].scheduler.enqueue_request(request) File "/usr/lib/python2.7/site-packages/scrapy/core/scheduler.py", line 48, in enqueue_request
if not request.dont_filter and self.df.request_seen(request): exceptions.AttributeError: 'NoneType' object has no attribute 'dont_filter'
BTW this worked in version 0.14
Here is the code:
class MySpider(CrawlSpider):
name = 'alrroya'
NEW_IGNORED_EXTENSIONS = list(IGNORED_EXTENSIONS)
NEW_IGNORED_EXTENSIONS.remove('pdf')
download_delay = 0.05
# Stay within these domains when crawling
allowed_domains = []
all_domains = {}
start_urls = []
# Add our callback which will be called for every found link
rules = [
Rule(SgmlLinkExtractor(deny_extensions=NEW_IGNORED_EXTENSIONS, tags=('a', 'area', 'frame', 'iframe'), attrs=('href', 'src')), follow=True, callback='parse_crawled_page')
]
# How many pages crawled
crawl_count = 0
# How many PDFs we have found
pdf_count = 0
def __init__(self, *args, **kwargs):
CrawlSpider.__init__(self, *args, **kwargs)
dispatcher.connect(self._spider_closed, signals.spider_closed)
dispatcher.connect(self._spider_opened, signals.spider_opened)
self.load_allowed_domains_and_start_urls()
def allowed_to_start(self):
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = self.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
reason = True
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
reason = True
else:
reason = False
else:
reason = True
return reason
def _spider_opened(self, spider):
if spider is not self:
return
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
crawler.engine.close_spider(self, 'finished')
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
os._exit(1)
else:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
def _spider_closed(self, spider, reason):
if spider is not self:
return
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
if 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('shutdown\n')
f.write(str(date.today()))
f.close()
else:
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
def _requests_to_follow(self, response):
if getattr(response, 'encoding', None) != None:
return CrawlSpider._requests_to_follow(self, response)
else:
return []
def make_requests_from_url(self, url):
http_client = httplib2.Http()
try:
headers = {
'content-type': 'text/html',
'user-agent': random.choice(USER_AGENT_LIST)
}
response, content = http_client.request(url, method='HEAD', headers=headers)
#~ if 'pdf' in response['content-type'].lower() or (url.endswith('.pdf') and 'octet-stream' in response['content-type'].lower()):
if 'pdf' in response['content-type'].lower() or 'octet-stream' in response['content-type'].lower():
if self.allowed_to_start():
self.get_pdf_link(url)
else:
return CrawlSpider.make_requests_from_url(self, url)
except Exception as ex:
return CrawlSpider.make_requests_from_url(self, url)
def get_pdf_link(self, url):
source = self.__class__.name
parsed_url = urlparse(url)
url_domain = parsed_url.netloc
url_path = parsed_url.path
if url_domain:
for domain, paths in self.__class__.all_domains[source]['allow_domains'].iteritems():
if url_domain.endswith(domain):
pre_and = False
pre_or = False
and_cond = True
or_cond = False
for path in paths:
if path[0:1] == '!':
pre_and = True
if path[1:] not in url_path:
and_cond = and_cond and True
else:
and_cond = and_cond and False
else:
pre_or = True
if path in url_path:
or_cond = or_cond or True
else:
or_cond = or_cond or False
if pre_and and pre_or:
if and_cond and or_cond:
self.pdf_process(source, url)
return
elif pre_and:
if and_cond:
self.pdf_process(source, url)
return
elif pre_or:
if or_cond:
self.pdf_process(source, url)
return
else:
self.pdf_process(source, url)
return
def parse_crawled_page(self, response):
self.__class__.crawl_count += 1
crawl_count = self.__class__.crawl_count
if crawl_count % 100 == 0:
print 'Crawled %d pages' % crawl_count
if 'pdf' in response.headers.get('content-type', '').lower():
self.get_pdf_link(response.url)
return Item()
def load_allowed_domains_and_start_urls(self):
day = timedelta(days=1)
currdate = date.today()
alrroya = ('http://epaper.alrroya.com/currentissues.php?editiondt=' + currdate.strftime('%Y/%m/%d'),)
self.__class__.all_domains = {
'alrroya': {
'start_urls': alrroya,
'allow_domains': {
'epaper.alrroya.com': frozenset(()),
}
}
}
for domain in self.__class__.all_domains[self.__class__.name]['allow_domains']:
self.__class__.allowed_domains.append(domain)
self.__class__.start_urls.extend(self.__class__.all_domains[self.__class__.name]['start_urls'])
def pdf_process(self, source, url):
print '!!! ' + source + ' ' + url
This appears to be a bug in Scrapy. The current version doesn't seem to accept lists returned from make_requests_from_url(). I was able to modify the Scrapy code in the following way to work around the issue.
In the file Scrapy-0.16.5-py2.7.egg/scrapy/spider.py
Change:
def start_requests(self):
for url in self.start_urls:
yield self.make_requests_from_url(url)
To:
def start_requests(self):
for url in self.start_urls:
requests = self.make_requests_from_url(url)
if type(requests) is list:
for request in requests:
yield request
else:
yield requests
I expect that the official Scrapy people will fix this eventually.