Python 2's webbrowser is not working in repl.it - google-chrome
I am using repl.it together with the webbrowser module, and I am trying to open a link with webbrowser.open(<link to open>).
However, the link does not open. Can you help me with this?
Here is my code:
import webbrowser

# Page to open; new=2 asks the browser for a new tab where supported.
REGISTRATION_URL = "https://www.daffodilday.com.au/get-involved/register-your-school/"
webbrowser.open(REGISTRATION_URL, new=2)
And a not-so-quick overview of the webbrowser module:
import os, sys, shlex, stat, subprocess, time
# Names exported by ``from <this module> import *``.
__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"]
class Error(Exception):
    """Exception type raised by this module's browser-control functions."""
# Registry of known browsers: lower-cased name -> [class, instance].
_browsers = {}
# Browser names in the order in which they should be tried.
_tryorder = []


def register(name, klass, instance=None, update_tryorder=1):
    """Register a browser under *name*.

    The entry is stored in ``_browsers`` keyed by the lower-cased name as
    ``[klass, instance]``.

    update_tryorder controls the preference list ``_tryorder``:
      > 0  append *name* (lowest preference so far),
      < 0  insert *name* at the front (highest preference),
      == 0 leave the preference list untouched.
    """
    _browsers[name.lower()] = [klass, instance]
    if update_tryorder > 0:
        _tryorder.append(name)
    elif update_tryorder < 0:
        _tryorder.insert(0, name)
def get(using=None):
    """Return a browser launcher instance.

    using -- a registered browser name, a path to an executable, or a
             command line containing '%s' (replaced by the URL).  When
             None, every name in ``_tryorder`` is considered in order.

    Raises Error when none of the alternatives yields a controller.
    """
    if using is not None:
        alternatives = [using]
    else:
        alternatives = _tryorder
    for browser in alternatives:
        if '%s' in browser:
            # A full command line was given: split it into name and args.
            browser = shlex.split(browser)
            if browser[-1] == '&':
                # A trailing '&' requests a background launch.
                return BackgroundBrowser(browser[:-1])
            else:
                return GenericBrowser(browser)
        else:
            # A browser name or path was given.
            try:
                command = _browsers[browser.lower()]
            except KeyError:
                command = _synthesize(browser)
            if command[1] is not None:
                # A ready-made instance is registered.
                return command[1]
            elif command[0] is not None:
                # Only a class is registered: instantiate it.
                return command[0]()
    raise Error("could not locate runnable browser")
def open(url, new=0, autoraise=True):
    """Open *url* with the first registered browser that succeeds.

    Each name in ``_tryorder`` is resolved with get() and asked to open the
    URL; *new* and *autoraise* are passed through unchanged.

    Returns True as soon as one browser reports success, False if none do.
    Note that this function shadows the builtin ``open`` in this module.
    """
    for name in _tryorder:
        browser = get(name)
        if browser.open(url, new, autoraise):
            return True
    return False
def open_new(url):
    """Open *url* via open() with new=1 and return its result."""
    return open(url, 1)
def open_new_tab(url):
    """Open *url* via open() with new=2 and return its result."""
    return open(url, 2)
def _synthesize(browser, update_tryorder=1):
    """Try to build a controller for *browser* from a registered one.

    *browser* is a command (optionally with arguments).  If its executable
    exists and a controller instance is registered under the executable's
    base name, a shallow copy of that controller is made, renamed to
    *browser*, registered, and returned.

    Returns ``[None, controller]`` on success and ``[None, None]`` otherwise.
    """
    cmd = browser.split()[0]
    if not _iscommand(cmd):
        return [None, None]
    name = os.path.basename(cmd)
    try:
        command = _browsers[name.lower()]
    except KeyError:
        return [None, None]
    # Now attempt to clone the controller to fit the new name.
    controller = command[1]
    if controller and name.lower() == controller.basename:
        import copy
        controller = copy.copy(controller)
        controller.name = browser
        controller.basename = os.path.basename(browser)
        register(browser, None, controller, update_tryorder)
        return [None, controller]
    return [None, None]
if sys.platform[:3] == "win":
    def _isexecutable(cmd):
        """Return True if *cmd* names an existing .exe/.bat file.

        The check is case-insensitive; if *cmd* itself does not match, the
        ".exe" and ".bat" extensions are appended and tried in turn.
        """
        cmd = cmd.lower()
        if os.path.isfile(cmd) and cmd.endswith((".exe", ".bat")):
            return True
        for ext in ".exe", ".bat":
            if os.path.isfile(cmd + ext):
                return True
        return False
else:
    def _isexecutable(cmd):
        """Return True if *cmd* is a regular file with any execute bit set."""
        if os.path.isfile(cmd):
            mode = os.stat(cmd)[stat.ST_MODE]
            # Any of the user/group/other execute bits is sufficient.
            if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH:
                return True
        return False
def _iscommand(cmd):
    """Return True if *cmd* is executable directly or via a PATH directory.

    *cmd* is first tested as given; if that fails, it is joined onto each
    directory listed in the PATH environment variable.  An unset or empty
    PATH yields False.
    """
    if _isexecutable(cmd):
        return True
    path = os.environ.get("PATH")
    if not path:
        return False
    for d in path.split(os.pathsep):
        exe = os.path.join(d, cmd)
        if _isexecutable(exe):
            return True
    return False
class BaseBrowser(object):
    """Parent class for all browsers. Do not use directly."""

    # Default argument template; '%s' is replaced by the URL by subclasses.
    args = ['%s']

    def __init__(self, name=""):
        """Store *name* as both the browser name and its base name."""
        self.name = name
        self.basename = name

    def open(self, url, new=0, autoraise=True):
        """Open *url*; must be implemented by subclasses."""
        raise NotImplementedError

    def open_new(self, url):
        """Open *url* by calling open() with new=1."""
        return self.open(url, 1)

    def open_new_tab(self, url):
        """Open *url* by calling open() with new=2."""
        return self.open(url, 2)
class GenericBrowser(BaseBrowser):
    """Browser started with a plain command line and waited for."""

    def __init__(self, name):
        """Accept either a command name or a ``[command, arg, ...]`` sequence.

        For a string, the argument template defaults to ``["%s"]``; for a
        sequence, the first item is the command and the rest are the
        argument templates.
        """
        if isinstance(name, basestring):
            self.name = name
            self.args = ["%s"]
        else:
            # name should be a list with arguments
            self.name = name[0]
            self.args = name[1:]
        self.basename = os.path.basename(self.name)

    def open(self, url, new=0, autoraise=True):
        """Run the command with '%s' replaced by *url* and wait for it.

        Returns True when the process exits with status 0, and False on a
        non-zero status or when the process cannot be started (OSError).
        *new* and *autoraise* are accepted but not used.
        """
        cmdline = [self.name] + [arg.replace("%s", url)
                                 for arg in self.args]
        try:
            if sys.platform[:3] == 'win':
                p = subprocess.Popen(cmdline)
            else:
                p = subprocess.Popen(cmdline, close_fds=True)
            return not p.wait()
        except OSError:
            return False
class BackgroundBrowser(GenericBrowser):
    """Browser started with a command line and left running."""

    def open(self, url, new=0, autoraise=True):
        """Start the command with '%s' replaced by *url* without waiting.

        Returns True if the process is still running right after launch,
        False if it has already exited or could not be started (OSError).
        *new* and *autoraise* are accepted but not used.
        """
        cmdline = [self.name] + [arg.replace("%s", url)
                                 for arg in self.args]
        try:
            if sys.platform[:3] == 'win':
                p = subprocess.Popen(cmdline)
            else:
                # Prefer os.setsid, fall back to os.setpgrp, so the child
                # is detached from this process's session/group.
                setsid = getattr(os, 'setsid', None)
                if not setsid:
                    setsid = getattr(os, 'setpgrp', None)
                p = subprocess.Popen(cmdline, close_fds=True, preexec_fn=setsid)
            return (p.poll() is None)
        except OSError:
            return False
class UnixBrowser(BaseBrowser):
    """Parent class for Unix browsers that support remote invocation.

    Subclasses configure behaviour through the class attributes below.
    """

    # Two-item sequence indexed by int(autoraise); a falsy entry adds nothing.
    raise_opts = None
    # Argument templates for remote invocation: '%action' and '%s' are
    # replaced by the selected action and the URL.
    remote_args = ['%action', '%s']
    # Action strings substituted for '%action' when new is 0, 1 and 2.
    remote_action = None
    remote_action_newwin = None
    remote_action_newtab = None
    # When True, a direct (non-remote) launch is not waited for.
    background = False
    # When True, stdout is redirected along with stdin/stderr.
    redirect_stdout = True

    def _invoke(self, args, remote, autoraise):
        """Launch the browser with *args* and report success as a bool.

        remote    -- True for a remote-control invocation; the process is
                     polled for up to five seconds.
        autoraise -- only used for remote invocations, to select an entry
                     from ``raise_opts``.
        """
        raise_opt = []
        if remote and self.raise_opts:
            # use autoraise argument only for remote invocation
            autoraise = int(autoraise)
            opt = self.raise_opts[autoraise]
            if opt:
                raise_opt = [opt]

        cmdline = [self.name] + raise_opt + args

        if remote or self.background:
            # ``file`` is used because ``open`` is redefined in this module.
            inout = file(os.devnull, "r+")
        else:
            # Foreground launch: the child inherits stdin/stdout/stderr.
            inout = None
        # Prefer os.setsid, fall back to os.setpgrp, so the child runs in
        # its own session/process group when the platform allows it.
        setsid = getattr(os, 'setsid', None)
        if not setsid:
            setsid = getattr(os, 'setpgrp', None)

        p = subprocess.Popen(cmdline, close_fds=True, stdin=inout,
                             stdout=(inout if self.redirect_stdout else None),
                             stderr=inout, preexec_fn=setsid)
        if remote:
            # Wait up to five seconds in total (1s + 4s).  A process that
            # is still running after that is treated as a success.
            time.sleep(1)
            rc = p.poll()
            if rc is None:
                time.sleep(4)
                rc = p.poll()
                if rc is None:
                    return True
            # A non-zero exit makes open() fall back to direct invocation.
            return not rc
        elif self.background:
            if p.poll() is None:
                return True
            else:
                return False
        else:
            return not p.wait()

    def open(self, url, new=0, autoraise=True):
        """Open *url*, first remotely and then by direct invocation.

        new -- 0, 1 or 2, selecting remote_action, remote_action_newwin or
               remote_action_newtab (falling back to remote_action_newwin
               when no tab action is defined).

        Raises Error for any other value of *new*.
        Returns True on success, otherwise the result of the direct launch.
        """
        if new == 0:
            action = self.remote_action
        elif new == 1:
            action = self.remote_action_newwin
        elif new == 2:
            if self.remote_action_newtab is None:
                action = self.remote_action_newwin
            else:
                action = self.remote_action_newtab
        else:
            raise Error("Bad 'new' parameter to open(); " +
                        "expected 0, 1, or 2, got %s" % new)

        args = [arg.replace("%s", url).replace("%action", action)
                for arg in self.remote_args]
        success = self._invoke(args, True, autoraise)
        if not success:
            # remote invocation failed, try straight way
            args = [arg.replace("%s", url) for arg in self.args]
            return self._invoke(args, False, False)
        else:
            return True
class Mozilla(UnixBrowser):
    """Launcher configuration for Mozilla/Netscape-style browsers."""

    raise_opts = ["-noraise", "-raise"]
    remote_args = ['-remote', 'openURL(%s%action)']
    remote_action = ""
    remote_action_newwin = ",new-window"
    remote_action_newtab = ",new-tab"
    background = True


# Netscape uses the same launcher configuration.
Netscape = Mozilla
class Galeon(UnixBrowser):
    """Launcher configuration for Galeon-style browsers."""

    raise_opts = ["-noraise", ""]
    remote_args = ['%action', '%s']
    remote_action = "-n"
    remote_action_newwin = "-w"
    background = True
class Chrome(UnixBrowser):
    """Launcher configuration for Google Chrome / Chromium."""

    remote_args = ['%action', '%s']
    remote_action = ""
    remote_action_newwin = "--new-window"
    remote_action_newtab = ""
    background = True


# Chromium uses the same launcher configuration.
Chromium = Chrome
class Opera(UnixBrowser):
    """Launcher configuration for the Opera browser."""

    raise_opts = ["-noraise", ""]
    remote_args = ['-remote', 'openURL(%s%action)']
    remote_action = ""
    remote_action_newwin = ",new-window"
    remote_action_newtab = ",new-page"
    background = True
class Elinks(UnixBrowser):
    """Launcher configuration for the Elinks browser."""

    remote_args = ['-remote', 'openURL(%s%action)']
    remote_action = ""
    remote_action_newwin = ",new-window"
    remote_action_newtab = ",new-tab"
    # Runs in the foreground, and stdout is left un-redirected.
    background = False
    redirect_stdout = False
class Konqueror(BaseBrowser):
    """Controller that tries kfmclient, then konqueror, then kfm."""

    def open(self, url, new=0, autoraise=True):
        """Open *url* using the first of three launchers that works.

        new=2 selects kfmclient's "newTab" action; any other value selects
        "openURL".  *autoraise* is accepted but not used.

        Returns True if kfmclient could be started (after waiting for it),
        or if konqueror/kfm is still running right after launch; returns
        False if the last fallback (kfm) cannot be started.
        """
        if new == 2:
            action = "newTab"
        else:
            action = "openURL"

        # ``file`` is used because ``open`` is redefined in this module.
        devnull = file(os.devnull, "r+")
        # Prefer os.setsid, fall back to os.setpgrp, for the fallbacks below.
        setsid = getattr(os, 'setsid', None)
        if not setsid:
            setsid = getattr(os, 'setpgrp', None)

        try:
            p = subprocess.Popen(["kfmclient", action, url],
                                 close_fds=True, stdin=devnull,
                                 stdout=devnull, stderr=devnull)
        except OSError:
            # fall through to next variant
            pass
        else:
            p.wait()
            # kfmclient's exit status is deliberately not inspected here.
            return True

        try:
            p = subprocess.Popen(["konqueror", "--silent", url],
                                 close_fds=True, stdin=devnull,
                                 stdout=devnull, stderr=devnull,
                                 preexec_fn=setsid)
        except OSError:
            # fall through to next variant
            pass
        else:
            if p.poll() is None:
                # Still running: treat as success.
                return True

        try:
            p = subprocess.Popen(["kfm", "-d", url],
                                 close_fds=True, stdin=devnull,
                                 stdout=devnull, stderr=devnull,
                                 preexec_fn=setsid)
        except OSError:
            return False
        else:
            return (p.poll() is None)
class Grail(BaseBrowser):
    """Controller that talks to a running Grail browser over a Unix socket."""

    def _find_grail_rc(self):
        """Return a socket connected to a live Grail instance, or None.

        Candidate socket files are ``<tempdir>/.grail-unix/<user>-*``.
        Candidates that refuse the connection are removed on a best-effort
        basis.
        """
        import glob
        import pwd
        import socket
        import tempfile
        tempdir = os.path.join(tempfile.gettempdir(),
                               ".grail-unix")
        user = pwd.getpwuid(os.getuid())[0]
        filename = os.path.join(tempdir, user + "-*")
        maybes = glob.glob(filename)
        if not maybes:
            return None
        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        for fn in maybes:
            # need to PING each one until we find one that's live
            try:
                s.connect(fn)
            except socket.error:
                # no good; attempt to clean it out, but don't fail:
                try:
                    os.unlink(fn)
                except (IOError, OSError):
                    # os.unlink reports failures as OSError; catching only
                    # IOError let them escape despite the best-effort intent.
                    pass
            else:
                return s
        # No live instance found: release the unused socket.
        s.close()
        return None

    def _remote(self, action):
        """Send *action* to Grail; return 1 on success, 0 if none is running."""
        s = self._find_grail_rc()
        if not s:
            return 0
        s.send(action)
        s.close()
        return 1

    def open(self, url, new=0, autoraise=True):
        """Ask Grail to load *url* ("LOADNEW" when *new* is truthy, else "LOAD").

        Returns the 1/0 result of the remote command.
        """
        if new:
            ok = self._remote("LOADNEW " + url)
        else:
            ok = self._remote("LOAD " + url)
        return ok
def register_X_browsers():
    """Register every graphical browser whose command is available.

    Registration order determines preference, because each register() call
    here appends to the try-order list.
    """
    # Desktop-neutral opener.
    if _iscommand("xdg-open"):
        register("xdg-open", None, BackgroundBrowser("xdg-open"))

    # GNOME openers, only inside a GNOME session.
    if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gvfs-open"):
        register("gvfs-open", None, BackgroundBrowser("gvfs-open"))

    if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gnome-open"):
        register("gnome-open", None, BackgroundBrowser("gnome-open"))

    # KDE opener, only inside a KDE session.
    if "KDE_FULL_SESSION" in os.environ and _iscommand("kfmclient"):
        register("kfmclient", Konqueror, Konqueror("kfmclient"))

    if _iscommand("x-www-browser"):
        register("x-www-browser", None, BackgroundBrowser("x-www-browser"))

    # Mozilla/Netscape family.
    for browser in ("mozilla-firefox", "firefox",
                    "mozilla-firebird", "firebird",
                    "iceweasel", "iceape",
                    "seamonkey", "mozilla", "netscape"):
        if _iscommand(browser):
            register(browser, None, Mozilla(browser))

    # Konqueror/kfm: only one of the two is registered.
    if _iscommand("kfm"):
        register("kfm", Konqueror, Konqueror("kfm"))
    elif _iscommand("konqueror"):
        register("konqueror", Konqueror, Konqueror("konqueror"))

    # Galeon and Epiphany share the Galeon launcher.
    for browser in ("galeon", "epiphany"):
        if _iscommand(browser):
            register(browser, None, Galeon(browser))

    if _iscommand("skipstone"):
        register("skipstone", None, BackgroundBrowser("skipstone"))

    # Google Chrome / Chromium.
    for browser in ("google-chrome", "chrome", "chromium", "chromium-browser"):
        if _iscommand(browser):
            register(browser, None, Chrome(browser))

    if _iscommand("opera"):
        register("opera", None, Opera("opera"))

    if _iscommand("mosaic"):
        register("mosaic", None, BackgroundBrowser("mosaic"))

    # Grail is registered by class only; it is instantiated on demand.
    if _iscommand("grail"):
        register("grail", Grail, None)
# Register graphical browsers first when an X display is configured.
if os.environ.get("DISPLAY"):
    register_X_browsers()

# Then register console browsers when running in a terminal.
if os.environ.get("TERM"):
    if _iscommand("www-browser"):
        register("www-browser", None, GenericBrowser("www-browser"))
    if _iscommand("links"):
        register("links", None, GenericBrowser("links"))
    if _iscommand("elinks"):
        register("elinks", None, Elinks("elinks"))
    if _iscommand("lynx"):
        register("lynx", None, GenericBrowser("lynx"))
    if _iscommand("w3m"):
        register("w3m", None, GenericBrowser("w3m"))
if sys.platform[:3] == "win":
    class WindowsDefault(BaseBrowser):
        """Open URLs through os.startfile()."""

        def open(self, url, new=0, autoraise=True):
            """Return True if os.startfile(url) succeeds, False on WindowsError.

            *new* and *autoraise* are accepted but not used.
            """
            try:
                os.startfile(url)
            except WindowsError:
                return False
            else:
                return True

    # Start from an empty registry on Windows, discarding earlier entries.
    _tryorder = []
    _browsers = {}

    # First choice: the system default handler.
    register("windows-default", WindowsDefault)

    # Then any of these browsers that can be found, Internet Explorer last.
    iexplore = os.path.join(os.environ.get("PROGRAMFILES", "C:\\Program Files"),
                            "Internet Explorer\\IEXPLORE.EXE")
    for browser in ("firefox", "firebird", "seamonkey", "mozilla",
                    "netscape", "opera", iexplore):
        if _iscommand(browser):
            register(browser, None, BackgroundBrowser(browser))
if sys.platform == 'darwin':
    class MacOSX(BaseBrowser):
        """Launcher that drives a named application through osascript.

        The name "default" opens the URL with ``open location``; any other
        name is sent an ``OpenURL`` command.
        """

        def __init__(self, name):
            self.name = name

        def open(self, url, new=0, autoraise=True):
            """Open *url* by piping an AppleScript to ``osascript``.

            Returns True when osascript exits successfully.  *autoraise* is
            accepted but not used.
            """
            assert "'" not in url
            # A URL without a scheme separator is treated as a local file.
            if ':' not in url:
                url = 'file:' + url

            # new must be 0 or 1
            new = int(bool(new))
            if self.name == "default":
                script = 'open location "%s"' % url.replace('"', '%22')
            else:
                if self.name == "OmniWeb":
                    toWindow = ""
                else:
                    # toWindow is 0 when new is set and -1 otherwise.
                    toWindow = "toWindow %d" % (new - 1)
                cmd = 'OpenURL "%s"' % url.replace('"', '%22')
                # Built from explicit lines so the script text is exact.
                script = ('tell application "%s"\n'
                          'activate\n'
                          '%s %s\n'
                          'end tell') % (self.name, cmd, toWindow)
            # Feed the script to osascript on its standard input.
            osapipe = os.popen("osascript", "w")
            if osapipe is None:
                return False

            osapipe.write(script)
            rc = osapipe.close()
            return not rc

    class MacOSXOSAScript(BaseBrowser):
        """Launcher that opens a URL via ``open location`` in osascript."""

        def __init__(self, name):
            self._name = name

        def open(self, url, new=0, autoraise=True):
            """Open *url* in the default browser or the named application.

            Returns True when osascript exits successfully.  *new* and
            *autoraise* are accepted but not used.
            """
            if self._name == 'default':
                script = 'open location "%s"' % url.replace('"', '%22')
            else:
                # Without this ``else`` the 'default' script above was always
                # overwritten by a ``tell application "default"`` script.
                script = ('\n'
                          'tell application "%s"\n'
                          'activate\n'
                          'open location "%s"\n'
                          'end\n') % (self._name, url.replace('"', '%22'))

            osapipe = os.popen("osascript", "w")
            if osapipe is None:
                return False

            osapipe.write(script)
            rc = osapipe.close()
            return not rc

    # Each is inserted at the front of the try order, so the last one
    # registered ("MacOSX", the default browser) is tried first.
    register("safari", None, MacOSXOSAScript('safari'), -1)
    register("firefox", None, MacOSXOSAScript('firefox'), -1)
    register("MacOSX", None, MacOSXOSAScript('default'), -1)
# On OS/2 with Netscape available, start from an empty registry and
# launch Netscape through the "start" command.
if sys.platform[:3] == "os2" and _iscommand("netscape"):
    _tryorder = []
    _browsers = {}
    register("os2netscape", None,
             GenericBrowser(["start", "netscape", "%s"]), -1)
# Honour the BROWSER environment variable: a list of commands separated by
# os.pathsep, each of which is registered ahead of the detected browsers.
if "BROWSER" in os.environ:
    _userchoices = os.environ["BROWSER"].split(os.pathsep)
    # Each entry is inserted at the front, so process them in reverse to
    # keep the user's first choice first.
    _userchoices.reverse()

    for cmdline in _userchoices:
        if cmdline != '':
            cmd = _synthesize(cmdline, -1)
            if cmd[1] is None:
                # No known controller matches: run it as a plain command.
                register(cmdline, None, GenericBrowser(cmdline), -1)
    cmdline = None  # ensures ``del cmdline`` works even if the loop never ran
    del cmdline
    del _userchoices
def main():
    """Command-line entry point: ``prog [-n | -t] url``.

    -n opens the URL with new=1 and -t with new=2; -d is accepted and
    ignored.  Invalid options or a missing/extra URL argument print the
    usage text to stderr and exit with status 1.  After opening the URL,
    a bell character is written to stdout.
    """
    import getopt
    # Assembled from explicit lines so the usage text is exact.
    usage = ("Usage: %s [-n | -t] url\n"
             "-n: open new window\n"
             "-t: open new tab") % sys.argv[0]
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ntd')
    except getopt.error as msg:
        # ``as`` and ``write`` are used instead of the Python-2-only
        # ``except X, e`` / ``print >>`` forms; the output is unchanged.
        sys.stderr.write("%s\n" % (msg,))
        sys.stderr.write("%s\n" % (usage,))
        sys.exit(1)
    new_win = 0
    for o, a in opts:
        if o == '-n':
            new_win = 1
        elif o == '-t':
            new_win = 2
    if len(args) != 1:
        sys.stderr.write("%s\n" % (usage,))
        sys.exit(1)

    url = args[0]
    open(url, new_win)

    sys.stdout.write("\a\n")
# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()
Can you help me? I know most of Python, but some things still take me quite a lot of effort.
You can simplify the answer like this:
import webbrowser
webbrowser.open(*website*)
Of course, replace *website* with the URL of the site (as a string); there is no need to pass the 2.
Then tell me if it works.
By the way, some online Python environments, such as PythonAnywhere, do not allow access to every website.
Instead, they only allow a specific set of websites.
Related
Unable to use method of a class in different class-missing 2 required positional arguments
I have two python classes:- One class(CloudLink) is responsible for sending JSON events to the app and another(ReadData) is responsible for building the JSON data. The ReadData class will be using the CloudLink methods to send the JSON data to the App. But I'm getting error _buildJSONdata() missing 1 required positional argument: 'Data'. ReadData class from pyspark.sql import SparkSession import functools from pyspark.sql import DataFrame from pyspark.sql.functions import explode from cosmosconnect import azurecosmos class ReadData: #exception(logger) def __init__(self): self.spark_session = ( SparkSession.builder .appName("readData") .getOrCreate() ) mssparkutils.fs.unmount('/mnt/test') logger.info("Drive unmounted") mssparkutils.fs.mount( 'abfss://abc#transl.dfs.core.windows.net/', '/mnt/test', {'linkedService': "linkCosmos"} ) logger.info("Mounted Successfully") self.input_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/input_path" ) self.output_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/output_path" ) ''' Reading the schema from csv file ''' #exception(logger) def readConfig(self): try: logger.info(f"Reading the Config present in {self.input_directory} ") dfConfig = self.spark_session.read.option("multiline","true") \ .json(self.input_directory) #for f in dfConfig.select("Entity","Query","Business_Rule").collect(): dfConfig=dfConfig.select(explode('Input').alias('Input_Data'))\ .select('Input_Data.Validation_Type','Input_Data.Entity','Input_Data.Query','Input_Data.Business_Rule') for f in dfConfig.rdd.toLocalIterator(): #for index, f in dfConfig.toPandas().iterrows(): self.Validation_Type=f[0] self.container=f[1] self.query=f[2] self.rule=f[3] self.readCosmos(self) except: raise ValueError("") #exception(logger) def readCosmos(self,*params): #from cosmosconnect import azurecosmos #a=[] linkedService='fg' df=azurecosmos.cosmosConnect(linkedService,self.query,self.container) df.cache() if len(df.head(1)) >0: 
outputpath=self.output_directory+'/'+self.container df.coalesce(1).write.mode('overwrite').parquet(outputpath) Status="Validation Failure" Data= {"Validation_Type":[],"Status":[],"Container":[],"Business_Rule":[]} Data["Validation_Type"].append(self.Validation_Type) Data["Status"].append(Status) Data["Container"].append(self.container) Data["Business_Rule"].append(self.rule) CloudLink._buildJSONdata(Data) if __name__ == "__main__": p = ReadData() p.readConfig() CloudLink class import json import datetime import hashlib import json import sys import traceback import adal import requests from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry import logging from functools import wraps import sys def create_logger(): #create a logger object #logger = logging.getLogger() logger = logging.getLogger() logger.setLevel(logging.INFO) logfile = logging.FileHandler('exc_logger.log') #logfile = logging.StreamHandler(sys.stdout) fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' formatter = logging.Formatter(fmt) logfile.setFormatter(formatter) logger.addHandler(logfile) return logger logger = create_logger() def exception(logger): def decorator(func): #wraps(func) def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except: issue = "exception in "+func.__name__+"\n" issue = issue+"-------------------------\ ------------------------------------------------\n" logger.exception(issue) raise return wrapper return decorator class CloudLink(object): _token = None _instance = None http = None cloudclient = TokenLibrary.getSecret("xxxx", "rtrt") clientid = TokenLibrary.getSecret("xxxx", "tyty") clientcredentials = TokenLibrary.getSecret("xxxx", "abcabc") authority_url = TokenLibrary.getSecret("xxxx", "abab") cloudtest = TokenLibrary.getSecret("xxxx", "yyyy") #staticmethod def getInstance(): if not CloudLink._instance: CloudLink._instance = CloudLink() return CloudLink._instance def __init__(self): retry_strategy = Retry( 
total=3, backoff_factor=0, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=["HEAD", "GET", "OPTIONS"], ) adapter = HTTPAdapter(max_retries=retry_strategy) self.http = requests.Session() self.http.mount("https://", adapter) self.http.mount("http://", adapter) print("Inside init") def parseJSON(self, t): try: eventData = json.loads(t) logger.info(f"Sending {eventData} to cloud") self.sendToCloud(eventData) except ValueError as e: print("Error: %s Please validate JSON in https://www.jsonschemavalidator.net/"% e) return None # or: raise def sendToCloud(self, eventData): cloudData = {"eventData": eventData, "metadata": self._buildMetadata()} logger.info(f"Raising alert with data=({cloudData}") response = self.http.post( self.cloudtest, headers=self._buildHeaders(), json=cloudData ) logger.info(f"cloud alert response={response}") if response.status_code == 202 or response.status_code == 200: logger.info("Mail sent to Cloud") else: raise Exception(f"Cloud reporting failed with Error {response}") def _buildJSONdata(self,Data): if len(Data) == 0: raise Exception("JSON is empty") else: t = json.dumps(self.Data) self.parseJSON(t) def _buildMetadata(self): return { "messageType": "Send Email", "messageVersion": "0.0.1", "sender": "Send Email", } def _buildHeaders(self): self._refreshADToken() headers = { "Authorization": "Bearer {}".format(self._token["accessToken"]), "Content-type": "application/json", "Accept": "text/plain", } return headers def _refreshADToken(self): def shouldRenew(token): """Returns True if the token should be renewed""" expiresOn = datetime.datetime.strptime( token["expiresOn"], "%Y-%m-%d %H:%M:%S.%f" ) now = datetime.datetime.now() return (expiresOn - now) < datetime.timedelta(minutes=5) if not self._token or shouldRenew(self._token): logger.info("Renewing credentials for Alerting") result = None try: context = adal.AuthenticationContext(CloudLink.authority_url) result = context.acquire_token_with_client_credentials(CloudLink.cloudclient, 
CloudLink.clientid,CloudLink.clientcredentials) except Exception as e: error = "Failed to renew client credentials." logger.info(error) raise if result and "accessToken" in result: self._token = result else: logger.error( "Failed to acquire bearer token. accessToken not found in result object on renewing credentials." ) raise Exception("Could not acquire a bearer token")
No data display after QTableView freeze column?
QTableView has been instantiated, use QSqlTableModel in PYQT5 to read MySQL database, and use setmodel() to display correctly in QTableView. Freeze the first row and first column using FreezeTableView but no data is displayed in the instantiated tableView. It needs show() to display the data in a new window. How can I display the data in the instantiated tableView instead of in a new window? Thanks! Ui_untitled.py class Ui_MainWindow(object): def setupUi(self, MainWindow): MainWindow.setObjectName("MainWindow") MainWindow.resize(978, 828) self.centralwidget = QtWidgets.QWidget(MainWindow) self.centralwidget.setObjectName("centralwidget") self.treeView_1 = QtWidgets.QTreeView(self.centralwidget) self.treeView_1.setEnabled(False) self.treeView_1.setGeometry(QtCore.QRect(10, 11, 191, 771)) self.treeView_1.setObjectName("treeView_1") #Omitted below mainwindown.py from ui_untitled import Ui_MainWindow class MainForm(QMainWindow, Ui_MainWindow): def __init__(self, pareng=None): super(MainForm, self).__init__(pareng) self.setupUi(self) def on_treeView_clicked(self, index): name = index.data() self.tableModel_1 = QSqlTableModel() self.tableModel_1.setTable("name") self.tableModel_1.setEditStrategy(QSqlTableModel.OnRowChange) self.tableModel_1.setFilter(self.tr("name_no = %s"%name)) self.tableModel_1.select() self.tableView_1 = QTableView() #self.tableView_1.setModel(self.tableModel_1) self.tableView_1 = FreezeTableView(self.tableModel_1) #self.tableView_1.show() #No data displayed after cancel class FreezeTableView(QTableView, QAbstractSlider): def __init__(self, model): super(FreezeTableView, self).__init__() self.model = model self.frozenTableView = QTableView(self) self.horizontalView = QTableView(self) self.up = True self.init() def init(self): self.setModel(self.model) #print(self.model.rowCount(), self.model.ColumnCount()) self.frozenTableInit() self.horizontalViewInit() self.horizontalHeader().sectionResized.connect(self.updateSectionWidth) 
self.verticalHeader().sectionResized.connect(self.updateSectionHeight) self.verticalScrollBar().valueChanged.connect(self.vConnectFV) self.frozenTableView.verticalScrollBar().valueChanged.connect(self.fVConnectV) self.horizontalScrollBar().valueChanged.connect(self.hConnectH) def vConnectFV(self, a0: int): self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.stackUnder(self.horizontalView) self.frozenTableView.verticalScrollBar().setValue(a0) def fVConnectV(self, a0: int): self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.stackUnder(self.horizontalView) self.verticalScrollBar().setValue(a0) def hConnectH(self, a0: int): self.viewport().stackUnder(self.horizontalView) self.horizontalView.stackUnder(self.frozenTableView) self.horizontalView.horizontalScrollBar().setValue(a0) def frozenTableInit(self): self.frozenTableView.setModel(self.model) self.frozenTableView.verticalHeader().hide() self.frozenTableView.setFocusPolicy(Qt.NoFocus) self.frozenTableView.horizontalHeader().setFixedHeight(self.horizontalHeader().height()) self.frozenTableView.horizontalHeader().setSectionResizeMode(QHeaderView.Fixed) self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.setStyleSheet('QTableView {' 'border: none;' 'background-color: #8EDE21;' 'selection-background-color: #999}') self.frozenTableView.setSelectionModel(self.selectionModel()) [self.frozenTableView.setColumnHidden(col, True) for col in range(1, self.model.columnCount())] self.frozenTableView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.frozenTableView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.frozenTableView.show() self.updateFrozenTableGeometry() self.frozenTableView.setVerticalScrollMode(self.ScrollPerPixel) self.setVerticalScrollMode(self.ScrollPerPixel) self.setHorizontalScrollMode(self.ScrollPerPixel) def horizontalViewInit(self): self.horizontalView.setModel(self.model) self.horizontalView.horizontalHeader().hide() 
self.horizontalView.setFocusPolicy(Qt.NoFocus) self.horizontalView.verticalHeader().setFixedWidth(self.verticalHeader().width()) self.horizontalView.verticalHeader().setSectionResizeMode(QHeaderView.Fixed) self.frozenTableView.stackUnder(self.horizontalView) self.horizontalView.setStyleSheet('QTableView { border: none;' 'background-color: #8EDE21;' 'selection-background-color: #999}') self.horizontalView.setSelectionModel(self.selectionModel()) [self.horizontalView.setRowHidden(row, True) for row in range(1, self.model.rowCount())] self.horizontalView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.horizontalView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.horizontalView.show() self.updateFrozenTableGeometry() self.horizontalView.setHorizontalScrollMode(self.ScrollPerPixel) def updateFrozenTableGeometry(self): self.frozenTableView.setGeometry(self.verticalHeader().width() + self.frameWidth(), self.frameWidth(), self.columnWidth(0), self.viewport().height() + self.horizontalHeader().height()) self.horizontalView.setGeometry(self.frameWidth(), self.frameWidth() + self.horizontalHeader().height(), self.viewport().width() + self.verticalHeader().width(), self.rowHeight(0)) def updateSectionWidth(self, logicalIndex, oldSize, newSize): self.horizontalView.setColumnWidth(logicalIndex, newSize) if not logicalIndex: self.frozenTableView.setColumnWidth(0, newSize) self.updateFrozenTableGeometry() def updateSectionHeight(self, logicalIndex, oldSize, newSize): self.frozenTableView.setRowHeight(logicalIndex, newSize) if not logicalIndex: self.horizontalView.setRowHeight(0, newSize) self.updateFrozenTableGeometry() def resizeEvent(self, e: QtGui.QResizeEvent) -> None: QTableView.resizeEvent(self, e) self.updateFrozenTableGeometry() def scrollTo(self, index: QtCore.QModelIndex, hint: QAbstractItemView.ScrollHint = ...) 
-> None: print(index.row, index.column, self.model.data(index)) if index.column() > 0 or index.row() > 0: QTableView.scrollTo(self, index, hint) def moveCursor(self, cursorAction: QAbstractItemView.CursorAction, modifiers: typing.Union[QtCore.Qt.KeyboardModifiers, QtCore.Qt.KeyboardModifier]) -> QtCore.QModelIndex: current = QTableView.moveCursor(self, cursorAction, modifiers) if cursorAction == QAbstractItemView.MoveLeft and current.column() > 0 \ and self.visualRect(current).topLeft().x() < self.frozenTableView.columnWidth(0): newValue = self.verticalScrollBar().value() + self.visualRect(current).topLeft().x() \ - self.frozenTableView.columnWidth(0) self.horizontalScrollBar().setValue(newValue) if cursorAction == QAbstractItemView.MoveUp and current.row() > 0 \ and self.visualRect(current).topLeft().y() < self.horizontalView.rowHeight(0): newValue = self.horizontalScrollBar().value() + self.visualRect(current).topLeft().y() \ - self.horizontalView.rowHeight(0) self.verticalScrollBar().setValue(newValue) return current 'from ui_untitled import Ui_MainWindow class MainForm(QMainWindow, Ui_MainWindow): def __init__(self, pareng=None): super(MainForm, self).__init__(pareng) self.setupUi(self) def on_treeView_clicked(self, index): #treeView双击显示Qmodelidx name = index.data() self.tableModel_1 = QSqlTableModel() self.tableModel_1.setTable("name") self.tableModel_1.setEditStrategy(QSqlTableModel.OnRowChange) self.tableModel_1.setFilter(self.tr("name_no = %s"%name)) self.tableModel_1.select() self.tableView_1 = QTableView() #self.tableView_1.setModel(self.tableModel_1) self.tableView_1 = FreezeTableView(self.tableModel_1) #self.tableView_1.show() #No data displayed after cancel class FreezeTableView(QTableView, QAbstractSlider): def __init__(self, model): super(FreezeTableView, self).__init__() self.model = model self.frozenTableView = QTableView(self) self.horizontalView = QTableView(self) self.up = True self.init() def init(self): self.setModel(self.model) 
#print(self.model.rowCount(), self.model.ColumnCount()) self.frozenTableInit() self.horizontalViewInit() self.horizontalHeader().sectionResized.connect(self.updateSectionWidth) self.verticalHeader().sectionResized.connect(self.updateSectionHeight) self.verticalScrollBar().valueChanged.connect(self.vConnectFV) self.frozenTableView.verticalScrollBar().valueChanged.connect(self.fVConnectV) self.horizontalScrollBar().valueChanged.connect(self.hConnectH) def vConnectFV(self, a0: int): self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.stackUnder(self.horizontalView) self.frozenTableView.verticalScrollBar().setValue(a0) def fVConnectV(self, a0: int): self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.stackUnder(self.horizontalView) self.verticalScrollBar().setValue(a0) def hConnectH(self, a0: int): self.viewport().stackUnder(self.horizontalView) self.horizontalView.stackUnder(self.frozenTableView) self.horizontalView.horizontalScrollBar().setValue(a0) def frozenTableInit(self): self.frozenTableView.setModel(self.model) self.frozenTableView.verticalHeader().hide() self.frozenTableView.setFocusPolicy(Qt.NoFocus) self.frozenTableView.horizontalHeader().setFixedHeight(self.horizontalHeader().height()) self.frozenTableView.horizontalHeader().setSectionResizeMode(QHeaderView.Fixed) self.viewport().stackUnder(self.frozenTableView) self.frozenTableView.setStyleSheet('QTableView {' 'border: none;' 'background-color: #8EDE21;' 'selection-background-color: #999}') self.frozenTableView.setSelectionModel(self.selectionModel()) [self.frozenTableView.setColumnHidden(col, True) for col in range(1, self.model.columnCount())] self.frozenTableView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.frozenTableView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.frozenTableView.show() self.updateFrozenTableGeometry() self.frozenTableView.setVerticalScrollMode(self.ScrollPerPixel) self.setVerticalScrollMode(self.ScrollPerPixel) 
self.setHorizontalScrollMode(self.ScrollPerPixel) def horizontalViewInit(self): self.horizontalView.setModel(self.model) self.horizontalView.horizontalHeader().hide() self.horizontalView.setFocusPolicy(Qt.NoFocus) self.horizontalView.verticalHeader().setFixedWidth(self.verticalHeader().width()) self.horizontalView.verticalHeader().setSectionResizeMode(QHeaderView.Fixed) self.frozenTableView.stackUnder(self.horizontalView) self.horizontalView.setStyleSheet('QTableView { border: none;' 'background-color: #8EDE21;' 'selection-background-color: #999}') self.horizontalView.setSelectionModel(self.selectionModel()) [self.horizontalView.setRowHidden(row, True) for row in range(1, self.model.rowCount())] self.horizontalView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.horizontalView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.horizontalView.show() self.updateFrozenTableGeometry() self.horizontalView.setHorizontalScrollMode(self.ScrollPerPixel) def updateFrozenTableGeometry(self): self.frozenTableView.setGeometry(self.verticalHeader().width() + self.frameWidth(), self.frameWidth(), self.columnWidth(0), self.viewport().height() + self.horizontalHeader().height()) self.horizontalView.setGeometry(self.frameWidth(), self.frameWidth() + self.horizontalHeader().height(), self.viewport().width() + self.verticalHeader().width(), self.rowHeight(0)) def updateSectionWidth(self, logicalIndex, oldSize, newSize): self.horizontalView.setColumnWidth(logicalIndex, newSize) if not logicalIndex: self.frozenTableView.setColumnWidth(0, newSize) self.updateFrozenTableGeometry() def updateSectionHeight(self, logicalIndex, oldSize, newSize): self.frozenTableView.setRowHeight(logicalIndex, newSize) if not logicalIndex: self.horizontalView.setRowHeight(0, newSize) self.updateFrozenTableGeometry() def resizeEvent(self, e: QtGui.QResizeEvent) -> None: QTableView.resizeEvent(self, e) self.updateFrozenTableGeometry() def scrollTo(self, index: QtCore.QModelIndex, hint: 
QAbstractItemView.ScrollHint = ...) -> None: print(index.row, index.column, self.model.data(index)) if index.column() > 0 or index.row() > 0: QTableView.scrollTo(self, index, hint) def moveCursor(self, cursorAction: QAbstractItemView.CursorAction, modifiers: typing.Union[QtCore.Qt.KeyboardModifiers, QtCore.Qt.KeyboardModifier]) -> QtCore.QModelIndex: current = QTableView.moveCursor(self, cursorAction, modifiers) if cursorAction == QAbstractItemView.MoveLeft and current.column() > 0 \ and self.visualRect(current).topLeft().x() < self.frozenTableView.columnWidth(0): newValue = self.verticalScrollBar().value() + self.visualRect(current).topLeft().x() \ - self.frozenTableView.columnWidth(0) self.horizontalScrollBar().setValue(newValue) if cursorAction == QAbstractItemView.MoveUp and current.row() > 0 \ and self.visualRect(current).topLeft().y() < self.horizontalView.rowHeight(0): newValue = self.horizontalScrollBar().value() + self.visualRect(current).topLeft().y() \ - self.horizontalView.rowHeight(0) self.verticalScrollBar().setValue(newValue) return current
Why is my code skipping my index page?
Whenever I go to the login page (0.0.0.0:5000/), it immediately redirects to 0.0.0.0:5000/admin. Any ideas why this is happening? I am running this on Yocto on an Intel Edison and editing via PuTTY. #app.errorhandler(404) #app.errorhandler(500) def errorpage(e): return render_template('404.html') def login_required(f): #wraps(f) def wrap(*args, **kwargs): if 'logged_in' in session: return f(*args, **kwargs) else: flash('please login first.') return redirect (url_for('index')) return wrap #app.route('/', methods=['GET','POST']) def index(): error = None if request.method == 'POST': if request.form['username'] != 'admin' or request.form['password'] != 'password': error = 'invalid attempt.' else: session['logged_in'] = True return redirect(url_for('admin')) return render_template('index.html', error = error) #app.route('/logout') def logout(): session.pop('logged_in', None) return render_template('/logout.html') #app.route('/admin',methods=['GET','POST']) #login_required def admin():
It looks like as soon as anyone hits the page with a GET, logged_in is set to True and the request is redirected to /admin:

@app.route('/', methods=['GET','POST'])
def index():
    error = None
    if request.method == 'POST':
        if request.form['username'] != 'admin' or request.form['password'] != 'password':
            error = 'invalid attempt.'
    else:
        # this is always triggered for GETs, even if not logged in!
        session['logged_in'] = True
        return redirect(url_for('admin'))
    return render_template('index.html', error = error)

Instead, you should be checking that they're already logged in, and set the logged_in property in the POST block, after the credentials have been checked.

Actually, it just looks like the indentation of your else block is off:

@app.route('/', methods=['GET','POST'])
def index():
    error = None
    if request.method == 'POST':
        if request.form['username'] != 'admin' or request.form['password'] != 'password':
            error = 'invalid'
        else:
            # this indenting should work now
            session['logged_in'] = True
            return redirect(url_for('admin'))
    return render_template('index.html', error = error)
Serialize Gtk TreeStore / ListStore using JSON
I made a new example which shows much better what I am trying to do. The new example gives the following output. Is there a way that the data can go into the respective store key (the {} brackets)? { "copy": [ [ [ 5.0, 8.0, 9.0 ] ], [ [ 4.0, 0.0, 1.0 ] ] ], "name": "dataset1", "sets": [ { "store": {}, "type": "vector" }, { "store": {}, "type": "vector" } ] } New example from gi.repository import Gtk import json import random class Vector(object): def __init__(self, data): self.store = Gtk.ListStore(float, float, float) self.store.append([data[0], data[1], data[2]]) self.type = "vector" def return_data(self): store_data = [] def iterate_over_data(model, path, itr): row = model[path] store_data.append([row[0], row[1], row[2]]) self.store.foreach(iterate_over_data) return store_data class DataSet(object): def __init__(self, name): self.name = name self.sets = [] def add_vector(self): data = [random.randint(0,9) for x in range(3)] self.sets.append(Vector(data)) def to_json(self): self.copy = [] for s in self.sets: self.copy.append(s.return_data()) return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) obj1 = DataSet("dataset1") for x in range(2): obj1.add_vector() print(obj1.to_json()) Old example I am currently figuring out how to serialize a Gtk ListStore that is nested in a Gtk TreeStore. I got a small example to work, but I am not sure whether this approach will scale for programs that have more data attached (for example, the layer object could hold a color or a date of creation). Is there maybe another way to do this? My current approach is to gather the data in list and dictionary form myself and then just create the JSON dump. I have the feeling that this would be rather difficult to maintain if I need to attach 25 values to each layer object.
from gi.repository import Gtk, Gdk
import json
import random

# First element of the clipboard payload; on paste, any payload that does not
# start with this marker is ignored.
PASTE_MARKER = "safe-to-paste"


class LayerTreeView(Gtk.TreeView):
    """Tree view with a single text column titled "Name" bound to column 0."""

    def __init__(self, store):
        Gtk.TreeView.__init__(self, store)
        renderer = Gtk.CellRendererText()
        column = Gtk.TreeViewColumn("Name", renderer, text=0)
        self.append_column(column)


class DataTreeView(Gtk.TreeView):
    """Tree view with a single text column titled "Data" bound to column 0."""

    def __init__(self, store):
        Gtk.TreeView.__init__(self, store)
        # Keep a reference to the backing store next to its view.
        self.store = store
        renderer = Gtk.CellRendererText()
        column = Gtk.TreeViewColumn("Data", renderer, text=0)
        self.append_column(column)


class MainWindow(Gtk.Window):
    """Window with a layer list on the left and the selected layer's data on
    the right, plus Cut/Copy/Paste buttons that move layers through the
    clipboard as JSON text.
    """

    def __init__(self):
        Gtk.Window.__init__(self, title="TreeView Serialize")
        self.connect("delete-event", Gtk.main_quit)
        self.set_border_width(10)
        self.set_default_size(400, 300)

        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6,
                       expand=True)
        self.add(vbox)

        self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)

        hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
        button = Gtk.Button("Cut")
        button.connect("clicked", self.on_cut_clicked)
        hbox.pack_start(button, True, True, 0)
        button = Gtk.Button(stock=Gtk.STOCK_COPY)
        button.connect("clicked", self.on_copy_clicked)
        hbox.pack_start(button, True, True, 0)
        button = Gtk.Button(stock=Gtk.STOCK_PASTE)
        button.connect("clicked", self.on_paste_clicked)
        hbox.pack_start(button, True, True, 0)
        vbox.add(hbox)

        # Row layout: (layer name, Gtk.ListStore with the data, DataTreeView).
        self.layer_store = Gtk.TreeStore(str, object, object)
        self.layer_view = LayerTreeView(self.layer_store)
        self.layer_sw = Gtk.ScrolledWindow()
        self.data_sw = Gtk.ScrolledWindow()
        self.layer_sw.add(self.layer_view)

        treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6,
                          expand=True)
        treebox.pack_start(self.layer_sw, True, True, 0)
        treebox.pack_start(self.data_sw, True, True, 0)
        vbox.add(treebox)

        self.select = self.layer_view.get_selection()
        self.select.connect("changed", self.on_selection_changed)
        self.add_test_data()

    def _append_layer(self, name, values):
        """Append a top-level layer *name* whose data store holds *values*.

        Each value is converted with ``str`` and stored as a one-column row.
        """
        data_store = Gtk.ListStore(str)
        data_view = DataTreeView(data_store)
        for value in values:
            data_store.append([str(value)])
        self.layer_store.append(None, [name, data_store, data_view])

    def add_test_data(self):
        """Fill the layer store with three layers of five values each."""
        for x in range(3):
            self._append_layer("Data {}".format(x),
                               [y + x for y in range(5)])

    def on_selection_changed(self, selection):
        """
        When layer is switched load respective data
        """
        model, treeiter = selection.get_selected()
        if treeiter is None:
            return
        data_view = model[treeiter][2]
        child = self.data_sw.get_child()
        if child is not None:
            self.data_sw.remove(child)
        self.data_sw.add(data_view)
        self.show_all()

    def on_cut_clicked(self, button):
        """Placeholder: cutting is not implemented."""
        pass

    def on_copy_clicked(self, button):
        """Put all layers on the clipboard as ``[marker, {name: [values]}]``."""
        data_dict = {
            row[0]: [datarow[0] for datarow in row[1]]
            for row in self.layer_store
        }
        data = json.dumps([PASTE_MARKER, data_dict])
        self.clipboard.set_text(data, -1)

    def on_paste_clicked(self, button):
        """Append the layers found in the clipboard payload, if it is ours.

        Anything that is not JSON of the form ``[marker, {name: [values]}]``
        is ignored silently, including an empty clipboard.
        """
        paste_str = self.clipboard.wait_for_text()
        try:
            parse = json.loads(paste_str)
        except (TypeError, ValueError):
            # TypeError: clipboard holds no text (None); ValueError: not JSON.
            return

        # Valid JSON is not necessarily *our* JSON ("5", "{}", "[]", ...);
        # check the shape before indexing into it.
        if not isinstance(parse, list) or len(parse) < 2:
            return
        if parse[0] != PASTE_MARKER or not isinstance(parse[1], dict):
            return

        for name, values in parse[1].items():
            if isinstance(values, list):
                self._append_layer(name, values)


win = MainWindow()
win.show_all()
Gtk.main()
I have an improved version of your code with dict comprehension and #staticmethod that makes the signal callbacks more readable and shorter. Nevertheless, this does not really solve your problem as it still generates the json manually. If the ListStore gets more complex, it would probably be better to let the DataListStore class generate its own json with a corresponding method. from gi.repository import Gtk, Gdk import json class LayerTreeView(Gtk.TreeView): def __init__(self, store): Gtk.TreeView.__init__(self, store) renderer = Gtk.CellRendererText() column = Gtk.TreeViewColumn("Name", renderer, text=0) self.append_column(column) class DataTreeView(Gtk.TreeView): def __init__(self): Gtk.TreeView.__init__(self) renderer = Gtk.CellRendererText() column = Gtk.TreeViewColumn("Data", renderer, text=0) self.append_column(column) class DataListStore(Gtk.ListStore): #staticmethod def from_json(*args, values=[]): store = DataListStore(*args) for value in values: store.append((value,)) return store class MainWindow(Gtk.Window): def __init__(self): Gtk.Window.__init__(self, title="TreeView Serialize") self.connect("delete-event", Gtk.main_quit) self.set_border_width(10) self.set_default_size(400, 300) vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6, expand=True) self.add(vbox) self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD) hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6) button = Gtk.Button("Cut") button.connect("clicked", self.on_cut_clicked) hbox.pack_start(button, True, True, 0) button = Gtk.Button(stock=Gtk.STOCK_COPY) button.connect("clicked", self.on_copy_clicked) hbox.pack_start(button, True, True, 0) button = Gtk.Button(stock=Gtk.STOCK_PASTE) button.connect("clicked", self.on_paste_clicked) hbox.pack_start(button, True, True, 0) vbox.add(hbox) self.layer_store = Gtk.TreeStore(str, object) self.layer_view = LayerTreeView(self.layer_store) self.data_view = DataTreeView() layer_sw = Gtk.ScrolledWindow() 
layer_sw.add(self.layer_view) data_sw = Gtk.ScrolledWindow() data_sw.add(self.data_view) treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6, expand=True) treebox.pack_start(layer_sw, True, True, 0) treebox.pack_start(data_sw, True, True, 0) vbox.add(treebox) select = self.layer_view.get_selection() select.connect("changed", self.on_selection_changed) self.add_test_data() def add_test_data(self): for x in range(3): data_list = [str(y+x) for y in range(5)] self.layer_store.append(None, ["Data {}".format(x), data_list]) def on_selection_changed(self, selection): """ When layer is switched load respective data """ model, treeiter = selection.get_selected() if treeiter != None: self.data_view.set_model( DataListStore.from_json(str, values=model[treeiter][1]) ) def on_cut_clicked(self, button): pass def on_copy_clicked(self, button): copy_list = [ 'safe-to-paste', {row[0]: row[1] for row in self.layer_store}, ] data = json.dumps(copy_list) self.clipboard.set_text(data, -1) def on_paste_clicked(self, button): paste_str = self.clipboard.wait_for_text() try: parse = json.loads(paste_str) except: return if parse[0] != "safe-to-paste": return data_dict = parse[1] for x in data_dict: self.layer_store.append(None, [x, data_dict[x]]) win = MainWindow() win.show_all() Gtk.main()
Scrapy unhandled exception
I am using scrapy 0.16.2 version on linux. I'm running: scrapy crawl mycrawlspider -s JOBDIR=/mnt/mycrawlspider I'm getting this error which blocks scrapy (hangs and doesn't finish automatically, only ^C stops it) 2012-11-20 15:04:51+0000 [-] Unhandled Error Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/scrapy/commands/crawl.py", line 45, in run self.crawler.start() File "/usr/lib/python2.7/site-packages/scrapy/crawler.py", line 80, in start reactor.run(installSignalHandlers=False) # blocking call File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1169, in run self.mainLoop() File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1178, in mainLoop self.runUntilCurrent() --- <exception caught here> --- File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 800, in runUntilCurrent call.func(*call.args, **call.kw) File "/usr/lib/python2.7/site-packages/scrapy/utils/reactor.py", line 41, in __call__ return self._func(*self._a, **self._kw) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 116, in _next_request self.crawl(request, spider) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 172, in crawl self.schedule(request, spider) File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 176, in schedule return self.slots[spider].scheduler.enqueue_request(request) File "/usr/lib/python2.7/site-packages/scrapy/core/scheduler.py", line 48, in enqueue_request if not request.dont_filter and self.df.request_seen(request): exceptions.AttributeError: 'NoneType' object has no attribute 'dont_filter' BTW this worked in version 0.14 Here is the code: class MySpider(CrawlSpider): name = 'alrroya' NEW_IGNORED_EXTENSIONS = list(IGNORED_EXTENSIONS) NEW_IGNORED_EXTENSIONS.remove('pdf') download_delay = 0.05 # Stay within these domains when crawling allowed_domains = [] all_domains = {} start_urls = [] # Add our callback which will be called for every found link 
rules = [
    Rule(
        SgmlLinkExtractor(
            deny_extensions=NEW_IGNORED_EXTENSIONS,
            tags=('a', 'area', 'frame', 'iframe'),
            attrs=('href', 'src'),
        ),
        follow=True,
        callback='parse_crawled_page',
    )
]

# How many pages crawled
crawl_count = 0

# How many PDFs we have found
pdf_count = 0


def __init__(self, *args, **kwargs):
    """Set up the spider, hook the opened/closed signals and load the URLs."""
    CrawlSpider.__init__(self, *args, **kwargs)
    dispatcher.connect(self._spider_closed, signals.spider_closed)
    dispatcher.connect(self._spider_opened, signals.spider_opened)
    self.load_allowed_domains_and_start_urls()


def _checkfile_path(self, jobdir):
    """Return the path of the ``<name>.crawlercheck`` file.

    The file lives in the parent directory of *jobdir*, or is a relative
    path when no job directory is configured.
    """
    mnt = os.path.dirname(os.path.normpath(jobdir)) if jobdir else ''
    return os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)


def _read_checkfile(self, checkfile):
    """Return ``(reason, reason_date)`` as stored in *checkfile*.

    The first line is the reason; the second line is parsed as a
    ``YYYY-MM-DD`` date. ``reason_date`` is ``None`` when that line is
    missing or malformed.
    """
    with open(checkfile, 'r') as f:
        data = f.read().split('\n')
    reason = data[0]
    try:
        reason_date = datetime.strptime(data[1], '%Y-%m-%d')
    except (IndexError, ValueError):
        reason_date = None
    return reason, reason_date


def _write_checkfile(self, checkfile, status):
    """Overwrite *checkfile* with *status* and today's date on two lines."""
    with open(checkfile, 'w') as f:
        f.write('%s\n' % status)
        f.write(str(date.today()))


def _may_run(self, reason, reason_date):
    """Tell whether the recorded state permits a (re)start.

    True when a dated 'shutdown' is recorded, or a 'finished' whose date is
    at least one day before today's midnight; False otherwise.
    """
    if not reason_date:
        return False
    if 'shutdown' in reason:
        return True
    today = datetime.today()
    midnight = datetime(today.year, today.month, today.day)
    return reason_date + timedelta(days=1) <= midnight and 'finished' in reason


def allowed_to_start(self):
    """Return True when no check file exists or its state permits a run."""
    checkfile = self._checkfile_path(self.settings['JOBDIR'])
    if not os.path.exists(checkfile):
        return True
    reason, reason_date = self._read_checkfile(checkfile)
    return self._may_run(reason, reason_date)


def _spider_opened(self, spider):
    """Signal handler: record 'started', or abort when a run is not due."""
    if spider is not self:
        return
    jobdir = spider.settings['JOBDIR']
    checkfile = self._checkfile_path(jobdir)
    if os.path.exists(checkfile):
        reason, reason_date = self._read_checkfile(checkfile)
        if not self._may_run(reason, reason_date):
            crawler.engine.close_spider(self, 'finished')
            if jobdir and os.path.exists(jobdir):
                shutil.rmtree(jobdir)
            self._write_checkfile(checkfile, 'finished')
            # Hard exit of the whole process; nothing below runs.
            os._exit(1)
    self._write_checkfile(checkfile, 'started')


def _spider_closed(self, spider, reason):
    """Signal handler: record why the spider stopped.

    On a 'shutdown' reason the job directory is kept; for any other reason
    it is removed and 'finished' is recorded.
    """
    if spider is not self:
        return
    jobdir = spider.settings['JOBDIR']
    checkfile = self._checkfile_path(jobdir)
    if 'shutdown' in reason:
        self._write_checkfile(checkfile, 'shutdown')
        return
    if jobdir and os.path.exists(jobdir):
        shutil.rmtree(jobdir)
    self._write_checkfile(checkfile, 'finished')


def _requests_to_follow(self, response):
    """Follow links only for responses that expose a non-None ``encoding``."""
    if getattr(response, 'encoding', None) is not None:
        return CrawlSpider._requests_to_follow(self, response)
    return []


def start_requests(self):
    """Yield the initial requests, skipping start URLs that produce none.

    ``make_requests_from_url`` returns None for URLs it handles itself (PDF
    responses). Handing that None to the scheduler is what raises
    "'NoneType' object has no attribute 'dont_filter'".
    """
    for url in self.start_urls:
        request = self.make_requests_from_url(url)
        if request is not None:
            yield request


def make_requests_from_url(self, url):
    """Return a request for *url*, or None when it is a PDF handled here.

    A HEAD request is issued first. If the content type mentions 'pdf' or
    'octet-stream', the URL is passed to ``get_pdf_link`` (when a run is
    allowed) and no request is created. Any failure during the probe falls
    back to a normal request.
    """
    http_client = httplib2.Http()
    try:
        headers = {
            'content-type': 'text/html',
            'user-agent': random.choice(USER_AGENT_LIST),
        }
        response, content = http_client.request(url, method='HEAD',
                                                headers=headers)
        content_type = response['content-type'].lower()
        if 'pdf' in content_type or 'octet-stream' in content_type:
            if self.allowed_to_start():
                self.get_pdf_link(url)
            return None
        return CrawlSpider.make_requests_from_url(self, url)
    except Exception:
        # Deliberate best effort: if the probe fails, crawl the URL normally.
        return CrawlSpider.make_requests_from_url(self, url)


def get_pdf_link(self, url):
    """Pass *url* to ``pdf_process`` if its domain and path are allowed.

    For the first configured domain that the URL's host ends with and whose
    path rules match: every path prefixed with '!' must be absent from the
    URL path, and, if any unprefixed paths are configured, at least one must
    be present.
    """
    source = self.__class__.name
    parsed_url = urlparse(url)
    url_domain = parsed_url.netloc
    url_path = parsed_url.path
    if not url_domain:
        return
    allow_domains = self.__class__.all_domains[source]['allow_domains']
    for domain, paths in allow_domains.items():
        if not url_domain.endswith(domain):
            continue
        excluded = [path[1:] for path in paths if path[0:1] == '!']
        required = [path for path in paths if path[0:1] != '!']
        none_excluded = all(path not in url_path for path in excluded)
        any_required = (not required
                        or any(path in url_path for path in required))
        # NOTE(review): the flattened source does not show whether a failed
        # match should stop the search or move on to the next domain; this
        # assumes it moves on - confirm against the original file.
        if none_excluded and any_required:
            self.pdf_process(source, url)
            return


def parse_crawled_page(self, response):
    """Count the crawled page, hand PDF responses on, return an empty Item."""
    self.__class__.crawl_count += 1
    crawl_count = self.__class__.crawl_count
    if crawl_count % 100 == 0:
        print('Crawled %d pages' % crawl_count)
    if 'pdf' in response.headers.get('content-type', '').lower():
        self.get_pdf_link(response.url)
    return Item()


def load_allowed_domains_and_start_urls(self):
    """Build today's start URL and register the allowed domains.

    ``allowed_domains`` and ``start_urls`` are class-level lists, so entries
    already present are not appended again when another instance is created.
    """
    currdate = date.today()
    alrroya = ('http://epaper.alrroya.com/currentissues.php?editiondt='
               + currdate.strftime('%Y/%m/%d'),)
    self.__class__.all_domains = {
        'alrroya': {
            'start_urls': alrroya,
            'allow_domains': {
                'epaper.alrroya.com': frozenset(()),
            }
        }
    }
    config = self.__class__.all_domains[self.__class__.name]
    for domain in config['allow_domains']:
        if domain not in self.__class__.allowed_domains:
            self.__class__.allowed_domains.append(domain)
    for start_url in config['start_urls']:
        if start_url not in self.__class__.start_urls:
            self.__class__.start_urls.append(start_url)


def pdf_process(self, source, url):
    """Report a PDF URL found for *source*."""
    print('!!! ' + source + ' ' + url)
This appears to be a bug in Scrapy. The current version doesn't seem to accept lists returned from make_requests_from_url(). I was able to modify the Scrapy code in the following way to work around the issue. In the file Scrapy-0.16.5-py2.7.egg/scrapy/spider.py Change: def start_requests(self): for url in self.start_urls: yield self.make_requests_from_url(url) To: def start_requests(self): for url in self.start_urls: requests = self.make_requests_from_url(url) if type(requests) is list: for request in requests: yield request else: yield requests I expect that the official Scrapy people will fix this eventually.