How to extract the subject, verb, and object of a sentence, and their relationship?

My purpose is exactly the same as that in the following post:
How to extract subjects in a sentence and their respective dependent phrases?
Therefore, I used the code provided by the original author of this post:
SUBJECTS = ["nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"]
OBJECTS = ["dobj", "dative", "attr", "oprd"]
ADJECTIVES = ["acomp", "advcl", "advmod", "amod", "appos", "nn", "nmod", "ccomp", "complm", "hmod", "infmod", "xcomp",
              "rcmod", "poss", " possessive"]
COMPOUNDS = ["compound"]
PREPOSITIONS = ["prep"]

def getSubsFromConjunctions(subs):
    moreSubs = []
    for sub in subs:
        # rights is a generator
        rights = list(sub.rights)
        rightDeps = {tok.lower_ for tok in rights}
        if "and" in rightDeps:
            moreSubs.extend([tok for tok in rights if tok.dep_ in SUBJECTS or tok.pos_ == "NOUN"])
            if len(moreSubs) > 0:
                moreSubs.extend(getSubsFromConjunctions(moreSubs))
    return moreSubs

def getObjsFromConjunctions(objs):
    moreObjs = []
    for obj in objs:
        # rights is a generator
        rights = list(obj.rights)
        rightDeps = {tok.lower_ for tok in rights}
        if "and" in rightDeps:
            moreObjs.extend([tok for tok in rights if tok.dep_ in OBJECTS or tok.pos_ == "NOUN"])
            if len(moreObjs) > 0:
                moreObjs.extend(getObjsFromConjunctions(moreObjs))
    return moreObjs

def getVerbsFromConjunctions(verbs):
    moreVerbs = []
    for verb in verbs:
        rightDeps = {tok.lower_ for tok in verb.rights}
        if "and" in rightDeps:
            moreVerbs.extend([tok for tok in verb.rights if tok.pos_ == "VERB"])
            if len(moreVerbs) > 0:
                moreVerbs.extend(getVerbsFromConjunctions(moreVerbs))
    return moreVerbs

def findSubs(tok):
    head = tok.head
    while head.pos_ != "VERB" and head.pos_ != "NOUN" and head.head != head:
        head = head.head
    if head.pos_ == "VERB":
        subs = [tok for tok in head.lefts if tok.dep_ == "SUB"]
        if len(subs) > 0:
            verbNegated = isNegated(head)
            subs.extend(getSubsFromConjunctions(subs))
            return subs, verbNegated
        elif head.head != head:
            return findSubs(head)
    elif head.pos_ == "NOUN":
        return [head], isNegated(tok)
    return [], False

def isNegated(tok):
    negations = {"no", "not", "n't", "never", "none"}
    for dep in list(tok.lefts) + list(tok.rights):
        if dep.lower_ in negations:
            return True
    return False

def findSVs(tokens):
    svs = []
    verbs = [tok for tok in tokens if tok.pos_ == "VERB"]
    for v in verbs:
        subs, verbNegated = getAllSubs(v)
        if len(subs) > 0:
            for sub in subs:
                svs.append((sub.orth_, "!" + v.orth_ if verbNegated else v.orth_))
    return svs

def getObjsFromPrepositions(deps):
    objs = []
    for dep in deps:
        if dep.pos_ == "ADP" and dep.dep_ == "prep":
            objs.extend(
                [tok for tok in dep.rights if tok.dep_ in OBJECTS or (tok.pos_ == "PRON" and tok.lower_ == "me")])
    return objs

def getAdjectives(toks):
    toks_with_adjectives = []
    for tok in toks:
        adjs = [left for left in tok.lefts if left.dep_ in ADJECTIVES]
        adjs.append(tok)
        adjs.extend([right for right in tok.rights if tok.dep_ in ADJECTIVES])
        tok_with_adj = " ".join([adj.lower_ for adj in adjs])
        toks_with_adjectives.extend(adjs)
    return toks_with_adjectives

def getObjsFromAttrs(deps):
    for dep in deps:
        if dep.pos_ == "NOUN" and dep.dep_ == "attr":
            verbs = [tok for tok in dep.rights if tok.pos_ == "VERB"]
            if len(verbs) > 0:
                for v in verbs:
                    rights = list(v.rights)
                    objs = [tok for tok in rights if tok.dep_ in OBJECTS]
                    objs.extend(getObjsFromPrepositions(rights))
                    if len(objs) > 0:
                        return v, objs
    return None, None

def getObjFromXComp(deps):
    for dep in deps:
        if dep.pos_ == "VERB" and dep.dep_ == "xcomp":
            v = dep
            rights = list(v.rights)
            objs = [tok for tok in rights if tok.dep_ in OBJECTS]
            objs.extend(getObjsFromPrepositions(rights))
            if len(objs) > 0:
                return v, objs
    return None, None

def getAllSubs(v):
    verbNegated = isNegated(v)
    subs = [tok for tok in v.lefts if tok.dep_ in SUBJECTS and tok.pos_ != "DET"]
    if len(subs) > 0:
        subs.extend(getSubsFromConjunctions(subs))
    else:
        foundSubs, verbNegated = findSubs(v)
        subs.extend(foundSubs)
    return subs, verbNegated

def getAllObjs(v):
    # rights is a generator
    rights = list(v.rights)
    objs = [tok for tok in rights if tok.dep_ in OBJECTS]
    objs.extend(getObjsFromPrepositions(rights))
    potentialNewVerb, potentialNewObjs = getObjFromXComp(rights)
    if potentialNewVerb is not None and potentialNewObjs is not None and len(potentialNewObjs) > 0:
        objs.extend(potentialNewObjs)
        v = potentialNewVerb
    if len(objs) > 0:
        objs.extend(getObjsFromConjunctions(objs))
    return v, objs

def getAllObjsWithAdjectives(v):
    # rights is a generator
    rights = list(v.rights)
    objs = [tok for tok in rights if tok.dep_ in OBJECTS]
    if len(objs) == 0:
        objs = [tok for tok in rights if tok.dep_ in ADJECTIVES]
    objs.extend(getObjsFromPrepositions(rights))
    potentialNewVerb, potentialNewObjs = getObjFromXComp(rights)
    if potentialNewVerb is not None and potentialNewObjs is not None and len(potentialNewObjs) > 0:
        objs.extend(potentialNewObjs)
        v = potentialNewVerb
    if len(objs) > 0:
        objs.extend(getObjsFromConjunctions(objs))
    return v, objs

def findSVAOs(tokens):
    svos = []
    verbs = [tok for tok in tokens if tok.pos_ == "VERB" or tok.dep_ != "aux"]
    for v in verbs:
        subs, verbNegated = getAllSubs(v)
        # hopefully there are subs, if not, don't examine this verb any longer
        if len(subs) > 0:
            v, objs = getAllObjsWithAdjectives(v)
            for sub in subs:
                for obj in objs:
                    objNegated = isNegated(obj)
                    obj_desc_tokens = generate_left_right_adjectives(obj)
                    sub_compound = generate_sub_compound(sub)
                    svos.append((" ".join(tok.lower_ for tok in sub_compound),
                                 "!" + v.lower_ if verbNegated or objNegated else v.lower_,
                                 " ".join(tok.lower_ for tok in obj_desc_tokens)))
    return svos

def generate_sub_compound(sub):
    sub_compunds = []
    for tok in sub.lefts:
        if tok.dep_ in COMPOUNDS:
            sub_compunds.extend(generate_sub_compound(tok))
    sub_compunds.append(sub)
    for tok in sub.rights:
        if tok.dep_ in COMPOUNDS:
            sub_compunds.extend(generate_sub_compound(tok))
    return sub_compunds

def generate_left_right_adjectives(obj):
    obj_desc_tokens = []
    for tok in obj.lefts:
        if tok.dep_ in ADJECTIVES:
            obj_desc_tokens.extend(generate_left_right_adjectives(tok))
    obj_desc_tokens.append(obj)
    for tok in obj.rights:
        if tok.dep_ in ADJECTIVES:
            obj_desc_tokens.extend(generate_left_right_adjectives(tok))
    return obj_desc_tokens

nlp = spacy.load('en_core_web_md')
sentence = "Lung cancer causes huge mortality to population, and pharmaceutical companies require new drugs as an alternative either synthetic or natural targeting lung cancer. This review highlights the inextricable role of G. lucidum and its bioconstituents in lung cancer signaling for the first time."
doc = nlp(sentence)
print(findSVAOs(doc))
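For reference, the strings in SUBJECTS, OBJECTS, and ADJECTIVES above are spaCy dependency labels. A quick way to inspect which label and part-of-speech tag the parser actually assigns to each token (a minimal sketch using the same model):

import spacy

nlp = spacy.load('en_core_web_md')
for tok in nlp("Lung cancer causes huge mortality to population."):
    # token text, dependency label, coarse POS tag, syntactic head
    print(tok.text, tok.dep_, tok.pos_, tok.head.text)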
But because I am using a different spaCy version, I made two necessary changes, following the hints in the comments under the original post.
Change the following code:
from spacy.lang.en import English
parser = English()
parse = parser(sentence)
print(findSVAOs(parse))
To:
import spacy
nlp = spacy.load('en_core_web_md')
doc = nlp(sentence)
print(findSVAOs(doc))
Change the code in findSVAOs:
verbs = [tok for tok in tokens if tok.pos_ == "VERB" and tok.dep_ != "aux"]
To:
verbs = [tok for tok in tokens if tok.pos_ == "VERB" or tok.dep_ != "aux"]
For the input:
sentence = "Lung cancer causes huge mortality to population, and pharmaceutical companies require new drugs as an alternative either synthetic or natural targeting lung cancer. This review highlights the inextricable role of G. lucidum and its bioconstituents in lung cancer signaling for the first time."
Output:
[('lung cancer', 'causes', 'huge mortality'), ('companies', 'require', 'new drugs'), ('review', 'highlights', 'inextricable role')]
This is an acceptable result.
But since all the documents I want to analyze are biomedical, I would like to replace en_core_web_md with the scispaCy model en_core_sci_md. That is:
nlp = spacy.load('en_core_sci_md')
However, after I made this replacement, something strange happened.
Now, the output is:
[('lung cancer', 'causes', 'huge mortality population'), ('lung cancer', 'causes', 'population'), ('companies', 'require', 'new drugs'), ('review', 'highlights', 'inextricable role lucidum'), ('role', 'bioconstituents', 'signaling first time'), ('bioconstituents', 'signaling', 'first time')]
In some ways, the result is indeed better. For example:
('lung cancer', 'causes', 'huge mortality population') compared with the previous ('lung cancer', 'causes', 'huge mortality').
However, it obviously outputs some redundant items. For example:
('lung cancer', 'causes', 'huge mortality population') and ('lung cancer', 'causes', 'population')
The latter is obviously redundant.
Another example:
('role', 'bioconstituents', 'signaling first time') and ('bioconstituents', 'signaling', 'first time')
The latter is obviously redundant.
How can I remove these redundant items when using en_core_sci_md?

This change does not make sense; the result is going to be the verbs plus every other non-aux token:
Change the code in findSVAOs:
verbs = [tok for tok in tokens if tok.pos_ == "VERB" and tok.dep_ != "aux"]
To:
verbs = [tok for tok in tokens if tok.pos_ == "VERB" or tok.dep_ != "aux"]
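That is, keep the original and condition. If redundant triples still appear with en_core_sci_md after that, one option is a post-filter. This is a heuristic sketch of my own, not part of the original code: drop any triple whose words form a strict subset of another triple's words.

def remove_redundant(svos):
    # e.g. ('lung cancer', 'causes', 'population') is dropped because its words
    # are contained in ('lung cancer', 'causes', 'huge mortality population')
    def words(triple):
        return set(" ".join(triple).split())
    return [t for t in svos
            if not any(t != u and words(t) < words(u) for u in svos)]

print(remove_redundant(findSVAOs(doc)))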

Related

Python 2's webbrowser is not working in repl.it

I am using repl.it and the webbrowser module to open a link with webbrowser.open(<link to open>).
But my link doesn't open. Can you help me with this?
Here is my code:
import webbrowser
webbrowser.open("https://www.daffodilday.com.au/get-involved/register-your-school/", 2)
And a not-so-quick overview of the webbrowser module:
import os, sys, shlex, stat, subprocess, time
__all__ = ["Error", "open", "open_new", "open_new_tab", "get", "register"]
class Error(Exception):
pass
_browsers = {}
_tryorder = []
def register(name, klass, instance=None, update_tryorder=1):
_browsers[name.lower()] = [klass, instance]
if update_tryorder > 0:
_tryorder.append(name)
elif update_tryorder < 0:
_tryorder.insert(0, name)
def get(using=None):
if using is not None:
alternatives = [using]
else:
alternatives = _tryorder
for browser in alternatives:
if '%s' in browser:
browser = shlex.split(browser)
if browser[-1] == '&':
return BackgroundBrowser(browser[:-1])
else:
return GenericBrowser(browser)
else:
try:
command = _browsers[browser.lower()]
except KeyError:
command = _synthesize(browser)
if command[1] is not None:
return command[1]
elif command[0] is not None:
return command[0]()
raise Error("could not locate runnable browser")
def open(url, new=0, autoraise=True):
for name in _tryorder:
browser = get(name)
if browser.open(url, new, autoraise):
return True
return False
def open_new(url):
return open(url, 1)
def open_new_tab(url):
return open(url, 2)
def _synthesize(browser, update_tryorder=1):
cmd = browser.split()[0]
if not _iscommand(cmd):
return [None, None]
name = os.path.basename(cmd)
try:
command = _browsers[name.lower()]
except KeyError:
return [None, None]
# now attempt to clone to fit the new name:
controller = command[1]
if controller and name.lower() == controller.basename:
import copy
controller = copy.copy(controller)
controller.name = browser
controller.basename = os.path.basename(browser)
register(browser, None, controller, update_tryorder)
return [None, controller]
return [None, None]
if sys.platform[:3] == "win":
def _isexecutable(cmd):
cmd = cmd.lower()
if os.path.isfile(cmd) and cmd.endswith((".exe", ".bat")):
return True
for ext in ".exe", ".bat":
if os.path.isfile(cmd + ext):
return True
return False
else:
def _isexecutable(cmd):
if os.path.isfile(cmd):
mode = os.stat(cmd)[stat.ST_MODE]
if mode & stat.S_IXUSR or mode & stat.S_IXGRP or mode & stat.S_IXOTH:
return True
return False
def _iscommand(cmd):
if _isexecutable(cmd):
return True
path = os.environ.get("PATH")
if not path:
return False
for d in path.split(os.pathsep):
exe = os.path.join(d, cmd)
if _isexecutable(exe):
return True
return False
class BaseBrowser(object):
"""Parent class for all browsers. Do not use directly."""
args = ['%s']
def __init__(self, name=""):
self.name = name
self.basename = name
def open(self, url, new=0, autoraise=True):
raise NotImplementedError
def open_new(self, url):
return self.open(url, 1)
def open_new_tab(self, url):
return self.open(url, 2)
class GenericBrowser(BaseBrowser):
def __init__(self, name):
if isinstance(name, basestring):
self.name = name
self.args = ["%s"]
else:
self.name = name[0]
self.args = name[1:]
self.basename = os.path.basename(self.name)
def open(self, url, new=0, autoraise=True):
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
if sys.platform[:3] == 'win':
p = subprocess.Popen(cmdline)
else:
p = subprocess.Popen(cmdline, close_fds=True)
return not p.wait()
except OSError:
return False
class BackgroundBrowser(GenericBrowser):
def open(self, url, new=0, autoraise=True):
cmdline = [self.name] + [arg.replace("%s", url)
for arg in self.args]
try:
if sys.platform[:3] == 'win':
p = subprocess.Popen(cmdline)
else:
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
p = subprocess.Popen(cmdline, close_fds=True, preexec_fn=setsid)
return (p.poll() is None)
except OSError:
return False
class UnixBrowser(BaseBrowser):
raise_opts = None
remote_args = ['%action', '%s']
remote_action = None
remote_action_newwin = None
remote_action_newtab = None
background = False
redirect_stdout = True
def _invoke(self, args, remote, autoraise):
raise_opt = []
if remote and self.raise_opts:
# use autoraise argument only for remote invocation
autoraise = int(autoraise)
opt = self.raise_opts[autoraise]
if opt: raise_opt = [opt]
cmdline = [self.name] + raise_opt + args
if remote or self.background:
inout = file(os.devnull, "r+")
else:
inout = None
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
p = subprocess.Popen(cmdline, close_fds=True, stdin=inout,
stdout=(self.redirect_stdout and inout or None),
stderr=inout, preexec_fn=setsid)
if remote:
time.sleep(1)
rc = p.poll()
if rc is None:
time.sleep(4)
rc = p.poll()
if rc is None:
return True
return not rc
elif self.background:
if p.poll() is None:
return True
else:
return False
else:
return not p.wait()
def open(self, url, new=0, autoraise=True):
if new == 0:
action = self.remote_action
elif new == 1:
action = self.remote_action_newwin
elif new == 2:
if self.remote_action_newtab is None:
action = self.remote_action_newwin
else:
action = self.remote_action_newtab
else:
raise Error("Bad 'new' parameter to open(); " +
"expected 0, 1, or 2, got %s" % new)
args = [arg.replace("%s", url).replace("%action", action)
for arg in self.remote_args]
success = self._invoke(args, True, autoraise)
if not success:
args = [arg.replace("%s", url) for arg in self.args]
return self._invoke(args, False, False)
else:
return True
class Mozilla(UnixBrowser):
raise_opts = ["-noraise", "-raise"]
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-tab"
background = True
Netscape = Mozilla
class Galeon(UnixBrowser):
raise_opts = ["-noraise", ""]
remote_args = ['%action', '%s']
remote_action = "-n"
remote_action_newwin = "-w"
background = True
class Chrome(UnixBrowser):
remote_args = ['%action', '%s']
remote_action = ""
remote_action_newwin = "--new-window"
remote_action_newtab = ""
background = True
Chromium = Chrome
class Opera(UnixBrowser):
raise_opts = ["-noraise", ""]
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-page"
background = True
class Elinks(UnixBrowser):
remote_args = ['-remote', 'openURL(%s%action)']
remote_action = ""
remote_action_newwin = ",new-window"
remote_action_newtab = ",new-tab"
background = False
redirect_stdout = False
class Konqueror(BaseBrowser):
def open(self, url, new=0, autoraise=True):
if new == 2:
action = "newTab"
else:
action = "openURL"
devnull = file(os.devnull, "r+")
setsid = getattr(os, 'setsid', None)
if not setsid:
setsid = getattr(os, 'setpgrp', None)
try:
p = subprocess.Popen(["kfmclient", action, url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull)
except OSError:
pass
else:
p.wait()
return True
try:
p = subprocess.Popen(["konqueror", "--silent", url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull,
preexec_fn=setsid)
except OSError:
pass
else:
if p.poll() is None:
return True
try:
p = subprocess.Popen(["kfm", "-d", url],
close_fds=True, stdin=devnull,
stdout=devnull, stderr=devnull,
preexec_fn=setsid)
except OSError:
return False
else:
return (p.poll() is None)
class Grail(BaseBrowser):
def _find_grail_rc(self):
import glob
import pwd
import socket
import tempfile
tempdir = os.path.join(tempfile.gettempdir(),
".grail-unix")
user = pwd.getpwuid(os.getuid())[0]
filename = os.path.join(tempdir, user + "-*")
maybes = glob.glob(filename)
if not maybes:
return None
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
for fn in maybes:
# need to PING each one until we find one that's live
try:
s.connect(fn)
except socket.error:
# no good; attempt to clean it out, but don't fail:
try:
os.unlink(fn)
except IOError:
pass
else:
return s
def _remote(self, action):
s = self._find_grail_rc()
if not s:
return 0
s.send(action)
s.close()
return 1
def open(self, url, new=0, autoraise=True):
if new:
ok = self._remote("LOADNEW " + url)
else:
ok = self._remote("LOAD " + url)
return ok
def register_X_browsers():
if _iscommand("xdg-open"):
register("xdg-open", None, BackgroundBrowser("xdg-open"))
if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gvfs-open"):
register("gvfs-open", None, BackgroundBrowser("gvfs-open"))
if "GNOME_DESKTOP_SESSION_ID" in os.environ and _iscommand("gnome-open"):
register("gnome-open", None, BackgroundBrowser("gnome-open"))
if "KDE_FULL_SESSION" in os.environ and _iscommand("kfmclient"):
register("kfmclient", Konqueror, Konqueror("kfmclient"))
if _iscommand("x-www-browser"):
register("x-www-browser", None, BackgroundBrowser("x-www-browser"))
for browser in ("mozilla-firefox", "firefox",
"mozilla-firebird", "firebird",
"iceweasel", "iceape",
"seamonkey", "mozilla", "netscape"):
if _iscommand(browser):
register(browser, None, Mozilla(browser))
if _iscommand("kfm"):
register("kfm", Konqueror, Konqueror("kfm"))
elif _iscommand("konqueror"):
register("konqueror", Konqueror, Konqueror("konqueror"))
for browser in ("galeon", "epiphany"):
if _iscommand(browser):
register(browser, None, Galeon(browser))
if _iscommand("skipstone"):
register("skipstone", None, BackgroundBrowser("skipstone"))
for browser in ("google-chrome", "chrome", "chromium", "chromium-browser"):
if _iscommand(browser):
register(browser, None, Chrome(browser))
if _iscommand("opera"):
register("opera", None, Opera("opera")).
if _iscommand("mosaic"):
register("mosaic", None, BackgroundBrowser("mosaic")
if _iscommand("grail"):
register("grail", Grail, None)
if os.environ.get("DISPLAY"):
register_X_browsers()
if os.environ.get("TERM"):
if _iscommand("www-browser"):
register("www-browser", None, GenericBrowser("www-browser"))
if _iscommand("links"):
register("links", None, GenericBrowser("links"))
if _iscommand("elinks"):
register("elinks", None, Elinks("elinks"))
if _iscommand("lynx"):
register("lynx", None, GenericBrowser("lynx"))
if _iscommand("w3m"):
register("w3m", None, GenericBrowser("w3m"))
if sys.platform[:3] == "win":
class WindowsDefault(BaseBrowser):
def open(self, url, new=0, autoraise=True):
try:
os.startfile(url)
except WindowsError:
return False
else:
return True
_tryorder = []
_browsers = {}
register("windows-default", WindowsDefault)
iexplore = os.path.join(os.environ.get("PROGRAMFILES", "C:\\Program Files"),
"Internet Explorer\\IEXPLORE.EXE")
for browser in ("firefox", "firebird", "seamonkey", "mozilla",
"netscape", "opera", iexplore):
if _iscommand(browser):
register(browser, None, BackgroundBrowser(browser))
if sys.platform == 'darwin':
class MacOSX(BaseBrowser):
def __init__(self, name):
self.name = name
def open(self, url, new=0, autoraise=True):
assert "'" not in url
if not ':' in url:
url = 'file:'+url
new = int(bool(new))
if self.name == "default":
script = 'open location "%s"' % url.replace('"', '%22')
else:
if self.name == "OmniWeb":
toWindow = ""
else:
toWindow = "toWindow %d" % (new - 1)
cmd = 'OpenURL "%s"' % url.replace('"', '%22')
script = '''tell application "%s"
activate
%s %s
end tell''' % (self.name, cmd, toWindow)
osapipe = os.popen("osascript", "w")
if osapipe is None:
return False
osapipe.write(script)
rc = osapipe.close()
return not rc
class MacOSXOSAScript(BaseBrowser):
def __init__(self, name):
self._name = name
def open(self, url, new=0, autoraise=True):
if self._name == 'default':
script = 'open location "%s"' % url.replace('"', '%22')
script = '''
tell application "%s"
activate
open location "%s"
end
'''%(self._name, url.replace('"', '%22'))
osapipe = os.popen("osascript", "w")
if osapipe is None:
return False
osapipe.write(script)
rc = osapipe.close()
return not rc
register("safari", None, MacOSXOSAScript('safari'), -1)
register("firefox", None, MacOSXOSAScript('firefox'), -1)
register("MacOSX", None, MacOSXOSAScript('default'), -1)
if sys.platform[:3] == "os2" and _iscommand("netscape"):
_tryorder = []
_browsers = {}
register("os2netscape", None,
GenericBrowser(["start", "netscape", "%s"]), -1)
if "BROWSER" in os.environ:
_userchoices = os.environ["BROWSER"].split(os.pathsep)
_userchoices.reverse()
for cmdline in _userchoices:
if cmdline != '':
cmd = _synthesize(cmdline, -1)
if cmd[1] is None:
register(cmdline, None, GenericBrowser(cmdline), -1)
cmdline = None
del cmdline
del _userchoices
def main():
import getopt
usage = """Usage: %s [-n | -t] url
-n: open new window
-t: open new tab""" % sys.argv[0]
try:
opts, args = getopt.getopt(sys.argv[1:], 'ntd')
except getopt.error, msg:
print >>sys.stderr, msg
print >>sys.stderr, usage
sys.exit(1)
new_win = 0
for o, a in opts:
if o == '-n': new_win = 1
elif o == '-t': new_win = 2
if len(args) != 1:
print >>sys.stderr, usage
sys.exit(1)
url = args[0]
open(url, new_win)
print "\a"
if __name__ == "__main__":
main()
Can you help me? I know most things in Python, but some things still take me quite a lot of effort.
You can simplify the call like this:
import webbrowser
webbrowser.open(*website*)
Of course, replace *website* with the site's URL; there is no need to pass the 2.
Then tell me if it works.
And by the way, some online Python environments, like PythonAnywhere, do not allow opening every website.
Instead, they only allow certain specific sites.
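For reference, the second argument to open() is called new: 0 reuses the current browser window, 1 requests a new window, and 2 requests a new tab; all three are only hints that the browser may ignore. The module also provides shortcuts:

import webbrowser

url = "https://www.daffodilday.com.au/get-involved/register-your-school/"
webbrowser.open(url)          # default behaviour (new=0)
webbrowser.open_new(url)      # same as open(url, 1)
webbrowser.open_new_tab(url)  # same as open(url, 2)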

TypeError: string indices must be integers

Hi, I have a problem with my code: a loop works a few times but then throws a TypeError: string indices must be integers.
I want to call an API to get JSON back and extract some parts of the JSON response. Here's the code:
class API(object):
    def __init__(self, api_key):
        self.api_key = api_key

    def _request(self, api_url, params={}):
        args = {'api_key': self.api_key}
        for key, value in params.items():
            if key not in args:
                args[key] = value
        response = requests.get(
            Consts.URL['base'].format(
                url=api_url
            ),
            params=args
        )
        if response.status_code == requests.codes.ok:
            return response.json()
        else:
            return "not possible"
        print(response.url)

    def get_list(self):
        excel = EXCEL('s6.xlsx')
        api_url = Consts.URL['list'].format(
            version = Consts.API_VERSIONS['matchversion'],
            start = excel.get_gamenr()
        )
        return self._request(api_url)

    def get_match(self, matchid):
        idlist = matchid
        api_url = Consts.URL['match'].format(
            version = Consts.API_VERSIONS['matchversion'],
            matchId = idlist
        )
        return self._request(api_url)

    def match_ids(self):
        api = API('c6ea2f68-7ed6-40fa-9b99-fd591c55c05f')
        x = api.get_list()
        y = x['matches']
        count = len(y)
        ids = []
        while count > 0:
            count = count - 1
            temp = y[0]
            ids.append(temp['matchId'])
            del y[0]
        return ids

    def match_info(self):
        matchids = self.match_ids()
        print(matchids)
        matchinfolist = {}
        counter = 1
        for gameids in matchids:
            info = self.get_match(gameids)
            myid = self.find_partid(info['participantIdentities'])
            prepdstats = info['participants'][myid-1]
            print(prepdstats)
            matchinfolist['stats' + str(counter)] = prepdstats
        return matchinfolist

    def find_partid(self, partlist):
        partid = 0
        idlist = partlist
        while partid < 10:
            partid = partid + 1
            tempplayer = idlist[0]['player']
            if tempplayer['summonerId'] == 19204660:
                playernr = partid
                partid = 500
            del idlist[0]
        return playernr
When I run the match_info() function I get this error:
Traceback (most recent call last):
  File "C:\Users\Niklas\Desktop\python riot\main.py", line 17, in <module>
    main()
  File "C:\Users\Niklas\Desktop\python riot\main.py", line 10, in main
    print(api.match_info())
  File "C:\Users\Niklas\Desktop\python riot\api.py", line 78, in match_info
    myid = self.find_partid(info['participantIdentities'])
TypeError: string indices must be integers
but only after the loop in the function has run a few times. I have no idea what I'm doing wrong. Any help would be nice.
Here is a link to the json: https://euw.api.pvp.net/api/lol/euw/v2.2/match/2492271473?api_key=c6ea2f68-7ed6-40fa-9b99-fd591c55c05f
The error shows up on
myid = self.find_partid(info['participantIdentities'])
For this line to execute, info must be a mapping with string keys, not a string itself. info is
info = self.get_match(gameids)
get_match ends with
return self._request(api_url)
_request ends with
if response.status_code == requests.codes.ok:
    return response.json()
else:
    return "not possible"
For the loop to ever run, response.json() must be a dict with key 'participantIdentities'. Your bug is expecting that to always be true.
One fix might be to make the expectation always true. If there is a satisfactory default value, return {'participantIdentities': <default value>}. Otherwise, return None and change the loop to
info = self.get_match(gameids)
if info is not None:
    ...  # as before
else:
    ...  # whatever default action you want
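A minimal sketch of that second option, with _request returning None on any non-OK response (names follow the question's code; Consts and requests are assumed to be imported as before):

def _request(self, api_url, params=None):
    args = {'api_key': self.api_key}
    for key, value in (params or {}).items():
        args.setdefault(key, value)
    response = requests.get(
        Consts.URL['base'].format(url=api_url),
        params=args
    )
    if response.status_code == requests.codes.ok:
        return response.json()
    return None  # signal failure instead of returning the string "not possible"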

Serialize Gtk TreeStore / ListStore using JSON

I made a new example that shows much better what I am trying to do. It gives the following output. Is there a way to make the data go into the respective store key (the {} brackets)?
{
    "copy": [
        [
            [
                5.0,
                8.0,
                9.0
            ]
        ],
        [
            [
                4.0,
                0.0,
                1.0
            ]
        ]
    ],
    "name": "dataset1",
    "sets": [
        {
            "store": {},
            "type": "vector"
        },
        {
            "store": {},
            "type": "vector"
        }
    ]
}
New example
from gi.repository import Gtk
import json
import random

class Vector(object):
    def __init__(self, data):
        self.store = Gtk.ListStore(float, float, float)
        self.store.append([data[0], data[1], data[2]])
        self.type = "vector"

    def return_data(self):
        store_data = []
        def iterate_over_data(model, path, itr):
            row = model[path]
            store_data.append([row[0], row[1], row[2]])
        self.store.foreach(iterate_over_data)
        return store_data

class DataSet(object):
    def __init__(self, name):
        self.name = name
        self.sets = []

    def add_vector(self):
        data = [random.randint(0, 9) for x in range(3)]
        self.sets.append(Vector(data))

    def to_json(self):
        self.copy = []
        for s in self.sets:
            self.copy.append(s.return_data())
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=True, indent=4)

obj1 = DataSet("dataset1")
for x in range(2):
    obj1.add_vector()
print(obj1.to_json())
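The empty {} under store appears because json.dumps falls back to o.__dict__ for objects it cannot serialize, and a Gtk.ListStore keeps its rows in C-level storage rather than in Python attributes. One way to route the rows into the store key is to special-case the store in the default hook; a sketch (the serialize helper is my own naming, not Gtk API):

import json
from gi.repository import Gtk

def serialize(o):
    # Turn a ListStore into a plain list of row values;
    # fall back to the instance dict for everything else.
    if isinstance(o, Gtk.ListStore):
        return [list(row) for row in o]
    return o.__dict__

# in DataSet.to_json:
#     return json.dumps(self, default=serialize, sort_keys=True, indent=4)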
Old example
I am currently figuring out how to serialize a Gtk ListStore that is nested in a Gtk TreeStore. I got a small example to work, but I am not sure whether this approach will scale for programs that have more data attached (for example, the layer object could hold a color or a creation date). Is there maybe another way to do this?
My current approach is to gather the data in list and dictionary form myself and then just create the JSON dump. I have the feeling that this would be rather difficult to maintain if I need to attach 25 values to each layer object.
from gi.repository import Gtk, Gdk
import json
import random
class LayerTreeView(Gtk.TreeView):
def __init__(self, store):
Gtk.TreeView.__init__(self, store)
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Name", renderer, text=0)
self.append_column(column)
class DataTreeView(Gtk.TreeView):
def __init__(self, store):
Gtk.TreeView.__init__(self, store)
self.store = store
renderer = Gtk.CellRendererText()
column = Gtk.TreeViewColumn("Data", renderer, text=0)
self.append_column(column)
class MainWindow(Gtk.Window):
def __init__(self):
Gtk.Window.__init__(self, title="TreeView Serialize")
self.connect("delete-event", Gtk.main_quit)
self.set_border_width(10)
self.set_default_size(400, 300)
vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6, expand=True)
self.add(vbox)
self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
button = Gtk.Button("Cut")
button.connect("clicked", self.on_cut_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_COPY)
button.connect("clicked", self.on_copy_clicked)
hbox.pack_start(button, True, True, 0)
button = Gtk.Button(stock=Gtk.STOCK_PASTE)
button.connect("clicked", self.on_paste_clicked)
hbox.pack_start(button, True, True, 0)
vbox.add(hbox)
self.layer_store = Gtk.TreeStore(str, object, object)
self.layer_view = LayerTreeView(self.layer_store)
self.layer_sw = Gtk.ScrolledWindow()
self.data_sw = Gtk.ScrolledWindow()
self.layer_sw.add(self.layer_view)
treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6, expand=True)
treebox.pack_start(self.layer_sw, True, True, 0)
treebox.pack_start(self.data_sw, True, True, 0)
vbox.add(treebox)
self.select = self.layer_view.get_selection()
self.select.connect("changed", self.on_selection_changed)
self.add_test_data()
def add_test_data(self):
for x in range(3):
data_store = Gtk.ListStore(str)
data_view = DataTreeView(data_store)
for y in range(5):
data_store.append([str(y+x)])
self.layer_store.append(None, ["Data {}".format(x), data_store, data_view])
def on_selection_changed(self, selection):
"""
When layer is switched load respective data
"""
model, treeiter = selection.get_selected()
if treeiter != None:
data_view = model[treeiter][2]
child = self.data_sw.get_child()
if child != None:
self.data_sw.remove(self.data_sw.get_child())
self.data_sw.add(data_view)
self.show_all()
def on_cut_clicked(self, button):
pass
def on_copy_clicked(self, button):
copy_list = ["safe-to-paste"]
data_dict = {}
for row in self.layer_store:
name = row[0]
data_obj = row[1]
value_list = []
for datarow in data_obj:
value = datarow[0]
value_list.append(value)
data_dict[name] = value_list
copy_list.append(data_dict)
data = json.dumps(copy_list)
self.clipboard.set_text(data, -1)
def on_paste_clicked(self, button):
paste_str = self.clipboard.wait_for_text()
try:
parse = json.loads(paste_str)
json_str = True
except:
json_str = False
if json_str is False:
return
keyword = parse[0]
if keyword != "safe-to-paste":
return
data_dict = parse[1]
for x in data_dict:
data_list = data_dict[x]
data_store = Gtk.ListStore(str)
data_view = DataTreeView(data_store)
for y in data_list:
data_store.append([str(y)])
self.layer_store.append(None, [x, data_store, data_view])
win = MainWindow()
win.show_all()
Gtk.main()
I have an improved version of your code with a dict comprehension and @staticmethod that makes the signal callbacks more readable and shorter. Nevertheless, this does not really solve your problem, as it still generates the JSON manually. If the ListStore gets more complex, it would probably be better to let the DataListStore class generate its own JSON with a corresponding method.
from gi.repository import Gtk, Gdk
import json

class LayerTreeView(Gtk.TreeView):
    def __init__(self, store):
        Gtk.TreeView.__init__(self, store)
        renderer = Gtk.CellRendererText()
        column = Gtk.TreeViewColumn("Name", renderer, text=0)
        self.append_column(column)

class DataTreeView(Gtk.TreeView):
    def __init__(self):
        Gtk.TreeView.__init__(self)
        renderer = Gtk.CellRendererText()
        column = Gtk.TreeViewColumn("Data", renderer, text=0)
        self.append_column(column)

class DataListStore(Gtk.ListStore):
    @staticmethod
    def from_json(*args, values=[]):
        store = DataListStore(*args)
        for value in values:
            store.append((value,))
        return store

class MainWindow(Gtk.Window):
    def __init__(self):
        Gtk.Window.__init__(self, title="TreeView Serialize")
        self.connect("delete-event", Gtk.main_quit)
        self.set_border_width(10)
        self.set_default_size(400, 300)
        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6, expand=True)
        self.add(vbox)
        self.clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
        hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
        button = Gtk.Button("Cut")
        button.connect("clicked", self.on_cut_clicked)
        hbox.pack_start(button, True, True, 0)
        button = Gtk.Button(stock=Gtk.STOCK_COPY)
        button.connect("clicked", self.on_copy_clicked)
        hbox.pack_start(button, True, True, 0)
        button = Gtk.Button(stock=Gtk.STOCK_PASTE)
        button.connect("clicked", self.on_paste_clicked)
        hbox.pack_start(button, True, True, 0)
        vbox.add(hbox)
        self.layer_store = Gtk.TreeStore(str, object)
        self.layer_view = LayerTreeView(self.layer_store)
        self.data_view = DataTreeView()
        layer_sw = Gtk.ScrolledWindow()
        layer_sw.add(self.layer_view)
        data_sw = Gtk.ScrolledWindow()
        data_sw.add(self.data_view)
        treebox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6, expand=True)
        treebox.pack_start(layer_sw, True, True, 0)
        treebox.pack_start(data_sw, True, True, 0)
        vbox.add(treebox)
        select = self.layer_view.get_selection()
        select.connect("changed", self.on_selection_changed)
        self.add_test_data()

    def add_test_data(self):
        for x in range(3):
            data_list = [str(y+x) for y in range(5)]
            self.layer_store.append(None, ["Data {}".format(x), data_list])

    def on_selection_changed(self, selection):
        """
        When layer is switched load respective data
        """
        model, treeiter = selection.get_selected()
        if treeiter != None:
            self.data_view.set_model(
                DataListStore.from_json(str, values=model[treeiter][1])
            )

    def on_cut_clicked(self, button):
        pass

    def on_copy_clicked(self, button):
        copy_list = [
            'safe-to-paste',
            {row[0]: row[1] for row in self.layer_store},
        ]
        data = json.dumps(copy_list)
        self.clipboard.set_text(data, -1)

    def on_paste_clicked(self, button):
        paste_str = self.clipboard.wait_for_text()
        try:
            parse = json.loads(paste_str)
        except:
            return
        if parse[0] != "safe-to-paste":
            return
        data_dict = parse[1]
        for x in data_dict:
            self.layer_store.append(None, [x, data_dict[x]])

win = MainWindow()
win.show_all()
Gtk.main()
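Following that suggestion, the DataListStore could serialize itself with a method mirroring from_json; a minimal sketch (the method name and single-column assumption are mine):

class DataListStore(Gtk.ListStore):
    @staticmethod
    def from_json(*args, values=[]):
        store = DataListStore(*args)
        for value in values:
            store.append((value,))
        return store

    def to_json(self):
        # Collect the first column of every row into a JSON array string.
        return json.dumps([row[0] for row in self])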

Django error: AttributeError at - 'Manager' object has no attribute 'META'

I'm using Django 1.4 and Python 2.7.
I'm trying to get data from the MySQL database...
views.py:
def myRequests(request):
    #request = ProjectRequest.objects
    response = render_to_response('myRequests.html', {'ProjectRequest': request}, context_instance = RequestContext(request))
    return response
As soon as I uncomment 'request = ProjectRequest.objects' I get the error:
AttributeError at /myRequests/
'Manager' object has no attribute 'META'
I'm not defining any new user models so this error makes no sense to me.
Exception location:
/{path}/lib/python2.7/site-packages/django/core/context_processors.py in debug, line 35
modelsRequest.py:
class ProjectRequest(Model):
reqType = CharField("Request Type", max_length = MAX_CHAR_LENGTH)
reqID = IntegerField("Request ID", primary_key = True)
ownerID = IntegerField("Owner ID")
projCodeName = CharField("Project Code Name", max_length = MAX_CHAR_LENGTH)
projPhase = CharField("Project Phase", max_length = MAX_CHAR_LENGTH)
projType = CharField("Project Phase", max_length = MAX_CHAR_LENGTH)
projNotes = CharField("Project Notes", max_length = MAX_CHAR_LENGTH)
contacts = CharField("Project Notes", max_length = MAX_CHAR_LENGTH)
reqStatus = CharField("Status", max_length = MAX_CHAR_LENGTH)
reqPriority = CharField("Request Priority", max_length = MAX_CHAR_LENGTH)
reqEstDate = DateTimeField("Request Estimated Complete Date", auto_now_add = True)
lastSaved = DateTimeField("Last saved", auto_now = True)
created = DateTimeField("Created", auto_now_add = True)
def __unicode__(self):
return str(self.reqID) + ": " + str(self.reqType)
class Meta:
app_label = 'djangoTestApp'
db_table = 'requests'
Any idea what's going on?!
Use a different variable name:
project_request = ProjectRequest.objects
The issue is caused by context_instance = RequestContext(request): by that point the name request no longer refers to the HTTP request object, because it has been overwritten. Hence the error.
def myRequests(request):
    project_request = ProjectRequest.objects
    # Now you have access to the request object;
    # do whatever you want with project_request
    response = render_to_response('myRequests.html', {'ProjectRequest': project_request}, context_instance = RequestContext(request))
    return response
The reason you are getting the error 'Manager' object has no attribute 'META' is that when you do
ProjectRequest.objects
the default manager (objects) of the ProjectRequest model is assigned to the (overwritten) local variable request.
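Reduced to its essence, the failing view does this (illustrative only):

def myRequests(request):
    request = ProjectRequest.objects   # the name 'request' now holds a Manager
    # RequestContext(request) runs Django's context processors; the debug
    # processor reads request.META -- a Manager has no META, hence the error
    return render_to_response('myRequests.html',
                              {'ProjectRequest': request},
                              context_instance=RequestContext(request))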

scrapy unhandled exception

I am using Scrapy 0.16.2 on Linux. I'm running:
scrapy crawl mycrawlspider -s JOBDIR=/mnt/mycrawlspider
I'm getting this error, which blocks Scrapy (it hangs and doesn't finish on its own; only ^C stops it):
2012-11-20 15:04:51+0000 [-] Unhandled Error
Traceback (most recent call last):
  File "/usr/lib/python2.7/site-packages/scrapy/commands/crawl.py", line 45, in run
    self.crawler.start()
  File "/usr/lib/python2.7/site-packages/scrapy/crawler.py", line 80, in start
    reactor.run(installSignalHandlers=False) # blocking call
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1169, in run
    self.mainLoop()
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1178, in mainLoop
    self.runUntilCurrent()
--- <exception caught here> ---
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 800, in runUntilCurrent
    call.func(*call.args, **call.kw)
  File "/usr/lib/python2.7/site-packages/scrapy/utils/reactor.py", line 41, in __call__
    return self._func(*self._a, **self._kw)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 116, in _next_request
    self.crawl(request, spider)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 172, in crawl
    self.schedule(request, spider)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 176, in schedule
    return self.slots[spider].scheduler.enqueue_request(request)
  File "/usr/lib/python2.7/site-packages/scrapy/core/scheduler.py", line 48, in enqueue_request
    if not request.dont_filter and self.df.request_seen(request):
exceptions.AttributeError: 'NoneType' object has no attribute 'dont_filter'
BTW this worked in version 0.14
Here is the code:
class MySpider(CrawlSpider):
name = 'alrroya'
NEW_IGNORED_EXTENSIONS = list(IGNORED_EXTENSIONS)
NEW_IGNORED_EXTENSIONS.remove('pdf')
download_delay = 0.05
# Stay within these domains when crawling
allowed_domains = []
all_domains = {}
start_urls = []
# Add our callback which will be called for every found link
rules = [
Rule(SgmlLinkExtractor(deny_extensions=NEW_IGNORED_EXTENSIONS, tags=('a', 'area', 'frame', 'iframe'), attrs=('href', 'src')), follow=True, callback='parse_crawled_page')
]
# How many pages crawled
crawl_count = 0
# How many PDFs we have found
pdf_count = 0
def __init__(self, *args, **kwargs):
CrawlSpider.__init__(self, *args, **kwargs)
dispatcher.connect(self._spider_closed, signals.spider_closed)
dispatcher.connect(self._spider_opened, signals.spider_opened)
self.load_allowed_domains_and_start_urls()
def allowed_to_start(self):
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = self.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
reason = True
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
reason = True
else:
reason = False
else:
reason = True
return reason
def _spider_opened(self, spider):
if spider is not self:
return
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
crawler.engine.close_spider(self, 'finished')
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
os._exit(1)
else:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
def _spider_closed(self, spider, reason):
if spider is not self:
return
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
if 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('shutdown\n')
f.write(str(date.today()))
f.close()
else:
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
def _requests_to_follow(self, response):
if getattr(response, 'encoding', None) != None:
return CrawlSpider._requests_to_follow(self, response)
else:
return []
def make_requests_from_url(self, url):
http_client = httplib2.Http()
try:
headers = {
'content-type': 'text/html',
'user-agent': random.choice(USER_AGENT_LIST)
}
response, content = http_client.request(url, method='HEAD', headers=headers)
#~ if 'pdf' in response['content-type'].lower() or (url.endswith('.pdf') and 'octet-stream' in response['content-type'].lower()):
if 'pdf' in response['content-type'].lower() or 'octet-stream' in response['content-type'].lower():
if self.allowed_to_start():
self.get_pdf_link(url)
else:
return CrawlSpider.make_requests_from_url(self, url)
except Exception as ex:
return CrawlSpider.make_requests_from_url(self, url)
def get_pdf_link(self, url):
source = self.__class__.name
parsed_url = urlparse(url)
url_domain = parsed_url.netloc
url_path = parsed_url.path
if url_domain:
for domain, paths in self.__class__.all_domains[source]['allow_domains'].iteritems():
if url_domain.endswith(domain):
pre_and = False
pre_or = False
and_cond = True
or_cond = False
for path in paths:
if path[0:1] == '!':
pre_and = True
if path[1:] not in url_path:
and_cond = and_cond and True
else:
and_cond = and_cond and False
else:
pre_or = True
if path in url_path:
or_cond = or_cond or True
else:
or_cond = or_cond or False
if pre_and and pre_or:
if and_cond and or_cond:
self.pdf_process(source, url)
return
elif pre_and:
if and_cond:
self.pdf_process(source, url)
return
elif pre_or:
if or_cond:
self.pdf_process(source, url)
return
else:
self.pdf_process(source, url)
return
def parse_crawled_page(self, response):
self.__class__.crawl_count += 1
crawl_count = self.__class__.crawl_count
if crawl_count % 100 == 0:
print 'Crawled %d pages' % crawl_count
if 'pdf' in response.headers.get('content-type', '').lower():
self.get_pdf_link(response.url)
return Item()
def load_allowed_domains_and_start_urls(self):
day = timedelta(days=1)
currdate = date.today()
alrroya = ('http://epaper.alrroya.com/currentissues.php?editiondt=' + currdate.strftime('%Y/%m/%d'),)
self.__class__.all_domains = {
'alrroya': {
'start_urls': alrroya,
'allow_domains': {
'epaper.alrroya.com': frozenset(()),
}
}
}
for domain in self.__class__.all_domains[self.__class__.name]['allow_domains']:
self.__class__.allowed_domains.append(domain)
self.__class__.start_urls.extend(self.__class__.all_domains[self.__class__.name]['start_urls'])
def pdf_process(self, source, url):
print '!!! ' + source + ' ' + url
This appears to be a bug in Scrapy. The current version doesn't seem to accept lists returned from make_requests_from_url(). I was able to modify the Scrapy code in the following way to work around the issue.
In the file Scrapy-0.16.5-py2.7.egg/scrapy/spider.py
Change:
def start_requests(self):
    for url in self.start_urls:
        yield self.make_requests_from_url(url)
To:
def start_requests(self):
    for url in self.start_urls:
        requests = self.make_requests_from_url(url)
        if type(requests) is list:
            for request in requests:
                yield request
        else:
            yield requests
I expect that the official Scrapy people will fix this eventually.
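If you prefer not to patch the installed egg, the same guard can live in the spider itself by overriding start_requests(); a sketch with the same logic (the None check is my addition, covering the branches of make_requests_from_url that implicitly return nothing, which is what produces the 'NoneType' error above):

class MySpider(CrawlSpider):
    # ... attributes and methods as in the question ...

    def start_requests(self):
        for url in self.start_urls:
            requests = self.make_requests_from_url(url)
            if isinstance(requests, list):
                for request in requests:
                    yield request
            elif requests is not None:
                yield requests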