TypeError: can't concat str to bytes when converting Python 2 to 3 with Encryption Function - python-2to3

I am trying to port code from Python 2 to Python 3, and this is where the problem happens. "pad * chr(pad)" looks like a string, but when I print it out I can't tell what it really is.
<ipython-input-26-6c9679723473> in aesEncrypt(text, secKey)
43 def aesEncrypt(text, secKey):
44 pad = 16 - len(text) % 16
---> 45 text =text + pad * chr(pad)
46 encryptor = AES.new(secKey, 2, '0102030405060708')
47 ciphertext = encryptor.encrypt(text)
TypeError: can't concat str to bytes
I then tried encode(), but it didn't work. I am wondering how to concatenate two strings in Python 3.
<ipython-input-53-e9f33b00348a> in aesEncrypt(text, secKey)
43 def aesEncrypt(text, secKey):
44 pad = 16 - len(text) % 16
---> 45 text = text.encode("utf-8") + (pad * chr(pad)).encode("utf-8")
46 encryptor = AES.new(secKey, 2, '0102030405060708')
47 ciphertext = encryptor.encrypt(text)
AttributeError: 'bytes' object has no attribute 'encode'
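For context, here is a minimal sketch (not part of the original script, with made-up values) of how Python 3 treats the two types:
pad = 4
text = b"abcd"                                        # bytes, e.g. output of a previous aesEncrypt() call
# text + pad * chr(pad)                               # TypeError: can't concat str to bytes
# text.encode("utf-8")                                # AttributeError: 'bytes' object has no attribute 'encode'
as_bytes = text + (pad * chr(pad)).encode("utf-8")    # works: bytes + bytes
as_str = text.decode("utf-8") + pad * chr(pad)        # works: str + str
Both tracebacks point at the same thing: by the time the second call happens, text is already bytes, and str operations no longer apply to it.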
For reference, the original code is:
Author: 路人甲
Link: https://www.zhihu.com/question/31677442/answer/119959112
#encoding=utf8
import requests
from bs4 import BeautifulSoup
import re,time
import os,json
import base64
from Crypto.Cipher import AES
from pprint import pprint
Default_Header = {
'Referer':'http://music.163.com/',
'Host':'music.163.com',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Iceweasel/38.3.0',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding':'gzip, deflate'
}
BASE_URL = 'http://music.163.com'
_session = requests.session()
_session.headers.update(Default_Header)
def getPage(pageIndex):
    pageUrl = 'http://music.163.com/discover/playlist/?order=hot&cat=全部&limit=35&offset='+pageIndex
    soup = BeautifulSoup(_session.get(pageUrl).content)
    songList = soup.findAll('a',attrs = {'class':'tit f-thide s-fc0'})
    for i in songList:
        print i['href']
        getPlayList(i['href'])
def getPlayList(playListId):
    playListUrl = BASE_URL + playListId
    soup = BeautifulSoup(_session.get(playListUrl).content)
    songList = soup.find('ul',attrs = {'class':'f-hide'})
    for i in songList.findAll('li'):
        startIndex = (i.find('a'))['href']
        songId = startIndex.split('=')[1]
        readEver(songId)
def getSongInfo(songId):
    pass
def aesEncrypt(text, secKey):
    pad = 16 - len(text) % 16
    text = text + pad * chr(pad)
    encryptor = AES.new(secKey, 2, '0102030405060708')
    ciphertext = encryptor.encrypt(text)
    ciphertext = base64.b64encode(ciphertext)
    return ciphertext
def rsaEncrypt(text, pubKey, modulus):
    text = text[::-1]
    rs = int(text.encode('hex'), 16)**int(pubKey, 16) % int(modulus, 16)
    return format(rs, 'x').zfill(256)
def createSecretKey(size):
    return (''.join(map(lambda xx: (hex(ord(xx))[2:]), os.urandom(size))))[0:16]
def readEver(songId):
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_'+str(songId)+'/?csrf_token='
    headers = { 'Cookie': 'appver=1.5.0.75771;', 'Referer': 'http://music.163.com/' }
    text = { 'username': '', 'password': '', 'rememberLogin': 'true' }
    modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
    nonce = '0CoJUm6Qyw8W8jud'
    pubKey = '010001'
    text = json.dumps(text)
    secKey = createSecretKey(16)
    encText = aesEncrypt(aesEncrypt(text, nonce), secKey)
    encSecKey = rsaEncrypt(secKey, pubKey, modulus)
    data = { 'params': encText, 'encSecKey': encSecKey }
    req = requests.post(url, headers=headers, data=data)
    total = req.json()['total']
    if int(total) > 10000:
        print songId,total
    else:
        pass
if __name__=='__main__':
    for i in range(1,43):
        getPage(str(i*35))

Around line 85: encText = aesEncrypt(aesEncrypt(text, nonce), secKey)
aesEncrypt() is called the first time with the data shown with subscript '_1' and the second time with the data shown with subscript '_2'. Notice that the type of secKey changes from a string to a list of strings, and text changes from a string to a bytes object.
>>> secKey_1
'0CoJUm6Qyw8W8jud'
>>> secKey_2
['e4', '1a', '61', '7c', '1e', '62', '76', '5', '94', '62', '5a', '92', '9', 'fd', '2f', '4a']
>>>
>>> text_1
'{"username": "", "password": "", "rememberLogin": "true"}'
>>> text_2
b'qjTTWCVgh3v45StLOhGtNtY3zzoImIeGkRry1Vq0LzNSgr9hDHkkh19ujd+iqbvXnzjmHDhOIA5H0z/Wf3uU5Q=='
>>>

Thanks to @Todd, who found the issue: aesEncrypt() is called twice, and it returns bytes while it expects a str, which is acceptable in Python 2 but not in Python 3.
In the end, I changed return ciphertext to return str(ciphertext).
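For anyone porting the same snippet, here is one possible Python 3 version of aesEncrypt() (a sketch, not the poster's final code; it assumes secKey arrives as a 16-character string as in the Python 2 original). Keeping everything as bytes inside the function and decoding the base64 result back to str means the second aesEncrypt() call receives a str again, and it avoids the b'...' prefix that str(ciphertext) would embed in the output:
from Crypto.Cipher import AES
import base64

def aesEncrypt(text, secKey):
    # accept either str or bytes, then work in bytes throughout
    if isinstance(text, str):
        text = text.encode('utf-8')
    if isinstance(secKey, str):
        secKey = secKey.encode('utf-8')
    pad = 16 - len(text) % 16
    text = text + bytes([pad]) * pad                      # pad with bytes, not str
    encryptor = AES.new(secKey, AES.MODE_CBC, b'0102030405060708')
    ciphertext = encryptor.encrypt(text)
    return base64.b64encode(ciphertext).decode('ascii')   # back to str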

Related

Unable to use method of a class in a different class - missing 2 required positional arguments

I have two Python classes: one class (CloudLink) is responsible for sending JSON events to the app, and another (ReadData) is responsible for building the JSON data.
The ReadData class uses the CloudLink methods to send the JSON data to the app, but I'm getting the error _buildJSONdata() missing 1 required positional argument: 'Data'.
ReadData class
from pyspark.sql import SparkSession
import functools
from pyspark.sql import DataFrame
from pyspark.sql.functions import explode
from cosmosconnect import azurecosmos
class ReadData:
#exception(logger)
def __init__(self):
self.spark_session = (
SparkSession.builder
.appName("readData")
.getOrCreate()
)
mssparkutils.fs.unmount('/mnt/test')
logger.info("Drive unmounted")
mssparkutils.fs.mount(
'abfss://abc#transl.dfs.core.windows.net/',
'/mnt/test',
{'linkedService': "linkCosmos"}
)
logger.info("Mounted Successfully")
self.input_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/input_path"
)
self.output_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/output_path"
)
'''
Reading the schema from csv file
'''
#exception(logger)
def readConfig(self):
try:
logger.info(f"Reading the Config present in {self.input_directory} ")
dfConfig = self.spark_session.read.option("multiline","true") \
.json(self.input_directory)
#for f in dfConfig.select("Entity","Query","Business_Rule").collect():
dfConfig=dfConfig.select(explode('Input').alias('Input_Data'))\
.select('Input_Data.Validation_Type','Input_Data.Entity','Input_Data.Query','Input_Data.Business_Rule')
for f in dfConfig.rdd.toLocalIterator():
#for index, f in dfConfig.toPandas().iterrows():
self.Validation_Type=f[0]
self.container=f[1]
self.query=f[2]
self.rule=f[3]
self.readCosmos(self)
except:
raise ValueError("")
#exception(logger)
def readCosmos(self,*params):
#from cosmosconnect import azurecosmos
#a=[]
linkedService='fg'
df=azurecosmos.cosmosConnect(linkedService,self.query,self.container)
df.cache()
if len(df.head(1)) >0:
outputpath=self.output_directory+'/'+self.container
df.coalesce(1).write.mode('overwrite').parquet(outputpath)
Status="Validation Failure"
Data= {"Validation_Type":[],"Status":[],"Container":[],"Business_Rule":[]}
Data["Validation_Type"].append(self.Validation_Type)
Data["Status"].append(Status)
Data["Container"].append(self.container)
Data["Business_Rule"].append(self.rule)
CloudLink._buildJSONdata(Data)
if __name__ == "__main__":
p = ReadData()
p.readConfig()
CloudLink class
import json
import datetime
import hashlib
import json
import sys
import traceback
import adal
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import logging
from functools import wraps
import sys
def create_logger():
#create a logger object
#logger = logging.getLogger()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logfile = logging.FileHandler('exc_logger.log')
#logfile = logging.StreamHandler(sys.stdout)
fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
formatter = logging.Formatter(fmt)
logfile.setFormatter(formatter)
logger.addHandler(logfile)
return logger
logger = create_logger()
def exception(logger):
def decorator(func):
#wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except:
issue = "exception in "+func.__name__+"\n"
issue = issue+"-------------------------\
------------------------------------------------\n"
logger.exception(issue)
raise
return wrapper
return decorator
class CloudLink(object):
_token = None
_instance = None
http = None
cloudclient = TokenLibrary.getSecret("xxxx", "rtrt")
clientid = TokenLibrary.getSecret("xxxx", "tyty")
clientcredentials = TokenLibrary.getSecret("xxxx", "abcabc")
authority_url = TokenLibrary.getSecret("xxxx", "abab")
cloudtest = TokenLibrary.getSecret("xxxx", "yyyy")
#staticmethod
def getInstance():
if not CloudLink._instance:
CloudLink._instance = CloudLink()
return CloudLink._instance
def __init__(self):
retry_strategy = Retry(
total=3,
backoff_factor=0,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["HEAD", "GET", "OPTIONS"],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
self.http = requests.Session()
self.http.mount("https://", adapter)
self.http.mount("http://", adapter)
print("Inside init")
def parseJSON(self, t):
try:
eventData = json.loads(t)
logger.info(f"Sending {eventData} to cloud")
self.sendToCloud(eventData)
except ValueError as e:
print("Error: %s Please validate JSON in https://www.jsonschemavalidator.net/"% e)
return None # or: raise
def sendToCloud(self, eventData):
cloudData = {"eventData": eventData, "metadata": self._buildMetadata()}
logger.info(f"Raising alert with data=({cloudData}")
response = self.http.post(
self.cloudtest, headers=self._buildHeaders(), json=cloudData
)
logger.info(f"cloud alert response={response}")
if response.status_code == 202 or response.status_code == 200:
logger.info("Mail sent to Cloud")
else:
raise Exception(f"Cloud reporting failed with Error {response}")
def _buildJSONdata(self,Data):
if len(Data) == 0:
raise Exception("JSON is empty")
else:
t = json.dumps(self.Data)
self.parseJSON(t)
def _buildMetadata(self):
return {
"messageType": "Send Email",
"messageVersion": "0.0.1",
"sender": "Send Email",
}
def _buildHeaders(self):
self._refreshADToken()
headers = {
"Authorization": "Bearer {}".format(self._token["accessToken"]),
"Content-type": "application/json",
"Accept": "text/plain",
}
return headers
def _refreshADToken(self):
def shouldRenew(token):
"""Returns True if the token should be renewed"""
expiresOn = datetime.datetime.strptime(
token["expiresOn"], "%Y-%m-%d %H:%M:%S.%f"
)
now = datetime.datetime.now()
return (expiresOn - now) < datetime.timedelta(minutes=5)
if not self._token or shouldRenew(self._token):
logger.info("Renewing credentials for Alerting")
result = None
try:
context = adal.AuthenticationContext(CloudLink.authority_url)
result = context.acquire_token_with_client_credentials(CloudLink.cloudclient, CloudLink.clientid,CloudLink.clientcredentials)
except Exception as e:
error = "Failed to renew client credentials."
logger.info(error)
raise
if result and "accessToken" in result:
self._token = result
else:
logger.error(
"Failed to acquire bearer token. accessToken not found in result object on renewing credentials."
)
raise Exception("Could not acquire a bearer token")

Converting COCO Format to LabelMe Format

I am trying to convert a COCO JSON file to a LabelMe JSON file. I used a Python script called "coco2labelme.py" to do the conversion.
The conversion itself succeeds; the only problem is that I get an error every time I try to load the converted JSON file in LabelMe. The error concerns the 'imageData' field of the file.
Does anyone have an idea how to convert from COCO to LabelMe format including the image data?
below is the code for coco2labelme.py
[Source: https://gist.github.com/travishsu/6efa5c9fb92ece37b4748036026342f6]
import os
import json
import subprocess
import numpy as np
import pandas as pd
from skimage.measure import find_contours
class CocoDatasetHandler:
def __init__(self, jsonpath, imgpath):
with open(jsonpath, 'r') as jsonfile:
ann = json.load(jsonfile)
images = pd.DataFrame.from_dict(ann['images']).set_index('id')
annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')
annotations = annotations.merge(images, left_on='image_id', right_index=True)
annotations = annotations.merge(categories, left_on='category_id', right_index=True)
annotations = annotations.assign(
shapes=annotations.apply(self.coco2shape, axis=1))
self.annotations = annotations
self.labelme = {}
self.imgpath = imgpath
self.images = pd.DataFrame.from_dict(ann['images']).set_index('file_name')
def coco2shape(self, row):
if row.iscrowd == 1:
shapes = self.rle2shape(row)
elif row.iscrowd == 0:
shapes = self.polygon2shape(row)
return shapes
def rle2shape(self, row):
rle, shape = row['segmentation']['counts'], row['segmentation']['size']
mask = self._rle_decode(rle, shape)
padded_mask = np.zeros(
(mask.shape[0]+2, mask.shape[1]+2),
dtype=np.uint8,
)
padded_mask[1:-1, 1:-1] = mask
points = find_contours(mask, 0.5)
shapes = [
[[int(point[1]), int(point[0])] for point in polygon]
for polygon in points
]
return shapes
def _rle_decode(self, rle, shape):
mask = np.zeros([shape[0] * shape[1]], np.bool)
for idx, r in enumerate(rle):
if idx < 1:
s = 0
else:
s = sum(rle[:idx])
e = s + r
if e == s:
continue
assert 0 <= s < mask.shape[0]
assert 1 <= e <= mask.shape[0], "shape: {} s {} e {} r {}".format(shape, s, e, r)
if idx % 2 == 1:
mask[s:e] = 1
# Reshape and transpose
mask = mask.reshape([shape[1], shape[0]]).T
return mask
def polygon2shape(self, row):
# shapes: (n_polygons, n_points, 2)
shapes = [
[[int(points[2*i]), int(points[2*i+1])] for i in range(len(points)//2)]
for points in row.segmentation
]
return shapes
def coco2labelme(self):
fillColor = [255, 0, 0, 128]
lineColor = [0, 255, 0, 128]
groups = self.annotations.groupby('file_name')
for file_idx, (filename, df) in enumerate(groups):
record = {
'imageData': None,
'fillColor': fillColor,
'lineColor': lineColor,
'imagePath': filename,
'imageHeight': int(self.images.loc[filename].height),
'imageWidth': int(self.images.loc[filename].width),
}
record['shapes'] = []
instance = {
'line_color': None,
'fill_color': None,
'shape_type': "polygon",
}
for inst_idx, (_, row) in enumerate(df.iterrows()):
for polygon in row.shapes:
copy_instance = instance.copy()
copy_instance.update({
'label': row['name'],
'group_id': inst_idx,
'points': polygon
})
record['shapes'].append(copy_instance)
if filename not in self.labelme.keys():
self.labelme[filename] = record
def save_labelme(self, file_names, dirpath, save_json_only=False):
if not os.path.exists(dirpath):
os.makedirs(dirpath)
else:
raise ValueError(f"{dirpath} has existed")
for file in file_names:
filename = os.path.basename(os.path.splitext(file)[0])
with open(os.path.join(dirpath, filename+'.json'), 'w') as jsonfile:
json.dump(self.labelme[file], jsonfile, ensure_ascii=True, indent=2)
if not save_json_only:
subprocess.call(['cp', os.path.join(self.imgpath, file), dirpath])
ds = CocoDatasetHandler('cocodataset/annotations/instances_train2014.json', 'cocodataset/train2014/')
ds.coco2labelme()
ds.save_labelme(ds.labelme.keys(), 'cocodataset/labelme/train2014')
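If the error is LabelMe complaining about the missing 'imageData', one workaround (an untested sketch; add_image_data is a hypothetical helper, not part of the gist) is to base64-encode each image file into the record before saving, since LabelMe normally stores the image content in that field:
import base64, os

def add_image_data(record, imgdir):
    # fill record['imageData'] with the base64-encoded contents of the image file
    img_file = os.path.join(imgdir, record['imagePath'])
    with open(img_file, 'rb') as f:
        record['imageData'] = base64.b64encode(f.read()).decode('utf-8')
    return record

for fname, rec in ds.labelme.items():
    add_image_data(rec, 'cocodataset/train2014/')
and then call ds.save_labelme(...) as before.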

How to update Kivy labels dynamically after button is pressed

I'm trying to make an app that uses data from a MySQL server. So far I was doing fine, until I stumbled across the need to update the labels.
This is what I have so far:
from kivy.app import App                    # needed for MyApp below
from kivy.uix.button import Button
from kivy.uix.gridlayout import GridLayout  # needed for GL and sublayout below
from kivy.uix.boxlayout import BoxLayout    # needed for layout and BL below
from kivy.lang import Builder
from kivy.uix.screenmanager import ScreenManager, Screen
from kivy.uix.label import Label
from kivy.uix.scrollview import ScrollView
from kivy.clock import Clock
import MySQLdb
class MainView(ScrollView):
def qchange(self):
query = 'SELECT * FROM `citiestovisit` ORDER BY `idcitiestovisit`'
self.db_data(query)
q = 'SELECT * FROM `citiestovisit` ORDER BY `Name`'
def db_data(self, query=q):
#vector = ListProperty()
vector = []
con = MySQLdb.connect(host="localhost", user="root", passwd="", db="cities")
cur = con.cursor()
cur.execute('SET NAMES `utf8`')
cur.execute(query)
result = cur.fetchall()
for row in result:
string = str(row[0]) + " " + str(row[1]) + " " + str(row[2])
vector.append(string)
print vector
return vector
def __init__(self, **kwargs):
kwargs['cols'] = 2
super(MainView, self).__init__(**kwargs)
GL = GridLayout(cols = 3, spacing=10, size_hint_y=None)
GL.bind(minimum_height=GL.setter('height'))
for row in self.db_data():
splitRow = row.split(" ")
for data in splitRow:
GL.add_widget(Label(text=data,size_hint_y=None, font_size='20sp'))
self.add_widget(GL)
Builder.load_string("""
<MenuScreen>:
BoxLayout:
GridLayout:
cols: 1
Button:
text: 'Goto settings'
on_press:
root.manager.transition.direction = 'left'
root.manager.current = 'settings'
Button:
text: 'Quit'
Label:
font_name: 'C:\Anonymous\Anonymous.ttf'
text: "picture here"
<SettingsScreen>:
""")
# Declare both screens
class MenuScreen(Screen):
pass
class SettingsScreen(Screen):
pass
ss = SettingsScreen(name='settings')
layout = BoxLayout(orientation='vertical')
BL = BoxLayout()
layout.add_widget(BL)
#Instance of a MainView class
MV = MainView()
def callback(instance):
sm.transition.direction = 'right'
sm.current = 'menu'
def callback2(instance):
MV.qchange()
btn = Button(text="Back to Menu")
btn.bind(on_press=callback)
btn.size_hint = (1, 0.3)
BL.add_widget(btn)
btn2 = Button(text="Sort by ID")
btn2.size_hint = (1, 0.3)
btn2.bind(on_press=callback2)
BL.add_widget(btn2)
layout.add_widget(MainView())
sublayout = GridLayout(cols=3)
sublayout.add_widget(Label(text="hello"))
sublayout.add_widget(Label(text="World"))
sublayout.add_widget(Label(text="Python"))
layout.add_widget(sublayout)
ss.add_widget(layout)
# Create the screen manager
sm = ScreenManager()
sm.add_widget(MenuScreen(name='menu'))
sm.add_widget(ss)
class MyApp(App):
def build(self):
return sm
if __name__ == '__main__':
MyApp().run()
I'm particularly interested in the qchange(self) method, as it passes a new query into db_data(self, query=q); as a result a request is sent to the database and an array of strings is returned. However, this array is not processed any further, and the labels in the GL widget are not updated. I think I need to add a Clock that would call __init__ in MainView, but that's only a guess, as I've also read about using properties (which I don't know how to use here either).
I've edited my code. Now it looks like this:
class MainView(ScrollView):
def qchange(self):
query = 'SELECT * FROM `citiestovisit` ORDER BY `idcitiestovisit`'
#self.db_data(query)
#LG = self.LabelsGrid(self.GL)
q = 'SELECT * FROM `citiestovisit` ORDER BY `Name`'
def db_data(self, query=q):
vector = []
con = MySQLdb.connect(host="localhost", user="root", passwd="", db="cities")
cur = con.cursor()
cur.execute('SET NAMES `utf8`')
cur.execute(query)
result = cur.fetchall()
for row in result:
string = str(row[0]) + " " + str(row[1]) + " " + str(row[2])
vector.append(string)
print vector
return vector
class LabelsGrid(GridLayout):
def __init__(self, **kwargs):
self.cols = 3
self.spacing = 10
self.size_hint_y = None
def show_labels(self, strings):
self.clear_widgets()
for row in strings:
splitRow = row.split(" ")
for data in splitRow:
label = Label(text=data, size_hint_y=None, font_size='20sp')
self.add_widget(label)
GL = LabelsGrid()
def __init__(self, **kwargs):
kwargs['cols'] = 2
super(MainView, self).__init__(**kwargs)
self.GL=self.LabelsGrid()
# GL = GridLayout(cols = 3, spacing=10, size_hint_y=None)
self.GL.bind(minimum_height=self.GL.setter('height'))
self.GL.show_labels(self.db_data(self.q))
self.add_widget(self.GL)
#self.GL.clear_widgets()
Builder.load_string("""
<MenuScreen>:
BoxLayout:
GridLayout:
cols: 1
Button:
text: 'Goto settings'
on_press:
root.manager.transition.direction = 'left'
root.manager.current = 'settings'
Button:
text: 'Quit'
Label:
font_name: 'C:\Anonymous\Anonymous.ttf'
text: "picture here"
<SettingsScreen>:
""")
# Declare both screens
class MenuScreen(Screen):
pass
class SettingsScreen(Screen):
pass
ss = SettingsScreen(name='settings')
layout = BoxLayout(orientation='vertical')
BL = BoxLayout()
layout.add_widget(BL)
#Instance of a MainView class
MV = MainView()
def callback(instance):
sm.transition.direction = 'right'
sm.current = 'menu'
def callback2(instance):
MV.qchange()
btn = Button(text="Back to Menu")
btn.bind(on_press=callback)
btn.size_hint = (1, 0.3)
BL.add_widget(btn)
btn2 = Button(text="Sort by ID")
btn2.size_hint = (1, 0.3)
btn2.bind(on_press=callback2)
BL.add_widget(btn2)
layout.add_widget(MainView())
sublayout = GridLayout(cols=3)
sublayout.add_widget(Label(text="hello"))
sublayout.add_widget(Label(text="World"))
sublayout.add_widget(Label(text="Python"))
layout.add_widget(sublayout)
ss.add_widget(layout)
# Create the screen manager
sm = ScreenManager()
sm.add_widget(MenuScreen(name='menu'))
sm.add_widget(ss)
class MyApp(App):
def build(self):
return sm
if __name__ == '__main__':
MyApp().run()
By adding
class LabelsGrid(GridLayout):
def __init__(self, **kwargs):
self.cols = 3
self.spacing = 10
self.size_hint_y = None
def show_labels(self, strings):
self.clear_widgets()
for row in strings:
splitRow = row.split(" ")
for data in splitRow:
label = Label(text=data, size_hint_y=None, font_size='20sp')
self.add_widget(label)
I wanted to add a custom GridLayout according to a given piece of advice; however, now I get an error saying:
AttributeError: 'LabelsGrid' object has no attribute '_trigger_layout'
Any ideas on how to handle this?
Create a custom grid layout, let's say LabelsGrid, and in the class implement a method show_labels. Example:
class LabelsGrid(GridLayout):
    def show_labels(self, strings):
        self.clear_widgets()
        for text in strings:
            label = Label(text=text)
            self.add_widget(label)
This way, each time you call the method with names of labels in a list, it will update itself.
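For the AttributeError above, a sketch of how the pieces could fit together (an educated guess, not tested against the full app): the error almost certainly comes from LabelsGrid.__init__ not calling the parent constructor, so the widget never gets Kivy's internal attributes such as _trigger_layout. With that fixed, qchange() can refresh the labels instead of only fetching data:
class LabelsGrid(GridLayout):
    def __init__(self, **kwargs):
        super(LabelsGrid, self).__init__(**kwargs)   # this call was missing
        self.cols = 3
        self.spacing = 10
        self.size_hint_y = None

    def show_labels(self, strings):
        self.clear_widgets()
        for row in strings:
            for data in row.split(" "):
                self.add_widget(Label(text=data, size_hint_y=None, font_size='20sp'))

and in MainView:
    def qchange(self):
        query = 'SELECT * FROM `citiestovisit` ORDER BY `idcitiestovisit`'
        self.GL.show_labels(self.db_data(query))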

TypeError: string indices must be integers

Hi, I have a problem with my code: I get an error in a loop that works a few times but then throws TypeError: string indices must be integers.
I want to call an API to get JSON back and extract some parts of the JSON response. Here's the code:
class API(object):
    def __init__(self, api_key):
        self.api_key = api_key
    def _request(self, api_url, params={}):
        args = {'api_key': self.api_key}
        for key, value in params.items():
            if key not in args:
                args[key] = value
        response = requests.get(
            Consts.URL['base'].format(
                url=api_url
            ),
            params=args
        )
        if response.status_code == requests.codes.ok:
            return response.json()
        else:
            return "not possible"
        print(response.url)
    def get_list(self):
        excel = EXCEL('s6.xlsx')
        api_url = Consts.URL['list'].format(
            version = Consts.API_VERSIONS['matchversion'],
            start = excel.get_gamenr()
        )
        return self._request(api_url)
    def get_match(self, matchid):
        idlist = matchid
        api_url = Consts.URL['match'].format(
            version = Consts.API_VERSIONS['matchversion'],
            matchId = idlist
        )
        return self._request(api_url)
    def match_ids(self):
        api = API('c6ea2f68-7ed6-40fa-9b99-fd591c55c05f')
        x = api.get_list()
        y = x['matches']
        count = len(y)
        ids = []
        while count > 0:
            count = count - 1
            temp = y[0]
            ids.append(temp['matchId'])
            del y[0]
        return ids
    def match_info(self):
        matchids = self.match_ids()
        print(matchids)
        matchinfolist = {}
        counter = 1
        for gameids in matchids:
            info = self.get_match(gameids)
            myid = self.find_partid(info['participantIdentities'])
            prepdstats = info['participants'][myid-1]
            print(prepdstats)
            matchinfolist['stats' + str(counter)] = prepdstats
        return matchinfolist
    def find_partid(self, partlist):
        partid = 0
        idlist = partlist
        while partid < 10:
            partid = partid + 1
            tempplayer = idlist[0]['player']
            if tempplayer['summonerId'] == 19204660:
                playernr = partid
                partid = 500
            del idlist[0]
        return playernr
When I run the match_info() function I get this error:
Traceback (most recent call last):
File "C:\Users\Niklas\Desktop\python riot\main.py", line 17, in <module>
main()
File "C:\Users\Niklas\Desktop\python riot\main.py", line 10, in main
print(api.match_info())
File "C:\Users\Niklas\Desktop\python riot\api.py", line 78, in match_info
myid = self.find_partid(info['participantIdentities'])
TypeError: string indices must be integers
but only after the loop in the function has run a few times. I have no idea what I'm doing wrong. Any help would be appreciated.
Here is a link to the JSON: https://euw.api.pvp.net/api/lol/euw/v2.2/match/2492271473?api_key=c6ea2f68-7ed6-40fa-9b99-fd591c55c05f
The error shows up on
myid = self.find_partid(info['participantIdentities'])
For this line to execute, info must be a mapping with string keys, not a string itself. info is
info = self.get_match(gameids)
get_match ends with
return self._request(api_url)
_request ends with
if response.status_code == requests.codes.ok:
return response.json()
else:
return "not possible"
For the loop to ever run, response.json() must be a dict with the key 'participantIdentities'. Your bug is expecting that to always be true: whenever the status code is not ok, _request returns the string "not possible", and "not possible"['participantIdentities'] is exactly what raises TypeError: string indices must be integers.
One fix might be to make the expectation always true. If there is a satisfactory default value, return {'participantIdentities': <default value>}. Otherwise, return None and change the loop to
info = self.get_match(gameids)
if info is not None:
    # as before
else:
    # whatever default action you want
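Put together, match_info() might look like this (a sketch, assuming _request now returns None on a failed request instead of the string "not possible"):
def match_info(self):
    matchinfolist = {}
    for counter, gameid in enumerate(self.match_ids(), start=1):
        info = self.get_match(gameid)
        if info is not None:
            myid = self.find_partid(info['participantIdentities'])
            matchinfolist['stats' + str(counter)] = info['participants'][myid - 1]
        else:
            pass    # whatever default action you want
    return matchinfolist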

scrapy unhandled exception

I am using Scrapy version 0.16.2 on Linux. I'm running:
scrapy crawl mycrawlspider -s JOBDIR=/mnt/mycrawlspider
I'm getting this error, which blocks Scrapy (it hangs and doesn't finish on its own; only ^C stops it):
2012-11-20 15:04:51+0000 [-] Unhandled Error
Traceback (most recent call last):
  File "/usr/lib/python2.7/site-packages/scrapy/commands/crawl.py", line 45, in run
    self.crawler.start()
  File "/usr/lib/python2.7/site-packages/scrapy/crawler.py", line 80, in start
    reactor.run(installSignalHandlers=False) # blocking call
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1169, in run
    self.mainLoop()
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 1178, in mainLoop
    self.runUntilCurrent()
--- <exception caught here> ---
  File "/usr/lib/python2.7/site-packages/twisted/internet/base.py", line 800, in runUntilCurrent
    call.func(*call.args, **call.kw)
  File "/usr/lib/python2.7/site-packages/scrapy/utils/reactor.py", line 41, in __call__
    return self._func(*self._a, **self._kw)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 116, in _next_request
    self.crawl(request, spider)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 172, in crawl
    self.schedule(request, spider)
  File "/usr/lib/python2.7/site-packages/scrapy/core/engine.py", line 176, in schedule
    return self.slots[spider].scheduler.enqueue_request(request)
  File "/usr/lib/python2.7/site-packages/scrapy/core/scheduler.py", line 48, in enqueue_request
    if not request.dont_filter and self.df.request_seen(request):
exceptions.AttributeError: 'NoneType' object has no attribute 'dont_filter'
BTW this worked in version 0.14
Here is the code:
class MySpider(CrawlSpider):
name = 'alrroya'
NEW_IGNORED_EXTENSIONS = list(IGNORED_EXTENSIONS)
NEW_IGNORED_EXTENSIONS.remove('pdf')
download_delay = 0.05
# Stay within these domains when crawling
allowed_domains = []
all_domains = {}
start_urls = []
# Add our callback which will be called for every found link
rules = [
Rule(SgmlLinkExtractor(deny_extensions=NEW_IGNORED_EXTENSIONS, tags=('a', 'area', 'frame', 'iframe'), attrs=('href', 'src')), follow=True, callback='parse_crawled_page')
]
# How many pages crawled
crawl_count = 0
# How many PDFs we have found
pdf_count = 0
def __init__(self, *args, **kwargs):
CrawlSpider.__init__(self, *args, **kwargs)
dispatcher.connect(self._spider_closed, signals.spider_closed)
dispatcher.connect(self._spider_opened, signals.spider_opened)
self.load_allowed_domains_and_start_urls()
def allowed_to_start(self):
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = self.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
reason = True
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
reason = True
else:
reason = False
else:
reason = True
return reason
def _spider_opened(self, spider):
if spider is not self:
return
curr_date = datetime.today()
curr_date = datetime(curr_date.year, curr_date.month, curr_date.day)
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
day = timedelta(days=1)
if os.path.exists(checkfile):
f = open(checkfile, 'r')
data = f.read()
f.close()
data = data.split('\n')
reason = data[0]
try:
reason_date = datetime.strptime(data[1], '%Y-%m-%d')
except Exception as ex:
reason_date = None
if reason_date and 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
if reason_date and reason_date + day <= curr_date and 'finished' in reason:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
else:
crawler.engine.close_spider(self, 'finished')
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
os._exit(1)
else:
f = open(checkfile, 'w')
f.write('started\n')
f.write(str(date.today()))
f.close()
def _spider_closed(self, spider, reason):
if spider is not self:
return
jobdir = spider.settings['JOBDIR']
if jobdir:
mnt = os.path.dirname(os.path.normpath(jobdir))
else:
mnt = ''
checkfile = os.path.join(mnt, '%s.crawlercheck' % self.__class__.name)
if 'shutdown' in reason:
f = open(checkfile, 'w')
f.write('shutdown\n')
f.write(str(date.today()))
f.close()
else:
if jobdir and os.path.exists(jobdir):
shutil.rmtree(jobdir)
f = open(checkfile, 'w')
f.write('finished\n')
f.write(str(date.today()))
f.close()
def _requests_to_follow(self, response):
if getattr(response, 'encoding', None) != None:
return CrawlSpider._requests_to_follow(self, response)
else:
return []
def make_requests_from_url(self, url):
http_client = httplib2.Http()
try:
headers = {
'content-type': 'text/html',
'user-agent': random.choice(USER_AGENT_LIST)
}
response, content = http_client.request(url, method='HEAD', headers=headers)
#~ if 'pdf' in response['content-type'].lower() or (url.endswith('.pdf') and 'octet-stream' in response['content-type'].lower()):
if 'pdf' in response['content-type'].lower() or 'octet-stream' in response['content-type'].lower():
if self.allowed_to_start():
self.get_pdf_link(url)
else:
return CrawlSpider.make_requests_from_url(self, url)
except Exception as ex:
return CrawlSpider.make_requests_from_url(self, url)
def get_pdf_link(self, url):
source = self.__class__.name
parsed_url = urlparse(url)
url_domain = parsed_url.netloc
url_path = parsed_url.path
if url_domain:
for domain, paths in self.__class__.all_domains[source]['allow_domains'].iteritems():
if url_domain.endswith(domain):
pre_and = False
pre_or = False
and_cond = True
or_cond = False
for path in paths:
if path[0:1] == '!':
pre_and = True
if path[1:] not in url_path:
and_cond = and_cond and True
else:
and_cond = and_cond and False
else:
pre_or = True
if path in url_path:
or_cond = or_cond or True
else:
or_cond = or_cond or False
if pre_and and pre_or:
if and_cond and or_cond:
self.pdf_process(source, url)
return
elif pre_and:
if and_cond:
self.pdf_process(source, url)
return
elif pre_or:
if or_cond:
self.pdf_process(source, url)
return
else:
self.pdf_process(source, url)
return
def parse_crawled_page(self, response):
self.__class__.crawl_count += 1
crawl_count = self.__class__.crawl_count
if crawl_count % 100 == 0:
print 'Crawled %d pages' % crawl_count
if 'pdf' in response.headers.get('content-type', '').lower():
self.get_pdf_link(response.url)
return Item()
def load_allowed_domains_and_start_urls(self):
day = timedelta(days=1)
currdate = date.today()
alrroya = ('http://epaper.alrroya.com/currentissues.php?editiondt=' + currdate.strftime('%Y/%m/%d'),)
self.__class__.all_domains = {
'alrroya': {
'start_urls': alrroya,
'allow_domains': {
'epaper.alrroya.com': frozenset(()),
}
}
}
for domain in self.__class__.all_domains[self.__class__.name]['allow_domains']:
self.__class__.allowed_domains.append(domain)
self.__class__.start_urls.extend(self.__class__.all_domains[self.__class__.name]['start_urls'])
def pdf_process(self, source, url):
print '!!! ' + source + ' ' + url
This appears to be a bug in Scrapy. The current version doesn't seem to accept lists returned from make_requests_from_url(). I was able to modify the Scrapy code in the following way to work around the issue.
In the file Scrapy-0.16.5-py2.7.egg/scrapy/spider.py
Change:
def start_requests(self):
    for url in self.start_urls:
        yield self.make_requests_from_url(url)
To:
def start_requests(self):
    for url in self.start_urls:
        requests = self.make_requests_from_url(url)
        if type(requests) is list:
            for request in requests:
                yield request
        else:
            yield requests
I expect that the official Scrapy people will fix this eventually.
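As a less invasive alternative (an untested sketch), the same handling can be done inside the spider itself by overriding start_requests(), which also covers the case where make_requests_from_url() above returns None (the PDF branch), a likely source of the 'NoneType' object has no attribute 'dont_filter' error:
def start_requests(self):
    for url in self.start_urls:
        requests = self.make_requests_from_url(url)
        if requests is None:
            continue                      # e.g. the PDF branch returned nothing
        if isinstance(requests, list):
            for request in requests:
                yield request
        else:
            yield requests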