Python 2 to 3 = TypeError: descriptor 'find' for 'str' objects doesn't apply to a 'bytes' object - python-2to3

Hello we try to convert python 2 to 3 but we are stuck with an error.
Maybe someone has an idea.
Thanks
if episode_num is not None:
episode_num = str.encode(str(episode_num), 'ascii','ignore')
if str.find(episode_num, ".") != -1:
splitted = str.split(episode_num, ".")
if splitted[0] != "":
#TODO fix dk format
try:
season = int(splitted[0]) + 1
is_movie = None # fix for misclassification
if str.find(splitted[1], "/") != -1:
episode = int(splitted[1].split("/")[0]) + 1
elif splitted[1] != "":
episode = int(splitted[1]) + 1
except:
episode = ""
season = ""
if str.find(episode_num, ".") != -1:
TypeError: descriptor 'find' for 'str' objects doesn't apply to a 'bytes' object
https://www.dropbox.com/s/viszyzlpbl92yj0/source.py?dl=1

Python 3 is much more strict about mixing str and bytes strings. Just be consistent. When you use encode you create a bytes string.
if bytes.find(episode_num, b".") != -1:
Better, learn to use in:
if b"." in episode_num:

Related

Parsing incomplete / doubled / damaged JSON String

In Python, how would one retrieve information from a incomplete or "overcomplete" JSON string?
Example (incomplete):
msg = '{"MESSAGE": {"MESSAGE_SIZE": "2230", "IMAGE_NUM'
Example (overcomplete):
msg = '{"MESSAGE": {"MESSAGE_SIZE": "2230", "IMAGE_NUMBER": "16227"}}{"MESSA'
Specifying the key MESSAGE_SIZE, I want to retrieve the integer 2230.
The position of the key inside the string is mutable.
One working solution I found is the following (ugly) piece of code. I'm sure there are better solutions though:
key = "\"MESSAGE_SIZE\":"
len_start_idx = 0
len_str = ""
len_int = 0
# Find position right after key
start = msg.find(key)
msg = msg[start+len(key):]
# Find the first value digit after the key
for c in msg:
if not c.isdigit():
len_start_idx += 1
else:
break
msg = msg[len_start_idx:]
# Append value digits until no more digits are found
for c in msg:
if c.isdigit():
len_str += c
else:
break
len_int = int(len_str)
Interestingly, there is a npm-module to untruncate json files but I couldn't find anything similar for python.
It shouldn't be too complicated to implement one though. This should work in most cases (also closing lists):
import json
import re
closing_chars = {
'{': '}',
'[': ']'
}
def close_structs(json_string):
stack = []
for char in json_string:
if char in ['{', '[']:
stack.append(char)
elif char in ['}', ']']:
stack.pop()
for open_struct in stack[::-1]:
json_string += closing_chars[open_struct]
return json_string
def untruncate_json(json_string):
while True:
try:
d = json.loads(json_string)
return d
except Exception as e:
if "Expecting ':' delimiter" in repr(e):
json_string += ": null"
elif "Unterminated string starting at" in repr(e):
json_string += '"'
elif "Expecting property name enclosed in double quotes" in repr(e) \
or "Expecting value" in repr(e):
json_string = re.sub(r'[:,]\s*$', '', json_string)
elif "Expecting ',' delimiter" in repr(e):
json_string = close_structs(json_string)
else:
print(json_string)
raise e
msg = '{"MESSAGE": {"MESSAGE_SIZE": "2230", "IMAGE_NUMBER": "16227", "TE'
d = untruncate_json(msg)
# getting the desired value:
print(int(d["MESSAGE"]["MESSAGE_SIZE"]))
Edit: handling cases "Expecting property name enclosed in double quotes" and "Expecting value" + stripping all kinds of whitespace chars with re.sub (in order to include \n)

how to raise an excpetion and return none value when the conditions are not satisfied

I am new to python and have just to learn python. I have a written a code to find the common characters in two string and I am getting the desired output. I want to modify the code if the following cases arise and it should return None for the following conditions
1) For two string, if there is no match
2) any of string1 or string2 is nil/empty
3) any of string1 or string2 is hash/array/set/Fixnum [i.e anything other than string]
I am supposed to raise an exception for the above cases. I have gone through the forums and links but could not figure it out correctly. Could anyone please help me on how do raise exception for the above condition
This is the code
class CharactersInString:
def __init__(self, value1, value2):
self.value1 = value1
self.value2 = value2
def find_chars_order_n(self):
new_string = [ ]
new_value1 = list(self.value1)
new_value2 = list(self.value2)
print( "new_value1: ", new_value1)
print( "new_value2: ", new_value2)
for i in new_value1:
if i in new_value2 and i not in new_string:
new_string.append(i)
final_list = list(new_string)
return ''.join(final_list)
if __name__ == "__main__":
obj = CharactersInString("ho", "killmse")
print(obj.find_chars_order_n())

UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined>

I am student doing my master thesis. As part of my thesis, I am working with python. I am reading a log file of .csv format and writing the extracted data to another .csv file in a well formatted way. However, when the file is read, I am getting this error:
Traceback (most recent call last): File
"C:\Users\SGADI\workspace\DAB_Trace\my_code\trace_parcer.py", line 19,
in for row in reader:
File "C:\Users\SGADI\Desktop\Python-32bit-3.4.3.2\python-3.4.3\lib\encodings\cp1252.py",
line 23, in decode return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 7240: character maps to <undefined>
import csv
import re
#import matplotlib
#import matplotlib.pyplot as plt
import datetime
#import pandas
#from dateutil.parser import parse
#def parse_csv_file():
timestamp = datetime.datetime.strptime('00:00:00.000', '%H:%M:%S.%f')
timestamp_list = []
snr_list = []
freq_list = []
rssi_list = []
dab_present_list = []
counter = 0
f = open("output.txt","w")
with open('test_log_20150325_gps.csv') as csvfile:
reader = csv.reader(csvfile, delimiter=';')
for row in reader:
#timestamp = datetime.datetime.strptime(row[0], '%M:%S.%f')
#timestamp.split(" ",1)
timestamp = row[0]
timestamp_list.append(timestamp)
#timestamp = row[0]
details = row[-1]
counter += 1
print (counter)
#if(counter > 25000):
# break
#timestamp = datetime.datetime.strptime(row[0], '%M:%S.%f')
#timestamp_list.append(float(timestamp))
#search for SNRLevel=\d+
snr = re.findall('SNRLevel=(\d+)', details)
if snr == []:
snr = 0
else:
snr = snr[0]
snr_list.append(int(snr))
#search for Frequency=09ABC
freq = re.findall('Frequency=([0-9a-fA-F]+)', details)
if freq == []:
freq = 0
else:
freq = int(freq[0], 16)
freq_list.append(int(freq))
#search for RSSI=\d+
rssi = re.findall('RSSI=(\d+)', details)
if rssi == []:
rssi = 0
else:
rssi = rssi[0]
rssi_list.append(int(rssi))
#search for DABSignalPresent=\d+
dab_present = re.findall('DABSignalPresent=(\d+)', details)
if dab_present== []:
dab_present = 0
else:
dab_present = dab_present[0]
dab_present_list.append(int(dab_present))
f.write(str(timestamp) + "\t")
f.write(str(freq) + "\t")
f.write(str(snr) + "\t")
f.write(str(rssi) + "\t")
f.write(str(dab_present) + "\n")
print (timestamp, freq, snr, rssi, dab_present)
#print (index+1)
#print(timestamp,freq,snr)
#print (counter)
#print(timestamp_list,freq_list,snr_list,rssi_list)
'''if snr != []:
if freq != []:
timestamp_list.append(timestamp)
snr_list.append(snr)
freq_list.append(freq)
f.write(str(timestamp_list) + "\t")
f.write(str(freq_list) + "\t")
f.write(str(snr_list) + "\n")
print(timestamp_list,freq_list,snr_list)'''
f.close()
I searched for the special character and I did not find any. I searched the Internet which suggested to change the format: I tried ut8, latin1 and few other formats, but i am still getting this error. Can you please help me how to solve with pandas as well. I also tried with pandas but I am still getting the error.
I even removed a line in the log file, but the error occurs in the next line.
Please help me finding a solution, thank you.
i have solved this issue.
we can use this code
import codecs
types_of_encoding = ["utf8", "cp1252"]
for encoding_type in types_of_encoding:
with codecs.open(filename, encoding = encoding_type, errors ='replace') as csvfile:
your code
....
....
I have solved this issue by simply adding a parameter in open()
with open(filename, encoding = 'cp850') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
with open('input.tsv','rb') as f:
for ln in f:
decoded=False
line=''
for cp in ('cp1252', 'cp850','utf-8','utf8'):
try:
line = ln.decode(cp)
decoded=True
break
except UnicodeDecodeError:
pass
if decoded:
# use 'line'

Reading input from text file and storing the output

I have a function created for validating ISBN 10. It works well enough if the input is from the user. That's not enough, I want input from text file and store the output in text file.
print("ISBN-Validation Program")
isbn = input("Enter isbn: ")
def stripDashes(isbn):
sisbn = isbn.replace("-", "").replace(" ", "").upper();
return checkFormat(sisbn)
def checkFormat(isbn):
if len(isbn) == 10 and isbn[:9].isdigit()\
and (isbn[-1] == "x" or isbn[-1] == "X" or isbn[-1].isdigit()):
return isValiDisbn(isbn)
else:
print("ISBN is not properly formatted.")
def isValiDisbn(isbn):
if (isbn[-1] == "x" or isbn[-1] == "X"):
total = int(isbn[0])*10 + int(isbn[1])*9 + int(isbn[2])*8\
+ int(isbn[3])*7 + int(isbn[4])*6 + int(isbn[5])*5\
+ int(isbn[6])*4 + int(isbn[7])*3 + int(isbn[8])*2 + 10
else:
total = int(isbn[0])*10 + int(isbn[1])*9 + int(isbn[2])*8\
+ int(isbn[3])*7 + int(isbn[4])*6 + int(isbn[5])*5\
+ int(isbn[6])*4 + int(isbn[7])*3 + int(isbn[8])*2 + int(isbn[9])
if total % 11 == 0:
print("The number is valid.")
else:
print("The number is not valid.")
stripDashes(isbn)
The above function works for user input, I guess my code for the calling function and printing in text file is wrong somewhere.
def main():
inFile = open("isbn.txt", "r")
outFile = open("isbnOut.txt", "a")
for line in open("isbn.txt", "r"):
isbns = line.split()
for isbn in isbns:
if checkFormat(isbn) == False:
outFile.write(isbn.strip()+"\nISBN is not properly formatted.\n")
if isValiDisbn(isbn) == True:
outFile.write(isbn.strip()+"\nThe number is valid.\n")
if isValiDisbn(isbn) == False:
outFile.write(isbn.strip()+"\nThe number is not valid.\n")
inFile.close()
outFile.close()
def stripDashes(isbn):
sisbn = isbn.replace("-", "").replace(" ", "").upper();
return checkFormat(sisbn)
def checkFormat(isbn):
if len(isbn) == 10 and isbn[:9].isdigit() and (isbn[-1] == "x" or isbn[-1] == "X" or isbn[-1].isdigit()) == True:
return isValiDisbn(isbn)
else:
return False
#print("ISBN is not properly formatted.")
def isValiDisbn(isbn):
if (isbn[-1] == "x" or isbn[-1] == "X"):
total = int(isbn[0])*10 + int(isbn[1])*9 + int(isbn[2])*8 + int(isbn[3])*7\
+ int(isbn[4])*6 + int(isbn[5])*5 + int(isbn[6])*4 + int(isbn[7])*3\
+ int(isbn[8])*2 + 10
else:
total = int(isbn[0])*10 + int(isbn[1])*9 + int(isbn[2])*8 + int(isbn[3])*7\
+ int(isbn[4])*6 + int(isbn[5])*5 + int(isbn[6])*4 + int(isbn[7])*3\
+ int(isbn[8])*2 + int(isbn[9])
if total % 11 == 0:
return True
#print("The number is valid.")
else:
return False
#print("The number is not valid.")
main()
Can anyone tell me what is wrong here and help me to get through this?
The problem is that you forget to call stripDashes, so the ISBN is fed into int in the form 0-306-40615-2 instead of as 0306406152
def stripDashes(isbn):
sisbn = isbn.replace("-", "").replace(" ", "").upper();
return sisbn
And as part of your loop, do:
def main():
inFile = open("isbn.txt", "r")
outFile = open("isbnOut.txt", "a")
for line in open("isbn.txt", "r"):
isbns = line.split()
for isbn in isbns:
stripped_isbn = stripDashes(isbn)
if checkFormat(stripped_isbn) == False:
outFile.write(isbn.strip()+"\nISBN is not properly formatted.\n")
if isValiDisbn(stripped_isbn) == True:
outFile.write(isbn.strip()+"\nThe number is valid.\n")
if isValiDisbn(stripped_isbn) == False:
outFile.write(isbn.strip()+"\nThe number is not valid.\n")
It works fine:
$ cat isbn.txt
0-306-40615-2
$ python isbn.py
$ cat isbnOut.txt
0306406152
The number is valid.
Other things to note:
You open the in-file as inFile = open("isbn.txt", "r"), but then reopen it in the loop for line in open("isbn.txt", "r"):. Just use your open file like for line in inFile: or remove the first opening line.
Your code that does the out-file writing is broken.
if checkFormat(isbn) == False:
outFile.write(isbn.strip()+"\nISBN is not properly formatted.\n")
if isValiDisbn(isbn) == True:
outFile.write(isbn.strip()+"\nThe number is valid.\n")
if isValiDisbn(isbn) == False:
outFile.write(isbn.strip()+"\nThe number is not valid.\n")
If checkFormat return False, it will still check the ISBN for validity, so an elif should be used for the following lines. isValiDisbn(isbn) == False is also not needed, as it calls isValiDisbn twice, and compares it to False (just use not), so just use an else:
if checkFormat(isbn) == False:
outFile.write(isbn.strip()+"\nISBN is not properly formatted.\n")
elif isValiDisbn(isbn) == True:
outFile.write(isbn.strip()+"\nThe number is valid.\n")
else:
outFile.write(isbn.strip()+"\nThe number is not valid.\n")

Reading binary .SAVE files?

I was wondering how to open or read a binary file that has been saved in octave, with the extension .SAVE? I have tried opening it with MATLAB, using the 'load' function in octave, but nothing seems to be working. I'm trying to understand someone else's code and they have saved the output of a simulation in this file.
The Octave binary format is briefly described in the comments before the function read_binary_data() in load-save.cc.
Are you sure the file is in "Octave binary format". The file ending ".SAVE" can be choosen arbitrary so this could also be CSV, gzipped...
You can run "file yourfile.SAFE" and paste the output or check the first bytes of your file if they are "Octave-1-L" or "Octave-1-B".
If you want to use these files from another program than GNU Octave I would suggest loading it in Octave and safe it in an other format. See "help save" for a list of supported formats.
EDIT:
Because the initial poster asked: Of course you can use GNU Octave from the terminal (no need for the GUI and I don't now to which software part you refer when you are using the phrase "octave GUI", see here Octave FAQ). Just install it for your used platform install instructions on wiki.octave.org and run it.
Code for reading octave binary save files in python 2/3.
Tested on:
strings
single and double precision real and complex floats
various integer types
scalar, matrix and array
Unsupported:
struct
cell array
...
Python code:
# This code is public domain
from __future__ import print_function
import sys
from collections import OrderedDict
import numpy as np
if sys.version_info[0] > 2:
def tostr(s):
return s.decode('utf8')
def decode(s, encoding='utf8'):
return s.decode(encoding)
STR_ENCODING = 'utf8'
else:
def tostr(s):
return s
def decode(s, encoding='utf8'):
return unicode(s, encoding)
STR_ENCODING = None
DATA_TYPES = {
1: "scalar",
2: "matrix",
3: "complex scalar",
4: "complex matrix",
5: "old_string",
6: "range",
7: "string",
}
TYPE_CODES = {
0: "u1",
1: "u2",
2: "u4",
3: "i1",
4: "i2",
5: "i4",
6: "f4",
7: "f8",
8: "u8",
9: "i8",
}
DTYPES = {k: np.dtype(v) for k, v in TYPE_CODES.items()}
def loadoct(fd, encoding=STR_ENCODING):
"""
Read an octave binary file from the file handle fd, returning
an array of structures. If encoding is not None then convert
strings from bytes to unicode. Default is STR_ENCODING, which
is utf8 for python 3 and None for python 2, yielding arrays
of type str in each dialect.
"""
magic = fd.read(10)
assert(magic == b"Octave-1-L" or magic == b"Octave-1-B")
endian = "<" if magic[-1:] == b"L" else ">"
# Float type is 0: IEEE-LE, 1: IEEE-BE, 2: VAX-D, 3: VAX-G, 4: Cray
# Not used since Octave assumes IEEE format floats.
_float_format = fd.read(1)
len_dtype = np.dtype(endian + "i4")
def read_len():
len_bytes = fd.read(4)
if not len_bytes:
return None
return np.frombuffer(len_bytes, len_dtype)[0]
table = OrderedDict()
while True:
name_length = read_len()
if name_length is None: # EOF
break
name = tostr(fd.read(name_length))
doc_length = read_len()
doc = tostr(fd.read(doc_length)) if doc_length else ''
is_global = bool(ord(fd.read(1)))
data_type = ord(fd.read(1))
if data_type == 255:
type_str = tostr(fd.read(read_len()))
else:
type_str = DATA_TYPES[data_type]
#print("reading", name, type_str)
if type_str.endswith("scalar"):
if type_str == "scalar":
dtype = DTYPES[ord(fd.read(1))]
elif type_str == "complex scalar":
_ = fd.read(1)
dtype = np.dtype('complex128')
elif type_str == "float complex scalar":
_ = fd.read(1)
dtype = np.dtype('complex64')
else:
dtype = np.dtype(type_str[:-7])
dtype = dtype.newbyteorder(endian)
data = np.frombuffer(fd.read(dtype.itemsize), dtype)
table[name] = data[0]
elif type_str.endswith("matrix"):
ndims = read_len()
if ndims < 0:
ndims = -ndims
dims = np.frombuffer(fd.read(4*ndims), len_dtype)
else:
dims = (ndims, read_len())
count = np.prod(dims)
if type_str == "matrix":
dtype = DTYPES[ord(fd.read(1))]
elif type_str == "complex matrix":
_ = fd.read(1)
dtype = np.dtype('complex128')
elif type_str == "float complex matrix":
_ = fd.read(1)
dtype = np.dtype('complex64')
else:
dtype = np.dtype(type_str[:-7])
dtype = dtype.newbyteorder(endian)
data = np.frombuffer(fd.read(count*dtype.itemsize), dtype)
# Note: Use data.copy() to make a modifiable array.
table[name] = data.reshape(dims, order='F')
elif type_str == "old_string":
data = fd.read(read_len())
if encoding is not None:
data = decode(data, encoding)
table[name] = data
elif type_str in ("string", "sq_string"):
nrows = read_len()
if nrows < 0:
ndims = -nrows
dims = np.frombuffer(fd.read(4*ndims), len_dtype)
count = np.prod(dims)
fortran_order = np.frombuffer(fd.read(count), dtype='uint8')
c_order = np.ascontiguousarray(fortran_order.reshape(dims, order='F'))
data = c_order.view(dtype='|S'+str(dims[-1]))
if encoding is not None:
data = np.array([decode(s, encoding) for s in data.flat])
table[name] = data.reshape(dims[:-1])
else:
data = [fd.read(read_len()) for _ in range(nrows)]
if encoding is not None:
data = [decode(s, encoding) for s in data]
table[name] = np.array(data)
else:
raise NotImplementedError("unknown octave type "+type_str)
#print("read %s:%s"%(name, type_str), table[name])
return table
def _dump(filename, encoding=STR_ENCODING):
import gzip
if filename.endswith('.gz'):
with gzip.open(filename, 'rb') as fd:
table = loadoct(fd, encoding)
else:
with open(filename, 'rb') as fd:
table = loadoct(fd, encoding)
for k, v in table.items():
print(k, v)
if __name__ == "__main__":
#_dump(sys.argv[1], encoding='utf8') # unicode
#_dump(sys.argv[1], encoding=None) # bytes
_dump(sys.argv[1]) # str, encoding=STR_ENCODING