AWS SageMaker batch transform with JSON input filter

I have a custom SageMaker model for an NLP task and I'm trying to run a batch transform on the following JSON file:
{"id": 123, "features": "This is a test message"}
and I'm looking to output the following:
{"id": 123, "SageMakerOutput": "spam"}
Here's my batch transform code:
transformer = sklearn.transformer(instance_count=1,
                                  instance_type='local',
                                  accept='application/json',
                                  output_path="s3://spam-detection-messages-output/json_examples")

transformer.transform("s3://spam-detection-messages/json_examples",
                      content_type='application/json',
                      input_filter="$.features",
                      join_source="Input",
                      output_filter="$['features', SageMakerOutput']")

print('Waiting for transform job: ' + transformer.latest_transform_job.job_name)
transformer.wait()
According to this document,
https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html#batch-transform-data-processing-examples
I should be able to grab the "features" field using input_filter; instead, the entire JSON payload is passed to the model, and the job only outputs the prediction.
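For reference, this is roughly what I expect the call to look like based on the associate-results examples on that page; the split_type value is my assumption that the input is JSON Lines, and I haven't confirmed these exact filter strings work:

transformer.transform(
    "s3://spam-detection-messages/json_examples",
    content_type='application/json',
    split_type='Line',                          # assumes one JSON object per line
    input_filter='$.features',                  # send only the message text to the model
    join_source='Input',                        # join the prediction back onto the input record
    output_filter="$['id','SageMakerOutput']",  # keep the id plus the prediction
)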
I'm also including my training code
import argparse
import pandas as pd
import os
import glob
import io
import json

from sklearn import tree
from sklearn.externals import joblib
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('wordnet')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
vectorizer = TfidfVectorizer()


def remove_stop_words(words):
    result = [i for i in words if i not in ENGLISH_STOP_WORDS]
    return result


def word_stemmer(words):
    return [stemmer.stem(o) for o in words]


def word_lemmatizer(words):
    return [lemmatizer.lemmatize(o) for o in words]


def remove_characters(words):
    return [word for word in words if len(word) > 1]


def clean_token_pipeline(words):
    cleaning_utils = [remove_stop_words, word_lemmatizer]
    for o in cleaning_utils:
        words = o(words)
    return words


def process_text(X_train, X_test, y_train, y_test):
    X_train = [word_tokenize(o) for o in X_train]
    X_test = [word_tokenize(o) for o in X_test]
    X_train = [clean_token_pipeline(o) for o in X_train]
    X_test = [clean_token_pipeline(o) for o in X_test]
    X_train = [" ".join(o) for o in X_train]
    X_test = [" ".join(o) for o in X_test]
    return X_train, X_test, y_train, y_test


def convert_to_feature(raw_tokenize_data):
    raw_sentences = [' '.join(o) for o in raw_tokenize_data]
    return vectorizer.transform(raw_sentences)


def _npy_loads(data):
    """
    Deserializes npy-formatted bytes into a numpy array
    """
    stream = io.BytesIO(data)
    return np.load(stream)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Sagemaker specific arguments. Defaults are set in the environment variables.
    parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
    args = parser.parse_args()

    train_data = pd.read_csv(args.train + "/spamAssassin_min.csv", index_col=0)
    train_data.dropna(inplace=True)
    print(train_data.head())

    X_train, X_test, y_train, y_test = train_test_split(train_data['message'], train_data['label'], test_size=0.2, random_state=1)
    X_train, X_test, y_train, y_test = process_text(X_train, X_test, y_train, y_test)
    X_train = [o.split(" ") for o in X_train]
    X_test = [o.split(" ") for o in X_test]

    vectorizer = TfidfVectorizer()
    raw_sentences = [' '.join(o) for o in X_train]
    vectorizer.fit(raw_sentences)
    # print("saving transformer to {}".format(args.model_dir))
    joblib.dump(vectorizer, os.path.join(args.model_dir, "vectorizer.joblib"))

    x_train_features = convert_to_feature(X_train)
    x_test_features = convert_to_feature(X_test)

    clf = GaussianNB()
    clf.fit(x_train_features.toarray(), y_train)

    y_true, y_pred = y_test, clf.predict(x_test_features.toarray())
    print(classification_report(y_true, y_pred))

    joblib.dump(clf, os.path.join(args.model_dir, "model.joblib"))
def model_fn(model_dir):
    """Deserialize and return the fitted model.
    Note that this should have the same name as the serialized model in the main method.
    """
    clf = joblib.load(os.path.join(model_dir, "model.joblib"))
    # print("model loaded {}".format(clf))
    return clf
def input_fn(request_body, request_content_type):
    print("** input_fn**")
    print("request_body:{} request_content_type:{}".format(request_body, request_content_type))
    if request_content_type == "text/plain":
        # convert to string
        message = str(request_body)
        return message
    elif request_content_type == "application/json":
        request_body_json = json.loads(request_body)
        # print("json {}".format(request_body_json))
        return request_body_json['features']
    elif request_content_type == "application/x-npy":
        return " ".join(_npy_loads(request_body))
    else:
        # Handle other content-types here or raise an Exception
        # if the content type is not supported.
        return request_body


def predict_fn(input_data, model):
    print("** predict_fn**")
    print("input_data: {} model:{}".format(input_data, model))
    print("\n")

    prefix = '/opt/ml/'
    model_path = os.path.join(prefix, 'model')
    my_vect = joblib.load(os.path.join(model_path, "vectorizer.joblib"))

    message = "".join(clean_token_pipeline(input_data))
    print("processed message: {}".format(message))
    message = my_vect.transform([message])
    message = message.toarray()
    prediction = model.predict(message)
    return prediction
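For debugging the serving side, a minimal local smoke test like the following can be pasted below these functions; it is my own sketch and assumes model.joblib and vectorizer.joblib already exist under /opt/ml/model (the path predict_fn hardcodes):

# Hypothetical local check of the inference path, outside of SageMaker.
sample = '{"id": 123, "features": "This is a test message"}'

model = model_fn("/opt/ml/model")
payload = input_fn(sample, "application/json")   # -> "This is a test message"
print(predict_fn(payload, model))                # prints the model's label for the record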

Related

Unable to use a method of one class in a different class - missing 2 required positional arguments

I have two Python classes: one class (CloudLink) is responsible for sending JSON events to the app, and the other (ReadData) is responsible for building the JSON data.
The ReadData class uses the CloudLink methods to send the JSON data to the app, but I'm getting the error _buildJSONdata() missing 1 required positional argument: 'Data'. Both classes are shown below, followed by a sketch of what I think the call needs to look like.
ReadData class
from pyspark.sql import SparkSession
import functools
from pyspark.sql import DataFrame
from pyspark.sql.functions import explode
from cosmosconnect import azurecosmos


class ReadData:
    #exception(logger)
    def __init__(self):
        self.spark_session = (
            SparkSession.builder
            .appName("readData")
            .getOrCreate()
        )
        mssparkutils.fs.unmount('/mnt/test')
        logger.info("Drive unmounted")
        mssparkutils.fs.mount(
            'abfss://abc@transl.dfs.core.windows.net/',
            '/mnt/test',
            {'linkedService': "linkCosmos"}
        )
        logger.info("Mounted Successfully")
        self.input_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/input_path")
        self.output_directory = (f"synfs:/{mssparkutils.env.getJobId()}/mnt/test/output_path")

    '''
    Reading the schema from csv file
    '''
    #exception(logger)
    def readConfig(self):
        try:
            logger.info(f"Reading the Config present in {self.input_directory} ")
            dfConfig = self.spark_session.read.option("multiline", "true") \
                .json(self.input_directory)

            #for f in dfConfig.select("Entity","Query","Business_Rule").collect():
            dfConfig = dfConfig.select(explode('Input').alias('Input_Data')) \
                .select('Input_Data.Validation_Type', 'Input_Data.Entity', 'Input_Data.Query', 'Input_Data.Business_Rule')
            for f in dfConfig.rdd.toLocalIterator():
                #for index, f in dfConfig.toPandas().iterrows():
                self.Validation_Type = f[0]
                self.container = f[1]
                self.query = f[2]
                self.rule = f[3]
                self.readCosmos(self)
        except:
            raise ValueError("")

    #exception(logger)
    def readCosmos(self, *params):
        #from cosmosconnect import azurecosmos
        #a=[]
        linkedService = 'fg'
        df = azurecosmos.cosmosConnect(linkedService, self.query, self.container)
        df.cache()
        if len(df.head(1)) > 0:
            outputpath = self.output_directory + '/' + self.container
            df.coalesce(1).write.mode('overwrite').parquet(outputpath)
            Status = "Validation Failure"
            Data = {"Validation_Type": [], "Status": [], "Container": [], "Business_Rule": []}
            Data["Validation_Type"].append(self.Validation_Type)
            Data["Status"].append(Status)
            Data["Container"].append(self.container)
            Data["Business_Rule"].append(self.rule)
            CloudLink._buildJSONdata(Data)


if __name__ == "__main__":
    p = ReadData()
    p.readConfig()
CloudLink class
import json
import datetime
import hashlib
import json
import sys
import traceback
import adal
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import logging
from functools import wraps
import sys


def create_logger():
    # create a logger object
    #logger = logging.getLogger()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logfile = logging.FileHandler('exc_logger.log')
    #logfile = logging.StreamHandler(sys.stdout)
    fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    formatter = logging.Formatter(fmt)
    logfile.setFormatter(formatter)
    logger.addHandler(logfile)
    return logger


logger = create_logger()


def exception(logger):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except:
                issue = "exception in " + func.__name__ + "\n"
                issue = issue + "-------------------------\
                ------------------------------------------------\n"
                logger.exception(issue)
                raise
        return wrapper
    return decorator


class CloudLink(object):
    _token = None
    _instance = None
    http = None
    cloudclient = TokenLibrary.getSecret("xxxx", "rtrt")
    clientid = TokenLibrary.getSecret("xxxx", "tyty")
    clientcredentials = TokenLibrary.getSecret("xxxx", "abcabc")
    authority_url = TokenLibrary.getSecret("xxxx", "abab")
    cloudtest = TokenLibrary.getSecret("xxxx", "yyyy")

    @staticmethod
    def getInstance():
        if not CloudLink._instance:
            CloudLink._instance = CloudLink()
        return CloudLink._instance

    def __init__(self):
        retry_strategy = Retry(
            total=3,
            backoff_factor=0,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.http = requests.Session()
        self.http.mount("https://", adapter)
        self.http.mount("http://", adapter)
        print("Inside init")

    def parseJSON(self, t):
        try:
            eventData = json.loads(t)
            logger.info(f"Sending {eventData} to cloud")
            self.sendToCloud(eventData)
        except ValueError as e:
            print("Error: %s Please validate JSON in https://www.jsonschemavalidator.net/" % e)
            return None  # or: raise

    def sendToCloud(self, eventData):
        cloudData = {"eventData": eventData, "metadata": self._buildMetadata()}
        logger.info(f"Raising alert with data=({cloudData}")
        response = self.http.post(
            self.cloudtest, headers=self._buildHeaders(), json=cloudData
        )
        logger.info(f"cloud alert response={response}")
        if response.status_code == 202 or response.status_code == 200:
            logger.info("Mail sent to Cloud")
        else:
            raise Exception(f"Cloud reporting failed with Error {response}")

    def _buildJSONdata(self, Data):
        if len(Data) == 0:
            raise Exception("JSON is empty")
        else:
            t = json.dumps(self.Data)
            self.parseJSON(t)

    def _buildMetadata(self):
        return {
            "messageType": "Send Email",
            "messageVersion": "0.0.1",
            "sender": "Send Email",
        }

    def _buildHeaders(self):
        self._refreshADToken()
        headers = {
            "Authorization": "Bearer {}".format(self._token["accessToken"]),
            "Content-type": "application/json",
            "Accept": "text/plain",
        }
        return headers

    def _refreshADToken(self):
        def shouldRenew(token):
            """Returns True if the token should be renewed"""
            expiresOn = datetime.datetime.strptime(
                token["expiresOn"], "%Y-%m-%d %H:%M:%S.%f"
            )
            now = datetime.datetime.now()
            return (expiresOn - now) < datetime.timedelta(minutes=5)

        if not self._token or shouldRenew(self._token):
            logger.info("Renewing credentials for Alerting")
            result = None
            try:
                context = adal.AuthenticationContext(CloudLink.authority_url)
                result = context.acquire_token_with_client_credentials(CloudLink.cloudclient, CloudLink.clientid, CloudLink.clientcredentials)
            except Exception as e:
                error = "Failed to renew client credentials."
                logger.info(error)
                raise

            if result and "accessToken" in result:
                self._token = result
            else:
                logger.error(
                    "Failed to acquire bearer token. accessToken not found in result object on renewing credentials."
                )
                raise Exception("Could not acquire a bearer token")

Converting COCO Format to LabelMe Format

I am trying to convert a COCO JSON file to the LabelMe JSON format. I used a Python script called coco2labelme.py to convert the file.
It converts the JSON successfully; the only problem is that I get an error every time I try to load the converted file in LabelMe, complaining about the file's 'imageData'.
Does anyone have an idea how to convert from COCO to LabelMe format including the image data? One possible workaround is sketched after the script below.
below is the code for coco2labelme.py
[Source: https://gist.github.com/travishsu/6efa5c9fb92ece37b4748036026342f6]
import os
import json
import subprocess
import numpy as np
import pandas as pd
from skimage.measure import find_contours


class CocoDatasetHandler:
    def __init__(self, jsonpath, imgpath):
        with open(jsonpath, 'r') as jsonfile:
            ann = json.load(jsonfile)

        images = pd.DataFrame.from_dict(ann['images']).set_index('id')
        annotations = pd.DataFrame.from_dict(ann['annotations']).set_index('id')
        categories = pd.DataFrame.from_dict(ann['categories']).set_index('id')

        annotations = annotations.merge(images, left_on='image_id', right_index=True)
        annotations = annotations.merge(categories, left_on='category_id', right_index=True)
        annotations = annotations.assign(
            shapes=annotations.apply(self.coco2shape, axis=1))
        self.annotations = annotations

        self.labelme = {}
        self.imgpath = imgpath
        self.images = pd.DataFrame.from_dict(ann['images']).set_index('file_name')

    def coco2shape(self, row):
        if row.iscrowd == 1:
            shapes = self.rle2shape(row)
        elif row.iscrowd == 0:
            shapes = self.polygon2shape(row)
        return shapes

    def rle2shape(self, row):
        rle, shape = row['segmentation']['counts'], row['segmentation']['size']
        mask = self._rle_decode(rle, shape)
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2),
            dtype=np.uint8,
        )
        padded_mask[1:-1, 1:-1] = mask
        points = find_contours(mask, 0.5)
        shapes = [
            [[int(point[1]), int(point[0])] for point in polygon]
            for polygon in points
        ]
        return shapes

    def _rle_decode(self, rle, shape):
        mask = np.zeros([shape[0] * shape[1]], np.bool)
        for idx, r in enumerate(rle):
            if idx < 1:
                s = 0
            else:
                s = sum(rle[:idx])
            e = s + r
            if e == s:
                continue
            assert 0 <= s < mask.shape[0]
            assert 1 <= e <= mask.shape[0], "shape: {} s {} e {} r {}".format(shape, s, e, r)
            if idx % 2 == 1:
                mask[s:e] = 1
        # Reshape and transpose
        mask = mask.reshape([shape[1], shape[0]]).T
        return mask

    def polygon2shape(self, row):
        # shapes: (n_polygons, n_points, 2)
        shapes = [
            [[int(points[2 * i]), int(points[2 * i + 1])] for i in range(len(points) // 2)]
            for points in row.segmentation
        ]
        return shapes

    def coco2labelme(self):
        fillColor = [255, 0, 0, 128]
        lineColor = [0, 255, 0, 128]
        groups = self.annotations.groupby('file_name')
        for file_idx, (filename, df) in enumerate(groups):
            record = {
                'imageData': None,
                'fillColor': fillColor,
                'lineColor': lineColor,
                'imagePath': filename,
                'imageHeight': int(self.images.loc[filename].height),
                'imageWidth': int(self.images.loc[filename].width),
            }
            record['shapes'] = []
            instance = {
                'line_color': None,
                'fill_color': None,
                'shape_type': "polygon",
            }
            for inst_idx, (_, row) in enumerate(df.iterrows()):
                for polygon in row.shapes:
                    copy_instance = instance.copy()
                    copy_instance.update({
                        'label': row['name'],
                        'group_id': inst_idx,
                        'points': polygon
                    })
                    record['shapes'].append(copy_instance)
            if filename not in self.labelme.keys():
                self.labelme[filename] = record

    def save_labelme(self, file_names, dirpath, save_json_only=False):
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        else:
            raise ValueError(f"{dirpath} has existed")
        for file in file_names:
            filename = os.path.basename(os.path.splitext(file)[0])
            with open(os.path.join(dirpath, filename + '.json'), 'w') as jsonfile:
                json.dump(self.labelme[file], jsonfile, ensure_ascii=True, indent=2)
            if not save_json_only:
                subprocess.call(['cp', os.path.join(self.imgpath, file), dirpath])


ds = CocoDatasetHandler('cocodataset/annotations/instances_train2014.json', 'cocodataset/train2014/')
ds.coco2labelme()
ds.save_labelme(ds.labelme.keys(), 'cocodataset/labelme/train2014')
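One possible workaround, sketched here as an untested guess: LabelMe stores the image pixels as a base64 string in imageData, so the record built in coco2labelme() could be given that value instead of None before saving (assuming the image files are available under imgpath):

import base64
import os

def encode_image_data(imgpath, filename):
    # Read the raw image bytes and base64-encode them, which is the form
    # LabelMe keeps in the 'imageData' field of its JSON files.
    with open(os.path.join(imgpath, filename), 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')

# e.g. inside coco2labelme(), instead of 'imageData': None:
# record['imageData'] = encode_image_data(self.imgpath, filename)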

load_model() doesn't load a model

I have Python version 3.6 and tensorflow version 2.0.1 installed. I created a model and tried to load it, but ran into a problem:
Traceback (most recent call last):
  File "C:/Users/Irina/Documents/PYTHON4/main.py", line 86, in <module>
    main()
  File "C:/Users/Irina/Documents/PYTHON4/main.py", line 73, in main
    predictions = get_predictions(data, model)
  File "C:/Users/Irina/Documents/PYTHON4/main.py", line 59, in get_predictions
    predictions = [predict(np.array([image]), model) for image in data]
  File "C:/Users/Irina/Documents/PYTHON4/main.py", line 59, in <listcomp>
    predictions = [predict(np.array([image]), model) for image in data]
  File "C:\Users\Irina\Documents\PYTHON4\eval.py", line 53, in predict
    pred = model.predict(data)
AttributeError: 'list' object has no attribute 'predict'

Process finished with exit code 1
I suspect the error is in load_model() not working, although it may also be due to an incompatibility between my Python and TensorFlow versions. (See the note after eval.py below.)
Here's main.py:
# -*- coding: utf-8 -*-
import cv2  # computer vision library
import os
from sklearn.metrics import f1_score
import numpy as np
from imutils import paths
from eval import standardize_input, predict, load_final_model


def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]


# Image data directories
def one_hot_encode(label):
    dictAnimal = {'cats': 0, 'dogs': 1}
    return dictAnimal[label]


def load_data():
    IMAGE_DIR_VALIDATION = "animalsval"
    imagePaths = sorted(list(paths.list_images(IMAGE_DIR_VALIDATION)))
    data = []
    labels = []
    for imagePath in imagePaths:
        data.append(standardize_input(imagePath))
        label = imagePath.split(os.path.sep)[-2]
        labels.append(one_hot_encode(label))
    data = np.array(data, dtype="float")
    labels = np.array(labels)
    data, labels = unison_shuffled_copies(data, labels)
    return data, labels


def get_predictions(data, model):
    predictions = [predict(np.array([image]), model) for image in data]
    return predictions


def main():
    data, labels = load_data()
    try:
        model = load_final_model()
    except:
        print('The model is not loaded, we use a constant classifier')
        model = None
    predictions = get_predictions(data, model)
    try:
        f1 = f1_score(labels, predictions)
        print('F1-Classifier measure:', f1)
    except Exception as e:
        print('Error: ', e)
    file = open("score.txt", "w")
    file.write(str(f1))
    file.close()


if __name__ == '__main__':
    main()
Here's eval.py:
# -*- coding: utf-8 -*-
import numpy as np
import cv2
from tensorflow.keras.models import load_model


def standardize_input(image):
    standard_im = cv2.imread(image)
    standard_im = cv2.resize(standard_im, (32, 32))
    standard_im = np.reshape(standard_im, (1, 32, 32, 3))
    return standard_im


MODEL_FILE_NAME = 'EasyNet.h5'


def load_final_model():
    try:
        model = load_model(MODEL_FILE_NAME)
        print(type(model))
    except Exception:
        print("Error")
        model = []
    return model


def predict(image, model):
    data = image
    label = 0
    pred = model.predict(data)
    if pred[0][0] >= pred[0][1]:
        label = 0
    else:
        label = 1
    return label
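One note, offered as a suggestion rather than as the original code: load_final_model() swallows the real failure and returns an empty list, which is exactly the object the traceback complains about ('list' object has no attribute 'predict'). A tweak like this sketch would print the actual reason load_model() failed, such as a wrong path or a version mismatch:

def load_final_model():
    # Sketch: let the real error surface instead of returning a list, so the
    # path or version problem behind load_model() becomes visible.
    try:
        model = load_model(MODEL_FILE_NAME)
    except Exception as e:
        print("Failed to load {}: {}".format(MODEL_FILE_NAME, e))
        raise
    return model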
Here's the script that builds and saves the model:
import matplotlib
matplotlib.use("Agg")

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import random
import pickle
import cv2
import os
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

ImagePaths = list(paths.list_images("/content/drive/My Drive/GoogleColab/dogs/"))
random.seed(42)
ImagePaths = ImagePaths[:1800] + list(paths.list_images("/content/drive/My Drive/GoogleColab/cats/"))
print(len(ImagePaths))
random.shuffle(ImagePaths)

data = []
labels = []
i = 0
for imagepath in ImagePaths:
    if i % 100 == 0:
        print(str(i) + "/3555")
    i += 1
    image = cv2.imread(imagepath)
    image = cv2.resize(image, (32, 32))
    data.append(image)
    label = imagepath.split(os.path.sep)[-2]
    if label == "cats":
        label = [1, 0]
    else:
        label = [0, 1]
    labels.append(label)

print(labels)
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

with open("/content/drive/My Drive/GoogleColab/Lesson_4/data.pickle", 'wb') as f:
    pickle.dump(data, f)
    print("Data saved")
with open("/content/drive/My Drive/GoogleColab/Lesson_4/labels.pickle", 'wb') as f:
    pickle.dump(labels, f)
    print("Labels saved")
with open("/content/drive/My Drive/GoogleColab/Lesson_4/data.pickle", 'rb') as f:
    data = pickle.load(f)
    print("Data loaded")
with open("/content/drive/My Drive/GoogleColab/Lesson_4/labels.pickle", 'rb') as f:
    labels = pickle.load(f)
    print("Labels loaded")

(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.15,
                                                  random_state=42)
print("Dataset prepared")

from tensorflow.keras.layers import Conv2D, Flatten, Dropout, Activation, MaxPooling2D
from tensorflow.keras.optimizers import Adam

model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same", input_shape=(32, 32, 3)))
model.add(Activation("relu"))
model.add(Conv2D(32, (3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
#model.add(Dense(1024,input_shape=(3072,), activation='sigmoid'))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

INIT_LR = 0.01
#opt = SGD(lr=INIT_LR)
opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(loss="binary_crossentropy", optimizer=opt,
              metrics=["accuracy"])  # categorical_crossentropy
print("Model compiled")
model.summary()

EPOCHS = 30
checkpointer = ModelCheckpoint(filepath='/content/drive/My Drive/GoogleColab/Lesson_4/ConvNN.h5', verbose=1, save_best_only=True)
H = model.fit(trainX, trainY, validation_data=(testX, testY),
              epochs=EPOCHS, batch_size=32,
              shuffle=True,
              callbacks=[checkpointer])
print("Model trained")

predictions = model.predict(testX, batch_size=32)
print(predictions)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1), target_names=("cats", "dogs")))

N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["accuracy"], label="train_acc")
plt.plot(N, H.history["val_accuracy"], label="val_acc")
plt.title("Results")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig("/content/drive/My Drive/GoogleColab/Lesson_4/Loss.png")

model.save("/content/drive/My Drive/GoogleColab/Lesson_4/EasyNet.h5")
print("End")

Stream multiple videos using OpenCV Python Flask

I'm trying to stream 2 webcams at once using Flask in Python, but I'm not able to do so: when I run my code, both webcams light up, yet only one of the cameras shows on the webpage, and I'm not sure why.
Here is the code I'm using:
from flask import Flask, render_template, Response
from vCamera import VideoCamera
import pdb

app = Flask(__name__)


@app.route('/')
def index():
    return render_template('index.html')


def gen(vCamera0):
    while True:
        frame0 = vCamera0.get_frame0()
        yield (b'--frame0\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame0 + b'\r\n\r\n')

        frame2 = vCamera0.get_frame2()
        yield (b'--frame2\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame2 + b'\r\n\r\n')


@app.route('/video_feed0')
def video_feed0():
    return Response(gen(VideoCamera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame0')


@app.route('/video_feed2')
def video_feed2():
    return Response(gen(VideoCamera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame2')


if __name__ == '__main__':
    app.run(host='127.0.0.1', debug=True)
And this is my camera file:
import pdb
import cv2

fullbody_cascade = cv2.CascadeClassifier('haarcascade_fullbody.xml')
upperbody_cascade = cv2.CascadeClassifier('haarcascade_upperbody.xml')


class VideoCamera(object):
    def __init__(self):
        self.video0 = cv2.VideoCapture(0)
        self.video2 = cv2.VideoCapture(2)

    def __del__(self):
        self.video0.release()

    def get_frame0(self):
        success0, frame0 = self.video0.read()
        gray0 = cv2.cvtColor(frame0, cv2.COLOR_BGR2GRAY)
        fullbody0 = fullbody_cascade.detectMultiScale(gray0)
        upperbody0 = upperbody_cascade.detectMultiScale(gray0)
        for (x, y, w, h) in fullbody0:
            cv2.rectangle(frame0, (x, y), (x + w, y + h), (255, 0, 0), 2)
        for (x, y, w, h) in upperbody0:
            cv2.rectangle(frame0, (x, y), (x + w, y + h), (255, 0, 0), 2)
        ret0, jpeg0 = cv2.imencode('.jpg', frame0)
        return jpeg0.tobytes()

    def get_frame2(self):
        success2, frame2 = self.video2.read()
        gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
        fullbody2 = fullbody_cascade.detectMultiScale(gray2)
        upperbody2 = upperbody_cascade.detectMultiScale(gray2)
        for (x, y, w, h) in fullbody2:
            cv2.rectangle(frame2, (x, y), (x + w, y + h), (255, 0, 0), 2)
        for (x, y, w, h) in upperbody2:
            cv2.rectangle(frame2, (x, y), (x + w, y + h), (255, 0, 0), 2)
        ret2, jpeg2 = cv2.imencode('.jpg', frame2)
        return jpeg2.tobytes()
I am very new to Flask, so I'm not quite sure what the issue with my code is. Any advice would be helpful!
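For illustration only (my own sketch, not a confirmed fix): each route advertises a single multipart boundary in its mimetype, but gen() interleaves parts with two different boundaries in one response, so the browser can only decode one of them. Splitting the generator so each route streams just its own camera, with a matching boundary, might look like this:

# Hypothetical per-camera generators; each response uses the one boundary that
# matches its mimetype, and both routes share a single VideoCamera instance so
# the two devices are not opened twice.
camera = VideoCamera()

def gen_camera0(cam):
    while True:
        frame = cam.get_frame0()
        yield (b'--frame0\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')

def gen_camera2(cam):
    while True:
        frame = cam.get_frame2()
        yield (b'--frame2\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')

@app.route('/video_feed0')
def video_feed0():
    return Response(gen_camera0(camera),
                    mimetype='multipart/x-mixed-replace; boundary=frame0')

@app.route('/video_feed2')
def video_feed2():
    return Response(gen_camera2(camera),
                    mimetype='multipart/x-mixed-replace; boundary=frame2')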

Why are field names sliced, raising an error, when writing a CSV in Python?

While practicing Selenium, I failed to write a dictionary to CSV. I searched for solutions to this problem, but they did not help. The problem is that when I try to write a Python dictionary to a CSV file using DictWriter, I hit this exception:
ValueError: dict contains fields not in fieldnames: u'S', u'k', u'u'
but the field name is
Sku
Why is it sliced into single characters when I supplied proper fieldnames to the DictWriter? (A small illustration of what I now think is happening follows.)
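A tiny standalone illustration of my guess, shown with the stdlib csv module for brevity: iterating over a dict yields its keys, so each writerow() call receives a plain string like 'Sku', and DictWriter then compares that string's characters against the fieldnames.

import csv, sys

header = ['Url', 'Name', 'Sku']
writer = csv.DictWriter(sys.stdout, fieldnames=header)
row = {'Url': 'http://example.com', 'Name': 'Jacket', 'Sku': 'AB123'}

for d in row:           # iterates over the KEYS: 'Url', 'Name', 'Sku'
    print(d)            # each d is a plain string, not a dict

writer.writeheader()
writer.writerow(row)    # passing the whole dict is what DictWriter expects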
My experimenting code is:
import os, sys, bs4, random, codecs, requests
import unicodecsv as csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from contextlib import contextmanager
from selenium.webdriver.support.expected_conditions import staleness_of
from selenium.webdriver.support import expected_conditions as EC

current_file = sys.argv[0]
link_dir = os.path.dirname(current_file)
link_path = os.path.join(link_dir, 'lnks.txt')
Image_folder = os.path.join(link_dir, "images") + "\\"

urls = [line.strip() for line in open(link_path, 'r')]
urls = list(set(urls))
url = urls[0]

driver = webdriver.Firefox()  # Chrome()##chromedriver)##
base_url = 'http://www.hotleathers.com'
Header = [u'Url', u'Name', u'Sku', u'Price', u'Color', u'Size']

#def get_data(url):
#try:
print "Scraping : %s" % url
driver.get(url)
driver.implicitly_wait(3)
detpage_lnks = driver.find_elements_by_xpath("//div[@style='margin-top:0px;margin-bottom:5px']/a")
detpage_lnks = map(lambda x: x.get_attribute('href'), detpage_lnks)

for i in detpage_lnks:
    Data = []
    #try:
    driver.get(i)
    driver.implicitly_wait(3)
    Name_v = driver.find_element_by_xpath("//table [@class='showproductpage']/tbody/tr/td/h1").text
    Sku_v = driver.find_element_by_xpath("(//table[@cellspacing = '0'])[3]//td[@style='padding-left:5px; font-size:16px; font-weight:bold;']").text
    image_name = Sku_v + ".jpg"
    image_url = "http://www.hotleathers.com/Assets/ProductImages/large/" + image_name
    res = requests.get(image_url)
    if res.status_code == requests.codes.ok:
        out = open(Image_folder + image_name, 'wb')
        out.write(res.content)
    Price_v = driver.find_element_by_xpath("((//table[@cellspacing = '0'])[3]//tr)[2]//span").text
    Color = driver.find_elements_by_xpath("(//table[@class='buyProductForm'])//tr[2]/td/select/option")
    Color_v = '"' + ':'.join([i.text for i in Color[1:]]) + '"'
    Size = driver.find_elements_by_xpath("(//table[@class='buyProductForm'])//tr[3]/td/select/option")
    Size_v = '"' + ':'.join([i.text for i in Size[1:]]) + '"'
    temp = [driver.current_url, Name_v, Sku_v, Price_v, Color_v, Size_v]
    Data.append(zip(Header, temp))
    Data = [item for sublst in Data for item in sublst]
    my_dict = dict(Data)

    with codecs.open(os.path.join(link_dir, "Image_info.csv"), 'wb', encoding="utf-8") as f:
        # Using dictionary keys as fieldnames for the CSV file header
        writer = csv.DictWriter(f, delimiter=",", fieldnames=Header, lineterminator='\n')
        writer.writeheader()
        for d in my_dict:
            writer.writerow(d)

driver.close()
I tried both unicodecsv and csv but with no success.
After many tries I found the solution below. I did not understand that writerow() expects a dictionary!
import os, sys, bs4, random, codecs, requests
import unicodecsv as csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from contextlib import contextmanager
from selenium.webdriver.support.expected_conditions import staleness_of
from selenium.webdriver.support import expected_conditions as EC

current_file = sys.argv[0]
link_dir = os.path.dirname(current_file)
link_path = os.path.join(link_dir, 'lnks.txt')
Image_folder = os.path.join(link_dir, "images") + "\\"

urls = [line.strip() for line in open(link_path, 'r')]
urls = list(set(urls))
url = urls[0]

driver = webdriver.Firefox()  # Chrome()##chromedriver)##
base_url = 'http://www.hotleathers.com'
Header = [u'Url', u'Name', u'Sku', u'Price', u'Color', u'Size']

#def get_data(url):
#try:
print "Scraping : %s" % url
driver.get(url)
driver.implicitly_wait(3)
detpage_lnks = driver.find_elements_by_xpath("//div[@style='margin-top:0px;margin-bottom:5px']/a")
detpage_lnks = map(lambda x: x.get_attribute('href'), detpage_lnks)

for i in detpage_lnks:
    Data = []
    #try:
    driver.get(i)
    driver.implicitly_wait(3)
    Name_v = driver.find_element_by_xpath("//table [@class='showproductpage']/tbody/tr/td/h1").text
    Sku_v = driver.find_element_by_xpath("(//table[@cellspacing = '0'])[3]//td[@style='padding-left:5px; font-size:16px; font-weight:bold;']").text
    image_name = Sku_v + ".jpg"
    image_url = "http://www.hotleathers.com/Assets/ProductImages/large/" + image_name
    res = requests.get(image_url)
    if res.status_code == requests.codes.ok:
        out = open(Image_folder + image_name, 'wb')
        out.write(res.content)
    Price_v = driver.find_element_by_xpath("((//table[@cellspacing = '0'])[3]//tr)[2]//span").text
    Color = driver.find_elements_by_xpath("(//table[@class='buyProductForm'])//tr[2]/td/select/option")
    Color_v = '"' + ':'.join([i.text for i in Color[1:]]) + '"'
    Size = driver.find_elements_by_xpath("(//table[@class='buyProductForm'])//tr[3]/td/select/option")
    Size_v = '"' + ':'.join([i.text for i in Size[1:]]) + '"'
    temp = [driver.current_url, Name_v, Sku_v, Price_v, Color_v, Size_v]
    Data.append(zip(Header, temp))
    Data = [item for sublst in Data for item in sublst]
    my_dict = dict(Data)

    with codecs.open(os.path.join(link_dir, "Image_info.csv"), 'ab', encoding="utf-8") as f:
        # Using dictionary keys as fieldnames for the CSV file header
        writer = csv.DictWriter(f, fieldnames=my_dict.keys())
        writer.writerow(my_dict)

driver.close()