I got an error in PyCharm after trying to run a test code for deep learning - deep-learning

# USAGE
# python train_simple_nn.py --dataset animals --model output/simple_nn.model --label-bin output/simple_nn_lb.pickle --plot output/simple_nn_plot.png
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset of images")
ap.add_argument("-m", "--model", required=True, help="path to output trained model")
ap.add_argument("-l", "--label-bin", required=True, help="path to output label binarizer")
ap.add_argument("-p", "--plot", required=True, help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []
# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
    # load the image, resize it to 32x32 pixels (ignoring aspect
    # ratio), flatten the 32x32x3=3072 pixel image into a list,
    # and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (32, 32)).flatten()
    data.append(image)
    # extract the class label from the image path and update the
    # labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
I found this test code while studying deep learning and tried to run it in PyCharm, but I got the error message below. I don't really understand what the argument parser is doing here. Could you explain that part of the code and the error?
--- error I got in PyCharm ---
C:\Users\giyeo\anaconda3\envs\tf\python.exe "D:/GiyeonLee/09. Machine Learning/Pycharm/Tutorial/keras-tutorial/train_simple_nn.py"
2020-07-06 13:56:28.409237: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
usage: train_simple_nn.py [-h] -d DATASET -m MODEL -l LABEL_BIN -p PLOT
train_simple_nn.py: error: the following arguments are required: -d/--dataset, -m/--model, -l/--label-bin, -p/--plot
Process finished with exit code 2
Thanks for reading my question.
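The error itself comes straight from argparse: all four arguments are declared with required=True, so running the script with no parameters makes parse_args() print the usage line and exit with code 2 before any training starts. In PyCharm you can supply them under Run > Edit Configurations > Parameters, e.g. --dataset animals --model output/simple_nn.model --label-bin output/simple_nn_lb.pickle --plot output/simple_nn_plot.png. As a minimal sketch of what the parser does (the values here are just examples):
import argparse

# declare one expected command-line flag, same pattern as the script
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset of images")

# normally parse_args() reads sys.argv; a list is passed here just for illustration
args = vars(ap.parse_args(["--dataset", "animals"]))

# vars() turns the Namespace into a plain dict, which is why the script can index args["dataset"]
print(args)  # {'dataset': 'animals'}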

Related

Facing the issue while fitting my model (bi-lstm + crf). ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)

I am trying to solve a problem with a bi-LSTM and CRF. While fitting the model, I am facing this issue: ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list). Below is the structure of the dataframe:
a column named "CompanyId" that contains integers, "Name" that contains strings, "TableTypeCode" that is a constant string always equal to "BS", and a final column named "BlockName". I want to train a model using a bidirectional LSTM and CRF, with "CompanyId", "Name", and "TableTypeCode" as inputs, predicting "BlockName".
!pip install tensorflow-addons==0.16.1
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Bidirectional
from keras.models import Model
import tensorflow_addons as tfa
df = pd.read_excel("data.xlsx")
X = df[['CompanyId', 'Name', 'TableTypeCode']]
y = df['BlockName']
# Preprocess the data
# One-hot encode the 'CompanyId' and 'TableTypeCode' columns
X = pd.get_dummies(X, columns=['CompanyId', 'TableTypeCode'])
# Tokenize the 'Name' column
X['Name'] = X['Name'].apply(str)
tokenizer = Tokenizer()
X['Name'] = X['Name'].apply(lambda x: x.split())
X['Name'] = tokenizer.texts_to_sequences(X['Name'])
# Encode the target column
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = to_categorical(y)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
n_classes = df['BlockName'].nunique()
# Define the model architecture
input_ = Input(shape=(X.shape[1],))
embedding = Embedding(input_dim=X.shape[1], output_dim=50)(input_)
lstm = Bidirectional(LSTM(units=100))(embedding)
output = Dense(n_classes, activation='softmax')(lstm)
model = Model(input_, output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train the model
model.fit(X_train, y_train)
There was no issue till the last line of code. Help me fix this and train my model.
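For what it's worth, this ValueError usually means one of the DataFrame columns holds Python lists, so NumPy builds an object-dtype array that TensorFlow cannot convert to a tensor. Here the 'Name' column holds variable-length lists produced by texts_to_sequences (note also that the tokenizer is never fit with fit_on_texts, so those lists may even be empty). A minimal sketch of one way past the conversion error, assuming the goal is a fixed-width numeric matrix; the maxlen value and the column handling are illustrative assumptions:
import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# fit the tokenizer before converting text to integer sequences
tokenizer = Tokenizer()
names = df['Name'].astype(str)
tokenizer.fit_on_texts(names)
name_seqs = tokenizer.texts_to_sequences(names)

# pad the variable-length lists into a fixed-width integer matrix
name_padded = pad_sequences(name_seqs, maxlen=20)  # maxlen=20 is an arbitrary choice

# one-hot columns as a plain float matrix, then stack everything into one array
onehot = pd.get_dummies(df[['CompanyId', 'TableTypeCode']],
                        columns=['CompanyId', 'TableTypeCode']).to_numpy(dtype='float32')
X_fixed = np.hstack([name_padded.astype('float32'), onehot])
# X_fixed can now be split and passed to model.fit without the object-dtype error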

putting HTML output from SHAP into the Dash output layout callback

I am trying to make a dashboard where the output from a SHAP force plot is illustrated. shap.force_plot produces HTML decorated with JSON. The example is here.
I made a very simple dashboard following the tutorial, which should plot the desired figure after clicking Submit.
Here is the code:
# -*- coding: utf-8 -*-
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import pandas as pd
from sqlalchemy import create_engine
import shap
from sources import *
import xgboost
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    dcc.Input(id='input-cvr-state', type='text', value='12'),
    html.Button(id='submit-button', n_clicks=0, children='Submit'),
    html.Div(id='output-state'),
    html.Div(id='output-shap')
])

@app.callback(Output('output-shap', 'children'),
              [Input('submit-button', 'n_clicks')],
              [State('input-cvr-state', 'value')])
def update_shap_figure(n_clicks, input_cvr):
    shap.initjs()
    # train XGBoost model
    X, y = shap.datasets.boston()
    model = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y), 100)
    # explain the model's predictions using SHAP values
    # (same syntax works for LightGBM, CatBoost, and scikit-learn models)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    # visualize the first prediction's explanation
    return shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])  # matplotlib=True

if __name__ == '__main__':
    app.run_server(debug=True)
I managed it with the following steps:
import shap
from shap.plots._force_matplotlib import draw_additive_plot

# ... inside a Dash app class, with the callback as a method
# matplotlib=False => returns an additive force visualizer;
# if set to True, the visualizer renders the result to stdout directly
# x is the index of the wanted input
# class_1 is the class to draw
force_plot = shap.force_plot(
    self.explainer.expected_value[class_1],
    self.shap_values[class_1][x[0], :],
    self.data.iloc[x, :].drop(columns=["TARGET"], errors="ignore"),
    matplotlib=False
)
# set show=False to force the figure to be returned
force_plot_mpl = draw_additive_plot(force_plot.data, (30, 7), show=False)
return figure_to_html_img(force_plot_mpl)
def figure_to_html_img(figure):
    """figure to html base64 png image"""
    try:
        tmpfile = io.BytesIO()
        figure.savefig(tmpfile, format='png')
        encoded = base64.b64encode(tmpfile.getvalue()).decode('utf-8')
        shap_html = html.Img(src=f"data:image/png;base64, {encoded}")
        return shap_html
    except AttributeError:
        return ""
The result is the force plot rendered as a static PNG image.
An alternative is to use html.Iframe, which produces a better-looking and fully interactive plot.
Here's an example that can be used directly as an Output:
def _force_plot_html(*args):
    force_plot = shap.force_plot(*args, matplotlib=False)
    shap_html = f"<head>{shap.getjs()}</head><body>{force_plot.html()}</body>"
    return html.Iframe(srcDoc=shap_html,
                       style={"width": "100%", "height": "200px", "border": 0})
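For context, a minimal sketch of how this helper could be wired into the callback from the question (the model and explainer setup is assumed to match the question's code):
@app.callback(Output('output-shap', 'children'),
              [Input('submit-button', 'n_clicks')],
              [State('input-cvr-state', 'value')])
def update_shap_figure(n_clicks, input_cvr):
    X, y = shap.datasets.boston()
    model = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y), 100)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    # the Iframe component is a valid children value for the output Div
    return _force_plot_html(explainer.expected_value, shap_values[0, :], X.iloc[0, :])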

raise_FirstSetError in SpaCy topic modeling

I want to create an LDA topic model and am using spaCy to do so, following a tutorial. The error I receive when I try to use spaCy is one I cannot find on Google, so I'm hoping someone here knows what it's about.
I'm running this code on Anaconda:
import numpy as np
import pandas as pd
import re, nltk, spacy, gensim
# Sklearn
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from pprint import pprint
# Plotting tools
import pyLDAvis
import pyLDAvis.sklearn
import matplotlib.pyplot as plt
df = pd.DataFrame(data)
def sent_to_words(sentences):
    for sentence in sentences:
        # deacc=True removes punctuation
        yield gensim.utils.simple_preprocess(str(sentence), deacc=True)

data_words = list(sent_to_words(data))
print(data_words[:1])

def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """https://spacy.io/api/annotation"""
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent))
        texts_out.append(" ".join([token.lemma_ if token.lemma_ not in ['-PRON-'] else '' for token in doc if token.pos_ in allowed_postags]))
    return texts_out

nlp = spacy.load('en', disable=['parser', 'ner'])
# Do lemmatization keeping only Noun, Adj, Verb, Adverb
data_lemmatized = lemmatization(data_words, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
print(data_lemmatized[:1])
And I receive the following error:
File "C:\Users\maart\AppData\Local\Continuum\anaconda3\lib\site-packages\_regex_core.py", line 1880, in get_firstset
raise _FirstSetError()
_FirstSetError
The error must occur somewhere after the lemmatization, because the other parts work fine.
Thanks a bunch!
I had this same issue and I was able to resolve it by uninstalling regex (I had the wrong version installed) and then running python -m spacy download en again. This will reinstall the correct version of regex.
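Concretely, assuming a pip-based environment, that amounts to:
pip uninstall regex
python -m spacy download en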

Embed matplotlib figure into IPython HTML

I want to dynamically write and display HTML with a code cell in Jupyter Notebook. The objective is to generate the HTML to display table, div, img tags in some way I choose. I want to capture img data and place it where I want in this auto generated HTML.
So far I've figured out that I can do the following:
from IPython.core.display import HTML
HTML("<h1>Hello</h1>")
and get:
Hello
That's great. However, I want to be able to do this:
HTML("<h1>Hello</h1><hr/><img src='somestring'/>")
and get something similar to a Hello with a horizontal line and an image below it, where the image is the same one as below.
import pandas as pd
import numpy as np
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
df.plot.scatter(0, 1)
The result should look like this:
Question
What do I replace 'somestring' with in order to implement this? And more to the point, how do I get it via Python?
I would have imagined there was an attribute on a figure object that would hold a serialized version of the image, but I can't find it.
After some digging around, I found a solution. Credit to Dmitry B. for pointing me in the right direction.
Solution
from IPython.core.display import HTML
import binascii
from StringIO import StringIO
import matplotlib.pyplot as plt
# open IO object
sio = StringIO()
# generate random DataFrame
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
# initialize figure and axis
fig, ax = plt.subplots(1, 1)
# plot DataFrame
ax.scatter(df.iloc[:, 0], df.iloc[:, 1]);
# print raw canvas data to IO object
fig.canvas.print_png(sio)
# convert raw binary data to base64
# I use this to embed in an img tag
img_data = binascii.b2a_base64(sio.getvalue())
# keep img tag outter html in its own variable
img_html = '<img src="data:image/png;base64,{}">'.format(img_data)
HTML("<h1>Hello</h1><hr/>"+img_html)
I end up with the heading, a horizontal rule, and the scatter plot rendered below it.
A shorter alternative uses IPython's Image display:
from IPython.core.display import Image
import io
s = io.BytesIO()
# make your figure here
plt.savefig(s, format='png', bbox_inches="tight")
plt.close()
Image(s.getvalue())
Let's say you have base64-encoded image data:
img_data = "iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAYAAADDPmHLAAAb2ElEQVR42u1dB3wU5bY/..."  # long base64 payload truncated
then to have it rendered inside an IPython cell you simply do:
from IPython.core.display import Image
Image(data=img_data)
I'm going to build on what was answered by others (piRSquared), because it didn't work for me with Jupyter and Python 3. I wrote the following function, which takes any plot function I define, calls it, and captures the output without displaying it in Jupyter. I personally use this to build custom HTML machine-learning reports based on the many model iterations I execute using Livy and Spark.
from IPython.core.display import HTML
import binascii
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import base64
def capturePlotHTML(plotFunction):
    # open IO object
    sio3 = BytesIO()
    plotFunction()
    plt.savefig(sio3)
    sio3.seek(0)
    data_uri = base64.b64encode(sio3.read()).decode('ascii')
    html_out = '<html><head></head><body>'
    html_out += '<img src="data:image/png;base64,{0}" align="left">'.format(data_uri)
    html_out += '</body></html>'
    # prevents plot from showing in output
    plt.close()
    return HTML(html_out)
# Plot wrappers
# Advanced wrapper for more complex visualizations (seaborn, etc.)
class plotRegline:
    def __init__(self):
        # could also pass in a name as an arg, e.g. def __init__(self, name)
        reg_line_prepped_pdf = pandas_input_pdf
        sns.lmplot(x='predicted', y='actual', data=reg_line_prepped_pdf, fit_reg=True, height=3, aspect=2).fig.suptitle("Regression Line")

# Basic wrapper for simple matplotlib visualizations
def plotTsPred():
    ts_plot_prepped_pdf = pandas_input_pdf
    ts_plot_prepped_pdf.index = pd.to_datetime(ts_plot_prepped_pdf.DAYDATECOLUMN)
    ts_plot_prepped_pdf = ts_plot_prepped_pdf.drop(columns=["DAYDATECOLUMN"])
    ts_plot_prepped_pdf.plot(title="Predicted Vs Actual -- Timeseries Plot -- Days", figsize=(25, 6))

# build the plots and capture the outputs
from IPython.display import display_html
regline_html = capturePlotHTML(plotRegline)
ts_plot_day_html = capturePlotHTML(plotTsPred)
# could be any number of html objects
html_plots = [regline_html, ts_plot_day_html]
combined_html_plots = display_html(*html_plots)
# the following can be run in this code block or another to display the results
combined_html_plots
The answer by piRSquared no longer works with Python 3. I had to change it to:
from IPython.core.display import HTML
import binascii
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# open IO object
bio = BytesIO()
# generate random DataFrame
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
# initialize figure and axis
fig, ax = plt.subplots(1, 1);
# plot DataFrame
ax.scatter(df.iloc[:, 0], df.iloc[:, 1]);
# print raw canvas data to IO object
fig.canvas.print_png(bio)
plt.close(fig)
# convert raw binary data to base64
# I use this to embed in an img tag
img_data = binascii.b2a_base64(bio.getvalue()).decode()
# keep the img tag's outer html in its own variable
img_html = '<img src="data:image/png;base64,{}">'.format(img_data)
HTML("<h1>Hello</h1><hr/>"+img_html)
Specifically, I import from io, not StringIO, and I use BytesIO rather than StringIO. I needed to decode the bytes into a string for inserting into the HTML. I also added the required imports of numpy and pandas for the example plot to work, and added plt.close(fig) so that you don't end up with two figures in the output.
If you want to show the results of DataFrame.plot in an IPython cell, try this:
import pandas as pd
import numpy as np
%matplotlib inline
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
df.plot.scatter(0, 1)

Scrapy / Pipeline not inserting data to MySQL database

I'm making a pipeline in Scrapy to store scraped data in a MySQL database. When the spider is run in the terminal it works perfectly; even the pipeline is opened. However, the data is not being sent to the database. Any help appreciated! :)
here's the pipeline code:
import sys
import MySQLdb
import hashlib
from scrapy.exceptions import DropItem
from scrapy.http import Request
from tutorial.items import TutorialItem
class MySQLTest(object):
    def __init__(self):
        db = MySQLdb.connect(user='root', passwd='', host='localhost', db='python')
        cursor = db.cursor()

    def process_item(self, spider, item):
        try:
            cursor.execute("INSERT INTO info (venue, datez) VALUES (%s, %s)", (item['artist'], item['date']))
            self.conn.commit()
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        return item
and here's the spider code:
import scrapy # Import required libraries.
from scrapy.selector import HtmlXPathSelector # Allows for path detection in a websites code.
from scrapy.spider import BaseSpider # Used to create a simple spider to extract data.
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor # Needed for the extraction of href links in HTML to crawl further pages.
from scrapy.contrib.spiders import CrawlSpider # Needed to make the crawl spider.
from scrapy.contrib.spiders import Rule # Allows specified rules to affect what the link extractor follows.
import spotipy
import soundcloud
import mysql.connector
from tutorial.items import TutorialItem
class AllGigsSpider(CrawlSpider):
    name = "allGigs"  # Name of the spider. In the command prompt, when in the correct folder, enter "scrapy crawl allGigs".
    allowed_domains = ["www.allgigs.co.uk"]  # Allowed domains is a string, NOT a URL.
    start_urls = [
        "http://www.allgigs.co.uk/whats_on/London/clubbing-1.html",
        "http://www.allgigs.co.uk/whats_on/London/festivals-1.html",
        "http://www.allgigs.co.uk/whats_on/London/comedy-1.html",
        "http://www.allgigs.co.uk/whats_on/London/theatre_and_opera-1.html",
        "http://www.allgigs.co.uk/whats_on/London/dance_and_ballet-1.html"
    ]  # Specify the starting points for the web crawler.
    rules = [
        Rule(SgmlLinkExtractor(restrict_xpaths='//div[@class="more"]'),  # Search the start URLs for links to follow.
             callback="parse_me",
             follow=True),
    ]

    def parse_me(self, response):
        for info in response.xpath('//div[@class="entry vevent"]|//div[@class="resultbox"]'):
            item = TutorialItem()  # Extract items from the items folder.
            item['artist'] = info.xpath('.//span[@class="summary"]//text()').extract()  # Extract artist information.
            item['date'] = info.xpath('.//span[@class="dates"]//text()').extract()  # Extract date information.
            #item['endDate'] = info.xpath('.//abbr[@class="dtend"]//text()').extract()  # Extract end date information.
            #item['startDate'] = info.xpath('.//abbr[@class="dtstart"]//text()').extract()  # Extract start date information.
            item['genre'] = info.xpath('.//div[@class="header"]//text()').extract()
            yield item  # Retrieve items in item.
            client = soundcloud.Client(client_id='401c04a7271e93baee8633483510e263')
            tracks = client.get('/tracks', limit=1, license='cc-by-sa', q=item['artist'])
            for track in tracks:
                print(tracks)
I believe the problem was in my settings.py file, where I had missed a comma... yawn.
ITEM_PIPELINES = {
    'tutorial.pipelines.MySQLTest': 300,
}
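Worth noting even after the settings fix: the pipeline as posted would still fail at runtime, because __init__ stores db and cursor in local variables while process_item refers to a bare cursor and a self.conn that was never created (Scrapy also passes the arguments as (item, spider), not (spider, item)). A minimal sketch of the same pipeline with the handles kept on self, in the same Python 2-era style as the question:
import MySQLdb

class MySQLTest(object):
    def __init__(self):
        # keep the connection and cursor on self so process_item can reach them
        self.db = MySQLdb.connect(user='root', passwd='', host='localhost', db='python')
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):
        try:
            self.cursor.execute("INSERT INTO info (venue, datez) VALUES (%s, %s)",
                                (item['artist'], item['date']))
            self.db.commit()
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        return item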