Pytesseract OCR with handwriten images - ocr

I am trying to get some text from input images. Mainly my input image is some numbers on a paper. I grayscale and then black and white the image before sending it to tesseract. I keep getting empty outputs on handwriten text. On any text image that i download to test, it works(numbers or text). Have tried it with lang='eng' or empty lang argument. Would i need to do anything extra for handwriten text?
from pytesseract import pytesseract
import cv2 as cv
import tkinter as tk
from tkinter import filedialog
def grayscale(image):
return cv.cvtColor(image, cv.COLOR_BGR2RGB)
def main():
root = tk.Tk()
root.withdraw()
image_file = tk.filedialog.askopenfilename()
img = cv.imread(image_file)
# Grayscale input image
gray_image = grayscale(img)
cv.imwrite('gray_image.jpg', gray_image)
# Binarize the grayscale image
thresh, img_bw = cv.threshold(gray_image, 120, 240, cv.THRESH_BINARY)
cv.imwrite('binary_img.jpg', img_bw)
# PyTesseract OCR
pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"
image_tesseract = cv.imread("new_black.jpg")
text = pytesseract.image_to_string(image_tesseract, lang='digits')
print(text)
if __name__ == “__main__”:
main()

Related

unable to use pytesseract on mac, after downloading tesseract through homebrew in terminal

the code i would like to run:
import cv2
import pytesseract
img = cv2.imread("the path to the png file")
print(pytesseract.image_to_string(img))
the answer i get:
TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.
have you put this variable :
pytesseract.pytesseract.tesseract_cmd = "/usr/local/bin/tesseract"
I use pytesseract on Mac.
here is an example of code :
"
import cv2
import pytesseract
from pytesseract import Output
pytesseract.pytesseract.tesseract_cmd = "/usr/local/bin/tesseract
custom_config = r'-c tessedit_char_blacklist=|[]{}?!&§()$*:ùûÿ --psm 6 --oem 3'
custom config allows me to remove unwanted characters
test=path+"image.png"
img = cv2.imread(test)
d=pytesseract.image_to_data(img, lang='fra',config=custom_config,output_type='data.frame').dropna()
output_type = 'data.frame' to use pandas to process the data
test1 = pytesseract.image_to_string(img ,lang = 'fra', config = custom_config)

I got an error in Pycham after trying to run a test code for deep learning

# USAGE
# python train_simple_nn.py --dataset animals --model output/simple_nn.model --label-bin output/simple_nn_lb.pickle --plot output/simple_nn_plot.png
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset of images")
ap.add_argument("-m", "--model", required=True, help="path to output trained model")
ap.add_argument("-l", "--label-bin", required=True, help="path to output label binarizer")
ap.add_argument("-p", "--plot", required=True, help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []
# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
# load the image, resize the image to be 32x32 pixels (ignoring
# aspect ratio), flatten the image into 32x32x3=3072 pixel image
# into a list, and store the image in the data list
image = cv2.imread(imagePath)
image = cv2.resize(image, (32, 32)).flatten()
data.append(image)
# extract the class label from the image path and update the
# labels list
label = imagePath.split(os.path.sep)[-2]
labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
I found a test code for studing Deep learning.
And tried to run in Pycharm. but I got this error message.
Actually I couldn't understand what that parser function is doing here.
could you explain about that code and about the error?
---error i got in Pycharm -----------------------
C:\Users\giyeo\anaconda3\envs\tf\python.exe "D:/GiyeonLee/09. Machine Learning/Pycharm/Tutorial/keras-tutorial/train_simple_nn.py"
2020-07-06 13:56:28.409237: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
usage: train_simple_nn.py [-h] -d DATASET -m MODEL -l LABEL_BIN -p PLOT
train_simple_nn.py: error: the following arguments are required: -d/--dataset, -m/--model, -l/--label-bin, -p/--plot
Process finished with exit code 2
Thanks for reading my quation..

How to make Image data to 2-D distribution plot?

How to make Image data to 2-D distribution plot ?
I want to visualize image date to 2-dimensional distribution.
Is this possible ?
Image like MNIST :
2D distribution like this :
About mnist, it does not mean anything I think. Because you have one channel of binary image so you have only zero or one. So you cannot have any plot like that.
But if you have grayscale image or colored image, you can do this:
source image:
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('img.jpg', -1)
cv2.imshow('image',img)
color = ('b','g','r')
for channel,col in enumerate(color):
histr = cv2.calcHist([img],[channel],None,[256],[0,256])
plt.plot(histr,color = col)
plt.xlim([0,256])
plt.title('Histogram for color scale picture')
plt.show()
cv2.destroyAllWindows()
And if your image is grayscale:
# 1 channel image - grayscale
import cv2
from matplotlib import pyplot as plt
gray_img = cv2.imread('mnist.png')
cv2.imshow('image',gray_img)
hist = cv2.calcHist([gray_img],[0],None,[256],[0,256])
plt.hist(gray_img.ravel(),256,[0,256])
plt.title('Histogram for gray scale picture')
plt.show()
cv2.destroyAllWindows()
And the matplotlib way is:
import matplotlib.image as mpimg
from matplotlib import pyplot as plt
img=mpimg.imread('img.jpg')
imgplot = plt.imshow(img)
plt.hist(img.ravel(), bins=256, fc='k', ec='k')

Imbed matplotlib figure into iPython HTML

I want to dynamically write and display HTML with a code cell in Jupyter Notebook. The objective is to generate the HTML to display table, div, img tags in some way I choose. I want to capture img data and place it where I want in this auto generated HTML.
So far I've figured out that I can do the following:
from IPython.core.display import HTML
HTML("<h1>Hello</h1>")
and get:
Hello
That's great. However, I want to be able to do this:
HTML("<h1>Hello</h1><hr/><img src='somestring'/>")
and get something similar to a Hello with a horizontal line and an image below it, where the image is the same one as below.
import pandas as pd
import numpy as np
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
df.plot.scatter(0, 1)
The result should look like this:
Question
What do I replace 'something' with in order to implement this? And more to the point, how do I get it via python?
I would have imagined there was an attribute on a figure object that would hold an serialized version of the image but I can't find it.
After some digging around. Credit to Dmitry B. for pointing me in the right direction.
Solution
from IPython.core.display import HTML
import binascii
from StringIO import StringIO
import matplotlib.pyplot as plt
# open IO object
sio = StringIO()
# generate random DataFrame
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
# initialize figure and axis
fig, ax = plt.subplots(1, 1)
# plot DataFrame
ax.scatter(df.iloc[:, 0], df.iloc[:, 1]);
# print raw canvas data to IO object
fig.canvas.print_png(sio)
# convert raw binary data to base64
# I use this to embed in an img tag
img_data = binascii.b2a_base64(sio.getvalue())
# keep img tag outter html in its own variable
img_html = '<img src="data:image/png;base64,{}
">'.format(img_data)
HTML("<h1>Hello</h1><hr/>"+img_html)
I end up with:
from IPython.core.display import Image
import io
s = io.BytesIO()
# make your figure here
plt.savefig(s, format='png', bbox_inches="tight")
plt.close()
Image(s.getvalue())
Let say you have base64 encoded image data:
img_data =
"iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAYAAADDPmHLAAAb2ElEQVR42u1dB3wU5bY/m+xuOklIARIgdKQqeunk2kClSRNsKD9UVFR4ei8PBFTKu1f8Xd8PeCpeBCPlonRBmggiXaogYBIJJQkppPdNts68cybZzZaZrbNJNsyByexO3++c73/Kd843MpZlQaJ7l+RiXUiGRMK0ZMkSWXJysqy5NVSvXr1MPWXRokUs/lzTPtaHe5FMpGeXTZkyxQ8byb+8vNwfya+6uloWGxsLtPaVxggODjY1RkFBgcX20NBQNjc3F+Li4pji4mJWo9Ew+Jnt2bMnu337dgshMQqILwiGGAIgw15PjFcEBAQEMgwThEuAVquVI/kkEqAAE4O5dd0mRqfTsfjd4OfnZ8Dfp8ffZkDS48IEBQWxuI2hz6WlpWyHDh0YOgeRkDUKxeLFi9mmiBYeCwAy3w9XysrKylC9Xh+Fkh+NbRGODRWIDYIrP18TAmoTP2Q2g7+Fwd/E4HcGf4ce9+nwsxY/a3GfBn8nrXUkFLhdT4JB3/FcHQlHRESEHlGDwY5hMCIGCUZTEghPBYDr/QiJwfg5BnvC4926dZtHKoA6Ut31fUoAUGUFIJq1IEYRM3GtwaUCEaAE9+Wo1eo0ZG4B7lPh9hr8rRqjYNCxKAzVtB2PUdN3hUKhxc9aPJ8ERxcVFaXH9uIEAtGCIYRoTJXhsQCg7ld06dIlDH9QW2yMyTNnzlyAEGja72vwj8yCsrIyqKqqAmQUlJSUADIKampqAJkPiHQsfVYqlWxgYCCpgCrcfxOPv4pokYNMrkIkqMK2oHU1flfRGr+rcOGEA7dpSHAqKip0aCcRsjBoSxhSUlJYQoaGFAQxEECBPz4CJbwjNspzKAD/hQLg016AsU1obd0+aNtAVlYWpKamcoKBzITo6GgSHBYNR0alUumwPfJQcK7hsel4Sin27kpcyglJaMFzKvG6lUa0QEFSE0qgsalDlWEgZNi2bRvTEKjgsQDMnj1bGRYWFoHw2AUNo+ffQvJ1AXDg7gL2aE4wCC3u3LkDFy5cADIau3btCt27d+cQJDs7m/Yx2Mv1KBTliBxpuL6BKJGJjCehKMVrkMtUhp8rSCBw4dQK2g6kTvRoRBpIRXgTFUSJA2DvN+p6v+YeOCE+kBDQgsyDTp06QUJCAiCj4ejRo3Dz5k0YNmwY9OnTB3r37u2HxytROGLy8/Nj0tPTB+Nag51FhUsm9vQzKBB38FpFeK0ivHwJfi7D7ZXYmapjYmLUqIZ0iAb6OptEdESQg0QeCwMaetCyZUsYN24cIJPh2LFjFC+AAQMGcPsR4jkhad++PQlEEC0oCNG///57n8LCQhUanWm4nMbtmXg8BSAKUX2UoEooQ+GpwuvVoH2gnTx5soE8EzGFQBQVgD8wEh+4CzbEC6gB3mzOKsAZoSB1QGhANsKTTz7JIYXRnjC3K4yfc3Jy4OrVq+qioqIKVB9XEE2OI6OzccnDc8njKEG1U0nqITw8nDwTRiy1ICGAF2wE9Pth+PDh8Ouvv8KBAwdg1KhRgJAuKABt27aF+Pj4QPwciHbD8HPnzg1C6E9FAdqP6jUDr5mDh+ejEJArWonIoEEB0IuhEiQB8JIQkFoYMmQIt963bx+MHTvWQgjMBcB8G6EnqoswNCL7owD1RG8iGZdduP8WoQIKQD6ibSkaoDWoEvSeqgRJALxIxHyjHXD8+HEYMWKEIPOtt7dr145iLKF3794dcPr06R5oK1xEQfgWhYjC7RRmL27durUKkUDnCRL4SWzyLlGvf+ihh7j4QWZmJhc34FvITuDbhqpBhj29xSOPPPLXNm3azMOe3xu3J+A6Cq8dgqpCgULgts0lCUADIcHIkSPh7NmznCAICYG9BeMB8tGjR3dFe2EhdvZ+eNn26EJGoj0QiMEjf3ejrpIANJBNEBISAgMHDoQTJ064JQC0oGtJaNAa7YT52PsHIhK0RpsgDLcraDheZp6kINkATYsIzilKePnyZQqc0ViCXYMQo4acyqCwM6EGRR2NKqVz584R2Pv/hvvJMCzHMQpVZGQk5x5KAtDEhQAHzuDatWvQv39/CwGg2AGFlW/dusWFkmk7MpWgH9D3Bxxp5c6nfVeuXJGhELXEkPFk/J6LAlCMKFCDtgBJCSsJQBMlgvEHH3wQtm7dyqkDYjJa94B+PzfyiFlH0KNHD+jYsSMXS6DjjWFnI+G4C6AxSJFHGbqJT+DA00a8ToC76lwSgEbwCoi5ZBBmZGRw8E69/IknnuCMRaO+d4QkJEgXL16U47GUgSXHkUg/FCbJBvAFFEhMTIRTp07B4MGDuSggMdS6pzsyKkmIWrRoUUqpavjdH9FDRmgiCYAPeAQE4RMnTjQx3t3rkL4nyx8NRLfjAJIANJIQkCoQ41Keptx5TQDIhVm4cCHn8rhKmDcHX375peD+Dz/8ENLS0uzqWbKgaUiWhmGHDh0KZmlqEjWEANTlz7l1LulHe0S+MV3fHlFOHx1HFjZZ3agvYe7cudC3b1+J6w0hAJQgQQYPGSaUQkW9kqxcoz9rJIxkcShhXOgcDG+KDrkkjB988AGHBJ999pnEeW8LAKYzwa5du0zfiQGUYbty5Uq4ffs2t40YTulTU6dO5WCfAh6uGkWYScNF2Ohc821U2UMoQNk4RreKrk0ZO6tWrYK3335b8Jo7zmfAxbQcfvWEeZx/f+5xh66aEJ25VQI/nEnl3Rci08Ks0f0AAzy+LwDWRAEMWgiKzYl0NIY23b4uhkThvffeg/vuu493P6VnrVixwsINI8GkoVmsYeA953JmKWQbInn3USx/1sRETrDcoRt55YLXLr59Fcb2yoAHHnig+QlAYxEOo8KgQYMAB0tM6EIDM8uXL4fVq1fbt2MqSqDq2k8WKilGn2+hwlylPm1CYPf2LZZM6DAAlJFtmpcKaEpEvfW1116DpKQk0zaKwjkibXUFvP/C45wQiUX9OsXC9yvnW2yb891FyNM1TtvcM8PB1tY/JWz6EpWodFBWo5cQwF2i6Ju17UBGIg3AuENkaG7ZsoUzZCnfj2wZIcKULm4YmCqIJkyYwIVxXaGj14sh6WwejRlDQsVl+OfslyQBcJUw5dpGANxlPhEZs4cOHeKuc/DgQS7xk4/IBSa3k+IW5IE888wzLt/ryLVs/KsgNwZOXS/gruWuEXrPqgAKCpkT9UZPYwvkSRBR8oaQTUE9nxhGgkI1Au6EgGOCWMDJBzApBFEr77ZYYeR7SwB++OEHi+9Ux+cpPf/88yY0+Pzzz3mP2b9/P7em2UVeffVVt+4zY0RvaJl/DspObYJZ44eIKgDye4X5WFpl+p6Xlwevv/66x9el4BPVBlJgi8b3qZebM4einIQARBTcEYpVcJlBqCoYA39GV7BSDl/Mf03yAlwhKtakqt033ngDvvnmG9N2Sr3CWnzADFtR7mPs1TT4ZJwryEg0pkE2AKkIinYKEcG7Xq/Dtb7B28nnEYB6FsX4jcEZSp4UGoGknka9f8eOHaLdHyuAufENmjRi06ZNJrVAtGfPHhPiPPvss4LXIN1u0PlxaykO4CIR44nhZGjRIsR8rLDhrH5SB55Y/3xEVcFEJATGcQ5iOhmeJHSUAGrvntTzSX3glFOSAIhFBPU0+ETGF6HCV199Bd99951df92EFAxbOwmEwTmGkG9PRGMdRmOQqoOJaFDKkfFHut+AKsAgqQD3AjI0GQPpYE6icbAH6+q4gR4y0CgpxNURRpzsCxkiA72TDCGfnCKNWOIN58+fr/XdjxwxqSQa8bRHBur9OkayAdwhgk4a2hWysN0hrkeiKsE54pw+h8YacK4Ezh6gyB/lQVDG76RJk5y4H6kAGScIkgA0ASLG1zLE+REaUi3GnIT169dza5r4Ydq0aU4IAAZ59KxdFaBSU7KMDIKU4rJMqg3kg2TGUKeTXTPKXnjhBW6N079wayrwsB6DEPYChN3AQ5duw/D5m+DhOevgoxVJkgB4HQEIknV6lyGZQsPGah4q4Zo+fbqT9zPY9QK2HvkNdLiP4kRbDp93K9FWEgCXVEAtAjB61xqaXFKju0cC4GywyWAw2PUCIoP8TFPTacprcywlAWgAFaB30SqnVHWKQBJDaY4gSoJ12uawY3TOm/oUtNFlg+7WKXhr4sMuDydLRqDLKsBglyG8sQPsnZRsSkRVvo7SzWxsAC4UzK8CYiJCYP+apb7lBVCyxcmTJ7lxeFooA4d6B1nGppvjwAnNooUzYnIjauTLU3Yw+dRUMNmYNkAtJDOgRv+8qkZby2RcFDgPdqCVJU6xiJ07d3JDwhT3p3Aw1fzxkVZnAI2VrjcwjTdhuNcEgBjtqBeQsUQRO+sqn2XLlnFBlUZTAaSTEQEUYVGw+MdsYH/czHGfmxiy8DqcXLeEM9rIxaNt5kYZ6f45c+YIXvvLPWdg4y8pTQbtJBUgGJjRc32em8CBhmtrcR6K8/M4I4yElwSFgj30neoe6NgNGzbY1f2uupY+KwBUrPHuu+9y8OgqOUq7Xrp0KVy6dMn0XQhu3aVPX31cuDxt+Dhuehdy99asWcMN/lCvp9oGCj07Cju/M24QdFYKJaTGcrGDZoMAjz76qFeui3Pucou3iNLFxo8f79RxrqaWkQXvzLUbiiQ38B4nSQAkAZDoXibJC7BDyw8kQ2GZCkwzr9V5Ax0jFTDjqX5uVwhLAuAjdPb6XVCzCpMLyLEf/x86eACeHdK5Qcu4fVoATt8uh0q1/bBqgNwPHu0W6bVn+OXPIqjR1vrgprhb3SSNT/aO5SJ81sSa+f9G5hvjApIKcIE2nboJlWyQw+POHT8M78+Y4pVnWHfsOqhlgbXMs2AswJ9Ht8Ocd2fxnGWcydOM+cBCc3rddoMoMWensj9TFABVKpVXnoEx9lybXs1y0Tz+54b6c4zMb2YvW29SVkxYXFf4x9od3pJCYUi3d46J6fVh4eYkBU3OjL2ubw2FRcXe4H8dlBsZWQ/pgufUCUk988GEGpIAeIlCYtrB0qQfvIQAZnrcGUg3f52LGfMlG8DLlBvYGTKzc8Tlvz1Id4AabDNlfpMVgKCIGFiStF98BDDT+xa92p4SMEMMo1HISirA+15DZXRvuJaaJq4nYq73ndDn9WaD8bjmxfwmIQBlWdd5XUZlcAtYtvmoqErAXO+bIN0+AFhAv+QFeIFiylOwPRkz5tcbXvo2/eDU+cuiegHmTDR5Bna9gHrXUbIBvEDRGCBsUXWnPvWKrdfX/spAWL77nLgIUNerWWcgnTVLCbOOIUgCIB40z31mEDA6rU2Qhv7IE/4Ce38+KQ4ECEG6fdgwsxvqhchZUml0kFFQAclZxZBXqsIKH6ZJCUDjjgbWNXDH1i2hleEsFPi3s4rQsTjkKoc1R1Jh7PBEzxEArKDfCUi3Tgp1hBqZhZVw6Pc7cOTybShWaaF2/Kke2eizQsZArzbB8OKI/tC/a2uu6POeEwDrRlz43MPw9uY/ONg3979pHdThQdj4/Y/w8sSRnscBLHo1OIgD8I0d8B97Ni0Pvj50FW4WqMwEBXjP1+JLPi5lV8Olb47j7H9aePmvXWH66MH3jgrg079U/dI1oNTSWDO5aX6w5UKuZzVxZnF/a2PTfuwAeMYO6s+hoo41h5Nhwbdn6plvZctYqzVzW0TLymHt8XQYvzAJcoormr8AWBt75jT/+cfAoK608NmNDR6ScD98sWm3x4jDWvVOp7wAnhgCURXW7M/++hhsPpkGnGwKDTiZXYc3rIzrfE0ATFqyBc7/caO5CgDrEFJDg5TQL1JtE6Qxnnfgukpw6Nal+5v1TodxAJ6xA6Pl8M8dFyAlu8wW0XgSSMyRzcIWMUchRRC88++f4cr1jGYmAHYh0ZL++7nHQK8qtbHU6XNofHf4ZO029+1NnvuzjmwA3uFgnHzyYhbq/XwbZKGS8uIbFyH98HooOrkB/P/YCcq0H6Hq0i7I//0w6FRl9WFlYG3OlwWEwIyVeyEjO6+ZGYG8kGh7GKVmPdZBCcfybY01+vdrvpxLGgnFlz64fH8Z3/3tuXXWzK89N6RVAmw8edOSebjcvXwY2mgyYO60F7H4Y4HN21EIvX7EiaXX7joKBSHdwT8wlN/TCAyDaYvWwPGkj5oRAghAIh/NHD8UDJWFvJAaHNsBFq3a7JYTKAzJTngOZoITFBVfW9Fbt4/RaeDW3pUwZ/xDcPzIT/Dyyy/bMJ8Lb2NJ2binn4Z961bAwlGdQFuYIagWa8LawadrtzQPAWB506qELXA/rK+b0DfK0lI308fJNS1dTxphwen7W5xjoTJYyzQx+oiTSaQfXA3ff73cpfmHx416Arb/z3TQVxTwqiVaNp/Nwd9Z1AwEAMDGvXMUUZv65ABgK/LMjLV65gVFtoaFq7a66XpaQbodT8DaUrdwH+u2ZZ/ZDUn/WggDBgxwuV06tY+HFa+P4CaKtu4k9N8vJAIWfLq2OagAnkY3xdT5G59iY9MTO/H4z7XnZWHUMP1OtptegJPpXTyWurkQGBD6+8XKuPcAuEuJ/ftCfIDaRjiNz3YGU+r1Xpw/sIEQQNj/tQfCY4b0BnlFLq//rAyLhA9Wf++iGuKBdBcCVtYxhPyrRzGd/B2P22f+tKd4kYmWgNhOsGn7Dz6OAHxpVU5m1swefb/g+SWhXeBK8nXn1RDf/R0khFiqLHNEYHD8Io97BbynNKBnR4iASt5OQsvGfSeagREoFPxwQMP6doZgVQ7v+YqgUFiy/kf34hHWYwJCwQOBGEZx2gX428xXRGujWZMSbZGp7tlyynVey0RqMBuAZVkBSHVM708ews2gxXe+OqoHHD/7m1NCyHt/V8cCjHZD8S1RJ3p4+q/98F3wBl5kkkfE231buk+oAJvIngupVX06x3Nv7OSDZD9FAPxr6wmnej/w3J916rltYwjtYyNFrw4OkTO8nSIgPAaOnz7n4wgAIFCa5dwlPnzpcWBp5k4+SG7TG3YdPOZ8PAIsz7evAfhjGG1jwkVvpshg4cmlzl39s3kEgtxhPlEHTBppLy/hhWSZzA9WHbjshP73tDSsds1g8KdT21ait1NcVJjgvsKSch9GALCK6bNOND4PLZk+EhhtDS8kK+N7w9db9rhxf8dp4dYxBF1lGSS0byd6G3WIE55wqrJG68MIALYJGI4an4+iw0Ogd7iaP6yMf7/9NcNO0gj//R1Gg3liEOrKIq/MUtatvfDU8tUavQ8jgM0YvPul1h9NHw36miresHJA3H2wPGmrsA3Id38nh4PN3TOdqtwrAhATKawCanxZAPj0rnm1jUuWcqAChrWT84eVcb0nuZQ/aYQ3LYt1ujTMXIgVWLTi6nuInKGSyhrhJzFofRwBQKA0yw2a99JTwFBiBU9YOQhDp0tXbeJXQyw/pNs3Xm1jCEqcQ5hmBBebsguFDb3gALkPI4BF9M0SUt0hShoZ0yeKN6xL6xNZtXP38nkB7paGmXsziuBwyMgUXwDScwoF94UGKnwYAWzG4J1ofAf01qRHgFUV847UBbSMgwX/9x9bNcR7f+dKw6wDRzcyc0VvpayCUsF9LYIDfBwBeEa6wIMSK0oaeXFIB5uwrhFtrpQFQX5BoVOQ7ih2wBdDSM8VP1GjoLxGUBTbRof5uhcgUJrlAb00cjD4qwp5I3zKFjEwb+UmW+bb3N+RF8AfQ7hbUiV6M1Wo+aeS15QVQP9+fX1bBfCOdIlQbfvWyL6C2b63DdFwK/2OlSFqdX8n4gB8MYRCJlTUEbrTyVmgZ/k9i+qCDBg4cKCP2wCCkOrZlccMux8C1QVWkcG6GrygcJj3+RZLV5QH0u17AfwxDEVsN1i/dbdoLbR2/3nh56jM516F20y8AHBO/7pAcycP5fLq+NKqCpRt4dK1FMH728UgFngrlo3XWbP3rCjPn5JZCFcziwUbr1dciFfiDg3sBQiVZnlOw+7vCpGGIl5j018ZBB98tVv4/qz9ugB7cwtpWnSAoyfPePz8SQeF8xkq7vwBM15+3mucaQQvwGo4VyRaNO0JLkWbL8hTFdYZNAZ7pWHOqAHbGILMXwEfr9vn0XNnFpTDsWvCMQXD3T9gzJgxvi0AjqplxaDemDQSLy/jtfT9/OXAyBQCuQguloZZxRCKAtvBZ5v2uvXM5So1vPPFPkEg1JTlw9RRidzr9XwbAawa3aLqVkT6x4yxmDqm4y3gEPIUWNbRWIBwDIN7jRzmIqw7lQVJu35x6VkLy1Xw1mf7sCRcJXjvqqsH4L333vUqb5pkaZi7lNA6CrqH1fAWcNje3/GIpFAdQU1Jbq2g1ZEMEWbVzzfhxY++huKKGrvPqMeXUe45ex3GL9oMqdklgscVp5yGTxbMhpYtW3qVNfKGQQDWvdIsN+jjNyfCxGWYGCJX2hibnpWGgWmtLs6BoZ0j4LcSy+ZLLWZgxPyNkBAhh7HDHoD42AiIbhGM8whouTePpN4phJ8u3oBqrf2JLrQVRdAztIJ7A6m3qYGqg1nB0iyxKQqTRvq38YMLBayAC2cF6U5MFs1Xx/Dp36fB+LmroEIRY3WWDDLLDPDFvt/cen5tFaJCyh7YeHBvg3CmEUrDnJ2m1X1a+sYEYDQq4RwAZyd/dlDHsPXjmcCUZon23LrqctD9vhMO7f0eYmJimo8A1GfX2pZmeYOCA5Uwonu43RwAd0rDrKlVdEv4acUsCC7902N7pjz9CshTdsPhvTshLi4OGoqafGmYuzT/lacxj6qcd25gZ0vD6oVAmFq1ioUTm/4XBrYoger8dNchv7IEco6uh1eGxsGF08e9+kbURrMBxneTw7oN/zHTkmzdGqDnSO+8XpaSRhaMuQ9WrlrtkLFd+4/nPealv0TBth07bbZHKzUWL4imApF/L5sPqamp8NWGLXDw0m2AiARQhkaCPMhyGJfFF1Nrq0qhMisZ/EvTYcLwIfDm7vVei/U3CQGYMmYEtzQ0jXw8kVvcpRlTn+EWZ6lHjx6w8pMlnHDRy6dTUlIgKycHM30KoKCoFCJClJAQ1wriu8fBQ6+/CcOGDWv0dw9K7w30AtHATWJiIrc0dZJeHXuPkyQAkgBIJAmAOHrPsyxPidzyr+vavXEFAF0ieggKcBsknjSosUkCYMD5B5ng4GC2UQQgKioKvR6WwQfR4aI2SqbEngbp/QYkjU6nM1RVVbnV5h67gbm5uSwOWepRCFTo05bfvXsXJCFoGMLp4xhMFinDtQYFgenVq5fLbS7zNBw7ZcoU/4SEhEB8gIiAgIBueL0huERgmTaVsvhDbcBPIrG6PRJ2NELbalwyNRrNCVznh4eHVy1evFjHushQjxGgTup0xcXFVIyXhvZAHkJSCAqEUkbpMhKJrveRDP7+/mps56qgoKBybHt1dna2W/aXxwiADyRDyaNeLkc9pAgNDVXgAylQSv3QOJF6v3dQgMGORnpfhypAh71ft2jRIoM7alcmxoicUQh69uwp+/nnn/1wOFOGQiAx34tExjd5XsnJyey2bdsYd20umZhDsrK66oXm9nrVJqwOTHaBu9f4fyVgzJGpmA/3AAAAAElFTkSuQmCC"
then in have it rendered inside of an iPython cell you simply do:
from IPython.core.display import Image
Image(data=img_data)
I'm going to build on what was answered by others (piRSquared) because it didn't work for me with Jupyter and Python 3. I wrote the following function, which will take any plot function I define and call it, and capture the outputs without displaying them in Jupyter. I personally use this in to build custom HTML machine learning reports based on many model iterations I execute using Livy and Spark.
from IPython.core.display import HTML
import binascii
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import base64
def capturePlotHTML(plotFunction):
# open IO object
sio3 = BytesIO()
plotFunction()
plt.savefig(sio3)
sio3.seek(0)
data_uri = base64.b64encode(sio3.read()).decode('ascii')
html_out = '<html><head></head><body>'
html_out += '<img src="data:image/png;base64,{0}" align="left">'.format(data_uri)
html_out += '</body></html>'
#prevents plot from showing in output
plt.close()
return (HTML(html_out))
# Plot Wrappers
# Advanced Wrapper for more complex visualizations (seaborn, etc)
class plotRegline:
def __init__(self):
#// could also pass in name as arg like this #def __init__(self, name):
reg_line_prepped_pdf = pandas_input_pdf
sns.lmplot(x='predicted',y='actual',data=reg_line_prepped_pdf,fit_reg=True, height=3, aspect=2).fig.suptitle("Regression Line")
# Basic Wrapper for simple matplotlib visualizations
def plotTsPred():
ts_plot_prepped_pdf = pandas_input_pdf
ts_plot_prepped_pdf.index = pd.to_datetime(ts_plot_prepped_pdf.DAYDATECOLUMN)
ts_plot_prepped_pdf = ts_plot_prepped_pdf.drop(columns=["DAYDATECOLUMN"])
ts_plot_prepped_pdf.plot(title="Predicted Vs Actual -- Timeseries Plot -- Days", figsize=(25,6))
#building the plots and capturing the outputs
regline_html = capturePlotHTML(plotRegline)
ts_plot_day_html = capturePlotHTML(plotTsPred)
# could be any list number of html objects
html_plots = [regline_html, ts_plot_day_html]
combined_html_plots = display_html(*html_plots)
# the following can be run in this code block or another display the results
combined_html_plotes
The answer by piRSquared no longer works with Python 3. I had to change it to:
from IPython.core.display import HTML
import binascii
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# open IO object
bio = BytesIO()
# generate random DataFrame
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
# initialize figure and axis
fig, ax = plt.subplots(1, 1);
# plot DataFrame
ax.scatter(df.iloc[:, 0], df.iloc[:, 1]);
# print raw canvas data to IO object
fig.canvas.print_png(bio)
plt.close(fig)
# convert raw binary data to base64
# I use this to embed in an img tag
img_data = binascii.b2a_base64(bio.getvalue()).decode()
# keep img tag outter html in its own variable
img_html = '<img src="data:image/png;base64,{}
">'.format(img_data)
HTML("<h1>Hello</h1><hr/>"+img_html)
Specifically, I import from io, not StringIO, and I use BytesIO rather than StringIO. I needed to decode the bytes into a string for inserting into the HTML. I also added the required imports of numpy and pandas for the example plot to work, and added plt.close(fig) so that you don't end up with two figures in the output.
If you want to show the results of DataFrame.plot in an iPython cell, try this:
import pandas as pd
import numpy as np
%matplotlib inline
np.random.seed(314)
df = pd.DataFrame(np.random.randn(1000, 2), columns=['x', 'y'])
df.plot.scatter(0, 1)

ipython notebook .png figures after nbconvert not loaded by latest chrome/firefox

Running $ipython3 notebook --pylab=inline locally, I saved a simple notebook with a small png figure using pylab and python 3.3.
Contents of notebook cell:
from pylab import *
x = linspace(0, 5, 10)
y = x ** 2
figure()
plot(x, y, 'r')
xlabel('x')
ylabel('y')
title('title')
show()
running the cell resulted in an inline png figure being displayed.
The saved file (my_notebook.ipynb) has a .png saved as a data uri:
{ ..., "png":"iVBO...ZUmwK\n...", ... }
after executing command:
ipython3 nbconvert --to html my_notebook.html
my_notebook.html is generated with the figure as a data uri like this:
<img src="data:image/png;base64,b'iVBO...ZUmwk\n..." >
In latest chrome or firefox the image data uri does not load/display when opening file:///.../my_notebook.html locally and chrome console reports 'failed to load resource' for the img tag.
I have had the same results with images loaded and then displayed with imshow().
The figures appear fine in the notebook. It is after nbconvert to html that they do not display (at all).
(notice the escaped newline in the image data uri - I tried replacing all escaped newlines in the data string with actual newlines with no change in results)
How can I get png figures to display in an nbconverted-html-version of an ipython notebook opened locally ("file:///.../my_notebook.html") in browser?
(I would rather not have to save each figure and hand modify the converted html to reference the saved figure on disk.)
EDIT:
versions:
python 3.3.1
ipython==1.0.0
matplotlib==1.2.1
Pillow==2.1.0 (PIL)
Install BeautifulSoup4 first:
pip install BeautifulSoup4
Then use following function to freeze your generated html file. The images will be placed in the images folder under the same directory as the html file.
import os
import re
import base64
from bs4 import BeautifulSoup as BS
from uuid import uuid4
def dump(path, data):
root = os.path.dirname(path)
if not os.path.exists(root):
os.makedirs(root)
with open(path, 'wb') as f:
f.write(data)
# for windows
return path.replace('\\', '/')
def freeze_html(path):
'''pass in absolute path of your html'''
root = os.path.dirname(path)
with open(path, 'rb') as f:
soup = BS(f.read())
for img in soup.find_all('img'):
m = re.search(r"data:image/png;base64,b'(.*)'", img['src'])
if m:
iname = uuid4()
ipath = os.path.join(root, 'images', '%s.png' % iname)
# remove '\n'
s = m.group(1).replace(r'\n', '')
img['src'] = os.path.relpath(
dump(ipath, base64.b64decode(s.encode('ascii'))),
root
)
with open(path, 'wb') as f:
f.write(soup.encode('utf-8'))
If you do not need to further convert it to tex or pdf, you can just write string (\n removed) back to img['src'](with data:image/png;base64, prefix):
import re
from bs4 import BeautifulSoup as BS
def freeze_html(path):
'''pass in absolute path of your html'''
with open(path, 'rb') as f:
soup = BS(f.read())
for img in soup.find_all('img'):
m = re.search(r"data:image/png;base64,b'(.*)'", img['src'])
if m:
# remove '\n'
s = m.group(1).replace(r'\n', '')
img['src'] = 'data:image/png;base64,' + s
with open(path, 'wb') as f:
f.write(soup.encode('utf-8'))
I prefer to save png to separate file because it's more friendly to xelatex.