Unable to use pytesseract on Mac after installing Tesseract through Homebrew in Terminal

The code I would like to run:
import cv2
import pytesseract
img = cv2.imread("the path to the png file")
print(pytesseract.image_to_string(img))
The error I get:
TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.

Have you set this variable?
pytesseract.pytesseract.tesseract_cmd = "/usr/local/bin/tesseract"
I use pytesseract on Mac. Here is an example:
import cv2
import pytesseract
from pytesseract import Output

pytesseract.pytesseract.tesseract_cmd = "/usr/local/bin/tesseract"

# custom_config removes unwanted characters from the output
custom_config = r'-c tessedit_char_blacklist=|[]{}?!&§()$*:ùûÿ --psm 6 --oem 3'

test = path + "image.png"
img = cv2.imread(test)

# output_type='data.frame' returns a pandas DataFrame for further processing
d = pytesseract.image_to_data(img, lang='fra', config=custom_config, output_type='data.frame').dropna()
test1 = pytesseract.image_to_string(img, lang='fra', config=custom_config)
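If Homebrew put Tesseract somewhere else (on Apple Silicon it typically installs to /opt/homebrew/bin rather than /usr/local/bin), a sketch like this avoids hard-coding the path; it assumes the binary is on your PATH, which running `which tesseract` in Terminal will confirm:
import shutil
import pytesseract

# Search PATH for the tesseract binary instead of hard-coding a location.
tesseract_path = shutil.which("tesseract")
if tesseract_path:
    pytesseract.pytesseract.tesseract_cmd = tesseract_path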


I don't know what to do with this problem: invalid syntax (<unknown>, line 20) pylint(syntax-error)

I'm trying to build my own desktop assistant and got a problem with one line. I've checked for an extra space or line but everything looks OK; could you please check if anything is wrong?
I've added my script in progress and a picture. Thank you all!
import speech_recognition as sr
import os
import sys
import re
import webbrowser
import smtplib
import requests
import subprocess
from pyowm import OWM
import youtube_dl
import vlc
import urllib
import urllib2
import json
from bs4 import BeautifulSoup as soup
from urllib2 import urlopen
import wikipedia
import random
from time import strftime

def sofiaResponse (audio);
    "speaks audio passed as argument"
    print(audio)
    for line in audio.splitlines():
        os.system("say" + audio)

def myCommand ():
    "listens for commands"
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print('Say something...')
        r.pause_threshold = 1
        r.adjust_for_ambient_noise(source, duration=1)
        audio = r.listen(source)
    try:
        command = r.recognize_google(audio).lower()
        print('You said: ' + command + '\n')
        #loop back to continue listening
    except sr.UnknownValueError:
        print('Error, help me error')
        command = myCommand();
    return command

def assistant (command):
    "if statements for executing commands"
On line 22 you made a typo:
def sofiaResponse (audio);
should be
def sofiaResponse (audio):
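Beyond the colon, a minimal corrected sketch of that function (assuming macOS, since it shells out to the `say` command; note the space after "say", and speaking each line rather than the whole text on every iteration):
import os

def sofiaResponse(audio):
    "speaks audio passed as argument"
    print(audio)
    for line in audio.splitlines():
        # The space after "say" is needed so the shell sees: say <text>
        os.system("say " + line)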

AttributeError: module 'carla' has no attribute 'Client'

I've been trying to play around with the Carla self-driving car environment, but I run into "AttributeError: module 'carla' has no attribute 'Client'" when I try running the code from this tutorial: https://pythonprogramming.net/control-camera-sensor-self-driving-autonomous-cars-carla-python/.
I have made a few changes to the code, including changing the .egg file to its exact file path on my computer.
This is my code:
import glob
import os
import sys

try:
    sys.path.append(glob.glob('C:\Downloads\CARLA_0.9.9.4\WindowsNoEditor\PythonAPI\carla\dist\carla-0.9.9-py3.7-win-amd64.egg'))
except IndexError:
    pass

import carla

actor_list = []
#try:
client = carla.Client("localhost", 2000)
client.set_timeout(2.0)
world = client.get_world()
blueprint_library = world.get_blueprint_library()
#finally:
for actor in actor_list:
    actor.destroy()
print("All cleaned up!")
Just for reference, I'm running on Windows 10 with Anaconda3 and Python 3.7.7, and I'm using Carla version 0.9.9.4. Thanks in advance!
Just correct your folder path. You would need to rename the folders in your file structure like this...
Remove all "." from the path.
path = glob.glob('C:\Downloads\CARLA_0994\WindowsNoEditor\PythonAPI\carla\dist\carla-099-py37-win-amd64.egg')[0]
sys.path.append(path)
Full Example:
import glob
import os
import sys

try:
    path = glob.glob('C:\Downloads\CARLA_0994\WindowsNoEditor\PythonAPI\carla\dist\carla-099-py37-win-amd64.egg')[0]
    sys.path.append(path)
except IndexError:
    pass

import carla

actor_list = []
try:
    client = carla.Client("localhost", 2000)
    client.set_timeout(5.0)
    world = client.get_world()
    blueprint_library = world.get_blueprint_library()
    print("Map = ", world.get_map())
finally:
    for actor in actor_list:
        actor.destroy()
    print("All cleaned up!")

I got an error in PyCharm after trying to run a test code for deep learning

# USAGE
# python train_simple_nn.py --dataset animals --model output/simple_nn.model --label-bin output/simple_nn_lb.pickle --plot output/simple_nn_plot.png
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset of images")
ap.add_argument("-m", "--model", required=True, help="path to output trained model")
ap.add_argument("-l", "--label-bin", required=True, help="path to output label binarizer")
ap.add_argument("-p", "--plot", required=True, help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []
# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
    # load the image, resize the image to be 32x32 pixels (ignoring
    # aspect ratio), flatten the 32x32x3=3072 pixel image into a list,
    # and store the image in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (32, 32)).flatten()
    data.append(image)
    # extract the class label from the image path and update the
    # labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
I found a test code for studying deep learning and tried to run it in PyCharm, but I got the error message below. I also couldn't understand what the parser function is doing here. Could you explain that code and the error?
--- error I got in PyCharm ---
C:\Users\giyeo\anaconda3\envs\tf\python.exe "D:/GiyeonLee/09. Machine Learning/Pycharm/Tutorial/keras-tutorial/train_simple_nn.py"
2020-07-06 13:56:28.409237: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
usage: train_simple_nn.py [-h] -d DATASET -m MODEL -l LABEL_BIN -p PLOT
train_simple_nn.py: error: the following arguments are required: -d/--dataset, -m/--model, -l/--label-bin, -p/--plot
Process finished with exit code 2
Thanks for reading my question.
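The script exits with code 2 because argparse, not the deep-learning code, stopped it: the four ap.add_argument calls declare required command-line flags, and ap.parse_args() prints the usage message and exits when any of them is missing. PyCharm's default run configuration passes no parameters, which is exactly what the error reports. A minimal sketch of the mechanism, using a hypothetical demo.py:
import argparse

# Declare one required flag; parse_args() reads sys.argv and exits with
# code 2 and a usage message if the flag is missing.
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True, help="path to input dataset of images")
args = vars(ap.parse_args())
print(args["dataset"])
Running python demo.py -d animals prints animals, while python demo.py alone fails with "the following arguments are required: -d/--dataset". Adding the flags shown in the USAGE comment to the Parameters field of the PyCharm run configuration should resolve the error.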

raise_FirstSetError in SpaCy topic modeling

I want to create an LDA topic model and am using spaCy to do so, following a tutorial. The error I receive when I try to use spaCy is one I cannot find on Google, so I'm hoping someone here knows what it's about.
I'm running this code on Anaconda:
import numpy as np
import pandas as pd
import re, nltk, spacy, gensim
# Sklearn
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from pprint import pprint
# Plotting tools
import pyLDAvis
import pyLDAvis.sklearn
import matplotlib.pyplot as plt
df = pd.DataFrame(data)
def sent_to_words(sentences):
    for sentence in sentences:
        # deacc=True removes punctuation
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))

data_words = list(sent_to_words(data))
print(data_words[:1])

def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """https://spacy.io/api/annotation"""
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent))
        texts_out.append(" ".join([token.lemma_ if token.lemma_ not in ['-PRON-'] else '' for token in doc if token.pos_ in allowed_postags]))
    return texts_out

nlp = spacy.load('en', disable=['parser', 'ner'])

# Do lemmatization keeping only Noun, Adj, Verb, Adverb
data_lemmatized = lemmatization(data_words, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
print(data_lemmatized[:1])
And I receive the following error:
File "C:\Users\maart\AppData\Local\Continuum\anaconda3\lib\site-packages\_regex_core.py", line 1880, in get_firstset
raise _FirstSetError()
_FirstSetError
The error must occur somewhere after the lemmatization, because the other parts work fine.
Thanks a bunch!
I had this same issue and was able to resolve it by uninstalling regex (I had the wrong version installed) and then running python -m spacy download en again. This reinstalls the correct version of regex.
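For context, the _regex_core.py in the traceback belongs to the third-party regex package, which older spaCy releases depend on and pin to a specific version. A quick sketch to check which version is installed before and after the fix:
import regex

# A regex version that doesn't match what spaCy pins can surface as
# _FirstSetError deep inside _regex_core.py.
print(regex.__version__)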

Can't load dataset into IPython. UnicodeDecodeError: 'utf-8' codec can't decode byte 0xcd in position 1: invalid continuation byte

Fairly new to using IPython, so I'm still getting confused quite easily. Here is my code so far. After loading, I have to display only the first 5 rows of the file.
# Import useful packages for data science
from IPython.display import display, HTML
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load concerts.csv
path1 = 'C:\\Users\\Cathal\\Documents\\concerts.csv'
concerts = pd.read_csv(path1)
Thanks in advance for any help.
Try
concerts = pd.read_csv(path1, encoding='utf8')
If that doesn't work, try
concerts = pd.read_csv(path1, encoding="ISO-8859-1")