Google sheets OCR image at URL in Col1, output text in Col2? - google-apps-script

in a Google Sheet, I'm trying to use a function to OCR an image whose URL is in column A, and output the OCR text in column B. The code I'm attempting to use is from this thread:
ocr images from list of urls and store the results in spreadsheet
function onOpen() {
var ss = SpreadsheetApp.getActive();
var menuItems = [
{name: 'RUN', functionName: 'doGet2'}
];
ss.addMenu('OCR', menuItems);
}
function doGet2() {
var ROW_START = 3;
var URL_COL = 1;
var TEXT_COL = 2;
var ss = SpreadsheetApp.getActive();
var sheet = ss.getActiveSheet();
var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
var texts = [];
for(var i=0; i<urls.length; i++) {
var url = urls[i];
if(url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
texts.push([text]);
Drive.Files.remove(docFile.id);
}
else {
texts.push("request error");
}
}
sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}
Here is the Google Sheet I'm testing with. Anyone can edit:
https://docs.google.com/spreadsheets/d/1jhSmE295bTOzjoXmgcyymNi-6ylKg5PLEKktPZJTD1c/edit?usp=sharing
I think the original code is meant to pull from Google Drive. I don't need that, I just want to pull from a URL. Being able to use the function inside an arrayformula would be a plus.
Thanks!

Related

Get range of google spreadsheet into an image using google script error

Get range of google spreadsheet into an image using google script
ZektorH wrote a script to save a selected range of cells to google drive as an image. I imported the script and Im able to save images to google drive but only if all the cells in the selected range have text. otherwise I get this error
"Exception: The object (SLIDES_APIxxxxxxxxxx_0) has no text."
Is there a way to avoid having text on all cells? for example on merged cells
function onOpen(e) {
//Create custom menu to export range to Slides.
SpreadsheetApp.getUi()
.createMenu('Custom Functions')
.addItem('Export Range to Image Files', 'SelectedRangeToImage')
.addToUi();
}
function SelectedRangeToImage() {
var slide = RangeToSlides();
var slideId = slide.getId();
var images = [];
for (var x=0; x<slide.getSlides().length;x++) {
var image = SlidesToImage(slide.getName()+x, slideId, slide.getSlides()[x].getObjectId());
images.push(image);
}
//Show interface with links to all images
var ui = SpreadsheetApp.getUi();
var html = HtmlService.createHtmlOutput();
html.append("<p>Your images:</p>");
html.append("<ul>");
for (var i=0; i<images.length; i++) {
html.append("<li><a href='"+images[i].getUrl()+"'>"+images[i].getName()+"</a></li>");
}
html.append("</ul>");
html.append("<input type='button' value='Close' onclick='google.script.host.close()' />");
ui.showModalDialog(html, "Exporting results:");
}
function RangeToSlides() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var range = ss.getActiveRange();
var rangeValues = range.getDisplayValues();
var rangeHorizontalAlignments = range.getHorizontalAlignments()
var rangeBackgrounds = range.getBackgrounds();
var rangeFontWeights = range.getFontWeights();
var sl = SlidesApp.create("ExportToImage"+new Date());
var slide = sl.getSlides()[0];
//Create table with size of the range
var table = slide.insertTable(rangeValues.length, rangeValues[0].length);
for (var x=0; x<rangeValues.length; x++) {
for (var y=0; y<rangeValues[x].length; y++) {
var cell = table.getCell(x,y);
cell.getText().setText(rangeValues[x][y]); //Set text
cell.getFill().setSolidFill(rangeBackgrounds[x][y]); //Set background
cell.getText().getTextStyle().setBold(rangeFontWeights[x][y]=="bold"?true:false); //Set text formatting
var alignment;
switch(rangeHorizontalAlignments[x][y]) {
case "general-left":
alignment = SlidesApp.ParagraphAlignment.START;
break;
case "general-right":
alignment = SlidesApp.ParagraphAlignment.END;
break;
case "center":
alignment = SlidesApp.ParagraphAlignment.CENTER;
break;
}
cell.getText().getParagraphStyle().setParagraphAlignment(alignment); //Set text alignment
}
}
sl.saveAndClose();
return sl;
}
function SlidesToImage(name, presentationId, slideId) {
var url = "https://slides.googleapis.com/v1/presentations/"+presentationId+"/pages/"+slideId+"/thumbnail";
var options = {
headers: {
Authorization: 'Bearer ' + ScriptApp.getOAuthToken()
}
};
var response = UrlFetchApp.fetch(url, options);
var responseJson = JSON.parse(response.getContentText());
var imageurl = responseJson.contentUrl;
var imageResponse = UrlFetchApp.fetch(imageurl, options);
var blob = imageResponse.getBlob();
blob.setName(name);
var resultingFile = DriveApp.createFile(blob);
return resultingFile;
}
ZektorH script below

Get the hyperlink and its text value using google docs

I hope everyone is in good health.
My main goal
So main goal was to get the hyperlink and the text linked with it. I initially used code as below :
var ekArr = [];
function myFunction() {
var doc= DocumentApp.getActiveDocument();
var isCell4Blank1 = doc.getBlob().getDataAsString();
var data = Utilities.parseCsv(isCell4Blank1);
var linku = doc.getBody().getTables()[0].getCell(2,1);
var t = doc.getBody().getTables()[0].getCell(2,1).getText();
Logger.log("link is. " + linku);
// Logger.log(doc.getBody().getText());
for(var find in data){
var fi = data[find].toString().includes("http");
if(fi===true){
var newArr = data[find].toString().split(" ");
var output = newArr[1].replace("(","").replace(")","").replace(">>>>","")
ekArr.push(output);
}
}
var tex = doc.getBody().getText();
var fo = tex.split(ekArr[0]);
Logger.log(tex);
doc.getBody().getTables()[0].getCell(2,2).setText(ekArr);
doc.getBody().getTables()[0].getCell(2,3).setText(t);
Logger.log(ekArr);
return ekArr;
}
I need to extract the links and content from the cells of a table.
From above code I am able to extract the links however I am not able to extract the text linked with it.
Also I have another code which helped me to extract the links and text but from googlesheets. As I am new to google docs I want to modify the below code according to google docs.
function sheetFunction() {
var sheet= SpreadsheetApp.getActiveSheet();
var isCell4Blank1 = sheet.getRange("A1").isBlank();
if (!isCell4Blank1) {
var linkData =
sheet.getRange("A1").getRichTextValue().getRuns().reduce((ar, e) => {
var url = e.getLinkUrl();
Logger.log(url);
if (url) {
var color = e.getTextStyle().getForegroundColor();
var startIndex = e.getStartIndex();
var endIndex = e.getEndIndex();
var text = e.getText()
ar.push(color);
ar.push(text);
ar.push(startIndex);
ar.push(endIndex);
ar.push(url);
}
return ar;
}, [])
}
Use the methods getText() and getLinkUrl()
Sample based on your first code snippet:
function myFunction() {
var doc = DocumentApp.getActiveDocument();
var linku = doc.getBody().getTables()[0].getCell(2,1);
Logger.log("link text is: " + linku.getText());
Logger.log("link is: " + linku.getLinkUrl());
}

=URL() Script code issue | ReferenceError: btn is not defined (line 9, file "Code")

I am having an issue trying to use a script to extract the URL from one test with hyperlink.
Here is the script I am using:
function URL(reference) {
var sheet = SpreadsheetApp.getActiveSheet();
var formula = SpreadsheetApp.getActiveRange().getFormula();
var args = formula.match(/=\w+\((.*)\)/i);
try {
var range = sheet.getRange(args[1]);
}
catch(e) {
throw new Error(args[1] + 'is not a valid range');
}
var formulas = range.getFormulas();
var output = [];
for (var i = 0; i < formulas.length; i++) {
var row = [];
for (var j = 0; j < formulas[0].length; j++) {
var url = formulas[i][j].match(/=hyperlink\("([^"]+)"/i);
row.push(url ? url[1] : '');
}
output.push(row);
}
return output
}
After I run the script I get this error message:
TypeError: Cannot read property '1' of null (line 9, file "Code")
Any idea where the issue comes from and I can solve this?
You have try/catch block that throws error, but in catch block you are trying to iterate null object again:
try {
var range = sheet.getRange(args[1]);
}
catch(e) {
throw new Error(args[1] + 'is not a valid range'); // <- `args[1]` is producing new error
}
If you change catch content to throw new Error(formula + 'is not a valid range');, it should work.
Retrieve the hyperlink with the Advanced Sheets Service and assign the script to a button
The way you are following so far (extracting a hyperlink from a formula) is an old solution that does not work anymore
Currently, to retrieve a spreadsheet you need to use the Sheets API method spreadsheets:get
To use the Sheest API in Apps Script, you need to enable it in the editor
Sample script:
function URL() {
var spreadsheet =SpreadsheetApp.getActive();
var sheet = spreadsheet.getActiveSheet();
var id = spreadsheet.getId();
var range = sheet.getActiveRange();
var linkArray = Sheets.Spreadsheets.get(id, {ranges: sheet.getName() + "!" + range.getA1Notation(), fields: "sheets/data/rowData/values/hyperlink"});
var link = linkArray.sheets[0].data[0].rowData[0].values[0].hyperlink;
var cellInB = sheet.getRange(range.getRow(), 2);
cellInB.setValue(link);
}
As mentioned in the comments, in your use case it is not possible to use custom formulas, instead create a drawing and assign the script to it:
Now, select a cell in column A, click on the button and the link will be populated in column B.
UPDATE:
To retrieve all URLs at once you can use the followign script and run it manually:
function getAllURLs() {
var spreadsheet = SpreadsheetApp.getActive();
var sheet = spreadsheet.getActiveSheet();
var id = spreadsheet.getId();
var lastRow = sheet.getRange("A2").getNextDataCell(SpreadsheetApp.Direction.DOWN).getRow();
var range = sheet.getRange(2,1,lastRow-1, 1);
var linkArray = Sheets.Spreadsheets.get(id, {ranges: sheet.getName() + "!" + range.getA1Notation(), fields: "sheets/data/rowData/values/hyperlink"});
var links = [];
for (var i = 0; i< linkArray.sheets[0].data[0].rowData.length; i++){
var link = linkArray.sheets[0].data[0].rowData[i].values[0].hyperlink;
links[i] = [];
links[i].push(link);
}
var cellsInB = sheet.getRange(2,2,lastRow-1, 1);
cellsInB.setValues(links);
}

Get range of google spreadsheet into an image using google script

I'm building a search tool by which one can convert a google range into an image using google script. I tried to paste that data range to google slides and from there I get thumbnails and their url. I'm searching any other tools that directly give me the url of image of the selected range of a google sheet.
Thanks
This is a very interesting question.
I am unsure of the reasoning behind doing this, but nonetheless, here is an answer:
This creates a custom menu on the top of your sheet that says:
Custom Functions => Export Range to Image Files.
When you click that it:
Turns whatever you have selected into a table in sheets
Saves it
Generates an image from that
Saves image to drive
Show a pop-up with the links of the saved images.
The code is ready to handle multiple ranges being exported, but right now it exports only the selected range.
function onOpen(e) {
//Create custom menu to export range to Slides.
SpreadsheetApp.getUi()
.createMenu('Custom Functions')
.addItem('Export Range to Image Files', 'SelectedRangeToImage')
.addToUi();
}
function SelectedRangeToImage() {
var slide = RangeToSlides();
var slideId = slide.getId();
var images = [];
for (var x=0; x<slide.getSlides().length;x++) {
var image = SlidesToImage(slide.getName()+x, slideId, slide.getSlides()[x].getObjectId());
images.push(image);
}
//Show interface with links to all images
var ui = SpreadsheetApp.getUi();
var html = HtmlService.createHtmlOutput();
html.append("<p>Your images:</p>");
html.append("<ul>");
for (var i=0; i<images.length; i++) {
html.append("<li><a href='"+images[i].getUrl()+"'>"+images[i].getName()+"</a></li>");
}
html.append("</ul>");
html.append("<input type='button' value='Close' onclick='google.script.host.close()' />");
ui.showModalDialog(html, "Exporting results:");
}
function RangeToSlides() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var range = ss.getActiveRange();
var rangeValues = range.getDisplayValues();
var rangeHorizontalAlignments = range.getHorizontalAlignments()
var rangeBackgrounds = range.getBackgrounds();
var rangeFontWeights = range.getFontWeights();
var sl = SlidesApp.create("ExportToImage"+new Date());
var slide = sl.getSlides()[0];
//Create table with size of the range
var table = slide.insertTable(rangeValues.length, rangeValues[0].length);
for (var x=0; x<rangeValues.length; x++) {
for (var y=0; y<rangeValues[x].length; y++) {
var cell = table.getCell(x,y);
cell.getText().setText(rangeValues[x][y]); //Set text
cell.getFill().setSolidFill(rangeBackgrounds[x][y]); //Set background
cell.getText().getTextStyle().setBold(rangeFontWeights[x][y]=="bold"?true:false); //Set text formatting
var alignment;
switch(rangeHorizontalAlignments[x][y]) {
case "general-left":
alignment = SlidesApp.ParagraphAlignment.START;
break;
case "general-right":
alignment = SlidesApp.ParagraphAlignment.END;
break;
case "center":
alignment = SlidesApp.ParagraphAlignment.CENTER;
break;
}
cell.getText().getParagraphStyle().setParagraphAlignment(alignment); //Set text alignment
}
}
sl.saveAndClose();
return sl;
}
function SlidesToImage(name, presentationId, slideId) {
var url = "https://slides.googleapis.com/v1/presentations/"+presentationId+"/pages/"+slideId+"/thumbnail";
var options = {
headers: {
Authorization: 'Bearer ' + ScriptApp.getOAuthToken()
}
};
var response = UrlFetchApp.fetch(url, options);
var responseJson = JSON.parse(response.getContentText());
var imageurl = responseJson.contentUrl;
var imageResponse = UrlFetchApp.fetch(imageurl, options);
var blob = imageResponse.getBlob();
blob.setName(name);
var resultingFile = DriveApp.createFile(blob);
return resultingFile;
}
Hope this helps!
References:
https://stackoverflow.com/a/51391196/11869748
How to download Google Slides as images?
https://developers.google.com/slides/reference/rest/v1/presentations.pages/getThumbnail

ocr images from list of urls and store the results in spreadsheet

Hello I have a list of image URLs that contain numbers and I want to OCR them and store the results in google spreadsheet
I've found these google scripts to ocr images
1- https://gist.github.com/tagplus5/07dde5ca61fe8f42045d
2- https://ctrlq.org/code/20128-extract-text-from-image-ocr
But I didn't know how to create a request variable so I've replaced request variable with URL variable like this:
function doGet(url) {
if (url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
Drive.Files.remove(docFile.id);
return ContentService.createTextOutput(text);
}
else {
return ContentService.createTextOutput("request error");
}
}
the problem is when I call the function like this doGet(B1) where B1 contain the url to the image in google spreadsheet to do the OCR and get the resulted text in the cell C1 it says Drive variable is undefined
Hope get answered soon
Okay, I modified your script and made a sheet to show an example. The sheet is here(anyone can edit) and its script is below. Drive API(v2) of Advanced Drive Service should be enabled to run this script.
function onOpen() {
var ss = SpreadsheetApp.getActive();
var menuItems = [
{name: 'RUN', functionName: 'doGet2'}
];
ss.addMenu('OCR', menuItems);
}
function doGet2() {
var ROW_START = 3;
var URL_COL = 1;
var TEXT_COL = 2;
var ss = SpreadsheetApp.getActive();
var sheet = ss.getActiveSheet();
var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
var texts = [];
for(var i=0; i<urls.length; i++) {
var url = urls[i];
if(url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
texts.push([text]);
Drive.Files.remove(docFile.id);
}
else {
texts.push("request error");
}
}
sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}
The code is okay. V2 API is still alive. See this documentation. All you need is to enable Advanced Drive Service. In the script editor, select Resources > Advanced Google services and turn on Drive API (only v2 is selectable). Then your code actually works.