I have implemented following script to do OCR on single and multiple images using image URL.
function doOCRALL() {
var selected = SpreadsheetApp.getActiveSheet().getActiveRange().getValues().length;
for (var i = 0; i < selected; i++) {
var activeCol = SpreadsheetApp.getActiveSheet().getActiveCell().getColumn();
var activeRow = SpreadsheetApp.getActiveSheet().getActiveCell().getRow();
var valueURL = SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol).getValue();
var image = UrlFetchApp.fetch(valueURL).getBlob();
var file = {
title: 'OCR File',
mimeType: 'image/png'
};
// OCR is supported for PDF and image formats
file = Drive.Files.insert(file, image, {ocr: true});
var doc = DocumentApp.openByUrl(file.embedLink);
var body = doc.getBody().getText();
//Get link Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol + 2).setValue(file.embedLink);
//Get Content of Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol + 1).setValue(body);
}
}
function doOCR() {
//
var activeCol = SpreadsheetApp.getActiveSheet().getActiveCell().getColumn();
var activeRow = SpreadsheetApp.getActiveSheet().getActiveCell().getRow();
var valueURL = SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol).getValue();
var image = UrlFetchApp.fetch(valueURL).getBlob();
var file = {
title: 'OCR File',
mimeType: 'image/png'
};
// OCR is supported for PDF and image formats
file = Drive.Files.insert(file, image, {ocr: true});
var doc = DocumentApp.openByUrl(file.embedLink);
var body = doc.getBody().getText();
// Print the Google Document URL in the console
Logger.log("body: %s", body);
Logger.log("File URL: %s", file.embedLink);
//Get link Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol + 2).setValue(file.embedLink);
//Get Content of Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol + 1).setValue(body);
}
function onOpen() {
var ui = SpreadsheetApp.getUi();
// Or DocumentApp or FormApp.
ui.createMenu('OCR Tools')
.addItem('Extract Cell', 'doOCR')
.addItem('Extract All Cell', 'doOCRALL')
.addSeparator()
.addSubMenu(ui.createMenu('About US')
.addItem('Infomation', 'menuItem2'))
.addToUi();
}
function menuItem2() {
SpreadsheetApp.getUi() // Or DocumentApp or FormApp.
.alert('AIO Team');
}
When I provide an image URL for any image, it works. But if I upload the same image on my drive and then provide the image URL from drive, it only gives me "Sign in Main menu". For other drive images it gives the same text.
Thanks in advance.
If content is already in Drive, you do not need to get a link to it - just supply the file id (which you can get from a link to it).
Once you have the file ID, you can simply copy it, and use the optimal arguments to activate OCR. The full options list is, of course, available on the Drive REST API page: https://developers.google.com/drive/api/v2/reference/files/copy#parameters
I encourage you to also read about best practices such as fields specification (which is a requirement of the more recent drive API version).
This function takes an input Drive file ID that you got from somewhere, and a truth-y value to set the "use OCR" option.
Obvious assumptions are that you have permission, the id is valid, you have enabled the advanced service and the Drive API in cloud console, etc.
function getIdOfCopyOfDriveFile(fileId, useOcr) {
const options = {
fields: "choose the metadata fields to return in the response e.g. 'id,title,parents'"
};
const existingMetaData = Drive.Files.get(fileId, options);
options.ocr = !!useOcr;
existingMetaData.title += " (copied with" + (options.ocr ? " " : "out ") + "ocr)";
// We could do other modifications of fields we requested before
// copying, like changing the parents array to move the new file.
const newFileMetaData = Drive.Files.copy(existingMetaData, fileId, options);
return newFileMetaData.id;
}
Related
Hello i try to make simple template file for mixing toegether two images.
One is as overlay of slide and second one come from Google Form as background.
Problem is that i don't have any knowledge about this code.
I stuck at this point and i cant combine this two piece of code.
function onFormSubmit(e) {
//open the template presentation by ID
var templateDoc = DriveApp.getFileById('1bGKJ027bnUCgSpQrunXXBZCgFNXZHtF2ZnsaWHLh8B4');
//create a copy of the template, we don't wanna mess up the template presentation
var newTempFile = templateDoc.makeCopy();
//open the presentation for editing
var openSlide = SlidesApp.openById(newTempFile.getId());
//get the responses triggered by On Form Submit
var items = e.response.getItemResponses();
//find the text in the presentation and replace it with the Form response
//items[0].getResponse() is the first response in the Form
//and it is the "Title"
//openSlide.replaceAllText('{Title}', items[0].getResponse());
//items[1].getResponse() is the second and it is the date
//openSlide.replaceAllText('{Text}', items[1].getResponse());
//You can add as much as you have and change them in the Template Doc like this
//openSlide.replaceAllText('{number}', items[2].getResponse());
//openSlide.replaceAllText('{choice}', items[3].getResponse());
//and so on...
var image = items[0].getResponse();
openSlide.getSlides().forEach(s => {
s.getShapes().forEach(e => {
if (e.getText().asString().trim() == '{{image1}}') {
e.replaceWithImage(DriveApp.getFileById(Array.isArray(image) ? image[0] : image).getBlob());
}
})
});
//Save and Close the open document
DriveApp.getFileById(newTempFile.getId()).setName(items[0].getResponse());
}
// this is code for making PNG file from slide, it work like a charm when its running in simple slide file with slide id pasted in presentationId (below) but i cant figure it out how to present the id of new (above) just created slide to code below. folderId is easy i think. But problem is with this presentationId
var folderId = "xxxxxxxxxxxxxxxxxxxx"//เปลี่ยน ไอดีโฟลเดอร์เป็นของท่านเอง
var presentationId = "xxxxxxxxxxxxxxxxxxx"//เปลี่ยน ไอดี Slide เป็นของท่านเอง
function convertToPNG() {
var slideId = 'p';
var url = 'https://docs.google.com/presentation/d/' + presentationId +
'/export/png?id=' + presentationId + '&pageid=' + slideId;
var options = {
headers: {
Authorization: 'Bearer ' + ScriptApp.getOAuthToken()
}
};
var response = UrlFetchApp.fetch(url, options);
var image = response.getAs(MimeType.PNG);
image.setName("png-"+Math.random().toFixed(2));
//image.setName(DriveApp.getFileById(presentationId).getName());
var img = DriveApp.getFolderById(folderId).createFile(image).getUrl()
Logger.log(img)
}
I try my best to combine these two codes but nothing works for me (because I have zero knowledge of this scripting language)
Can someone try to give me a hint on how to tell the second part of the code to know from what SlideId should make a png file?
Working code
function onFormSubmit(e) {
// Open the template presentation by ID
var templateDoc =
SlidesApp.openById('1bGKJ027bnUCgSpQrunXXBZCgFNXZHtF2ZnsaWHLh8B4');
// Create a copy of the template, we don't want to mess up the template
presentation
var newTempFile = DriveApp.getFileById(templateDoc.getId()).makeCopy();
// Open the new presentation for editing
var newSlide = SlidesApp.openById(newTempFile.getId());
// Get the responses triggered by on form submit
var items = e.response.getItemResponses();
// Find the text in the presentation and replace it with the form
response
// Items[0].getResponse() is the first response in the form and it is
the "Title"
// OpenSlide.replaceAllText('{Title}', items[0].getResponse());
// Items[1].getResponse() is the second and it is the date
// OpenSlide.replaceAllText('{Text}', items[1].getResponse());
// You can add as much as you have and change them in the template doc
like this
// OpenSlide.replaceAllText('{number}', items[2].getResponse());
// OpenSlide.replaceAllText('{choice}', items[3].getResponse());
// And so on...
var image = items[0].getResponse();
newSlide.getSlides().forEach(s => {
s.getShapes().forEach(e => {
if (e.getText().asString().trim() == '{{image1}}') {
e.replaceWithImage(DriveApp.getFileById(Array.isArray(image) ?
image[0] : image).getBlob());
}
})
});
// Save and close the new document
newSlide.saveAndClose();
// Save the new slide as a PDF
var newSlideFile = DriveApp.getFileById(newTempFile.getId());
var pdfBlob = newSlideFile.getAs('application/pdf');
var pdfName = items[0].getResponse() + '.pdf';
var folder =
DriveApp.getFolderById('1lEP4qtXs6fITXS9GKeaYKLfbRONgbuOQ');
folder.createFile(pdfBlob.setName(pdfName));
var folderId = "10XCnKfhk4_5agWCYxWHGCO6wwxWSG7Y9"
// Save the new slide as a PNG file
var slideId = newSlide.getSlides()[0].getObjectId();
var url = 'https://docs.google.com/presentation/d/' + newSlide.getId()
+ '/export/png?id=' + newSlide.getId() + '&pageid=' + slideId;
var options = {
headers: {
Authorization: 'Bearer ' + ScriptApp.getOAuthToken()
}
};
var response = UrlFetchApp.fetch(url, options);
var image = response.getAs(MimeType.PNG);
image.setName("png-"+Math.random().toFixed(2));
//image.setName(DriveApp.getFileById(presentationId).getName());
var img = DriveApp.getFolderById(folderId).createFile(image).getUrl()
Logger.log(img)
}
In the example below I left the folder and ss blank.
Idea is to retrieve the number after the text "Emerging Markets (" found in the file at the url specified in the code and then insert it into cell b2 in the google sheet specified.
Not getting any errors, but code is not working. Would appreciate your help. Novice here.
Thanks!
const FOLDER_ID = ""; //Folder ID of all PDFs
const SS = "";//The spreadsheet ID
const SHEET = "MSCI";//The sheet tab name
function OpenFile() {
var url = "https://www.yardeni.com/pub/mscipe.pdf";
var blob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
var file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
// Extract Text from PDF file
var doc = DocumentApp.openById(file.id);
var text = doc.getBody().getText();
return text;
const identifier = {
start: `Emerging Markets (`,
start_include: false,
end: `)`,
end_include: false
};
let results = getDocItems(docID, identifier);
return results;
}
function importToSpreadsheet(results){
const sheet = SpreadsheetApp.openById(SS).getSheetByName(SHEET);
var cell = sheet.getRange("B2");
cell.setValue(results);
}
I see two functions: OpenFile() and importToSpreadsheet(results), but I see no lines where the functions are called.
Just a guess. Perhaps you need to add at the end of your code this line:
importToSpreadsheet(OpenFile());
Update
The OpenFile() function gets you all the text. If you need only the part of the text between 'Emerging Markets (' and ')' you can cut it out this way:
var text = OpenFile(); // all text
var part = text.split('Emerging Markets (')[1].split(')')[0]; // a part between 'Emerging Markets (' and ')'
importToSpreadsheet(part); // put the part in the cell
The lines from const identifier = {... to ...return results; are redundant. Probably they were taken from another sample and don't belong this code.
I have a PDF file saved in Google Drive, I want to find a text from that file i.e USD then pick the value next to found text i.e: 167.1764, and insert it in my google spreadsheet.
Below is the preview of my PDF File.
Link to my PDF File.
Here is the code below which I tried but failed to find the text and reached to that value which is next to it.
below is my code.
function extractTextFromPDF() {
var drive = DriveApp;
var folders = drive.getFolderById('folderid');
var newfile = folders.getFilesByName('08-Sep-2021.pdf');
if(newfile.hasNext()){
var file1 = newfile.next().getBlob();
}
var blob = file1;
var resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
var file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
// Extract Text from PDF file
var doc = DocumentApp.openById(file.id);
var text = doc.getBody().getText();
Logger.log(text);
//DriveApp.getFileById(file.id).setTrashed(true);
var body = doc.getBody();
var foundElement = body.findText("(USD)");
while (foundElement != null) {
// Get the text object from the element
var foundText = foundElement.getElement().asText();
// Where in the element is the found text?
var start = foundElement.getStartOffset();
var end = foundElement.getEndOffsetInclusive();
}
// i want the value of USD i.e 167.1144 in log
Logger.log(foundText);
}
With the help of RegEx you can extract this. I'm not the best with those patterns. But maybe somebody else can optimize so the split is not necessary. (here is a link).
The code:
function extractTextFromPDF() {
const folders = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
const newfile = folders.getFilesByName('08-Sep-2021.pdf');
if(newfile.hasNext()){
var file1 = newfile.next().getBlob();
}
const blob = file1;
const resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
const file = Drive.Files.insert(resource, blob, {convert: true});
// Extract Text from PDF file
const doc = DocumentApp.openById(file.id);
const text = doc.getBody().getText();
Logger.log(text);
const buying = /USD\n(.*?)$/gm.exec(text)[1].trim();
const selling = /USD\n\s*\S*\n(.*?)$/gm.exec(text)[1].trim();
console.log(buying)
console.log(selling)
//Remove the converted file.
DriveApp.getFileById(file.id).setTrashed(true);
}
I'm trying to copy an image from a google doc "template" and paste in another doc with script. Already search for some solutions in web but none worked for me. Here's my code, this is getting an invalid image data error.
function creatingLabels(link, document, model, labelTemplate) {
var headerLabel = labelTemplate.getBody().getImages();
Logger.log(headerLabel.toString());
Logger.log(headerLabel);
var textLabel = labelTemplate.getBody().getText();
var text = textLabel.replace(' %LOCAL%', model);
var QrCode = getImageFromURL(link);
document.getBody().insertImage(1, headerLabel)
labelTemplate.getBody().setText(text);
labelTemplate.getBody().insertImage(1, QrCode);
}
function getImageFromURL(link) {
var url = encodeURI(link)
return UrlFetchApp.fetch(url, { muteHttpExceptions: true });
}
This function copies an image from one document and creates another document and appends the image to the new document. It also displays the image on a dialog. If your looking for the image and can't find it, then look in your root folder.
function copyImage() {
var doc=DocumentApp.getActiveDocument();
var body=doc.getBody();
var images=body.getImages();
var image=images[0];
var b64Url='data:' + image.getBlob().getContentType() + ';base64,' + Utilities.base64Encode(image.getBlob().getBytes());
var html=Utilities.formatString('<img src="%s" width="640" height="480" />',b64Url);
var userInterface=HtmlService.createHtmlOutput(html).setWidth(700).setHeight(550);
DocumentApp.getUi().showModelessDialog(userInterface, 'Images');
var doc1=DocumentApp.create('SO2');
doc1.getBody().appendImage(image.getBlob());
var image=doc1.getBody().getImages()[0];
image.setWidth(640);
image.setHeight(480);
doc1.saveAndClose();
}
I'm trying to make a google script for exporting (or printing) a new version of google spreadsheet (or sheet) to pdf, with page parameters (portrait/landscape, ...)
I've researched about this and found a possible solution here.
There are several similar solutions like this, but only work with old version of google spreadsheet.
Please, consider this code:
function exportAsPDF() {
//This code runs from a NEW version of spreadsheet
var oauthConfig = UrlFetchApp.addOAuthService("google");
oauthConfig.setAccessTokenUrl("https://www.google.com/accounts/OAuthGetAccessToken");
oauthConfig.setRequestTokenUrl("https://www.google.com/accounts/OAuthGetRequestToken?scope=https://spreadsheets.google.com/feeds/");
oauthConfig.setAuthorizationUrl("https://www.google.com/accounts/OAuthAuthorizeToken");
oauthConfig.setConsumerKey("anonymous"); oauthConfig.setConsumerSecret("anonymous");
var requestData = { "method": "GET", "oAuthServiceName": "google","oAuthUseToken": "always" };
var ssID1="0AhKhywpH-YlQdDhXZFNCRFROZ3NqWkhBWHhYTVhtQnc"; //ID of an Old version of spreadsheet
var ssID2="10xZX9Yz95AUAPu92BkBTtO0fhVk9dz5LxUmJQsJ7yPM"; //ID of a NEW version of spreadsheet
var ss1 = SpreadsheetApp.openById(ssID1); //Old version ss object
var ss2 = SpreadsheetApp.openById(ssID2); //New version ss object
var sID1=ss1.getActiveSheet().getSheetId().toString(); // old version sheet id
var sID2=ss2.getActiveSheet().getSheetId().toString(); // new version sheet id
//For Old version, this runs ok.
var url1 = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key="+ssID1+"&gid="+sID1+"&portrait=true"+"&exportFormat=pdf";
var result1 = UrlFetchApp.fetch(url1 , requestData);
var contents1=result1.getBlob();
var pdfFile1=DriveApp.createFile(contents1).setName("FILE1.pdf");
//////////////////////////////////////////////
var url2 = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key="+ssID2+"&gid="+sID2+"&portrait=true"+"&exportFormat=pdf";
var result2 = UrlFetchApp.fetch(url2 , requestData);
var contents2=result2.getBlob();
var pdfFile2=DriveApp.createFile(contents2).setName("FILE2.pdf");
}
It works right and generates the file “FILE1.pdf”, that can be opened correctly. But for the new version of spreadsheet, it results in error 302 (truncated server response) at “var result2 = UrlFetchApp.fetch(url2 , requestData);”. Well, it’s ok because the url format for the new version doesn’t include the “key” argument. A correct url for new versions must be like "https://docs.google.com/spreadsheets/d/"+ssID2+"/export?gid="+sID2+"&portrait=true&format=pdf"
Using this for url2 (var url2 = "https://docs.google.com/spreadsheets/d/"+ssID2+"/export?gid="+sID2+"&portrait=true&format=pdf") it fails again with error “Authorization can’t be performed for service: google”.
Well, this error could be due to an incorrect scope for the RequestTokenUrl. I’ve found the alternative scope https://docs.google.com/feeds and set it: oauthConfig.setRequestTokenUrl("https://www.google.com/accounts/OAuthGetRequestToken?scope=https://docs.google.com/feed/");
After the code runs again, a new error happens at the line with UrlFetchApp.fetch(url2 , requestData);: “Error OAuth” … I don’t know how to continue … I’ve tested hundreds of variations without good results.
Any ideas? is correct the scope docs.google.com/feeds for new version of spreadsheets? is correct the oauthConfig?
Thanks in advance.
Here is my spreadsheet-to-pdf script. It works with the new Google Spreadsheet API.
// Convert spreadsheet to PDF file.
function spreadsheetToPDF(id,index,url,name)
{
SpreadsheetApp.flush();
//define usefull vars
var oauthConfig = UrlFetchApp.addOAuthService("google");
var scope = "https://docs.google.com/feeds/";
//make OAuth connection
oauthConfig.setAccessTokenUrl("https://www.google.com/accounts/OAuthGetAccessToken");
oauthConfig.setRequestTokenUrl("https://www.google.com/accounts/OAuthGetRequestToken?scope="+scope);
oauthConfig.setAuthorizationUrl("https://www.google.com/accounts/OAuthAuthorizeToken");
oauthConfig.setConsumerKey("anonymous");
oauthConfig.setConsumerSecret("anonymous");
//get request
var request = {
"method": "GET",
"oAuthServiceName": "google",
"oAuthUseToken": "always",
"muteHttpExceptions": true
};
//define the params URL to fetch
var params = '?gid='+index+'&fitw=true&exportFormat=pdf&format=pdf&size=A4&portrait=true&sheetnames=false&printtitle=false&gridlines=false';
//fetching file url
var blob = UrlFetchApp.fetch("https://docs.google.com/a/"+url+"/spreadsheets/d/"+id+"/export"+params, request);
blob = blob.getBlob().setName(name);
//return file
return blob;
}
I've had to use the "muteHttpExceptions" parameter to know exactly the new URL. With this parameter, I downloaded my file with the HTML extension to get a "Moved permanently" page with my final url ("https://docs.google.com/a/"+url+"/spreadsheets/d/"+id+"/export"+params").
And note that I am in an organization. So I've had to specify its domain name ("url" parameter, ie "mydomain.com").
(Copied from this answer.)
This function is an adaptation of a script provided by "ianshedd..." here.
It:
Generates PDFs of ALL sheets in a spreadsheet, and stores them in the same folder containing the spreadsheet. (It assumes there's just one folder doing that, although Drive does allow multiple containment.)
Names pdf files with Spreadsheet & Sheet names.
Uses the Drive service (DocsList is deprecated.)
Can use an optional Spreadsheet ID to operate on any sheet. By default, it expects to work on the "active spreadsheet" containing the script.
Needs only "normal" authorization to operate; no need to activate advanced services or fiddle with oAuthConfig.
With a bit of research and effort, you could hook up to an online PDF Merge API, to generate a single PDF file. Barring that, and until Google provides a way to export all sheets in one PDF, you're stuck with separate files.
Script:
/**
* Export one or all sheets in a spreadsheet as PDF files on user's Google Drive,
* in same folder that contained original spreadsheet.
*
* Adapted from https://code.google.com/p/google-apps-script-issues/issues/detail?id=3579#c25
*
* #param {String} optSSId (optional) ID of spreadsheet to export.
* If not provided, script assumes it is
* sheet-bound and opens the active spreadsheet.
* #param {String} optSheetId (optional) ID of single sheet to export.
* If not provided, all sheets will export.
*/
function savePDFs( optSSId, optSheetId ) {
// If a sheet ID was provided, open that sheet, otherwise assume script is
// sheet-bound, and open the active spreadsheet.
var ss = (optSSId) ? SpreadsheetApp.openById(optSSId) : SpreadsheetApp.getActiveSpreadsheet();
// Get URL of spreadsheet, and remove the trailing 'edit'
var url = ss.getUrl().replace(/edit$/,'');
// Get folder containing spreadsheet, for later export
var parents = DriveApp.getFileById(ss.getId()).getParents();
if (parents.hasNext()) {
var folder = parents.next();
}
else {
folder = DriveApp.getRootFolder();
}
// Get array of all sheets in spreadsheet
var sheets = ss.getSheets();
// Loop through all sheets, generating PDF files.
for (var i=0; i<sheets.length; i++) {
var sheet = sheets[i];
// If provided a optSheetId, only save it.
if (optSheetId && optSheetId !== sheet.getSheetId()) continue;
//additional parameters for exporting the sheet as a pdf
var url_ext = 'export?exportFormat=pdf&format=pdf' //export as pdf
+ '&gid=' + sheet.getSheetId() //the sheet's Id
// following parameters are optional...
+ '&size=letter' // paper size
+ '&portrait=true' // orientation, false for landscape
+ '&fitw=true' // fit to width, false for actual size
+ '&sheetnames=false&printtitle=false&pagenumbers=false' //hide optional headers and footers
+ '&gridlines=false' // hide gridlines
+ '&fzr=false'; // do not repeat row headers (frozen rows) on each page
var options = {
headers: {
'Authorization': 'Bearer ' + ScriptApp.getOAuthToken()
}
}
var response = UrlFetchApp.fetch(url + url_ext, options);
var blob = response.getBlob().setName(ss.getName() + ' - ' + sheet.getName() + '.pdf');
//from here you should be able to use and manipulate the blob to send and email or create a file per usual.
//In this example, I save the pdf to drive
folder.createFile(blob);
}
}
Thank you!
Variant 2 works with me with options:
var requestData = {
"oAuthServiceName": "spreadsheets",
"oAuthUseToken": "always"
};
Then:
var ssID = ss.getId();
var sID = ss.getSheetByName(name).getSheetId();
//creating pdf
var pdf = UrlFetchApp.fetch("https://docs.google.com/spreadsheets/d/" + ssID + "/export?gid=" + sID + "&portrait=false&size=A4&format=pdf", requestData).getBlob();
//folder to created pdf in
var folder = DriveApp.getFolderById(id);
//creating pdf in this folder with given name
folder.createFile(pdf).setName(name);
I can change image size, orientation etc. with listed parameters perfectly.