Google App Script find text from Google Document next to key text - google-apps-script

I have a PDF file saved in Google Drive, I want to find a text from that file i.e USD then pick the value next to found text i.e: 167.1764, and insert it in my google spreadsheet.
Below is the preview of my PDF File.
Link to my PDF File.
Here is the code below which I tried but failed to find the text and reached to that value which is next to it.
below is my code.
function extractTextFromPDF() {
var drive = DriveApp;
var folders = drive.getFolderById('folderid');
var newfile = folders.getFilesByName('08-Sep-2021.pdf');
if(newfile.hasNext()){
var file1 = newfile.next().getBlob();
}
var blob = file1;
var resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
var file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
// Extract Text from PDF file
var doc = DocumentApp.openById(file.id);
var text = doc.getBody().getText();
Logger.log(text);
//DriveApp.getFileById(file.id).setTrashed(true);
var body = doc.getBody();
var foundElement = body.findText("(USD)");
while (foundElement != null) {
// Get the text object from the element
var foundText = foundElement.getElement().asText();
// Where in the element is the found text?
var start = foundElement.getStartOffset();
var end = foundElement.getEndOffsetInclusive();
}
// i want the value of USD i.e 167.1144 in log
Logger.log(foundText);
}

With the help of RegEx you can extract this. I'm not the best with those patterns. But maybe somebody else can optimize so the split is not necessary. (here is a link).
The code:
function extractTextFromPDF() {
const folders = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
const newfile = folders.getFilesByName('08-Sep-2021.pdf');
if(newfile.hasNext()){
var file1 = newfile.next().getBlob();
}
const blob = file1;
const resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
const file = Drive.Files.insert(resource, blob, {convert: true});
// Extract Text from PDF file
const doc = DocumentApp.openById(file.id);
const text = doc.getBody().getText();
Logger.log(text);
const buying = /USD\n(.*?)$/gm.exec(text)[1].trim();
const selling = /USD\n\s*\S*\n(.*?)$/gm.exec(text)[1].trim();
console.log(buying)
console.log(selling)
//Remove the converted file.
DriveApp.getFileById(file.id).setTrashed(true);
}

Related

Use google apps script to extract text from pdf file found at web address and and insert it into a Google Sheet

In the example below I left the folder and ss blank.
Idea is to retrieve the number after the text "Emerging Markets (" found in the file at the url specified in the code and then insert it into cell b2 in the google sheet specified.
Not getting any errors, but code is not working. Would appreciate your help. Novice here.
Thanks!
const FOLDER_ID = ""; //Folder ID of all PDFs
const SS = "";//The spreadsheet ID
const SHEET = "MSCI";//The sheet tab name
function OpenFile() {
var url = "https://www.yardeni.com/pub/mscipe.pdf";
var blob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: blob.getName(),
mimeType: blob.getContentType()
};
// Enable the Advanced Drive API Service
var file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
// Extract Text from PDF file
var doc = DocumentApp.openById(file.id);
var text = doc.getBody().getText();
return text;
const identifier = {
start: `Emerging Markets (`,
start_include: false,
end: `)`,
end_include: false
};
let results = getDocItems(docID, identifier);
return results;
}
function importToSpreadsheet(results){
const sheet = SpreadsheetApp.openById(SS).getSheetByName(SHEET);
var cell = sheet.getRange("B2");
cell.setValue(results);
}
I see two functions: OpenFile() and importToSpreadsheet(results), but I see no lines where the functions are called.
Just a guess. Perhaps you need to add at the end of your code this line:
importToSpreadsheet(OpenFile());
Update
The OpenFile() function gets you all the text. If you need only the part of the text between 'Emerging Markets (' and ')' you can cut it out this way:
var text = OpenFile(); // all text
var part = text.split('Emerging Markets (')[1].split(')')[0]; // a part between 'Emerging Markets (' and ')'
importToSpreadsheet(part); // put the part in the cell
The lines from const identifier = {... to ...return results; are redundant. Probably they were taken from another sample and don't belong this code.

Google Apps Script how to export specific sheet as csv format

In MIT Inventor II, I use web component to get SpreadsheetID and SheetID through doGet() of google apps script. After I get the information I use another web component to set url as below to get csv-formatted file from specific sheet. My question is how to make GAS to get SpreadsheetID & SheetID and then export csv file at one time, so that I don't have to use 2 web components in Inventor side?
GAS codes is as below. This is to "return" spreadsheetID and sheetID.
function doGet(e) {
filename = e.parameter.account;
fileList = DriveApp.getFilesByName(filename);
while (fileList.hasNext()) {
var fileID = fileList.next().getId()
}
var file = SpreadsheetApp.openById(fileID) ;
sheet = file.getSheetByName("Message").activate()
var messageID = sheet.getSheetId();
return ContentService.createTextOutput([fileID,messageID]);
After I got SpreadsheetID & SheetID, I have to set 2nd web component from Inventor side to get csv file, see below.
https://docs.google.com/spreadsheets/d/xxxxSpreadsheetIDxxxx/edit#gid=sheetID
Here is how you can create a csv file of a selected sheet in google drive:
function sheetToCsv()
{
var ssID = SpreadsheetApp.getActiveSpreadsheet().getId();
var sheet_Name = "Sheet1"
var requestData = {"method": "GET", "headers":{"Authorization":"Bearer "+ScriptApp.getOAuthToken()}};
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheet_Name)
var sheetNameId = sheet.getSheetId().toString();
params= ssID+"/export?gid="+sheetNameId +"&format=csv"
var url = "https://docs.google.com/spreadsheets/d/"+ params
var result = UrlFetchApp.fetch(url, requestData);
var resource = {
title: sheet_Name+".csv",
mimeType: "application/vnd.csv"
}
var fileJson = Drive.Files.insert(resource,result)
}
The code creates a csv file that has the content of Sheet1.
In order to run the aforementioned function you need to activate the Advanced Drive Service.
Explanation:
Go to Resources => Advanced Google Services => activate Drive API
Another option is to create the csv file to a particular folder, then you need to replace the resource part of the code with this:
var folder_id ='id';
var resource = {
title: sheet_Name+".csv",
mimeType: "application/vnd.csv",
parents: [{ id: folder_id }]
}
My application was how to save a tab of a Google sheets spreadsheet to a CSV file in a shared drive. Doing it to the default "My Drive" was relatively easy based on Marios' answer in this post, but I struggled with this for a shared drive while until I came across ziganotschka's example which solved my problem.
Code for my simple App Script is:
function sheetToCsv()
{
var ssID = SpreadsheetApp.getActiveSpreadsheet().getId();
var sheet_Name = "[Your sheet name goes here]";
var requestData = {"method": "GET", "headers":{"Authorization":"Bearer "+ScriptApp.getOAuthToken()}};
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheet_Name);
var sheetNameId = sheet.getSheetId().toString();
params= ssID+"/export?gid="+sheetNameId +"&format=csv";
var url = "https://docs.google.com/spreadsheets/d/"+ params;
var result = UrlFetchApp.fetch(url, requestData);
var resource = {
title: sheet_Name+".csv",
mimeType: "MimeType.CSV",
parents:[{
"id": "[Your Shared Drive Folder ID goes here]"
}]
}
var optionalArgs={supportsAllDrives: true};
var fileJson = Drive.Files.insert(resource, result, optionalArgs);
}
I added a timestamp to the file name and a trigger to cause the script to execute daily via a timer.

Downloading a Google Slides presentation as PowerPoint doc using Google Apps Script?

The GUI of Google Slides offers to download a GSlides presentation as a Powerpoint (myFile.pptx). I could not find the equivalent in the Google Apps Script documentation - any pointer?
EDIT
Thanks to comments and answers, I tried this snippet:
function testFileOps() {
// Converts the file named 'Synthese' (which happens to be a Google Slide doc) into a pptx
var files = DriveApp.getFilesByName('Synthese');
var rootFolder = DriveApp.getRootFolder();
while (files.hasNext()) {
var file = files.next();
var blobPptx = file.getBlob().getAs('application/vnd.openxmlformats-officedocument.presentationml.presentation');
var result = rootFolder.createFile(blobPptx);
}
}
It returns an error:
Converting from application/vnd.google-apps.presentation to
application/vnd.openxmlformats-officedocument.presentationml.presentation
is not supported. (line 7, file "Code")
SECOND EDIT
As per another suggestion in comments, I tried to make an http call from Google App Script, that would directly convert the gslides into pptx, without size limit. It produces a file on G Drive, but this file is corrupted / unreadable. The GAS script:
function convertFileToPptx() {
// Converts a public Google Slide file into a pptx
var rootFolder = DriveApp.getRootFolder();
var response = UrlFetchApp.fetch('https://docs.google.com/presentation/d/1Zc4-yFoUYONXSLleV_IaFRlNk6flRKUuAw8M36VZe-4/export/pptx');
var blobPptx = response.getContent();
var result = rootFolder.createFile('test2.pptx',blobPptx,MimeType.MICROSOFT_POWERPOINT);
}
Notes:
I got the mime type for pptx here
using the mime type 'pptx' returns the same error message
How about this modification?
Modification point:
response.getContent() returns byte array. So please use response.getBlob().
Modified script:
function convertFileToPptx() {
var fileId = "1Zc4-yFoUYONXSLleV_IaFRlNk6flRKUuAw8M36VZe-4";
var outputFileName = "test2.pptx";
var url = 'https://docs.google.com/presentation/d/' + fileId + '/export/pptx';
var rootFolder = DriveApp.getRootFolder();
var response = UrlFetchApp.fetch(url);
var blobPptx = response.getBlob();
var result = rootFolder.createFile(blobPptx.setName(outputFileName));
}
Note:
If you want to convert Google Slides, which are not published, in your Google Drive, please use access token. At that time please modify url as follows.
var url = 'https://docs.google.com/presentation/d/' + fileId + '/export/pptx?access_token=' + ScriptApp.getOAuthToken();
DriveApp.createFile() creates a file on root folder as the default.
References:
Class HTTPResponse
getOAuthToken()
As mentioned by tehhowch, you could get the Google Slide file from your Drive and get it as a .pptx. (Not sure of mime type.)
File#getAs:
I add all modifications with token part and specific folder
function convertFileToPptx() {
var fileId = "Your File ID";
var outputFileName = "name.pptx";
var url = 'https://docs.google.com/presentation/d/' + fileId + '/export/pptx';
//var rootFolder = DriveApp.getRootFolder();
var rootFolder = DriveApp.getFolderById("Your Folder ID")
var params = {method:"GET", headers:{"authorization":"Bearer "+ ScriptApp.getOAuthToken()}};
var response = UrlFetchApp.fetch(url,params);
var blobPptx = response.getBlob();
var result = rootFolder.createFile(blobPptx.setName(outputFileName));
}
To get the byte[] do:
function downloadAsPPTX(){
var presentation = SlidesApp.getActivePresentation();
var fileId = presentation.getId();
var url = 'https://docs.google.com/presentation/d/' + fileId + '/export/pptx';
var response = UrlFetchApp.fetch(url);
var blobPptx = response.getBlob();
Logger.log("size: "+blobPptx.getBytes().length);
}

OCR images from google drive using Google App Script

I have implemented following script to do OCR on single and multiple images using image URL.
function doOCRALL() {
var selected = SpreadsheetApp.getActiveSheet().getActiveRange().getValues().length;
for (var i = 0; i < selected; i++) {
var activeCol = SpreadsheetApp.getActiveSheet().getActiveCell().getColumn();
var activeRow = SpreadsheetApp.getActiveSheet().getActiveCell().getRow();
var valueURL = SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol).getValue();
var image = UrlFetchApp.fetch(valueURL).getBlob();
var file = {
title: 'OCR File',
mimeType: 'image/png'
};
// OCR is supported for PDF and image formats
file = Drive.Files.insert(file, image, {ocr: true});
var doc = DocumentApp.openByUrl(file.embedLink);
var body = doc.getBody().getText();
//Get link Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol + 2).setValue(file.embedLink);
//Get Content of Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow + i, activeCol + 1).setValue(body);
}
}
function doOCR() {
//
var activeCol = SpreadsheetApp.getActiveSheet().getActiveCell().getColumn();
var activeRow = SpreadsheetApp.getActiveSheet().getActiveCell().getRow();
var valueURL = SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol).getValue();
var image = UrlFetchApp.fetch(valueURL).getBlob();
var file = {
title: 'OCR File',
mimeType: 'image/png'
};
// OCR is supported for PDF and image formats
file = Drive.Files.insert(file, image, {ocr: true});
var doc = DocumentApp.openByUrl(file.embedLink);
var body = doc.getBody().getText();
// Print the Google Document URL in the console
Logger.log("body: %s", body);
Logger.log("File URL: %s", file.embedLink);
//Get link Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol + 2).setValue(file.embedLink);
//Get Content of Doc that Generated
SpreadsheetApp.getActiveSheet().getRange(activeRow, activeCol + 1).setValue(body);
}
function onOpen() {
var ui = SpreadsheetApp.getUi();
// Or DocumentApp or FormApp.
ui.createMenu('OCR Tools')
.addItem('Extract Cell', 'doOCR')
.addItem('Extract All Cell', 'doOCRALL')
.addSeparator()
.addSubMenu(ui.createMenu('About US')
.addItem('Infomation', 'menuItem2'))
.addToUi();
}
function menuItem2() {
SpreadsheetApp.getUi() // Or DocumentApp or FormApp.
.alert('AIO Team');
}
When I provide an image URL for any image, it works. But if I upload the same image on my drive and then provide the image URL from drive, it only gives me "Sign in Main menu". For other drive images it gives the same text.
Thanks in advance.
If content is already in Drive, you do not need to get a link to it - just supply the file id (which you can get from a link to it).
Once you have the file ID, you can simply copy it, and use the optimal arguments to activate OCR. The full options list is, of course, available on the Drive REST API page: https://developers.google.com/drive/api/v2/reference/files/copy#parameters
I encourage you to also read about best practices such as fields specification (which is a requirement of the more recent drive API version).
This function takes an input Drive file ID that you got from somewhere, and a truth-y value to set the "use OCR" option.
Obvious assumptions are that you have permission, the id is valid, you have enabled the advanced service and the Drive API in cloud console, etc.
function getIdOfCopyOfDriveFile(fileId, useOcr) {
const options = {
fields: "choose the metadata fields to return in the response e.g. 'id,title,parents'"
};
const existingMetaData = Drive.Files.get(fileId, options);
options.ocr = !!useOcr;
existingMetaData.title += " (copied with" + (options.ocr ? " " : "out ") + "ocr)";
// We could do other modifications of fields we requested before
// copying, like changing the parents array to move the new file.
const newFileMetaData = Drive.Files.copy(existingMetaData, fileId, options);
return newFileMetaData.id;
}

Can I download file from URL link generated by google apps script

Please help I'm learning google-apps-script for short time.
I want to download file from remote site by url generating from data that stores in my spreadsheet.
for example, i have 2 paremeters:
Cell1 = val1, val2, ... valN
Cell2 = val21, val22, ... val2N
I split string from cell data to Arrays and than generate URL. for example: http://mysite.com/files/file.val1.val22.zip
Than i need to download file from this link...
Can I do this process automaticaly ?
This example function will retrieve your zip file, and place it into your Google Drive in folder "StackOverflow". You can also download a more complete version from this gist.
function getFile(fileURL) {
// see https://developers.google.com/apps-script/class_urlfetchapp
var response = UrlFetchApp.fetch(fileURL);
var fileBlob = response.getBlob()
var folder = DocsList.getFolder('StackOverflow');
var result = folder.createFile(fileBlob);
debugger; // Stop to observe if in debugger
}
For example:
getFile( "http://mysite.com/files/file.val1.val22.zip" );
Note that you cannot download per se, since you have no access to your PC's resources (e.g. file system) from apps-script. The file is still in "the cloud"... in this case, it's been copied from the web site it was on, into Google Drive. If you're running the Drive app, though, the file will now sync to your PC.
Yes, You can.
Hope below code can solve your problem:
//Declare function
function downloadFile() {
//Getting url,existing name and new name for image from the sheet in
//variable url, name and new_name respectively
var sh = SpreadsheetApp.getActiveSheet();
var row = sh.getLastRow();
Logger.log(row);
for (var i = 2; i <= row; i++) {
var url = sh.getRange(i, 10).getValue();
Logger.log(url);
var name = sh.getRange(i, 13).getValue();
var new_name = sh.getRange(i, 4).getValue();
//Creating authentication token for downloading image, it may not be //required if image can be downloaded without login into
var user = "***************";
var password = "************";
var headers = {
"Accept": "application/xml",
"Content-Type": "application/xml",
"Authorization": "Basic " + Utilities.base64Encode(user + ":" + password)
};
//defining method to download file
var options = {
"method": "get",
"headers": headers
};
//Getting folder name where to store downloaded image
var folders = DriveApp.getFoldersByName('test');
while (folders.hasNext()) {
var folder = folders.next();
Logger.log(folder.getName());
}
//Getting response on hit of url using downloading method defined //earlier storing in Blob
var response = UrlFetchApp.fetch(url, options).getBlob();
//Creating image in folder defined with response in blob and logging same file //in log to check, if required
var file = folder.createFile(response);
Logger.log(file);
//renaming image
var images = folder.getFiles();
while (images.hasNext()) {
var image = images.next();
file.setName(new_name);
Logger.log(image.getName());
}
}
}
//Hope you get it now
It's worked for me.
function downloadFile() {
var url = "https://raw.githubusercontent.com/hoat23/VisionArtificialAndImageProcessing/master/bin/utils_imgprocessing.py"
Logger.log(url);
var response = UrlFetchApp.fetch(url);
var text = response.getContentText()
var newFile = DriveApp.createFile('testfilegoogle.txt',text);
debugger; // Stop to observe if in debugger
}
//Hope you get it now