App script for pdf converter exceeded maximum execution time - google-apps-script

This is my app script code that converts google docs in a folder into pdf format. The script stops after converting around 60 documents with maximum execution time error. I am converting around hundreds of files in a run. What can I do to avoid this error?
//Module to convert doc to pdf
function gdocToPDF() {
var documentRootfolder = DriveApp.getFolderById("xx") // replace this with the ID of the folder that contains the documents you want to convert
var pdfFolder = DriveApp.getFolderById("xx"); // replace this with the ID of the folder that the PDFs should be put in.
var documentRootFiles = documentRootfolder.getFiles()
while(documentRootFiles.hasNext()) {
createPDF(documentRootFiles.next().getId(), pdfFolder.getId(), function (fileID, folderID) {
if (fileID) createPDFfile(fileID, folderID);
})
}
}
function createPDF(fileID, folderID, callback) {
var templateFile = DriveApp.getFileById(fileID);
var templateName = templateFile.getName();
var existingPDFs = DriveApp.getFolderById(folderID).getFiles();
//in case no files exist
if (!existingPDFs.hasNext()) {
return callback(fileID, folderID);
}
for (; existingPDFs.hasNext();) {
var existingPDFfile = existingPDFs.next();
var existingPDFfileName = existingPDFfile.getName();
if (existingPDFfileName == templateName + ".pdf") {
Logger.log("PDF exists already. No PDF created")
return callback();
}
if (!existingPDFs.hasNext()) {
Logger.log("PDF is created")
return callback(fileID, folderID)
}
}
}
function createPDFfile(fileID, folderID) {
var templateFile = DriveApp.getFileById(fileID);
var folder = DriveApp.getFolderById(folderID);
var theBlob = templateFile.getBlob().getAs('application/pdf');
var newPDFFile = folder.createFile(theBlob);
var fileName = templateFile.getName().replace(".", ""); //otherwise filename will be shortened after full stop
newPDFFile.setName(fileName + ".pdf");
}

I generate a lot of files. What I do to avoid this error is open a dialog and run a function that creates one file at a time. I write the number of remaining files in a cell in the sheet, update it with every run. I use the success and failure handlers to decide whether to continue with file generation or close the dialog. You can generate files for hours in this way.

Related

Import new (latest) .csv file into existing Google spreadsheet automatically

I receive a new CSV file every hour in my Google Drive.
I need my spreadsheet updated with the data in the latest CSV file after it has been received in the Google Drive folder.
The files coming into the folder has a unique name for each new one according to date and time.
For example: FileName_date_24hourtime.csv
FileName_20190524_1800.csv then FileName_20190524_1900.csv etc.
Firstly I'm not sure what the best approach is:
simply with a formula (probably not possible with not knowing the exact filename?) like =IMPORTDATA
a google script to find latest .csv file and automatically import as soon as file was added to Google Drive folder
Any assistance will be great!
The .csv file:
.csv file contains 28 rows and data should be split by ;
.csv file looks like this:
NAME;-63.06;-58.08;50.62;-66.67;-80.00
NAME;-61.82;-56.83;-50.55;-77.78;-70.00
NAME;-57.77;-50.21;52.88;-77.78;-70.00
NAME1;-57.69;-61.48;-55.59;-55.56;-60.00
NAME2;-61.62;-53.79;50.34;-66.67;-70.00
NAME3;-54.62;-54.57;-52.22;55.56;-60.00
... with total of 28 rows
Data should go to "Import_Stats" sheet.
The best approach here would be a script with a trigger that runs a function that performs data import to a spreadsheet.
Create a time-based trigger with 1-hour offset:
function trigger() {
var trg = ScriptApp.newTrigger('checkFiles');
trg.timeBased().everyHours(1).create();
}
Create function that checks files in a folder (e.g. "checkFiles").
function checkFiles(alreadyWaited) {
//get spreadsheet and sheet;
var id = 'yourSpreadsheetId';
var ss = SpreadsheetApp.openById(id);
var sh = ss.getSheetByName('Import_Stats');
var folderId = 'yourIdHere'; //folder by id is the simplest way;
//get folder and files in it;
var folder = DriveApp.getFolderById(folderId);
var files = folder.getFilesByType('text/csv');
var filesImport = folder.getFilesByType('text/csv'); //fetch files again;
//try to fetch number of files;
var scriptProps = PropertiesService.getScriptProperties();
var numFiles = scriptProps.getProperty('numFiles');
//count current number of files;
var numCurr = 0;
while(files.hasNext()) {
var f = files.next();
numCurr++;
}
//if this is the first time, save current number;
if(numFiles===null) {
scriptProps.setProperty('numFiles',numCurr);
}else {
numFiles = parseInt(numFiles);
}
if(numFiles===null||numFiles===(numCurr-1)) {
//get today and reset everything except hours;
var today = new Date();
today.setMinutes(0);
today.setSeconds(0);
today.setMilliseconds(0);
//iterate over files;
while(files.hasNext()) {
var file = files.next();
//get file creation date and reset;
var created = file.getDateCreated();
created.setMinutes(0);
created.setSeconds(0);
created.setMilliseconds(0);
//calculate offset, equals 0 for each file created this hour;
var offset = today.valueOf()-created.valueOf();
if(offset===0) {
//perform data import here;
var data = file.getBlob().getDataAsString();
//ignore empty files;
if(data!=='') {
//split data in rows;
var arr = data.split('\r\n');
//resplit array if only one row;
if(arr.length===1) {
arr = data.split('\n');
}
//append rows with data to sheet;
arr.forEach(function(el){
el = el.split(';');
sh.appendRow(el);
});
}
}
}
}else {
//if never waited, set minute to wait, else add minute;
if(!alreadyWaited) {
alreadyWaited = 60000;
}else {
alreadyWaited += alreadyWaited;
}
//if waited for 10 minutes -> end recursion;
if(alreadyWaited===600000) {
Logger.log('Waited 10 minutes but recieved no files!');
return;
}
//wait a minute and recheck;
Utilities.sleep(60000);
return checkFiles(alreadyWaited);
}
}
And this is what should happen:

How to extract files from .tar archive with Google Apps Script

Good day all,
I'm trying to get a tar.gz attachment from Gmail, extract the file and save it to Google Drive. It's a daily auto generated report which I'm getting, compressed due to >25mb raw size.
I got this so far:
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Setup");
var gmailLabels = sheet.getRange("B2:B2").getValue(); //I have my Gmail Label stored here
var driveFolder = sheet.getRange("B5:B5").getValue(); //I have my GDrive folder name stored here
// apply label filter, search only last 24hrs mail
var filter = "has:attachment label:" + gmailLabels + " after:" + Utilities.formatDate(new Date(new Date().getTime()-1*(24*60*60*1000)), "GMT", "yyyy/MM/dd");
var threads = GmailApp.search(filter, 0, 1); // check only 1 email at a time
var folder = DriveApp.getFoldersByName(driveFolder);
if (folder.hasNext()) {
folder = folder.next();
} else {
folder = DriveApp.createFolder(driveFolder);
}
var message = threads[0].getMessages()[0];
var desc = message.getSubject() + " #" + message.getId();
var att = message.getAttachments();
for (var z=0; z<att.length; z++) {
var attName = att[z].getName()
var attExt = attName.search('csv')
if (attExt > 0){ var fileType = "csv"; }
else {
var attExt = attName.search('tar.gz');
if (attExt > 0){ var fileType = "gzip"; }
else {
threads[x].addLabel(skipLabel);
continue;
}
}
// save the file to GDrive
try {
file = folder.createFile(att[z]);
file.setDescription(desc);
}
catch (e) {
Logger.log(e.toString());
}
// extract if gzip
if (fileType == 'gzip' ){
var ungzippedFile = Utilities.ungzip(file);
try {
gz_file = folder.createFile(ungzippedFile);
gz_file.setDescription(desc);
}
catch (e) {
Logger.log(e.toString());
}
}
}
Everything works fine, but in the last step it only decompresses the .gz file saving .tar file in the Drive. What can I do with it next? The .tar file contains a .csv file which I need to extract and process afterwards.
I should probably add that I'm limited to use GAS only.
Any help warmly appreciated.
How about this answer? Unfortunately, in the current stage, there are no methods for extracting files from a tar file in Google Apps Script, yet. But fortunately, from wiki of tar, we can retrieve the structure of the tar data. I implemented this method with Google Apps Script using this structure data.
1. Unarchive of tar data:
Before you run this script, please set the file ID of tar file to run(). Then, run run().
Sample script:
function tarUnarchiver(blob) {
var mimeType = blob.getContentType();
if (!mimeType || !~mimeType.indexOf("application/x-tar")) {
throw new Error("Inputted blob is not mimeType of tar. mimeType of inputted blob is " + mimeType);
}
var baseChunkSize = 512;
var byte = blob.getBytes();
var res = [];
do {
var headers = [];
do {
var chunk = byte.splice(0, baseChunkSize);
var headerStruct = {
filePath: function(b) {
var r = [];
for (var i = b.length - 1; i >= 0; i--) {
if (b[i] != 0) {
r = b.slice(0, i + 1);
break;
}
}
return r;
}(chunk.slice(0, 100)),
fileSize: chunk.slice(124, 124 + 11),
fileType: Utilities.newBlob(chunk.slice(156, 156 + 1)).getDataAsString(),
};
Object.keys(headerStruct).forEach(function(e) {
var t = Utilities.newBlob(headerStruct[e]).getDataAsString();
if (e == "fileSize") t = parseInt(t, 8);
headerStruct[e] = t;
});
headers.push(headerStruct);
} while (headerStruct.fileType == "5");
var lastHeader = headers[headers.length - 1];
var filePath = lastHeader.filePath.split("/");
var blob = Utilities.newBlob(byte.splice(0, lastHeader.fileSize)).setName(filePath[filePath.length - 1]).setContentTypeFromExtension();
byte.splice(0, Math.ceil(lastHeader.fileSize / baseChunkSize) * baseChunkSize - lastHeader.fileSize);
res.push({fileInf: lastHeader, file: blob});
} while (byte[0] != 0);
return res;
}
// Following function is a sample script for using tarUnarchiver().
// Please modify this to your situation.
function run() {
// When you want to extract the files from .tar.gz file, please use the following script.
var id = "### file ID of .tar.gz file ###";
var gz = DriveApp.getFileById(id).getBlob().setContentTypeFromExtension();
var blob = Utilities.ungzip(gz).setContentTypeFromExtension();
// When you want to extract the files from .tar file, please use the following script.
var id = "### file ID of .tar file ###";
var blob = DriveApp.getFileById(id).getBlob().setContentType("application/x-tar");
// Extract files from a tar data.
var res = tarUnarchiver(blob);
// If you want to create the extracted files to Google Drive, please use the following script.
res.forEach(function(e) {
DriveApp.createFile(e.file);
});
// You can see the file information by below script.
Logger.log(res);
}
2. Modification of your script:
If this script is used for your script, for example, how about this? tarUnarchiver() of above script is used. But I'm not sure how you want to use this script. So please think of this as a sample.
Sample script:
// extract if gzip
if (fileType == 'gzip' ){
var ungzippedFile = Utilities.ungzip(file);
try {
var blob = ungzippedFile.setContentType("application/x-tar"); // Added
tarUnarchiver(blob).forEach(function(e) {folder.createFile(e.file)}); // Added
}
catch (e) {
Logger.log(e.toString());
}
}
In this modified script, the blob of ungzippedFile (tar data) is put to my script and run tarUnarchiver(). Then, each file is created to the folder.
Note:
When you run this script, if an error related to mimeType occurs, please set the mimeType of "tar" to the input blob.
As the method for setting the mimeType, you can use as follows.
blob.setContentTypeFromExtension() Ref
blob.setContentType("application/x-tar") Ref
It might have already been got the mimeType in the blob. At that time, setContentTypeFromExtension() and setContentType() are not required.
If you want to retrieve the file path of each file, please check the response from tarUnarchiver(). You can see it as a property of fileInf from the response.
Limitations:
When this script is used, there is the limitations as follows. These limitations are due to Google's specification.
About the file size, when the size of tar data is over 50 MB (52,428,800 bytes), an error related to size limitation occurs.
When the size of extracted file is over 50 MB, an error occurs.
When a single file size of extracted file is near to 50 MB, there is a case that an error occurs.
In my environment, I could confirm that the size of 49 MB can be extracted. But in the case of just 50 MB
, an error occurred.
Reference:
tar (wiki)
In my environment, I could confirm that the script worked. But if this script didn't work, I apologize. At that time, can you provide a sample tar file? I would like to check it and modify the script.

Checking a folder for a filename, and exporting as PDF if it is not found

I am quite new to scripting and has attempted this for embarrassingly many hours now, so I hope you can help me.
I have a dashfolder that contains Google Sheets called "X", and I have a pdffolder containing pdfs that are called "X.pdf". I am trying to loop through the names of my dashfiles + ".pdf" to find those which are missing, and finally create its pdf in that same folder. My script, however, loops too many times. I want it to skip the dashfile if a file with the name+".pdf" are already in the pdffolder. Here is my code
function createPdf() {
var pdfFolder = DriveApp.getFolderById("ID")
var pdfFiles = pdfFolder.getFiles();
var dashFolder = DriveApp.getFolderById('ID');
var dashFiles = dashFolder.getFiles();
var pdfNames = [];
var dashNames = [];
while (pdfFiles.hasNext()) {
var currentFile2 = pdfFiles.next();
var fileName2 = currentFile2.getName();
pdfNames.push(fileName2);
}
while (dashFiles.hasNext()) {
var currentFile = dashFiles.next();
var fileName = currentFile.getName();
dashNames.push(fileName);
for (p in pdfNames) {
if((fileName + ".pdf") == pdfNames[p]) {
Logger.log("YES");
}
else {
var xlsBlob = currentFile.getBlob(); // Blob source of Excel file for conversion
var xlsFilename = currentFile.getName(); // File name to give to converted file; defaults to same as source file
pdfFolder.createFile(currentFile.getAs(MimeType.PDF));
Logger.log("pdf Created");
}
}
}
}
My real problem stems from the fact that I will have 100+ sheets that needs to be converted to pdf's and that will exceed the 6 minutes limit. So I am trying to build a script that can trigger itself and continue where it left off, skipping sheets that are already in the pdffolder and creating those that are missing.
I might be way over my head here, so I hope someone can give me some hints :-)
You wrong use loops. Currently you create more pdfs of same sheet, because when you traversating over dashFiles, you create PDF one more pdf for each exists PDF.
Change part of code like this:
var pdfNames = {}; //Object instead of array
while (pdfFiles.hasNext()) {
var currentFile2 = pdfFiles.next();
var fileName2 = currentFile2.getName();
pdfNames[fileName2] = true; // use PDF name as key for faster searching
}
while (dashFiles.hasNext()) {
var currentFile = dashFiles.next();
var fileName = currentFile.getName();
if(pdfNames[fileName + ".pdf"]) { // is exists pdf?
Logger.log("YES");
}
else {
var xlsBlob = currentFile.getBlob(); // Blob source of Excel file for conversion
var xlsFilename = currentFile.getName(); // File name to give to converted file; defaults to same as source file
pdfFolder.createFile(currentFile.getAs(MimeType.PDF));
Logger.log("pdf Created");
}
}
You can create an object of file names with a value of true, and then check for the existence of the file name in the object. If the file name exists, then continue to loop.
var pdfNames = {};//Create an object - not an array
pdfNames[fileName2] = true;//Put the file name into the object
if (pdfNames[fileName]) {//Test for file name in the object
Code:
function createPdf() {
var currentFile,fileName,xlsBlob,xlsFilename;
var pdfFolder = DriveApp.getFolderById("ID")
var pdfFiles = pdfFolder.getFiles();
var dashFolder = DriveApp.getFolderById('ID');
var dashFiles = dashFolder.getFiles();
var pdfNames = {};
var dashNames = [];
while (pdfFiles.hasNext()) {
var currentFile2 = pdfFiles.next();
var fileName2 = currentFile2.getName();
pdfNames[fileName2] = true;//Put the file name into the object
}
while (dashFiles.hasNext()) {
currentFile = dashFiles.next();
fileName = currentFile.getName();
dashNames.push(fileName);
if (pdfNames[fileName]) {//The file name was found in the object of pdf files
Logger.log("YES");
continue;
}
xlsBlob = currentFile.getBlob(); // Blob source of Excel file for conversion
xlsFilename = currentFile.getName(); // File name to give to converted file; defaults to same as source file
pdfFolder.createFile(currentFile.getAs(MimeType.PDF));
Logger.log("pdf Created");
}
}
Original Answer:
Test for the existence of the file name in the pdf array in a different way.
pdfNames.indexOf(fileName + ".pdf") !== -1
If a value is not found in an array, then indexOf() returns minus one.
So, if the return value is not minus one, then a file name was found. If a file name was found, you don't want a new file created, so continue.
function createPdf() {
var currentFile,fileName,xlsBlob,xlsFilename;
var pdfFolder = DriveApp.getFolderById("ID")
var pdfFiles = pdfFolder.getFiles();
var dashFolder = DriveApp.getFolderById('ID');
var dashFiles = dashFolder.getFiles();
var pdfNames = [];
var dashNames = [];
while (pdfFiles.hasNext()) {
var currentFile2 = pdfFiles.next();
var fileName2 = currentFile2.getName();
pdfNames.push(fileName2);
}
while (dashFiles.hasNext()) {
currentFile = dashFiles.next();
fileName = currentFile.getName();
dashNames.push(fileName);
if (pdfNames.indexOf(fileName + ".pdf") !== -1) {//The file name was found in the array of pdf files
Logger.log("YES");
continue;
}
xlsBlob = currentFile.getBlob(); // Blob source of Excel file for conversion
xlsFilename = currentFile.getName(); // File name to give to converted file; defaults to same as source file
pdfFolder.createFile(currentFile.getAs(MimeType.PDF));
Logger.log("pdf Created");
}
}

Skip processing .xls to Google Sheets script if the file already exists in Google Drive

I am currently using this code to automatically convert all uploaded .xls files in Google Drive to Google Sheets.
function importXLS(){
var files = DriveApp.searchFiles('title contains ".xls"');
while(files.hasNext()){
var xFile = files.next();
var name = xFile.getName();
if (name.indexOf('.xls')>-1){
var ID = xFile.getId();
var xBlob = xFile.getBlob();
var newFile = { title : name,
key : ID,
'parents':[{"id":"12FcKokB-ppW7rSBtAIG96uoBOJtTlNDT"}]
}
file = Drive.Files.insert(newFile, xBlob, {
convert: true
});
}
}
}
It works perfectly, but fails if there is already a file in the output folder with the same name. Even though I never technically get to see this error below (since it runs on a schedule and not fired manually like in the screenshot), I would prefer to simply skip the conversion process if the file already exists.
If possible, I would also like to avoid overwriting it each time, as I feel that would be a waste of processing time. How would I edit this code to say that if the file name already exists in that folder, skip the entire code completely?
Thanks!
Two things you can try:
Get the files names that are already in the destination folder and check if the file exists before you try copying.
Wrap the section of your code that does the copying in a try..catch statement.
Both of these should work independently, but using the try..catch statement will catch all errors, so it would be best to combine them. (You can review the error logs in the Developer Console.) Doing this you'll be able to skip files that have the same name as those already in your destination folder and any other error that might come up will not terminate your script from completing.
function importXLS(){
var files = DriveApp.searchFiles('title contains ".xls"');
var destinationFolderId = "12FcKokB-ppW7rSBtAIG96uoBOJtTlNDT";
var existingFileNames = getFilesInFolder(destinationFolderId);
while(files.hasNext()){
var xFile = files.next();
var name = xFile.getName();
try {
if (!existingFileNames[name] && (name.indexOf('.xls')>-1)) {
var ID = xFile.getId();
var xBlob = xFile.getBlob();
var newFile = { title : name,
key : ID,
'parents':[{"id": destinationFolderId}]
}
file = Drive.Files.insert(newFile, xBlob, {
convert: true
});
}
} catch (error) {
console.error("Error with file " + name + ": " + error);
}
}
}
/**
* Get an object of all file names in the specified folder.
* #param {string} folderId
* #returns {Object} files - {filename: true}
*/
function getFilesInFolder(folderId) {
var folder = DriveApp.getFolderById(folderId);
var filesIterator = folder.getFiles();
var files = {};
while (filesIterator.hasNext()) {
var file = filesIterator.next();
files[file.getName()] = true;
}
return files;
}

How can I transform a link to a file name?

I have a Google spreadsheet file with almost 2000 rows of URL such as this one:
https://docs.google.com/uc?id=0B4ptELk-D3USblk1aWd1OWowRWs
Each URL contains an image, stored in Google Drive. My question is: How can I transform this link ( https://docs.google.com/uc?id=0B4ptELk-D3USblk1aWd1OWowRWs) to a file name? For instance, I would like to know if it could be a /something/folder/Photofile1.jpg.
Is there anyway of doing this?
Try script:
function TESTgetFileName() {
Logger.log(getFileName('0B4ptELk-D3USVi1NZ3ZGbkhqYXc'));
// ^^^^^^^^^^ file ID ^^^^^^^^^
}
function getFileName(id) {
var file = DriveApp.getFileById(id);
var fileName = file.getName();
var strFolders = getFolders(file);
return strFolders + '/' + fileName;
}
function getFolders(object) {
var folders = object.getParents();
if (!folders.hasNext()) { return 'My Drive'; }
var folder = folders.next();
var folderNames = [];
while (folder.getParents().hasNext()) {
var folderName = folder.getName();
folderNames.unshift(folderName);
folder = folder.getParents().next();
}
return folderNames.join('/');
}
Script function will return the result:
0B4ptELk-D3USVi1NZ3ZGbkhqYXc → My Drive/something/folder/Photofile1.jpg
Tests
I opened script editor, selected function TESTgetFileName and get the result
My Drive/Detalhes_Farmacia.Fotografia_Fachada_1.JPEG.
This function can be run from script, not directly from spreadsheets. When you try using it as custom formula from sheet, it throws error:
You do not have permission to call getFileById (line 8).
So better use it from script to get data and then write the result to the sheet.