How to use ContinuationToken with recursive folder iterator - google-apps-script

Because of Drive API quotas, Apps Script service quotas, and the 6-minute limit on script execution time, it is often critical to split Google Drive file manipulations into chunks.
We can use PropertiesService to store continuationToken for FolderIterator or FileIterator.
This way we can stop our script and, on the next run, continue from the place where we stopped.
Working example (linear iterator)
// Logs the name of every file in the user's Drive, resuming across runs.
// One run may not be enough to get through every file before the execution
// time limit, so progress is checkpointed with a continuation token that is
// kept in the user properties.
var userProperties = PropertiesService.getUserProperties();
var continuationToken = userProperties.getProperty('CONTINUATION_TOKEN');
var start = new Date();
var end = new Date();
var maxTime = 1000*60*4.5; // Max safe time, 4.5 mins
// No stored token: first execution, list all files in Drive.
// Stored token: not the first execution, pick up where we left off.
var files = (continuationToken == null)
  ? DriveApp.getFiles()
  : DriveApp.continueFileIterator(continuationToken);
var file;
while (files.hasNext()) {
  // Stop as soon as the time budget has been used up.
  if (end.getTime() - start.getTime() > maxTime) {
    break;
  }
  file = files.next();
  Logger.log(file.getName());
  end = new Date();
}
if (!files.hasNext()) {
  // Everything has been listed: remove the token
  PropertiesService.getUserProperties().deleteProperty('CONTINUATION_TOKEN');
} else {
  // Files remain: remember the current position for the next run
  continuationToken = files.getContinuationToken();
  userProperties.setProperty('CONTINUATION_TOKEN', continuationToken);
}
Problem (recursive iterator)
To retrieve the tree-like structure of a folder and get its files, we have to use a recursive function. Something like this:
// Start the traversal at one folder; 'root folder id' is presumably a
// placeholder for a real folder ID.
doFolders(DriveApp.getFolderById('root folder id'));
/**
 * Walks every descendant folder of parentFolder, depth first.
 *
 * @param {Folder} parentFolder Folder whose subfolders are visited; it only
 *     needs a getFolders() method returning a hasNext()/next() iterator.
 */
function doFolders(parentFolder) {
  for (var subfolders = parentFolder.getFolders(); subfolders.hasNext();) {
    var current = subfolders.next();
    // do something with folder
    // then descend into its own subfolders
    doFolders(current);
  }
}
However, in this case I have no idea how to use continuationToken.
Question
How do I use a ContinuationToken with a recursive folder iterator when we need to go through the whole folder structure?
Assumption
Does it make sense to construct many tokens, with names based on the id of each parent folder?

If you're trying to recursively iterate on a folder and want to use continuation tokens (as is probably required for large folders), you'll need a data structure that can store multiple sets of continuation tokens. Both for files and folders, but also for each folder in the current hierarchy.
The simplest data structure would be an array of objects.
Here is a solution that gives you the template for creating a function that can recursively process files and store continuation tokens so it can resume if it times out.
Simply modify MAX_RUNNING_TIME_MS to your desired value (now it's set to 1 minute).
You don't want to set it more than ~4.9 minutes as the script could timeout before then and not store its current state.
Update the processFile method to do whatever you want on files.
Finally, call processRootFolder() and pass it a Folder. It'll be smart enough to know how to resume processing the folder.
Sure there is room for improvement (e.g. it simply checks the folder name to see if it's a resume vs. a restart) but this will most likely be sufficient for 95% of people that need to iterate recursively on a folder with continuation tokens.
/**
 * Recursively processes every file below rootFolder, checkpointing progress so
 * that a later call can resume where this one stopped.
 *
 * The traversal state is a stack (array) of iteration records, one per folder
 * on the current path, stored as JSON in the document properties under
 * RECURSIVE_ITERATOR_KEY. When the time budget is used up and work remains,
 * the stack is saved and the function returns; when the traversal finishes,
 * the property is deleted.
 *
 * @param {Folder} rootFolder Folder to traverse. A saved session is resumed
 *     only if its first record carries the same folder name.
 */
function processRootFolder(rootFolder) {
  var MAX_RUNNING_TIME_MS = 1 * 60 * 1000;
  var RECURSIVE_ITERATOR_KEY = "RECURSIVE_ITERATOR_KEY";
  var startTime = (new Date()).getTime();
  var documentProperties = PropertiesService.getDocumentProperties();

  // [{folderName: String, fileIteratorContinuationToken: String?, folderIteratorContinuationToken: String}]
  var recursiveIterator = JSON.parse(documentProperties.getProperty(RECURSIVE_ITERATOR_KEY));

  // An empty saved stack holds no position to resume from and has no first
  // record to compare names against, so handle it like "nothing saved".
  if (recursiveIterator !== null && recursiveIterator.length === 0) {
    recursiveIterator = null;
  }

  if (recursiveIterator !== null) {
    // verify that it's actually for the same folder
    if (rootFolder.getName() !== recursiveIterator[0].folderName) {
      console.warn("Looks like this is a new folder. Clearing out the old iterator.");
      recursiveIterator = null;
    } else {
      console.info("Resuming session.");
    }
  }
  if (recursiveIterator === null) {
    console.info("Starting new session.");
    recursiveIterator = [];
    recursiveIterator.push(makeIterationFromFolder(rootFolder));
  }

  while (recursiveIterator.length > 0) {
    recursiveIterator = nextIteration(recursiveIterator, startTime);
    var currTime = (new Date()).getTime();
    var elapsedTimeInMS = currTime - startTime;
    var timeLimitExceeded = elapsedTimeInMS >= MAX_RUNNING_TIME_MS;
    // Checkpoint only while work remains. If the step that used up the budget
    // also emptied the stack, fall through to the normal "done" cleanup
    // instead of persisting an empty stack.
    if (timeLimitExceeded && recursiveIterator.length > 0) {
      documentProperties.setProperty(RECURSIVE_ITERATOR_KEY, JSON.stringify(recursiveIterator));
      console.info("Stopping loop after '%d' milliseconds. Please continue running.", elapsedTimeInMS);
      return;
    }
  }

  console.info("Done running");
  documentProperties.deleteProperty(RECURSIVE_ITERATOR_KEY);
}
/**
 * Performs exactly one step of the resumable traversal: process the next file
 * or move to the next folder.
 *
 * Only the record on top of the stack is looked at. While it has a file
 * token, one file is processed (or the token is cleared once the files are
 * exhausted). After that, its folder token is used to descend into the next
 * subfolder, or the record is popped once no subfolder is left.
 *
 * @param {Array<Object>} recursiveIterator Non-empty stack of iteration
 *     records ({folderName, fileIteratorContinuationToken,
 *     folderIteratorContinuationToken}); it is modified in place.
 * @return {Array<Object>} The same stack after the step.
 * @throws {Error} If the top record has neither a file nor a folder token.
 */
function nextIteration(recursiveIterator) {
  // The top record is the same object that sits in the array, so updating it
  // here updates the stack as well.
  var currentIteration = recursiveIterator[recursiveIterator.length - 1];

  if (currentIteration.fileIteratorContinuationToken !== null) {
    var fileIterator = DriveApp.continueFileIterator(currentIteration.fileIteratorContinuationToken);
    if (fileIterator.hasNext()) {
      // process the next file
      var path = recursiveIterator.map(function(iteration) { return iteration.folderName; }).join("/");
      processFile(fileIterator.next(), path);
      currentIteration.fileIteratorContinuationToken = fileIterator.getContinuationToken();
    } else {
      // done processing files
      currentIteration.fileIteratorContinuationToken = null;
    }
    return recursiveIterator;
  }

  if (currentIteration.folderIteratorContinuationToken !== null) {
    var folderIterator = DriveApp.continueFolderIterator(currentIteration.folderIteratorContinuationToken);
    if (folderIterator.hasNext()) {
      // process the next folder: remember the position in the parent first,
      // then push a record for the child
      var folder = folderIterator.next();
      currentIteration.folderIteratorContinuationToken = folderIterator.getContinuationToken();
      recursiveIterator.push(makeIterationFromFolder(folder));
    } else {
      // done processing subfolders
      recursiveIterator.pop();
    }
    return recursiveIterator;
  }

  // Throw a real Error (not a bare string) so the failure carries a stack trace.
  throw new Error("should never get here");
}
/**
 * Builds the iteration record for one folder: its name plus continuation
 * tokens taken from freshly created file and folder iterators.
 *
 * @param {Folder} folder Folder to describe.
 * @return {{folderName: string, fileIteratorContinuationToken: string,
 *     folderIteratorContinuationToken: string}} The new record.
 */
function makeIterationFromFolder(folder) {
  var iteration = {};
  iteration.folderName = folder.getName();
  iteration.fileIteratorContinuationToken = folder.getFiles().getContinuationToken();
  iteration.folderIteratorContinuationToken = folder.getFolders().getContinuationToken();
  return iteration;
}
/**
 * Per-file hook of the traversal; this template version only logs the file's
 * full path.
 *
 * @param {File} file File being visited.
 * @param {string} path Slash-joined names of the folders leading to the file.
 */
function processFile(file, path) {
  var fullPath = path + "/" + file.getName();
  console.log(fullPath);
}

Related

Resumes a file iteration using a continuation token from a previous iterator [duplicate]

I've written a script to iterate through a large number of files in a Google Drive folder. Due to the processing I am doing on those files it exceeds the maximum execution time. Naturally I wrote into the script to use DriveApp.continueFileIterator(continuationToken): the token gets stored in the Project Properties and when the script runs it checks to see if there's a token, if there is it creates the FileIterator from the token if not it starts afresh.
What I have found is that even though the script restarts with the continuation token, it still starts from the beginning of the iteration, trying to process the same files again, which wastes time in the subsequent executions. Have I missed something vital, such as a command or method to make it start from where it left off? Am I supposed to update the continuation token at various stages throughout the while(contents.hasNext()) loop?
Here's the sample code slimmed down to give you an idea:
/**
 * Walks the files of one Drive folder and passes the ID of every file whose
 * creation date compares greater than the stored LAST_EXECUTION value to
 * processFiles().
 *
 * NOTE(review): on a first run the continuation token is read from the
 * iterator and stored before any file has been consumed, and it is never
 * refreshed inside the loop.
 */
function listFilesInFolder() {
  var id = '0fOlDeRiDg';
  var scriptProperties = PropertiesService.getScriptProperties();
  var continuationToken = scriptProperties.getProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
  var contents;
  if (continuationToken != null) {
    // a token is stored: continue the import from it
    contents = DriveApp.continueFileIterator(continuationToken);
  } else {
    // first time execution: list all files of the drive folder ...
    contents = DriveApp.getFolderById(id).getFiles();
    // ... then get the token and store it in a project property
    continuationToken = contents.getContinuationToken();
    scriptProperties.setProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN', continuationToken);
  }
  while (contents.hasNext()) {
    var file = contents.next();
    var fileID = file.getId();
    var name = file.getName(); // read but not used further
    var dateCreated = file.getDateCreated();
    if (dateCreated > lastExecution) {
      processFiles(fileID);
    }
  }
  // Finished processing files so delete continuation token
  scriptProperties.deleteProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  scriptProperties.setProperty(
    'LAST_EXECUTION',
    Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd HH:mm:ss")
  );
};
Like Jonathon said, you're comparing dates wrongly. But that's not the main issue with your script nor what you asked.
The main concept you're getting wrong is that the continuation token can't be saved before you do your loop. When you get the token, it saves where you were at that moment, if you continue iterating afterwards, that's not saved and you will repeat those steps later, just like you're experiencing.
To get the token later you cannot let your script terminate with an error. You have to measure how many files you can process under 5 minutes and stop your script manually before that, so you can have a chance at saving the token.
Here's the correct way of doing it:
/**
 * Processes at most MAX_FILES files of one Drive folder per call and saves a
 * continuation token afterwards, so the next call carries on from there.
 *
 * Files are passed to processFile() only when their formatted creation date
 * sorts after the stored LAST_EXECUTION string. Once the listing is exhausted
 * the token is removed and LAST_EXECUTION is set to the current time.
 */
function listFilesInFolder() {
  var MAX_FILES = 20; //use a safe value, don't be greedy
  var id = 'folder-id';
  var scriptProperties = PropertiesService.getScriptProperties();
  var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
  if (lastExecution === null) {
    // nothing stored yet: the empty string sorts before every formatted date
    lastExecution = '';
  }
  var continuationToken = scriptProperties.getProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  var iterator;
  if (continuationToken == null) {
    iterator = DriveApp.getFolderById(id).getFiles();
  } else {
    iterator = DriveApp.continueFileIterator(continuationToken);
  }

  try {
    var handled = 0;
    while (handled < MAX_FILES && iterator.hasNext()) {
      var file = iterator.next();
      var dateCreated = formatDate(file.getDateCreated());
      if (dateCreated > lastExecution) {
        processFile(file);
      }
      handled += 1;
    }
  } catch (err) {
    // log and carry on so the position reached so far can still be saved below
    Logger.log(err);
  }

  if (!iterator.hasNext()) {
    // Finished processing files so delete continuation token
    scriptProperties.deleteProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
    scriptProperties.setProperty('LAST_EXECUTION', formatDate(new Date()));
  } else {
    scriptProperties.setProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN', iterator.getContinuationToken());
  }
}
/**
 * Formats a date as a "yyyy-MM-dd HH:mm:ss" string in the GMT time zone.
 *
 * @param {Date} date Date to format.
 * @return {string} Whatever Utilities.formatDate returns for that pattern.
 */
function formatDate(date) {
  var pattern = "yyyy-MM-dd HH:mm:ss";
  return Utilities.formatDate(date, "GMT", pattern);
}
/**
 * Per-file hook; this template version reads the file's ID and name and logs
 * the name.
 *
 * @param {File} file File to process.
 */
function processFile(file) {
  var fileId = file.getId();
  var fileName = file.getName();
  //your processing...
  Logger.log(fileName);
}
Anyway, it is possible that a file gets created between your runs and you do not get it in your continued iteration. Then, because the execution time is saved after your last run, you may miss it on your next run too. I do not know your use case, or whether it is acceptable to occasionally reprocess some files or to miss some. If you can't have either situation at all, then the only solution I see is to save the ids of all files you have already processed. You may need to store those in a Drive file, because PropertiesService may be too small for that many ids.
your date comparison won't work in the way you have it.
var currentExecution = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd HH:mm:ss");
will store "2014-04-18 08:32:01", whereas file.getDateCreated() will return a Date object; comparing these using either < or > will always return false.
so I'd suggest that you store the time as a timestamp (because you cant store Date objects) and then compare that to the timestamp of the file created date.
// stored time stamp
var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
…
dateCreated = file.getDateCreated().getTime();
…
var currentExecution = new Date().getTime();
scriptProperties.setProperty('LAST_EXECUTION',currentExecution);
that comparison will work as you expect it.

App script for pdf converter exceeded maximum execution time

This is my app script code that converts google docs in a folder into pdf format. The script stops after converting around 60 documents with maximum execution time error. I am converting around hundreds of files in a run. What can I do to avoid this error?
//Module to convert doc to pdf
/**
 * Runs createPDF() for every file in the source folder; createPDFfile() is
 * then called for each file for which createPDF() hands back a file ID.
 */
function gdocToPDF() {
  // replace this with the ID of the folder that contains the documents you want to convert
  var sourceFolder = DriveApp.getFolderById("xx");
  // replace this with the ID of the folder that the PDFs should be put in.
  var targetFolder = DriveApp.getFolderById("xx");

  // createPDF() calls this back; a file ID is only present when a PDF should be made.
  var convertIfRequested = function (fileID, folderID) {
    if (fileID) {
      createPDFfile(fileID, folderID);
    }
  };

  for (var sourceFiles = sourceFolder.getFiles(); sourceFiles.hasNext();) {
    var sourceId = sourceFiles.next().getId();
    createPDF(sourceId, targetFolder.getId(), convertIfRequested);
  }
}
/**
 * Decides whether a PDF still has to be created for a document and reports the
 * decision through the callback.
 *
 * The target folder is scanned for a file carrying the PDF name of the
 * document. createPDFfile() names its output after the document with the
 * first "." removed, so that spelling is checked as well as the plain
 * "<name>.pdf"; otherwise documents with a dot in their name would never be
 * recognised as already converted.
 *
 * @param {string} fileID ID of the document to convert.
 * @param {string} folderID ID of the folder holding the PDFs.
 * @param {function(string=, string=)} callback Called with (fileID, folderID)
 *     when a PDF should be created, or with no arguments when one exists.
 * @return {*} Whatever the callback returns.
 */
function createPDF(fileID, folderID, callback) {
  var templateName = DriveApp.getFileById(fileID).getName();
  var plainPdfName = templateName + ".pdf";
  // Same transformation createPDFfile() applies when naming the PDF.
  var strippedPdfName = templateName.replace(".", "") + ".pdf";
  var existingPDFs = DriveApp.getFolderById(folderID).getFiles();

  //in case no files exist
  if (!existingPDFs.hasNext()) {
    return callback(fileID, folderID);
  }

  while (existingPDFs.hasNext()) {
    var existingName = existingPDFs.next().getName();
    if (existingName === plainPdfName || existingName === strippedPdfName) {
      Logger.log("PDF exists already. No PDF created");
      return callback();
    }
  }

  // Every existing file was checked and none matched.
  Logger.log("PDF is created");
  return callback(fileID, folderID);
}
/**
 * Converts one file to PDF and stores the result in the given folder, named
 * after the source with its first "." removed plus ".pdf".
 *
 * @param {string} fileID ID of the file to convert.
 * @param {string} folderID ID of the folder receiving the PDF.
 */
function createPDFfile(fileID, folderID) {
  var sourceDoc = DriveApp.getFileById(fileID);
  var targetFolder = DriveApp.getFolderById(folderID);
  var pdfBlob = sourceDoc.getBlob().getAs('application/pdf');
  var pdf = targetFolder.createFile(pdfBlob);
  //otherwise filename will be shortened after full stop
  var baseName = sourceDoc.getName().replace(".", "");
  pdf.setName(baseName + ".pdf");
}
I generate a lot of files. What I do to avoid this error is open a dialog and run a function that creates one file at a time. I write the number of remaining files in a cell in the sheet, update it with every run. I use the success and failure handlers to decide whether to continue with file generation or close the dialog. You can generate files for hours in this way.

Combine Google Docs documents

Is it possible to merge 100 Google Docs documents into one?
I've tried copy-pasting, but it seems too long and it's not possible to copy comments.
This can be done with Google Apps Script. See this example. The most relevant parts (example assumes nothing but Google Docs in the folder):
// Creates a new document titled "Combined Document Example" and appends to it
// copies of the body elements of every file in the Drive root folder.
// NOTE(review): this is an abridged excerpt - the switch below handles only
// PARAGRAPH and ends in a placeholder case, so it does not parse as written.
function combine() {
var folder = DriveApp.getRootFolder();
if (folder == null) { Logger.log("Failed to get root folder"); return; }
var combinedTitle = "Combined Document Example";
var combo = DocumentApp.create(combinedTitle);
var comboBody = combo.getBody();
// Put the title into the header of the combined document.
var hdr = combo.addHeader();
hdr.setText(combinedTitle)
var list = folder.getFiles();
while (list.hasNext()) {
var doc = list.next();
// Every file is opened as a document (the example assumes only Google Docs).
var src = DocumentApp.openById(doc.getId());
var srcBody = src.getBody();
var elems = srcBody.getNumChildren();
for (var i = 0; i < elems; i++ ) {
// Work on a copy of the source element.
// NOTE(review): elem is assigned without var/let/const.
elem = srcBody.getChild(i).copy();
// fire the right method based on elem's type
switch (elem.getType()) {
case DocumentApp.ElementType.PARAGRAPH:
comboBody.appendParagraph(elem);
break;
case // something
}
}
}
}
Note that you don't copy the source document's contents in one lump; you have to loop through them as individual elements and fire the correct append* method to add them to the merged/destination file.
I expanded on #noltie's answer to support merging docs in a folder structure recursively, starting from an arbitrary folder (not necessarily the root folder of Google Docs), and to guard against script failures caused by too many unsaved changes.
/**
 * Collects the name and ID of every file below rootFolder.
 *
 * For each folder, its subfolders are fully visited first and its own files
 * are added afterwards.
 *
 * @param {Folder} rootFolder Folder to start from.
 * @return {Array<{name: string, id: string}>} One entry per file found.
 */
function getDocsRec(rootFolder) {
  var collected = [];

  // Appends everything found under one folder to `collected`.
  function visit(folder) {
    for (var subfolders = folder.getFolders(); subfolders.hasNext();) {
      visit(subfolders.next());
    }
    for (var files = folder.getFiles(); files.hasNext();) {
      var file = files.next();
      collected.push({name: file.getName(), id: file.getId()});
    }
  }

  visit(rootFolder);
  return collected;
}
/**
 * Merges every document found below a root folder (see getDocsRec) into a
 * newly created document titled "Combined Document Example".
 *
 * Each source document is copied element by element and followed by a page
 * break. Element types without a matching append method are replaced by a
 * bold notice paragraph.
 */
function combineDocs() {
  // This function assumes only Google Docs files are in the root folder
  // Get the id from the URL of the folder.
  var rootFolder = DriveApp.getFolderById("<root folder id>");
  if (rootFolder == null) {
    Logger.log("Failed to get root folder");
    return;
  }

  var combinedTitle = "Combined Document Example";
  var target = DocumentApp.create(combinedTitle);
  var targetBody = target.getBody();

  // Appends one copied source element to `body`, using the append method that
  // belongs to the element's type.
  function appendCopy(body, piece) {
    switch (piece.getType()) {
      case DocumentApp.ElementType.HORIZONTAL_RULE:
        body.appendHorizontalRule();
        break;
      case DocumentApp.ElementType.INLINE_IMAGE:
        body.appendImage(piece);
        break;
      case DocumentApp.ElementType.LIST_ITEM:
        body.appendListItem(piece);
        break;
      case DocumentApp.ElementType.PAGE_BREAK:
        body.appendPageBreak(piece);
        break;
      case DocumentApp.ElementType.PARAGRAPH:
        body.appendParagraph(piece);
        break;
      case DocumentApp.ElementType.TABLE:
        body.appendTable(piece);
        break;
      default:
        var notice = {};
        notice[DocumentApp.Attribute.BOLD] = true;
        body.appendParagraph("Element type '" + piece.getType() + "' could not be merged.").setAttributes(notice);
    }
  }

  // merely get the files recursively, does not get them in alphabetical order.
  var sources = getDocsRec(rootFolder);

  // Log all the docs we got back. Click "Edit -> Logs" to see.
  for (var n = 0; n < sources.length; n++) {
    Logger.log(sources[n].name);
  }

  // this sort will fail if you have files with identical names
  // sources.sort(function(a, b) { return a.name < b.name ? -1 : 1; });

  // Now load the docs into the combined doc. A doc cannot be loaded in one
  // big lump; its elements are looped over and copied one by one.
  for (var docIndex = 0; docIndex < sources.length; docIndex++) {
    // There is a limit somewhere between 50-100 unsaved changes where the
    // script won't continue until a batch is committed, so save and reopen
    // the combined doc on every 50th source.
    if (docIndex % 50 === 0) {
      target.saveAndClose();
      target = DocumentApp.openById(target.getId());
      targetBody = target.getBody();
    }

    var sourceBody = DocumentApp.openById(sources[docIndex].id).getBody();
    var childCount = sourceBody.getNumChildren();
    for (var childIndex = 0; childIndex < childCount; childIndex++) {
      appendCopy(targetBody, sourceBody.getChild(childIndex).copy());
    }

    // page break at the end of each entry.
    targetBody.appendPageBreak();
  }
}
You can create and run a script with the above code on https://script.google.com/home
Both the above fail for me with the script returning a red lozenge:
Service unavailable: Docs Dismiss
(the documents in the folder are found, as are the document id's, and the combined doc is created, but empty)
Fixed that - had a document in the list that wasn't owned by me or was created by conversion. Removed that and away we go.
Google Docs does not support any type of merge, yet.
You can select all 100 docs, download them and try to merge them offline.
Download all the files as Docx, then use Microsoft Word or Open Office to merge the documents using the "master document" feature. (Word also refers to this as "Outline.")

Iterators built with different continuation tokens are producing the same results in Google Apps

I am programming a Google Apps script within a spreadsheet. My use case includes iterating over a large set of folders that are children of a given one. The problem is that the processing takes longer than the maximum that Google allows (6 minutes), so I had to program my script to be able to resume later. I am creating a trigger to resume the task, but that is not part of my problem (at least, not the more important one at this moment).
My code looks like this (reduced to the minimum to illustrate my problem):
/**
 * Starts a fresh run: stores the source parent folder ID, removes any saved
 * continuation token and then calls continueProcess().
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();
  // remember which folder to process
  props.setProperty(SOURCE_PARENT_FOLDER_KEY, SOURCE_PARENT_FOLDER_ID);
  // no token stored means continueProcess() starts from the beginning
  props.deleteProperty(CONTINUATION_TOKEN_KEY);
  continueProcess();
}
/**
 * Processes the subfolders of the stored parent folder until they run out or
 * the elapsed time reaches MAX_RUNNING_TIME.
 *
 * The folder iterator is rebuilt from the stored continuation token when
 * there is one. Afterwards the token is deleted if the iterator is exhausted,
 * otherwise the iterator's current token is stored.
 */
function continueProcess() {
  try {
    var startedAt = (new Date()).getTime();
    var props = PropertiesService.getScriptProperties();
    var parentId = props.getProperty(SOURCE_PARENT_FOLDER_KEY);
    var savedToken = props.getProperty(CONTINUATION_TOKEN_KEY);

    var folders;
    if (savedToken == null) {
      folders = DriveApp.getFolderById(parentId).getFolders();
    } else {
      folders = DriveApp.continueFolderIterator(savedToken);
    }

    var outOfTime = false;
    while (folders.hasNext() && !outOfTime) {
      processFolder_(folders.next());
      var now = (new Date()).getTime();
      outOfTime = (now - startedAt >= MAX_RUNNING_TIME);
    }

    if (folders.hasNext()) {
      // stopped early: keep the position for the next invocation
      props.setProperty(CONTINUATION_TOKEN_KEY, folders.getContinuationToken());
    } else {
      // nothing left to process
      props.deleteProperty(CONTINUATION_TOKEN_KEY);
    }
  } catch (e) {
    //sends a mail with the error
  }
}
When launchProcess is invoked, it only prepares the program for the other method, continueProcess, that iterates over the set of folders. The iterator is obtained by using the continuation token, when it is present (it will not be there in the first invocation). When the time limit is near, continueProcess obtains the continuation token, saves it in a property and waits for the next invocation.
The problem I have is that the iterator is always returning the same set of folders although it has been built from different tokens (I have printed them, so I know they are different).
Any idea about what am I doing wrong?
Thank you in advance.
It appears that your loop was not built correctly. (edit : actually, probably also another issue about how we break the while loop, see my thoughts about that in comments)
Note also that there is no special reason to use a try/catch in this context since I see no reason that the hasNext() method would return an error (but if you think so you can always add it)
here is an example that works, I added the trigger creation / delete lines to implement my test.
EDIT : code updated with logs and counter
// ID of the Drive folder whose subfolders this example lists.
var SOURCE_PARENT_FOLDER_ID = '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI'
// Time budget for one run: 5*35*6 = 1050. It is compared against a difference
// of Date getTime() values, i.e. milliseconds.
var MAX_RUNNING_TIME = 5*35*6;
/**
 * Starts a fresh folder-listing run: resets the stored state, installs a
 * time-based trigger that resumes the work every 10 minutes, and performs the
 * first pass immediately.
 */
function launchProcessFolder() {
  var scriptProperties = PropertiesService.getScriptProperties();
  scriptProperties.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  scriptProperties.setProperty('counter', 0);
  scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
  // The trigger has to name the resume function of this example,
  // continueProcessFolder, so that later runs continue the folder listing.
  ScriptApp.newTrigger('continueProcessFolder').timeBased().everyMinutes(10).create();
  continueProcessFolder();
}
/**
 * Logs the subfolders of the stored parent folder, resuming from the saved
 * continuation token when there is one, until the listing is exhausted or the
 * elapsed time reaches MAX_RUNNING_TIME.
 *
 * What happens afterwards depends only on whether folders remain:
 *  - folders remain: the continuation token and the running counter are saved;
 *  - listing exhausted: the token is deleted and the first project trigger
 *    (if any) is removed.
 * Deciding this after the loop avoids writing a stale token back when the
 * last folder is consumed on the very step that reaches the time limit, and
 * it also cleans up when the listing is empty from the start.
 */
function continueProcessFolder() {
  var startTime = (new Date()).getTime();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  var iterator = continuationToken == null ? DriveApp.getFolderById(srcParentFolderId).getFolders() : DriveApp.continueFolderIterator(continuationToken);
  var timeLimitIsNear = false;
  var currTime;
  var counter = Number(scriptProperties.getProperty('counter'));

  while (iterator.hasNext() && !timeLimitIsNear) {
    var folder = iterator.next();
    counter++;
    Logger.log(counter+' - '+folder.getName());
    currTime = (new Date()).getTime();
    timeLimitIsNear = (currTime - startTime >= MAX_RUNNING_TIME);
  }

  if (iterator.hasNext()) {
    // Stopped because of the time limit: remember where to resume.
    var contToken = iterator.getContinuationToken();
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', contToken);
    scriptProperties.setProperty('counter', counter);
    Logger.log('write to scriptProperties');
  } else {
    scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
    // Guard against a project without triggers (e.g. a manual run).
    var triggers = ScriptApp.getProjectTriggers();
    if (triggers.length > 0) {
      ScriptApp.deleteTrigger(triggers[0]);
    }
    Logger.log('******************no more folders**************');
  }
}
EDIT 2 :
(see also last comment)
Here is a test with the script modified to get files in a folder. From my different tests it appears that the operation is very fast and that I needed to set a quite short timeout limit to make it happen before reaching the end of the list.
I added a couple of Logger.log() and a counter to see exactly what was happening and to know for sure what was interrupting the while loop.
With the current values I can see that it works as expected, the first (and second) break happens with time limitation and the logger confirms that the token is written. On a third run I can see that all files have been dumped.
// ID of the Drive folder whose files this example lists.
var SOURCE_PARENT_FOLDER_ID = '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI'
// Time budget for one run: 5*35*6 = 1050. It is compared against a difference
// of Date getTime() values, i.e. milliseconds.
var MAX_RUNNING_TIME = 5*35*6;
/**
 * Starts a fresh file-listing run: resets the stored state, installs a
 * time-based trigger that calls continueProcess every 10 minutes, and
 * performs the first pass immediately.
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();
  props.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  props.setProperty('counter', 0);
  props.deleteProperty('CONTINUATION_TOKEN_KEY');

  // Build the recurring trigger first, then register it.
  var resumeEveryTenMinutes = ScriptApp.newTrigger('continueProcess').timeBased().everyMinutes(10);
  resumeEveryTenMinutes.create();

  continueProcess();
}
/**
 * Logs the files of the stored parent folder, resuming from the saved
 * continuation token when there is one, until the listing is exhausted or the
 * elapsed time reaches MAX_RUNNING_TIME.
 *
 * What happens afterwards depends only on whether files remain:
 *  - files remain: the continuation token and the running counter are saved;
 *  - listing exhausted: the token is deleted and the first project trigger
 *    (if any) is removed.
 * Deciding this after the loop avoids writing a stale token back when the
 * last file is consumed on the very step that reaches the time limit, and it
 * also cleans up when the listing is empty from the start.
 */
function continueProcess() {
  var startTime = (new Date()).getTime();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  var iterator = continuationToken == null ? DriveApp.getFolderById(srcParentFolderId).getFiles() : DriveApp.continueFileIterator(continuationToken);
  var timeLimitIsNear = false;
  var currTime;
  var counter = Number(scriptProperties.getProperty('counter'));

  while (iterator.hasNext() && !timeLimitIsNear) {
    var file = iterator.next();
    counter++;
    Logger.log(counter+' - '+file.getName());
    currTime = (new Date()).getTime();
    timeLimitIsNear = (currTime - startTime >= MAX_RUNNING_TIME);
  }

  if (iterator.hasNext()) {
    // Stopped because of the time limit: remember where to resume.
    var contToken = iterator.getContinuationToken();
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', contToken);
    scriptProperties.setProperty('counter', counter);
    Logger.log('write to scriptProperties');
  } else {
    scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
    // Guard against a project without triggers (e.g. a manual run).
    var triggers = ScriptApp.getProjectTriggers();
    if (triggers.length > 0) {
      ScriptApp.deleteTrigger(triggers[0]);
    }
    Logger.log('******************no more files**************');
  }
}
As of January 1, 2016 this is still a problem. The bug report lists a solution using the Advanced Drive API, which is documented here, under "Listing folders".
If you don't want to use Advanced services, an alternative solution would be to use the Folder Iterator to make an array of File Ids.
It appears to me that the Folder Iterator misbehaves only when created using DriveApp.continueFolderIterator(). When using this method, only 100 Folders are included in the returned Folder Iterator.
Using DriveApp.getFolders() and only getting Folder Ids, I am able to iterate through 694 folders in 2.734 seconds, according the Execution transcript.
/**
 * Collects the IDs of all folders returned by DriveApp.getFolders() and logs
 * how many were found.
 *
 * @return {Array<string>} The folder IDs in iteration order.
 */
function allFolderIds() {
  var ids = [];
  for (var folders = DriveApp.getFolders(); folders.hasNext();) {
    ids.push(folders.next().getId());
  }
  Logger.log('Total folders: %s', ids.length);
  return ids;
}
I used the returned array to work my way through all the folders, using a trigger. The Id array is too big to save in the cache, so I created a temp file and used the cache to save the temp file Id.
This is caused by a bug in GAS:
https://code.google.com/p/google-apps-script-issues/issues/detail?id=4116
It appears you're only storing a single continuation token. If you want to recursively iterate over a set of folders and allow the script to pause at any point (e.g. to avoid the timeout) and resume later, you'll need to store a bunch more continuation tokens (e.g. in an array of objects).
I've outlined a template that you can use here to get it working properly. This worked with thousands of nested files over the course of 30+ runs perfectly.

Correct usage of DriveApp.continueFileIterator(continuationToken)

I've written a script to iterate through a large number of files in a Google Drive folder. Due to the processing I am doing on those files it exceeds the maximum execution time. Naturally I wrote into the script to use DriveApp.continueFileIterator(continuationToken): the token gets stored in the Project Properties and when the script runs it checks to see if there's a token, if there is it creates the FileIterator from the token if not it starts afresh.
What I have found is that even though the script restarts with the continuation token, it still starts from the beginning of the iteration, trying to process the same files again, which wastes time in the subsequent executions. Have I missed something vital, such as a command or method to make it start from where it left off? Am I supposed to update the continuation token at various stages throughout the while(contents.hasNext()) loop?
Here's the sample code slimmed down to give you an idea:
/**
 * Walks the files of one Drive folder and passes the ID of every file whose
 * creation date compares greater than the stored LAST_EXECUTION value to
 * processFiles().
 *
 * NOTE(review): on a first run the continuation token is read from the
 * iterator and stored before any file has been consumed, and it is never
 * refreshed inside the loop.
 */
function listFilesInFolder() {
  var id = '0fOlDeRiDg';
  var scriptProperties = PropertiesService.getScriptProperties();
  var continuationToken = scriptProperties.getProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
  var contents;
  if (continuationToken != null) {
    // a token is stored: continue the import from it
    contents = DriveApp.continueFileIterator(continuationToken);
  } else {
    // first time execution: list all files of the drive folder ...
    contents = DriveApp.getFolderById(id).getFiles();
    // ... then get the token and store it in a project property
    continuationToken = contents.getContinuationToken();
    scriptProperties.setProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN', continuationToken);
  }
  while (contents.hasNext()) {
    var file = contents.next();
    var fileID = file.getId();
    var name = file.getName(); // read but not used further
    var dateCreated = file.getDateCreated();
    if (dateCreated > lastExecution) {
      processFiles(fileID);
    }
  }
  // Finished processing files so delete continuation token
  scriptProperties.deleteProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  scriptProperties.setProperty(
    'LAST_EXECUTION',
    Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd HH:mm:ss")
  );
};
Like Jonathon said, you're comparing dates wrongly. But that's not the main issue with your script nor what you asked.
The main concept you're getting wrong is that the continuation token can't be saved before you do your loop. When you get the token, it saves where you were at that moment, if you continue iterating afterwards, that's not saved and you will repeat those steps later, just like you're experiencing.
To get the token later you cannot let your script terminate with an error. You have to measure how many files you can process under 5 minutes and stop your script manually before that, so you can have a chance at saving the token.
Here's the correct way of doing it:
/**
 * Processes at most MAX_FILES files of one Drive folder per call and saves a
 * continuation token afterwards, so the next call carries on from there.
 *
 * Files are passed to processFile() only when their formatted creation date
 * sorts after the stored LAST_EXECUTION string. Once the listing is exhausted
 * the token is removed and LAST_EXECUTION is set to the current time.
 */
function listFilesInFolder() {
  var MAX_FILES = 20; //use a safe value, don't be greedy
  var id = 'folder-id';
  var scriptProperties = PropertiesService.getScriptProperties();
  var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
  if (lastExecution === null) {
    // nothing stored yet: the empty string sorts before every formatted date
    lastExecution = '';
  }
  var continuationToken = scriptProperties.getProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
  var iterator;
  if (continuationToken == null) {
    iterator = DriveApp.getFolderById(id).getFiles();
  } else {
    iterator = DriveApp.continueFileIterator(continuationToken);
  }

  try {
    var handled = 0;
    while (handled < MAX_FILES && iterator.hasNext()) {
      var file = iterator.next();
      var dateCreated = formatDate(file.getDateCreated());
      if (dateCreated > lastExecution) {
        processFile(file);
      }
      handled += 1;
    }
  } catch (err) {
    // log and carry on so the position reached so far can still be saved below
    Logger.log(err);
  }

  if (!iterator.hasNext()) {
    // Finished processing files so delete continuation token
    scriptProperties.deleteProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN');
    scriptProperties.setProperty('LAST_EXECUTION', formatDate(new Date()));
  } else {
    scriptProperties.setProperty('IMPORT_ALL_FILES_CONTINUATION_TOKEN', iterator.getContinuationToken());
  }
}
/**
 * Formats a date as a "yyyy-MM-dd HH:mm:ss" string in the GMT time zone.
 *
 * @param {Date} date Date to format.
 * @return {string} Whatever Utilities.formatDate returns for that pattern.
 */
function formatDate(date) {
  var pattern = "yyyy-MM-dd HH:mm:ss";
  return Utilities.formatDate(date, "GMT", pattern);
}
/**
 * Per-file hook; this template version reads the file's ID and name and logs
 * the name.
 *
 * @param {File} file File to process.
 */
function processFile(file) {
  var fileId = file.getId();
  var fileName = file.getName();
  //your processing...
  Logger.log(fileName);
}
Anyway, it is possible that a file gets created between your runs and you do not get it in your continued iteration. Then, because the execution time is saved after your last run, you may miss it on your next run too. I do not know your use case, or whether it is acceptable to occasionally reprocess some files or to miss some. If you can't have either situation at all, then the only solution I see is to save the ids of all files you have already processed. You may need to store those in a Drive file, because PropertiesService may be too small for that many ids.
your date comparison won't work in the way you have it.
var currentExecution = Utilities.formatDate(new Date(), "GMT", "yyyy-MM-dd HH:mm:ss");
will store "2014-04-18 08:32:01", whereas file.getDateCreated() will return a Date object; comparing these using either < or > will always return false.
so I'd suggest that you store the time as a timestamp (because you cant store Date objects) and then compare that to the timestamp of the file created date.
// stored time stamp
var lastExecution = scriptProperties.getProperty('LAST_EXECUTION');
…
dateCreated = file.getDateCreated().getTime();
…
var currentExecution = new Date().getTime();
scriptProperties.setProperty('LAST_EXECUTION',currentExecution);
that comparison will work as you expect it.