Calling scriptDb.saveBatch with ~7600 items results in a rateMax error - google-apps-script

I'm working on an apps script to periodically check for modified items on a web service. Because API calls were taking too long, I've been trying to cache some of the data periodically in ScriptDb. However, trying to update data using scriptDb.saveBatch always results in the following error:
Service invoked too many times in a short time: scriptdb rateMax. Try Utilities.sleep(1000) between calls.
My script is querying ScriptDb and returning a result set of ~7600 records, modifying those records, and then saving everything back in a batch. I can't think of any way, given the tools Google makes available, to reduce the number of database calls I make. Is this really too much for ScriptDb to handle, or is there some way to improve on my code?
function getRootFolders() {
var updateTimestamp = new Date().valueOf();
var results = GetModifiedFolders(ROOT_FOLDER); //Returns results from an API call
var data = results.data; //The actual data from the API, as an array
var len = data.length;
if (len > 0) {
//Get a collection of dbMaps from ScriptDb
var maps = {}; //Store as an object for easy updating
var getMaps = db.query({'type': 'baseFolder'}).limit(50000); //Returns 7621 items
while (getMaps.hasNext()) {
var map = getMaps.next();
maps[map.boxId] = map;
}
//Iterate through the results
for (i = 0; i < len; i++) {
var item = data[i];
var map = maps[item.boxId]; //Try to retrive an existing dbMap
if (map) { //If it exists, update the existing dbMap
map.modified = item.modified;
map.updateTimestamp = updateTimestamp;
}
else { //Otherwise, insert the result into the collection of dbMaps
item.type = 'baseFolder';
item.updateTimestamp = updateTimestamp;
maps[item.boxId] = item;
}
}
//Convert the object back to an array, and use that to save to ScriptDb
var toSave = [];
for (var prop in dbMaps) {
toSave.push(dbMaps[prop]);
}
var mutations = db.saveBatch(toSave, false); //FAIL with scriptdb rateMax
if (db.allOk(mutations)) {
( . . . )
}
}
}
EDIT:
I've made a few changes in an effort to stop this from happening, but to no avail. I'm sleeping for several minutes before calling saveBatch, and then I'm saving in multiple, smaller batches, sleeping in between each one.
At this point, I can't imagine why I'm still getting this rateMax error. Is there something wrong with my code that I'm missing, or is this a bug in apps script? I assume it's my fault, but I can't see it.
Here's what I've added:
//Retrieving data from the API takes ~1 minute
//Sleep for a while to avoid rateMax error
var waitUntil = updateTimestamp + 240000; //Wait until there's only 1 minute left in the 5 minute quota
var msToWait = waitUntil - (now.valueOf());
Utilities.sleep(msToWait); //Sleep for ~3 minutes
//Save in batches
var batchSize = 250;
var batch = [];
var i = 0;
for (var prop in maps) {
batch.push(maps[prop]);
i++;
//When the batch reaches full size, save it
if (i % batchSize == 0 || i == len) {
Utilities.sleep(1000);
var mutations = db.saveBatch(batch, false);
if (!db.allOk(mutations)) {
return false;
}
batch = [];
}
}

Split the batch in smaller parts.
This won't change the result, because a batch save is not atomic anyway.

Related

Use importData to fetch and parse JSON

I had the following function running perfectly:
// Reads the habitat ids from column B of the "Harvests" sheet and prepares
// the result grids that the fetch loop further down fills in.
var ss = SpreadsheetApp.getActiveSpreadsheet();
var habSheet = ss.getSheetByName("Harvests");
var bVals = habSheet.getRange("b2:b").getValues();
// Number of non-empty cells in B2:B, i.e. how many habitats are listed.
var habs = bVals.filter(String).length;
// Rows 2..habs+1 hold the ids. The parentheses matter: without them the
// expression concatenates strings ("B2:B" + 12 + 1 -> "B2:B121") instead of
// adding, and the range reaches far past the last habitat.
var habitats = habSheet.getRange("B2:B" + (habs + 1)).getDisplayValues();
var data = [];
// Trait names come from the header cells D1:U1 (a single row).
var traitNames = habSheet.getRange("D1:U1").getValues();
// values: one row per habitat, one cell per trait column, pre-filled with [""].
var values = new Array(habs);
for (var i = 0; i < habs; i++) {
  values[i] = new Array(traitNames[0].length);
  for (var j = 0; j < traitNames[0].length; j++) {
    values[i][j] = [""];
  }
}
var rawData = "";
// names: one single-cell row per habitat.
var names = new Array(habs);
for (i = 0; i < habs; i++) {
  names[i] = new Array(1);
}
for (i=0; i<habs; i++){
try{
rawData = UrlFetchApp.fetch("https://api.genopets.me/habitat/"+habitats[i]);
data[i] = JSON.parse(rawData.getContentText());
names[i][0] = data[i].name;
for (j=0; j<data[i].attributes.length; j++){
value = data[i].attributes[j].value;
trait = data[i].attributes[j].trait_type;
for (k=0; k<=21; k++){
if (traitNames[0][k] == trait){
values[i][k] = value;
}
}
}
}
catch(err){
But I'm exceeding max fetch calls daily. I'm in an emergency situation because this needs to run again within an hour.
I'm trying to build a temporary fix, so I'm using importData to call the API with the following formula:
=join(",",IMPORTDATA("https://api.genopets.me/habitat/"&B2,","))
Then, I want to just replace rawData in the code with this imported data. However, now it comes in as text and can't be parsed in the same way. Is there a quick way to force it into JSON format or otherwise convert to a dictionary as before so that I can parse it with the same code?
I'm getting stuck because .name, .length, etc. are failing as the "rawData" is now just a string.
This is the code snippet I'm playing with to try and get this right and build the quick patch for right now:
// Scratch version of the fetch loop, run for a single habitat (i = 0) while
// the real loop header below stays commented out.
// for (i=0; i<habs; i++){
var i=0;
// Text of cell AL1, which holds the output of the IMPORTDATA formula.
importData = habSheet.getRange("AL1").getDisplayValue();
// JSON.stringify serialises the cell text into a JSON string literal; it does
// not parse it, so rawData (and therefore data[i]) is still a string.
rawData = JSON.stringify(importData);
// Logger.log(rawData);
data[i] = rawData;
// Original line, which parsed the UrlFetchApp response body:
// data[i] = JSON.parse(rawData.getContentText());
// Because data[i] is a string, .name and .attributes are undefined here and
// the loop condition below throws on .length.
names[i][0] = data[i].name;
for (j=0; j<data[i].attributes.length; j++){
value = data[i].attributes[j].value;
trait = data[i].attributes[j].trait_type;
// Copy the value into the column whose header matches the trait name.
for (k=0; k<=21; k++){
if (traitNames[0][k] == trait){
values[i][k] = value;
}
}
}
I've tried as above, and also without stringify, but I can't get this yet.
For reference, this is an example of the API response:
https://api.genopets.me/habitat/7vTz9dniU14Egpt8XHkMxP1x36BLRd15C11eUTaWhB19
Appreciate any help!
I have done a lot of testing to find a simple workaround but could not find one: the string produced by =join(",",IMPORTDATA(url,",")) (or by any of the other =IMPORTXXX functions) will not work with your code. When these IMPORT functions are used, the data is interpreted, and certain characters are removed or values are reformatted, so using them for this purpose is NOT recommended.
Since you mentioned the message you are getting is related to quota limits you should consider splitting the load of this script in multiple Apps Script projects. As a possible immediate solution you can make a copy of the script (or file bound to the script), authorize the new copy and try again.
To increase performance you could try using the calls in bulk, use this other function fetchAll (https://developers.google.com/apps-script/reference/url-fetch/url-fetch-app#fetchallrequests). There is a 100 request limit for this method. This will result in the same quota usage.

App Scripts If Statement always returning Else Condition

I am working on a script to perform an Index/Match task on two separate workbooks. The code seems to be working, but my If statement always returns its Else condition. I have logged the compared variables find and searchref and found that they do match at some point during the loop, but the If statement still returns its Else condition.
I suspect this has something to do with how I am comparing these arrays but I have not been able to figure it out.
Here is a snip of the first few columns and rows for the source spreadsheet for searchData I am trying to access the information in column B.
Source data for searchData
Here is the output from Logger.log for findData and searchData
Logger with labels
Logger arrays
Source data for findData
/**
 * Index/Match across two spreadsheets: for every row of the "Bill of
 * Materials" sheet, looks up the value in its second column (index 1) in the
 * first column of the "METAL PARTS" sheet and collects the neighbouring value
 * (column index 1) for writing into column C of the Bill of Materials.
 */
function generateBillOfMaterials() {
// --------------------------------------------------------------------
// Declare variables
var i, j
var find
var searchref
var found = []
// --------------------------------------------------------------------
var search_spreadsheet = SpreadsheetApp.openById("Searched-Spreadsheet-ID");
var find_spreadsheet = SpreadsheetApp.openById("1xg2yVimBwE5rGvSFMtID9O9CB7RauID34wqIH5LLTeE");
var ssheet = search_spreadsheet.getSheetByName("METAL PARTS");
var fsheet = find_spreadsheet.getSheetByName("Bill of Materials");
var FMaxR = fsheet.getMaxRows();
// Clear previous results in column C from row 2 downwards (contents only).
fsheet.getRange(2, 3, FMaxR, 1).clear({contentsOnly: true});
var findData = fsheet.getDataRange().getValues();
var searchData = ssheet.getDataRange().getValues();
for (i = 0; i < findData.length; i++) {
for (j = 0; j < searchData.length; j++) {
find = findData[i][1];
//Logger.log(find)
searchref = searchData[j][0];
//Logger.log(searchref)
if (find == searchref && find != "")
{
found[i] = searchData[j][1]
}
else
{
// Runs for every non-matching search row; the inner loop does not stop after
// a match, so a later non-matching row replaces a value stored earlier.
found[i] = ['n/a']
}
// found = ssheet.getRange(j+1,2,1,1).getDisplayValue();
// fsheet.getRange(i+1,16,1,1).setValue(found);
}
}
Logger.log(found)
// found holds one entry per findData row: a bare value where the last
// comparison matched, a one-element array ['n/a'] otherwise.
fsheet.getRange(2, 3, found.length, 1).setValues(found)
}
The main problem in the sample code is the else statement containing this:
found[i] = ['n/a']
This will overwrite whatever is found earlier in the loop, because even after a match has been found (and assigned to the found array), the loop continues comparing the remaining values in the inner loop.
The following approach shows how to correct this, making as few changes as possible to your existing code:
function generateBillOfMaterials() {
// --------------------------------------------------------------------
// Declare variables
var i, j
var find
var searchref
// --------------------------------------------------------------------
var search_spreadsheet = ... ;
var find_spreadsheet = ... ;
var ssheet = search_spreadsheet.getSheetByName("METAL PARTS");
var fsheet = find_spreadsheet.getSheetByName("Bill of Materials");
var FMaxR = fsheet.getMaxRows();
fsheet.getRange(2, 3, FMaxR, 1).clear({contentsOnly: true});
var findData = fsheet.getDataRange().getValues();
var found = new Array(findData.length).fill('n/a');
var searchData = ssheet.getDataRange().getValues();
for (i = 0; i < findData.length; i++) {
for (j = 0; j < searchData.length; j++) {
find = findData[i][1];
searchref = searchData[j][0];
if (find === searchref && find !== "") {
found[i] = searchData[j][1];
break;
}
}
}
const found2 = found.slice(1).map(x => [x]);
fsheet.getRange(2, 3, found.length-1, 1).setValues(found2);
}
Notes:
We pre-fill the array of "found" values with "n/a":
var found = new Array(findData.length).fill('n/a');
This allows us to overwrite "n/a" when we find a value - otherwise we leave the "n/a" untouched.
When a match is found, we break out of the inner loop using break.
Then we can remove the else condition - as we no longer need it.
The remaining changes are to ensure the final shape of the found data is a two-dimensional array which can be written to the spreadsheet.
The above approach involves repeatedly looping over the data in the inner loop.
In reality, we only need to visit each list once, in order to perform the lookups we need.
Implementing this alternative approach would basically be a rewrite of what you have - and I would imagine that what you have, even if it is somewhat inefficient, is perfectly OK for your needs. But I did want to mention this.
The other note which may be of interest is that my alternative approach is more-or-less the equivalent of using a Google Sheets vlookup formula. Apologies if you are already aware of that. And I have never tried using that formula across 2 separate files, anyway. But again, just wanted to mention it, for completeness.
Update 2
"Is there a lookup command that could be used in place of the for loops?"
It's more a question of avoiding the nested loops, and using a JavaScript data structure that supports lookups (a Map).
Here is a sketch:
// Precondition: searchData and findData have already been populated, as usual.
// Pass 1: index searchData by its first column, skipping row 0
// (presumably the header row).
let partsLookup = new Map();
i = 1;
while (i < searchData.length) {
  const searchRow = searchData[i];
  partsLookup.set(searchRow[0], searchRow[1]);
  i += 1;
}
// Pass 2: walk the BOM rows and resolve each key through the lookup map.
i = 1;
while (i < findData.length) {
  const bomKey = findData[i][1];
  var foundValue = partsLookup.get(bomKey);
  console.log(foundValue); // add this to the "found" array
  i += 1;
}
This is obviously not "finished" code - it just shows the approach. But no nested iterations are needed.
The number of loops performed is searchData.length + findData.length, instead of up to searchData.length * findData.length

getMessageById() slows down

I am working on a script that works with e-mails and it needs to fetch the timestamp, sender, receiver and subject for an e-mail. The Google script project has several functions in separate script files so I won't be listing everything here, but essentially the main function performs a query and passes it on to a function that fetches data:
// Run the Gmail query for the authorised user and hand one page of results
// (an array of message stubs) to the output function together with its
// length. `messages` is read twice without a guard.
// NOTE(review): presumably absent when the query matches nothing - confirm.
queriedMessages = Gmail.Users.Messages.list(authUsr.mail, {'q':query, 'pageToken':pageToken});
dataOutput_double(sSheet, queriedMessages.messages, queriedMessages.messages.length);
So this will send an object to the function dataOutput_double and the size of the array (if I try to get the size of the array inside the function that outputs data I get an error so that is why this is passed here). The function that outputs the data looks like this:
/**
 * Writes date, sender, recipient and subject of yesterday's messages to the
 * first sheet of the given spreadsheet.
 *
 * Messages are visited from the last array element to the first, and each
 * match is inserted as a new row 2, so the sheet ends up in array order.
 *
 * @param {Object} sSheet - spreadsheet whose first sheet receives the rows.
 * @param {Array<{id: string}>} messageInfo - message stubs carrying Gmail ids.
 * @param {number} aLenght - number of entries of messageInfo to process
 *     (parameter name spelled as in the original interface).
 * @returns {undefined}
 */
function dataOutput_double(sSheet, messageInfo, aLenght) {
  const sheet = sSheet.getSheets()[0];
  const row = 2;

  // Reference day: yesterday, in the script's local time zone.
  const dateCheck = new Date();
  dateCheck.setDate(dateCheck.getDate() - 1);

  for (let i = aLenght - 1; i >= 0; i--) {
    const message = GmailApp.getMessageById(messageInfo[i].id);
    const sentAt = message.getDate();

    // Compare the full calendar date. Checking only the day of the month
    // would also accept messages from the same day of another month or year.
    const isFromYesterday =
      sentAt.getFullYear() === dateCheck.getFullYear() &&
      sentAt.getMonth() === dateCheck.getMonth() &&
      sentAt.getDate() === dateCheck.getDate();
    if (!isFromYesterday) {
      continue;
    }

    sheet.insertRowBefore(row);
    // One write for the whole row instead of one call per cell.
    sheet.getRange(row, 1, 1, 4).setValues([
      [sentAt, message.getFrom(), message.getTo(), message.getSubject()],
    ]);
  }
  return;
}
Some of this code will get removed as there are leftovers from other types of handling this.
The problem as I noticed is that some messages take a long time to get with the getMessageById() method (~ 4 seconds to be exact) and when the script is intended to work with ~1500 mails every day this makes it drag on for quite a while forcing google to stop the script as it takes too long.
Any ideas on how to work around this issue, or is this just something that I have to live with?
Here is something I whipped up:
/**
 * Logs up to 200 unprocessed inbox messages from the last day to the active
 * sheet (one row per message: Date, From, To, Subject) and labels each one so
 * the next run skips it. Intended to be run from a time-based trigger.
 *
 * Uses the Gmail advanced service (Gmail.Users.Messages). "Label_4" is the id
 * of the label that marks a message as processed in the author's account.
 */
function processEmails() {
  const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  const headerFields = ["Date", "From", "To", "Subject"];

  const listing = Gmail.Users.Messages.list('me', {maxResults:200, q:"newer_than:1d AND label:INBOX NOT label:PROCESSED"});
  // The list response may carry no `messages` property; treat that as empty.
  const messages = listing.messages || [];
  if (messages.length === 0) {
    return;
  }

  const rows = messages.map((summary) => {
    const message = Gmail.Users.Messages.get('me', summary.id);
    Gmail.Users.Messages.modify({addLabelIds:["Label_4"]}, 'me', summary.id);

    // Build the row in column order, one cell per wanted header. Headers
    // arrive in arbitrary order and may be missing or repeated; fixed slots
    // keep every row the same width, which setValues requires.
    const row = headerFields.map(() => '');
    const headers = (message.payload && message.payload.headers) || [];
    headers.forEach((header) => {
      const column = headerFields.indexOf(header.name);
      if (column !== -1 && row[column] === '') {
        row[column] = header.value;
      }
    });
    return row;
  });

  // Append below the existing data, always starting in column A, so that
  // successive runs extend the same four columns.
  sheet.getRange(sheet.getLastRow() + 1, 1, rows.length, headerFields.length).setValues(rows);
}
NOTE: This is intended to run as a trigger. This will batch the trigger call in 200 messages. You will need to add the label PROCESSED to gmail. Also on the line:
Gmail.Users.Messages.modify( {addLabelIds:["Label_4"]},'me',messages[i].id);
it shows Label_4. In my gmail account "PROCESSED" is my 4th custom label.

Logging (lots of) Gmail Data into a Google Sheet

The object of my Google App Script is to log all basic data from email within a certain range of time (not yet implemented), from within all labels, into a Google Sheet.
This currently works to a limited degree. It will collect a small number of emails, but if I increase this (I have a lot of emails to log), this code will either ‘Exceeded maximum execution time’ if I pause for 500 or 1000 milliseconds (as below), otherwise I hit the other ratemax quota Service invoked too many times in a short time: gmail rateMax. Try Utilities.sleep(1000) between calls.
In the code below, I believe I attempted to repeat the main FOR loop 20 times with a WHILE loop, this was a way of seeing if I could perform this single grab n load var labelThreads = GmailApp.getUserLabelByName(label).getThreads(start, 1), 20 times. This would be one way to begin tracking "batches" -- It didn't quite work and I believe there is a better way to approach this, need some help.
/**
 * Entry point: writes the column headings into A1:G1 of the first sheet of
 * the active spreadsheet, then starts the e-mail collection.
 */
function whenV24() {
  var headings = [["Date","Label","To","From","Subject","Body","File Names"]];
  var firstSheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0];
  var headingRange = firstSheet.getRange("A1:G1");
  headingRange.setValues(headings);
  betterGetV24();
}
/**
 * Collects basic data (from, to, subject, date, label) of the first message
 * of each inbox thread and of threads under every user label, then writes the
 * collected columns to the first sheet of the active spreadsheet, starting at
 * row 2.
 */
function betterGetV24() {
var myspreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var mysheet = myspreadsheet.getSheets()[0];
var threads = GmailApp.getInboxThreads();
var messages = GmailApp.getMessagesForThreads(threads);
// ** LABEL NAME COLLECTION **
var labels = GmailApp.getUserLabels();
// ** CREATE EMPTY DATA ARRAYS **
// Each array holds one single-cell row per e-mail, ready for setValues.
var emailFrom = [];
var emailTo = [];
var emailBody = [];
var emailDate = [];
var emailLabel = [];
var emailSubject = [];
// ** LOAD "INBOX EMAIL DATA" INTO EMPTY ARRAYS **
// Only the first message ([0]) of every thread is recorded.
for(var i = 0; i < threads.length; i++) {
emailBody.push([" "]);
emailFrom.push([messages[i][0].getFrom()]);
emailTo.push([messages[i][0].getTo()]);
emailSubject.push([messages[i][0].getSubject()]);
emailDate.push([Utilities.formatDate(messages[i][0].getDate(), "GMT", "yyyy-MM-dd'T'HH:mm:ss'Z'")]);
emailLabel.push(["Inbox"]);
};
// ** LOAD "LABELED EMAIL DATA" INTO EMPTY ARRAYS **
for (var l = 0; l < labels.length; l++) { // ** Runs a for-loop over "labels array".
var label = labels[l].getName(); // Gets "this" label name.
var start = 0; // sets start number as 0
var tracker = 0;
// this section of code has to loop based on a separate set of logic
// start takes the values 0, 3, 6, ... 18 and each pass requests one thread at
// that offset, so at most seven threads per label are read.
while (start < 20){
tracker++;
Logger.log("tracker :" + tracker);
var labelThreads = GmailApp.getUserLabelByName(label).getThreads(start, 1); // Gets threads in "this" label. (*Set Limits Here*)
var labelMessages = GmailApp.getMessagesForThreads(labelThreads); // Gets array with each email from "this" thread.
Utilities.sleep(500); // pause in the loop for 500 milliseconds
for (var t = 0; t <labelThreads.length; t++){ // ** Runs a for-loop over threads in a label.
// labelMessages[t][0] is dereferenced here, before the undefined check below.
Logger.log("part 1 - inside for-loop over message number: " + labelMessages[t][0].getId());
Utilities.sleep(500);// **pause in the loop for 500 milliseconds
if (labelMessages[t] == undefined){} // If it's empty, skip.
else { // If it's not empty.
Logger.log("part 2 - inside if statement in for-loop > push emailData into arrays");
emailBody.push([" "]);
emailFrom.push([labelMessages[t][0].getFrom()]);
emailTo.push([labelMessages[t][0].getTo()]);
emailDate.push([Utilities.formatDate(labelMessages[t][0].getDate(), "GMT", "yyyy-MM-dd'T'HH:mm:ss'Z'")]);
emailSubject.push([labelMessages[t][0].getSubject()]);
emailLabel.push([labels[l].getName()]);
// Rewrites the entire label column (B) on every recorded e-mail; this is the
// only place column B is written.
mysheet.getRange(2,2,emailLabel.length,1).setValues(emailLabel);
}
}
// The log text says "+= 2", but the statement below advances start by 3.
Logger.log("part 3 - outside if statement -> start += 2")
var start = start + 3;
}
Logger.log("part 4 - outside while loop");
}
// ** THEN, LOG THE FILLED DATA ARRAYS TO ROWS **
//getSheetValues(startRow, startColumn, numRows, numColumns)
// Columns written here: D = from, C = to, A = date, E = subject, F = body.
mysheet.getRange(2,4,emailFrom.length,1).setValues(emailFrom);
mysheet.getRange(2,3,emailTo.length,1).setValues(emailTo);
mysheet.getRange(2,1,emailDate.length,1).setValues(emailDate);
mysheet.getRange(2,5,emailSubject.length,1).setValues(emailSubject);
mysheet.getRange(2,6,emailBody.length,1).setValues(emailBody);
}
I think at this point I should be using triggers, but it seems like I also have to track the previous batch of emails-logged and continue to the next batch. I'm also not aware of how to tie this in with triggers.
Thanks for reading. Any help is appreciated.
EDIT
I was setting data to the spreadsheet incorrectly here. For each loop I was trying to set data in the spreadsheet. I'm not sure how I ended up doing that. Simply moving this out of the loop and setting it later fixed my issue. updated code here: http://pastie.org/9793256#96,100,109,117,123-125,131,135-139
// Excerpt of the per-label thread loop as originally written: it records the
// first message of each thread and writes the label column on every pass.
for (var t = 0; t <labelThreads.length; t++){ // ** Runs a for-loop over threads in a label.
Logger.log("part 1 - inside for-loop over message number: " + labelMessages[t][0].getId());
Utilities.sleep(500);// **pause in the loop for 500 milliseconds
if (labelMessages[t] == undefined){} // If it's empty, skip.
else { // If it's not empty.
Logger.log("part 2 - inside if statement in for-loop > push emailData into arrays");
emailBody.push([" "]);
emailFrom.push([labelMessages[t][0].getFrom()]);
emailTo.push([labelMessages[t][0].getTo()]);
emailDate.push([Utilities.formatDate(labelMessages[t][0].getDate(), "GMT", "yyyy-MM-dd'T'HH:mm:ss'Z'")]);
emailSubject.push([labelMessages[t][0].getSubject()]);
emailLabel.push([labels[l].getName()]);
// ** INCORRECTLY TRYING TO SET DATA PER LOOP **
// One spreadsheet write covering the whole column per recorded e-mail.
mysheet.getRange(2,2,emailLabel.length,1).setValues(emailLabel);
}
}
You can use PropertiesService to locally store the index of the last processed email. You can then add the "start" parameter to the GmailApp.search() method to begin searching from the last position and set this whole thing to trigger every 5 or 10 minutes.
/**
 * Trigger handler that processes the inbox in slices of up to 200 threads.
 *
 * The offset of the next slice is kept in the script property "startIndex",
 * so each invocation continues where the previous one stopped.
 */
function myTrigger() {
  const properties = PropertiesService.getScriptProperties();
  // Properties are stored as strings and the key is absent on the first run;
  // convert to a number and fall back to 0 so search always gets a valid offset.
  let start = Number(properties.getProperty("startIndex")) || 0;
  const threads = GmailApp.search("in:inbox", start, 200);
  for (let t = 0; t < threads.length; t++) {
    // Log the thread using your existing code
    start++;
  }
  properties.setProperty("startIndex", String(start));
}

Iterators built with different continuation tokens are producing the same results in Google Apps

I am programming a Google Apps script within a spreadsheet. My use case includes iterating over a large set of folders that are children of a given one. The problem is that the processing takes longer than the maximum that Google allows (6 minutes), so I had to program my script to be able to resume later. I am creating a trigger to resume the task, but that is not part of my problem (at least, not the more important one at this moment).
My code looks like this (reduced to the minimum to illustrate my problem):
/**
 * Starts a fresh run: stores the id of the parent folder to scan, discards
 * any continuation token left over from an earlier run, and performs the
 * first processing pass.
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();

  // Remember which folder to scan so later passes can read it back.
  props.setProperty(SOURCE_PARENT_FOLDER_KEY, SOURCE_PARENT_FOLDER_ID);

  // No token means the next pass begins at the first child folder.
  props.deleteProperty(CONTINUATION_TOKEN_KEY);

  continueProcess();
}
/**
 * Processes child folders of the stored parent folder until either none are
 * left or MAX_RUNNING_TIME has elapsed, then records where to resume.
 */
function continueProcess() {
try {
var startTime = (new Date()).getTime();
var scriptProperties = PropertiesService.getScriptProperties();
var srcParentFolderId = scriptProperties.getProperty(SOURCE_PARENT_FOLDER_KEY);
var continuationToken = scriptProperties.getProperty(CONTINUATION_TOKEN_KEY);
// No stored token: start from the parent folder. Otherwise rebuild the
// iterator from the saved token.
var iterator = continuationToken == null ? DriveApp.getFolderById(srcParentFolderId).getFolders() : DriveApp.continueFolderIterator(continuationToken);
var timeLimitIsNear = false;
var currTime;
while (iterator.hasNext() && !timeLimitIsNear) {
var folder = iterator.next();
processFolder_(folder);
currTime = (new Date()).getTime();
// Elapsed milliseconds since this invocation started, checked after each folder.
timeLimitIsNear = (currTime - startTime >= MAX_RUNNING_TIME);
}
if (!iterator.hasNext()) {
// Everything processed: drop the token so a later launch starts over.
scriptProperties.deleteProperty(CONTINUATION_TOKEN_KEY);
} else {
// Stopped by the time limit: save the position for the next invocation.
var contToken = iterator.getContinuationToken();
scriptProperties.setProperty(CONTINUATION_TOKEN_KEY, contToken);
}
} catch (e) {
//sends a mail with the error
}
}
When launchProcess is invoked, it only prepares the program for the other method, continueProcess, that iterates over the set of folders. The iterator is obtained by using the continuation token, when it is present (it will not be there in the first invocation). When the time limit is near, continueProcess obtains the continuation token, saves it in a property and waits for the next invocation.
The problem I have is that the iterator is always returning the same set of folders although it has been built from different tokens (I have printed them, so I know they are different).
Any idea what I am doing wrong?
Thank you in advance.
It appears that your loop was not built correctly. (edit : actually, probably also another issue about how we break the while loop, see my thoughts about that in comments)
Note also that there is no special reason to use a try/catch in this context since I see no reason that the hasNext() method would return an error (but if you think so you can always add it)
here is an example that works, I added the trigger creation / delete lines to implement my test.
EDIT : code updated with logs and counter
// Drive id of the parent folder whose child folders are listed.
var SOURCE_PARENT_FOLDER_ID = '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI'
// Time budget per invocation. 5*35*6 evaluates to 1050 and is compared with
// an elapsed time measured in milliseconds, i.e. roughly one second.
var MAX_RUNNING_TIME = 5*35*6;
/**
 * Starts a fresh folder-listing run: resets the stored state, installs a
 * 10-minute trigger that resumes the work, and performs the first pass.
 */
function launchProcessFolder() {
  var scriptProperties = PropertiesService.getScriptProperties();
  scriptProperties.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  // Property values are strings; store the counter as one explicitly.
  scriptProperties.setProperty('counter', '0');
  scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
  // The trigger must name the worker of this variant. Registering a different
  // function name would make the trigger resume the wrong handler (or none).
  ScriptApp.newTrigger('continueProcessFolder').timeBased().everyMinutes(10).create();
  continueProcessFolder();
}
/**
 * Lists child folders of the stored parent folder until none are left or
 * MAX_RUNNING_TIME milliseconds have elapsed.
 *
 * When the time budget runs out with folders remaining, the continuation
 * token and the running counter are saved so the next invocation resumes.
 * When the iterator is exhausted - including when it was empty from the
 * start - the token is removed and the first project trigger is deleted.
 */
function continueProcessFolder() {
  var startTime = Date.now();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  // No stored token: start from the parent folder. Otherwise resume.
  var iterator = continuationToken == null
    ? DriveApp.getFolderById(srcParentFolderId).getFolders()
    : DriveApp.continueFolderIterator(continuationToken);
  var timeLimitIsNear = false;
  var counter = Number(scriptProperties.getProperty('counter'));

  while (iterator.hasNext() && !timeLimitIsNear) {
    var folder = iterator.next();
    counter++;
    Logger.log(counter+' - '+folder.getName());
    timeLimitIsNear = (Date.now() - startTime >= MAX_RUNNING_TIME);
  }

  // Decide on what is actually left rather than on the time flag alone, so a
  // run that finishes exactly as the budget expires is not saved as unfinished.
  if (iterator.hasNext()) {
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', iterator.getContinuationToken());
    scriptProperties.setProperty('counter', String(counter));
    Logger.log('write to scriptProperties');
  } else {
    scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
    // NOTE: removes the first project trigger, whichever it is - this assumes
    // the resume trigger is the only one in the project.
    var triggers = ScriptApp.getProjectTriggers();
    if (triggers.length > 0) {
      ScriptApp.deleteTrigger(triggers[0]);
    }
    Logger.log('******************no more folders**************');
  }
}
EDIT 2 :
(see also last comment)
Here is a test with the script modified to get files in a folder. From my different tests it appears that the operation is very fast and that I needed to set a quite short timeout limit to make it happen before reaching the end of the list.
I added a couple of Logger.log() and a counter to see exactly what was happening and to know for sure what was interrupting the while loop.
With the current values I can see that it works as expected, the first (and second) break happens with time limitation and the logger confirms that the token is written. On a third run I can see that all files have been dumped.
// Drive id of the folder whose files are listed.
var SOURCE_PARENT_FOLDER_ID = '0B3qSFd3iikE3MS0yMzU4YjQ4NC04NjQxLTQyYmEtYTExNC1lMWVhNTZiMjlhMmI'
// Time budget per invocation. 5*35*6 evaluates to 1050 and is compared with
// an elapsed time measured in milliseconds, i.e. roughly one second.
var MAX_RUNNING_TIME = 5*35*6;
/**
 * Starts a fresh file-listing run: resets the stored state, installs a
 * 10-minute trigger for continueProcess, and performs the first pass.
 */
function launchProcess() {
  var props = PropertiesService.getScriptProperties();

  // Reset the persisted state for a new run.
  props.setProperty('SOURCE_PARENT_FOLDER_KEY', SOURCE_PARENT_FOLDER_ID);
  props.setProperty('counter', 0);
  props.deleteProperty('CONTINUATION_TOKEN_KEY');

  // Schedule the resume handler, then do the first pass right away.
  var resumeSchedule = ScriptApp.newTrigger('continueProcess').timeBased().everyMinutes(10);
  resumeSchedule.create();
  continueProcess();
}
/**
 * Lists files of the stored parent folder until none are left or
 * MAX_RUNNING_TIME milliseconds have elapsed.
 *
 * When the time budget runs out with files remaining, the continuation token
 * and the running counter are saved so the next invocation resumes. When the
 * iterator is exhausted - including when it was empty from the start - the
 * token is removed and the first project trigger is deleted.
 */
function continueProcess() {
  var startTime = Date.now();
  var scriptProperties = PropertiesService.getScriptProperties();
  var srcParentFolderId = scriptProperties.getProperty('SOURCE_PARENT_FOLDER_KEY');
  var continuationToken = scriptProperties.getProperty('CONTINUATION_TOKEN_KEY');
  // No stored token: start from the parent folder. Otherwise resume.
  var iterator = continuationToken == null
    ? DriveApp.getFolderById(srcParentFolderId).getFiles()
    : DriveApp.continueFileIterator(continuationToken);
  var timeLimitIsNear = false;
  var counter = Number(scriptProperties.getProperty('counter'));

  while (iterator.hasNext() && !timeLimitIsNear) {
    var file = iterator.next();
    counter++;
    Logger.log(counter+' - '+file.getName());
    timeLimitIsNear = (Date.now() - startTime >= MAX_RUNNING_TIME);
  }

  // Decide on what is actually left rather than on the time flag alone, so a
  // run that finishes exactly as the budget expires is not saved as unfinished.
  if (iterator.hasNext()) {
    scriptProperties.setProperty('CONTINUATION_TOKEN_KEY', iterator.getContinuationToken());
    scriptProperties.setProperty('counter', String(counter));
    Logger.log('write to scriptProperties');
  } else {
    scriptProperties.deleteProperty('CONTINUATION_TOKEN_KEY');
    // NOTE: removes the first project trigger, whichever it is - this assumes
    // the resume trigger is the only one in the project.
    var triggers = ScriptApp.getProjectTriggers();
    if (triggers.length > 0) {
      ScriptApp.deleteTrigger(triggers[0]);
    }
    Logger.log('******************no more files**************');
  }
}
As of January 1, 2016 this is still a problem. The bug report lists a solution using the Advanced Drive API, which is documented here, under "Listing folders".
If you don't want to use Advanced services, an alternative solution would be to use the Folder Iterator to make an array of File Ids.
It appears to me that the Folder Iterator misbehaves only when created using DriveApp.continueFolderIterator(). When using this method, only 100 Folders are included in the returned Folder Iterator.
Using DriveApp.getFolders() and only getting Folder Ids, I am able to iterate through 694 folders in 2.734 seconds, according to the Execution transcript.
/**
 * Collects the ids of all folders returned by DriveApp.getFolders().
 *
 * @returns {string[]} the folder ids, in iteration order.
 */
function allFolderIds() {
  var collected = [];
  for (var folderIterator = DriveApp.getFolders(); folderIterator.hasNext(); ) {
    var folder = folderIterator.next();
    collected.push(folder.getId());
  }
  Logger.log('Total folders: %s', collected.length);
  return collected;
}
I used the returned array to work my way through all the folders, using a trigger. The Id array is too big to save in the cache, so I created a temp file and used the cache to save the temp file Id.
This is caused by a bug in GAS:
https://code.google.com/p/google-apps-script-issues/issues/detail?id=4116
It appears you're only storing a single continuation token. If you want to recursively iterate over a set of folders and allow the script to pause at any point (e.g. to avoid the timeout) and resume later, you'll need to store a bunch more continuation tokens (e.g. in an array of objects).
I've outlined a template that you can use here to get it working properly. This worked with thousands of nested files over the course of 30+ runs perfectly.