GitHub Actions - Open/Write to two groups in parallel

I'm creating a GitHub action, where I have two loops running in parallel, writing to the console.
Example:
async function loop(name) {
core.startGroup(name);
for (var i = 0; i < 10; i++) {
core.info(`[INFO] {${name}}: ${i} { type = log, task = ${name} }`);
await delay(1000); // delay = promise that wraps setTimeout
}
core.endGroup();
}
loop('a');
loop('b');
I expect each loop to write to its respective group
Instead, the output in the job logs is not grouped the way I expect, which isn't a surprise since loop('b') overrides the group opening.
Is there a way to write output to a specific group, even when running in parallel?

I couldn't reproduce your result using github-script, result here. Maybe it's fixed in that version. Here's the GH action I used:
name: "Checks loop"
on: [push]
jobs:
check_group_loop:
runs-on: ubuntu-latest
steps:
- name: Use Node.js
uses: actions/github-script@v3
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
function loop(name) {
core.startGroup(name);
for (var i = 0; i < 10; i++) {
core.info("[INFO] " + name + ": " + i );
}
core.endGroup();
}
loop('a');
loop('b');
Maybe you can show the whole action so that we can check?
Anyway, the problem seems to be that the calls are asynchronous, so instead of using startGroup/endGroup directly, could you wrap the logging in a group?
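For example, here's a minimal sketch (untested) that buffers each task's lines in memory and only writes them to the console inside a group once the task has finished, so two parallel tasks can't interleave inside each other's groups:
const core = require('@actions/core');
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));

async function loop(name) {
  const lines = [];
  for (let i = 0; i < 10; i++) {
    lines.push(`[INFO] {${name}}: ${i} { type = log, task = ${name} }`);
    await delay(1000);
  }
  // Touch the console only after the work is done, so the group stays contiguous.
  core.startGroup(name);
  lines.forEach(line => core.info(line));
  core.endGroup();
}

Promise.all([loop('a'), loop('b')]);
The trade-off is that nothing shows up in the log until a task completes.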

Related

How to make a task job with PM2?

I want to make a repeatable job that sends mail every 15 minutes, taking data from a database table. In Node.js I can create the job, but with PM2 I don't understand where to place the code or how it works.
Use the --cron option:
-c --cron <cron_pattern>
For example:
pm2 start sendMail.js --cron "*/15 * * * *"
PM2 will now restart the sendMail.js script on the hour and at 15, 30, and 45 minutes past the hour.
This is what worked for me: I split the cron into a separate file which runs in a different process, because I want to free up resources after the cron has completed execution.
ecosystem.config.js:
module.exports = {
/**
* Application configuration section
* http://pm2.keymetrics.io/docs/usage/application-declaration/
*/
apps: [
// Main API Hosting
{
name: 'API',
script: 'bin/www',
env: {
COMMON_VARIABLE: 'true'
},
instances: 1,
exec_mode: 'cluster',
watch: false,
autorestart: true
},
{
name: 'CRON',
script: "crons/cronjob.js",
instances: 1,
exec_mode: 'fork',
cron_restart: "0,30 * * * *",
watch: false,
autorestart: false
}
]
};
The following lines are important in the cron executable
cron_restart: "0,30 * * * *" <- cron expression
autorestart: false <- important because otherwise PM2 will immediately restart the cron after completion
Also make sure instances is 1, otherwise multiple cron processes will run.
Key caveats:
Whenever you do pm2 restart all, the cron job will run irrespective of the cron expression. If it's critical to run only at specific times, add this additional check at the beginning of the cron file:
if (new Date().getHours() !== 0 ) {
console.log(`Current hours is ${new Date().getHours()}, not running.`)
process.exit(0);
}
If you use a PM2 ecosystem file, add the cron sequence to the script param in the config file by wrapping it in single quotes. Somehow double quotes didn't work for me.
module.exports = {
apps : [{
name : "Send-mail",
script : "./sendMail.js --cron '*/15 * * * *'",
watch : true
}]
}
alternatively (my preference)
module.exports = {
apps : [{
name : "Send-mail",
script : "./sendMail.js",
cron_restart: "*/15 * * * *",
watch : true
}]
}
If you execute the following command:
pm2 start handle-cron.js --cron "*/15 * * * *"
PM2 will initiate your cron job, but it will also continuously restart the job after each completion, resulting in an infinite loop.
You want to set instances to 1 and disable autorestart:
pm2 start handle-cron.js --no-autorestart --instances 1 --cron "0 * * * *"
You can also use the node-schedule module which allows you to define cron style rules. Then, you can run the program normally in pm2, I use this with PM2 for a lot of projects and it has never let me down.
var schedule = require('node-schedule');
var rule = new schedule.RecurrenceRule();
rule.hour = [10]; // 10am
rule.minute = [0]; // 0mins
var job = schedule.scheduleJob(rule, function(){
console.log("10am every day")
});
//Rule 2 - 6am every wednesday
var rule2 = new schedule.RecurrenceRule();
rule2.dayOfWeek = 3; // 0 = Sunday
rule2.hour = 6;
rule2.minute = 0;
var job2 = schedule.scheduleJob(rule2, function(){
console.log("Every Wednesday # 6am");
});
var rule3 = new schedule.RecurrenceRule();
rule3.minute = [0, 15, 30, 45]; // Specific Minutes
var job3 = schedule.scheduleJob(rule3, function(){
console.log("Run at specific minutes")
});
It also supports CRON style rules, but I prefer the above method.
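For reference, the cron-style form would look roughly like this (a minimal sketch matching the 15-minute example from the question):
var schedule = require('node-schedule');
// Same "every 15 minutes" schedule, expressed as a cron pattern:
var job = schedule.scheduleJob('*/15 * * * *', function () {
  console.log("It's been 15 minutes!");
});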
Check out the documentation at https://www.npmjs.com/package/node-schedule
There are three ways to implement cron through PM2:
pm2 command line
pm2 ecosystem.config file
pm2-api
Refer to this; it helped me.
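The first two are shown in the answers above. For the pm2-api route, a rough sketch (untested; the option names mirror the ecosystem file) might look like this:
const pm2 = require('pm2');

pm2.connect(err => {
  if (err) { console.error(err); process.exit(2); }
  pm2.start({
    name: 'send-mail',
    script: './sendMail.js',
    cron_restart: '*/15 * * * *', // same cron expression as above
    autorestart: false,           // avoid the immediate-restart loop described earlier
    instances: 1
  }, startErr => {
    pm2.disconnect(); // the daemon keeps managing the job after this script exits
    if (startErr) throw startErr;
  });
});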
Thank you for your answer; I did it this way and just set up the email.
First, install node-crontab:
npm install node-crontab
var crontab = require('node-crontab');
var jobId = crontab.scheduleJob("*/15 * * * *", function(){
//This will call this function every 15 minutes
console.log("It's been 15 minutes!");
});

Gmail Snooze error: moveSnoozes "service invoked too many times in a short time"

I'm trying to find a way to snooze e-mail in my Gmail. Yes, I had Mailbox.app on my iDevice, but I want a way to Snooze or "Laterize" some emails.
I found a Blogspot post and a Lifehacker one. The Lifehacker article just adds more pictures to the Blogspot one. I followed the Lifehacker instructions. I made only one deviation from the code: in the two instances where the code says "7", I replaced it with "200".
The code I have is:
var MARK_UNREAD = false;
var ADD_UNSNOOZED_LABEL = false;
function getLabelName(i) {
return "Snooze/Snooze " + i + " days";
}
function setup() {
// Create the labels we’ll need for snoozing
GmailApp.createLabel("Snooze");
for (var i = 1; i <= 200; ++i) {
GmailApp.createLabel(getLabelName(i));
}
if (ADD_UNSNOOZED_LABEL) {
GmailApp.createLabel("Unsnoozed");
}
}
function moveSnoozes() {
var oldLabel, newLabel, page;
for (var i = 1; i <= 200; ++i) {
newLabel = oldLabel;
oldLabel = GmailApp.getUserLabelByName(getLabelName(i));
page = null;
// Get threads in "pages" of 100 at a time
while(!page || page.length == 100) {
page = oldLabel.getThreads(0, 100);
if (page.length > 0) {
if (newLabel) {
// Move the threads into "today’s" label
newLabel.addToThreads(page);
} else {
// Unless it’s time to unsnooze it
GmailApp.moveThreadsToInbox(page);
if (MARK_UNREAD) {
GmailApp.markThreadsUnread(page);
}
if (ADD_UNSNOOZED_LABEL) {
GmailApp.getUserLabelByName("Unsnoozed")
.addToThreads(page);
}
}
// Move the threads out of "yesterday’s" label
oldLabel.removeFromThreads(page);
}
}
}
}
But I'm getting an error:
Your script, Gmail Snooze, has recently failed to finish
successfully. A summary of the failure(s) is shown below.
Details:
Start: 9/10/13 12:16 AM
Function: moveSnoozes
Error Message: Service invoked too many times in a short time: gmail rateMax. Try Utilities.sleep(1000) between calls. (line 24, file "Code")
Trigger: time-based
End: 9/10/13 12:22 AM
Does anybody know how I can make use of Utilities.sleep(1000)? Where do I type that in?
The error you got is because creating 200 labels at the speed-of-code is too fast for Gmail and considered abuse of its API. So, as the error message suggests, you could use Utilities.sleep(1000) to wait a second between each label creation.
I'm not familiar with the script (Boomerang works well for me), but you might try making your 30, 60, 90, and 180 day labels by hand and letting the script pick up on them. Then you don't need to worry about the label creation at all.
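If you go the hand-made-labels route, a hedged sketch of a setup() that only creates labels that don't already exist (and sleeps between the calls it does make) could look like this:
function setup() {
  GmailApp.createLabel("Snooze");
  for (var i = 1; i <= 200; ++i) {
    // Reuse hand-made labels; only create (and pause) when one is missing.
    if (!GmailApp.getUserLabelByName(getLabelName(i))) {
      GmailApp.createLabel(getLabelName(i));
      Utilities.sleep(1000); // stay under the rate limit
    }
  }
  if (ADD_UNSNOOZED_LABEL) {
    GmailApp.createLabel("Unsnoozed");
  }
}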
In an effort to help you better understand the code you are reading, below is an explanation of what's going on and how to insert the Utilities.sleep() call. You will need to decide how long you want to wait: at 1000 ms per label you'd be waiting about 200 seconds in total, which will do just fine if you have three minutes to spare. You could experiment to see at what point the Gmail app says 'Whoa, that's enough there. Take it slower please.'
NOTE: I am not attempting to re-engineer this to allow for better creation/management of labels. There are probably a number of enhancements that can be made to the code, and although tempting, they will be left as an exercise for the reader.
RE your pastie:
function setup() {
// This creates 1 label called "Snooze"
// it probably isn't causing the problem
GmailApp.createLabel("Snooze");
// This piece is a loop that will execute 200 times.
// It will do so as fast as GAS will let it.
// This is probably where the problem is...
for (var i = 1; i <= 200; ++i) {
// Create the label
GmailApp.createLabel(getLabelName(i));
// We should wait a bit before making the next one
// Utilities.sleep() will do well to go here
Utilities.sleep(/*Milliseconds to wait*/);
}
// I doubt this is causing the problem either.
// It's only one label creation
if (ADD_UNSNOOZED_LABEL) {
GmailApp.createLabel("Unsnoozed");
}
}

Hiding initial value in Imported Database Model of Enterprise Architect

I am currently creating a database model by reverse-engineering MS SQL Server 2008 into Sparx Enterprise Architect version 10.
I have been able to import tables, and hide items that are not required (such as operations and stereotypes). However, I did not find an option to hide the column Initial values when importing tables or in the diagram properties, which left me the option of editing each column one by one (time consuming).
Am I missing any configuration to hide the Initial value? If such a configuration does not exist, what is the best method to hide/remove the Initial value without configuration?
Finally got the solution by creating an EA script. Feel free to use and enhance it :)
!INC Local Scripts.EAConstants-JScript
function OnProjectBrowserScript()
{
// Show the script output window
Repository.EnsureOutputVisible( "Script" );
var treeSelectedType = Repository.GetTreeSelectedItemType();
switch ( treeSelectedType )
{
case otElement: // if select table
{
removeInitial(Repository.GetContextObject());
break;
}
case otPackage: // if select package containing table
{
var selectedObject as EA.Element;
selectedObject = Repository.GetContextObject();
for ( var i = 0 ; i < selectedObject.Elements.Count ; i++ )
{
removeInitial(selectedObject.Elements.GetAt( i ));
}
break;
}
default:
{
// Error message
Session.Prompt( "This script does not support items of this type.", promptOK );
}
}
}
function removeInitial(selectedObject)
{
for (var i = 0 ; i < selectedObject.Attributes.Count; i++)
{
var attrib as EA.Attribute;
attrib = selectedObject.Attributes.GetAt(i);
attrib.Default = "";
attrib.Update();
}
Session.Output("finished updating " + selectedObject.Name);
}
OnProjectBrowserScript();

How to trigger manual clean of Hudson workspaces

We have a Hudson cluster with eight nodes. When a particular code branch is no longer in active use, we disable the build job, but the workspaces for that job still hang around taking up space on all the nodes.
I am looking for a way to trigger workspace cleanup across all nodes. Note that I am not looking for a "clean workspace before build" solution.
You do not need to write a plugin. You can write a job that utilizes the Groovy plugin to run a Groovy system script. The job would run, say, nightly. It would identify disabled projects and erase their workspaces. Here is a link to the Hudson Model API that your script will tap into. There is a Groovy script console at http://<hudson-server>/script that is very useful for debugging.
Here is a code snippet that should be of direct benefit to you. Run it in the script console and examine the output:
def hi = hudson.model.Hudson.instance
hi.getItems(hudson.model.Job).each {
job ->
println(job.displayName)
println(job.isDisabled())
println(job.workspace)
}
You may also find code snippets in this answer useful. They refer to Jenkins API, but on this level I do not think there is a difference between Jenkins and Hudson.
Update:
Here's how you can do it on multiple slaves: create a multi-configuration job (also called "matrix job") that runs on all the slaves. On each slave the following system Groovy script will give you for every job its workspace on that slave (as well as enabled/disabled flag):
def hi = hudson.model.Hudson.instance
def thr = Thread.currentThread()
def build = thr?.executable
def node = build.executor.owner.node
hi.getItems(hudson.model.Job).each {
job ->
println("---------")
println(job.displayName)
println(job.isDisabled())
println(node.getWorkspaceFor(job))
}
As the script runs on the slave itself you can wipe out the workspace directly from it. Of course, the workspace may not exist, but that's not a problem. Note that you write the script only once - Jenkins will run it on all the slaves you specify in the matrix job automatically.
I have tried the following script and it works for a single node:
def hi = hudson.model.Hudson.instance
hi.getItems(hudson.model.Job).each {
job ->
if(job.isDisabled())
{
println(job.displayName)
job.doDoWipeOutWorkspace()
}
}
The following Groovy script wipes workspaces of certain jobs on all nodes. Execute it from "Jenkins host"/computer/(master)/script
In the TODO part, change the job name to the one that you need.
import hudson.model.*
// For each job
for (item in Hudson.instance.items)
{
jobName = item.getFullDisplayName()
// check that job is not building
if (!item.isBuilding())
{
// TODO: Modify the following condition to select which jobs to affect
if (jobName == "MyJob")
{
println("Wiping out workspaces of job " + jobName)
customWorkspace = item.getCustomWorkspace()
println("Custom workspace = " + customWorkspace)
for (node in Hudson.getInstance().getNodes())
{
println(" Node: " + node.getDisplayName())
workspacePath = node.getWorkspaceFor(item)
if (workspacePath == null)
{
println(" Could not get workspace path")
}
else
{
if (customWorkspace != null)
{
workspacePath = node.getRootPath().child(customWorkspace)
}
pathAsString = workspacePath.getRemote()
if (workspacePath.exists())
{
workspacePath.deleteRecursive()
println(" Deleted from location " + pathAsString)
}
else
{
println(" Nothing to delete at " + pathAsString)
}
}
}
}
}
else
{
println("Skipping job " + jobName + ", currently building")
}
}
It's a bit late, but I ran into the same problem. My script will check if at least 2 GB of space is available. If this is not the case, all workspaces on the node are cleared to free space.
import hudson.FilePath.FileCallable
import hudson.slaves.OfflineCause
for (node in Jenkins.instance.nodes) {
computer = node.toComputer()
if (computer.getChannel() == null) continue
rootPath = node.getRootPath()
size = rootPath.asCallableWith({f, c -> f.getUsableSpace()} as FileCallable).call()
roundedSize = size / (1024 * 1024 * 1024) as int
println("node: " + node.getDisplayName() + ", free space: " + roundedSize + "GB")
if (roundedSize < 2) {
computer.setTemporarilyOffline(true, [toString: {"disk cleanup"}] as OfflineCause)
for (item in Jenkins.instance.items) {
jobName = item.getFullDisplayName()
if (item.isBuilding()) {
println(".. job " + jobName + " is currently running, skipped")
continue
}
println(".. wiping out workspaces of job " + jobName)
workspacePath = node.getWorkspaceFor(item)
if (workspacePath == null) {
println(".... could not get workspace path")
continue
}
println(".... workspace = " + workspacePath)
customWorkspace = item.getCustomWorkspace()
if (customWorkspace != null) {
workspacePath = node.getRootPath().child(customWorkspace)
println(".... custom workspace = " + workspacePath)
}
pathAsString = workspacePath.getRemote()
if (workspacePath.exists()) {
workspacePath.deleteRecursive()
println(".... deleted from location " + pathAsString)
} else {
println(".... nothing to delete at " + pathAsString)
}
}
computer.setTemporarilyOffline(false, null)
}
}
I was recently also looking to clean up my Jenkins workspaces, but with a little twist: I wanted to only remove workspaces from jobs that no longer exist. This is because Jenkins does not get rid of workspaces when deleting a job, which is pretty annoying.
And we only use a master at the moment, no separate nodes.
I found a script somewhere (can't find the link anymore) but tweaked it a bit for our usage, putting it in a Jenkins job with an 'Execute system Groovy script' build step, running daily:
import hudson.FilePath
import jenkins.model.Jenkins
import hudson.model.Job
def deleteUnusedWorkspace(FilePath root, String path) {
root.list().sort{child->child.name}.each { child ->
String fullName = path + child.name
def item = Jenkins.instance.getItemByFullName(fullName);
println "Checking '$fullName'"
try{
if (item.class.canonicalName == 'com.cloudbees.hudson.plugins.folder.Folder') {
println "-> going deeper into the folder"
deleteUnusedWorkspace(root.child(child.name), "$fullName/")
} else if (item == null) {
// this code is never reached, non-existing projects generate an exception
println "Deleting (no such job): '$fullName'"
child.deleteRecursive()
} else if (item instanceof Job && !item.isBuildable()) {
// don't remove the workspace for disabled jobs!
//println "Deleting (job disabled): '$fullName'"
//child.deleteRecursive()
}
} catch (Exception exc) {
println " Exception happened: " + exc.message
println " So we delete '" + child + "'!"
child.deleteRecursive()
}
}
}
println "Beginning of cleanup script."
// loop over possible slaves
for (node in Jenkins.instance.nodes) {
println "Processing $node.displayName"
def workspaceRoot = node.rootPath.child("workspace");
deleteUnusedWorkspace(workspaceRoot, "")
}
// do the master itself
deleteUnusedWorkspace(Jenkins.instance.rootPath.child("workspace"), "")
println "Script has completed."
Might need some individual tweaking though.
Obviously you should run this script with all delete statements commented out first, and make sure you have a backup before doing an actual run.
It sounds like you are looking for a "delete workspace when disabling build" solution. You could write a Hudson plugin to do this, which is probably overkill.
If I had to do this (which I wouldn't, as we don't have a disk space shortage), I would write a Unix script to find all disabled jobs under the hudson directory. A job is represented by an XML file. Then I'd have the script delete the workspace for any matches. And I'd probably set it up in cron so it runs nightly or weekly or whatever is appropriate in the environment.

Exceeded maximum execution time in Google Apps Script [duplicate]

My Google Apps Script is iterating through the user's Google Drive files, copying and sometimes moving files to other folders. The script is always stopped after a certain number of minutes with no error message in the log.
EDITOR'S NOTE: The time limit has varied over time and might vary between "consumer" (free) and "Workspace" (paid) accounts, but as of December 2022 most of the answers are still valid.
I am sorting tens or sometimes thousands of files in one run.
Are there any settings or workarounds?
One thing you could do (this of course depends on what you are trying to accomplish) is:
Store the necessary information (e.g., a loop counter) in a spreadsheet or another permanent store (e.g., ScriptProperties).
Have your script terminate every five minutes or so.
Set up a time-driven trigger to run the script every five minutes (or create a trigger programmatically using the Script service).
On each run, read the saved data from the permanent store you've used and continue to run the script from where it left off.
This is not a one-size-fits-all solution; if you post your code, people will be able to assist you better.
Here is a simplified code excerpt from a script that I use every day:
function runMe() {
var startTime= (new Date()).getTime();
//do some work here
var scriptProperties = PropertiesService.getScriptProperties();
var startRow= scriptProperties.getProperty('start_row');
for(var ii = startRow; ii <= size; ii++) {
var currTime = (new Date()).getTime();
if(currTime - startTime >= MAX_RUNNING_TIME) {
scriptProperties.setProperty("start_row", ii);
ScriptApp.newTrigger("runMe")
.timeBased()
.at(new Date(currTime+REASONABLE_TIME_TO_WAIT))
.create();
break;
} else {
doSomeWork();
}
}
//do some more work here
}
NOTE #1: The variable REASONABLE_TIME_TO_WAIT should be large enough for the new trigger to fire. (I set it to 5 minutes but I think it could be less than that.)
NOTE #2: doSomeWork() must be a function that executes relatively quickly (I would say less than 1 minute).
NOTE #3: Google has deprecated Script Properties and introduced the Properties Service in its stead. The function has been modified accordingly.
NOTE #4: The second time the function is called, it reads the for-loop counter back from the property as a string, so you have to convert it into an integer.
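To make the excerpt self-contained, here is a sketch of the pieces the notes refer to; the constant values are illustrative assumptions, not prescribed limits:
var MAX_RUNNING_TIME = 4.5 * 60 * 1000;      // stop well before the 6-minute ceiling
var REASONABLE_TIME_TO_WAIT = 5 * 60 * 1000; // give the new trigger time to fire (NOTE #1)
// The stored property comes back as a string (NOTE #4), so coerce it:
var startRow = parseInt(scriptProperties.getProperty('start_row'), 10) || 1;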
Quotas
The maximum execution time for a single script is 6 mins / execution
- https://developers.google.com/apps-script/guides/services/quotas
But there are other limitations to familiarize yourself with. For example, you're only allowed a total trigger runtime of 1 hour / day, so you can't just break up a long function into 12 different 5 minute blocks.
Optimization
That said, there are very few reasons why you'd really need to take six minutes to execute. JavaScript should have no problem sorting thousands of rows of data in a couple seconds. What's likely hurting your performance are service calls to Google Apps itself.
You can write scripts to take maximum advantage of the built-in caching, by minimizing the number of reads and writes. Alternating read and write commands is slow. To speed up a script, read all data into an array with one command, perform any operations on the data in the array, and write the data out with one command.
- https://developers.google.com/apps-script/best_practices
Batching
The best thing you can possibly do is reduce the number of service calls. Google enables this by allowing batch versions of most of their API calls.
As a trivial example, instead of this:
for (var i = 1; i <= 100; i++) {
SpreadsheetApp.getActiveSheet().deleteRow(i);
}
Do this:
SpreadsheetApp.getActiveSheet().deleteRows(1, 100);
In the first loop, not only did you need 100 calls to deleteRow on the sheet, but you also needed to get the active sheet 100 times as well. The second variation should perform several orders of magnitude better than the first.
Interweaving Reads and Writes
Additionally, you should also be very careful to not go back and forth frequently between reading and writing. Not only will you lose potential gains in batch operations, but Google won't be able to use its built-in caching.
Every time you do a read, we must first empty (commit) the write cache to ensure that you're reading the latest data (you can force a write of the cache by calling SpreadsheetApp.flush()). Likewise, every time you do a write, we have to throw away the read cache because it's no longer valid. Therefore if you can avoid interleaving reads and writes, you'll get full benefit of the cache.
- http://googleappsscript.blogspot.com/2010/06/optimizing-spreadsheet-operations.html
For example, instead of this:
sheet.getRange("A1").setValue(1);
sheet.getRange("B1").setValue(2);
sheet.getRange("C1").setValue(3);
sheet.getRange("D1").setValue(4);
Do this:
sheet.getRange("A1:D1").setValues([[1,2,3,4]]);
Chaining Function Calls
As a last resort, if your function really can't finish in under six minutes, you can chain together calls or break up your function to work on a smaller segment of data.
You can store data in the Cache Service (temporary) or Properties Service (permanent) buckets for retrieval across executions (since Google Apps Script executions are stateless).
If you want to kick off another event, you can create your own trigger with the Trigger Builder Class or setup a recurring trigger on a tight time table.
Also, try to minimize the amount of calls to google services. For example, if you want to change a range of cells in the spreadsheets, don't read each one, mutate it and store it back.
Instead read the whole range (using Range.getValues()) into memory, mutate it and store all of it at once (using Range.setValues()).
This should save you a lot of execution time.
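A rough sketch of that pattern (the sheet variable and the A1:D100 range holding numbers are just example assumptions):
var range = sheet.getRange("A1:D100");
var values = range.getValues();          // one read for the whole range
for (var r = 0; r < values.length; r++) {
  for (var c = 0; c < values[r].length; c++) {
    values[r][c] = values[r][c] * 2;     // mutate in memory only
  }
}
range.setValues(values);                 // one write for the whole range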
Anton Soradoi's answer seems OK but consider using Cache Service instead of storing data into a temporary sheet.
function getRssFeed() {
var cache = CacheService.getPublicCache();
var cached = cache.get("rss-feed-contents");
if (cached != null) {
return cached;
}
var result = UrlFetchApp.fetch("http://example.com/my-slow-rss-feed.xml"); // takes 20 seconds
var contents = result.getContentText();
cache.put("rss-feed-contents", contents, 1500); // cache for 25 minutes
return contents;
}
Also note that as of April 2014 the limitation of script runtime is 6 minutes.
G Suite Business / Enterprise / Education and Early Access users:
As of August 2018, max script runtime is now set to 30 minutes for these users.
Figure out a way to split up your work so it takes less than 6 minutes, as that's the limit for any script. On the first pass, you can iterate and store the list of files and folders in a spreadsheet and add a time-driven trigger for part 2.
In part 2, delete each entry in the list as you process it. When there are no items in the list, delete the trigger.
This is how I'm processing a sheet of about 1500 rows that gets spread to about a dozen different spreadsheets. Because of the number of calls to spreadsheets, it times out, but continues when the trigger runs again.
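A compressed sketch of that two-part pattern (the 'queue' sheet layout, the processFile() helper, and the handler name are all hypothetical):
// Part 2: run from a recurring time-driven trigger; each run resumes where the last one stopped.
function processNextBatch() {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('queue');
  while (sheet.getLastRow() > 0) {
    var fileId = sheet.getRange(1, 1).getValue();
    processFile(fileId);   // hypothetical per-file work
    sheet.deleteRow(1);    // remove the entry once it has been processed
  }
  // Nothing left: delete the trigger(s) that keep calling this function.
  ScriptApp.getProjectTriggers().forEach(function (t) {
    if (t.getHandlerFunction() === 'processNextBatch') ScriptApp.deleteTrigger(t);
  });
}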
I have used the ScriptDB to save my place while processing a large amount of information in a loop. The script can/does exceed the 5 minute limit. By updating the ScriptDb during each run, the script can read the state from the db and pick up where it left off until all processing is complete. Give this strategy a try and I think you'll be pleased with the results.
If you are using G Suite Business or Enterprise edition, you can register for Early Access to App Maker. Once App Maker is enabled, your script runtime will increase from 6 minutes to 30 minutes :)
More details about App Maker: click here.
Here's an approach based very heavily on Dmitry Kostyuk's absolutely excellent article on the subject.
It differs in that it doesn't attempt to time execution and exit gracefully. Rather, it deliberately spawns a new thread every minute, and lets them run until they are timed out by Google. This gets round the maximum execution time limit, and speeds things up by running processing in several threads in parallel. (This speeds things up even if you are not hitting execution time limits.)
It tracks the task status in script properties, plus a semaphore to ensure no two threads are editing the task status at any one time. (It uses several properties as they are limited to 9k each.)
I have tried to mimic the Google Apps Script iterator.next() API, but cannot use iterator.hasNext() as that would not be thread-safe (see TOCTOU). It uses a couple of facade classes at the bottom.
I would be immensely grateful for any suggestions. This is working well for me, halving the processing time by spawning three parallel threads to run through a directory of documents. You could spawn 20 within quota, but this was ample for my use case.
The class is designed to be drop-in, usable for any purpose without modification. The only thing the user must do is, when processing a file, delete any outputs from prior timed-out attempts. The iterator will return a given fileId more than once if a processing task is timed out by Google before it completes.
To silence the logging, it all goes through the log() function at the bottom.
This is how you use it:
const main = () => {
const srcFolder = DriveApp.getFoldersByName('source folder',).next()
const processingMessage = processDocuments(srcFolder, 'spawnConverter')
log('main() finished with message', processingMessage)
}
const spawnConverter = e => {
const processingMessage = processDocuments()
log('spawnConverter() finished with message', processingMessage)
}
const processDocuments = (folder = null, spawnFunction = null) => {
// folder and spawnFunction are only passed the first time we trigger this function,
// threads spawned by triggers pass nothing.
// 10,000 is the maximum number of milliseconds a file can take to process.
const pfi = new ParallelFileIterator(10000, MimeType.GOOGLE_DOCS, folder, spawnFunction)
let fileId = pfi.nextId()
const doneDocs = []
while (fileId) {
const fileRelativePath = pfi.getFileRelativePath(fileId)
const doc = DocumentApp.openById(fileId)
const mc = MarkupConverter(doc)
// This is my time-consuming task:
const mdContent = mc.asMarkdown(doc)
pfi.completed(fileId)
doneDocs.push([...fileRelativePath, doc.getName() + '.md'].join('/'))
fileId = pfi.nextId()
}
return ('This thread did:\r' + doneDocs.join('\r'))
}
Here's the code:
const ParallelFileIterator = (function() {
/**
* Scans a folder, depth first, and returns a file at a time of the given mimeType.
* Uses ScriptProperties so that this class can be used to process files by many threads in parallel.
* It is the responsibility of the caller to tidy up artifacts left behind by processing threads that were timed out before completion.
* This class will repeatedly dispatch a file until .completed(fileId) is called.
* It will wait maxDurationOneFileMs before re-dispatching a file.
* Note that Google Apps kills scripts after 6 mins, or 30 mins if you're using a Workspace account, or 45 seconds for a simple trigger, and permits max 30
* scripts in parallel, 20 triggers per script, and 90 mins or 6hrs of total trigger runtime depending if you're using a Workspace account.
* Ref: https://developers.google.com/apps-script/guides/services/quotas
maxDurationOneFileMs, mimeType, parentFolder=null, spawnFunction=null
* @param {Number} maxDurationOneFileMs A generous estimate of the longest a file can take to process.
* @param {string} mimeType The mimeType of the files required.
* @param {Folder} parentFolder The top folder containing all the files to process. Only passed in by the first thread. Later spawned threads pass null (the files have already been listed and stored in properties).
* @param {string} spawnFunction The name of the function that will spawn new processing threads. Only passed in by the first thread. Later spawned threads pass null (a trigger can't create a trigger).
*/
class ParallelFileIterator {
constructor(
maxDurationOneFileMs,
mimeType,
parentFolder = null,
spawnFunction = null,
) {
log(
'Enter ParallelFileIterator constructor',
maxDurationOneFileMs,
mimeType,
spawnFunction,
parentFolder ? parentFolder.getName() : null,
)
// singleton
if (ParallelFileIterator.instance) return ParallelFileIterator.instance
if (parentFolder) {
_cleanUp()
const t0 = Now.asTimestamp()
_getPropsLock(maxDurationOneFileMs)
const t1 = Now.asTimestamp()
const { fileIds, fileRelativePaths } = _catalogFiles(
parentFolder,
mimeType,
)
const t2 = Now.asTimestamp()
_setQueues(fileIds, [])
const t3 = Now.asTimestamp()
this.fileRelativePaths = fileRelativePaths
ScriptProps.setAsJson(_propsKeyFileRelativePaths, fileRelativePaths)
const t4 = Now.asTimestamp()
_releasePropsLock()
const t5 = Now.asTimestamp()
if (spawnFunction) {
// only triggered on the first thread
const trigger = Trigger.create(spawnFunction, 1)
log(
`Trigger once per minute: UniqueId: ${trigger.getUniqueId()}, EventType: ${trigger.getEventType()}, HandlerFunction: ${trigger.getHandlerFunction()}, TriggerSource: ${trigger.getTriggerSource()}, TriggerSourceId: ${trigger.getTriggerSourceId()}.`,
)
}
log(
`PFI instantiated for the first time, has found ${
fileIds.length
} documents to process. getPropsLock took ${t1 -
t0}ms, _catalogFiles took ${t2 - t1}ms, setQueues took ${t3 -
t2}ms, setAsJson took ${t4 - t3}ms, releasePropsLock took ${t5 -
t4}ms, trigger creation took ${Now.asTimestamp() - t5}ms.`,
)
} else {
const t0 = Now.asTimestamp()
// wait for first thread to set up Properties
while (!ScriptProps.getJson(_propsKeyFileRelativePaths)) {
Utilities.sleep(250)
}
this.fileRelativePaths = ScriptProps.getJson(_propsKeyFileRelativePaths)
const t1 = Now.asTimestamp()
log(
`PFI instantiated again to run in parallel. getJson(paths) took ${t1 -
t0}ms`,
)
spawnFunction
}
_internals.set(this, { maxDurationOneFileMs: maxDurationOneFileMs })
// to get: _internal(this, 'maxDurationOneFileMs')
ParallelFileIterator.instance = this
return ParallelFileIterator.instance
}
nextId() {
// returns false if there are no more documents
const maxDurationOneFileMs = _internals.get(this).maxDurationOneFileMs
_getPropsLock(maxDurationOneFileMs)
let { pending, dispatched } = _getQueues()
log(
`PFI.nextId: ${pending.length} files pending, ${
dispatched.length
} dispatched, ${Object.keys(this.fileRelativePaths).length -
pending.length -
dispatched.length} completed.`,
)
if (pending.length) {
// get first pending Id, (ie, deepest first)
const nextId = pending.shift()
dispatched.push([nextId, Now.asTimestamp()])
_setQueues(pending, dispatched)
_releasePropsLock()
return nextId
} else if (dispatched.length) {
log(`PFI.nextId: Get first dispatched Id, (ie, oldest first)`)
let startTime = dispatched[0][1]
let timeToTimeout = startTime + maxDurationOneFileMs - Now.asTimestamp()
while (dispatched.length && timeToTimeout > 0) {
log(
`PFI.nextId: None are pending, and the oldest dispatched one hasn't yet timed out, so wait ${timeToTimeout}ms to see if it will`,
)
_releasePropsLock()
Utilities.sleep(timeToTimeout + 500)
_getPropsLock(maxDurationOneFileMs)
;({ pending, dispatched } = _getQueues())
if (pending && dispatched) {
if (dispatched.length) {
startTime = dispatched[0][1]
timeToTimeout =
startTime + maxDurationOneFileMs - Now.asTimestamp()
}
}
}
// We currently still have the PropsLock
if (dispatched.length) {
const nextId = dispatched.shift()[0]
log(
`PFI.nextId: Document id ${nextId} has timed out; reset start time, move to back of queue, and re-dispatch`,
)
dispatched.push([nextId, Now.asTimestamp()])
_setQueues(pending, dispatched)
_releasePropsLock()
return nextId
}
}
log(`PFI.nextId: Both queues empty, all done!`)
;({ pending, dispatched } = _getQueues())
if (pending.length || dispatched.length) {
log(
"ERROR: All documents should be completed, but they're not. Giving up.",
pending,
dispatched,
)
}
_cleanUp()
return false
}
completed(fileId) {
_getPropsLock(_internals.get(this).maxDurationOneFileMs)
const { pending, dispatched } = _getQueues()
const newDispatched = dispatched.filter(el => el[0] !== fileId)
if (dispatched.length !== newDispatched.length + 1) {
log(
'ERROR: A document was completed, but not found in the dispatched list.',
fileId,
pending,
dispatched,
)
}
if (pending.length || newDispatched.length) {
_setQueues(pending, newDispatched)
_releasePropsLock()
} else {
log(`PFI.completed: Both queues empty, all done!`)
_cleanUp()
}
}
getFileRelativePath(fileId) {
return this.fileRelativePaths[fileId]
}
}
// ============= PRIVATE MEMBERS ============= //
const _propsKeyLock = 'PropertiesLock'
const _propsKeyDispatched = 'Dispatched'
const _propsKeyPending = 'Pending'
const _propsKeyFileRelativePaths = 'FileRelativePaths'
// Not really necessary for a singleton, but in case code is changed later
var _internals = new WeakMap()
const _cleanUp = (exceptProp = null) => {
log('Enter _cleanUp', exceptProp)
Trigger.deleteAll()
if (exceptProp) {
ScriptProps.deleteAllExcept(exceptProp)
} else {
ScriptProps.deleteAll()
}
}
const _catalogFiles = (folder, mimeType, relativePath = []) => {
// returns IDs of all matching files in folder, depth first
log(
'Enter _catalogFiles',
folder.getName(),
mimeType,
relativePath.join('/'),
)
let fileIds = []
let fileRelativePaths = {}
const folders = folder.getFolders()
let subFolder
while (folders.hasNext()) {
subFolder = folders.next()
const results = _catalogFiles(subFolder, mimeType, [
...relativePath,
subFolder.getName(),
])
fileIds = fileIds.concat(results.fileIds)
fileRelativePaths = { ...fileRelativePaths, ...results.fileRelativePaths }
}
const files = folder.getFilesByType(mimeType)
while (files.hasNext()) {
const fileId = files.next().getId()
fileIds.push(fileId)
fileRelativePaths[fileId] = relativePath
}
return { fileIds: fileIds, fileRelativePaths: fileRelativePaths }
}
const _getQueues = () => {
const pending = ScriptProps.getJson(_propsKeyPending)
const dispatched = ScriptProps.getJson(_propsKeyDispatched)
log('Exit _getQueues', pending, dispatched)
// Note: Empty lists in Javascript are truthy, but if Properties have been deleted by another thread they'll be null here, which are falsey
return { pending: pending || [], dispatched: dispatched || [] }
}
const _setQueues = (pending, dispatched) => {
log('Enter _setQueues', pending, dispatched)
ScriptProps.setAsJson(_propsKeyPending, pending)
ScriptProps.setAsJson(_propsKeyDispatched, dispatched)
}
const _getPropsLock = maxDurationOneFileMs => {
// will block until lock available or lock times out (because a script may be killed while holding a lock)
const t0 = Now.asTimestamp()
while (
ScriptProps.getNum(_propsKeyLock) + maxDurationOneFileMs >
Now.asTimestamp()
) {
Utilities.sleep(2000)
}
ScriptProps.set(_propsKeyLock, Now.asTimestamp())
log(`Exit _getPropsLock: took ${Now.asTimestamp() - t0}ms`)
}
const _releasePropsLock = () => {
ScriptProps.delete(_propsKeyLock)
log('Exit _releasePropsLock')
}
return ParallelFileIterator
})()
const log = (...args) => {
// easier to turn off, json harder to read but easier to hack with
console.log(args.map(arg => JSON.stringify(arg)).join(';'))
}
class Trigger {
// Script triggering facade
static create(functionName, everyMinutes) {
return ScriptApp.newTrigger(functionName)
.timeBased()
.everyMinutes(everyMinutes)
.create()
}
static delete(e) {
if (typeof e !== 'object') return log(`${e} is not an event object`)
if (!e.triggerUid)
return log(`${JSON.stringify(e)} doesn't have a triggerUid`)
ScriptApp.getProjectTriggers().forEach(trigger => {
if (trigger.getUniqueId() === e.triggerUid) {
log('deleting trigger', e.triggerUid)
return ScriptApp.deleteTrigger(trigger)
}
})
}
static deleteAll() {
// Deletes all triggers in the current project.
var triggers = ScriptApp.getProjectTriggers()
for (var i = 0; i < triggers.length; i++) {
ScriptApp.deleteTrigger(triggers[i])
}
}
}
class ScriptProps {
// properties facade
static set(key, value) {
if (value === null || value === undefined) {
ScriptProps.delete(key)
} else {
PropertiesService.getScriptProperties().setProperty(key, value)
}
}
static getStr(key) {
return PropertiesService.getScriptProperties().getProperty(key)
}
static getNum(key) {
// missing key returns Number(null), ie, 0
return Number(ScriptProps.getStr(key))
}
static setAsJson(key, value) {
return ScriptProps.set(key, JSON.stringify(value))
}
static getJson(key) {
return JSON.parse(ScriptProps.getStr(key))
}
static delete(key) {
PropertiesService.getScriptProperties().deleteProperty(key)
}
static deleteAll() {
PropertiesService.getScriptProperties().deleteAllProperties()
}
static deleteAllExcept(key) {
PropertiesService.getScriptProperties()
.getKeys()
.forEach(curKey => {
if (curKey !== key) ScriptProps.delete(curKey)
})
}
}
If you're a business customer, you can now sign up for Early Access to App Maker, which includes Flexible Quotas.
Under the flexible quota system, such hard quota limits are removed. Scripts do not stop when they reach a quota limit. Rather, they are delayed until quota becomes available, at which point the script execution resumes. Once quotas begin being used, they are refilled at a regular rate. For reasonable usage, script delays are rare.
If you are using G Suite as a Business, Enterprise or EDU customer the execution time for running scripts is set to:
30 min / execution
See: https://developers.google.com/apps-script/guides/services/quotas
The idea would be to exit gracefully from the script, save your progress, create a trigger to start again from where you left off, repeat as many times as necessary and then once finished clean up the trigger and any temporary files.
Here is a detailed article on this very topic.
As many people mentioned, the generic solution to this problem is to execute your method across multiple sessions. I found it to be a common problem that I have a bunch of iterations I need to loop over, and I don't want the hassle of writing/maintaining the boilerplate of creating new sessions.
Therefore I created a general solution:
/**
* Executes the given function across multiple sessions to ensure there are no timeouts.
*
* See https://stackoverflow.com/a/71089403.
*
* @param {Int} items - The items to iterate over.
* @param {function(Int)} fn - The function to execute each time. Takes in an item from `items`.
* @param {String} resumeFunctionName - The name of the function (without arguments) to run between sessions. Typically this is the same name of the function that called this method.
* @param {Int} maxRunningTimeInSeconds - The maximum number of seconds a script should be able to run. After this amount, it will start a new session. Note: This must be set to less than the actual timeout as defined in https://developers.google.com/apps-script/guides/services/quotas (e.g. 6 minutes), otherwise it can't set up the next call.
* @param {Int} timeBetweenIterationsInSeconds - The amount of time between iterations of sessions. Note that Google Apps Script won't honor this 100%, as if you choose a 1 second delay, it may actually take a minute or two before it actually executes.
*/
function iterateAcrossSessions(items, fn, resumeFunctionName, maxRunningTimeInSeconds = 5 * 60, timeBetweenIterationsInSeconds = 1) {
const PROPERTY_NAME = 'iterateAcrossSessions_index';
let scriptProperties = PropertiesService.getScriptProperties();
let startTime = (new Date()).getTime();
let startIndex = parseInt(scriptProperties.getProperty(PROPERTY_NAME));
if (Number.isNaN(startIndex)) {
startIndex = 0;
}
for (let i = startIndex; i < items.length; i++) {
console.info(`[iterateAcrossSessions] Executing for i = ${i}.`)
fn(items[i]);
let currentTime = (new Date()).getTime();
let elapsedTime = currentTime - startTime;
let maxRunningTimeInMilliseconds = maxRunningTimeInSeconds * 1000;
if (maxRunningTimeInMilliseconds <= elapsedTime) {
let newTime = new Date(currentTime + timeBetweenIterationsInSeconds * 1000);
console.info(`[iterateAcrossSessions] Creating new session for i = ${i+1} at ${newTime}, since elapsed time was ${elapsedTime}.`);
scriptProperties.setProperty(PROPERTY_NAME, i+1);
ScriptApp.newTrigger(resumeFunctionName).timeBased().at(newTime).create();
return;
}
}
console.log(`[iterateAcrossSessions] Done iterating over items.`);
// Reset the property here to ensure that the execution loop could be restarted.
scriptProperties.deleteProperty(PROPERTY_NAME);
}
You can now use this pretty easily like so:
let ITEMS = ['A', 'B', 'C'];
function execute() {
iterateAcrossSessions(
ITEMS,
(item) => {
console.log(`Hello world ${item}`);
},
"execute");
}
It'll automatically execute the internal lambda for each value in ITEMS, seamlessly spreading across sessions as needed.
For example, if you use a 0-second maxRunningTime it would run across 4 sessions with the following outputs:
[iterateAcrossSessions] Executing for i = 0.
Hello world A
[iterateAcrossSessions] Creating new session for i = 1.
[iterateAcrossSessions] Executing for i = 1.
Hello world B
[iterateAcrossSessions] Creating new session for i = 2.
[iterateAcrossSessions] Executing for i = 2.
Hello world C
[iterateAcrossSessions] Creating new session for i = 3.
[iterateAcrossSessions] Done iterating over items.