I have a sheet with ~18k rows in. I have written a script to divide the number of rows by the number of users who want to get involved in calculating values in the rows in the sheet and then allocating those rows to those users. A collaborative effort to speed things up as each row takes approximately 1s and it takes ~ 6-9 hours to go through it on my own with network issues etc.
When a user opens the sheet, they are allocated a user number and a chunk of rows to work on with 5 users 18k rows breaks down to 3600 rows each.
The issue is that when more than 1 or 2 users are working on the sheet, the performance becomes erratic. Sometimes a row takes 15-20s to be processed. I am saving nothing by having friends share the task.
I read somewhere that only 100 users can work on a sheet at the same time but here I am talking about 5 (up to a maximum of maybe 10 or 11) people working on the sheet, each running maybe 5-8 functions each on a trigger. Right now I have 5 users connected to the sheet, each running 8 functions and the work has slowed almost to a stop.
Does anyone have any experience with this and know of any limits google place on accounts working on a script? Any way to work with apps script to make this work properly?
Thanks for your insights!
Chad
/*NEW CODE WITH TRIGGER - WE START IT UP WITH A CALL TO runTriggersTwoTimes()*/
function runTriggersTwoTimes() {
createSplitWorkTrigger();
ScriptApp.newTrigger('createSplitWorkTrigger')
.timeBased()
.after(80 * 60 * 1000)
.create();
}
function createSplitWorkTrigger() {
ScriptApp.newTrigger('splitWork2')
.timeBased()
.everyMinutes(5)
.create();
ScriptApp.newTrigger('deleteAllTriggers')
.timeBased()
.after(60 * 60 * 1000)
.create();
}
/*This function splits the work of getting the "From" prices between multiple accounts*/
function splitWork2() {
var accountsArray = [user1, user2, user3, user4, user5#gmail.com];
var numberAccounts = accountsArray.length;
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getSheetByName("CK Formatted");
var RANGE = sheet.getDataRange();
var rangeVals = RANGE.getValues();
var numberRows = rangeVals.length;
var totalChunkSize = Math.floor(numberRows / numberAccounts);
var userName = Session.getActiveUser().getEmail();
var userNumber;
for (i = 0; i < numberAccounts; i++) {
if (userName == accountsArray[i]) {
userNumber = i;
}
}
var usersStartRow = userNumber * totalChunkSize + 1
var usersLastRow = usersStartRow + totalChunkSize - 1;
if (userName == accountsArray[numberAccounts - 1]) {
usersLastRow = numberRows;
}
// This one does the main work
findFromPricesByChunks2(userNumber, usersStartRow, usersLastRow, 110, totalChunkSize);
}
/*This function adds the "From" prices in chunks*/
function findFromPricesByChunks2(userNumber, startRow, lastRow, chunkSize, totalChunkSize, checkingRound = false) {
if (startRow >= lastRow) {
var allTriggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < allTriggers.length; i++) {
ScriptApp.deleteTrigger(allTriggers[i]);
}
return;
}
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getSheetByName("CK Formatted");
var RANGE = sheet.getDataRange();
var rangeVals = RANGE.getValues();
Logger.log(rangeVals.length);
var addedPrices = 0;
for (var i = startRow; i <= lastRow; i++) {
if (addedPrices == chunkSize) {
return;
}
var cellValue = sheet.getRange(`A${i + 1}`).getValue();
Logger.log('Cell Value:' + cellValue);
if (cellValue == '') {
Utilities.sleep(1000);
var html = UrlFetchApp.fetch(URL).getContentText();
var $ = Cheerio.load(html);
var s = $('#table .price-container .font-weight-bold').first().text();
// if we fail to get a value because of network issues or whatever this next piece is going ahead and putting a full stop in what should be an empty box. WIll think about this
if (s) {
s = s.replace(".", ",");
s = s.substring(0, s.lastIndexOf(',')) + '.' + s.substring(s.lastIndexOf(',') + 1);
}
fromArrayCell = sheet.getRange(`A${i + 1}`);
fromArrayCell.setValue(s);
addedPrices++;
}
}
var sheetCk = spreadsheet.getSheetByName("CK");
// This adds the amount of processed items to the processed rows counter (because now there are multiple workers working
// at the same time)
if (!checkingRound) {
sheetCk.getRange(userNumber + 2, 9).setValue(sheetCk.getRange(userNumber + 2, 9).getValue() + addedPrices);
} else {
sheetCk.getRange(userNumber + 2, 10).setValue(sheetCk.getRange(userNumber + 2, 10).getValue() + addedPrices);
}
}
function deleteAllTriggers() {
var allTriggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < allTriggers.length; i++) {
ScriptApp.deleteTrigger(allTriggers[i]);
}
}
The single most important thing to come out of this post was the limit of 30 executions which I was told before I posted any code :p I didn't know about that and it was the crux of the problem I was having. Bear in mind that the original code (a few versions back) was trying to cram 13 users in with 8 processes each :)
After drifting for a few days I had a good sleep and came up with this new improved version of the above:
function splitWork() {
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheetCk = spreadsheet.getSheetByName("CK");
var sheet = spreadsheet.getSheetByName("CK Formatted");
var RANGE = sheet.getDataRange();
var rangeVals = RANGE.getValues();
var userName = Session.getActiveUser().getEmail();
Logger.log(userName)
var accountsArray = [gmail1, gmail2, gmail3, gmail4, gmail5];
var numberRows = rangeVals.length;
var numberAccounts = accountsArray.length;
var totalChunkSize = Math.floor(numberRows / numberAccounts);
var userNumber;
for (i = 0; i < numberAccounts; i++) {
if (userName == accountsArray[i]) {
userNumber = i;
}
}
var usersStartRow = (userNumber * totalChunkSize) + 1; // = 1
var usersLastRow = usersStartRow + totalChunkSize - 1; // = 211
if(userName == accountsArray[numberAccounts - 1]) { usersLastRow = numberRows; }
var completeRowsOriginal = sheetCk.getRange(userNumber + 2, 9).getValue() // = 109
usersStartRow = usersStartRow + completeRowsOriginal // = 110
for (var i = usersStartRow; i <= usersLastRow; i++) { // 110 - 211
var cellValue = sheet.getRange(`A${i + 1}`).getValue();
var completeRows = 0
if (cellValue == '') {
Utilities.sleep(1000);
var html = UrlFetchApp.fetch(URL).getContentText();
var $ = Cheerio.load(html);
var fromArrayCell = sheet.getRange(`A${i + 1}`);
if(sheetCk.getRange(userNumber + 2, 10).getValue() != 1){
var s = $('#table .price-container .font-weight-bold').first().text();
}
else if(sheetCk.getRange(userNumber + 2, 10).getValue() == 1) {
var s = $('#table .price-container .font-weight-bold').first().text();
if(!s) {
var s = $('#tabContent-info .info-list-container dd:nth-child(12)').text();
if(s) { fromArrayCell.setBackground('#ff8680'); }
}
}
if (s) {
s = s.replace(".", ",");
s = s.substring(0, s.lastIndexOf(',')) + '.' + s.substring(s.lastIndexOf(',') + 1);
}
fromArrayCell.setValue(s);
completeRows++
sheetCk.getRange(userNumber + 2, 9).setValue(sheetCk.getRange(userNumber + 2, 9).getValue() + 1)
}
if(completeRows >= 100 || usersStartRow + completeRows >= usersLastRow) {break}
}
if(sheetCk.getRange(userNumber + 2, 9).getValue() + usersStartRow >= usersLastRow) {
if(sheetCk.getRange(userNumber + 2, 10).getValue() != 1) {
sheetCk.getRange(userNumber + 2, 9).setValue(0)
sheetCk.getRange(userNumber + 2, 10).setValue(1)
}
}
else { deleteAllTriggers() }
}
Which works like a charm.
A user opens the sheet, is allocated a chunk in the background, presses a button to start a trigger to work on their chunk every 5 minutes and if they are feeling frisky they can run the function above while waiting for the trigger to start (also from a button). They'll scrape values for 100 rows then wait for another trigger. After they have gone through their whole chunk their counter resets and their master counter is set to 1. They then make a second pass over their chunk filling in any gaps due to network errors. If they can't get a value from the regular place on the second pass they can get a value from another place but it is marked with a red background as it is not reliable.
Don't think there is anything else left to say about this one. Try it yourselves if you have a large sheet of data to churn through and want to do it in less time with help from your friends with google accounts.
PS: CK is the sheet which is used to hold counters only
PPS!! If you literally do it as above, you will manage to process about 1 row per second. The real power coms from getting all the helper account to make their own copy of the main sheet and work on that. Here we'd get 13 rows per second processed, for instance. Then it's a simple matter to recombine everything into single sheet when they're done.
Related
I am facing an issue like many before with regards to a timeout out Google Apps Script, I am reading the data from a indexed/persisted table in a MySQL Database, the table in question has 71 columns and a total of 28000 rows, the sheet in google sheets I am writing to has no calculations etc on it which might slow things down - those happen on other sheets.
Please can you review the below that I am using and propose any changes to assist in avoiding the time out?
var server = 'xx.xx.xx.xxx';
var port = xxxx;
var dbName = 'test';
var username = 'test';
var password = 'xxx';
var url = 'jdbc:mysql://'+server+':'+port+'/'+dbName;
function readDataPast() {
var conn = Jdbc.getConnection(url, username, password);
var stmt = conn.createStatement();
var results = stmt.executeQuery('SELECT * FROM test.test_table');
var metaData = results.getMetaData();
var numCols = metaData.getColumnCount();
var spreadsheet = SpreadsheetApp.getActive();
var sheet = spreadsheet.getSheetByName('Raw_Data');
sheet.clearContents();
var arr = [];
let row = [];
for (var col = 0; col < numCols; col++) {
row.push(metaData.getColumnName(col + 1));
}
arr.push(row);
while (results.next()) {
row = [];
for (var col = 0; col < numCols; col++) {
row.push(results.getString(col + 1));
}
arr.push(row)
}
sheet.getRange(1, 1, arr.length, arr[0].length).setValues(arr);
results.close();
stmt.close();
}
Issue:
I don't think the script can be made considerably faster, since potential improvements (e.g. using Sheets API as suggested by Ninca Tirtil) don't affect significatively the bulk of the script (iterating through 28000 rows).
Workaround:
Therefore, instead of trying to speed it up, I'd suggest accomplishing this in multiple executions. To that goal, I'd do the following:
Check execution time after each iteration. If this time is close to the time limit, end the loop and write current data to the sheet. You can use the Date object for this.
Create the following time-based trigger at the end of your function: after(durationMilliseconds). Thanks to this, the function will fire automatically after the amount of milliseconds you indicate. After each execution, a trigger will be created to fire the next execution.
Because you want to split the loop, you have to store the row index somewhere (you could use PropertiesService at the end of each execution, for example) and retrieve it at the beginning of the next, so that in each successive execution, the script resumes the loop where it left it. You can get the row index via getRow(), and then move to that row in the next execution via relative(rows).
Code sample:
var maxTimeDiff = 1000 * 60 * 5; // 5 minutes
const PROPERTY_KEY = "Row index";
function setRowIndex(rowIndex) {
const scriptProps = PropertiesService.getScriptProperties();
scriptProps.setProperty(PROPERTY_KEY, rowIndex);
}
function getRowIndex() {
const scriptProps = PropertiesService.getScriptProperties();
const rowIndex = scriptProps.getProperty(PROPERTY_KEY);
return rowIndex;
}
function createTrigger() {
ScriptApp.newTrigger("readDataPast")
.timeBased()
.after(60 * 1000) // Next execution after a minute
.create();
}
function readDataPast() {
var startTime = new Date();
var conn = Jdbc.getConnection(url, username, password);
var stmt = conn.createStatement();
var results = stmt.executeQuery('SELECT * FROM test.test_table');
var spreadsheet = SpreadsheetApp.getActive();
var sheet = spreadsheet.getSheetByName('Raw_Data');
var rowIndex = getRowIndex();
var arr = [];
let row = [];
if (!rowIndex || rowIndex == 0) { // Clear sheet and add metadata if first execution
sheet.clearContents();
var metaData = results.getMetaData();
var numCols = metaData.getColumnCount();
for (var col = 0; col < numCols; col++) {
row.push(metaData.getColumnName(col + 1));
}
arr.push(row);
} else {
results.relative(rowIndex); // Move to current row
}
while (results.next()) {
row = [];
for (var col = 0; col < numCols; col++) {
row.push(results.getString(col + 1));
}
arr.push(row);
if (new Date() - startTime > maxTimeDiff) break; // End iteration if long time
}
var currentRow = results.getRow(); // 0 if all rows have been iterated
setRowIndex(currentRow);
var lastRow = sheet.getLastRow();
sheet.getRange(lastRow + 1, 1, arr.length, arr[0].length).setValues(arr);
results.close();
stmt.close();
if (currentRow) createTrigger(); // Create trigger if iteration is not finished
}
The code below works but requires too much time to work. In fact, the script cannot finish the code, since the time to process the script has passed several times.
I gladly received help to fine-tune this code
The script was created to block (grouped) rows, if a value is entered in column E. This code is activated every 24 hours as a trigger.
Objective: The worksheet with 1000 rows, is accessible to 250 people, whether or not registered with a google account, and serves to record recreational tennis matches.
var MaxRow = Sheet.getDataRange().getNumRows();
var RowCount = 1;
var Cell = Sheet.getRange("D"+ RowCount);
var CellValue = Cell.getValue();
var BlockStart = 0;
var BlockEnd = 0;
var LockRange = Sheet.getRange(59,6,1,2);
for (RowCount = 4; RowCount <= MaxRow ; RowCount++) {
Cell = Sheet.getRange("D"+ RowCount);
CellValue = Cell.getValue();
if (CellValue != "") {
if (BlockStart == 0) {
BlockStart = RowCount;
BlockEnd = RowCount;
}
else {
BlockEnd = RowCount;
}
}
else {
if (BlockStart > 0) {
LockRange = Sheet.getRange(BlockStart,6,BlockEnd-BlockStart + 1,2);
var Protection = LockRange.protect().setDescription('Rows ' +
BlockStart + ' To ' + BlockEnd + ' Protected');
Protection.removeEditors(Protection.getEditors());
BlockStart = 0;
BlockEnd = 0;
}
}
}
Without a complete function I can't really figure out what you're trying to do. But let's say that you want to read the values in column D from line 4 to the bottom of data, then here's a simple and fast way to do it. This isn't the only way. But it's a lot faster than using getValue() on each row. This will probably run about 10000 times faster.
function getDataInColumnD() {
var ss=SpreadsheetApp.getActive();
var sh=ss.getActiveSheet();
var rg=sh.getRange(4,4,Sheet.getLastRow()-3,1);
var vA=rg.getValues();
for (var i=0;i<vA.length;i++) {
var value=vA[i][0];
var row=i+4;
Logger.log('col: 4, row: %s, value: %s',row,value);
}
}
well, i'm trying to do what described in title. Both spreadsheets have only one sheet that are the ones i'm comparing. One spreadsheet is and update of the other, so i'm trying to get only new content. (if it were a fc (dos command) like function this would be easy...)
After doing some search, i have the folloing script that should work on most cases, that uses arrays for each sheet.
function test() {
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
var newarray = getNewData(array,old_array);
Logger.log('there are ' + newarray.length + 'different rows');
}
function getNewData(array1,array2){
var diff =array2;
for (var i = 0; i<array1.length; i++){
var duplicate = false;
for (var j = 0;j<diff.length;j++){
if (array1[i].join() == diff[j].join()){
Logger.log('duplicated line found on rows ' + i + ':' + j);
diff.splice(j,1);
var duplicate= true;
break;
}
}
if (duplicate==false) {
Logger.log('not duplicated line found on row ' + i);
diff.push(array1[i]);
}
}
return diff;
}
The thing is that the files are too big, almost 30000 rows, so the scripts exceed 5 minutes limit for execution.
Is there a way to improve this, like for instance, eliminate the inner for loop?
Or there is a way to do it in parts? like first the first 5000 rows, and so on.
Regards,
EDIT: after analizing the spreadsheet a little, i found out that there is a ID for every row, so now i can concentrate the search only in one column of each spreadsheet. So here is my new implementation:
function test(){
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
//The COlumn has an indicator, so i search for that. I don't control the formatting of the files, so i search in both spreadsheet for the indicator
var searchString = 'NAME';
for (var i = 0; i < old_array.length; i++) {
for (var j = 0; j < old_array[i].length; j++) {
if (old_array[i][j] == searchString) {
var Row_old = i+1;
var Column_old = j;
break;
}
}
if (Row_old != undefined){
break;
}
}
for (var i = 0; i < array.length; i++) {
for (var j = 0; j < array[i].length; j++) {
if (array[i][j] == searchString) {
var Row = i+1;
var Column = j;
break;
}
}
if (Row != undefined){
break;
}
}
Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);
var diff_index =[];
var row_ind = 0;
for (var i=Row;i<array.length;i++){
Logger.log(i);
var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
if (existe==-1){
Logger.log(row_ind+'!!!');
diff_index[row_ind]=i;
row_ind++;
}
}
Logger.log(diff_index);
}
This still run out of time... I will now try to incorporate your comments.
Your script has a few major bottlenecks that slow it down massively:
Starting both loops at 0 every time makes its runtime explode
splicing every time you find a duplicate requires to move the array around
string concatenating an array on every iteration
We can circumvent these issues by:
sorting the second range once
I'm sure there's something clever to be done by iteratively binary searching through every column but we'd have to resort every time so we'll binary search the first column and then do a linear search.
We will use ArrayLib for the sorting (I hope it's a fast sorting algorithm).
Let's start with a function to find the first row where the first column matches a value (the first column of the current row):
function firstRowMatchingCol1(target, lookupRange) {
var min = 0;
var max = lookupRange.length - 1;
var guess;
var guessVal;
while(min <= max) {
guess = (min + max) / 2 | 0;
guessVal = lookupRange[guess][0];
if (guessVal < target) {
min = guess + 1;
} else if (guessVal > target) {
max = guess - 1;
} else {
while (guess > 0 && lookupRange[guess - 1][0] === target) {
guess -= 1;
}
return guess;
}
}
return -1;
}
Now we can go linearly go through every row and check if the columns match until the first column doesn't match anymore.
function matchExists(row, lookupRange) {
var index = firstRowMatchingCol1(row[0], lookupRange);
if (index === -1) {return false;}
while (index < lookupRange.length && lookupRange[index][0] === row[0]) {
for (var col = 1; col < row.length; col++) {
if (row[col] !== lookupRange[index][col]) {break;}
if (col === row.length - 1) {return true;} // This only works if the ranges are at least two columns wide but if they are one column wide you can just check if index > -1
}
index += 1;
}
return false;
}
And finally we can get the duplicates like this:
function getNonDuplicates(r1, r2) {
r2 = ArrayLib.sort(r2, 0, true);
return r1.filter(function(row) {return !matchExists(row, r2);});
}
Like mTorres' code this is untested
The solution I'm proposing is a "hack" around the time limit. But if you want a cleaner solution, you could, if possible, reorganize and make your code more efficient by having the arrays ordered somehow.
You don't specify the data inside array1 and array2, if rows had some sort of ID field you could order by this ID and check row i on array1 and row i on array2 instead of comparing every row in array1 with every row in array2 (which is extremely inefficient with 30000 rows).
If your data does not have an ID field to order the rows, then what you could is something based on my proposed solution: add a track for every compared row on array1. When the run reaches the time limit then you run again the function but starting from the last compared row (you would know which was because you'll be tracking the compared rows), and when the second run times out you repeat, and so on.
Every time you run your comparison you ask if it's the first run (or use a boolean - I prefer to ask the user, this way you won't forget to change the boolean), if it's the first run, you delete the tracking
column, if it's not the first run, you'll start with the next to last tracked row so basically continuing your script where it ended. I've been using this technique with good results.
In code (untested, so check it out before running it with real data):
/**
* Only checks if it's the first run and calls the real work function
*/
function test() {
var firstRun = "yes" === Browser.msgBox("Question", "Is this the first run?", Browser.Buttons.YES_NO);
doTest(firstRun);
}
/**
* Gets the data of the 2 spreadsheets and also the starting
* row
*/
function doTest(firstRun) {
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId = Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
/**
* Here is the code to create the tracking hability
*/
var strartFromRow = 0; // 0 because row 1 is array 0 index when you getValues();
var trackSheet = old_spreadsheet.getSheetByName("Tracking");
if (trackSheet === null) {
trackSheet = old_spreadsheet.insertSheet("Tracking");
}
if (firstRun) {
trackSheet.getRange("A:A").clearContent(); // make sure there no row is tracked yet
}
else {
// we have to continue from the previous row, keep in mind you're making the comparison
// with array which is 0 based, but sheet is 1 based, but you want the next one so getLasRow()
// should be the first item to compare on your array
strartFromRow = trackSheet.getLastRow();
}
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
// when you call the DIFF function, pass the tracking sheet and the start Row
var newarray = getNewData(array,old_array, trackSheet, startFromRow);
Logger.log('there are ' + newarray.length + 'different rows');
}
/**
* Creates a diff array using array1 and array2
* It marks each element on array1 once it has checked if it's in array2
*/
function getNewData(array1, array2, trackingSheet, startFromRow){
var logRow = trackingSheet.getLastRow();
var diff = array2;
for (var i = startFromRow; i < array1.length; i++){
var duplicate = false;
for (var j = 0; j < diff.length;j++){
if (array1[i].join() == diff[j].join()){
Logger.log('duplicated line found on rows ' + i + ':' + j);
diff.splice(j,1);
duplicate = true;
break;
}
}
if (duplicate === false) {
Logger.log('not duplicated line found on row ' + i);
diff.push(array1[i]);
}
trackingSheet.getRange(logRow++, 1).setValue("Checked!"); // Mark i row as checked
}
return diff;
}
Here's an alternate solution that gets around the time limit. Create a new dedicated spreadsheet along with a custom sidebar. The sidebar will require you to create some HTML that will ultimately be embedded and rendered in an iframe on the client. You can embed pure javascript into the HTML via script tags.
The beauty of this approach is that these scripts will not run server-side but on the client independently of Google Apps Script's server-side environment and are not subject to the 6 minute limit. Moreover, they can also call functions in your Google Script. So one approach would be to have the client-side scripts call a Google Script function to retrieve the requisite data, do all the heavy processing in the client-side scripts, and then send the results back to the server-side script to update the sheet.
Here's a link to setting up a custom sidebar to get you started:
https://developers.google.com/apps-script/guides/dialogs#custom_sidebars
Finally, i decided to go for the Cache service option, here is the code and i'm testing it to see if i keep with this.
function getNewData() {
//deleting triggers
var triggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < triggers.length; i++) {
if (triggers[i].getHandlerFunction()=='getNewData'){
ScriptApp.deleteTrigger(triggers[i]);
}
}
//max running time = 5.5 min
var MAX_RUNNING_TIME = 330000;
var startTime= (new Date()).getTime();
//get cache
var cache = CacheService.getUserCache();
var downloaded =JSON.parse(cache.get('downloaded'));
var compared =JSON.parse(cache.get('compared'));
//start
if (downloaded==1 && compared!=1){
//folder
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId = licitacionesFolder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_array = old_sheet.getDataRange().getValues();
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(old_file.getId());
var sheet = spreadsheet.getSheets()[0];
var array = sheet.getDataRange().getValues();
Logger.log(array.length+'::'+old_array.length);
// Column
var searchString = 'NAME';
var RC = getColumn(array,searchString);
var Row = RC.Row;
var Column = RC.Column;
var RC = getColumn(old_array,searchString);
var Row_old = RC.Row;
var Column_old = RC.Column;
Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);
//compare
var diff_index =JSON.parse(cache.get('diff_index'));
var row_ind =JSON.parse(cache.get('row_ind'));
var Roww =JSON.parse(cache.get('Row'));
if (diff_index==null){var diff_index = [];}
if (row_ind==null){var row_ind = 0;}
if (Roww==null){var Roww = Row;}
Logger.log(row_ind+'\n'+Roww);
for (var i=Roww;i<array.length;i++){
var currTime = (new Date()).getTime();
if(currTime - startTime >= MAX_RUNNING_TIME){
Logger.log((currTime - startTime)/(1000*60));
Logger.log(i+'::'+row_ind);
cache.putAll({'diff_index': JSON.stringify(diff_index),'row_ind': JSON.stringify(row_ind),'Row': JSON.stringify(i-1)},21600);
ScriptApp.newTrigger('getNewData').timeBased().after(2 * 60 * 1000).create();
return;
} else {
Logger.log(i);
var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
if (existe==-1){
Logger.log(row_ind+'!!!');
diff_index[row_ind]=i;
row_ind++;
}
}
}
cache.putAll({'diff_index': JSON.stringify(diff_index),'Row': JSON.stringify(Row),'compared': JSON.stringify(1)},21600);
} else {
Logger.log('file not downloaded yet or already compared');
}
}
function getColumn(array,searchString){
for (var i = 0; i < array.length; i++) {
for (var j = 0; j < array[i].length; j++) {
if (array[i][j] == searchString) {
var Row = i+1;
var Column = j;
break;
}
}
if (Row != undefined){
break;
}
}
return {Row: Row, Column: Column};
}
I have a list of row numbers in a spreadsheet which I need to change the background colour of. As the spreadsheet is quite large (10+ sheets, each with almost 5000 rows), I am trying to construct a range so I can batch set the background, as doing each row individually was taking over the max time of 6 minutes.
Here's the code I have:
// highlight required rows
var first = -1, last = -1;
for(var j = 0; j < rowNumsToHighlight.length; j++) {
if(first == -1) {
first = rowNumsToHighlight[j];
continue;
}
// if the current row number is one more than the previous, update last to be the current row number
if(rowNumsToHighlight[j] - 1 == rowNumsToHighlight[j - 1]) {
last = rowNumsToHighlight[j];
continue;
}
// otherwise the last row should be the previous one
else {
last = rowNumsToHighlight[j - 1];
}
var numRows = (last - first) + 1;
var range = sheet.getRange(first, 1, numRows, 4);
if(range.getBackground().toUpperCase() != highlightColour.toUpperCase()) {
range.setBackground(highlightColour);
}
first = -1;
last = -1;
}
rowNumsToHighlight is just an array that looks like: [205,270,271,272,278,279]. So, with that as an example, setBackground should be ran on row 205, on rows 270-272, and on 278-279.
I'm fairly sure the solution is simple, but just can't see it. Thanks for any help.
==== Updated Code ====
Based on Serge's code below, I made it more efficient again by reducing the number of getRange() calls made. Time is down from 78 to 54 seconds.
function updateColours(sheet, array, colour){
var columns = sheet.getLastColumn();
var rows = sheet.getLastRow();
var range = sheet.getRange(1, 1, rows, columns);
Logger.log("Resetting highlight on all rows...");
range.setBackground(null);
var backgrounds = range.getBackgrounds();
for(var n = 0; n < backgrounds.length; n++){
var rowIdx = n + 1;
if(array.indexOf(rowIdx) > -1){
for(var c = 0; c < columns; c++){
backgrounds[n][c] = colour;
}
}
}
Logger.log("Highlighting non-translated rows...");
range.setBackgrounds(backgrounds);
}
Maybe this one is faster(?) and built in a way that will make your work easier (function with arguments).
It writes only once to the sheet (or 2 if you clear colors before writing)...
use like below :
function testBG(){
updateColors(0,[7,8,9,18,19,23]);
}
function updateColors(sheetNum,array){
var sh = SpreadsheetApp.getActiveSpreadsheet().getSheets()[sheetNum];
var columns = sh.getMaxColumns();
var range = sh.getRange(1,1,sh.getMaxRows(),columns);
sh.getRange(1,1,sh.getMaxRows(),columns).setBackground(null);// use this if you want to clear all colors before setting them
var backGrounds = range.getBackgrounds();// get all cells BG
for(var n=0;n<backGrounds.length;n++){
var rowIdx = n+1;
if(array.indexOf(rowIdx)>-1){
for(c=0;c<columns;c++){
backGrounds[n][c]="#F00";// if row number is in the array fill in red
}
}
}
sh.getRange(1,1,sh.getMaxRows(),columns).setBackgrounds(backGrounds);//update sheet in one call
}
test sheet in view only, make a copy to test.
This is how I would do it:
function createRanges() {
var rowNumsToHighlight = [5,7,8,9,18,19];
var arrayLength = rowNumsToHighlight.length;
var loopCounter = 0, thisNumberInArray=0, nextNumberInArray=0, crrentNmbrPlusOne=0;
var currentRangeBegin=0, numberOfRowsInRange=1;
currentRangeBegin = rowNumsToHighlight[0];
for(loopCounter=0; loopCounter < arrayLength; loopCounter+=1) {
thisNumberInArray = rowNumsToHighlight[loopCounter];
nextNumberInArray = rowNumsToHighlight[loopCounter+1];
crrentNmbrPlusOne = thisNumberInArray+1;
if (nextNumberInArray===undefined) {
workOnTheRange(currentRangeBegin, numberOfRowsInRange);
return;
};
if (nextNumberInArray!==crrentNmbrPlusOne) {
workOnTheRange(currentRangeBegin, numberOfRowsInRange);
numberOfRowsInRange = 1; //Reset to 1
currentRangeBegin = nextNumberInArray;
} else {
numberOfRowsInRange+=1;
};
};
};
function workOnTheRange(first,numRows) {
var range = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Sheet11').getRange(first, 1, numRows, 4);
range.setBackground("red");
};
I've tested the code and it works.
I have developed a Google Apps script in a Google Drive spreadsheet that processes e-mails with a certain label (download notifications) and adds these to the spreadsheet. I'm running this through the script editor of the spreadsheet.
My initial solution was quite inefficient - the whole analysis was repeated each time on all e-mails, which caused the runtime to increase for each day. A few days ago I got an error message "runtime exceeded", which is not strange.
My problem is that when trying to update the script to be more efficient, I get some weird problems. The script either stops randomly after processing a few e-mails, or simply refuses to start. Especially the script debugger, it begins to load but never starts.
I have tried several times over the last few days, and even created a new spreadsheet with the same code (in the same account), but still having these problems.
Sometimes when the script manages to run for a while, I have noted that the script output log does not match recent changes in the logging. I have of course saved the script before running it. It feels like there is some lock preventing my script from running/updating/refreshing?
Is there anyone here that recognize these problems?
The code is attached below.
The two relevant entry points are:
processInbox: Updates the spreadsheet based on new (starred) e-mails with a specific label. The label and star is set by an e-mail filter on reception. The star (indicating "new") is removed from each message after processing.
resetAllMsgs: Clears the spreadsheet and stars all relevant messages, causing processInbox to process all messages received with the relevant label.
function resetAllMsgs() {
Logger.log("Starting ResetAll");
var d = SpreadsheetApp.getActive();
var dl_sheet = d.getSheetByName("Download List");
var dlperday_sheet = d.getSheetByName("DownloadsPerDay");
dl_sheet.clear();
dlperday_sheet.clear();
Logger.log("Clearing spreadsheet");
dl_sheet.appendRow(["", "", "Downloads", ""]);
dl_sheet.appendRow(["", "", "Downloaders", ""]);
dl_sheet.appendRow(["Last Download Date", "First Download Date", "Email" , "Product", "Download Count"]);
dlperday_sheet.appendRow(["Date", "Download Count"]);
var label = GmailApp.getUserLabelByName("Download Notification");
// get all threads of the label
var threads = label.getThreads();
for (var i = 0; i < threads.length; i++) {
// get all messages in a given thread
var messages = threads[i].getMessages();
Logger.log("Starring thread " + i);
for (var j = 0; j < messages.length; j++) {
Logger.log(" Starring message " + j);
// Only starred messages are processed by processInbox
messages[j].star();
Utilities.sleep(100);
}
}
};
function processInbox() {
var d = SpreadsheetApp.getActive();
var dl_sheet = d.getSheetByName("Download List");
var dlperday_sheet = d.getSheetByName("DownloadsPerDay");
// If empty spreadsheet, reset the status of all relevant e-mails and add the spreadsheet headers
if (dl_sheet.getLastRow() <= 1) {
resetAll();
}
var label = GmailApp.getUserLabelByName("Download Notification");
var k = 0;
// get all threads of the label
var threads = label.getThreads();
for (var i = 0; i < threads.length; i++) {
if (threads[i].hasStarredMessages()) {
// get all messages in a given thread
var messages = threads[i].getMessages();
// iterate over each message
for (var j = 0; j < messages.length; j++) {
// Unread messages are not previously processed...
if (messages[j].isStarred()) {
var msg = messages[j].getBody();
msg = msg.replace(/\r?\n/g, "");
var email = getDownloader(msg);
if (email == "") {
Logger.log("Found no downloader info: " + messages[j].getSubject() + " " + messages[j].getDate());
}
var date = formatDate(getDownloadDate(msg));
// Check if a new date
var dateCell = find(date, dlperday_sheet.getDataRange(), 0);
if (dateCell == null) {
// If new date, append row in "downloads per day"
dlperday_sheet.appendRow([date, 1]);
dlperday_sheet.getRange(2, 1, dl_sheet.getLastRow()-1, 2).sort([1]);
}
else
{
// If existing date, update row in "downloads per day"
var dlcount = dlperday_sheet.getRange(dateCell.getRow(), dateCell.getColumn()+1).getValue();
}
var productname = getProductName(msg);
// Check if e-mail (user) already exists in the downloads list
var matchingCell = find(email, dl_sheet.getDataRange(), 0);
if ( matchingCell != null ) {
// If user e-mail exists, update this row
var lastDownloadDate = dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()-1).getValue();
var lastDownloadCount = dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()+2).getValue();
if (lastDownloadDate != date) {
dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()-1).setValue(date);
}
dl_sheet.getRange(matchingCell.getRow(), matchingCell.getColumn()+2).setValue(lastDownloadCount+1);
}
else // If new user e-mail, add new download row
{
dl_sheet.appendRow([date, date, email, productname, 1]);
dl_sheet.getRange(2, 4).setValue(dl_sheet.getRange(2, 4).getValue() + 1);
dl_sheet.getRange(4, 1, dl_sheet.getLastRow()-3, 5).sort([1]);
}
// Mark message as processed, to avoid processing it on the next run
messages[j].unstar();
}
}
}
}
};
/**
* Finds a value within a given range.
* #param value The value to find.
* #param range The range to search in.
* #return A range pointing to the first cell containing the value,
* or null if not found.
*/
function find(value, range, log) {
var data = range.getValues();
for (var i = 0; i < data.length; i++) {
for (var j = 0; j < data[i].length; j++) {
if (log == 1)
{
Logger.log("Comparing " + data[i][j] + " and " + value);
}
if (data[i][j] == value) {
return range.getCell(i + 1, j + 1);
}
}
}
return null;
};
function getDownloader(bodystring) {
var marker = "Buyer Info";
var marker_begin_index = bodystring.indexOf(marker, 1000);
var email_begin_index = bodystring.indexOf("mailto:", marker_begin_index) + 7;
var email_end_index = bodystring.indexOf("\"", email_begin_index);
if (email_end_index < 1000)
{
return "";
}
var email = bodystring.substring(email_begin_index, email_end_index);
if (log == 1)
{
Logger.log("Found e-mail address: " + email + "");
Logger.log(" marker_begin_index: " + marker_begin_index);
Logger.log(" email_begin_index: " + email_begin_index);
Logger.log(" email_end_index: " + email_end_index);
}
latestIndex = email_end_index;
return email;
};
function formatDate(mydate)
{
var str = "" + mydate;
var dateParts = str.split("/");
var day = dateParts[1];
if (day.length == 1)
day = "0" + day;
var month = dateParts[0];
if (month.length == 1)
month = "0" + month;
return dateParts[2] + "-" + month + "-" + day;
};
function getDownloadDate(bodystring) {
var marker = "Download Date:</strong>";
var marker_begin_index = bodystring.indexOf(marker, latestIndex);
var date_begin_index = marker_begin_index + marker.length;
var date_end_index = bodystring.indexOf("<br>", date_begin_index);
latestIndex = date_end_index;
return bodystring.substring(date_begin_index, date_end_index).trim();
};
function getProductName(bodystring) {
var marker = "Item:</strong>";
var marker_begin_index = bodystring.indexOf(marker, latestIndex);
var pname_begin_index = marker_begin_index + marker.length;
var pname_end_index = bodystring.indexOf("</td>", pname_begin_index);
latestIndex = pname_end_index;
return bodystring.substring(pname_begin_index, pname_end_index).trim();
};
Update: Any script I run stops after about 5 seconds, even if it does not call any services.
I tried the following code:
function test() {
Logger.log("Test begins");
Utilities.sleep(5000);
Logger.log("Test ends");
}
The script terminates after about 5 sec, but the last line is not printed. If decreasing the delay to 3 seconds it behaves as expected.
Moreover, to make the script update properly after modifying it, I need to save it, start it, click cancel, and then start it again, otherwise it the log output seems to come from the old version. (I'm running this through the script editor.)
Also, the debugger refuses to start, even for the small script above. Seems to be some problem with my account (johan.kraft#percepio.se). Are there any google employee out there that can check this?