Concatenate large dataset from multiple worksheets of a Google Sheet - google-apps-script

I'm using the code below to concatenate certain sheets in my file into a sheet named "Master".
Currently, my script duplicates the header row, but I would like to prevent this. The data in all the sheets is in exactly the same format with the same header.
Each sheet has 58 columns and one of the sheets has 5000 rows.
Ideally I'd like the script to overwrite the "Master" sheet each time the script is run.
How can I modify my script to accomplish these goals?
/**
 * Copies every row (header rows included) of the listed worksheets into a
 * newly inserted sheet named "Master", using one appendRow() call per row.
 */
function concatAllSheets() {
  var includedSheet = ['Virtue data - Norway - NOK', 'Virtue data - Sweden - SKK', 'Virtue data - Denmark - DKK', 'Virtue Data - GBP', 'Virtue data - EUR markets', 'Virtue data - Arabia - USD'];
  var spreadsheet = SpreadsheetApp.getActive();
  // The sheet list is captured before the "Master" sheet is inserted.
  var existingSheets = spreadsheet.getSheets();
  var master = spreadsheet.insertSheet('Master');

  existingSheets.forEach(function (sheet) {
    var isIncluded = includedSheet.indexOf(sheet.getName()) !== -1;
    if (!isIncluded) {
      return;
    }
    // Each source row, including the sheet's own header row, is appended individually.
    sheet.getDataRange().getValues().forEach(function (row) {
      master.appendRow(row);
    });
  });
}

Rather than calling appendRow() for each row of data, which will really slow your script down, use a batch operation like setValues().
To do so, create an allData array to hold what will be the content of your "Master" sheet. As you iterate through the sheets, append their data into allData, and finally print it to the sheet.
As you're iterating, you can do a simple check of allData's length, to see if the header row is already there. If allData is empty, then there is obviously no header row.
/**
 * Rebuilds the "Master" sheet from the listed worksheets.
 *
 * The header row is taken from the first listed sheet that is found; the first
 * row of every later sheet is dropped so the header appears only once. All
 * rows are written with a single setValues() call.
 *
 * If "Master" already exists its contents are cleared and overwritten,
 * otherwise the sheet is created. When none of the listed sheets is present
 * nothing is written and "Master" is left untouched.
 */
function concatAllSheets() {
  var includedSheet = ['Virtue data - Norway - NOK', 'Virtue data - Sweden - SKK', 'Virtue data - Denmark - DKK', 'Virtue Data - GBP', 'Virtue data - EUR markets', 'Virtue data - Arabia - USD'];
  var sheetName = 'Master';
  var ss = SpreadsheetApp.getActive();
  var allData = [];

  ss.getSheets().forEach(function (sht) {
    if (includedSheet.indexOf(sht.getName()) === -1) {
      return;
    }
    var rngA = sht.getDataRange().getValues();
    if (allData.length > 0) {
      rngA.shift(); // The header is already in allData; drop this sheet's copy.
    }
    allData = allData.concat(rngA);
  });

  if (allData.length === 0) {
    return; // Nothing collected: avoid indexing allData[0] and leave "Master" alone.
  }

  // Reuse the existing "Master" sheet so the script can be run repeatedly;
  // insertSheet() fails when a sheet with that name already exists.
  var mother = ss.getSheetByName(sheetName);
  if (mother) {
    mother.clearContents();
  } else {
    mother = ss.insertSheet(sheetName);
  }
  // Use a batch operation to insert the data.
  mother.getRange(1, 1, allData.length, allData[0].length).setValues(allData);
}

Given the size of the dataset, you need to minimize reads and writes by using the batch methods Range#getValues and Range#setValues. You can also avoid iterating over the unnecessary sheets by only binding the ones in your array of names, using Array#forEach. This pattern also ensures that you collect all the data you intend - if there is a typo in your names array, or the sheet's name is changed unintentionally, this will throw an exception rather than silently not including the data.
/**
 * Concatenates the data of the named sheets into the destination sheet,
 * keeping a single header row.
 *
 * Every source sheet is read with one getValues() call and the combined
 * result is written with one setValues() call.
 *
 * @throws {Error} If a name in sheetNames does not match an existing sheet,
 *     so a typo or a renamed sheet cannot silently drop data.
 */
function concatenateSheets() {
  // Placeholders: list the names of the sheets to combine here.
  var sheetNames = ["name1", "name2"];
  var ss = SpreadsheetApp.getActive();
  var dest = ss.getSheetByName("someName");
  var output = [];
  var header = [];
  // Assemble a single paste output from all the sheets.
  sheetNames.forEach(function (name) {
    var sheet = ss.getSheetByName(name);
    if (!sheet) {
      throw new Error("Incorrect sheet name '" + name + "'");
    }
    var vals = sheet.getDataRange().getValues();
    // Remove the header row. shift() yields the row itself (a 1-D array),
    // which is the shape unshift() needs below.
    header = vals.shift();
    // Append to existing output array.
    output = output.concat(vals);
  });
  // Serialize output data array.
  if (dest && output.length && output[0].length) {
    // Remove all existing data values on the destination sheet.
    // (Only necessary if the number of rows or columns can decrease.)
    dest.getDataRange().clearContent();
    // Prepend the header on the output data array.
    output.unshift(header);
    dest.getRange(1, 1, output.length, output[0].length).setValues(output);
  }
}

Related

Copy and Paste the Row based on condition in another Sheet

I've been looking on this website and others for an answer but can't find anything relevant and was wondering if someone could help please.
I'm using Apps Script with Google Sheets.
The main sheet is called "M-Front". It holds each quotation number together with its revision number. For a new quotation the revision is zero; if we revise the quotation once, the revision number becomes 1.
For example, GE-2108-0271-Rev.0 is new quotation.
If we revise once, it would be GE-2108-0271-Rev.1.
I have other sheet called "M-Items".
Let say for GE-2108-0271-Rev.0, we have three rows for that quotation number represent three items for that quotation.
My intention is this: when a new revision is made in "M-Front" (the quotation becomes GE-2108-0271-Rev.1), the script should detect the quotation number in the last row of "M-Front", then copy and paste the matching rows in "M-Items" (in this case, 3 rows/items) and change the quotation number of the copies in "M-Items" from GE-2108-0271-Rev.0 to GE-2108-0271-Rev.1. How can I do this?
M-Front
M-Items
The script I'm stuck on:
// Question code: an attempt to copy data from 'M-Front' to the end of 'M-Items'
// for rows whose column M value is non-zero.
// NOTE(review): as written it cannot run to completion; see the notes below.
function CopyRange() {
var sourcespread = SpreadsheetApp.openById('1Zgs1jzjIeaBpd5Ms7emQgxhVJBMtlEOlDNDfxlhSRiY'); //replace with source ID
var sourcesheet = sourcespread.getSheetByName('M-Front'); //replace with source Sheet tab name
var destspread = SpreadsheetApp.openById('1Zgs1jzjIeaBpd5Ms7emQgxhVJBMtlEOlDNDfxlhSRiY'); //replace with destination ID
var destsheet = destspread.getSheetByName('M-Items'); //replace with destination Sheet tab name
// NOTE(review): getLastRow is referenced without parentheses, so testrange
// holds the method itself rather than a row number or a range.
var testrange = sourcesheet.getRange('M:M').getLastRow;
// NOTE(review): testrange is not a Range here, so this getValues() call fails.
var testvalue = (testrange.getValues());
var data = [];
// Collects the indexes of matched rows; it is never read afterwards.
var j =[];
// NOTE(review): i is not declared with var/let, so it becomes an implicit global.
for (i=0;i<testvalue.length;i++) {
// NOTE(review): "/=" is division-assignment, not a "not equal" comparison.
if (testvalue[i] /= 0) {
// Appends every row of column A (the whole 'A:A' range) for each matching i.
data.push.apply(data,sourcesheet.getRange('A:A').getValues());
//Copy matched ROW numbers to j
j.push(i);
}
}
//Copy data array to destination sheet
// NOTE(review): data[0] is undefined when nothing matched, so data[0].length would fail in that case.
destsheet.getRange(destsheet.getLastRow()+1,1,data.length,data[0].length).setValues(data);
}
You might want to try the below script:
/**
 * Template for propagating a quotation revision from 'M-Front' to 'M-Items'.
 *
 * Reads columns M and A of 'M-Front' and columns C and A of 'M-Items',
 * keeps only the non-empty cells of each column, and walks them in parallel
 * (skipping index 0, the header). For every position where the 'M-Front'
 * revision is non-zero, the column A values agree and the 'M-Front' revision
 * equals the 'M-Items' revision minus one, the configured source range is
 * copied to the configured destination range.
 *
 * The two range strings are placeholders and must be replaced with real A1
 * notations before the script is used.
 *
 * Note: blank cells are filtered out, so the loop index is a position in the
 * compacted column, not a sheet row number, whenever a column contains blanks.
 */
function CopyRange() {
  var sourcespread = SpreadsheetApp.openById('1Zgs1jzjIeaBpd5Ms7emQgxhVJBMtlEOlDNDfxlhSRiY'); //replace with source ID
  var sourcesheet = sourcespread.getSheetByName('M-Front'); //replace with source Sheet tab name
  var destsheet = sourcespread.getSheetByName('M-Items'); //replace with destination Sheet tab name

  // Returns the rows of a one-column range whose cell is not empty.
  var nonEmptyRows = function (sheet, a1Notation) {
    return sheet.getRange(a1Notation).getValues().filter(String);
  };
  var mvalues = nonEmptyRows(sourcesheet, 'M1:M');
  var avalues = nonEmptyRows(sourcesheet, 'A1:A');
  var cvalues = nonEmptyRows(destsheet, 'C1:C');
  var avalues2 = nonEmptyRows(destsheet, 'A1:A');

  // The four filtered columns can have different lengths; only compare
  // positions that exist in all of them.
  var rowCount = Math.min(mvalues.length, avalues.length, cvalues.length, avalues2.length);

  for (var i = 1; i < rowCount; i++) {
    var revision = Number(mvalues[i][0]);
    if (revision === 0) {
      continue;
    }
    var sameQuotation = String(avalues[i][0]) === String(avalues2[i][0]);
    var isNextRevision = revision === Number(cvalues[i][0]) - 1;
    // The copy, paste and increment all belong to the match; they must not
    // run for rows that fail the comparison.
    if (sameQuotation && isNextRevision) {
      var valuescopy = sourcesheet.getRange("RANGE_FOR_THE_VALUES_TO_COPY").getValues();
      destsheet.getRange("RANGE_WHERE_TO_PASTE").setValues(valuescopy);
      cvalues[i][0]++;
    }
  }
}
It is important to note the following points:
getLastRow will return the last row of the entire range - this means that rows which have no values are also included;
In order to retrieve the rows which contain a value, the filter has been used.
getValues method will return a 2D array, so you will have to access an element in this way array[i][j].
Moreover, please bear in mind that you will have to adjust the ranges properly such that they match your needs accordingly; more specifically, the range from where you want to copy the values and also the destination range.
Reference
getLastRow();
getValues();
getRange(row, column, numRows, numColumns).

How to remove duplicate rows in Google Sheets using script

I currently have a column of data titled JobID. In this column, there are duplicates from an import that runs daily and grabs the latest data on the JobID's in question and appends them to the top of the sheet.
Therefore the most recent JobID rows are the ones with the data we need.
I'd like to know if there is a script that can be run on the sheet called 'History' to look up the column JobID, search every row below for duplicates and remove them, leaving the top, most recent JobID rows in the sheet.
I know that it is really easy to remove duplicates using the "Remove Duplicates" tool in Google Sheets... but I'm lazy and I'm trying to automate as much of this process as possible.
The script I have below runs without an error but is still not doing what I need it to. Wondering where I am going wrong here:
/**
 * Rewrites the 'History' sheet keeping only the first occurrence of each row.
 * Two rows count as duplicates only when all of their cells, joined with
 * commas, produce the same text.
 */
function removeDuplicates() {
  var history = SpreadsheetApp.getActive().getSheetByName("History");
  var rows = history.getDataRange().getValues();

  // Keep a row only when no previously kept row joins to the same text.
  var uniqueRows = rows.reduce(function (kept, row) {
    var rowText = row.join();
    var alreadyKept = kept.some(function (keptRow) {
      return keptRow.join() === rowText;
    });
    if (!alreadyKept) {
      kept.push(row);
    }
    return kept;
  }, []);

  // Clear the sheet contents, then write the de-duplicated rows from the top-left cell.
  history.clearContents();
  history.getRange(1, 1, uniqueRows.length, uniqueRows[0].length).setValues(uniqueRows);
}
Remove Duplicate JobIDs
This function will keep the ones nearest to the top of the list. If you want to go the other way then resort the list in reverse order.
/**
 * Deletes every row of the 'History' sheet whose JobID already appeared in a
 * row above it, so the topmost row of each JobID is the one that survives.
 *
 * The JobID column is located by its title in the first row. Rows with the
 * same blank JobID are treated as duplicates of each other.
 *
 * @throws {Error} If the 'History' sheet or the 'JobID' header cannot be
 *     found. Without this check a missing header would make every row look
 *     like a duplicate and all rows but the first would be deleted.
 */
function removeDuplicates() {
  var sh = SpreadsheetApp.getActive().getSheetByName("History");
  if (!sh) {
    throw new Error("Sheet 'History' not found");
  }
  var vA = sh.getDataRange().getValues();
  var jobIdColumn = vA[0].indexOf('JobID');
  if (jobIdColumn === -1) {
    throw new Error("Column 'JobID' not found in the header row of 'History'");
  }

  var seen = new Set();
  var duplicateRows = []; // 1-based sheet row numbers, in ascending order
  vA.forEach(function (row, index) {
    var jobId = row[jobIdColumn];
    if (seen.has(jobId)) {
      duplicateRows.push(index + 1);
    } else {
      seen.add(jobId);
    }
  });

  // Delete from the bottom up so earlier deletions do not shift the row
  // numbers of the rows that are still waiting to be deleted.
  for (var k = duplicateRows.length - 1; k >= 0; k--) {
    sh.deleteRow(duplicateRows[k]);
  }
}
Remove Duplicate JobIDs in Python
Based on Cooper's answer I wrote the same function in Python:
# Open the spreadsheet by its URL and take the worksheet at index 0.
# NOTE(review): `gc` is not defined in this excerpt; presumably an already
# authorized Google Sheets client object. Confirm before running.
gsheet_id = "the-gsheet-id"
sh = gc.open_by_url("https://docs.google.com/spreadsheets/d/%s/edit#gid=0" % gsheet_id)
wks = sh[0]
def removeDuplicates(gwks):
    """Delete every row whose JobID already appeared in a row above it.

    The topmost row of each JobID is kept. The worksheet is read once, by
    iterating over it, and the duplicates are then deleted from the bottom up
    so that earlier deletions do not shift the positions of later ones.

    Args:
        gwks: Worksheet-like object. Iterating it must yield the rows as
            sequences of cell values, with the header row first, and it must
            provide ``delete_rows(index, number)``.

    Raises:
        KeyError: If the header row has no 'JobID' column.
    """
    rows = list(gwks)  # single snapshot; no per-row re-reads of the sheet
    if not rows:
        return

    column_to_index = {column: i for i, column in enumerate(rows[0])}
    job_id_column = column_to_index['JobID']

    seen = set()
    duplicate_rows = []
    # Row numbers start at 1, as in the original delete_rows(row_i + 1 - d, 1)
    # call. NOTE(review): assumes delete_rows takes a 1-based row index;
    # confirm against the worksheet library in use.
    for row_number, row in enumerate(rows, start=1):
        job_id = row[job_id_column]
        if job_id in seen:
            duplicate_rows.append(row_number)
        else:
            seen.add(job_id)

    for row_number in reversed(duplicate_rows):
        gwks.delete_rows(row_number, 1)
# Run the de-duplication on the worksheet held in wks.
removeDuplicates(wks)

Google Sheets Macro for deleting columns in specific tabs based on header

I wrote a macro that creates sheet tabs and populates them based on specific criteria. For example, if I want to isolate rows indicating Closed Won, and move them to a new tab. I will run this function on my main tab called 'data' and create a new tab called 'Closed Won'.
This new tab will duplicate the same header as in 'data', and then it will populate with all rows with "Closed Won" in column L.
However, this new tab has more data than I need. I want to delete specific columns IF they have a column name AND tab name (so it does not delete the columns in my original data tab).
I am having trouble with the IF. Can someone help with a simple script that I can add to the end of the original function?
// Question code (recorded macro plus hand-written loop): inserts a new sheet
// named 'closed_won', copies the header row of 'data' into it, then appends
// the first 25 columns of every 'data' row whose column L is 'Closed Won'.
function closed_won() {
var spreadsheet = SpreadsheetApp.getActive();
spreadsheet.getRange('1:1').activate();
// Insert the new sheet at index 1; the following lines select its first row and rename it.
spreadsheet.insertSheet(1);
spreadsheet.getRange('1:1').activate();
spreadsheet.getActiveSheet().setName('closed_won');
// Copy row 1 of 'data' (values only) into the active range.
spreadsheet.getRange('data!1:1').copyTo(spreadsheet.getActiveRange(), SpreadsheetApp.CopyPasteType.PASTE_VALUES, false);
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName('data');
var testrange = sheet.getRange('L:L');
var testvalue = (testrange.getValues());
var csh = ss.getSheetByName('closed_won');
var data = [];
// Collects the indexes of matched rows; it is never read afterwards.
var j =[];
// NOTE(review): i is not declared with var/let, so it becomes an implicit global.
for (i=0; i<testvalue.length;i++) {
// testvalue[i] is a one-element row array; the loose == compares its string form with 'Closed Won'.
if ( testvalue[i] == 'Closed Won') {
// Reads columns 1-25 of the matching row with a separate getRange/getValues call per match.
data.push.apply(data,sheet.getRange(i+1,1,1,25).getValues());
j.push(i);
}
}
// NOTE(review): data[0] is undefined when no row matched, so data[0].length would fail in that case.
csh.getRange(csh.getLastRow()+1,1,data.length,data[0].length).setValues(data);
// THIS IS WHERE I WANT TO ADD THE DELETE COLUMN CODE
}
You want to delete columns where the header name matches a sheet name - though obviously not on the current sheet.
The process is:
get the sheets and their names; save them in an array
get the header row. This will be a 2D array, so "flatten" it to 1D for easier matching
loop through the headers and look for a match with sheet names. The code uses indexOf for this.
3.1. when you find a match of header name and sheet name, make sure that it's not the match for the current sheet
3.2. create an array of the column numbers to be deleted.
After the loop, reverse the order of the array so that the "highest" to-be-deleted column numbers are listed first.
Loop through the to-be-deleted column numbers, and delete the columns
/**
 * Deletes the columns of the 'closed_won' sheet whose header (row 1) equals
 * the name of another sheet in the spreadsheet.
 *
 * A header that equals 'closed_won' itself is logged and left in place.
 * Columns are deleted from right to left so that removing one column does not
 * shift the numbers of the columns still to be removed.
 *
 * @throws {Error} If the spreadsheet has no sheet named 'closed_won'.
 */
function SO5819343001() {
  var spreadsheet = SpreadsheetApp.getActive();

  // Names of all sheets in the spreadsheet.
  var thesheets = spreadsheet.getSheets().map(function (sheet) {
    return sheet.getName();
  });

  var csh = spreadsheet.getSheetByName('closed_won');
  if (!csh) {
    throw new Error("Sheet 'closed_won' not found");
  }

  var cshLC = csh.getLastColumn();
  if (cshLC === 0) {
    return; // Empty sheet: there is no header row to inspect.
  }

  // getValues() returns a 2D array; row 0 is the header row.
  var flatheaders = csh.getRange(1, 1, 1, cshLC).getValues()[0];
  var cshname = csh.getName();

  // 1-based column numbers to delete, in ascending order.
  var deletions = [];
  flatheaders.forEach(function (header, index) {
    if (thesheets.indexOf(header) === -1) {
      return; // Header is not a sheet name.
    }
    if (header !== cshname) {
      deletions.push(index + 1);
    } else {
      Logger.log("the match IS on this sheet. Abort.");
    }
  });

  // Highest column number first.
  for (var d = deletions.length - 1; d >= 0; d--) {
    csh.deleteColumn(deletions[d]);
  }
}

Looping through sheets in Google Spreadsheet and using getSheetName()

I'm writing a script to loop through each sheet in one spreadsheet and copy data from specific cells into a corresponding sheet on another spreadsheet. I am getting an error on line 18 of the below code, however, stating that it can't call the getLastRow method of null. I used a couple of Logger.log lines to check my variables and see that targetSheet is coming back as null. Any advice on what I've got wrong?
//Export each sheet's daily data to another sheet *Test Version*
// Question code: for each sheet of the active spreadsheet, intended to append a
// dated row to the same-named sheet of the target spreadsheet and fill it with
// the values of rows 6-10 of column 2.
function exportReports() {
var sourceSS = SpreadsheetApp.getActiveSpreadsheet();
//Open Back Production Record *Test Version*
var targetSS = SpreadsheetApp.openById("1ZJKZi-UXvqyGXW9V7KVx8whxulZmx0HXt7rmgIJpUY4");
var allSourceSheets = sourceSS.getSheets();
//For-Loop to loop through hourly production sheets, running the move data for-loop on each
for(var s in allSourceSheets){
var loopSheet = allSourceSheets[s];
var loopSheetName = loopSheet.getSheetName();
// getSheetByName returns null when the target spreadsheet has no sheet with this name.
var targetSheet = targetSS.getSheetByName(loopSheetName);
Logger.log(s);
Logger.log(loopSheet);
Logger.log(targetSheet);
Logger.log(loopSheetName);
// Fails here when targetSheet is null (the error described in the question).
var targetRow = targetSheet.getLastRow()+1;
var currentDate = Utilities.formatDate(new Date(), "GMT-5", "MM/dd/yy");
// Column 1 of the new row receives today's date.
targetSheet.getRange(targetRow, 1).setValue(currentDate);
//For-Loop to move data from source to target
for(var i=6;i<=10;i++){
// NOTE(review): sourceSheet is not defined in this function; the current sheet is held in loopSheet.
var sourceRange = sourceSheet.getRange(i, 2);
// Source rows 6-10 map to target columns 2-6 (i-4) of the new row.
var targetRange = targetSheet.getRange(targetRow, i-4);
var holder = sourceRange.getValue();
targetRange.setValue(holder);
}
}
}
Per the documentation on getSheetByName, if the target sheet name does not exist, then you get null as a return value.
getSheetByName(name)
Returns a sheet with the given name.
If multiple sheets have the same name, the leftmost one is returned. Returns null if there is no sheet with the given name.
So, the desired sheet with name specified by loopSheetName does not exist in the target workbook. Perhaps someone has created a new sheet, or renamed an existing sheet in the source workbook.
You haven't asked about it, but you can improve the performance of your copy code as well, by reading the inputs as a multi-row range array, creating a row array to hold the results, and writing that once:
// Read the five cells (6,2) through (10,2) of the source sheet in one call.
var sourceData = sourceSheet.getRange(6, 2, 5, 1).getValues();
// Turn the 5x1 column into a single flat row by taking the only cell of each source row.
var output = sourceData.map(function (sourceRow) {
  return sourceRow[0];
});
// Write the whole row at once, starting at column 2 (source row 6 -> 6 - 4 = 2).
targetSheet.getRange(targetRow, 2, 1, output.length).setValues([output]);

Automatically add variables to array?

In a google script I have written something to check my monthly expenses, which are listed in a google sheet.
Based on words the script finds, every line gets a category tag. It works fine, but the number of words to search for is getting big. And the array is getting big too.
I have listed 6 pairs (words to find, tag to add) - but in real version I have as many as 35. How can I create the pairs, and load everything automatically in the array?
This is my script:
/**
 * Tags every row of the active sheet with an expense category.
 *
 * For each row, the text in column D (index 3) is upper-cased and searched
 * for each keyword; on a hit the paired tag is written to column E (index 4).
 * When several keywords match the same row, the pair listed last wins.
 * The sheet is read once and written back once.
 */
function myFunction() {
  // check usual suspects: [word to find (upper case), tag to add]
  var A1 = ["CAFE", "HORECA"];
  var A2 = ["ALBERT", "AH"];
  var A3 = ["VOMAR", "Vomar"];
  var A4 = ["HEMA", "HEMA"];
  var A5 = ["KRUID", "Drogist"];
  var A6 = ["RESTA", "Horeca"];
  // in Array
  var expenses = [A1, A2, A3, A4, A5, A6];
  var ss = SpreadsheetApp.getActiveSheet();
  var data = ss.getDataRange().getValues(); // read all data in the sheet

  data.forEach(function (row) {
    // Column D holds the description that is searched for the keywords.
    var description = String(row[3]).toUpperCase();
    expenses.forEach(function (pair) {
      if (description.indexOf(pair[0]) !== -1) {
        row[4] = pair[1]; // column E receives the tag
      }
    });
  });

  // Log and write back once, after all pairs have been applied.
  Logger.log(data);
  ss.getRange(1, 1, data.length, data[0].length).setValues(data);
}
I can propose this:
/**
 * Tags every row of the active sheet with an expense category in one pass.
 *
 * The keyword/tag pairs are turned into a lookup object and a single regular
 * expression of the form (KEY1)|(KEY2)|... . For each row the text in column D
 * (index 3) is upper-cased and matched against that expression; on a hit the
 * tag of the matched keyword is written to column E (index 4).
 *
 * Keywords must be upper case because the cell text is upper-cased before
 * matching. They are escaped, so they are matched literally even if they
 * contain regular-expression metacharacters.
 */
function multiPass() {
  var searchCriterions = [
    ["CAFE", "HORECA"],
    ["ALBERT", "AH"],
    ["VOMAR", "Vomar"],
    ["HEMA", "HEMA"]
  ];

  var dico = {};          // keyword -> tag
  var alternatives = [];  // one escaped "(KEYWORD)" group per pair
  searchCriterions.forEach(function (pair) {
    dico[pair[0]] = pair[1];
    alternatives.push("(" + pair[0].replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + ")");
  });
  var re = new RegExp(alternatives.join("|"), "");

  var ss = SpreadsheetApp.getActiveSheet();
  var data = ss.getDataRange().getValues(); // read all data in the sheet

  for (var n = 0; n < data.length; ++n) { // iterate row by row and examine data in column D (index 3)
    var test = String(data[n][3]).toUpperCase().match(re);
    if (test !== null) {
      // test[0] is the matched keyword; look up its tag.
      data[n][4] = dico[test[0]];
    }
  }
  ss.getRange(1, 1, data.length, data[0].length).setValues(data); // write back to the sheet
}
Instead of using one variable per "pair", prefer one big table (it's less painful to write).
Then transform your pairs into an object, to quickly access the second element of each pair, and create one big regexp that checks all the keywords at once instead of testing them one by one.
Now, as we are using one big array for the search criteria, we can easily imagine loading this array instead of hard-coding it. If you have a sheet that holds the data, you can change the code this way:
// Load the search criteria from the spreadsheet range called "namedRange" instead of hard-coding them.
// NOTE(review): assumes that range has two columns (keyword, tag), one pair per row; confirm against the sheet.
var searchCriterions = SpreadsheetApp.getActive().getRange("namedRange").getValues();