Build Google App Script to Combine Similar Rows into One Row - google-apps-script

Many people are asking why I want to do this. I want to do this so that when I do my mail merge (this sends students their overdue book lists from the library), I don't send a student an email more than once. I never use this data more than once, I only use it to send a quick message, I never manipulate or work with the data so I don't care if it's hard to work with! I hope this makes sense! Thank you for your feedback thus far.
Google Sheet starts like this:
I want it to look like this:
I have started some script, which I am sure you will all laugh at (I know very little of programming). However, It'd be awesome to be able to do this. Basically, combine the rows that have an identical entry in column 1, by putting the values for the columns for those rows together into one row. The numbers can be added, and the data separated by a comma or a line break.
This is what I have so far....
function myFunction() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet();
var last = sheet.getLastRow();
//find identical entries in column 1//
for(i in data){
var row = data[i];
var duplicate = false;
for(j in newData){
if(row.join() == newData[j].join()){
duplicate = true;
}
}
if(!duplicate){
newData.push(row);
}
}
//add information from rows of identical entries into one row using a comma//
//delete empty rows//
}

It's a bit of a kluge but it runs with my fake data. Hopefully you can get it running with yours. It usually takes me some tweaking to get something like this a little more streamlined. I have a few comments in there to explain some of the key items.
function rowMerge() {
var firstRow = 2;
var firstCol = 1;
var sht = SpreadsheetApp.getActiveSheet();
sht.getRange(firstRow, firstCol, sht.getLastRow() - firstRow + 1, sht.getLastColumn() - firstCol + 1).sort(1);
sht.appendRow(['***','***','Control-z one more time','***','***']); //need a throwaway row to get last data element out since I'm moving out element[i-1]
var datR = sht.getDataRange();
var lastRow = datR.getLastRow();
var lastCol = datR.getLastColumn();
var datA = datR.getValues();
sht.getRange(2,1,lastRow - firstRow + 1,lastCol - firstCol + 1).clearContent().setWrap(true);
var datoutA = [];
var k=1;
var n = 0;
for(var i = 0;i < datA.length; i++)
{
if(i > 1)
{
if(datA[i][0] == datA[i-1][0])
{
k++; //k is the number of consecutive matching values
}
else
{
datoutA[n] = [];
if(k == 1)
{
// if k = 1 the datA[i-1] row gets copied into output array
for(var c = 0;c < datA[i-1].length; c++)
{
datoutA[n][c]=datA[i-1][c];
}
}
else
{
//i-1 to i-k rows get merged and copied into output array
var firstTime = true;
for(var a = 1;a <= k;a++)//input rows
{
for(var b = 0;b < datA[i].length -1;b++)//input columns
{
if(a > 1 || b > 0) //no delimiter for first row or first column
{
datoutA[n][b] += ', ';
}
if(firstTime || b == 0)// straight assignment for first row and running sum after that same with first column because we only want one of them because they're all the same.
{
datoutA[n][b] = datA[i - a][b];
}
else
{
datoutA[n][b] += datA[i - a][b];
}
}
if(firstTime)//first assignment then running sums for last column
{
datoutA[n][datA[i].length - 1] = Number(datA[i - a][datA[i].length-1]);
}
else
{
datoutA[n][datA[i].length - 1] += Number(datA[i - a][datA[i].length-1]);
}
firstTime=false;
}
var end = 'is near';
}
k=1; //consecutive counter
n++; //datoutA index
}
}
}
var datoutR = sht.getRange(2, 1, datoutA.length , datoutA[0].length);
datoutR.setValues(datoutA);
var colwidth = 250;
sht.setColumnWidth(2, colwidth);
sht.setColumnWidth(3, colwidth);
sht.setColumnWidth(4, colwidth);
}

There actually are many reasons why one would want to combine similar rows.
I for one, have a form that allows users to fill out information about properties. New info comes in everyday, so the forms will be reused for new entries.
All properties have a unique identifier, but the first form entry is useful to give the owner's name and address, along with other information.
The next form entry does not need to populate, (and re-type) all that info, but it does need to add other new details to the first entry.
Lastly, I want to report based on unique entries, with other info combined to read with the property.
Part of the answer for me is a custom function created by Hyde, called JoinRows.
Search for that.
Good luck!

Related

Would like to script on Google Sheets a way to remove certain rows

The first row has a column heading: First Name, Last Name, Email, Grade, Date
We have instances where people need to re-take tests and therefore will have another entry into the spreadsheet, what we are looking for, is a way that if someone re-takes the test and then adds their new data into the spreadsheet, we can automatically remove their old information
So, I have made some progress, this script removes duplicate rows, but exactly duplicate, so I need to somehow edit it to only look at the first 2 columns and then delete the oldest entry based on the Date column:
function removeDuplicates() {
var sheet = SpreadsheetApp.getActiveSheet();
var data = sheet.getDataRange().getValues();
var newData = [];
for (var i in data) {
var row = data[i];
var duplicate = false;
for (var j in newData) {
if (row.join() == newData[j].join()) {
duplicate = true;
}
}
if (!duplicate) {
newData.push(row);
}
}
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
You only want to compare the first two columns in each row instead of the full row.
Therefore, you should just compare indexes 0 and 1, and not the full array (something which you are doing with join). You could just replace this line:
if (row.join() == newData[j].join()) {
With this one:
if (row[0] === newData[j][0] && row[1] === newData[j][1]) {
Edit:
If you want to keep the newest, and not the oldest, loop your 2D array in reverse instead. For example, you can use reverse when retrieving the source data:
var data = sheet.getDataRange().getValues().reverse();
Alternatively, use for to loop in reverse:
for (let i = data.length - 1; i >= 0; i--) {
More in general, consider using removeDuplicates(columnsToCompare).
Edit 2:
If you have headers, just use reverse again when setting the values in order to keep the original order:
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData.reverse());

How to remove duplicate rows in Google Sheets using script

I currently have a column of data titled JobID. In this column, there are duplicates from an import that runs daily and grabs the latest data on the JobID's in question and appends them to the top of the sheet.
Therefore the most recent JobID rows are the ones with the data we need.
I'd like to know if there is a script that can be run on the sheet called 'History' to look up the column JobID, search every row below for duplicates and remove them, leaving the top, most recent JobID rows in the sheet.
I know that it is really easy to remove duplicates using the "Remove Duplicates" tool in Google Sheets... but I'm lazy and I'm trying to automate as much of this process as possible.
The script I have below runs without an error but is still not doing what I need it to. Wondering where I am going wrong here:
function removeDuplicates() {
//Get current active Spreadsheet
var sheet = SpreadsheetApp.getActive();
var history = sheet.getSheetByName("History");
//Get all values from the spreadsheet's rows
var data = history.getDataRange().getValues();
//Create an array for non-duplicates
var newData = [];
//Iterate through a row's cells
for (var i in data) {
var row = data[i];
var duplicate = false;
for (var j in newData) {
if (row.join() == newData[j].join()) {
duplicate = true;
}
}
//If not a duplicate, put in newData array
if (!duplicate) {
newData.push(row);
}
}
//Delete the old Sheet and insert the newData array
history.clearContents();
history.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
Remove Duplicate JobIDs
This function will keep the ones nearest to the top of the list. If you want to go the other way then resort the list in reverse order.
function removeDuplicates() {
var ss=SpreadsheetApp.getActive();
var sh=ss.getSheetByName("History");
var vA=sh.getDataRange().getValues();
var hA=vA[0];
var hObj={};
hA.forEach(function(e,i){hObj[e]=i;});//header title to index
var uA=[];
var d=0;
for(var i=0;i<vA.length;i++) {
if(uA.indexOf(vA[i][hObj['JobID']])==-1) {
uA.push(vA[i][hObj['JobID']]);
}else{
sh.deleteRow(i+1-d++);
}
}
}
Remove Duplicate JobIDs in Python
Based on Cooper's answer I wrote the same function in Python:
gsheet_id = "the-gsheet-id"
sh = gc.open_by_url("https://docs.google.com/spreadsheets/d/%s/edit#gid=0" % gsheet_id)
wks = sh[0]
def removeDuplicates(gwks):
headerRow = gwks[1]
columnToIndex = {}
i = 0
for column in headerRow:
columnToIndex[column] = i
i += 1
uniqueArray = []
d = 0
row_i = 0
for row in gwks:
row_i += 1
if gwks[row_i][columnToIndex['JobID']] not in uniqueArray:
uniqueArray.append(gwks[row_i][columnToIndex['JobID']])
else:
d += 1
gwks.delete_rows(row_i + 1 - d, 1)
removeDuplicates(wks)

Google Script Optimization

I have a sheet in which IDs are saved. Now occasionally I need to read these IDs and check if other values(Name) in the sheet still fits the ID. The code I have is:
var name = sheet.getRange(line,row).getValue();
var id = sheet.getRange(line,10).getValue();
if(name!== "" && id === "")
{
try
{ get an ID}
Now from what I read I know that calling each cell value seperately takes way more time. However I am not sure how to apply getValues to have these cases fixed.
Basically the same problem in a different dress I have with the following code:
var id = sheet.getRange(line,row).getValue();
if(id!== "" && id!="Not Found" && id!="Not Found old")
{
var url = "some api url "+id+"api key";
try
{
var str = UrlFetchApp.fetch(url).getContentText();
So how do I use get values to check every ID I get. I guess I would have to use some sort of
foreach(id)
or a
for(i= 0; i <= id.range; i++)
{
use id[i] to blablabal
}
But I have no idea how to implement it, any ideas?
Is there maybe even a different, more efficient way?
After calling getValues you get a double array, like [[valueA1, valueB1], [value A2, value B2]]. Process it in a for loop, which may be a double loop if all row/column combinations are involved. Keep in mind that spreadsheet uses 1-bases indexing but in JavaScript they are 0-based. Often, the top row is headers, so it's omitted from the loop below.
function processData() {
var sheet = SpreadsheetApp.getActiveSheet();
var range = sheet.getDataRange(); // all of data. could be some part of it
var values = range.getValues();
for (var i = 1; i < values.length; i++) { // skipping with header row
for (var j = 0; j < values[0].length; j++) {
if (condition) {
// do something to values[i][j];
}
}
}
range.setValues(values); // put updated values back
}
If the data of interest is in, say, column D, then you can decide to fetch only that column. For example:
function fetchStuff() {
var sheet = SpreadsheetApp.getActiveSheet();
var lastRow = sheet.getLastRow();
var range = sheet.getRange(1, 4, lastRow, 1); // all of column D, note 1-based indexing
var values = range.getValues();
for (var i = 1; i < values.length; i++) { // skipping header row
if (condition) {
var str = UrlFetchApp.fetch(values[i][0]).getContentText();
// do something
}
}
}
Keep in mind that fetch is by far the slowest operation here, and that it is subject to quotas. Use Utilities.sleep(ms) to avoid invoking services too often, and keep track of how much data you are asking the script to fetch.

Search for a string in a sheet and remove it when found

I'm trying to read a cell from sheet INPUT and check to see if it is present in sheet STORAGE (which has only one column). If it is found, it would need to be removed. The script can terminate if the string is found. I have written some code that seems like it would work in theory, but just times out after six minutes when I try using it.
I've tried setting it up so that the loops only iterate once each, but it still seems to be stuck somewhere. Here is my code:
var active_spreadsheet = SpreadsheetApp.getActive();
var sheet_input = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('INPUT');
var sheet_storage = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('STORAGE');
var last_row_counter = 0;
function findAndRemove() {
last_row_counter = sheet_storage.getLastRow();
var n = 1;
while (n <= last_row_counter) {
if (sheet_input.getRange("B2").getValue() == sheet_storage.getRange(n,1)) {
sheet_storage.deleteRow(n);
n = last_row_counter;
}
n = n++;
}
Brian,
See if this runs better:
function findAndRemove() {
var ss = SpreadsheetApp.getActive(),
valToFind = ss.getSheetByName('INPUT')
.getRange('B2')
.getValue(),
sheetToCheck = ss.getSheetByName('STORAGE'),
values = sheetToCheck.getRange('A2:A')
.getValues();
for (var i = 0, l = values.length; i < l; i++) {
if (values[i][0] == valToFind) {
sheetToCheck.deleteRow(i + 2);
break;
}
}
}
As you mentioned the code will exit after the first time the value is found. I used col A as the column that needs to be searched (change to suit). As you see this code gets all the values of that column in one call, then loops through it to see if a match is found.
Where do I start?!
I suggest you store the sheet_input.getRange("B2").getValue() in a variable before running the loop, as this means the script only needs to query the value once, instead of once per row.
In javascript n++ already means "increment n by 1" ie: n=n+1;, so the assignment is over the top. Just use: n++;
A for loop is better to use in this case as it includes the start number, the end condition and the increment all-in-one; Some info Here
The main problem is that when you do your comparison sheet_input.getRange("B2").getValue() == sheet_storage.getRange(n,1) you are comparing a value eg "hello" to a CELL, not its value. So you need sheet_storage.getRange(n,1).getValue() instead.
Instead of artificially saying "if a matching value is found, remove the row and then increment the row count to max" to stop the loop, just use the break; clause, which exits the loop.
I strongly suggest you provide some kind of prompt to let the user know when the function finished, and if it worked. See info on class Browser.msgBox()
Here's my code:
var active_spreadsheet = SpreadsheetApp.getActive();
var sheet_input = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('INPUT');
var sheet_storage = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('STORAGE');
function findAndRemove() {
var comapreValue = sheet_input.getRange("B2").getValue();
var last_row_counter = sheet_storage.getLastRow();
var sum_of_deleted_rows = 0;
for (var n = 1; n <= last_row_counter; n++) {
if (comapreValue == sheet_storage.getRange(n,1).getValue()) {
sheet_storage.deleteRow(n);
sum_of_deleted_rows++;
// break; //if you would like to stop after just one row is removed, un-comment this line
}
}
Browser.msgBox(sum_of_deleted_rows + " row(s) deleted");
}

Delete a row in google spreadsheet base on contents of cell in row

i have this script to delete certain rows, if the selected cell in the selected colum has the metioned content but i don't understand where it fails
function DeleteRowByKeyword() {
var value_to_check = Browser.inputBox("Enter the keyword to trigger delete Row","", Browser.Buttons.OK);
// prendo quello attivo
var DATA_SHEET = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
var FIRST_COLUMN = Browser.inputBox("Number of Column to look at (eg: for column A enter 1)","", Browser.Buttons.OK);
ss.toast("removing duplicates...","",-1);
var dataCopy1 = DATA_SHEET.getDataRange().getValues();
var deleted_rows = 0;
var rangeToCopy = '';
if (dataCopy1.length > 0) {
var i = 1;
while (i < DATA_SHEET.getMaxRows() - deleted_rows) {
if ((dataCopy1[i][FIRST_COLUMN]).search(value_to_check) != -1) {
ss.deleteRow(i);
deleted_rows++;
}
i++;
}
}
ss.toast("Done! " + deleted_rows + ' rows removed',"",5);
}
thanks in advance for any help
There are a few things to be improved:
Remember that spreadsheet rows and columns are numbered starting at
1, for all methods in the SpreadsheetApp, while javascript arrays
start numbering from 0. You need to adjust between those numeric
bases when working with both.
The String.search() method may be an inappropriate choice here, for
two reasons. The .search() will match substrings, so
('Testing').search('i') finds a match; you may want to look for exact
matches instead. Also, .search() includes support for regex
matching, so users may be surprised to find their input interpreted
as regex; .indexOf() may be a better choice.
To limit operations to rows that contain data, use .getLastRow()
instead of .getMaxRows().
When deleting rows, the size of the spreadsheet dataRange will get
smaller; you did take that into account, but because you're looping
up to the maximum size, the code is complicated by this requirement.
You can simplify things by looping down from the maximum.
The input of a Column Number is error-prone, so let the user enter a
letter; you can convert it to a number.
You had not defined a value for ss (within this function).
Here's the updated code:
function DeleteRowByKeyword() {
var value_to_check = Browser.inputBox("Enter the keyword to trigger delete Row", "", Browser.Buttons.OK);
var matchCol = Browser.inputBox("Column Letter to look at", "", Browser.Buttons.OK);
var FIRST_COLUMN = (matchCol.toUpperCase().charCodeAt(0) - 'A'.charCodeAt(0) + 1); // Convert, e.g. "A" -> 1
// prendo quello attivo (get active sheet)
var ss = SpreadsheetApp.getActiveSpreadsheet();
var DATA_SHEET = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
ss.toast("removing duplicates...", "", -1);
var dataCopy1 = DATA_SHEET.getDataRange().getValues();
var deleted_rows = 0;
if (dataCopy1.length > 0) {
var i = DATA_SHEET.getLastRow(); // start at bottom
while (i > 0) {
if (dataCopy1[i-1][FIRST_COLUMN-1] === value_to_check) {
ss.deleteRow(i);
deleted_rows++;
}
i--;
}
}
ss.toast("Done! " + deleted_rows + ' rows removed', "", 5);
}
You need to ensure that the index for deletion is the correct index. When you delete a row all bellow rows has their indexes changed -1.
So try this code:
if (dataCopy1.length > 0) {
var i = 1;
while (i < DATA_SHEET.getMaxRows() - deleted_rows) {
if ((dataCopy1[i][FIRST_COLUMN]).search(value_to_check) != -1) {
ss.deleteRow(i - deleted_rows);
deleted_rows++;
}
i++;
}
}