Google Script to remove duplicates from top to bottom - google-apps-script

I have a script which combines three scripts to do the following:
1) Insert rows from one tab to the top of another tab
2) Remove duplicates from the tab in which the data was just added
3) Clear out the old tab from which the data was just ported over from
For the De-dupe script, it deletes rows starting at the bottom and then goes up. So I'm having established and existing data deleted. What I need it to do is start at the top and go down. So if new row records ported over from the first script are found to be a duplicate, it should delete those instead.
How can I get the de-dupe script to essentially process the opposite way?
I did find reverse logic with the below link, but I can't find a way to make it work with my script and keep getting errors. I'm also not sure if this would be the best methodology to fit in with my overall script.
Link: Removing Duplicate Rows in a google Spreadsheet from the end row
function Run(){
insert();
removeDuplicates();
clear1();
}
function insert() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var source = ss.getSheetByName('Candidate Refresh'); // change here
var des = ss.getSheetByName('Candidate Listing'); // change here
var sv = source
.getDataRange()
.getValues();
sv.shift();
des.insertRowsAfter(1, sv.length);
des.getRange(2, 1, sv.length, source.getLastColumn()).setValues(sv);
}
//Code in Question Start
function removeDuplicates() {
var sheet = SpreadsheetApp.getActiveSheet();
var rows = sheet.getLastRow();
var firstColumn = sheet.getRange(1, 2, rows, 1).getValues();
firstColumn = firstColumn.map(function (e) {return e[0]})
for (var i = rows; i >0; i--) {
if (firstColumn.indexOf(firstColumn[i-1]) != i-1) {
sheet.deleteRow(i);
}
}
}
//Code in Question End
function clear1() {
var sheet = SpreadsheetApp.getActive().getSheetByName('Candidate Refresh');
sheet.getRange('A2:K100').clearContent()
}
If new rows at the top of the sheet are found to be a duplicate, delete the new rows at the top.

try this:
function removeDuplicates() {
var sheet=SpreadsheetApp.getActiveSheet();
var rows=sheet.getLastRow();
var firstColumn=sheet.getRange(1, 2, rows, 1).getValues();
firstColumn = firstColumn.map(function(e){return e[0]})
var uA=[];
for (var i=rows;i>0;i--) {
if (uA.indexOf(firstColumn[i-1])!=-1) {
sheet.deleteRow(i);
}else{
uA.push(firstColumn[i-1]);
}
}
}

Related

Google Apps Script that removes/deletes any empty rows

Essentially I am looking for a script that keeps my single Google Sheet (titled, Main) free of any empty rows. If I have a row that contains data, and after some time, all of the data in that row is cleared, I want the script to notice and immediately delete that row. Any help/scripts appreciated.
I am managing the data in the sheet from an external app, and what happens is that I have the ability to "delete" a row (from the app). But what it actually does is just clear all the data from that row, it doesn't actually delete the row itself. So what I'm left with is a sheet that has scattered rows of data. For example, after a while, maybe there's data in row 3, and row 12, and row 125, and row 356, and what happens is the space between those rows (that contain data) are empty rows, when instead, using a script, it could keep checking for empty rows, and delete them as they become empty.
Edit - working solution:
function deleteEmptyRows() {
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getSheetByName("Main");
var data = sheet.getRange(2, 1, sheet.getLastRow() - 1, sheet.getLastColumn()).getValues();
var row = sheet.getLastRow();
while (row > 2) {
var rec = data.pop();
if (rec.join('').length === 0) {
sheet.deleteRow(row);
}
row--;
}
var maxRows = sheet.getMaxRows();
var lastRow = sheet.getLastRow();
if (maxRows - lastRow != 0) {
sheet.deleteRows(lastRow + 1, maxRows - lastRow);
}
}
Have a look at Trigger. You will find the onEdit() trigger particularly useful.
Every time an edit happens in the associated Spreadsheet, the JavaScript function onEdit() will be run and it will get an event object passed to it. You can use the event object to detect which kind of event happend e.g. a row was deleted. Filter the kind of event you want to react to and then do whatever you need to do using the APIs Google App Script provides. For a list of available changeTypes (i.e. row deleted, column inserted etc.) of a event object and its properties for Google Sheets see the documentation.
Delete Empty Lines in Sheet Main every five minutes
function delMts() {
const ss = SpreadsheetApp.getActive();
ss.toast('', "Removing Empties", 5);
const sh = ss.getSheetByName("Main");
const vs = sh.getRange(1, 1, sh.getLastRow(), sh.getLastColumn()).getValues().filter(r => r.every(c => c != ''));
sh.deleteRows(vs.length + 1,sh.getMaxRows() - vs.length);
sh.getRange(1,1,vs.length,vs[0].length).setValues(vs);
}
//run this once
function createTriggerfordleMts() {
if(ScriptApp.getProjectTriggers().filter(t => t.getHandlerFunction() == "delMts").length == 0) {
ScriptApp.newTrigger("delMts").timeBased().everyMinutes(5).create();
}
}
This script grabs the sheet, iterates from the bottom up (starting from the last data containing row) and checks for empty rows, if any are found, it deletes them, it then checks for remaining empty rows beyond the last data containing row and deletes them all at once, if they are empty. Also, this script is setup with a corresponding onChange trigger so that the script runs anytime the sheet is edited, catching any newly cleared rows.
function deleteEmptyRows() {
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getSheetByName("Main");
var data = sheet.getRange(2, 1, sheet.getLastRow() - 1, sheet.getLastColumn()).getValues();
var row = sheet.getLastRow();
while (row > 2) {
var rec = data.pop();
if (rec.join('').length === 0) {
sheet.deleteRow(row);
}
row--;
}
var maxRows = sheet.getMaxRows();
var lastRow = sheet.getLastRow();
if (maxRows - lastRow != 0) {
sheet.deleteRows(lastRow + 1, maxRows - lastRow);
}
}

Remove duplicates based on one column and keep latest entry in google sheets

I was working on some automation and wanted to remove the duplicate rows in my google sheet by comparing it on basis of 3rd column. I found one code which is working flawlessly but it does not remove the old entry in sheet, it removes the latest one. I wanted to keep the latest one from the duplicates.
This is the code which I found for appscript by Cooper:
function removeDuplicates() {
var sh=SpreadsheetApp.getActiveSheet();
var dt=sh.getDataRange().getValues();
var uA=[];
var d=0;
for(var i=0;i<dt.length;i++) {
if(uA.indexOf(dt[i][2])==-1) {
uA.push(dt[i][2]);
}else{
sh.deleteRow(i+1-d++);
}
}
}
Can anyone help me with the code which does the same work "removing duplicate rows (Keeps latest entry removes the old entry) based on column" ?
From I wanted to keep the latest one from the duplicates., when the latest one is the last row, in your script, how about the following modification?
Modified script:
function removeDuplicates() {
var sh = SpreadsheetApp.getActiveSheet();
var dt = sh.getDataRange().getValues();
var uA = [];
for (var i = dt.length - 1; i >= 0; i--) {
if (uA.indexOf(dt[i][2]) == -1) {
uA.push(dt[i][2]);
} else {
sh.deleteRow(i + 1);
}
}
}
My suggestion:
function myFunction() {
var sh = SpreadsheetApp.getActiveSheet();
// get values of column C
var col = sh.getDataRange().getValues().map(x => x[2]);
// get indexes of duplicated values in the column
var duplicates = col.map((x,i) =>
col.slice(i+1).includes(x) ? i+1 : '').filter(String);
// remove rows by the indexes
duplicates.reverse().forEach(x => sh.deleteRow(x));
}
Before:
After:
Update
If there will some glitches it makes sense to add the command flush() after every deleteRow(). The last line of the code should be like this:
// remove rows by the indexes
duplicates.reverse().forEach(x => { sh.deleteRow(x); SpreadsheetApp.flush() });

Google Sheet speedup Loops

I have around 5000 rows. Now I want to move data for one sheet to another if their status is Delivered.
This is what I am doing:
function move(){
var activeSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet2");
var sourceSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet1");
var lastSourceRow = sourceSheet.getLastRow();
var lastSourceCol = sourceSheet.getLastColumn();
var sourceRange = sourceSheet.getRange(1, 1, lastSourceRow, lastSourceCol);
var sourceData = sourceRange.getValues();
var activeRow = 0;
//Loop through every retrieved row from the Source
for (row = lastSourceRow; row > 1; row--) {
//IF Column B in this row has 'deal', then work on it.
if (sourceData[row-1][1] === 'Delivered') {
//then push that into the variables which holds all the new values to be returned
activeSheet.appendRow(sourceData[row-1]);
//delete current
sourceSheet.deleteRow(row);
}
Logger.log(row);
}
}
My problems:
Script is so slow it takes so much time to execute.
As per google policy script timeout in 5 mins.
So the loop has to be started again. So also index is set to null. So I can't check the record at the top if they are delivered or not.
Please can anyone provide me a better solution for this? I search for a faster loop but still no luck.
function move(){
const ash=SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet2");
const ssh=SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Sheet1");
var data=ssh.getDataRange().getValues();
var d=0;
var oA=[];
data.forEach(function(r,i){
if(r[1]=="Delivered") {
oA.push(r);
ssh.deleteRow(i+1-d++);
}
});
ash.getRange(ash.getLastRow()+1,1,oA.length,oA[0].length).setValues(oA);
}
The deletion of the rows always takes time. You could splice them out of the data array and clear the sheet and place them back in with a setvalues(). But if you have formulas this may mess some things up. I'll leave that up to you.

How to remove duplicate rows in Google Sheets using script

I currently have a column of data titled JobID. In this column, there are duplicates from an import that runs daily and grabs the latest data on the JobID's in question and appends them to the top of the sheet.
Therefore the most recent JobID rows are the ones with the data we need.
I'd like to know if there is a script that can be run on the sheet called 'History' to look up the column JobID, search every row below for duplicates and remove them, leaving the top, most recent JobID rows in the sheet.
I know that it is really easy to remove duplicates using the "Remove Duplicates" tool in Google Sheets... but I'm lazy and I'm trying to automate as much of this process as possible.
The script I have below runs without an error but is still not doing what I need it to. Wondering where I am going wrong here:
function removeDuplicates() {
//Get current active Spreadsheet
var sheet = SpreadsheetApp.getActive();
var history = sheet.getSheetByName("History");
//Get all values from the spreadsheet's rows
var data = history.getDataRange().getValues();
//Create an array for non-duplicates
var newData = [];
//Iterate through a row's cells
for (var i in data) {
var row = data[i];
var duplicate = false;
for (var j in newData) {
if (row.join() == newData[j].join()) {
duplicate = true;
}
}
//If not a duplicate, put in newData array
if (!duplicate) {
newData.push(row);
}
}
//Delete the old Sheet and insert the newData array
history.clearContents();
history.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
Remove Duplicate JobIDs
This function will keep the ones nearest to the top of the list. If you want to go the other way then resort the list in reverse order.
function removeDuplicates() {
var ss=SpreadsheetApp.getActive();
var sh=ss.getSheetByName("History");
var vA=sh.getDataRange().getValues();
var hA=vA[0];
var hObj={};
hA.forEach(function(e,i){hObj[e]=i;});//header title to index
var uA=[];
var d=0;
for(var i=0;i<vA.length;i++) {
if(uA.indexOf(vA[i][hObj['JobID']])==-1) {
uA.push(vA[i][hObj['JobID']]);
}else{
sh.deleteRow(i+1-d++);
}
}
}
Remove Duplicate JobIDs in Python
Based on Cooper's answer I wrote the same function in Python:
gsheet_id = "the-gsheet-id"
sh = gc.open_by_url("https://docs.google.com/spreadsheets/d/%s/edit#gid=0" % gsheet_id)
wks = sh[0]
def removeDuplicates(gwks):
headerRow = gwks[1]
columnToIndex = {}
i = 0
for column in headerRow:
columnToIndex[column] = i
i += 1
uniqueArray = []
d = 0
row_i = 0
for row in gwks:
row_i += 1
if gwks[row_i][columnToIndex['JobID']] not in uniqueArray:
uniqueArray.append(gwks[row_i][columnToIndex['JobID']])
else:
d += 1
gwks.delete_rows(row_i + 1 - d, 1)
removeDuplicates(wks)

Issue with ss.insertSheet - inserted sheet has wrong name

Hello I wrote a small script to copy one template sheet in a spreadsheet, as a new sheet in the same spreadsheet.
I wrote two versions of it, one driven by a menu that asks for the name of the new sheet to be created:
function addonenewSheet() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var temp = ss.getSheetByName('template');
var naame = Browser.inputBox("CustomerID to be created");
try {
ss.setActiveSheet(ss.getSheetByName(naame));
}
catch (e) {
ss.insertSheet(naame, {template:temp});
}
}
This one works as intended, and names the new sheet 234 if I say so in the inputbox.
The second function is very similar, but parses some values and attempts to create many sheets at once:
function addmissingSheets() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var temp = ss.getSheetByName('template');
var sheet = SpreadsheetApp.getActiveSheet();
var data = sheet.getDataRange().getValues();
for (var i = 10; i < data.length; i++) {
if(typeof data[i][1] == 'number'){
try {
ss.setActiveSheet(ss.getSheetByName(data[i][1]));
}
catch (e) {
Logger.log('Customer ID: ' + data[i][1]);
var insertpage = data[i][1];
ss.insertSheet(insertpage, {template:temp});
}
}
}
}
As long as Logger.log is concerned, data[i][1] has the right value, but somehow insertSheet creates sheets named "copy of template", "copy of template 2"... Instead of taking the value assigned in data[i][1]
Would anyone know why this behaviour and how I can solve this issue?
your second script does not use correct variable types. The method you are using insert sheet uses types (<string>, {template:<sheet>}). Since your customer ID is a number it does not work. There is a simple fix you can do
Change
var insertpage = data[i][1];
into:
var insertpage = data[i][1].toString();
and you will now be able to use the customer ID (which is a number) to create a sheet name (which is a string)