Search a spreadsheet column for duplicate entries - google-apps-script

I have some customer data in a Google Spreadsheet as shown below. New entries are added regularly, and I would like to use a script to search the contact number column to check if the contact number for a new entry already exists.
Also, if possible, I'd like the script to give some kind of notification that indicates the row number of the existing entry. For example, the notification could be a new column where it would just write the "row number" or "no entries found".
Data Available:
Contact Number Email ID Venue Address Service Date
1234567890 Test1#gmail.com cypress 21/04/2016
0123456789 Test2#gmail.com river run drive 22/04/2016
What I am looking for:
Contact Number Email ID Venue Address Service Date Duplicate entry in
1234567890 Test1#gmail.com cypress 21/04/2016 row1
0123456789 Test2#gmail.com river run drive 22/04/2016 no entries found

So, your description implies that you want to search for entry before it's input. Rather than searching for duplicates, which means there is already duplicate rows in your data. I have constructed a method for both for you.
This shows an example of both, this will either find the duplicates or it will find an entry by phone number. The remainder of the logic and customization is up to you.
Here is the example sheet for this: Example Spreadsheet
Edit: Added column finding logic, and added a button you can press to see the duplicates in the sheet. NOTE: You must be signed into a google account for the script to run
//Entry Point
function myFunction() {
var sheet = SpreadsheetApp.getActiveSheet(); //Get the current sheet
var dataRange = sheet.getDataRange();
var valuesRange = dataRange.getValues(); //The array of data on the sheet
var columns = GetColumns(valuesRange, dataRange.getNumColumns(), 0);
var duplicates = SearchForDuplicates(valuesRange, columns.columns['Contact Number'].index); //Search for existing duplicates
var elementIndex = SearchForValue(valuesRange, columns.columns['Contact Number'].index, 123456789); //Search for a phone number of 123456789
if(duplicates.length > 0){ //If there are duplicates
var isDuplicateColumn = columns.columns['Is Duplicate'].index;
for(var i = 0; i < duplicates.length; i++){
valuesRange[duplicates[i].index][isDuplicateColumn] = 'Yes'; //Assign Yes to the appropriate row
}
dataRange.setValues(valuesRange); //Set the spreadsheets values to that of the modified valuesRange
}
Logger.log(duplicates);
Logger.log(elementIndex);
}
//Searches an array for duplicate entries
function SearchForDuplicates(array, columnIndex){
var uniqueElements = {};
var duplicates = [];
for(var i = 0; i < array.length; i++){
if(typeof uniqueElements[array[i][columnIndex]] === 'undefined'){
uniqueElements[array[i][columnIndex]] = 0; //If the element does not yet exist in this object, add it
} else {
duplicates.push({row: array[i], index: i}); //If the element does exist, it's a duplicate
}
}
return duplicates;
}
//Searches an array for a value
function SearchForValue(array, columnIndex, value){
for(var i = 0; i < array.length; i++){
if(array[i][columnIndex] == value){
return i; //Element found, return index
}
}
return -1; //No element found, return -1
}
//Gets a columns object for the sheet for easy indexing
function GetColumns(valuesRange, columnCount, rowIndex)
{
var columns = {
columns: {},
length: 0
}
Logger.log("Populating columns...");
for(var i = 0; i < columnCount; i++)
{
if(valuesRange[0][i] !== ''){
columns.columns[valuesRange[0][i]] = {index: i ,value: valuesRange[0][i]};
columns.length++;
}
}
return columns;
}

Related

Copy/paste values using google app script

Here is the sheet with an example of the data that I'm using
https://docs.google.com/spreadsheets/d/1_k3xclEgdREfMMks7H2-uk0tzxXqCaoeVW_G8ase-3o/edit?usp=sharing
I only put the formulas on the rows without color, and the information about the schedule comes from other tab.
the colors in green are the ones with dates, where I store inside the numbers, and the blue ones also contain formulas and I wanna skip those columns when pasting the values because is correlated to another worksheet, so I can't paste the values
I've tryied two ways
this first one I receive an erro message: Exception: The parameters (number[]) don't match the method signature for SpreadsheetApp.Range.setValues.
//destination = sheet.getRange(1,colA[j],table.length,1)
//destination.setValues(table); // where we paste the values by column
For this one, it paste on other tab on the first columns
//sheet.getRange(1,colA[j],table.length,1).copyTo(sheet.getActiveRange(), SpreadsheetApp.CopyPasteType.PASTE_VALUES, false);
Here is the script that Im using
function PasteValues() {
var ss = SpreadsheetApp.openById("...");
var sheet = ss.getSheetByName("Testsheet");
var rows = sheet.getDataRange().getValues();
var dates = rows[2];
//Logger.log(dates)
var yesterday = new Date(Date.now() - 864e5);
var numbers = [];
for(var i = 2; i < dates.length; i++) {
let columns = i
if (dates[i]!=="" && dates[i] !== null){
numbers.push(columns);
}
if (dates[i]==="") {
continue;
}
if (dates[i].getDate() == yesterday.getDate() && dates[i].getMonth() == yesterday.getMonth() ){
break;
}
}
colA=numbers.slice(-5)
var table = [];
Logger.log(rows.length)
Logger.log(colA)
for(var j=0;j<colA.length;j++)
{
table =[];
for (var i = 0; i < rows.length;i++ )
{
table[i] = rows[i][colA[j]];
}
Logger.log("the number of the column is: "+colA[j]);
Logger.log(table);
// where I paste the data
}
}
This is the example on how my data is to copy/paste it based on the column number
When you retrieve values from the spreadsheet, getValues() already returns them to you in a 2-D array - there is no need to manually transfer them into another array
You can either do:
var table = sheet.getDataRange().getValues();
destination = sheet.getRange(1,statColumn,table.length,table[0].length);
destination.setValues(table);
Or:
sheet.getDataRange.copyTo(sheet.getActiveRange(), SpreadsheetApp.CopyPasteType.PASTE_VALUES, false);
UPDATE
Exception: The parameters (number[]) don't match the method signature
for SpreadsheetApp.Range.setValues.
means that you are trying to assign a row (1-D array) to a range (2-D array).
Also, table.length will retrieve you the number of columns and not rows if table is a row.
This can be easily solved by defining:
table = [table];
Sample snippet:
for(var j=0;j<colA.length;j++)
{
table =[];
for (var i = 0; i < rows.length;i++ )
{
table[i] = rows[i][colA[j]];
}
Logger.log("the number of the column is: "+colA[j]);
table = [table];
Logger.log(table);
// where I paste the data
destination = sheet.getRange(1,colA[j],table.length,1)
destination.setValues([table]); // where we paste the values by column
}
UPDATE
If what you want is to copy paste selected data column by column, you need to create a 2D array table and populate it as following:
for(var i = 2; i < dates.length; i++) {
let columns = i
if (dates[i]!=="" && dates[i] !== null){
numbers.push(columns);
}
if (dates[i]==="") {
continue;
}
if (dates[i] instanceof Date && dates[i].getDate() == yesterday.getDate() && dates[i].getMonth() == yesterday.getMonth() ){
break;
}
}
colA=numbers.slice(-5)
var table = [];
Logger.log(rows.length)
Logger.log(colA)
for(var j=0;j<colA.length;j++)
{
for (var i = 0; i < rows.length;i++ )
{
table[i] =[];
table[i][0] = rows[i][colA[j]];
}
Logger.log("the number of the column is: "+colA[j]);
Logger.log(table);
// where I paste the data
destination = sheet.getRange(1,colA[j],table.length,1)
destination.setValues(table); // where we paste the values by column
}
It is important to make sure that the array is 2-dimensional and that its dimensions (rows and columns) correspond to the dimensions of the range into which you want to set the data.

How to remove duplicate rows in Google Sheets using script

I currently have a column of data titled JobID. In this column, there are duplicates from an import that runs daily and grabs the latest data on the JobID's in question and appends them to the top of the sheet.
Therefore the most recent JobID rows are the ones with the data we need.
I'd like to know if there is a script that can be run on the sheet called 'History' to look up the column JobID, search every row below for duplicates and remove them, leaving the top, most recent JobID rows in the sheet.
I know that it is really easy to remove duplicates using the "Remove Duplicates" tool in Google Sheets... but I'm lazy and I'm trying to automate as much of this process as possible.
The script I have below runs without an error but is still not doing what I need it to. Wondering where I am going wrong here:
function removeDuplicates() {
//Get current active Spreadsheet
var sheet = SpreadsheetApp.getActive();
var history = sheet.getSheetByName("History");
//Get all values from the spreadsheet's rows
var data = history.getDataRange().getValues();
//Create an array for non-duplicates
var newData = [];
//Iterate through a row's cells
for (var i in data) {
var row = data[i];
var duplicate = false;
for (var j in newData) {
if (row.join() == newData[j].join()) {
duplicate = true;
}
}
//If not a duplicate, put in newData array
if (!duplicate) {
newData.push(row);
}
}
//Delete the old Sheet and insert the newData array
history.clearContents();
history.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
Remove Duplicate JobIDs
This function will keep the ones nearest to the top of the list. If you want to go the other way then resort the list in reverse order.
function removeDuplicates() {
var ss=SpreadsheetApp.getActive();
var sh=ss.getSheetByName("History");
var vA=sh.getDataRange().getValues();
var hA=vA[0];
var hObj={};
hA.forEach(function(e,i){hObj[e]=i;});//header title to index
var uA=[];
var d=0;
for(var i=0;i<vA.length;i++) {
if(uA.indexOf(vA[i][hObj['JobID']])==-1) {
uA.push(vA[i][hObj['JobID']]);
}else{
sh.deleteRow(i+1-d++);
}
}
}
Remove Duplicate JobIDs in Python
Based on Cooper's answer I wrote the same function in Python:
gsheet_id = "the-gsheet-id"
sh = gc.open_by_url("https://docs.google.com/spreadsheets/d/%s/edit#gid=0" % gsheet_id)
wks = sh[0]
def removeDuplicates(gwks):
headerRow = gwks[1]
columnToIndex = {}
i = 0
for column in headerRow:
columnToIndex[column] = i
i += 1
uniqueArray = []
d = 0
row_i = 0
for row in gwks:
row_i += 1
if gwks[row_i][columnToIndex['JobID']] not in uniqueArray:
uniqueArray.append(gwks[row_i][columnToIndex['JobID']])
else:
d += 1
gwks.delete_rows(row_i + 1 - d, 1)
removeDuplicates(wks)

Build Google App Script to Combine Similar Rows into One Row

Many people are asking why I want to do this. I want to do this so that when I do my mail merge (this sends students their overdue book lists from the library), I don't send a student an email more than once. I never use this data more than once, I only use it to send a quick message, I never manipulate or work with the data so I don't care if it's hard to work with! I hope this makes sense! Thank you for your feedback thus far.
Google Sheet starts like this:
I want it to look like this:
I have started some script, which I am sure you will all laugh at (I know very little of programming). However, It'd be awesome to be able to do this. Basically, combine the rows that have an identical entry in column 1, by putting the values for the columns for those rows together into one row. The numbers can be added, and the data separated by a comma or a line break.
This is what I have so far....
function myFunction() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet();
var last = sheet.getLastRow();
//find identical entries in column 1//
for(i in data){
var row = data[i];
var duplicate = false;
for(j in newData){
if(row.join() == newData[j].join()){
duplicate = true;
}
}
if(!duplicate){
newData.push(row);
}
}
//add information from rows of identical entries into one row using a comma//
//delete empty rows//
}
It's a bit of a kluge but it runs with my fake data. Hopefully you can get it running with yours. It usually takes me some tweaking to get something like this a little more streamlined. I have a few comments in there to explain some of the key items.
function rowMerge() {
var firstRow = 2;
var firstCol = 1;
var sht = SpreadsheetApp.getActiveSheet();
sht.getRange(firstRow, firstCol, sht.getLastRow() - firstRow + 1, sht.getLastColumn() - firstCol + 1).sort(1);
sht.appendRow(['***','***','Control-z one more time','***','***']); //need a throwaway row to get last data element out since I'm moving out element[i-1]
var datR = sht.getDataRange();
var lastRow = datR.getLastRow();
var lastCol = datR.getLastColumn();
var datA = datR.getValues();
sht.getRange(2,1,lastRow - firstRow + 1,lastCol - firstCol + 1).clearContent().setWrap(true);
var datoutA = [];
var k=1;
var n = 0;
for(var i = 0;i < datA.length; i++)
{
if(i > 1)
{
if(datA[i][0] == datA[i-1][0])
{
k++; //k is the number of consecutive matching values
}
else
{
datoutA[n] = [];
if(k == 1)
{
// if k = 1 the datA[i-1] row gets copied into output array
for(var c = 0;c < datA[i-1].length; c++)
{
datoutA[n][c]=datA[i-1][c];
}
}
else
{
//i-1 to i-k rows get merged and copied into output array
var firstTime = true;
for(var a = 1;a <= k;a++)//input rows
{
for(var b = 0;b < datA[i].length -1;b++)//input columns
{
if(a > 1 || b > 0) //no delimiter for first row or first column
{
datoutA[n][b] += ', ';
}
if(firstTime || b == 0)// straight assignment for first row and running sum after that same with first column because we only want one of them because they're all the same.
{
datoutA[n][b] = datA[i - a][b];
}
else
{
datoutA[n][b] += datA[i - a][b];
}
}
if(firstTime)//first assignment then running sums for last column
{
datoutA[n][datA[i].length - 1] = Number(datA[i - a][datA[i].length-1]);
}
else
{
datoutA[n][datA[i].length - 1] += Number(datA[i - a][datA[i].length-1]);
}
firstTime=false;
}
var end = 'is near';
}
k=1; //consecutive counter
n++; //datoutA index
}
}
}
var datoutR = sht.getRange(2, 1, datoutA.length , datoutA[0].length);
datoutR.setValues(datoutA);
var colwidth = 250;
sht.setColumnWidth(2, colwidth);
sht.setColumnWidth(3, colwidth);
sht.setColumnWidth(4, colwidth);
}
There actually are many reasons why one would want to combine similar rows.
I for one, have a form that allows users to fill out information about properties. New info comes in everyday, so the forms will be reused for new entries.
All properties have a unique identifier, but the first form entry is useful to give the owner's name and address, along with other information.
The next form entry does not need to populate, (and re-type) all that info, but it does need to add other new details to the first entry.
Lastly, I want to report based on unique entries, with other info combined to read with the property.
Part of the answer for me is a custom function created by Hyde, called JoinRows.
Search for that.
Good luck!

Quickly looping through multiple sheets to find data

Hi i'm trying to write a custom function that takes a pair of cells, loops through all the worksheets in the spreadsheet to find an identical matching pair of cells, and returns another value from that same row.
Background; sheet 0 is a master sheet of all LOA and ID combinations (essentially location and serial #) which need to have there inspection dates filled in. The people who do these inspections update their personal worksheets with the LOA-ID combination + inspection data on google drive. Im trying to get the master sheet to update automatically whenever this data is added.
The sheets all follow the same format (LOA, ID in 1st & 2nd columns, inspection date in the 14th). This is a custom function im using which does what i intend, but works painfully slow. How do i make this run faster? It takes several seconds PER CELL; i need to run this over 10k+ cells.
function findMatch(LOA,GRID) {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheets = ss.getSheets();
var returnDate = "not found"
for (var sheetNum = 1; sheetNum < sheets.length; sheetNum++){
var ws = ss.getSheets()[sheetNum]
for (var count = 1; count<ws.getLastRow(); count++){
if (ws.getRange(count,1,1,1).getValues()==LOA && ws.getRange(count,2,1,1).getValues()==GRID)
{
returnDate = ws.getRange(count,14,1,1).getValue()
break;
}
else
{
}
}
}
Logger.log(returnDate)
return returnDate
It is best practice to perform as few Spreadsheet Service calls as possible, and particularly avoid making them inside loops. Instead, retrieve all the data in a batch with getValues() and use Javascript to iterate over that data.
function findMatch(LOA,GRID)
{
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheets = ss.getSheets();
var returnDate = "not found", data;
for (var sheetNum = 1; sheetNum < sheets.length; sheetNum++)
{
data = sheets[sheetNum].getDataRange().getValues();
for (var count = 1; count < data.length; count++)
{
if (data[count][0] == LOA && data[count][1] == GRID)
{
returnDate = data[count][13];
break;
}
}
}
Logger.log(returnDate);
return returnDate;
}

Automatically move data from one sheet to another in google docs

i have a spreadsheet that i keep track of tasks i need to do, once complete i enter a date in the last column. What i want is for that completed task to be moved to sheet 2.
At present i have sheet 1 named SUD_schedule and i want the completed row of data to be moved to sheet 2 named SUD_archive. I've looked through the forum posts already and i've tried a variation of scripts but so far no luck. The closest i have come is this script:
function onEdit() {
var sheet1 = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();//Original sheet
var sheet2 = SpreadsheetApp.getActiveSpreadsheet().getSheets()[1];//target sheet
// to act on only one sheet, check the sheet name here:
//If it si not first sheet, it will do nothing
if (sheet1.getSheetName() != "SUD_schedule") {
return;
}
//Get Row and column index of active cell.
var rowIndex = sheet1.getActiveRange().getRowIndex();
var colIndex = sheet1.getActiveRange().getColumnIndex();
//If the selected column is 10th and it is not a header row
if (colIndex == 16 && rowIndex > 1) {
//Get the data from the current row
var data = sheet1.getRange(rowIndex,1,1,9).getValues();
var lastRow2;
(sheet2.getLastRow()==0)?lastRow2=1:lastRow2=sheet2.getLastRow()+1;
//Copy the data to the lastRow+1th row in target sheet
sheet2.getRange(lastRow2,1,1,data[0].length).setValues(data);
}
}
Column P (16) is the task complete date, row 1 is frozen and contains column headers.
Can anybody help show where i'm going wrong.
Kind regards
Den
Your code is not generic and you are more complicating your objective. Below will work out your need.
function onEdit(){
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet1 = ss.getSheetByName('SUD_schedule');
var sheet2 = ss.getSheetByName('SUD_archive');
var dateColumn = "16";
var array = []
var range = sheet1.getRange(1, 1, sheet1.getLastRow(), dateColumn);
for (var i = 2; i <= sheet1.getLastRow(); i++) //i iterates from 2 as you say R1 is header
{
if(isValidDate(range.getCell(i, dateColumn).getValue()) == true) //checking if any values on column16 is valid date
{
data = sheet1.getRange(i, 1, 1, dateColumn).getValues(); //Getting the range values of particular row where C16 is date
for (var j = 0; j < dateColumn; j++) //Adding the row in array
{
array.push(data[0][j]);
}
}
if(array.length > 0)
{
sheet2.appendRow(array); //Appending the row in sheet2
array = [];
sheet1.deleteRow(i); //deleting the row in sheet as you said you want to move, if you copy remove this and next line
i=i-1; //managing i value after deleting a row.
}
}
}
//Below function return true if the given String is date, else false
function isValidDate(d) {
if ( Object.prototype.toString.call(d) !== "[object Date]" )
return false;
return !isNaN(d.getTime());
}
I am not sure that the syntax you have as used below is entirely correct.
(sheet2.getLastRow()==0)?lastRow2=1:lastRow2=sheet2.getLastRow()+1;
sheet2.getRange(lastRow2,1,1,data[0].length).setValues(data);
What I know will work for certain is if you omit the variable lastRow2 all together and use this instead.
sheet2.getRange(getLastRow+1,1,1,data[0].length).setValues(data);
To complement Joachin's answer, here is how you can adapt that code if you don't have the date in the last row. In the below shown part of the code replace Lastcolumnumber with your last column.
//Getting the range values of particular row where C16 is date
data = sheet1.getRange(i, 1, 1, LASTCOLUMNNUMBER).getValues();
//Adding the row in array
for (var j = 0; j < LASTCOLUMNNUMBER; j++)