I have the function below where I am trying to scrape 4 websites, and then combine the results into a spreadsheet. Is there a faster way to match over a large array that isn't the INDEX/MATCH formulas. My desired output would be (obv this is an example)
MLBID | FG_ID | PA | K | K% | wOBA
12345 | 12345 | 12 | 5 | 41.7% | .300
While the code I have below works, it takes wayyyy too long reaches the 6-minute limit of Google Script. The matching that I am trying to do is with ~4000 rows. I have commented my code as much as possible.
function minors_batting_stats() {
//this is the spreadsheet where I have a list of all of the IDs -- MLB and FG
var ids = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Player List");
//this is the output sheet
var mb18vR_sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("2018 minors bat vs R");
//various URLs I am trying to scrape
var mb18vR_PA_url = 'https://www.mlb.com/prospects/stats/search?level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=results&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
var mb18vR_SO_url = 'https://www.mlb.com/prospects/stats/search?pa_result=strikeout&level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=results&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
var mb18vR_wOBA_url = 'https://www.mlb.com/prospects/stats/search?level=11&level=12&level=13&level=14&level=15&level=16&pitcher_throws=R&batter_stands=&game_date_gt=&game_date_lt=&season=2017&home_away=&draft_year=&prospect=&player_type=batter&sort_by=woba&sort_order=desc&group_by=name&min_pa=&min_pitches=#results'
//creating an array for each scrape
var res = [];
var res1 = [];
var res2 = [];
var res3 = [];
//getting the MLB and FG ids from the spreadsheet
var mlbids = ids.getRange(1, 11, ids.getLastRow()).getValues();
var fgids = ids.getRange(1,9, ids.getLastRow()).getValues();
//scraping SO against RHP
var content_SO = UrlFetchApp.fetch(mb18vR_SO_url).getContentText();
var e_SO = Parser.data(content_SO).from('tbody').to('</tbody>').build();
var rows_SO = Parser.data(e_SO).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows_SO.length; i++) { //rows.length
res1[i] = [];
res1[i][0] = Parser.data(rows_SO[i]).from('/player/').to('/').build();
var SOs = Parser.data(rows_SO[i]).from('<td align="left">').to('</td>').iterate();
res1[i][1] = SOs[1];
}
//scraping wOBA against RHP
var content_wOBA = UrlFetchApp.fetch(mb18vR_wOBA_url).getContentText();
var e_wOBA = Parser.data(content_wOBA).from('tbody').to('</tbody>').build();
var rows_wOBA = Parser.data(e_wOBA).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows_wOBA.length; i++) { //rows.length
res2[i] = [];
res2[i][0] = Parser.data(rows_wOBA[i]).from('/player/').to('/').build();
var wOBAs = Parser.data(rows_wOBA[i]).from('<td align="left">').to('</td>').iterate();
res2[i][1] = wOBAs[2];
}
//scraping PA against RHP
var content = UrlFetchApp.fetch(mb18vR_PA_url).getContentText();
var e = Parser.data(content).from('tbody').to('</tbody>').build();
var rows = Parser.data(e).from('<tr class="player_row"').to('</tr>').iterate();
for (var i=0; i<rows.length; i++) { //rows.length
res[i] = [];
res[i][0] = Parser.data(rows[i]).from('/player/').to('/').build();
res[i][1] = [];
//matching the MLB_ID with FG_ID
var mlbID = res[i][0];
for(var j = 0; j<mlbids.length;j++){
if(mlbids[j] == mlbID){
res[i][1] = fgids[j];
}
}
var PAs = Parser.data(rows[i]).from('<td align="left">').to('</td>').iterate();
res[i][2] = PAs[1];
//matching the MLB_ID from PA (res) with SO (res1)
res[i][3] = 0;
for (var w=0; w<res1.length; w++) {
if (res[i][0] == res1[w][0]) {
res[i][3] = res1[w][1];
}
}
//Calculating K%
res[i][4] = res[i][3] / res[i][2]
//matching the MLB_ID from PA (res) with wOBA (res1)
res[i][5] = 0;
for (var v=0; v<res2.length; v++) {
if (res[i][0] == res2[v][0]) {
res[i][5] = res2[v][1];
}
}
}
//pasting values
mb18vR_sheet.getRange(2, 1, res.length, res[0].length).setValues(res);
}
The issue you have is that you are forcing your script to loop through large datasets many many times for each row of compared data. A better approach is to build a lookup object, which maps between a desired unique identifier and the row of the data array you want to access:
/* Make an object from an Array[][] that has a unique identifier in one of the columns.
* #param Array[][] data The 2D array of data to index, e.g. [ [r1c1, r1c2, ...], [r2c1, r2c2, ...], ... ]
* #param Integer idColumn The column in the data array that is a unique row identifier
e.g. the column index that contains the product's serial number, in a data
array that has only a single row per unique product.
#return Object {} An object that maps between an id and a row index, such that
`object[id]` = the row index for the specific row in data that has id = id
*/
function makeKey(data, idColumn) {
if(!data || !data.length || !data[0].length)
throw new ValueError("Input data argument is not Array[][]");
// Assume the first column is the column with the unique identifier if not given by the caller.
if(idColumn === undefined)
idColumn = 0;
var key = {};
for(var r = 0, rows = data.length; r < rows; ++r) {
var id = data[r][idColumn];
if (key[id])
throw new ValueError("ID is not unique for id='" + id + "'");
key[id] = r;
}
return key;
}
Usage:
var database = someSheet.getDataRange().getValues();
var lookup = makeKey(database, 3); // here we say that the 4th column has the unique values.
var newData = /* read a 2D array from somewhere */;
for(var r = 0, rows < newData.length; r < rows; ++r) {
var id = newData[r][3];
var existingIndex = lookup[id];
if (existingIndex) {
var oldDataRow = database[existingIndex];
} else {
// No existing data.
}
}
By making a lookup object for your data arrays, you no longer have to re-search them and make comparisons, because you did the search once and stored the relationship, rather than discarding it every time. Note that the key that was made is based on a specific (and unique) property of the data. Without that relationship, this particular indexing approach won't work - but a different one will.
Related
I am trying to use Apps Script to query 2 datasets and compare certain columns across them. I am hoping to...
a) identify missing ID values;
b) reconcile differences in other fields, when the ID values match.
INPUT:
Spreadsheet with 2 tabs (tab1, tab2).
The key ID in each B column (Btab1, Btab2)
I want to identify instances where a unique value (B) is in one dataset but not in the other (the rows are not in the same order)
Run a function & push to an output tab if Btab1 is not in tab2 || Btab2 is not in tab1
When a value of B is in both tabs (the majority of the time), I want to identify instances of data discrepancies in a few columns...
For all instances of B, push B and the relevant columns below to the output tab if...
Column M in tab1 doesn't match column E in tab2
Column P in tab1 <> column F in tab2
Column AN tab1 <> Column G tab2
OUTPUT:
tab that displays problem areas in the datasets.
First column is ID Key.
Second column explains the issue via text string
Again, the challenge here is that the values are not sorted the same, and there could be a slight difference in total # rows
function compare() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
ss.insertSheet(1);
ss.getActiveSheet().setName('output');
var sheet1 = ss.getSheetByName('sheet1');
var sheet2 = ss.getSheetByName('sheet2');
var sheet_output = ss.getSheetByName('output');
var range1 = sheet1.getRange(1,1,sheet1.getLastRow(),sheet1.getLastColumn()).getValues();
var output1 = [];
var a1;
var b1;
var h1;
var i1;
var j1;
var m1;
var o1;
var p1;
var an1;
var ao1;
var x;
var range2 = sheet2.getRange(1,1,sheet2.getLastRow(),sheet2.getLastColumn()).getValues();
var output2 = [];
var a2;
var b2;
var c2;
var d2;
var e2;
var f2;
var g2;
var h2;
var y;
/// can i do for(x in range1; y in range2) { all in one function?? If so, what is the proper syntax?
for(x in range1, y in range2) {
a1 = range1[x][0];
b1 = range1[x][1];
h1 = range1[x][7];
i1 = range1[x][8];
j1 = range1[x][9];
m1 = range1[x][12];
o1 = range1[x][14];
p1 = range1[x][15];
an1 = range1[x][39];
ao1 = range1[x][40];
a2 = range2[y][0];
b2 = range2[y][1];
c2 = range2[y][2];
d2 = range2[y][3];
e2 = range2[y][4];
f2 = range2[y][5];
g2 = range2[y][6];
h2 = range2[y][7];
if (
(b1 != b2) ||
(m1 != e2) // etc etc etc
)
{
//push to output
}}
Whilst your syntax for(x in range1, y in range2) will not return an error, it won't give you the desired result neither if the rows are not in the same order
Reason:
During each iteration both x and y will change, e.g. if var range1 = [1,2,3] and var range2 = [4,5,6], your loop will iterate 3 times and the values in your sample loop iterations will be:
iteration
range1[x] = 1 and range2[y] = 4
iteration
range1[x] = 2 and range2[y] = 5
iteration
range1[x] = 3 and range2[y] = 6
In this case you will not retrieve the combination
range1[x] = 1 and range2[y] = 4
or
range1[x] = 2 and range2[y] = 6
and so on.
Instead you need to use two nested for loops, which would iterate through all possible combinations of x and y:
for(x in range1) {
for(y in range2){
...
}
}
Sidenote:
Even if your rows would be in the same order, you still need to be careful. Because for(x in range1) opposed to for(x = 0; x < range1.length; i++) gives you no control about in which folder the loop will iterate over the range.
Now to your query for duplicates
A possible way to implement the functionality in a not too complicated manner would be the following:
Define boolean variable and use it to check for each x either it has a duplicate
If a duplicate (for column B) is found - further criteria will be evaluated
If two rows match by all criteria, the inner loop will be exited with break and the function will jump to the next x
If rows with identical key IDs, but discrepancies in other columns are found - both rows will be pushed into sheet output for comparison purposes (this is easier to implement than specifying what exactly is discrepant)
After this the inner loop will also be exited
In oth cases above duplicate will be set to true
If a unique Id is found in sheet1 (duplicate = false) - it will be immediately pushed into output
Sample
function compare() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
ss.insertSheet(1);
ss.getActiveSheet().setName('output');
var sheet1 = ss.getSheetByName('sheet1');
var sheet2 = ss.getSheetByName('sheet2');
var sheet_output = ss.getSheetByName('output');
var range1 = sheet1.getRange(1,1,sheet1.getLastRow(),sheet1.getLastColumn()).getValues();
var output1 = [];
var b1;
var m1;
var p1;
var an1;
var x;
var range2 = sheet2.getRange(1,1,sheet2.getLastRow(),sheet2.getLastColumn()).getValues();
var output2 = [];
var b2;
var e2;
var f2;
var g2;
var y;
var array = [];
for(x in range1) {
var duplicate = false;
for(y in range2){
b1 = range1[x][1];
m1 = range1[x][12];
p1 = range1[x][15];
an1 = range1[x][39];
b2 = range2[y][1];
e2 = range2[y][4];
f2 = range2[y][5];
g2 = range2[y][6];
if (
(b1 == b2)
)
{
Logger.log("found");
duplicate = true;
if((m1 != e2)||
(p1 != f2) ||
(an1 != g2)){
array.push(range1[x]);
array.push(range2[y]);
}
break;
}
}
if (duplicate == false){
Logger.log("duplicate false");
array.push(range1[x]);
}
}
//push to output
if(array[0]){
sheet_output.getRange(sheet_output.getLastRow()+1, 1, array.length, array[0].length).setValues(array);
}
}
I have the following which works great for finding a header value:
var lastCN = formResponsesSht.getLastColumn();
var data = formResponsesSht.getRange(1,1,1,lastCN).getValues();//Get 2D array of all values in row one
var data = data[0];//Get the first and only inner array
var statusCN = data.indexOf('Status') + 1;
Now I would like to find a value on a specific column. In this case B.
var lastRN = formResponsesSht.getLastRow();
var data = formResponsesSht.getRange(1,2,lastRN,1).getValues();//Get 2D array of all values in row one
var data = data[0];//Get the first and only inner array
var statusCN = data.indexOf('Status') + 1;
Why does this not work and is it possible to use the same strategy of indexOf?
If your sheet looked like this:
Then the output of
var data = formResponsesSht.getRange(1,2,lastRN,1).getValues();
is: [[a], [b], [c], [d], [e], [f], [g], [h], [i]]
And
var data = data[0];
gets you: [a], i.e. the first cell in that column.
.getValues() fetches data as an array where the "first and inner array" is an array of all the values in that row.
Edit: For finding the index of 'Status' in a column of data, try:
for (var d = 0; d < data.length; d++) {
if (data[d].indexOf('Status') > -1) {
var statusCN = d + 1;
break;
}
}
Logger.log(statusCN);
In my sheet column A is date and column B is time duration values, I want to find the dates which are repeated and sum up the corresponding time values of the repeated dates and show the sum in the last relevant repeated date. And delete all the other repeated dates. ie if 18/07/2019 is repeated 4 times i have to sum up all the four duration values and display the sum value in the 4th repeated position and delete the first three date 18/07/2019. I have to do this all those dates that are repeated. I have wrote code to my best knowledge
function countDate() {
var data = SpreadsheetApp.getActive();
var sheet = data.getSheetByName("Sheet5");
var lastRow = sheet.getLastRow();
var sh = sheet.getRange('A1:A'+lastRow);
var cell = sh.getValues();
var data= sheet.getRange('B1:B'+lastRow).getValues();
for (var i =0; i < lastRow; ++i){
var count = 0;
var column2 = cell[i][0];
for (var j =0; j < i; j++)
{
var p=0;
var column4 = cell[j][0];
if (column4 - column2 === 0 )
{
var value1 = data[j][0];
var value2 = data[i][0];
var d = value2;
d.setHours(value1.getHours()+value2.getHours()+0);
d.setMinutes(value1.getMinutes()+value2.getMinutes());
sheet.getRange('C'+(i+1)).setValue(d).setNumberFormat("[hh]:mm:ss");
sheet.deleteRow(j+1-p);
p++;
}
}
}
}
The copy of the sheet is shown
column C is the values I obtain through the above code AND column D is the desired value
After computing the sum I need to delete the repeated rows till 15 here
Answer:
You can do this by converting your B-column to a Plain text format and doing some data handling with a JavaScript dictionary.
Code:
function sumThemAllUp() {
var dict = {};
var lastRow = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0].getLastRow();
var dates = SpreadsheetApp.getActiveSpreadsheet().getRange('A1:A' + lastRow).getValues();
var times = SpreadsheetApp.getActiveSpreadsheet().getRange('B1:B' + lastRow).getValues();
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0];
sheet.getRange(1, 1, sheet.getLastRow(), sheet.getLastColumn()).setNumberFormat("#");
for (var i = 0; i < dates.length; i++) {
if (!dict[dates[i][0]]) {
dict[dates[i][0]] = times[i][0];
}
else {
var temp = dict[dates[i][0]];
var hours = parseInt(temp.split(':')[0]);
var minutes = parseInt(temp.split(':')[1]);
var additionalHours = parseInt(times[i][0].split(':')[0]);
var additionalMinutes = parseInt(times[i][0].split(':')[1]);
var newMinutes = minutes + additionalMinutes;
var newHours = hours + additionalHours;
if (newMinutes > 60) {
newHours = newHours + 1;
newMinutes = newMinutes - 60;
}
dict[dates[i][0]] = newHours + ':' + newMinutes;
}
}
SpreadsheetApp.getActiveSpreadsheet().getSheets()[0].getRange('A1:B' + lastRow).clear();
var keys = Object.keys(dict);
for (var i = 0; i < keys.length; i++) {
SpreadsheetApp.getActiveSpreadsheet().getSheets()[0].getRange('A' + (i + 1)).setValue(keys[i]);
SpreadsheetApp.getActiveSpreadsheet().getSheets()[0].getRange('B' + (i + 1)).setValue(dict[keys[i]]);
}
}
Assumptions I made:
There are a few assumptions I made when writing this, you can edit as needed but I figured I should let you know:
There are only dates in Column A and only times in Column B.
The times in column B are either Hours:Minutes or Minutes:Seconds. Either way, if the value to the right of the : hits 60, it adds one to the left value and resets.
The Sheet within the Spreadsheet is the first sheet; that which is returned by Spreadsheet.getSheets()[0].
References:
w3schools - JavaScript Objects
Spreadsheet.getSheets()
w3schools - JavaScript String split() Method
MDN web docs - parseInt() method
Google Sheets > API v4 - Date and Number Formats
I need to update the value of "product quantity" based on the value of "order quantity" but only when "order sku" is equal to "product sku".
function productLoop2() {
var app = SpreadsheetApp;
var ss = app.getActiveSpreadsheet();
var activeSheet = ss.getActiveSheet();
var t = 2;
var n = 2;
var s = 0;
for(var t = 2; t < 52; t++) {
var x = activeSheet.getRange(t, 1).getValue();
//x is the ORDER SKU
var r = activeSheet.getRange(t, 2).getValue();
//r is the ORDER QUANTITY
var q = activeSheet.getRange(n, 3).getValue();
//q is the PRODUCT SKU
var u = activeSheet.getRange(n, 4).getValue();
//u is the PRODUCT QUANTITY
if (x != q) {
n++;
} else {
s = u - r;
}
var m = activeSheet.getRange(n,4).setValue(s);
}
}
I need the cell "n,4" (order quantity) to update so the value equals the result of "u"(product quantity) minus "r"(order quantity)
The code "if" fragment should be corrected as below:
if (x != q) {
n++;
} else {
activeSheet.getRange(n,4).setValue(u - r);
}
Update after discussion:
function updateProductQuantities() {
var activeSheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
var values = activeSheet.getDataRange().getValues();
// Sum quantities by orders (columns A and B)
var sku = {};
for (var i in values) {
if (i == 0) continue; // headers row
if (sku[values[i][0]] == undefined) {
sku[values[i][0]] = values[i][1];
} else {
sku[values[i][0]] += values[i][1];
}
}
// Update product quantities (columns C and D)
for (i in values) {
if (sku[values[i][2]] != undefined) {
values[i][3] -= sku[values[i][2]];
}
}
// return values to the sheet
activeSheet.getDataRange().setValues(values);
}
You should use 2 "for" loops. One is for sum of orders quantities, and the other is for subtraction.
Here is how you can get all the data, modify it and set it in the sheet.
function productLoop2() {
var sheet = SpreadsheetApp.getActive().getActiveSheet();
// Get the active sheet of the active spreadsheet
var orderRange = sheet.getRange('A2:B52');
var productRange = sheet.getRange('C2:D52');
// Get the data ranges (change these references as necessary)
var orderData = orderRange.getValues();
var productData = productRange.getValues();
// Get the values from the ranges. This method returns an array.
for (var row = 0; row < orderData.length; row++) {
// Loops through every row of the order array
// Arrays are zero-based; this means the first element is element 0,
// the second element in element 1 and so on.
// Data is accessed with [row index][column index];
var oSku = orderData[row][0];
var oQty = orderData[row][1];
for (var productIndex = 0; productIndex < productData.length; productIndex++) {
// Loops through every product in the product array
var pSku = productData[productIndex][0];
var pQty = productData[productIndex][1];
if (oSku === pSku) {
productData[productIndex][1] = pQty - oQty;
// Changes the pQty value in the array
break;
// Added upon suggestion from user tehhowch
}
}
}
productRange.setValues(productData);
// Sets all product values in the array to the range from which they were taken
}
References:
Multidimensional arrays
Best practices - batch operations
For loops
I am looking to save products that are chosen in an invoice to another sheet in the same workbook when a payment method is selected. Here is a copy of the sheet. How the sheet works:
1) User places "x" into selection column in "Protocol Selection" (WORKING)
2) In the next sheet in the workbook "Patient Invoice", an invoice with bottle count is generated (WORKING)
3) I want the past invoices (with product, date, pill-count, etc.) to be copied over to the "Past Invoices" sheet when the "Method of Payment" is selected. This is a drop-down Data Validation cell. (NOT WORKING)
Is there a way to do this without a custom script? IF not, what is the script?
Probably not the most efficient method but it does work...
Script
function makePastInvoice() {
var patientInvoice = SpreadsheetApp.getActive().getSheetByName('Patient Invoice');
var pastInvoices = SpreadsheetApp.getActive().getSheetByName('Past Invoices');
// vistor details
var visitDate = patientInvoice.getRange("D3").getValue();
var patientName = patientInvoice.getRange("G5").getValue();
var doctorName = patientInvoice.getRange("I5").getValue();
var programLength = patientInvoice.getRange("I3").getValue();
var invoiceNumber = patientInvoice.getRange("G3").getValue();
var total = patientInvoice.getRange("K30").getValue();
var paymentMethod = patientInvoice.getRange("D5").getValue();
var visitorDetails = [visitDate, patientName, doctorName, programLength, invoiceNumber, total, paymentMethod];
var lastRow = pastInvoices.getLastRow();
var currentRow = lastRow + 1;
// set values of patient details to Past Invoices sheet
for (var i = 0; i < visitorDetails.length; i++) {
pastInvoices.getRange(currentRow, i+1).setValue(visitorDetails[i]);
}
// 15 possible max items (per Past Invoices)
var productIDs = patientInvoice.getRange("A8:A22").getValues();
// count number of products
var productCount = 0;
for(var i = 0; i < productIDs.length; i++) {
if (productIDs[i] > "") {
productCount++;
}
}
// Patient Invoice
var firstItemRow = 8;
// Past Invoices
var firstItemCol = 8 // column H
// loop through purchases
for(var i = 0; i < productCount; i++) {
// purchase details
var productName = patientInvoice.getRange(firstItemRow, 2).getValue(); // col B
var dosage = patientInvoice.getRange(firstItemRow, 5).getValue(); // col E
var numBottles = patientInvoice.getRange(firstItemRow, 6).getValue(); // col F
var purchaseDetails = [productName, dosage, numBottles];
// set values of purchase details to Past Invoices sheet
for(var j = 0; j < purchaseDetails.length; j++) {
pastInvoices.getRange(currentRow, firstItemCol).setValue(purchaseDetails[j])
firstItemCol ++;
}
firstItemRow++;
}
}
Trigger
I'm sure there's a way to trigger the script when the payment method changes or any other method of choice but I created a "submit" trigger with a drawing and assigned it to the makePastInvoice function.
Also, you'll probably want to change the "Visit Date" column's format to date.