Automate ImportHTML to get twitter followers for many cells - google-apps-script

I have a spreadsheet with a cell range of twitter profile urls. I want to automate getting their follower count. This is what I came up with.
/**
 * Writes an IMPORTHTML-based follower-count formula next to each Twitter
 * profile URL and then replaces the formulas with their current values.
 *
 * The ranges are optional so the same function can serve every block of
 * cells instead of keeping one copy per range. Called without arguments it
 * behaves as before (URLs in L6:L25, results in K6:K25).
 *
 * @param {string=} sourceA1 A1 notation of the single-column range holding
 *     the profile URLs. Defaults to "L6:L25".
 * @param {string=} targetA1 A1 notation of the range that receives the
 *     follower counts; it must have as many rows as the source range,
 *     otherwise setValues throws. Defaults to "K6:K25".
 */
function getFollowers1(sourceA1, targetA1) {
  // A time-driven trigger passes an event object as the first argument, so
  // only strings are accepted as range overrides.
  var urlRangeA1 = (typeof sourceA1 === 'string' && sourceA1) ? sourceA1 : 'L6:L25';
  var outRangeA1 = (typeof targetA1 === 'string' && targetA1) ? targetA1 : 'K6:K25';

  // Random query string appended to every URL as a cache buster.
  var queryString = Math.random();
  var sheet = SpreadsheetApp.getActiveSpreadsheet();
  var cellRange = sheet.getRange(urlRangeA1).getValues();

  var formulas = [];
  for (var i = 0; i < cellRange.length; i++) {
    // getValues() returns rows, so the URL is the first cell of each row.
    var url = String(cellRange[i][0]).trim();
    if (url === '') {
      // Leave the result blank instead of importing from an empty URL.
      formulas.push(['']);
      continue;
    }
    // Double any quotes so the URL cannot terminate the formula string.
    var safeUrl = url.replace(/"/g, '""');
    formulas.push(['=substitute(query(IMPORTHTML("' + safeUrl + '?' + queryString + '",' + '"table", 3), "select Col3"), char(10)&"Followers","")']);
  }

  // One batched write instead of one setValue call per cell.
  var range = sheet.getRange(outRangeA1);
  range.setValues(formulas);
  // Apply the pending writes before freezing the results as plain values.
  SpreadsheetApp.flush();
  range.copyTo(range, {contentsOnly: true});
}
I have 4 separate functions getFollowers1, getFollowers2, getFollowers3, getFollowers4 the difference between them is the range of cells. I have about 80 cells and doing it all in one function would always have some cells hang at Loading.... I have these 4 functions set to triggers at different times of the day. However, sometimes the function works and sometimes all cells in the function just hang at Loading....
I don't know the reason for this, throttling maybe? I'm wondering how to change things to accomplish what I want in a more efficient and reliable way. To get daily updates of twitter followers.

Related

Improving Apps Script flexibility by using a column of sheet data instead of hard-coded IDs

Background: My coworkers originally each had a worksheet within the same Google Sheets file that makes a lot of calculations (and was getting unusable). Now, everyone has their own (known) Google Sheets file. To run the same calculations, we need to consolidate all that data into a master sheet (image ref below). We tried =importrange(...), but it's too heavy and breaks often (i.e., Loading... and other unfilled cells).
I've written some code to do this import, but right now its only manual: manually repeating the code and manually add the sheet IDs and changing the destrange.getRange(Cell range) each time. We have 80+ analysts, and fairly high turnover rates, so this would take an absurd amount of time. I'm new to Sheets and Apps Script, and know how to make the script use a cell as reference for a valid range or a valid ID, but I need something that can move a cell down and reference the new info.
Example:
Sheet 1 has a column of everyone Sheet ID
Script Pseudocode
get first row's id(Row 1), get sheet tab, get range, copies to active sheet's corresponding row(Row 1).
gets second row's id(Row 2), get sheet tab, get range, copies to active sheet's corresponding row (Row 2)
etc.
My understanding of scripting is way too low to know how to approach this. I have no idea what to read and learn to make it work properly.
/**
 * Asks for confirmation and, if the user answers yes, copies E4:DU4 of the
 * 'Data Draw' sheet in one hard-coded source spreadsheet into E4:DU4 of the
 * 'Master Totals' sheet in the active spreadsheet.
 */
function getdata() {
  var answer = Browser.msgBox('Preparing to draw data','Draw the data like your french girls?', Browser.Buttons.YES_NO);
  if (!(answer == 'yes')) {
    return;
  }

  // I eventually want this to draw the ID from Column A:A, not hard-coded
  var rowValues = SpreadsheetApp.openById('1B9sA5J-Jx0kBLuzP5vZ3LZcSw4CN9sS6A_mSbR9b26g')
      .getSheetByName('Data Draw') // source sheet name
      .getRange('E4:DU4') // range
      .getValues();

  // This range needs to somehow move one down after each time it pastes a row in.
  SpreadsheetApp.getActiveSpreadsheet()
      .getSheetByName('Master Totals')
      .getRange('E4:DU4')
      .setValues(rowValues); // Data into destsheet
}
Any suggestions are greatly appreciated!
Thanks to tehhowch for pointing me in the right direction!
/**
 * For every spreadsheet link listed in column A of the active sheet (from
 * row 4 down), copies E4:DU4 of that spreadsheet's 'Data Draw' sheet into
 * the same row (columns E:DU) of the 'Master Totals' sheet.
 *
 * The user is asked to confirm first; nothing is read or written unless the
 * answer is yes.
 *
 * @throws {Error} If the active spreadsheet has no 'Master Totals' sheet.
 */
function getdata() {
  var book = SpreadsheetApp.getActiveSpreadsheet();
  var ss = book.getActiveSheet();
  var destsheet = book.getSheetByName('Master Totals');
  if (!destsheet) {
    throw new Error("Sheet 'Master Totals' not found");
  }
  var confirm = Browser.msgBox('Drawing Data','Would you like to update the sheet? It may take 2 to 5 minutes.', Browser.Buttons.YES_NO);
  if (confirm !== 'yes') {
    return;
  }

  var firstRow = 4;
  var lr = ss.getLastRow();
  if (lr < firstRow) {
    // No links listed yet.
    return;
  }

  // Read the whole link column once instead of one getValue call per row.
  var links = ss.getRange(firstRow, 1, lr - firstRow + 1, 1).getValues();
  for (var i = 0; i < links.length; i++) {
    var currentID = links[i][0];
    if (String(currentID).trim() === '') {
      // Skip blank cells rather than handing an empty string to openByUrl.
      continue;
    }
    var row = firstRow + i;
    var sourcevalues = SpreadsheetApp.openByUrl(currentID)
        .getSheetByName('Data Draw')
        .getRange('E4:DU4')
        .getValues();
    // Paste into the row that lines up with the link's own row.
    destsheet.getRange('E' + row + ':' + 'DU' + row).setValues(sourcevalues);
  }
}
I just had to learn how to use a variable loop.
Edit: thanks also to Phil for making my question more presentable!
Now that you've figured out one way to do it, I'll offer an alternative that uses batch methods (i.e. is much more time- and resource-efficient):
/**
 * Batch version of the import: reads every spreadsheet ID in column A of
 * the active sheet (row 4 through the last row), pulls E4:DU4 from each
 * spreadsheet's "Data Draw" sheet, and writes all collected rows to
 * 'Master Totals' in a single call, anchored at E4.
 *
 * Does nothing if 'Master Totals' is missing, the user declines the
 * confirmation, or there are no ID rows.
 *
 * @throws {Error} If a listed spreadsheet has no "Data Draw" sheet.
 */
function getData() {
  var wb = SpreadsheetApp.getActive();
  var ss = wb.getActiveSheet();
  var dest = wb.getSheetByName('Master Totals');
  if (!dest || "yes" !== Browser.msgBox('Drawing Data', 'Would you like to update the sheet? It may take 2 to 5 minutes.', Browser.Buttons.YES_NO))
    return;

  // IDs start on row 4; rows 4..lastRow inclusive is lastRow - 4 + 1 rows.
  var firstRow = 4;
  var numRows = ss.getLastRow() - firstRow + 1;
  if (numRows < 1)
    return;

  // Batch-read the first column into an array of arrays of values.
  var ssids = ss.getSheetValues(firstRow, 1, numRows, 1);
  var output = [];
  for (var row = 0; row < ssids.length; ++row) {
    var targetID = ssids[row][0];
    // Open the remote sheet (consider using try-catch
    // and adding error handling).
    var remote = SpreadsheetApp.openById(targetID);
    var source = remote.getSheetByName("Data Draw");
    if (!source)
      throw new Error('Spreadsheet ' + targetID + ' has no "Data Draw" sheet');
    var toImport = source.getRange("E4:DU4").getValues();
    // Append the imported row(s) in place instead of rebuilding the output
    // array on every iteration.
    Array.prototype.push.apply(output, toImport);
  }
  // Write collected data, if any, anchored from E4.
  if (output.length > 0 && output[0].length > 0)
    dest.getRange(4, 5, output.length, output[0].length).setValues(output);
}
Each call to getRange and setValues adds measurable time to the execution time - i.e. on the order of hundreds of milliseconds. Minimizing use of the Google interface classes and sticking to JavaScript wherever possible will dramatically improve your scripts' responsiveness.

converting nonadjacent columns to proper/title case in Google Sheets

I found something online and it works for changing text to title case; however, it only works on adjacent columns and I need to apply this to multiple nonadjacent columns. I will paste in the script. If you can give a fix to being able to take care of letting the script run on multiple nonadjacent columns that I specify, that would be great. I found some stuff online that says it can do that, but it is not clear to me.
Here is the script that works:
/**
 * Converts every cell of the range named "A_range" on the first sheet to
 * title case, activating that range first.
 */
function onEdit() {
  var firstSheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[0];
  var target = firstSheet.getRange("A_range");
  target.activate();

  var cells = target.getValues();
  if (!cells.map) {
    // Not an array: treat the result as a single value.
    target.setValue(titleCase(cells));
    return;
  }

  // Rebuild the grid row by row with every cell passed through titleCase.
  var converted = [];
  for (var r = 0; r < cells.length; r++) {
    converted.push(cells[r].map(titleCase));
  }
  target.setValues(converted);
}
/**
 * Returns the input converted to title case: the first character of every
 * word is upper-cased and the rest of the word is lower-cased. Characters
 * that are not part of a word are copied through unchanged.
 *
 * Unlike a plain split on \b, this keeps contractions and possessives
 * together ("don't" -> "Don't", not "Don'T") and treats accented letters
 * as part of a word ("ÉCOLE" -> "École").
 *
 * @param {*} str Value to convert; it is stringified with toString().
 * @return {string} The title-cased text.
 */
function titleCase(str) {
  var text = str.toString();

  // A word character is anything \w matches, or any character that has
  // distinct upper and lower case forms (covers non-ASCII letters).
  function isWordChar(ch) {
    return ch !== '' && (/\w/.test(ch) || ch.toLowerCase() !== ch.toUpperCase());
  }

  // True when 1-2 word characters follow `start` ("'s", "'t", "'ll", "'re").
  // Longer tails start a new word, so "o'neil" still becomes "O'Neil".
  function hasShortTail(start) {
    var len = 0;
    while (len < 3 && isWordChar(text.charAt(start + len))) {
      len++;
    }
    return len === 1 || len === 2;
  }

  var result = '';
  var inWord = false;
  for (var i = 0; i < text.length; i++) {
    var ch = text.charAt(i);
    if (isWordChar(ch)) {
      result += inWord ? ch.toLowerCase() : ch.toUpperCase();
      inWord = true;
    } else {
      result += ch;
      // Only an apostrophe followed by a short tail keeps the word going.
      inWord = inWord && (ch === "'" || ch === '\u2019') && hasShortTail(i + 1);
    }
  }
  return result;
}
This was from other people on this concept:
The stuff to make it run on other columns is this:
// Read columns A and C separately, then pair them up row by row so that
// `range` holds one [A, C] pair per row.
var range1 = sheet.getRange("A1:A19").getValues();
var range2 = sheet.getRange("C1:C19").getValues();
var range = [];
var i;
for (i = 0; range1[i]; i++) {
  var pair = [range1[i][0], range2[i][0]];
  range.push(pair);
}
where range will have content from both columns.
// Read columns A through C in one call; data[row][column] is 0-based, so
// column A is index 0 and column C is index 2.
var data = sheet.getRange("A1:C19").getValues();
// Walk the rows (not the columns of the first row) and touch only the two
// wanted columns of each row.
for (var i = 0; i < data.length; i++) {
  // do something with data[i][0]  (column A)
  // do something with data[i][2]  (column C)
}
I am not sure how to implement these 2 other ideas listed above. If you could be really specific, like actually put something into the first script that lets it run on Col. A and Col. D,for example, it would be much better than generalities, as I am really really new to this and have spent an enormous amount of time trying to learn it/get a handle on it. Thanks!
Because making as few calls as possible to the SpreadsheetApp API is better for speed, i'd prefer to simply take a Range of all the cells between the first and the last column, apply the transformation to selected columns and then write the whole lot back again. The only place to edit then if the columns change is a single pattern array.
// Columns to convert to title case, given as column numbers.
var columnPattern = [1,5,6,7] // Equivalent to [A,E,F,G]
The script then runs a simple map over the two-dimensional array representing the sheet.
/**
 * Converts the text in selected columns of the first sheet to title case.
 *
 * Reads one range spanning the lowest to the highest listed column, applies
 * titleCase to the string cells of the listed columns only, and writes the
 * whole range back in a single call.
 *
 * NOTE(review): every cell in the span is written back from getValues(), so
 * this presumably replaces formulas in the in-between columns with their
 * values - confirm before running on a sheet that contains formulas.
 */
function transform() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheets()[0];
  // Rows (Could be first row and last row, OP isn't clear.
  var startRow = 1, endRow = sheet.getLastRow();
  // An array with Column indexes for those you want in Title Case.
  var columnPattern = [1,5,6,7];
  var firstColumn = Math.min.apply(null, columnPattern);
  var lastColumn = Math.max.apply(null, columnPattern);

  // getRange takes a row COUNT and a column COUNT, both inclusive of the
  // first and last index, hence the + 1.
  var numRows = endRow - startRow + 1;
  var numColumns = lastColumn - firstColumn + 1;
  if (numRows < 1) {
    // Empty sheet: nothing to convert.
    return;
  }

  // The whole range
  var range = sheet.getRange(startRow, firstColumn, numRows, numColumns);

  // Apply Title Case to selected columns.
  var data = range.getValues().map(function(row) {
    return row.map(function(cell, j) {
      // j is relative to the range; shift it to an absolute column number.
      var selected = columnPattern.indexOf(j + firstColumn) >= 0;
      // Leave numbers, dates and booleans alone so they are not turned
      // into strings.
      return (selected && typeof cell === 'string') ? titleCase(cell) : cell;
    });
  });
  range.setValues(data);
}
The only point I'd perhaps clarify is the line where it identifies the columns to amend.
if(columnPattern.indexOf(j + firstColumn) >= 0) {
This just corrects for the columnPattern array not being the same dimension as your sheet. An alternative would be to have an array that did match the x-dimension with boolean values, but this would be less adaptable to your sheet changing size.
I'd resist putting this in an onEdit() function but it depends on your use case as to how often data changed.

Google sheet script, times out. Need a new way or flip it upside down

noob here, so the script works great if there are less than 800 rows on a sheet, however this time I have a sheet of almost 1500 rows and the script times out.
Basically it is a quick way to get a quote. (quick here means 5-6mins, not an issue) It hides columns with calculations, hides columns with sensitive information and rows where there was no value in column H.
What I want to know is, if I can do the same with a different code, or if someone knows how to make getRange().getValue(); start at the bottom of the sheet, then I could have two scripts starting one after the other to finish the sheet and produce a printable quote.
Any help is greatly appreciated.
Many Thanks
here is the script:
/**
 * Prepares the "Quote" sheet for printing: hides columns 6, 9, 10, 12, 13
 * and 14, then hides every row whose column H cell is empty.
 *
 * Column H is read in a single batch and consecutive empty rows are hidden
 * with one call per run, so the work no longer grows by two spreadsheet
 * calls per row.
 *
 * @throws {Error} If the spreadsheet has no sheet named "Quote".
 */
function Quote()
{
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var s = ss.getSheetByName("Quote");
  if (!s) {
    throw new Error('Sheet "Quote" not found');
  }

  // Same columns as before, with adjacent ones hidden together.
  s.hideColumns(6);
  s.hideColumns(9, 2);   // columns 9-10
  s.hideColumns(12, 3);  // columns 12-14

  var lastRow = s.getLastRow();
  if (lastRow < 1) {
    return;
  }

  // One read of column H (column 8) for all rows.
  var statuses = s.getRange(1, 8, lastRow, 1).getValues();

  // 1-based sheet row where the current run of empty rows began; 0 = no run.
  var runStart = 0;
  // Iterate one step past the end so a run reaching the last row is flushed.
  for (var i = 0; i <= statuses.length; i++) {
    // Loose comparison kept on purpose: it hides exactly the rows the
    // original `status == ""` check hid.
    var isEmpty = i < statuses.length && statuses[i][0] == "";
    if (isEmpty) {
      if (runStart === 0) {
        runStart = i + 1; // array index -> sheet row
      }
    } else if (runStart !== 0) {
      s.hideRows(runStart, i + 1 - runStart);
      runStart = 0;
    }
  }
}
Your problem is row:
s.getRange("H"+i).getValue()
This code reads data from the spreadsheet, which is a very slow process when done inside a loop. You can use this construction instead:
// Read the sheet's data range once into a 2D array (data[row][column])
// instead of calling getRange().getValue() on every loop iteration.
var data = s.getDataRange().getValues();
// Loop over the in-memory rows; i is a 0-based array index.
for (var i=0; i < data.length; i++) {
var status = data[i][7]; // takes column H (array index 7)
// other code goes here...
}
This way you read data from the spreadsheet only once using getDataRange(), and convert it into an array with getValues(). The loop should then run much faster.
When hiding rows, remember to add 1 because the array starts from 0. Here's the code for hiding rows inside the loop:
// Hide the row when the value read for it compares equal to "".
if (status == "") {
s.hideRows(i + 1); // adding 1: i is a 0-based array index
}

setFormula then get and set Value - is this the best way?

I have a growing dataset on a google sheet with which I need to run some complex filters/queries/vlookups on. However I only need to do this daily as the new data arrives. The number of complex formulas is starting to slow the spreadsheet to a grinding halt, particularly with our woeful broadband connection.!
I therefore came up with a workaround of using GAS to set the formula in a cell, then to get the Value and then to set the Value, knowing that GAS doesn't run the spreadsheet functions natively (as per VBA). As I have already worked up the filters and vlookups on the sheets I need, I didn't go into scripting the formulas to achieve the same thing.
Here is a simplified version of the code:
/**
 * Puts a sum/filter formula into Sheet1!B11, reads the value it produces,
 * and writes that value back over the formula.
 */
function myFunction() {
  var cell = SpreadsheetApp.getActiveSpreadsheet()
      .getSheetByName('Sheet1')
      .getRange('B11');

  cell.setFormula('=sum(filter(C2:E5,A2:A5="Bob"))');
  // Overwrite the formula with whatever it evaluated to.
  cell.setValue(cell.getValue());
}
In my production spreadsheet I can have # 300 formulas on each of 30 sheets, so if these are all pinging away at the dataset I get lengthy periods of the grey progress bar. (In essence the formulas are filtering/summing or counting daily data to weekly data) My example above shows everything happening on one sheet.
Wondered if there was a better/different way of doing this?
Thanks
Tim
Well, I have not come up with anything better so will post my solution. Two scripts. First one checks that the user actually want to update their values, if they say yes, then checks with the user again and shows them the date range it will update. Then runs the second script, which in simple terms just applies a formula to a cell then copies the value generated and pastes the value. On testing with full data load, spreadsheet does no "waiting/progress grey box" at all so solves my issue.
/**
 * Shows the user which week is about to be summarised and, if they confirm,
 * runs writeBehavs().
 *
 * The week start is read from column A of the "summary" sheet, on the row
 * after the last filled cell of column B; the week end is six days later.
 */
function runWriteBehavs() {
  var summary = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("summary");

  // Count the non-empty cells of column B - data is contiguous, so the
  // count is also the last filled row.
  var filled = 0;
  summary.getRange("B1:B").getValues().forEach(function(cellRow) {
    if (String(cellRow)) {
      filled++;
    }
  });

  var nextRow = filled + 1;
  var weekStart = summary.getRange("A" + nextRow).getValue();
  var sixDaysMs = 6 * 24 * 3600000;
  var weekEnd = new Date(weekStart.getTime() + sixDaysMs);

  // Format both dates for display.
  var startText = Utilities.formatDate(new Date(weekStart), "GB", "dd/MM/yyyy");
  var endText = Utilities.formatDate(new Date(weekEnd), "GB", "dd/MM/yyyy");

  // Ask for confirmation before touching anything.
  var prompt = "This script will run the summaries for the week:\\n" + startText + " - " + endText + "\\n Are you really sure you want to continue?";
  if (Browser.msgBox(prompt, Browser.Buttons.YES_NO) != "yes") {
    return;
  }

  // Calls the main update script.
  writeBehavs();
}
/**
 * Fills the next empty summary row, one behaviour column at a time: writes
 * a sum/filter formula into the cell, reads the value it produces, and
 * replaces the formula with that value.
 *
 * Needs to be run once all data is entered.
 *
 * Reads from the "lists" sheet: H1 (number of behaviours) and H2:H30
 * (column letters). Writes to the "summary" sheet on the row after the last
 * filled cell of column B.
 */
function writeBehavs() {
  //get variables
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sy = ss.getSheetByName("summary");
  var sL = ss.getSheetByName("lists");
  //gets number of behaviours, a counta() of list on sheet
  var bCount = sL.getRange("H1").getValue();
  //gets column listing hard coded on sheet
  var bCol = sL.getRange("H2:H30").getValues();
  //gets last row in column B - data is contiguous
  var BVals = sy.getRange("B1:B").getValues();
  var BLast = BVals.filter(String).length;
  //the target row is the same for every column, so compute it once:
  //row for data entry and start date check
  var rowBeh = BLast + 1;
  //row for end date check
  var rowBeh2 = rowBeh + 1;
  //never read past the end of the column list
  var limit = Math.min(bCount, bCol.length);
  //for each number on behaviour count
  for (var i = 0; i < limit; ++i) {
    //set the column (first cell of the list row)
    var colBeh = String(bCol[i][0]).trim();
    if (colBeh === '') {
      //a blank letter would turn the address into a bare row number
      continue;
    }
    //gets first empty row in Column from iteration
    var rng = sy.getRange(colBeh + rowBeh);
    //enters the formula in the cell
    rng.setFormula('=iferror(sum(filter(database!$E$2:$E,database!$D$2:$D='+ colBeh + '$1,database!$A$2:$A=lists!$G$2,database!$B$2:$B>=$A' + rowBeh + ',database!$B$2:$B<$A' + rowBeh2 + ')),"0")');
    //captures the value generated by the formula
    var val = rng.getValue();
    //pastes the value over the formula
    rng.setValue(val);
    //Job Done!
  }
}

How to solve #N/A value using Google Sheet =importxml function

I have a sheet into which I want to import the value of a Google search result using the importxml function. Sometimes I get #N/A in cell A2.
I used while loop to keep trying to fetch the data and still I don't get data even if I wait for long time. Is it possible to setup a JavaScript timer so runs until I get value in cell A2 then timer stops and allow the rest of code to continue?
What should I do in order to avoid such cases and always get value on cell A2? Any Alternative solution?
// Builds the IMPORTXML formula from the URL in C2 and the XPath in D2 of the
// active sheet, appending the given number to the URL as a cache buster.
function buildImportXmlFormula(cacheBuster) {
  var sourceUrl = SpreadsheetApp.getActiveSheet().getRange('C2').getValue();
  var xpath = SpreadsheetApp.getActiveSheet().getRange('D2').getValue();
  return '=IMPORTXML("' + sourceUrl + '&randomNumber=' + cacheBuster + '","' + xpath + '")';
}

// First attempt: write the formula into A2.
var queryString = Math.random();
var cellFunction1 = buildImportXmlFormula(queryString);
SpreadsheetApp.getActiveSheet().getRange('A2').setValue(cellFunction1);

// While A2 still shows #N/A, wait five seconds and rewrite the formula with a
// fresh random number; the counter bounds the number of retries.
var stop = 0;
var queryString3;
while (true) {
  if (SpreadsheetApp.getActiveSheet().getRange('A2').getValue() !== "#N/A") {
    break;
  }
  if (stop++ >= 10) {
    break;
  }
  Utilities.sleep(5000);
  queryString3 = Math.random();
  cellFunction1 = buildImportXmlFormula(queryString3);
  SpreadsheetApp.getActiveSheet().getRange('A2').setValue(cellFunction1);
}
I faced the same problem. I suspect it's because the code tries to .getValue from the cells before the cells' values are updated from IMPORTXML.
My workaround was to write two separate functions, one to force the IMPORTXML to refresh, and then the other to write the data into the sheet. Then I gave the IMPORTXML a trigger to run every 5 minutes, and then the write function with its own trigger.
You might want to try this:
/**
 * Forces the IMPORTXML formula on the "input" sheet to refresh by writing a
 * URL with a changing query string into input!A1.
 *
 * @throws {Error} If the active spreadsheet has no sheet named "input".
 */
function refresher() {
  // Sheets are looked up by name on the spreadsheet, not on a sheet.
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheetByName("input");
  if (!sheet) {
    throw new Error('Sheet "input" not found');
  }
  // Cell input!A2 holds the formula '=IMPORTXML(A1, XMLpath)'
  //So now we give input!A1 a URL with a changing ?number behind...
  //...to force IMPORTXML to refresh. A timestamp is used because a small
  // random number can repeat and leave the URL unchanged.
  sheet.getRange(1, 1)
    .setValue("http://www.urlWhereInfoResides.com/?" + new Date().getTime());
}
^ Give refresher() a trigger to run every 5 minutes
/**
 * Copies the current value of input!A2 into column A of the first row after
 * the last row with content on the "output" sheet.
 *
 * @throws {Error} If the "input" or "output" sheet does not exist.
 */
function writer() {
  // Sheets are looked up by name on the spreadsheet, not on a sheet.
  var ss2 = SpreadsheetApp.getActiveSpreadsheet();
  var inputSheet = ss2.getSheetByName("input");
  var outputSheet = ss2.getSheetByName("output");
  if (!inputSheet || !outputSheet) {
    throw new Error('Sheets "input" and "output" must both exist');
  }
  //assumes desired value sits in cell input!A2
  var input = inputSheet.getRange(2, 1).getValue();
  //adds input value into a new row in output sheet
  outputSheet.getRange(outputSheet.getLastRow() + 1, 1).setValue(input);
}
^ Give writer() a trigger to run every 5 minutes or whatever is needed