I'm trying to scrape a webpage & to put the value in cache in order to not hit the daily urlFetch limit.
This is the code I'm using, it works without the Cache & Properties service but not when I try to add that element.
function scrapercache(url) {
var url = "https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score";
var result = [];
var description;
var options = {
'muteHttpExceptions': true,
'followRedirects': false,
};
Logger.log('line 16 OK');
var cache = CacheService.getScriptCache();
var properties = PropertiesService.getScriptProperties();
Logger.log('line 21 OK');
let res = cache.get(url);
// if(res){
// return JSON.parse(res)
//}
Logger.log(res);
Logger.log('line 24 OK');
if (res) {
// trim url to prevent (rare) errors
url.toString().trim();
var r = UrlFetchApp.fetch(url, options);
Logger.log(r);
Logger.log('line 34 OK');
var c = r.getResponseCode();
Logger.log(c);
Logger.log('line 38 OK');
// check for meta refresh if 200 ok
if (c == 200) {
var html = r.getContentText();
cache.put(url, "cached", 21600);
properties.setProperty(url, html);
Logger.log('line 46 OK');
var $ = Cheerio.load(html); // make sure this lib is added to your project!
Logger.log('line 49 OK');
// meta description
if ($('meta[name=description]').attr("content")) {
description = $('meta[name=description]').attr("content").trim();
var trim_des = description.substr(0, 40);
Logger.log('line 55 OK');
}
}
result.push([trim_des]);
Logger.log('line 60 OK');
}
return result;
Logger.log('line 64 OK');
}
I call the function like that:
=scrapercache("https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score")
& I get the following error message
Error: Reference does not exist
EDIT: I added log lines to see if the script was processing correctly & it looks like it's ok only until like 28
You're not putting the results in the cache, you're putting the string "cached" there. Consider:
let cached = cache.get(url)
if(cached){
return JSON.parse(cached)
}
let results = ["blah","blah"] // stuff we got with cheerio
cache.put(url, JSON.stringify(results), 120)
return results
Error: “Reference does not exist”
This error message is usually returned by calling a custom function in a spreadsheet cell that does not return a value. It is explicitly mentioned by the official docs, but the error message is not provided, so the confusion is understandable.
An empty array is not a valid return value (since there are no elements to return). The error is easily reproducible with the following custom function:
/**
* #customfunction
*/
function testReferenceError() {
const list = [];
return list;
}
Which, when called in a cell, resulting in the "desired" error:
Applied to your case
In your situation, when there is a cached data in cache, the if statement clause evaluates to false (truthy value when negated evaluates to false). When it does, nothing gets pushed to the result, and an empty array is returned in finally (see above for the explanation of consequences). Consider this mock:
const cache = {
get() {
return "cached";
}
};
let res = cache.get("mock_url");
//ternary operator here acts the same as "if...else":
console.log( !res ? "will enter if block" : "will enter else block" );
Note on return in finally: If you put a return statement into a finally block, expect it to override the return statements in try or catch. Consider this example close to how your program is structured:
const final = (add = false) => {
const list = [];
try {
add && list.push(1);
return [1,2]; //this return is skipped
}
catch(error) {
list.push(error);
}
finally {
return list;
}
};
console.log( final() );
Also, the question already has an answer here
Related
I need to be able to validate the values in specific table cells, and later, to click on a particular cell that holds a link. I get that Node and the browser (or emulator) are two different process spaces, so I can't pass references. I was hoping that puppeteer would hide this fact in a read-only manner such as return someArray; in a function run in the browser context being "magically" replicated by puppeteer on the Node side, but alas.
test("get a certain row from a certain table", async function getRow() {
await page.waitForSelector("#actionItemsView-table");
const
cellText = await page.evaluate(function getCells() {
const
row = Array.from(document.querySelectorAll("#actionItemsView-table tbody tr"));
for (let i = row.length - 1; i >= 0; --i) {
// the row we want is the one that has text of interest in cells[2]
if (row[i].cells[2] === "some text that identifies the row of interest") {
return row[i]; // we can't pass this back to Node, so this is wrong
// but some version of this what we need to do
}
}
return null; // no such row
});
console.log(cellText); // cellText is an empty array
}, testTimeout);
Lacking that, I have run through various intermediate experiments, all the way to this, seemingly simplest case, just to get something that works and then work my way back up to what I need, but this doesn't work either:
test("get the text from a single cell", async function getInnerText() {
await page.waitForSelector("#actionItemsView-table");
const
cellText = await page.evaluate(function getText() {
let
ct,
row = Array.from(document.querySelectorAll("#actionItemsView-table tbody tr"));
for (let i = row.length - 1; i >= 0; --i) {
if (row[i].cells[2] === "some text that identifies the row of interest") {
ct = row[i].cells[3].innerText; // the text of the next cell to the right
break;
}
}
return ct; // ct is not a string!
});
console.log(cellText); // cellText is undefined!
}, testTimeout);
If I do things like
document.querySelect("#actionItemsView-table").rows[2].cells[3].innerText
they work, so my selectors and javascript syntax seems to be correct.
There has to be a way to do this and it has to be way easier than I have made it -- what am I missing? Why is the above not working but something like this does work:
await page.$eval("input[name=emailAddr]", function setId(el, id) { el.value = id; return id; }, id);
Here's an easier way to find that cell:
await page.evaluate(() => {
let td = [...document.querySelectorAll("td")].find(td => td.innerText === "something")
return td?.nextElementSibling?.innerText
})
This will return the text or undefined
I have been trying to run some URL redirect testing using Google Apps Script in Google Sheets, I've been successful by getting a response code and also the final redirect URL for some of them but most of the links are not working.
Examples of the links I would like to check:
https://www.airbnb.com/rooms/4606613
https://www.airbnb.com/rooms/4661522
https://www.airbnb.com/rooms/6014647
https://www.airbnb.com/rooms/14452305
https://www.airbnb.com/rooms/15910617
Pretty much I need to check if those links will redirect to https://www.airbnb.com/s/homes
Using the script below, I get the following list, which is not correct since all of them will redirect to https://www.airbnb.com/s/homes:
https://www.airbnb.com/rooms/4606613
https://www.airbnb.com/s/homes
https://www.airbnb.com/s/homes
https://www.airbnb.com/rooms/14452305
https://www.airbnb.com/rooms/15910617
It seems that the website is taking 1 second to do the redirect and probably that could be the issue.
Below the code:
function urlProtocol(url){
return URI(url).protocol()
}
function urlHostname(url){
return URI(url).hostname()
}
function getRedirects(url) {
eval(UrlFetchApp.fetch('https://rawgit.com/medialize/URI.js/gh-pages/src/URI.js').getContentText());
var params = {
'followRedirects': false,
'muteHttpExceptions': true
};
var baseUrl = urlProtocol(url) + "://" + urlHostname(url),
response = UrlFetchApp.fetch(url, params),
responseCode = response.getResponseCode();
if(response.getHeaders()['Location']){
var redirectedUrl = getRedirects(baseUrl + response.getHeaders()['Location']);
return redirectedUrl;
} else {
return url;
}
}
Seems like the final redirect on some of the URLs happens after the page is loaded. Most likely there is a client-side script that initiates the change of window.location. Therefore, your correct logic fails to catch such pages.
To make matters worse, after-load redirect seem to be inconsistent as sometimes the pages you provided are not redirected to https://www.airbnb.com/s/homes. I was able to stop this redirect from happening, so the theory is confirmed - will update with what exactly causes it.
Apart from that, there are several optimizations you can apply to your script:
Get rid of eval and, actually, of the whole library unless you really need it (see how to do the same in just two lines). Improved security is the main benefit: no eval() of external scripts means less possibilities for breach.
Check for status code in 3xx range before looking through the Location header (as a precaucion).
/**
*
* #param {string} target
*/
const getRedirects = (target) =>
/**
* #param {string}
* #returns {boolean}
*/
(url) => {
if(url === target) {
return false;
}
const response = UrlFetchApp.fetch(url, {
'followRedirects': false,
'muteHttpExceptions': true
});
const code = response.getResponseCode();
let { Location } = response.getHeaders();
if (code < 300 || code >= 400) {
return true;
}
if (!Location) {
return false;
}
if (/^\/\w+/.test(Location)) {
const [protocol, , base] = url.split("/");
Location = `${protocol}//${base}${Location}`;
}
console.log(Location);
return getRedirects(target)(Location);
};
const testRedirects = () => {
const redirectsToHome = getRedirects("https://www.airbnb.com/s/homes");
const accessible = [
"https://www.airbnb.com/rooms/23861670",
"https://www.airbnb.com/rooms/4606613",
"https://www.airbnb.com/rooms/4661522",
"https://www.airbnb.com/rooms/6014647",
"https://www.airbnb.com/rooms/14452305",
"https://www.airbnb.com/rooms/15910617"
].filter(redirectsToHome);
console.log(accessible);
};
Since the clarification that the function is a custom function, you can add a wrapper function that will serve as public API that you can reference in a cell that will call the utility, something like this:
const checkIfRedirects = (source, target = "https://www.airbnb.com/s/homes") => getRedirects(target)(source);
You can then use it like you would do a formula:
=checkIfRedirects(A20)
I created an array in a separate GS file using the code provided below. I tried calling it in my HTML file. My goal is to compare the contents the array to the parameter email. However, the value returned by google.script.run.withSuccessHandler() is undefined
//in GS
function mailGetter()
{
//open sheet
var sheet = SpreadsheetApp.openByUrl("https://sheet.url").getSheetByName("Email Sheet").activate();
//get size of given row range
var row_data_email = sheet.getRange("C2:C").getValues();
var emailArray = row_data_email.join().split(',').filter(Boolean);
Logger.log(emailArray);
return emailArray;
}
//in HTML
function checkEmail(email)
{
var reg1 = /^[a-z0-9._%+-]+#[a-z0-9.-]+\.[a-z]{2,4}$/;
var arraySize = google.script.run.withSuccessHandler(misc).sizeGetter();
console.log(arraySize);
var emailArray = new Array(arraySize);
emailArray = google.script.run.withSuccessHandler(misc).mailGetter();
console.log(emailArray);
if (reg1.test(email) == false)
{
emails.style.border = "1px solid red";
document.getElementById('submitBtn').disabled = true;
}
else if (reg1.test(email) == true)
{
emails.style.border = "1px solid green";
document.getElementById('submitBtn').disabled = false;
}
for (var row = 0; row < arraySize; row++)
{
if (emailArray[row][0] == email)
{
emails.style.border = "1px solid green";
document.getElementById('submitBtn').disabled = false;
break;
}
else if (emailArray[row][0] != email)
{
emails.style.border = "1px solid red";
document.getElementById('submitBtn').disabled = true;
}
}
}
function misc()
{
console.log("Pass");
}
Issue:
Using a asynchronous function's(google.script.run) return value, which will always be undefined.
Solution:
Use successHandler as mentioned in another answer or We can use promises with async/await.
Snippet:
/*Create a promise around old callback api*/
const p = func =>
new Promise(resolve=>
google.script.run.withSuccessHandler(resolve)[func]()
);
async function checkEmail(email) //modified
{
var arraySize = await p('sizeGetter');//Wait to resolve
console.log(arraySize);
//var emailArray = new Array(arraySize);
var emailArray = await p('mailGetter');//Wait to resolve
console.log(emailArray);
//....
}
Note:
It's better to reduce the number of calls to the server. If you can combine both Getters to a single server function, it'll be better.
The above is a snippet showing how to use async/await. But if you wait for each response from the server as shown above, your front end/UI will be slow. Wait only if absolutely necessary. Calls to server should be non-blocking/asynchronous.
References:
Promises
async
await
Issue is in these lines:
emailArray = google.script.run.withSuccessHandler(misc).mailGetter();
console.log(emailArray);
You're trying to execute mailGetter() and expecting it to return value which you're storing in emailArray but this method is asynchronous and does not return directly
Rather you'll get the value in callback which you have defined as SuccessHandler
Suggested solutions :
Calling Apps Script functions from a template : https://developers.google.com/apps-script/guides/html/templates#apps_script_code_in_scriptlets
Calling Apps Script APIs directly : https://developers.google.com/apps-script/guides/html/templates#calling_apps_script_apis_directly
Pushing variables to templates : https://developers.google.com/apps-script/guides/html/templates#pushing_variables_to_templates
Reference : https://developers.google.com/apps-script/guides/html/reference/run#myFunction(...)
I am using Promise bluebird to process a json array objects from file. The problem arises if I want to store data in a json array (called list) and return this in the final process.
The list is empty/undefined after the return of list or even in the final process. Running the code, I always have 1 value that is not false which trigger the adding/push of the json in the list.
Can you help me with this issue? Below you will find my code.
Thanks in advance !!!
var Promise = require('bluebird');
var join = Promise.join;
var fs = Promise.promisifyAll(require("fs"));
fs.readdirAsync(dir).map(function (filename) {
return fs.readFileAsync(dir + "/" + filename, "utf8");
}).then(function(result){
var list=[];
result.map(function(row, index){
Promise.coroutine(function*() {
update(row, index).then(function(value){
if (value!=false){
var trade_update = new updated_Item(row.ID, row.Quantity, row.Price, row.Remark);
list.push(trade_update);
console.log(JSON.stringify(list)); <-- This works. It gives me data
}
return list;
})
})();
});
console.log('list: ' + JSON.stringify(list)); <-- output: list:[]
return list;
}).finally(function(result){
console.log('Final outcome: '+ ' ' + JSON.stringify(result)); <-- output: Final outcome: undefined
})
With the help of Samuel my code is now:
var Promise = require('bluebird');
var join = Promise.join;
var fs = Promise.promisifyAll(require("fs"));
function updateOrder(done){
fs.readdirAsync(dir).map(function (filename) {
return fs.readFileAsync(dir + "/" + filename, "utf8");
}).then(function(result){
var list=[];
result.map(function(row, index){
Promise.coroutine(function*() {
update(row, index).then(function(value){
if (value!=false){
var trade_update = new updated_Item(row.ID, row.Quantity, row.Price, row.Remark);
list.push(trade_update);
done(list);
}
})
})();
});
//done(list); <--if I put the done callback here, it will give me an empty list. I though once the result.map finished processing all the values give me the end result.
}
}
updateOrder(function(resultList){
console.log('List' + JSON.stringify(resultList));
})
This code give me whole resultList everytime the list has been updated (pushed) now.
I would to receive the resultList at the end once the function updateOrder is finished.
As noted in the comment. Promise.coroutine is asynchronous so this means that a result is not going to get return straight after your code reaches it. And this pretty much explains the phenomenon you are seeing where the latter print statements you got in the code is suggesting that list is undefined.
What you could do is wrap the entire code you got there in a function, then add a callback function as a parameter for the async functions to invoke when it has finished its duty, together returning the populated list back for later processing.
I have written a pseudo code for your case, unfortunately I couldn't test it on my IDE but the concept is there and it should work.
Consider my pseudo code:
var Promise = require('bluebird');
var join = Promise.join;
var fs = Promise.promisifyAll(require("fs"));
// Wrap everything you got into a function with a `done` parameter (callback fn)
function doStuff(done) {
fs.readdirAsync(dir).map(function (filename) {
return fs.readFileAsync(dir + "/" + filename, "utf8");
}).then(function(result){
var list=[];
result.map(function(row, index){
Promise.coroutine(function*() {
update(row, index).then(function(value){
if (value!=false){
var trade_update = new updated_Item(row.ID, row.Quantity, row.Price, row.Remark);
list.push(trade_update);
}
done(list);
})
})();
});
}).finally(function(result){
console.log('File read finish, but this doesnt mean I have finished doing everything!');
})
}
// call your function and provide a callback function for the async method to call
doStuff(function(resultList) {
console.log('list: ' + JSON.stringify(resultList));
// Continue processing the list data.
});
I am currently exploring possible methods to handle application-wide exceptions in AngularJS.
One of the things we really wanted to avoid was wrapping multiple parts of the application in nested try/catch blocks, but handle things cleanly - i.e throw an exception in response to a promise.
Has anyone covered this issue before and have any recommendations?
Any suggestions on how to pick up exceptions in services as well as controllers/directives. (See below - broadcast works ok, but only if you can attach a listener to a scope).
Progress so far
A few short design goals:
Allow exceptions from one part of the application to be handled elsewhere - or possibly multiple places (i.e. 'display error notification to user', 'disable widget').
Provide central management of common error conditions - i.e. log to server, display notification to user, redirect to login.
Allow exceptions to be thrown from controllers, directives, services etc.
Eventually allow localized messages.
The current leaning of my team is to write a service to handle exceptions, which would expose a range of simple calls:
exceptionService.warn('exception_token');
exceptionService.crit('another_exception_token');
This service would then format an 'exception' object and broadcast this from the rootscope. This would allow a default handler to watch for any broadcasts and apply default actions, as well as allow custom listeners to be set in others scopes, which could handle more specific conditions - i.e. disable a part of the UI.
var exception = {
token: 'exception_token',
severity': 'crit'
};
// broadcast exception
$rootScope.$broadcast(
'application_exception',
exception
);
I was thinking about the same recently, and it occurred to me that when it comes to a good error handling in javascript, it is irrelevant which framework you are using, Angular on something else. I wrote one such error handler recently for an AngularJS project, but I did it in a way it can be used in any framework.
Here's the complete code. You can either use it directly, or modify to your needs...
/*
Factory errorFact is to simplify error handling and reporting in other objects.
It supports detailed error output as a text string and into the browser's console.
Usage example:
A function that supports return of an error object would have the following declaration
as its very first line:
var e = errorFact.create("objectName.funcName", arguments);
- in this declaration we specify the full object + method name as the first string parameter,
- and as the second parameter we pass javascript's reserved variable called arguments, which
provides reference to all of the function's parameters for logging.
When an error occurs, the function would return:
return e.error("Error description text");
- this line will create and return a complete error context.
When a function that supports return of an error object makes a call into another
function that also supports the error context, then it can return the nested error
result by passing the embedded error to the current error object instead of the error
text.
Example:
var e = errorFact.create("objectName.funcName", arguments);
var data = callAnotherFunc(...); // calling a function that support an error object;
if(data.isError){ // If an error was triggered;
return e.error(data); // return that error from the current context;
}
The top-level code that calls an error-returning function would do verification
and if an error occurred, log all its details into console (typically).
Example:
var data = getData(...);
if(data.isError){
data.log(); // Output all the error details into the browser's console;
}
*/
"use strict";
app.factory("errorFact", function(){
return {
// creates a new error context;
create: function(method, args){
var result = {
// initiates and returns the error context;
error: function(msg){
this.info.isError = true;
if(msg.isError){
this.info.details.caller = msg;
}else{
this.info.details.msg = msg;
}
return this.info;
},
info:
{
isError: false,
details: {},
log: function(){
if(this.isError){
console.error(this.format());
}
},
// formats complete error details into a text string;
format: function(){
if(this.details.caller){
var txt = this.details.caller.format();
txt += "\nCALLER: " + this.details.method + "(" + this.formatArguments() + ")";
return txt;
}
if(this.details.method){
return "Error calling " + this.details.method + "(" + this.formatArguments() + "): " + this.details.msg;
}else{
return this.details.msg;
}
return "";
},
// formats function argument details into a text string;
formatArguments: function(){
if(!this.details.args){
return "";
}
var params = "";
for(var i = 0;i < this.details.args.length;i ++){
if(params.length > 0){
params += ",";
}
var p = this.details.args[i];
if(p === undefined){
params += "undefined";
}else{
if(p === null){
params += "null";
}else{
if(typeof(p) == "object"){
params += "Object";
}else{
params += p;
}
}
}
}
return params;
}
}
};
if(method){
result.info.details.method = method;
}
if(args){
result.info.details.args = args;
}
return result;
}
}
});
Below is a factory that shows how it is used:
"use strict";
app.factory('moduleFact', ['errorFact', function(errorFact){
return {
// Locates existing module and expands its key Id references
// into corresponding object references:
// - If 'hintGroupId' is present, property 'hints' is added from
// the corresponding hint group.
// - If 'repModules' is present, properties 'question' and 'refs'
// are added.
// On success, return the expanded module object.
// On failure, returns an error object.
//
// NOTE: Currently supports only the first value in repModules.
expandModule: function(moduleData, moduleId){
var e = errorFact.create("moduleFact.expandModule", arguments);
if(!moduleData || !moduleData.modules || !moduleId){
return e.error("Invalid parameters passed");
}
var mod = this.findModule(moduleData, moduleId);
if(mod.isError){
return e.error(mod);
}
var src = mod;
if(mod.repModules){
var repId = mod.repModules[0];
if(!repId){
return e.error("Invalid repModules encountered");
}
///////////////////////////////////////
// temporary check to throw a warning:
if(mod.repModules.length > 1){
console.warn("Multiple values in property repModules: " + JSON.stringify(mod.repModules) +
", which is not supported yet (only the first value is used)");
}
///////////////////////////////////////
src = this.findModule(moduleData, repId);
if(src.isError){
return e.error(src);
}
}
if(src.question){
mod.question = src.question;
}else{
return e.error("Question not specified");
}
if(src.refs){
mod.refs = src.refs;
}
if(src.hintGroupId){
var hg = this.findHintGroup(moduleData, src.hintGroupId);
if(hg.isError){
return e.error(hg);
}
mod.hints = hg.hints;
}
return mod; // needed extra: expand attribute repModules
},
// Expands all the modules and returns the data;
expandAllModules: function(moduleData){
var e = errorFact.create("moduleFact.expandAllModules", arguments);
if(!moduleData || !moduleData.modules){
return e.error("Invalid parameters passed");
}
for(var i = 0;i < moduleData.modules.length;i ++){
var result = this.expandModule(moduleData, moduleData.modules[i].id);
if(result.isError){
return e.error(result);
}
}
return moduleData;
},
// Locates and returns module by its Id;
findModule: function(moduleData, moduleId){
var e = errorFact.create("moduleFact.findModule", arguments);
if(!moduleData || !moduleData.modules || !moduleId){
return e.error("Invalid parameters passed");
}
for(var i = 0;i < moduleData.modules.length;i ++){
if(moduleData.modules[i].id == moduleId){
return moduleData.modules[i];
}
}
return e.error("Module with Id = " + moduleId + " not found");
},
// Locates and returns Hint Group by its Id;
findHintGroup: function(moduleData, hintGroupId){
var e = errorFact.create("moduleFact.findHintGroup", arguments);
if(!moduleData || !moduleData.hintGroups || !hintGroupId){
return e.error("Invalid parameters passed");
}
for(var i = 0;i < moduleData.hintGroups.length;i ++){
if(moduleData.hintGroups[i].id == hintGroupId){
return moduleData.hintGroups[i];
}
}
return e.error("Hint Group with Id = " + hintGroupId + " not found");
}
}
}]);
So, when you have such factory in place, your high-level code, such as in a controller would just log any issues as shown in the example below:
"use strict";
app.controller('standardsCtrl', ['$scope', 'moduleFact', function($scope, moduleFact){
var data = ...//getting data;
var mod = moduleFact.expandAllModules(data);
if(mod.isError){
mod.log(); // log all error details into the console;
}else{
// use the data
}
});
}]);
You can override the $exceptionHandler in order to pass the exceptions to your own central service for exceptions, but the $exceptionHandler seems to only receive the exceptions thrown from your controllers, directives, etc... but not for the exceptions originated from ajax calls. For those exceptions you can implement an interceptor like the one described in this page:
EDITED: Link is dead permanently.
Archive.org link
whats your opinion to create a centralized error handling function for your app
so whenever an error happened with your frontend tear (angular, API calls,...) it executed, so no need to write your error handling every time
so here is my code
(function () {
'use strict';
angular
.module('app')
.factory('$exceptionHandler', ExceptionHandler);
ExceptionHandler.$inject = ['$injector']; //for minification
function ExceptionHandler($injector) {
var $log, sweetAlert, $translate;
return function exceptionHandler(exception, cause) {
// Add DI here to prevent circular dependency
$log = $log || $injector.get('$log');
sweetAlert = sweetAlert || $injector.get('sweetAlert'); //19degrees.ngSweetAlert2
$translate = $translate || $injector.get('$translate');
// $loggerService = $loggerService || $injector.get('$loggerService');
var title, message;
title = $translate.instant('General error title');
message = $translate.instant('General error message', { exceptionMessage: exception.message });
sweetAlert.error(title, message);
$log.error(exception, cause);
// loggerService.logErrorsToBackend(exception, cause);
};
}
})();
I'm not sure if this approach considered to be a best practice but hope it helps you.