Google Sheets count number of tables in HTML - html

I would like to return the number of tables in a HTML page to a google sheet. The code below can get me the number of tables in the chrome console.
var i = 1; [].forEach.call(document.getElementsByTagName("table"),
function(x) { (i++, x); });
console.log (i)
But I dont know how to get this result (i) in Google App Script so I can return it to my sheet. Something on the lines of
function doGet() {
var html = UrlFetchApp.fetch('http://allqs.saqa.org.za/showUnitStandard.php?id=7743').getContentText();
var table = getElementsByClassName(html, 'table')[0];
var i = 1; [].forEach.call(document.getElementsByTagName("table"),
(i++, x);
console.log (i)

You want to retrieve the number of tags of <table> fronm the URL of http://allqs.saqa.org.za/showUnitStandard.php?id=7743 using GAS. If my understanding is correct, how about this modification?
Modification points :
In the native GAS, getElementsByTagName() can't be used.
In this answer, the number of <table> was retrieved using regex.
Modified script :
var url = "http://allqs.saqa.org.za/showUnitStandard.php?id=7743";
var res = UrlFetchApp.fetch(url).getContentText();
var numberOfTables = res.match(/<table/g).length;
Logger.log(numberOfTables) // 96 is retrieved.
If I misunderstand your question, I'm sorry.

Related

Google Apps Script for Yahoo Finance Returns Empty Cell

A previously working solution that was resolved here by #tanaike suddenly returns an empty cell upon execution. I don't get an error message and in the google apps scripts edit page I get "Notice Execution completed".
It looks like it's working in the background but having trouble returning a value to the cell, my guess would be something wrong with the last line that may resolve it?
function pressReleases(code) {
var url = 'https://finance.yahoo.com/quote/' + code + '/press-releases'
var html = UrlFetchApp.fetch(url).getContentText().match(/root.App.main = ([\s\S\w]+?);\n/);
if (!html || html.length == 1) return;
var obj = JSON.parse(html[1].trim());
// --- I modified the below script.
const { _cs, _cr } = obj;
if (!_cs || !_cr) return;
const key = CryptoJS.algo.PBKDF2.create({ keySize: 8 }).compute(_cs, JSON.parse(_cr)).toString();
const obj2 = JSON.parse(CryptoJS.enc.Utf8.stringify(CryptoJS.AES.decrypt(obj.context.dispatcher.stores, key)));
var res = obj2.StreamStore.streams["YFINANCE:" + code + ".mega"].data.stream_items[0].title;
// ---
return res || "No value";
}
The CryptoJS code saved as a script in google apps script is here
When I tested this script, at if (!_cs || !_cr) return;, I confirmed that the values of _cs and _cr are undefined. From this result, I understood that recently, the specification of the key for decrypting the data has been changed at the server side. When I saw this thread, I confirmed the same situation. In the thread, I noticed that, in the current stage, the key can be simply retrieved from the HTML data. So, as with the current script, how about the following modification?
Usage:
1. Get crypto-js.
Please access https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.1.1/crypto-js.min.js. And, copy and paste the script to the script editor of Google Apps Script, and save the script.
2. Modify script.
function pressReleases(code) {
var url = 'https://finance.yahoo.com/quote/' + code + '/press-releases'
var html = UrlFetchApp.fetch(url).getContentText().match(/root.App.main = ([\s\S\w]+?);\n/);
if (!html || html.length == 1) return;
var obj = JSON.parse(html[1].trim());
var key = Object.entries(obj).find(([k]) => !["context", "plugins"].includes(k))[1];
if (!key) return;
const obj2 = JSON.parse(CryptoJS.enc.Utf8.stringify(CryptoJS.AES.decrypt(obj.context.dispatcher.stores, key)));
var res = obj2.StreamStore.streams["YFINANCE:" + code + ".mega"].data.stream_items[0].title;
// console.log(res); // Check the value in the log.
return res || "No value";
}
When this script is run with code = "PGEN", the value of Precigen Provides Pipeline and Corporate Updates at the 41st Annual J.P. Morgan Healthcare Conference is obtained.
Note:
If you want to load crypto-js directly, you can also use the following script. But, in this case, the process cost becomes higher than that of the above flow. Please be careful about this.
const cdnjs = "https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.1.1/crypto-js.min.js";
eval(UrlFetchApp.fetch(cdnjs).getContentText());
I can confirm that this method can be used for the current situation (January 14, 2023). But, when the specification in the data and HTML is changed in the future update on the server side, this script might not be able to be used. Please be careful about this.
Reference:
crypto-js

Apps Script custom function working in script editor but not in Google Sheet custom function

I have built a simple custom function in Apps Script using URLFetchApp to get the follower count for TikTok accounts.
function tiktok_fans() {
var raw_data = new RegExp(/("followerCount":)([0-9]+)/g);
var handle = '#charlidamelio';
var web_content = UrlFetchApp.fetch('https://www.tiktok.com/'+ handle + '?lang=en').getContentText();
var match_text = raw_data.exec(web_content);
var result = (match_text[2]);
Logger.log(result)
return result
}
The Log comes back with the correct number for followers.
However, when I change the code to;
function tiktok_fans(handle) {
var raw_data = new RegExp(/("followerCount":)([0-9]+)/g);
//var handle = '#charlidamelio';
var web_content = UrlFetchApp.fetch('https://www.tiktok.com/'+ handle + '?lang=en').getContentText();
var match_text = raw_data.exec(web_content);
var result = (match_text[2]);
Logger.log(result)
return result
}
and use it in a spreadsheet for example =tiktok_fans(A1), where A1 has #charlidamelio I get an #ERROR response in the cell
TypeError: Cannot read property '2' of null (line 6).
Why does it work in the logs but not in the spreadsheet?
--additional info--
Still getting the same error after testing #Tanaike answer below, "TypeError: Cannot read property '2' of null (line 6)."
Have mapped out manually to see the error, each time the below runs, a different log returns "null". I believe this is to do with the ContentText size/in the cache. I have tried utilising Utilities.sleep() in between functions with no luck, I still get null's.
code
var raw_data = new RegExp(/("followerCount":)([0-9]+)/g);
//tiktok urls
var qld = UrlFetchApp.fetch('https://www.tiktok.com/#thisisqueensland?lang=en').getContentText();
var nsw = UrlFetchApp.fetch('https://www.tiktok.com/#visitnsw?lang=en').getContentText();
var syd = UrlFetchApp.fetch('https://www.tiktok.com/#sydney?lang=en').getContentText();
var tas = UrlFetchApp.fetch('https://www.tiktok.com/#tasmania?lang=en').getContentText();
var nt = UrlFetchApp.fetch('https://www.tiktok.com/#ntaustralia?lang=en').getContentText();
var nz = UrlFetchApp.fetch('https://www.tiktok.com/#purenz?lang=en').getContentText();
var aus = UrlFetchApp.fetch('https://www.tiktok.com/#australia?lang=en').getContentText();
var vic = UrlFetchApp.fetch('https://www.tiktok.com/#visitmelbourne?lang=en').getContentText();
//find folowers with regex
var match_qld = raw_data.exec(qld);
var match_nsw = raw_data.exec(nsw);
var match_syd = raw_data.exec(syd);
var match_tas = raw_data.exec(tas);
var match_nt = raw_data.exec(nt);
var match_nz = raw_data.exec(nz);
var match_aus = raw_data.exec(aus);
var match_vic = raw_data.exec(vic);
Logger.log(match_qld);
Logger.log(match_nsw);
Logger.log(match_syd);
Logger.log(match_tas);
Logger.log(match_nt);
Logger.log(match_nz);
Logger.log(match_aus);
Logger.log(match_vic);
Issue:
From your situation, I remembered that the request of UrlFetchApp with the custom function is different from the request of UrlFetchApp with the script editor. So I thought that the reason for your issue might be related to this thread. https://stackoverflow.com/a/63024816 In your situation, your situation seems to be the opposite of this thread. But, it is considered that this issue is due to the specification of the site.
In order to check this difference, I checked the file size of the retrieved HTML data.
The file size of HTML data retrieved by UrlFetchApp executing with the script editor is 518k bytes.
The file size of HTML data retrieved by UrlFetchApp executing with the custom function is 9k bytes.
It seems that the request of UrlFetchApp executing with the custom function is the same as that of UrlFetchApp executing withWeb Apps. The data of 9k bytes are retrieved by using this.
From the above result, it is found that the retrieved HTML is different between the script editor and the custom function. Namely, the HTML data retrieved by the custom function doesn't include the regex of ("followerCount":)([0-9]+). By this, such an error occurs. I thought that this might be the reason for your issue.
Workaround:
When I tested your situation with Web Apps and triggers, the same issue occurs. By this, in the current stage, I thought that the method for automatically executing the script might not be able to be used. So, as a workaround, how about using a button and the custom menu? When the script is run by the button and the custom menu, the script works. It seems that this method is the same as that of the script editor.
The sample script is as follows.
Sample script:
Before you run the script, please set range. For example, please assign this function to a button on Spreadsheet. When you click the button, the script is run. In this sample, it supposes that the values like #charlidamelio are put to the column "A".
function sample() {
var range = "A2:A10"; // Please set the range of "handle".
var raw_data = new RegExp(/("followerCount":)([0-9]+)/g);
var sheet = SpreadsheetApp.getActiveSheet();
var r = sheet.getRange(range);
var values = r.getValues();
var res = values.map(([handle]) => {
if (handle != "") {
var web_content = UrlFetchApp.fetch('https://www.tiktok.com/'+ handle + '?lang=en').getContentText();
var match_text = raw_data.exec(web_content);
return [match_text[2]];
}
return [""];
});
r.offset(0, 1).setValues(res);
}
When this script is run, the values are retrieved from the URL and put to the column "B".
Note:
This is a simple script. So please modify it for your actual situation.
Reference:
Related thread.
UrlFetchApp request fails in Menu Functions but not in Custom Functions (connecting to external REST API)
Added:
About the following additional question,
whilst this works for 1 TikTok handle, when trying to run a list of multiple it fails each time, with the error TypeError: Cannot read property '2' of null. After doing some investigating and manually mapping out 8 handles, I can see that each time it runs, it returns "null" for one or more of the web_content variables. Is there a way to slow the script down/run each UrlFetchApp one at a time to ensure each returns content?
i've tried this and still getting an error. Have tried up to 10000ms. I've added some more detail to the original question, hope this makes sense as to the error. It is always in a different log that I get nulls, hence why I think it's a timing or cache issue.
In this case, how about the following sample script?
Sample script:
In this sample script, when the value cannot be retrieved from the URL, the value is tried to retrieve again as the retry. This sample script uses the 2 times as the retry. So when the value cannot be retrieved by 2 retries, the empty value is returned.
function sample() {
var range = "A2:A10"; // Please set the range of "handle".
var raw_data = new RegExp(/("followerCount":)([0-9]+)/g);
var sheet = SpreadsheetApp.getActiveSheet();
var r = sheet.getRange(range);
var values = r.getValues();
var res = values.map(([handle]) => {
if (handle != "") {
var web_content = UrlFetchApp.fetch('https://www.tiktok.com/'+ handle + '?lang=en').getContentText();
var match_text = raw_data.exec(web_content);
if (!match_text || match_text.length != 3) {
var retry = 2; // Number of retry.
for (var i = 0; i < retry; i++) {
Utilities.sleep(3000);
web_content = UrlFetchApp.fetch('https://www.tiktok.com/'+ handle + '?lang=en').getContentText();
match_text = raw_data.exec(web_content);
if (match_text || match_text.length == 3) break;
}
}
return [match_text && match_text.length == 3 ? match_text[2] : ""];
}
return [""];
});
r.offset(0, 1).setValues(res);
}
Please adjust the value of retry and Utilities.sleep(3000).
This works for me as a Custom Function:
function MYFUNK(n=2) {
const url = 'my website url'
const re = new RegExp(`<p id="un${n}.*\/p>`,'g')
const r = UrlFetchApp.fetch(url).getContentText();
const v = r.match(re);
Logger.log(v);
return v;
}
I used my own website and I have several paragraphs with ids from un1 to un7 and I'm taking the value of A1 for the only parameter. It returns the correct string each time I change it.

Google Sheets Scraping Options Chain from Yahoo Finance, Incomplete Results [duplicate]

This question already has answers here:
Scraping data to Google Sheets from a website that uses JavaScript
(2 answers)
Closed last month.
I'm attempting to scrape options pricing data from Yahoo Finance in Google Sheets. Although I'm able to pull the options chain just fine, i.e.
=IMPORTHTML("https://finance.yahoo.com/quote/TCOM/options?date=1610668800","table",2)
I find that it's returning results that don't completely match what's actually shown on Yahoo Finance. Specifically, the scraped results are incomplete - they're missing some strikes. i.e. the first 5 rows of the chart may match, but then it will start returning only every other strike (aka skipping every other strike).
Why would IMPORTHTML be returning "abbreviated" results, which don't match what's actually shown on the page? And more importantly, is there some way to scrape complete data (i.e. that doesn't skip some portion of the available strikes)?
In Yahoo finance, all data are available in a big json called root.App.main. So to get the complete set of data, proceed as following
var source = UrlFetchApp.fetch(url).getContentText()
var jsonString = source.match(/(?<=root.App.main = ).*(?=}}}})/g) + '}}}}'
var data = JSON.parse(jsonString)
You can then choose to fetch the informations you need. Take a copy of this example https://docs.google.com/spreadsheets/d/1sTA71PhpxI_QdGKXVAtb0Rc3cmvPLgzvXKXXTmiec7k/copy
edit
if you want to get a full list of available data, you can retrieve it by this simple script
// mike.steelson
let result = [];
function getAllDataJSON(url = 'https://finance.yahoo.com/quote/TCOM/options?date=1610668800') {
var source = UrlFetchApp.fetch(url).getContentText()
var jsonString = source.match(/(?<=root.App.main = ).*(?=}}}})/g) + '}}}}'
var data = JSON.parse(jsonString)
getAllData(eval(data),'data')
var sh = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet()
sh.getRange(1, 1, result.length, result[0].length).setValues(result);
}
function getAllData(obj,id) {
const regex = new RegExp('[^0-9]+');
for (let p in obj) {
var newid = (regex.test(p)) ? id + '["' + p + '"]' : id + '[' + p + ']';
if (obj[p]!=null){
if (typeof obj[p] != 'object' && typeof obj[p] != 'function'){
result.push([newid, obj[p]]);
}
if (typeof obj[p] == 'object') {
getAllData(obj[p], newid );
}
}
}
}
Here's a simpler way to get the last market price of a given option. Add this function to you Google Sheets Script Editor.
function OPTION(ticker) {
var ticker = ticker+"";
var URL = "finance.yahoo.com/quote/"+ticker;
var html = UrlFetchApp.fetch(URL).getContentText();
var count = (html.match(/regularMarketPrice/g) || []).length;
var query = "regularMarketPrice";
var loc = 0;
var n = parseInt(count)-2;
for(i = 0; i<n; i++) {
loc = html.indexOf(query,loc+1);
}
var value = html.substring(loc+query.length+9, html.indexOf(",", loc+query.length+9));
return value*100;
}
In your google sheets input the Yahoo Finance option ticker like below
=OPTION("AAPL210430C00060000")
I believe your goal as follows.
You want to retrieve the complete table from the URL of https://finance.yahoo.com/quote/TCOM/options?date=1610668800, and want to put it to the Spreadsheet.
Issue and workaround:
I could replicate your issue. When I saw the HTML data, unfortunately, I couldn't find the difference of HTML between the showing rows and the not showing rows. And also, I could confirm that the complete table is included in the HTML data. By the way, when I tested it using =IMPORTXML(A1,"//section[2]//tr"), the same result of IMPORTHTML occurs. So I thought that in this case, IMPORTHTML and IMPORTXML might not be able to retrieve the complete table.
So, in this answer, as a workaround, I would like to propose to put the complete table parsed using Sheets API. In this case, Google Apps Script is used. By this, I could confirm that the complete table can be retrieved by parsing the HTML table with Sheet API.
Sample script:
Please copy and paste the following script to the script editor of Spreadsheet, and please enable Sheets API at Advanced Google services. And, please run the function of myFunction at the script editor. By this, the retrieved table is put to the sheet of sheetName.
function myFunction() {
// Please set the following variables.
const url ="https://finance.yahoo.com/quote/TCOM/options?date=1610668800";
const sheetName = "Sheet1"; // Please set the destination sheet name.
const sessionNumber = 2; // Please set the number of session. In this case, the table of 2nd session is retrieved.
const html = UrlFetchApp.fetch(url).getContentText();
const section = [...html.matchAll(/<section[\s\S\w]+?<\/section>/g)];
if (section.length >= sessionNumber) {
if (section[sessionNumber].length == 1) {
const table = section[sessionNumber][0].match(/<table[\s\S\w]+?<\/table>/);
if (table) {
const ss = SpreadsheetApp.getActiveSpreadsheet();
const body = {requests: [{pasteData: {html: true, data: table[0], coordinate: {sheetId: ss.getSheetByName(sheetName).getSheetId()}}}]};
Sheets.Spreadsheets.batchUpdate(body, ss.getId());
}
} else {
throw new Error("No table.");
}
} else {
throw new Error("No table.");
}
}
const sessionNumber = 2; means that 2 of =IMPORTHTML("https://finance.yahoo.com/quote/TCOM/options?date=1610668800","table",2).
References:
Method: spreadsheets.batchUpdate
PasteDataRequest

Identify specific charts on google sheets using google script

I have a google Slides presentation containing a chart tied to a google Spreadsheets Sheet. I created a plug-in (google script), using which I am able to paste the sheets url and have all available charts by id.
I am unable to overcome the issue of identifying the chart I need solely using its id which is generated.
Right now I can do this:
var spreadSheetContainingChartAndData = Sheets.Spreadsheets.get(sheetsId)
var allSheets = spreadSheetContainingChartAndData.sheets
var allChartsOnTheOnlyExistingSheet = dataSheet.sheets[0].charts
var firstChart = allChartsOnTheOnlyExistingSheet[0]
var chartId = firstChart.chartId
Ideally I would want to find the chart by some form of user defined identifier like this:
var chartId = findByAnythingElse(allChartsOnTheOnlyExistingSheet, 'user-defined-tag')
The usecase is to have a spreadsheet template containing client data and a number of charts visualising said data.
Whenever someone wants to make a presentation, he can click on the plugin menu and have certain slides to be populated with specific charts which will be cropped and resized to a predefined format.
You want to retrieve the chartId from all charts in a Spreadsheet.
You want to search the chartId using the specific tag.
You want to achieve this using Google Apps Script.
If my understanding is correct, how about this sample script? Please think of this as just one of several answers.
In this answer, the chart's title, subtitle and alt title are used as the search tags.
Sample script:
Before you use this script, please enable Sheets API at Advanced Google Services. And when you test this script, please set the variables for searching the chart and run the function of run().
function findByAnythingElse(spreadsheetId, searchObj) {
var obj = Sheets.Spreadsheets.get(spreadsheetId, {fields: "sheets(charts(chartId,spec(altText,subtitle,title)))"});
var chartIds = [];
for (var i = 0; i < obj.sheets.length; i++) {
var charts = obj.sheets[i].charts;
if (charts) {
for (var j = 0; j < charts.length; j++) {
var title = charts[j].spec.title;
var subTitle = charts[j].spec.subtitle;
var altText = charts[j].spec.altText;
if (title == searchObj.searchTitle || subTitle == searchObj.searchSubTitle || altText == searchObj.searchAltText) {
chartIds.push(charts[j].chartId);
}
}
}
}
return chartIds;
}
// Please run this fnuction for testing.
function run() {
var searchObj = { // Please set search values.
searchTitle: "sample1",
searchSubTitle: "",
searchAltText: "",
}
var spreadsheetId = "###"; // Please set Spreadsheet ID here.
var res = findByAnythingElse(spreadsheetId, searchObj);
Logger.log(res)
}
In this sample script, when run() is run, one of searchTitle, searchSubTitle and searchAltText of searchObj is matched, the chart ID is retrieved.
For example, the chart ID can be also retrieved from only searchTitle.
When there are several charts with the same title, several chart IDs are returned.
Note:
This is a simple sample script. So please modify it for your situation.
References:
Advanced Google services
Charts
If I misunderstood your question and this was not the direction you want, I apologize.

Is GAS/google sheet an alternative option to PHP/mySQL?

I keep finding forum results that refer to the google visualiser for displaying my query results. But it just seems to dump the data in a pre-made table. I haven't found a way to write my own custom table dynamically.
In the past, when I have hacked together PHP to make use of mySQL DB, I would simply connect to my DB, run a query into an array, then cycle through the array and write the table in HTML. I could do IF statements in the middle for formatting or extra tweaks to the displayed data, etc. But I am struggling to find documentation on how to do something similar in a google script. What is the equivalent work flow? Can someone point me to a tutorial that will start me down this path?
I just want a simple HTML page with text box and submit button that runs a query on my Google sheet (back in the .gs file) and displays the results in a table back on the HTML page.
Maybe my understanding that GAS/google sheets is an alternative to PHP/mySQL is where I'm going wrong? Am I trying to make a smoothie with a toaster instead of a blender?
Any help appreciated
Welcome David. Ruben is right, it's cool to post up some code you have tried. But I spent many months getting my head around Apps-script and love to share what I know. There are several ways to get the data out of a Google sheet. There is a very well document Spreadsheet Service For GAS. There is also a client API..
There is the approach you mention. I suggest converting the incoming data to JSON so you can do with it as you like.
Unauthenticated frontend queries are also possible. Which needs the spreadsheet to be published and set to anyone with the link can view, and uses the Google visualisation API
var sql = 'SELECT A,B,C,D,E,F,G where A = true order by A DESC LIMIT 10 offset '
var queryString = encodeURIComponent(sql);
var query = new google.visualization.Query('https://docs.google.com/spreadsheets/d/'+ spreadsheetId +'/gviz/tq?tq=' + queryString);
query.send(handleSampleDataQueryResponse);
function handleSampleDataQueryResponseTotal(responsetotal) {
var myData = responsetotal.getDataTable();
var myObject = JSON.parse(myData.toJSON());
console.log(myObject)
}
Other Approaches
In your GAS back end this can get all of your data in columns A to C as an array of arrays ([[row1],[row2],[row3]]).
function getData(query){
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName('Sheet1');
var range = sheet.getRange('A1:C');
var data = range.getValues();
// return data after some query
}
In your GAS front end this can call to your backend.
var data = google.script.run.withSuccessHandler(success).getData(query);
var success = (e) => {
console.log(e)
}
In your GAS backend this can add data to your sheet. The client side API will also add data, but is more complex and needs authentication.
function getData(data){
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName('Sheet1');
var range = sheet.getRange('A1:C');
var data = range.setValues([[row1],[row2],[row3]]);
return 'success!'
}
In your GAS frontend this can send data to your backend.
var updateData = [[row1],[row2],[row3]]
var data = google.script.run.withSuccessHandler(success).getData(updateData);
var success = (e) => {
console.log(e)
}
Finally
Or you can get everything from the sheet as JSON and do the query in the client. This works okay if you manipulate the data in the sheet as you will need it. This also needs the spreadsheet to be published and set to anyone with the link can view.
var firstSheet = function(){
var spreadsheetID = "SOME_ID";
var url = "https://spreadsheets.google.com/feeds/list/" + spreadsheetID +"/1/public/values?alt=json";
return new Promise((resolve,reject)=>{
$.getJSON(url, (data)=>{
let result = data.feed.entry
resolve(result)
});
})
}
firstSheet().then(function(data){
console.log(data)
})