accessing table cells from puppeteer

accessing table cells from puppeteer - puppeteer

I need to be able to validate the values in specific table cells, and later, to click on a particular cell that holds a link. I get that Node and the browser (or emulator) are two different process spaces, so I can't pass references. I was hoping that puppeteer would hide this fact in a read-only manner such as return someArray; in a function run in the browser context being "magically" replicated by puppeteer on the Node side, but alas.
test("get a certain row from a certain table", async function getRow() {
await page.waitForSelector("#actionItemsView-table");
const
cellText = await page.evaluate(function getCells() {
const
row = Array.from(document.querySelectorAll("#actionItemsView-table tbody tr"));
for (let i = row.length - 1; i >= 0; --i) {
// the row we want is the one that has text of interest in cells[2]
if (row[i].cells[2] === "some text that identifies the row of interest") {
return row[i]; // we can't pass this back to Node, so this is wrong
// but some version of this what we need to do
}
}
return null; // no such row
});
console.log(cellText); // cellText is an empty array
}, testTimeout);
Lacking that, I have run through various intermediate experiments, all the way to this, seemingly simplest case, just to get something that works and then work my way back up to what I need, but this doesn't work either:
test("get the text from a single cell", async function getInnerText() {
await page.waitForSelector("#actionItemsView-table");
const
cellText = await page.evaluate(function getText() {
let
ct,
row = Array.from(document.querySelectorAll("#actionItemsView-table tbody tr"));
for (let i = row.length - 1; i >= 0; --i) {
if (row[i].cells[2] === "some text that identifies the row of interest") {
ct = row[i].cells[3].innerText; // the text of the next cell to the right
break;
}
}
return ct; // ct is not a string!
});
console.log(cellText); // cellText is undefined!
}, testTimeout);
If I do things like
document.querySelect("#actionItemsView-table").rows[2].cells[3].innerText
they work, so my selectors and javascript syntax seems to be correct.
There has to be a way to do this and it has to be way easier than I have made it -- what am I missing? Why is the above not working but something like this does work:
await page.$eval("input[name=emailAddr]", function setId(el, id) { el.value = id; return id; }, id);

Here's an easier way to find that cell:
await page.evaluate(() => {
let td = [...document.querySelectorAll("td")].find(td => td.innerText === "something")
return td?.nextElementSibling?.innerText
})
This will return the text or undefined

Related

Google Docs API - complete documentation (hyperlink issue)

I hope everyone is in good health. This post is my continue of my previous post
My main goal
So main goal was to get the hyperlink and change it the text linked with it. I initially used code from this post and modified it to change the text of first hyperlink. Here is my modified code to change the text of first hyperlink.
function onOpen() {
const ui = DocumentApp.getUi();
ui.createMenu('What to do?')
.addItem('HyperLink Modifier', 'findAndReplacetext')
.addToUi();
}
/**
* Get an array of all LinkUrls in the document. The function is
* recursive, and if no element is provided, it will default to
* the active document's Body element.
*
* #param element The document element to operate on.
* .
* #returns {Array} Array of objects, vis
* {element,
* startOffset,
* endOffsetInclusive,
* url}
*/
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
links.push(curUrl); // add to links
curUrl = {};
}
}
}
if (inUrl) {
// in case the link ends on the same char that the element does
links.push(curUrl);
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
links = links.concat(getAllLinks(element.getChild(i)));
}
}
return links;
}
/**
* Replace all or part of UrlLinks in the document.
*
* #param {String} searchPattern the regex pattern to search for
* #param {String} replacement the text to use as replacement
*
* #returns {Number} number of Urls changed
*/
function findAndReplacetext() {
var links = getAllLinks();
while(links.length > 0){
var link = links[0];
var paragraph = link.element.getText();
var linkText = paragraph.substring(link.startOffset, link.endOffsetInclusive+1);
var newlinkText = `(${linkText})[${link.url}]`
link.element.deleteText(link.startOffset, link.endOffsetInclusive);
link.element.insertText(link.startOffset, newlinkText);
links = getAllLinks();
}
}
String.prototype.betterReplace = function(search, replace, position) {
if (this.length > position) {
return this.slice(0, position) + this.slice(position).replace(search, replace);
}
return this;
}
Note: I used insertText and deleteText functions to update the text value of hyperlink.
My problem with above code
Now the problem was that this code was running too slow. I thought may be it was because I was running the script every-time I needed to search for next hyperlink, So maybe I can break the loop and only get the first hyperlink each time. Then from my previous post the guy gave me a solution to break loop and only get the first hyperlink but when I tried the new code unfortunately it was still slow. In that post he also proposed me a new method by using Google Docs API, I tried using that it was was super fast. Here is the code using Google Docs API
function myFunction() {
const doc = DocumentApp.getActiveDocument();
const res = Docs.Documents.get(doc.getId()).body.content.reduce((ar, {paragraph}) => {
if (paragraph && paragraph.elements) {
paragraph.elements.forEach(({textRun}) => {
if (textRun && textRun.textStyle && textRun.textStyle.link) {
ar.push({text: textRun.content, url: textRun.textStyle.link.url});
}
});
}
return ar;
}, []);
console.log(res) // You can retrieve 1st link and test by console.log(res[0]).
}
My new problem
I liked the new code but I am stuck again at this point as I am unable to find how can I change the text associated with the hyperlink. I tried using the functions setContent and setUrl but they don't seem to work. Also I am unable to find the documentation for these functions on main documentation of this API. I did find I reference for previously mentioned functions here but they are not available for appscript. Here is the sample document I am working on
https://docs.google.com/document/d/1eRvnR2NCdsO94C5nqly4nRXCttNziGhwgR99jElcJ_I/edit?usp=sharing
End note:
I hope I was able to completly convey my message and all the details assosiated with it. If not kindly don't be mad at me, I am still in learning process and my English skills are pretty weak. Anyway if you want any other data let me know in the comments and Thanks for giving your time I really appreciate that.

In order to remove all the hyperlink from your document, you can do the following:
First, retrieve the start and end indexes of these hyperlinks. This can be done by calling documents.get, iterate through all elements in the body content, checking which ones are paragraphs, iterating through the corresponding TextRun, and checking which TextRuns contain a TextStyle with a link property. All this is already done in the code you provided in your question.
Next, for all TextRuns that include a link, retrieve their startIndex and endIndex.
Using these retrieved indexes, call batchUpdate to make an UpdateTextStyleRequest. You want to remove the link property between each pair of indexes, and for that you would just need to set fields to link (in order to specify which properties you want to update) and don't set a link property in the textStyle property you provide in the request since, as the docs for TextStyle say:
link: If unset, there is no link.
Code sample:
function removeHyperlinks() {
const doc = DocumentApp.getActiveDocument();
const hyperlinkIndexes = Docs.Documents.get(doc.getId()).body.content.reduce((ar, {paragraph}) => {
if (paragraph && paragraph.elements) {
paragraph.elements.forEach(element => {
const textRun = element.textRun;
if (textRun && textRun.textStyle && textRun.textStyle.link) {
ar.push({startIndex: element.startIndex, endIndex: element.endIndex });
}
});
}
return ar;
}, []);
hyperlinkIndexes.forEach(hyperlinkIndex => {
const resourceUpdateStyle = {
requests: [
{
updateTextStyle: {
textStyle: {},
fields: "link",
range: {
startIndex: hyperlinkIndex.startIndex,
endIndex: hyperlinkIndex.endIndex
}
}
}
]
}
Docs.Documents.batchUpdate(resourceUpdateStyle, doc.getId());
});
}

Any time a section is mentioned in the document, I want that mention to become a link to the corresponding bookmark

Goal: I have a very long document with many unique sections that each have bookmarks. Any time a section is mentioned in the document, I want that mention to become a link to the corresponding bookmark. It doesn't have to be event-driven, I intend to do it from a menu.
I have the below code written to get a list of the names of each bookmarked line so I can match it to the words in the doc. I'm trying to figure out what line of code to use to link specific text to that bookmark. I've tried to use the setLinkUrl("beginningofurl" + id[i]) code, but the ID of the bookmarks doesn't tell me if it's a header or regular text, and sometimes it is just regular text. I'm wondering if there's a better way of doing this?
var DOC = DocumentApp.getActiveDocument();
function Setlink() {
var bookmarks = DOC.getBookmarks();
var names = [];
for (var i = 0; i < bookmarks.length; i++){
names.push(bookmarks[i].getPosition().getSurroundingText().getText());
}
Logger.log(names);
}

Headings are a property of Paragraph elements. To check a Bookmark to see if it is in a paragraph of a certain Paragraph Heading, we need to get the Position, then the Element, and then check if the Element is indeed a Paragraph before we can check the Paragraph Heading.
We can put our test for if an Element is a heading in a predicate function named isElementInHeading that will return true or false when given an Element.
function isElementInHeading(element) {
if (element.getType() !== DocumentApp.ElementType.PARAGRAPH) {
return false;
}
const {ParagraphHeading} = DocumentApp;
switch (element.getHeading()) {
case ParagraphHeading.HEADING1:
case ParagraphHeading.HEADING2:
case ParagraphHeading.HEADING3:
case ParagraphHeading.HEADING4:
case ParagraphHeading.HEADING5:
case ParagraphHeading.HEADING6:
return true;
}
return false;
}
This can be used to both filter the bookmarks to include only those that mark headings, and to skip over the same headings when using setLinkUrl.
The strategy in this example is to collect both the bookmark's ID and the desired text in one go using a reducer function, then search through the document for each bit of text, check that we didn't just find the header again, and then apply the link.
I am not quite sure how you are getting the URL, but I found just copying and pasting the URL into the script as const url = "https://docs.google.com/.../edit#bookmark="; worked for me.
// for Array.prototype.reduce
function getHeadingBookmarksInfo(bookmarks, bookmark) {
const element = bookmark.getPosition().getElement();
if (isElementInHeading(element)) {
return [
...bookmarks,
{ id: bookmark.getId(), text: element.getText() }
];
}
return bookmarks;
}
function updateLinks() {
const doc = DocumentApp.getActiveDocument();
const bookmarks = doc.getBookmarks();
const headingBookmarksInfo = bookmarks.reduce(getHeadingBookmarksInfo, []);
const body = doc.getBody();
headingBookmarksInfo.forEach(function(info) {
const {id, text} = info;
let foundRef = body.findText(text);
while (foundRef !== null) {
const element = foundRef.getElement();
if (!isElementInHeading(element.getParent())) {
element.asText()
.setLinkUrl(
foundRef.getStartOffset(),
foundRef.getEndOffsetInclusive(),
url + id // assumes url is hardcoded in global scope
);
}
foundRef = body.findText(text, foundRef);
}
});
}

Error: "Reference does not exist" when using a custom function

I'm trying to scrape a webpage & to put the value in cache in order to not hit the daily urlFetch limit.
This is the code I'm using, it works without the Cache & Properties service but not when I try to add that element.
function scrapercache(url) {
var url = "https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score";
var result = [];
var description;
var options = {
'muteHttpExceptions': true,
'followRedirects': false,
};
Logger.log('line 16 OK');
var cache = CacheService.getScriptCache();
var properties = PropertiesService.getScriptProperties();
Logger.log('line 21 OK');
let res = cache.get(url);
// if(res){
// return JSON.parse(res)
//}
Logger.log(res);
Logger.log('line 24 OK');
if (res) {
// trim url to prevent (rare) errors
url.toString().trim();
var r = UrlFetchApp.fetch(url, options);
Logger.log(r);
Logger.log('line 34 OK');
var c = r.getResponseCode();
Logger.log(c);
Logger.log('line 38 OK');
// check for meta refresh if 200 ok
if (c == 200) {
var html = r.getContentText();
cache.put(url, "cached", 21600);
properties.setProperty(url, html);
Logger.log('line 46 OK');
var $ = Cheerio.load(html); // make sure this lib is added to your project!
Logger.log('line 49 OK');
// meta description
if ($('meta[name=description]').attr("content")) {
description = $('meta[name=description]').attr("content").trim();
var trim_des = description.substr(0, 40);
Logger.log('line 55 OK');
}
}
result.push([trim_des]);
Logger.log('line 60 OK');
}
return result;
Logger.log('line 64 OK');
}
I call the function like that:
=scrapercache("https://www.gurufocus.com/term/fscore/nyse:ABBV/Piotroski-F-Score")
& I get the following error message
Error: Reference does not exist
EDIT: I added log lines to see if the script was processing correctly & it looks like it's ok only until like 28

You're not putting the results in the cache, you're putting the string "cached" there. Consider:
let cached = cache.get(url)
if(cached){
return JSON.parse(cached)
}
let results = ["blah","blah"] // stuff we got with cheerio
cache.put(url, JSON.stringify(results), 120)
return results

Error: “Reference does not exist”
This error message is usually returned by calling a custom function in a spreadsheet cell that does not return a value. It is explicitly mentioned by the official docs, but the error message is not provided, so the confusion is understandable.
An empty array is not a valid return value (since there are no elements to return). The error is easily reproducible with the following custom function:
/**
* #customfunction
*/
function testReferenceError() {
const list = [];
return list;
}
Which, when called in a cell, resulting in the "desired" error:
Applied to your case
In your situation, when there is a cached data in cache, the if statement clause evaluates to false (truthy value when negated evaluates to false). When it does, nothing gets pushed to the result, and an empty array is returned in finally (see above for the explanation of consequences). Consider this mock:
const cache = {
get() {
return "cached";
}
};
let res = cache.get("mock_url");
//ternary operator here acts the same as "if...else":
console.log( !res ? "will enter if block" : "will enter else block" );
Note on return in finally: If you put a return statement into a finally block, expect it to override the return statements in try or catch. Consider this example close to how your program is structured:
const final = (add = false) => {
const list = [];
try {
add && list.push(1);
return [1,2]; //this return is skipped
}
catch(error) {
list.push(error);
}
finally {
return list;
}
};
console.log( final() );
Also, the question already has an answer here

A Discordjs Chat Filter

I'm making a discord.js bot and I was curious how to add a chat filter to it. As in a user says f*** and it will auto-delete.

You can do this with an array of words,
var profanities = ["test1", "test2", "..."];
then in your bot.on message handler, or how you use to handle messages,
bot.on('message', async message => {
let msg = message.content.toLowerCase();
let Admin = message.guild.roles.find('name', "Admin"); // Change this to meet what you want
let General = message.guild.channels.find(`name`, "general"); // All text channels are lowecase. Change this to meet what you want
for (x = 0; x < profanities.length; x++) {
if(message.member.roles.has(Admin.id) || message.channel.id === General.id) return; // if you changed the name of the variables above, change these too.
if (msg.includes(profanities[x])){
await message.reply("You cannot say that here!")
message.delete()
// Your code here
return;
}
}
});
EDIT:
This is very basic, it won't look for substitute letters such as $, # or numbers/spaces unless you code those directly in, which you can have a list of words then have a console log every word with substitute letters.

const blacklisted = ["word1", "word2", "word3"] // and continue doing this until you have entered all your desired words
client.on("message", message => {
let shouldDelete = false
for(word in blacklisted){
if(message.content.includes(word)) shouldDelete = true
}
if(shouldDelete) message.delete()
});
Please let me know if I messed up anywhere. This is just how I would do it. (not tested)

Here is a ChatFilter that is guaranteed to work!
bot.on("message", async message => {
let blacklisted = ['yourbadword1', 'yourbadword2', 'and some more bad words :3'];
let foundInText = false;
for (var i in blacklisted) {
if(message.content.toLowerCase().includes(blacklisted[i].toLowerCase())) foundInText = true;
}
if(foundInText) {
message.delete();
message.channel.send("Stop using this bad word!")
}
});

bot.on("message", async message => {
//Define the words that should be deleted after a successfull detection
let blacklisted2 = ['Some', 'Random', 'word'];
//Set foundInText to false
let foundInText2 = false;
//Check for the blacklisted words in blacklisted2
for (var i in blacklisted2) {
if (message.content.toLowerCase().includes(blacklisted2[i].toLowerCase()))
//If a blacklisted word has been detected in any sentences, it's going to set foundInText2 to true.
foundInText2 = true;
}
//If something has been detected, delete the message that has a blacklisted word inside of it.
if (foundInText2) {
//Delete the message.
message.delete();
//Then do whatever you want
}
});
Let me know
if everything works.If you have any problems, contact / answer me here.

i have created a simple way e.g
let badwords = ['word1', 'word2']
client.on('message', async (message) => {
if (badwords.includes(message.content.toLowerCase())) {
message.delete();
message.channel.send(`HEY! you cant send that here`);
}
})
this is a filter that i use for my bot. It first turns the message into lowercase then searches for the word in the array. If it finds the word is matching the word in the lowercase message then the message is deleted. BTW words in the array have to be lowercase for this to work.

client.on('message', message => {
message.delete(message.content.replace(/asshole/gi))
.catch(console.error);
});
This is just a example you can make it, with a text array too.

Query a JSON list of dict

[{"time":136803,"price":"1.4545","amount":"0.0885","ID":"112969"},
{"time":136804,"price":"2.5448","amount":"0.0568","ID":"5468489"},
{"time":136805,"price":"1.8948","amount":"0.0478","ID":"898489"}]
I have a large JSON file like the one above. It is a list of dictionaries. I want to choose a time and find the value assoaciated with that time. I will not know where in my list the time is located only the value for the time. Is there a way I can say, for time 136804, make x = to price? Or should I loop through each value? I also want to use this value (x) in a mathematical function.
My fist idea is to use brute force by going through each item and checking it for a matching time value in a loop.
Is this the best way?

Take a look at SpahQL http://danski.github.io/spahql/ which we use to query JSON in order to select values and subsequently change them as required.

I did something similar to this recently. JSON file I had to query had around 6000 lines and around 500 JSON objects. My query function given below loops through the each object to select the matching objects, but it can fetch any result within few milliseconds.
var data = '[{"time":136803,"price":"1.4545","amount":"0.0885","ID":"112969"},'+ '{"time":136804,"price":"2.5448","amount":"0.0568","ID":"5468489"},'+ '{"time":136805,"price":"1.8948","amount":"0.0478","ID":"898489"}]';
var data = JSON.parse(data);
var query = function(data, select, andwhere) {
var return_array = [];
$.each(data, function (i, obj) {
var temp_obj = {};
var where = true;
if (andwhere) {
$.each(andwhere, function(j, wh) {
if (obj[wh.col] !== wh.val) {
where = false;
}
});
}
if (where === false) {
return;
}
$.each(obj, function (j, elem) {
if (select.indexOf(j.trim())!==-1) {
temp_obj[j] = elem;
}
});
return_array.push(temp_obj);
});
return return_array;
};
var result = query(data, ['price','amount'],[{"col":"time","val":136804}]);
console.log(JSON.stringify(result));
http://jsfiddle.net/bejgy3sn/1/

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

accessing table cells from puppeteer - puppeteer

Here's an easier way to find that cell: await page.evaluate(() => { let td = [...document.querySelectorAll("td")].find(td => td.innerText === "something") return td?.nextElementSibling?.innerText }) This will return the text or undefined

Related

Google Docs API - complete documentation (hyperlink issue)

Any time a section is mentioned in the document, I want that mention to become a link to the corresponding bookmark

Error: "Reference does not exist" when using a custom function

A Discordjs Chat Filter

Query a JSON list of dict

Categories

Resources