Why is some information sometimes not gathered when using Puppeteer? - puppeteer

I'm using Puppeteer to gather information, and sometimes one of the fields comes back as []. This is an example of how I'm calling the Puppeteer functions:
try {
const browser = await puppeteer.launch({
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
],
});
const page = await browser.newPage();
await page.goto(url);
await page.waitForSelector('div#results');
await page.waitForSelector('h6.bestPrice');
const result = await page.evaluate(() => {
let items = document.querySelectorAll('maintable > tbody > tr.productsName > td > div.products');
if (items.length !== 0) {
let data = [];
items.forEach(function (el) {
let object = null;
el.querySelectorAll('h5 a').forEach(function (el) {
object = { product: el.innerHTML, img: [] };
});
el.querySelectorAll('p a').forEach(function (el) {
object.description = el.innerHTML;
});
el.querySelectorAll('h6.bestPriceText').forEach(function (el) {
object.price = el.innerHTML;
});
el.querySelectorAll('div.slide a.fancybox-thumbs img').forEach(function (el) {
object.img.push(el.getAttribute('src'));
});
data.push(object);
});
return data;
}
let error = null;
document.querySelectorAll('div.noResults div.info-message h2').forEach(function (el) {
error = ({ message: el.innerHTML });
});
return error;
});
await browser.close();
return result;
} catch (error) {
return { message: `Problems loading the page! Please, try again.${error}` };
}
};
The field that sometimes comes back empty is the price. Sorry, I can't reveal the URL, but I would like to know whether there is any problem with the design of this code.

A friend suggested that I add a timeout, because it could be a rendering problem:
// Suggested workaround: pause at the start of the in-page callback before
// reading the DOM. (The snippet is truncated; the rest of the callback is not shown.)
const resultado = await page.evaluate(async () => {
// Promise-based sleep: resolves after `ms` milliseconds.
const timeout = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
// Wait 5 seconds inside the page before continuing.
await timeout(5000);

Related

Puppeteer function not returning array

Hi Guys can you please point my mistake on this code?
console.log(urls) is printing undefined.
Thanks in advance.
const puppeteer = require('puppeteer');
// Launches Chrome, opens the page and collects the href of every
// `a.review.exclick` link into `urls`.
// NOTE: this function has no `return` statement, so it resolves to undefined.
async function GetUrls() {
const browser = await puppeteer.launch( { headless: false,
executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe' })
const page = await browser.newPage();
await page.goto("https://some page");
// Wait until at least one matching link is present before reading the DOM.
await page.waitForSelector('a.review.exclick');
// The callback below is executed by page.evaluate, not in this function's scope.
let urls = await page.evaluate(() => {
let results = [];
let items = document.querySelectorAll('a.review.exclick');
items.forEach((item) => {
results.push({
url: item.getAttribute('href'),
});
});
return results;
// NOTE: unreachable — this line comes after the `return` above.
browser.close();
});
}
// Entry point: run GetUrls, print whatever it resolves to, then exit.
(async () => {
let URLS = await GetUrls();
console.log(URLS);
// Terminates the process with exit code 1.
process.exit(1);
})();
Here is a list:
you don't have a return statement in your GetUrls() function
you close the browser after a return statement AND inside the page.evaluate() method
Keep in mind that anything that is executed within the page.evaluate() will relate to the browser context. To quickly test this, add a console.log("test") before let results = []; and you will notice that nothing appears in your Node.js console, it will appear in your browser console instead.
Therefore, the browser variable is visible within the GetUrls() function but NOT visible within the page.evaluate() method.
Here is the corrected code sample:
const puppeteer = require('puppeteer');
/**
 * Opens the target page in Chrome and collects the `href` attribute of every
 * `a.review.exclick` link.
 *
 * @returns {Promise<Array<{url: (string|null)}>>} One entry per matching link,
 *   in document order.
 */
async function GetUrls() {
  const browser = await puppeteer.launch({
    headless: false,
    executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
  });
  try {
    const page = await browser.newPage();
    await page.goto("https://some page");
    await page.waitForSelector('a.review.exclick');
    // The callback runs in the browser context, so it can only use page
    // globals such as `document`; variables from this function (e.g. `browser`)
    // are not visible inside it.
    const urls = await page.evaluate(() =>
      Array.from(document.querySelectorAll('a.review.exclick'), (item) => ({
        url: item.getAttribute('href'),
      }))
    );
    return urls;
  } finally {
    // Close the browser even when navigation or the selector wait fails,
    // so no Chrome process is left running.
    await browser.close();
  }
}
// Entry point: print the collected URLs and exit with a status that reflects
// the outcome (0 on success, 1 on failure).
(async () => {
  try {
    const URLS = await GetUrls();
    console.log(URLS);
    process.exit(0);
  } catch (error) {
    console.error(error);
    process.exit(1);
  }
})();

puppeteer: Protocol error (Runtime.callFunctionOn): Target closed

I came across a website that puppeteer can't handle. When taking a screenshot, Protocol error (Runtime.callFunctionOn): Target closed or Protocol error (Emulation.setDeviceMetricsOverride): Target closed is triggered. Before taking a screenshot, I scroll the page so that all images are loaded. The page is large, so I set the '--disable-dev-shm-usage' and '--shm-size=3gb' params in the hope of preventing any memory issues. This is sample code with the url included. Any idea why the page is closed in the middle of the operation? In addition to puppeteer-cluster ("^0.23.0"), I am also using puppeteer-extra-plugin-stealth ("^2.9.0") and puppeteer-extra ("^3.2.3").
import puppeteer from 'puppeteer-extra';
import {Cluster} from 'puppeteer-cluster';
import StealthPlugin from "puppeteer-extra-plugin-stealth";
puppeteer.use(StealthPlugin());
// Single-worker cluster driving the stealth-enabled puppeteer instance.
const cluster = await Cluster.launch({
  puppeteer,
  concurrency: Cluster.CONCURRENCY_CONTEXT,
  maxConcurrency: 1,
  // puppeteer-cluster aborts a task after 30 s by default. The screenshot task
  // spends 10 s in fixed waits alone (6 s + 2 × 2 s), plus two scroll passes
  // and two full-page captures, so give it an explicit, larger budget.
  // NOTE(review): a task timeout tearing down the page is presumably what
  // surfaces as "Target closed" — confirm against the cluster's error events.
  timeout: 120000,
  puppeteerOptions: {
    headless: true,
    args: [
      '--disable-setuid-sandbox',
      '--no-sandbox',
      '--window-size=1920,1080',
      '--disable-dev-shm-usage',
      // NOTE(review): `--shm-size` looks like a `docker run` option rather
      // than a Chromium switch — verify it has any effect here.
      '--shm-size=3gb',
    ]
  }
});
// Task run for every queued URL: load the page, wait for the network to go
// mostly idle, then capture the mobile and desktop screenshots.
await cluster.task(async ({ page, data: url }) => {
  await page.goto(url, { waitUntil: "networkidle2" });
  // Screenshot takes only the page; the former second argument referred to a
  // variable that is not defined anywhere in this script.
  await Screenshot(page);
});
/**
 * Scrolls the page down in fixed steps on a timer so that content loaded on
 * scroll gets a chance to appear before a screenshot is taken.
 *
 * Scrolling stops once the accumulated distance reaches
 * `document.body.scrollHeight - window.innerHeight`, or once more than
 * `maxSteps` steps have been taken, whichever comes first.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)` (a Puppeteer page).
 * @param {object} [options]
 * @param {number} [options.distance=389] - Pixels scrolled per step.
 * @param {number} [options.intervalMs=50] - Delay between steps, in milliseconds.
 * @param {number} [options.maxSteps=100] - Step cap; scrolling stops on the first step past it.
 * @returns {Promise<void>} Resolves when scrolling has finished; rejects if a
 *   step throws inside the page.
 */
async function autoScroll(page, { distance = 389, intervalMs = 50, maxSteps = 100 } = {}) {
  // The callback runs in the page context and cannot see this function's
  // variables, so the settings are passed in as evaluate() arguments.
  await page.evaluate(
    (step, delay, limit) =>
      new Promise((resolve, reject) => {
        let totalHeight = 0;
        let counter = 0;
        const timer = setInterval(() => {
          try {
            counter++;
            // Re-read on every tick: the document can grow while scrolling.
            const scrollHeight = document.body.scrollHeight;
            window.scrollBy(0, step);
            totalHeight += step;
            if (totalHeight >= scrollHeight - window.innerHeight || counter > limit) {
              clearInterval(timer);
              resolve();
            }
          } catch (error) {
            // An exception thrown from a timer callback would otherwise leave
            // the promise pending forever; stop the timer and surface it.
            clearInterval(timer);
            reject(error);
          }
        }, delay);
      }),
    distance,
    intervalMs,
    maxSteps
  );
}
/**
 * Captures two full-page screenshots of `page`: a 390x844 "mobile" view saved
 * to ./mobile.jpg, then a 1920x1080 "desktop" view saved to ./desktop.jpg.
 * Before each capture the page is scrolled to the bottom via autoScroll and
 * then back to the top. Any error is logged to the console and not rethrown.
 *
 * @param {object} page - Puppeteer page to capture.
 * @returns {Promise<void>}
 */
async function Screenshot(page) {
  const shouldSave = true;
  const captures = [
    { viewport: { width: 390, height: 844 }, path: "./mobile.jpg" },
    { viewport: { width: 1920, height: 1080 }, path: "./desktop.jpg" },
  ];

  try {
    // Initial settle time before the first resize.
    await page.waitForTimeout(6000);

    for (const { viewport, path } of captures) {
      await page.setViewport(viewport);
      await autoScroll(page);
      // Return to the top so the capture starts from the beginning of the page.
      await page.evaluate(() => window.scrollTo(0, 0));
      await page.waitForTimeout(2000);
      if (shouldSave) {
        await page.screenshot({ path, fullPage: true });
      }
    }
  } catch (error) {
    console.log("we got screenshot error");
    console.log(error);
  }
}
// Queue the single target URL for the task registered on the cluster.
cluster.queue("https://www.sinsay.com/si/sl/sale/woman/view-all-clothes");
// Wait for the cluster to become idle, then close it.
await cluster.idle();
await cluster.close();
stack trace:
ProtocolError: Protocol error (Runtime.callFunctionOn): Target closed.
at /path/to/puppeteer/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:230:24
at new Promise (<anonymous>)
at CDPSession.send (/path/to/puppeteer/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:226:16)
at next (/path/to/puppeteer/node_modules/puppeteer-extra-plugin-stealth/evasions/sourceurl/index.js:32:41)
at CDPSession.send (/path/to/puppeteer/node_modules/puppeteer-extra-plugin-stealth/evasions/sourceurl/index.js:65:16)
at ExecutionContext._evaluateInternal (/path/to/puppeteer/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:204:50)
at ExecutionContext.evaluate (/path/to/puppeteer/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:110:27)
at DOMWorld.evaluate (/path/to/puppeteer/node_modules/puppeteer/lib/cjs/puppeteer/common/DOMWorld.js:123:24)
at processTicksAndRejections (node:internal/process/task_queues:96:5) {
originalMessage: ''
}

Wikipedia API not working alexa javascript

I am very new to Amazon Alexa skills and JavaScript, and I am trying to create a skill that returns the first paragraph of a Wikipedia article. I am trying to use a Promise to retrieve content from an external API, but I can't seem to find my issue. I have tried async and await as well, but didn't get anywhere with it.
This is my code
'''
/**
 * Alexa request handler for GetInfoIntent: runs a Wikipedia search and speaks
 * the snippet of the first result.
 */
const GetInfoIntentHandler = {
  /** True only for an IntentRequest whose intent name is GetInfoIntent. */
  canHandle(handlerInput) {
    const { request } = handlerInput.requestEnvelope;
    return request.type === "IntentRequest" && request.intent.name === "GetInfoIntent";
  },

  /**
   * Fetches the search results and builds the spoken response.
   * Falls back to the default message when the search returns no usable
   * snippet, and to the error message when the request or parsing fails.
   */
  async handle(handlerInput) {
    let outputSpeech = 'This is the default message.';
    try {
      const response = await getRemoteData("https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=god&origin=*&format=json");
      const data = JSON.parse(response);
      // speak() needs a string, not the parsed response object, so pull the
      // first search hit's snippet out of `query.search`.
      const results = data && data.query && Array.isArray(data.query.search)
        ? data.query.search
        : [];
      if (results.length > 0 && typeof results[0].snippet === 'string') {
        // Snippets contain HTML highlight markup; strip tags before speaking.
        const text = results[0].snippet.replace(/<[^>]*>/g, '').trim();
        if (text !== '') {
          outputSpeech = text;
        }
      }
    } catch (err) {
      console.log(`ERROR: ${err.message}`);
      // set an optional error message here
      outputSpeech = "hereeeee";
    }
    return handlerInput.responseBuilder
      .speak(outputSpeech)
      .getResponse();
  },
};
/**
 * Performs an HTTP(S) GET and resolves with the response body as a UTF-8 string.
 *
 * @param {string} url - Absolute URL; the `https` module is used when it starts
 *   with "https", otherwise `http`.
 * @returns {Promise<string>} The full response body.
 *   Rejects with an Error when the status code is outside 200-299, or with the
 *   underlying error when the request or response stream fails.
 */
const getRemoteData = (url) => new Promise((resolve, reject) => {
  const client = url.startsWith('https') ? require('https') : require('http');
  const request = client.get(url, (response) => {
    if (response.statusCode < 200 || response.statusCode > 299) {
      // Discard the body so the socket is released, then stop: without the
      // return, the handlers below would still be attached after rejecting.
      response.resume();
      reject(new Error(`Failed with status code: ${response.statusCode}`));
      return;
    }
    const body = [];
    response.on('data', (chunk) => body.push(chunk));
    // Without an 'end' handler the promise never settles and the caller's
    // await hangs. Concatenate the chunks before decoding so multi-byte
    // characters split across chunk boundaries decode correctly.
    response.on('end', () => resolve(Buffer.concat(body).toString('utf8')));
    response.on('error', reject);
  });
  // Network-level failures (DNS, connection refused, ...) must reject too.
  request.on('error', reject);
});
'''
[My error][1]
This is the API i am using:"https://en.wikipedia.org/w/api.php?format=json&origin=*&action=query&prop=extracts&exlimit=max&explaintext&titles="+query+"&redirects=",
[My BUILD ][2]
Can you please tell me where i am going wrong
[1]: https://i.stack.imgur.com/Gvp2Q.png
[2]: https://i.stack.imgur.com/bAvzR.png

How to get text from xPath in Puppeteer node js

I need to get a text from the span tag and to verify whether the text equals to "check".
How can I achieve this in puppeteer?
Below is an example of the code I've written. If anyone could help me figure this out, I'd appreciate it.
const puppeteer = require("puppeteer");
// Logs in to Flipkart, searches for "iPhone 11", opens the first result and
// reports whether the target <span> contains "Check" and/or "Change".
// NOTE(review): `username` and `password` are not defined in this snippet;
// they must be provided by the surrounding script.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    // "slowMo": 50,
    args: ["--start-fullscreen"],
    defaultViewport: null,
  });
  //Page
  const page2 = await browser.newPage();
  await page2.goto("https://www.flipkart.com");

  // Returns the first element matching `xpath`, and fails with a descriptive
  // error (rather than an undefined dereference) when nothing matches.
  const firstByXPath = async (xpath) => {
    const [handle] = await page2.$x(xpath);
    if (!handle) {
      throw new Error(`No element matches XPath: ${xpath}`);
    }
    return handle;
  };
  const typeInto = async (xpath, value) => (await firstByXPath(xpath)).type(value);
  const clickOn = async (xpath) => (await firstByXPath(xpath)).click();

  // XPath attribute tests are written with `@` (e.g. [@class='...']);
  // `#` is not valid there.
  await page2.waitFor(2000);
  await typeInto("//input[@class='_2zrpKA _1dBPDZ']", username);
  await page2.waitFor(2000);
  await typeInto("//input[@type='password']", password);
  await page2.waitFor(2000);
  await clickOn("//button[@class='_2AkmmA _1LctnI _7UHT_c']");
  await page2.waitFor(2000);
  await typeInto("//input[@class='LM6RPg']", "iPhone 11");
  await page2.waitFor(2000);
  await clickOn("//button[@class='vh79eN']");
  await page2.waitFor(2000);
  await clickOn("//div[@class='col col-7-12']/div");
  await page2.waitFor(2000);

  // Read the span's text inside the page context.
  const element = await firstByXPath('//span[@class="_2aK_gu"]');
  const text = await page2.evaluate((node) => node.textContent, element);
  if (text.includes("Check")) {
    console.log("Check Present");
  }
  if (text.includes("Change")) {
    console.log("Change Present");
  }
})();
//get the xpath of the webelement
// Take the first element returned for the XPath expression.
// NOTE(review): the predicate in '//div[]' is empty — it looks like a
// placeholder to be replaced with a real condition; confirm before use.
const [getXpath] = await page.$x('//div[]');
// Read that element's innerText via page.evaluate.
const getMsg = await page.evaluate(name => name.innerText, getXpath);
// Print the extracted text.
console.log(getMsg)
Here is the complete code for getting div or any html element data using xpath....
const puppeteer = require("puppeteer");
/**
 * Opens the Twitter profile page, waits for the element at a fixed absolute
 * XPath to appear, and logs that element's innerText.
 *
 * @returns {Promise<void>}
 */
async function scrape() {
  // Single source of truth for the XPath, used for both the wait and the lookup.
  const targetXPath = '/html/body/div[1]/div/div/div[2]/main/div/div/div/div/div/div[2]/div/div/section/div/div/div[1]/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div[1]/div/div[1]/a/div/div[1]/span/span';

  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto("https://twitter.com/elonmusk", { waitUntil: "networkidle2" });
    await page.waitForXPath(targetXPath);
    // $x returns an array of handles; take the first match.
    const [el] = await page.$x(targetXPath);
    const names = await page.evaluate((node) => node.innerText, el);
    console.log(names);
  } finally {
    // Always close the browser, including when navigation or the wait times out.
    await browser.close();
  }
}

// Report failures instead of leaving the returned promise unhandled.
scrape().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
You can get the text from the selected element like this:
await page.goto(url, {waitUntil: "networkidle2"});
// XPath attribute tests use `@` (here: @class), not `#`.
await page.waitForXPath('//span[@class="_2aK_gu"]');
//assuming it's the first element
let [element] = await page.$x('//span[@class="_2aK_gu"]');
// Read the element's text inside the page context.
let text = await page.evaluate(element => element.textContent, element);
Note that page.$x returns an array of ElementHandles, so the code here assumes it's the first element. I'd suggest you choose a more specific XPath than a class, as many elements may share it.
For the condition:
if (text.includes("Check"))
// handle the "Check" case here
else if (text.includes("Change"))
// handle the "Change" case here

How do I wait for each of these promises to execute asynchronously?

conversation.user and conversation.secondUser are appended to the conversation object but the nested messages loop executes after the response is sent to the client.
// `find` hook: attaches user records to each conversation and to each of its messages.
find: [
async (context) => {
// Promise.all waits for the promise returned for each conversation.
await Promise.all(context.result.data.map((conversation) => {
return context.app.service('users').get(conversation.userId).then((data) => {
conversation.user = data;
return context.app.service('users').get(conversation.secondUserId).then((data) => {
conversation.secondUser = data;
// NOTE: this returns a plain array of promises. The chain resolves with that
// array without waiting for its elements, so the per-message lookups below
// are not awaited by the outer Promise.all.
return conversation.messages.map((message) => {
return context.app.service('users').get(message.userId).then((data) => {
console.log(data);
message.user = data;
});
});
});
});
}));
// Use the (mutated) result as the data dispatched to the client.
context.dispatch = context.result;
return context;
}
],
Two things:
You forgot a Promise.all in the last section
You are making your life harder by not fully making use of async/await
This should work:
// `find` hook: attaches `user` and `secondUser` to every conversation in the
// result and `user` to each of its messages, and only then sets
// `context.dispatch`, so the data sent to the client is fully populated.
find: [
  async (context) => {
    const users = context.app.service('users');
    await Promise.all(context.result.data.map(async (conversation) => {
      // The two participant lookups are independent of each other, so run
      // them concurrently instead of one after the other.
      const [data, secondData] = await Promise.all([
        users.get(conversation.userId),
        users.get(conversation.secondUserId),
      ]);
      conversation.user = data;
      conversation.secondUser = secondData;
      // Wait for every message's author lookup before this conversation is done.
      await Promise.all(conversation.messages.map(async (message) => {
        const author = await users.get(message.userId);
        console.log(author);
        message.user = author;
      }));
    }));
    context.dispatch = context.result;
    return context;
  }
]