I'm trying the puppeteer-proxy library for the first time, and I'm getting this error.
I don't know whether this is an error in puppeteer-proxy or in the call await page.setRequestInterception(true), because this guy has the same error as me.
Code
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
const {proxyRequest} = require('puppeteer-proxy')
puppeteer.use(StealthPlugin())
// Module-level handle so the launched browser stays reachable after start-up.
var browser = null
/**
 * Launches the bundled Chrome, forwards every page request through the
 * authenticated proxy via puppeteer-proxy, and navigates to Google.
 * `func` holds the promise for that whole run.
 */
const func = (async () => {
    browser = await puppeteer.launch({
        headless: false,
        executablePath: "chrome-win/chrome.exe"
    })
    const page = await browser.newPage()
    // Interception must be on so each request can be handed to proxyRequest().
    await page.setRequestInterception(true);
    page.on('request', async (request) => {
        await proxyRequest({
            page,
            // Credentials are separated from the host by "@". With "#" in that
            // position the string is not a valid proxy URL (everything after "#"
            // is a fragment, leaving "password" where a port number is expected).
            // NOTE(review): many proxies only speak plain HTTP — if the request
            // still fails, try the http:// scheme; confirm with the proxy provider.
            proxyUrl: "https://username:password@174.25.210.207:6286",
            request,
        });
    });
    await page.goto("https://www.google.com/")
})();
Error
C:\Users\edina\Documents\Last Developer Projects\Scraping Browser\node_modules\puppeteer-core\lib\cjs\puppeteer\common\Frame.js:238
? new Error(`${response.errorText} at ${url}`)
^
Error: net::ERR_FAILED at https://www.google.com/
at navigate (C:\Users\edina\Documents\Last Developer Projects\Scraping Browser\node_modules\puppeteer-core\lib\cjs\puppeteer\common\Frame.js:238:23)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
at async Frame.goto (C:\Users\edina\Documents\Last Developer Projects\Scraping Browser\node_modules\puppeteer-core\lib\cjs\puppeteer\common\Frame.js:207:21)
at async CDPPage.goto (C:\Users\edina\Documents\Last Developer Projects\Scraping Browser\node_modules\puppeteer-core\lib\cjs\puppeteer\common\Page.js:439:16)
at async C:\Users\edina\Documents\Last Developer Projects\Scraping Browser\test.js:28:5
Related
Hi guys, can you please point out my mistake in this code?
console.log(URLS) is printing undefined.
Thanks in advance.
const puppeteer = require('puppeteer');
// Launches Chrome, opens the page, waits for the 'a.review.exclick' links and
// collects each link's href into `urls`.
// NOTE(review): the function body has no return statement, so a caller that
// awaits GetUrls() receives undefined.
async function GetUrls() {
const browser = await puppeteer.launch( { headless: false,
executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe' })
const page = await browser.newPage();
await page.goto("https://some page");
await page.waitForSelector('a.review.exclick');
// The callback below is handed to page.evaluate(); it builds one { url } object
// per matching anchor and returns the list.
let urls = await page.evaluate(() => {
let results = [];
let items = document.querySelectorAll('a.review.exclick');
items.forEach((item) => {
results.push({
url: item.getAttribute('href'),
});
});
return results;
// NOTE(review): unreachable — this statement follows the return above, and it
// sits inside the page.evaluate() callback rather than in GetUrls() itself.
browser.close();
});
}
// Entry point: awaits GetUrls(), prints the value it resolved to, then exits
// the process with status 1.
(async () => {
let URLS = await GetUrls();
console.log(URLS);
process.exit(1);
})();
Here is a list:
you don't have a return statement in your GetUrls() function
you close the browser after a return statement AND inside the page.evaluate() method
Keep in mind that anything that is executed within the page.evaluate() will relate to the browser context. To quickly test this, add a console.log("test") before let results = []; and you will notice that nothing appears in your Node.js console, it will appear in your browser console instead.
Therefore, the browser variable is visible within the GetUrls() function but NOT visible within the page.evaluate() method.
Here is the corrected code sample:
const puppeteer = require('puppeteer');
/**
 * Collects the href of every `a.review.exclick` link on the target page.
 *
 * @returns {Promise<Array<{url: (string|null)}>>} One `{ url }` entry per
 *   matching anchor, in document order.
 */
async function GetUrls() {
    const browser = await puppeteer.launch({
        headless: false,
        executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
    })
    // try/finally: Chrome is shut down even when navigation, the selector wait
    // or the evaluation rejects, instead of being left running.
    try {
        const page = await browser.newPage();
        await page.goto("https://some page");
        await page.waitForSelector('a.review.exclick');
        // The callback runs in the browser context; only its return value is
        // handed back to Node.js.
        return await page.evaluate(() => {
            const items = document.querySelectorAll('a.review.exclick');
            return Array.from(items, (item) => ({
                url: item.getAttribute('href'),
            }));
        });
    } finally {
        await browser.close();
    }
}
// Entry point: print the collected URLs. Exit status is 0 on success and 1 only
// when GetUrls() rejects, so callers of the script can tell the two apart.
(async () => {
    try {
        const URLS = await GetUrls();
        console.log(URLS);
        process.exit(0);
    } catch (err) {
        console.error(err);
        process.exit(1);
    }
})();
When a page is rendered from static HTML content using the page.setContent method, what is the current folder for relative attributes such as the src of img tags?
For example, for:
// Single quotes outside keep the attribute's double quotes inside one string literal.
await page.setContent('<img src="./pic.jpg" />');
where is the folder ./?
Maybe it's undefined, here is my test result:
const puppeteer = require('puppeteer');
// Probe 1: render an <img> with a relative src through setContent and log the
// page URL together with every request the page sends.
(async () => {
    const launchOptions = {args: ['--no-sandbox', '--disable-setuid-sandbox']};
    const markup = '<img src="./test.jpg" /><script>console.log("href="+window.location.href);</script>';
    const reportRequest = (outgoing) => {
        console.log('send request: ' + outgoing.url());
    };
    const reportConsole = (entry) => {
        console.log('console: ' + entry.text());
    };

    const browser = await puppeteer.launch(launchOptions);
    const page = await browser.newPage();
    page.on('request', reportRequest);
    page.on('console', reportConsole);
    await page.setContent(markup);
    await browser.close();
})();
output:
console: href=about:blank
The page URL is about:blank and no requests are sent.
const puppeteer = require('puppeteer');
// Probe 2: same page as before, but with a <base> element pointing at a remote
// origin, to see where the relative image URL gets resolved.
(async () => {
    const html = '<base href="https://www.google.com"><img src="./test.jpg" /><script>console.log("href="+window.location.href);</script>';
    const browser = await puppeteer.launch({
        args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage();

    // Mirror outgoing requests and in-page console output on the Node.js console.
    page.on('request', (req) => {
        console.log('send request: ' + req.url());
    });
    page.on('console', (msg) => {
        console.log('console: ' + msg.text());
    });

    await page.setContent(html);
    await browser.close();
})();
output:
console: href=about:blank
send request: https://www.google.com/test.jpg
console: Failed to load resource: the server responded with a status of 404 ()
The browser requests test.jpg once a base element is added, while the page URL is still about:blank.
const puppeteer = require('puppeteer');
// Probe 3: point the <base> element at a local file:// URL and observe how the
// relative image URL is resolved.
(async () => {
    const sandboxFlags = ['--no-sandbox', '--disable-setuid-sandbox'];
    const browser = await puppeteer.launch({args: sandboxFlags});
    const page = await browser.newPage();

    const printWithPrefix = (prefix, text) => console.log(prefix + text);
    page.on('request', (req) => printWithPrefix('send request: ', req.url()));
    page.on('console', (msg) => printWithPrefix('console: ', msg.text()));

    // The base href is a local URL here.
    const localBaseMarkup = '<base href="file:///abc/index.html"><img src="./test.jpg" /><script>console.log("href="+window.location.href);</script>';
    await page.setContent(localBaseMarkup);
    await browser.close();
})();
output:
console: href=about:blank
console: Not allowed to load local resource: file:///abc/test.jpg
send request: file:///abc/test.jpg
The folder is resolved relative to the URL of the page you are visiting.
For example if the URL is
mydomain.com/directory1/page.html
The image can be found at mydomain.com/directory1/pic.jpg
Tried below code but getting an error.
Error: net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH at
https://www.xxxxxxsolutions.com/
const puppeteer = require('puppeteer');
// Opens the site with certificate errors ignored and prints every cookie the
// browser holds afterwards as indented JSON.
(async () => {
    const browser = await puppeteer.launch({
        ignoreHTTPSErrors: true,
        acceptInsecureCerts: true,
        args: [
            '--proxy-bypass-list=*',
            '--disable-gpu',
            '--disable-dev-shm-usage',
            '--disable-setuid-sandbox',
            '--no-first-run',
            '--no-sandbox',
            '--no-zygote',
            '--single-process',
            '--ignore-certificate-errors',
            '--ignore-certificate-errors-spki-list',
            '--enable-features=NetworkService',
        ],
    });
    try {
        const page = await browser.newPage();
        await page.goto('https://www.xxxxxxxsolutions.com/', {waitUntil: 'networkidle2', timeout: 59000});
        // Open a CDP session through the public API instead of reaching into the
        // private page._client field, whose shape is not part of the API contract.
        const client = await page.target().createCDPSession();
        const cookies = await client.send('Network.getAllCookies');
        // The serialized cookies used to be computed and thrown away; print them.
        console.log(JSON.stringify(cookies, null, 4));
    } catch (e) {
        console.log(e);
    } finally {
        // Runs on success and failure alike, so Chrome never stays behind.
        await browser.close();
    }
})();
@mujuonly, this is a version-related issue. Please try the same code with a Puppeteer version above 1.16.0 or with the latest version, 2.0. It works fine there.
How do I get into an application that uses Windows authentication?
Hi all, I am new to Puppeteer and am trying to do some automation and performance testing with it. While trying to open the application and run a sample check, I cannot proceed because I am unable to get past Windows authentication. Please help. I tried the code below, but it does not work :(
const puppeteer = require('puppeteer');
/**
 * Opens the target page through a proxy using the given credentials, reads the
 * tile container's markup, and saves the page as hn.pdf.
 *
 * @returns {Promise<void>} Settles once the PDF is written and the browser is closed.
 */
async function test() {
    const proxyUrl = 'URL';
    const username = 'Uname';
    const password = 'pwd';
    const browser = await puppeteer.launch({
        args: [`--proxy-server=${proxyUrl}`],
        headless: false,
    });
    // try/finally: the browser is closed (and the close awaited) even when a
    // step below rejects.
    try {
        const page = await browser.newPage();
        // Supplies the credentials used to answer HTTP authentication challenges.
        await page.authenticate({ username, password });
        await page.goto('URL')
        const html = await page.$eval('.ds-tile-container', e => e.innerHTML)
        // NOTE(review): `expect` is not defined anywhere in this snippet —
        // presumably a Jest-style global supplied by the test runner; confirm.
        expect(html).not.toBeNull();
        await page.pdf({ path: 'hn.pdf', format: 'A4' });
    } finally {
        await browser.close()
    }
}
// Surface failures instead of leaving an unhandled promise rejection.
test().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
I also tried the code below:
// proxy-chain rejects an https:// upstream with 'Invalid "proxyUrl" option: only
// HTTP proxies are currently supported', so the upstream proxy is addressed
// with the http:// scheme.
const oldProxyUrl = 'http://siteurl:8080';
// anonymizeProxy() resolves to the proxy URL that the browser should use.
const newProxyUrl = await proxyChain.anonymizeProxy(oldProxyUrl);
console.log(newProxyUrl);
const args = [
    // Without this flag the anonymized proxy is created but Chrome never uses it.
    `--proxy-server=${newProxyUrl}`,
    '--disable-setuid-sandbox',
    '--no-sandbox',
    '--ignore-certificate-errors',
    // No trailing space: the flag name must match exactly.
    '--ignore-certificate-errors-spki-list',
];
const options = {
    args,
    headless: true,
    ignoreHTTPSErrors: true,
};
const browser = await puppeteer.launch(options);
Now I am getting an error like this:
(node:20520) UnhandledPromiseRejectionWarning: Unhandled promise
rejection (rejection id: 1): Error: Invalid "proxyUrl" option: only
HTTP proxies are currently supported. (node:20520) [DEP0018]
DeprecationWarning: Unhandled promise rejections are deprecated. In
the future, promise rejections that are not handled will terminate the
Node.js process with a non-zero exit code.
I'm trying to collect data about failing requests and JS errors.
I'm using the following site: https://nitzani1.wixsite.com/marketing-automation/3rd-page
The site makes a request to https://api.fixer.io/1latest, which returns a status code of 404;
the page also contains the following JS error:
"Uncaught (in promise) Fetch did not succeed"
I've tried the code below to catch the 404 and the JS error, but couldn't.
I'm not sure what I'm doing wrong. Any idea how to solve it?
const puppeteer = require('puppeteer');
/**
 * Pauses for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Promise fulfilled (with no value) once the delay elapses.
 */
function wait (ms) {
    return new Promise((done) => {
        // Node.js invokes the timer callback without arguments, so the promise
        // is fulfilled with undefined.
        setTimeout(done, ms);
    });
}
/**
 * Opens the demo page in a visible, fullscreen Chrome and logs page crashes,
 * uncaught page errors, failed requests and HTTP error responses.
 *
 * NOTE(review): the browser is never closed — presumably left open on purpose
 * for manual inspection; confirm.
 */
const run = async () => {
    const browser = await puppeteer.launch({
        headless: false,
        args: ['--start-fullscreen']
    });
    // Declared locally; assigning to an undeclared `page` created an implicit global.
    const page = await browser.newPage();
    page.on('error', err => {
        console.log('err: ' + err);
    });
    page.on('pageerror', pageerr => {
        console.log('pageerr: ' + pageerr);
    });
    // One listener is enough. The handler receives the request object, so print
    // its URL and failure text — concatenating the object itself only yields
    // "[object Object]".
    page.on('requestfailed', request => {
        const failure = request.failure();
        const reason = failure ? failure.errorText : 'unknown';
        console.log('requestfailed: ' + request.url() + ' (' + reason + ')');
    });
    // A 404 is still a completed response, so it never reaches 'requestfailed';
    // the status code has to be inspected here.
    page.on('response', response => {
        const status = response.status();
        if (status >= 400) {
            console.log('response err: ' + status + ' ' + response.url());
        }
    });
    await wait(500);
    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://nitzani1.wixsite.com/marketing-automation/3rd-page');
};
// Surface failures instead of leaving an unhandled promise rejection.
run().catch(err => {
    console.error(err);
    process.exitCode = 1;
});
The complete working answer is:
const puppeteer = require('puppeteer');
/**
 * Loads the demo page in headless Chrome and logs failed requests, HTTP error
 * responses, uncaught page errors and console messages, then closes the browser.
 *
 * @returns {Promise<void>} Settles after the 10-second observation window.
 */
const run = async () => {
    const browser = await puppeteer.launch({
        headless: true
    });
    // try/finally: the browser is closed even when navigation rejects.
    try {
        const page = await browser.newPage();

        // Requests that failed at the network level (DNS errors, aborts, timeouts).
        // 4xx/5xx answers are complete responses and do NOT trigger this event.
        page.on('requestfailed', request => {
            // failure() can be null, so guard before reading errorText.
            const failure = request.failure();
            const errText = failure ? failure.errorText : 'unknown';
            console.log(`url: ${request.url()}, errText: ${errText}, method: ${request.method()}`)
        });

        // HTTP error statuses (4xx..5xx) arrive as ordinary responses.
        page.on('response', response => {
            const status = response.status();
            if (status >= 400) {
                console.log(`url: ${response.url()}, status: ${status}, method: ${response.request().method()}`)
            }
        });

        // Uncaught exceptions thrown inside the page
        page.on("pageerror", err => {
            console.log(`Page error: ${err.toString()}`);
        });

        // Catch all console messages
        page.on('console', msg => {
            console.log('Logger:', msg.type());
            console.log('Logger:', msg.text());
            console.log('Logger:', msg.location());
        });

        await page.setViewport({ width: 1920, height: 1080 });
        await page.goto('https://nitzani1.wixsite.com/marketing-automation/3rd-page', { waitUntil: 'domcontentloaded' });
        // page.waitFor() is deprecated and absent from later Puppeteer releases; a
        // plain timer gives the page the same 10 s to log late exceptions.
        await new Promise(resolve => setTimeout(resolve, 10000));
    } finally {
        await browser.close();
    }
};
// Surface failures instead of leaving an unhandled promise rejection.
run().catch(err => {
    console.error(err);
    process.exitCode = 1;
});
Save in .js file and easily run it.
Current Puppeteer versions (8.0.0 and later) provide very little information in message.text(), so we need to get the description of the error from the JSHandle.
Please check this comment, which shows how to get fully descriptive console errors from the JSHandle object.
Check the link here https://stackoverflow.com/a/66801550/9026103