How can I get the fully rendered HTML+CSS of a client-side rendered webpage? page.content() in Puppeteer returns a very poorly rendered result with most of the CSS missing.
Simplified code:
const express = require('express');
const puppeteer = require('puppeteer');

const app = express();
const port = 3000;

async function getHtml(url) {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox']
  });
  const page = await browser.newPage();
  await page.goto(url,
    { waitUntil: ['networkidle0', 'networkidle2', 'load', 'domcontentloaded'] });
  const k = await page.content();
  await browser.close();
  return k;
}

app.get('/', (request, response) => {
  getHtml(request.query.url)
    .then(function (res) {
      response.send(res);
    })
    .catch(function (err) {
      console.error(err);
      response.send(err);
    });
});

app.listen(port);
Running this with any website, for example https://www.tesla.com/, gives something like the page shown below, with most of the styling missing, although using the page.screenshot() method gives the desired result.
Any ideas on why this occurs? And more importantly, is there a way to get around this behaviour?
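One likely explanation (not confirmed in this thread) is that page.content() only serializes the DOM: the styling lives in external stylesheets referenced by relative URLs, and those references break once the captured HTML is re-served from localhost:3000. A minimal sketch of a workaround inside getHtml, under that assumption, is to inject a <base> tag before serializing:

// Hypothetical workaround: make relative stylesheet/script/image URLs resolve
// against the original site instead of the server that re-serves the HTML.
await page.evaluate((origin) => {
  const base = document.createElement('base');
  base.href = origin;
  document.head.prepend(base);
}, new URL(url).origin);
const html = await page.content(); // the returned markup now keeps its CSS references working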
Related
Hi, I've been trying to get Puppeteer to take a screenshot of full pages, including all images. Unfortunately, background images are getting omitted (see comparison below). I can't figure out how to capture them.
Here's my code:
async function screenshotFullPage(url: string): Promise<string> {
  const browser = await puppeteer.launch({ headless: true });
  const page = await browser.newPage();
  await page.goto(url, { waitUntil: "networkidle0" });
  await page.evaluate(async () => {
    const selectors = Array.from(document.querySelectorAll("img"));
    // https://stackoverflow.com/questions/46160929/puppeteer-wait-for-all-images-to-load-then-take-screenshot
    document.body.scrollIntoView(false);
    await Promise.all(
      selectors.map((img) => {
        if (img.complete) return;
        return new Promise((resolve, reject) => {
          img.addEventListener("load", resolve);
          img.addEventListener("error", reject);
        });
      })
    );
  });
  await sleep(5000); // resolves in 5 sec
  const path = generateScreenshotPath();
  await page.screenshot({
    path,
    fullPage: true,
  });
  await browser.close();
  return path; // the function promises a string, so return the file path rather than browser.close()
}

await screenshotFullPage("https://chesskid.com");
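One common cause, though not confirmed for chesskid.com, is that background images come from CSS rather than <img> tags, so waiting on img.complete never covers them, and many sites only load them lazily once they are scrolled into view. A rough sketch, assuming that is the problem here, is to scroll through the page before taking the screenshot:

// Scroll the page in viewport-sized steps so lazily loaded CSS background
// images (which are not <img> elements) get a chance to load, then return
// to the top before the full-page screenshot is taken.
await page.evaluate(async () => {
  const step = window.innerHeight;
  for (let y = 0; y < document.body.scrollHeight; y += step) {
    window.scrollTo(0, y);
    await new Promise((resolve) => setTimeout(resolve, 250));
  }
  window.scrollTo(0, 0);
});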
Hi guys, can you please point out my mistake in this code?
console.log(URLS) is printing undefined.
Thanks in advance.
const puppeteer = require('puppeteer');

async function GetUrls() {
  const browser = await puppeteer.launch({
    headless: false,
    executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
  });
  const page = await browser.newPage();
  await page.goto("https://some page");
  await page.waitForSelector('a.review.exclick');
  let urls = await page.evaluate(() => {
    let results = [];
    let items = document.querySelectorAll('a.review.exclick');
    items.forEach((item) => {
      results.push({
        url: item.getAttribute('href'),
      });
    });
    return results;
    browser.close();
  });
}

(async () => {
  let URLS = await GetUrls();
  console.log(URLS);
  process.exit(1);
})();
Here is a list of the problems:
- you don't have a return statement in your GetUrls() function
- you close the browser after a return statement AND inside the page.evaluate() method
Keep in mind that anything executed within page.evaluate() runs in the browser context. To quickly test this, add a console.log("test") before let results = []; and you will notice that nothing appears in your Node.js console; it shows up in the browser console instead.
Therefore, the browser variable is visible within the GetUrls() function but NOT visible within the page.evaluate() method.
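If you want those browser-side logs to show up in your terminal while debugging, a minimal sketch (not part of the original answer) is to forward console messages with page.on('console'):

// Optional debugging aid: forward messages logged inside page.evaluate()
// (browser context) to the Node.js console.
page.on('console', (msg) => console.log('PAGE LOG:', msg.text()));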
Here is the corrected code sample:
const puppeteer = require('puppeteer');

async function GetUrls() {
  const browser = await puppeteer.launch({
    headless: false,
    executablePath: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
  });
  const page = await browser.newPage();
  await page.goto("https://some page");
  await page.waitForSelector('a.review.exclick');
  let urls = await page.evaluate(() => {
    let results = [];
    let items = document.querySelectorAll('a.review.exclick');
    items.forEach((item) => {
      results.push({
        url: item.getAttribute('href'),
      });
    });
    return results;
  });
  await browser.close();
  return urls;
}

(async () => {
  let URLS = await GetUrls();
  console.log(URLS);
  process.exit(1);
})();
I'm trying to submit a login form, but all I get is a timeout after 30 seconds.
My code is rather simple and I can't find anything wrong:
const puppeteer = require('puppeteer');

const creds = {
  user: "1234",
  password: "1234"
};

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.setViewport({ width: 1280, height: 800 });
  await page.goto('https://shop2.idena.de/NewShop/');
  await page.type('input[name="FORM_LOGIN"]', creds.user);
  await page.type('input[name="FORM_PASSWD"]', creds.password);
  await Promise.all([
    page.click('button[name="FORM_TYPE"]'),
    page.waitForNavigation()
  ]);
  await page.screenshot({ path: 'example.png', fullPage: true });
  await browser.close();
})();
Any ideas what's going wrong here?
Change the order of the promises a bit: it is possible the navigation happens so fast that waitForNavigation ends up waiting for nothing. Or maybe the website loads very slowly after clicking the login button, so a longer timeout helps.
await Promise.all([
  page.waitForNavigation({ timeout: 60000 }),
  page.click('button[name="FORM_TYPE"]'),
]);
If I use your example with the headful option, I get this dialog, which prevents the page from loading:
So this addition can help (I'm not sure whether a dialog also appears with correct credentials):
const puppeteer = require('puppeteer');

const creds = {
  user: "1234",
  password: "1234"
};

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.setViewport({ width: 1280, height: 800 });
  await page.goto('https://shop2.idena.de/NewShop/');
  await page.type('input[name="FORM_LOGIN"]', creds.user);
  await page.type('input[name="FORM_PASSWD"]', creds.password);
  page.on('dialog', async (dialog) => {
    console.log(dialog.message());
    await dialog.accept();
  });
  await Promise.all([
    page.click('button[name="FORM_TYPE"]'),
    page.waitForNavigation()
  ]);
  await page.screenshot({ path: 'example.png', fullPage: true });
  await browser.close();
})();
EDIT: I'm updating my answer since more information has been provided in the original question.
The problem is that there's a dialog you need to confirm/dismiss:
Perhaps you didn't see it because the script was too fast. I recommend debugging Puppeteer scripts with headless set to false and slowMo set to some number greater than 0:
const browser = await puppeteer.launch({ headless: false, slowMo: 200 });
Then you need to get rid of the dialog:
page.on('dialog', async (dialog) => {
  await dialog.accept();
});
The whole script that now passes:
const puppeteer = require('puppeteer');

const creds = {
  user: "1234",
  password: "1234"
};

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.setViewport({ width: 1280, height: 800 });
  await page.goto('https://shop2.idena.de/NewShop/');
  await page.type('input[name="FORM_LOGIN"]', creds.user);
  await page.type('input[name="FORM_PASSWD"]', creds.password);
  page.on('dialog', async (dialog) => {
    await dialog.accept();
  });
  await Promise.all([
    page.click('button[name="FORM_TYPE"]'),
    page.waitForNavigation()
  ]);
  await page.screenshot({ path: 'example.png', fullPage: true });
  await browser.close();
})();
I would like to log in on a site which is using Cloudflare DDoS protection, like this:
The code is simple:
const puppeteer = require('puppeteer');
const C = require('./constants');

const USERNAME_SELECTOR = 'input[name="username"]';
const PASSWORD_SELECTOR = 'input[name="password"]';
const CTA_SELECTOR = '.button';

var cloudscraper = require('cloudscraper');

async function startBrowser() {
  const browser = await puppeteer.launch({
    headless: true,
    slowMo: 10000,
  });
  const page = await browser.newPage();
  return { browser, page };
}

async function closeBrowser(browser) {
  return browser.close();
}

async function playTest(url) {
  const { browser, page } = await startBrowser();
  await page.setViewport({ width: 1366, height: 768 });
  await page.goto(url, { waitUntil: 'domcontentloaded' });
  await page.screenshot({ path: 'debug.png' });
  await page.click(USERNAME_SELECTOR);
  await page.keyboard.type(C.username);
  await page.click(PASSWORD_SELECTOR);
  await page.keyboard.type(C.password);
  await page.click(CTA_SELECTOR);
  await page.waitForNavigation();
  await page.screenshot({ path: 'ipt.png' });
}

(async () => {
  await playTest("https://xy.com/login.php");
  process.exit(1);
})();
When I check debug.png, I see only the Cloudflare DDoS protection page. I don't really understand why; I added slowMo: 10000 to slow down the execution.
You can add a simple waitForSelector to wait until the username field appears:
await page.waitForSelector(USERNAME_SELECTOR);
await page.click(USERNAME_SELECTOR);
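If the Cloudflare challenge itself takes a few seconds to clear, the default 30-second timeout may still be too tight. A small, hedged addition is to pass a longer timeout to waitForSelector (60000 here is just an example value):

await page.waitForSelector(USERNAME_SELECTOR, { timeout: 60000 }); // wait up to 60 s for the login form (example value)
await page.click(USERNAME_SELECTOR);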
I want to close pages when Puppeteer runs into any error. Sometimes the page that I try to load crashes and .close() is never called.
(async () => {
  const page = await browser.newPage();
  await page.setViewport({ width: resWidth, height: resHeight });
  await page.goto(d["entities"]["urls"][0]["expanded_url"], { timeout: 90000 });
  await page.screenshot({ path: './resimdata/' + d['id'] + '.png', fullPage: true });
  await page.close();
})();
There is an issue/PR on the Puppeteer repo regarding this which will be helpful in similar situations.
Related Issue link: https://github.com/GoogleChrome/puppeteer/issues/952
Meanwhile, you can try this little hack; if the PR makes it into version 0.12+, we won't have to worry about the following code.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  function handleClose(msg) {
    console.log(msg);
    page.close();
    browser.close();
    process.exit(1);
  }

  process.on("uncaughtException", () => {
    handleClose(`I crashed`);
  });

  process.on("unhandledRejection", () => {
    handleClose(`I was rejected`);
  });

  await page.goto("chrome://crash");
})();
This will output something like the following:
▶ node app/app.js
I was rejected
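As an aside not covered by the original answer, if the goal is simply to make sure the tab is closed whenever page.goto throws, wrapping the navigation in try/catch/finally also works; browser, d, resWidth and resHeight below are assumed to exist as in the question:

(async () => {
  const page = await browser.newPage();
  try {
    await page.setViewport({ width: resWidth, height: resHeight });
    await page.goto(d["entities"]["urls"][0]["expanded_url"], { timeout: 90000 });
    await page.screenshot({ path: './resimdata/' + d['id'] + '.png', fullPage: true });
  } catch (err) {
    console.error('Page failed to load:', err.message); // log and move on
  } finally {
    await page.close().catch(() => {}); // always try to release the tab, even after an error
  }
})();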