I want to build a LinkedIn automatic request sender.
Task to do?
open linkedin.com
login into LinkedIn using login details
Do search for people with the keyword
send a connection request with a note.
I am unable t
const puppeteer = require('puppeteer');
const select = require('puppeteer-select');
// CSS selectors for the username field, password field and submit button
// that the script clicks on the login page.
const FORM = {
  USERNAME_SELECTOR: '#username',
  PASSWORD_SELECTOR: '#password',
  BUTTON_SELECTOR: '.btn__primary--large.from__button--floating',
};

// Placeholder login details that get typed into the form.
const CREDENTIALS = {
  USERNAME: 'Username',
  PASSWORD: 'password',
};

// Search-box selector, the keyword typed into it, and a selector for a
// connect button.
// NOTE(review): '#ember52' looks like an auto-generated id — confirm it is
// stable before relying on it.
const SEARCH = {
  SEARCH_SELECTOR: '#global-nav-search',
  KEYWORD: '',
  CONNECT: '#ember52',
};
/**
 * Turns an arbitrary string into an XPath `concat(...)` expression that
 * evaluates to that string. Every single quote in the input is emitted as
 * its own double-quoted argument, so the input cannot break out of the
 * single-quoted literals around it.
 *
 * @param {string} str - Text to embed in an XPath expression.
 * @returns {string} An XPath `concat(...)` expression.
 */
const escapeXpathString = str => {
  const pieces = str.split("'");
  const joined = pieces.join(`', "'", '`);
  return "concat('" + joined + "', '')";
};
/**
 * Clicks the first `<a>` element whose text contains `text`.
 *
 * @param {object} page - Object exposing `$x(xpath)` that resolves to an array of handles.
 * @param {string} text - Text to look for inside the link.
 * @throws {Error} When no matching link is found.
 */
const clickByText = async (page, text) => {
  const needle = escapeXpathString(text);
  const matches = await page.$x(`//a[contains(text(), ${needle})]`);
  if (matches.length === 0) {
    throw new Error(`Link not found: ${text}`);
  }
  await matches[0].click();
};
/**
 * Logs in with CREDENTIALS, searches for SEARCH.KEYWORD, opens the "people"
 * link, takes a screenshot and clicks the first element whose text contains
 * "Send".
 *
 * The browser is closed even when a step throws, and any failure is logged
 * and reflected in the process exit code.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin', {waitUntil: 'networkidle0'});
    await page.click(FORM.USERNAME_SELECTOR);
    await page.keyboard.type(CREDENTIALS.USERNAME);
    await page.click(FORM.PASSWORD_SELECTOR);
    await page.keyboard.type(CREDENTIALS.PASSWORD);
    // Start waiting for the navigation before triggering it; otherwise a fast
    // navigation can finish before waitForNavigation() is even called.
    await Promise.all([
      page.waitForNavigation(),
      page.click(FORM.BUTTON_SELECTOR),
    ]);
    await page.click(SEARCH.SEARCH_SELECTOR);
    await page.focus(SEARCH.SEARCH_SELECTOR);
    await page.keyboard.type(SEARCH.KEYWORD);
    await Promise.all([
      page.waitForNavigation(),
      page.keyboard.press('Enter'),
    ]);
    // clickByText is async: it has to be awaited so a "Link not found" error
    // surfaces here instead of becoming an unhandled rejection.
    await Promise.all([
      page.waitForNavigation(),
      clickByText(page, `people`),
    ]);
    await page.screenshot({path: 'verify16.png', fullPage: true});
    console.log("Current page:", page.url());
    // puppeteer-select exposes getElement(); there is no getSend() method.
    const invitation = await select(page).getElement('span:contains(Send)');
    await invitation.click();
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
I cannot click the Connect button. I also need to add a note and repeat this for all the other connections.
const puppeteer = require('puppeteer');
const select = require('puppeteer-select');
// CSS selectors for the username field, password field and submit button
// that the script clicks on the login page.
const FORM = {
  USERNAME_SELECTOR: '#username',
  PASSWORD_SELECTOR: '#password',
  BUTTON_SELECTOR: '.btn__primary--large.from__button--floating',
};

// Placeholder login details that get typed into the form.
const CREDENTIALS = {
  USERNAME: 'user',
  PASSWORD: 'password',
};

// Search-box selector, the keyword typed into it, and a selector for a
// connect button.
// NOTE(review): '#ember52' looks like an auto-generated id — confirm it is
// stable before relying on it.
const SEARCH = {
  SEARCH_SELECTOR: '#global-nav-search',
  KEYWORD: 'keyword',
  CONNECT: '#ember52',
};
/**
 * Builds an XPath `concat(...)` expression equal to `str`. The input is cut
 * at every single quote; each piece becomes a single-quoted argument and
 * each quote becomes a double-quoted `"'"` argument between them.
 *
 * @param {string} str - Text to embed in an XPath expression.
 * @returns {string} An XPath `concat(...)` expression.
 */
const escapeXpathString = str => {
  const quotedParts = str.split("'").map(part => `'${part}'`);
  const args = quotedParts.join(`, "'", `);
  return `concat(${args}, '')`;
};
/**
 * Finds the `<a>` elements whose text contains `text` and clicks the first.
 *
 * @param {object} page - Object exposing `$x(xpath)` that resolves to an array of handles.
 * @param {string} text - Text to look for inside the link.
 * @throws {Error} When no matching link is found.
 */
const clickByText = async (page, text) => {
  const xpath = `//a[contains(text(), ${escapeXpathString(text)})]`;
  const candidates = await page.$x(xpath);
  const found = candidates.length > 0;
  if (!found) {
    throw new Error(`Link not found: ${text}`);
  }
  await candidates[0].click();
};
/**
 * Logs in with CREDENTIALS, searches for SEARCH.KEYWORD, opens the "people"
 * link and then, for every entry in the result list, clicks its Connect
 * button, adds a note and sends the invitation.
 *
 * The browser is closed even when a step throws, and any failure is logged
 * and reflected in the process exit code.
 */
(async () => {
  const NOTE_TEXT = 'Pardon! buddy i am just testing my bot ~ Manvendra Yadav';
  const DIALOG_TIMEOUT_MS = 5000;

  // Waits for a button containing `label`; resolves to null when none shows
  // up within the timeout, so one missing dialog does not abort the run.
  const waitForButton = async (page, label) => {
    try {
      return await page.waitForXPath(`//button[contains(., '${label}')]`, {timeout: DIALOG_TIMEOUT_MS});
    } catch (err) {
      if (err.name === 'TimeoutError') {
        return null;
      }
      throw err;
    }
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin', {waitUntil: 'networkidle0'});
    await page.click(FORM.USERNAME_SELECTOR);
    await page.keyboard.type(CREDENTIALS.USERNAME);
    await page.click(FORM.PASSWORD_SELECTOR);
    await page.keyboard.type(CREDENTIALS.PASSWORD);
    // Start waiting for the navigation before triggering it; otherwise a fast
    // navigation can finish before waitForNavigation() is even called.
    await Promise.all([
      page.waitForNavigation(),
      page.click(FORM.BUTTON_SELECTOR),
    ]);
    await page.click(SEARCH.SEARCH_SELECTOR);
    await page.focus(SEARCH.SEARCH_SELECTOR);
    await page.keyboard.type(SEARCH.KEYWORD);
    await Promise.all([
      page.waitForNavigation(),
      page.keyboard.press('Enter'),
    ]);
    // clickByText is async and must be awaited.
    await Promise.all([
      page.waitForNavigation(),
      clickByText(page, `people`),
    ]);
    await page.screenshot({path: `verifytest.png`, fullPage: true});

    const elements = await page.$$('#main > div > div > div:nth-child(2) > ul > li');
    for (let i = 0; i < elements.length; i++) {
      // The leading "." makes the XPath relative to this list item. A bare
      // "//button" searches the whole document and would return the same
      // first Connect button on every iteration.
      const [connectButton] = await elements[i].$x(".//button[contains(., 'Connect')]");
      if (!connectButton) {
        continue;
      }
      await connectButton.click();

      // Only type the note once the note field has actually been opened.
      const noteButton = await waitForButton(page, 'Add a note');
      if (noteButton) {
        await noteButton.click();
        await page.keyboard.type(NOTE_TEXT);
      }
      await page.screenshot({path: `verify${i}.png`, fullPage: true});

      const sendButton = await waitForButton(page, 'Send');
      if (sendButton) {
        await sendButton.click();
      }
    }
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Related
I have lists of URLs... from : http://books.toscrape.com
// Product pages to scrape. The entries carry no URL scheme.
const objArray =
[
  {"Url": "books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"},
  {"Url": "books.toscrape.com/catalogue/tipping-the-velvet_999/index.html"},
  {"Url": "books.toscrape.com/catalogue/soumission_998/index.html"}
];
As you can see, all of the links point to pages with the same structure, so they can be scraped in the same way.
I want to scrape the titles, prices and stock availability from the links above.
I also tried to loop through all of the URLs like this:
// Visits every URL in objArray with a single browser page, collects stock,
// title and price from each product page, then writes all results to
// files.json in one go.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: false,
    });
    const page = await browser.newPage();
    const Product_details = [];
    for (let i = 0; i < objArray.length; ++i) {
      // The list entries have no scheme, so add one before navigating.
      const url = `http://${objArray[i].Url}`;
      await page.goto(url);
      const content = await page.content();
      const $ = cheerio.load(content);
      // Selectors must be passed to cheerio as strings.
      // NOTE(review): [class="instockavailability"] matches that exact
      // attribute value only; if the page uses two separate classes this
      // finds nothing — verify against the real markup.
      const instock = $('div[class="col-sm-6 product_main"] p[class="instockavailability"]').text().trim();
      // NOTE(review): assumes the title is the <h1> inside the product block — confirm.
      const title = $('div[class="col-sm-6 product_main"] h1').text().trim();
      const price = $('div[class="col-sm-6 product_main"] p[class="price_color"]').text().trim();
      Product_details.push({
        Stock: instock,
        Title: title,
        Price: price,
      });
    }
    // Write once, after the loop, so earlier results are not overwritten.
    fs.writeFileSync("files.json", JSON.stringify(Product_details), "utf8");
    console.log(Product_details);
  } catch (e) {
    console.log('Error-> ', e);
  } finally {
    // browser stays undefined when launch() itself failed.
    if (browser) {
      await browser.close();
    }
  }
})();
The code above is not working. I want to get the product details, such as the titles and prices.
You can separate each page logic into a function and try something like this:
// Walks the catalogue page by page (following the "next" link), lets
// getData() append each page's products to Product_details, and writes the
// collected list to Details.json.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: false,
    });
    const page = await browser.newPage();
    const url = "http://books.toscrape.com/";
    const Product_details = [];
    await page.goto(url);
    // getData() fills Product_details itself and returns nothing, so its
    // result must not be pushed (that would add an `undefined` entry per page).
    await getData(page, Product_details);
    while (await page.$('li[class="next"] a')) {
      await Promise.all([
        page.waitForNavigation(),
        page.click('li[class="next"] a'),
      ]);
      await getData(page, Product_details);
    }
    fs.writeFileSync("Details.json", JSON.stringify(Product_details), "utf8");
  } catch (e) {
    console.log('Error-> ', e);
  } finally {
    // Close on success as well as on failure; browser stays undefined when
    // launch() itself failed.
    if (browser) {
      await browser.close();
    }
  }
})();
/**
 * Parses the current page's HTML and appends one `{ Title, Price }` object
 * per product list item to `details`.
 *
 * @param {object} page - Puppeteer page whose content is parsed.
 * @param {Array<object>} details - Array that receives the parsed products (mutated in place).
 * @returns {Promise<void>}
 */
async function getData(page, details) {
  console.log(page.url());
  const html = await page.content();
  const $ = cheerio.load(html);
  // An attribute selector needs a name and a quoted value; the bare class
  // list inside [...] is not a valid selector.
  const statsTable = $('li[class="col-xs-6 col-sm-4 col-md-3 col-lg-3"]');
  statsTable.each(function() {
    // NOTE(review): the visible <h3> text may be a shortened title; prefer
    // the link's `title` attribute when present — confirm against the markup.
    const title = $(this).find('h3 a').attr('title') || $(this).find('h3').text();
    const Price = $(this).find('p[class="price_color"]').text();
    details.push({
      Title: title,
      Price: Price
    });
  });
}
UPD: Answer for the last edition of the question:
// Product pages to scrape. The entries carry no URL scheme.
const objArray = [
  { Url: 'books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html' },
  { Url: 'books.toscrape.com/catalogue/tipping-the-velvet_999/index.html' },
  { Url: 'books.toscrape.com/catalogue/soumission_998/index.html' },
];

// Visits every URL in objArray with one browser page, collects stock, title
// and price from each, prints the result and writes it to files.json.
(async () => {
  let browser;
  try {
    const Product_details = [];
    // One browser and one page are enough for all URLs.
    browser = await puppeteer.launch({
      headless: false,
    });
    const page = await browser.newPage();
    for (const { Url } of objArray) {
      await page.goto(`http://${Url}`);
      const content = await page.content();
      const $ = cheerio.load(content);
      // NOTE(review): [class="instockavailability"] matches that exact
      // attribute value only; if the page uses two separate classes this
      // finds nothing — verify against the real markup.
      const instock = $('div[class="col-sm-6 product_main"] p[class="instockavailability"]').text().trim();
      const title = $('div[class="col-sm-6 product_main"] h1').text().trim();
      // trim must be called; a bare `.trim` stores the function itself and
      // JSON.stringify then silently drops the Price field.
      const price = $('div[class="col-sm-6 product_main"] p[class="price_color"]').text().trim();
      Product_details.push({
        Stock: instock,
        Title: title,
        Price: price,
      });
    }
    console.log(Product_details);
    fs.writeFileSync('files.json', JSON.stringify(Product_details), 'utf8');
  } catch (e) {
    console.log('Error-> ', e);
  } finally {
    // browser stays undefined when launch() itself failed.
    if (browser) {
      await browser.close();
    }
  }
})();
I'm trying to obtain the product names and prices from all the categories of a supermarket website. All the tutorials I have found do it for just one `const url`, but I need to iterate through all of them. So far I have this:
const puppeteer = require('puppeteer');
/**
 * Opens `url`, reads the product name and price via XPath and logs them as
 * `{name, price}`.
 *
 * @param {string} url - Product page to scrape.
 * @returns {Promise<void>}
 * @throws {Error} When the name or price node is not found on the page.
 */
async function scrapeProduct(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    // XPath selects attributes with "@"; "#id" is not valid XPath.
    const [el2] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
    const [el] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
    if (!el2 || !el) {
      throw new Error(`Product name or price not found: ${url}`);
    }
    const text2 = await el2.getProperty('textContent');
    const name = await text2.jsonValue();
    const text = await el.getProperty('textContent');
    const price = await text.jsonValue();
    console.log({name,price});
  } finally {
    // Release the browser even when scraping fails.
    await browser.close();
  }
}
scrapeProduct('https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p').catch(console.error);
This works for just one URL. I'm using Node.js and Puppeteer. How can I achieve this for all of them?
You can try for...of loop, using a single browser instance and a single page so that the scraper might not overload the server:
const puppeteer = require('puppeteer');
// Scrapes name and price from every URL in `urls`, one after another, using
// a single browser instance and a single page.
(async function main() {
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    const urls = [
      'https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p',
      // ...
    ];
    for (const url of urls) {
      await page.goto(url);
      // XPath selects attributes with "@"; "#id" is not valid XPath.
      const [el2] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
      const [el] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
      if (!el2 || !el) {
        // Report the page and move on instead of aborting the whole run.
        console.error(`Product name or price not found: ${url}`);
        continue;
      }
      const text2 = await el2.getProperty('textContent');
      const name = await text2.jsonValue();
      const text = await el.getProperty('textContent');
      const price = await text.jsonValue();
      console.log({name,price});
    }
  } catch (err) {
    console.error(err);
  } finally {
    // Close on the error path too; browser stays undefined when launch() failed.
    if (browser) {
      await browser.close();
    }
  }
})();
You can use an array of urls and forEach:
const puppeteer = require('puppeteer');
const urls = [ 'https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p' ];
// forEach ignores the promise returned by its callback, so each rejection
// has to be handled here or it becomes an unhandled rejection.
urls.forEach((url) => {
  scrapeProduct(url).catch(console.error);
});

/**
 * Opens `url` in its own browser, reads the product name and price via XPath
 * and logs them as `{name, price}`.
 *
 * @param {string} url - Product page to scrape.
 * @returns {Promise<void>}
 * @throws {Error} When the name or price node is not found on the page.
 */
async function scrapeProduct(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    // XPath selects attributes with "@"; "#id" is not valid XPath.
    const [el2] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
    const [el] = await page.$x('//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
    if (!el2 || !el) {
      throw new Error(`Product name or price not found: ${url}`);
    }
    const text2 = await el2.getProperty('textContent');
    const name = await text2.jsonValue();
    const text = await el.getProperty('textContent');
    const price = await text.jsonValue();
    console.log({name,price});
  } finally {
    // Release the browser even when scraping fails.
    await browser.close();
  }
}
I need to get a text from the span tag and to verify whether the text equals to "check".
How can I achieve this in puppeteer?
Below is an example of the code I've written. If anyone could help me figure this out, I would appreciate it.
const puppeteer = require("puppeteer");
// Opens flipkart.com, fills in the login form, searches for "iPhone 11",
// clicks the first result and reports whether the target <span> contains
// "Check" or "Change".
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    // "slowMo": 50,
    args: ["--start-fullscreen"],
    defaultViewport: null,
  });
  //Page
  const page2 = await browser.newPage();

  // Returns the first node matching `xpath`, failing with a clear message
  // instead of a TypeError on `undefined` when nothing matches.
  const firstMatch = async (xpath) => {
    const [handle] = await page2.$x(xpath);
    if (!handle) {
      throw new Error(`No element matches XPath: ${xpath}`);
    }
    return handle;
  };

  await page2.goto("https://www.flipkart.com");
  // NOTE(review): page.waitFor() is deprecated in newer Puppeteer releases —
  // confirm the installed version still provides it.
  await page2.waitFor(2000);
  // XPath selects attributes with "@" (e.g. @class, @type); "#class" is not
  // valid XPath.
  // NOTE(review): `username` and `password` are not defined in this snippet —
  // presumably supplied elsewhere; confirm.
  const usernameInput = await firstMatch("//input[@class='_2zrpKA _1dBPDZ']");
  await usernameInput.type(username);
  await page2.waitFor(2000);
  const passwordInput = await firstMatch("//input[@type='password']");
  await passwordInput.type(password);
  await page2.waitFor(2000);
  const loginButton = await firstMatch("//button[@class='_2AkmmA _1LctnI _7UHT_c']");
  await loginButton.click();
  await page2.waitFor(2000);
  const searchInput = await firstMatch("//input[@class='LM6RPg']");
  await searchInput.type("iPhone 11");
  await page2.waitFor(2000);
  const searchButton = await firstMatch("//button[@class='vh79eN']");
  await searchButton.click();
  await page2.waitFor(2000);
  const firstResult = await firstMatch("//div[@class='col col-7-12']/div");
  await firstResult.click();
  await page2.waitFor(2000);
  const element = await firstMatch('//span[@class="_2aK_gu"]');
  const text = await page2.evaluate((node) => node.textContent, element);
  if (text.includes("Check")) {
    console.log("Check Present");
  }
  if (text.includes("Change")) {
    console.log("Change Present");
  }
})();
// Look up the target node by XPath. page.$x resolves to an array of handles,
// so destructuring takes the first match.
// NOTE(review): the predicate in '//div[]' is empty — presumably a
// placeholder that must be replaced with a real condition before this runs.
const [getXpath] = await page.$x('//div[]');
// Read the node's innerText inside the page context.
const getMsg = await page.evaluate(name => name.innerText, getXpath);
// Log the message on screen
console.log(getMsg)
Here is the complete code for getting div or any html element data using xpath....
const puppeteer = require("puppeteer");
/**
 * Opens the Twitter profile page, waits for the node addressed by the
 * absolute XPath below and logs its innerText.
 *
 * @returns {Promise<void>}
 */
async function scrape () {
  // Kept in one place so the waited-for node and the node that is read
  // cannot drift apart.
  const targetXPath = '/html/body/div[1]/div/div/div[2]/main/div/div/div/div/div/div[2]/div/div/section/div/div/div[1]/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div[1]/div/div[1]/a/div/div[1]/span/span';
  const browser = await puppeteer.launch({headless: false});
  try {
    const page = await browser.newPage();
    await page.goto("https://twitter.com/elonmusk", {waitUntil: "networkidle2"});
    // waitForXPath resolves to the matched element's handle, so a second
    // page.$x lookup is not needed.
    const el = await page.waitForXPath(targetXPath);
    const names = await page.evaluate(name => name.innerText, el);
    console.log(names);
  } finally {
    // Close the browser even when navigation or the wait fails.
    await browser.close();
  }
}
scrape().catch(console.error);
You can get the text from the selected element like this:
await page.goto(url, {waitUntil: "networkidle2"});
// XPath selects attributes with "@"; "#class" is not valid XPath.
await page.waitForXPath('//span[@class="_2aK_gu"]');
//assuming it's the first element
let [element] = await page.$x('//span[@class="_2aK_gu"]');
// Read the element's text inside the page context.
let text = await page.evaluate(element => element.textContent, element);
Note that page.$x returns an array of ElementHandles, so the code here assumes the target is the first element. I'd suggest you choose a more specific XPath than a class, as many elements may share it.
For the condition:
// Braces are required: an `if`/`else` whose body is only a comment is a
// syntax error.
if (text.includes("Check")) {
  //do this
} else if (text.includes("Change")) {
  //do that
}
I would like to log in to a site that uses Cloudflare DDoS protection, like this:
The code is simple:
const puppeteer = require('puppeteer');
const C = require('./constants');
// CSS selectors used by playTest(): the username input, the password input
// and the button that is clicked to submit the form.
const USERNAME_SELECTOR = 'input[name="username"]';
const PASSWORD_SELECTOR = 'input[name="password"]';
const CTA_SELECTOR = '.button';
var cloudscraper = require('cloudscraper');
/**
 * Launches a headless browser and opens one new page in it.
 *
 * NOTE(review): `slowMo: 10000` is passed to launch(); presumably this
 * delays every Puppeteer operation by 10 s rather than waiting once —
 * confirm that this is intended.
 *
 * @returns {Promise<{browser: object, page: object}>} The browser and its new page.
 */
async function startBrowser() {
  const launchOptions = { headless: true, slowMo: 10000 };
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();
  return { browser, page };
}
/**
 * Closes the given browser.
 *
 * @param {object} browser - Object exposing `close()`.
 * @returns {Promise<*>} Resolves with whatever `browser.close()` resolves to.
 */
async function closeBrowser(browser) {
  const closing = browser.close();
  return closing;
}
/**
 * Opens `url`, fills in the login form with the credentials from `C`,
 * submits it, and saves a screenshot before (debug.png) and after (ipt.png)
 * logging in.
 *
 * @param {string} url - Login page to open.
 * @returns {Promise<void>}
 */
async function playTest(url) {
  const {browser, page} = await startBrowser();
  try {
    // setViewport is async and must be awaited.
    await page.setViewport({width: 1366, height: 768});
    // The option is spelled `waitUntil`; the misspelled `waituntil` was ignored.
    await page.goto(url, {waitUntil: 'domcontentloaded'});
    await page.screenshot({path: 'debug.png'});
    // The first document served may be an interstitial rather than the login
    // form, so wait until the username field actually exists.
    await page.waitForSelector(USERNAME_SELECTOR);
    await page.click(USERNAME_SELECTOR);
    await page.keyboard.type(C.username);
    await page.click(PASSWORD_SELECTOR);
    await page.keyboard.type(C.password);
    // Start waiting for the navigation before the click that triggers it.
    await Promise.all([
      page.waitForNavigation(),
      page.click(CTA_SELECTOR),
    ]);
    await page.screenshot({path: 'ipt.png'});
  } finally {
    // Release the browser whether or not the login flow succeeded.
    await closeBrowser(browser);
  }
}
// Runs the login test and exits with status 0 on success, or logs the error
// and exits with status 1 on failure.
(async () => {
  try {
    await playTest("https://xy.com/login.php");
    process.exit(0);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
When I check debug.png, I see only the Cloudflare DDoS protection page. I don't really understand why; I added a slowMo of 10 seconds to delay the execution.
You can add a simple waitForSelector to wait until the username selector appears,
// Wait until the username field is present on the page before clicking it.
await page.waitForSelector(USERNAME_SELECTOR);
await page.click(USERNAME_SELECTOR);
I'm trying to add HTML elements to the current page using
page.setContent
but when it reaches:
await page.setContent('<div><h1>hello world<h1></div>')
the page is refreshed and the content loaded from ./index.html is lost.
Is there a way that these 2 functions work in the same window at the same time?
full code:
'use strict'
const path = require('path');
const carlo = require('carlo');
const puppeteer = require('puppeteer-core');
const { getExecutablePath } = require('./utils');
/**
 * Resolves the browser executable path, logs it and launches Puppeteer
 * with it.
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  const executablePath = await getExecutablePath({
    // useLocalChromium: true
  });
  console.log('Executable path:', executablePath);
  // Await the launch so its failures propagate to the caller of run().
  await launchPuppeteer({ executablePath });
};
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Launches a non-headless browser in app mode pointing at public/index.html,
 * sizes the first page and sets its content.
 *
 * @param {object} launchOptions - Extra options spread into puppeteer.launch(); they override the defaults set here.
 * @returns {Promise<void>}
 */
const launchPuppeteer = async launchOptions => {
  // Let path.join insert the separator instead of concatenating '/'.
  const final = path.join(__dirname, 'public', 'index.html');
  const browser = await puppeteer.launch({
    headless: false,
    args: [`--app=${final}`, '--window-size=1280,1024'],
    ...launchOptions
  });
  const [page] = await browser.pages();
  await page.setViewport({width: 1280, height: 1024});
  // NOTE(review): setContent assigns the page's whole markup, so whatever
  // index.html rendered is replaced, not extended — confirm this is intended.
  // The heading is now properly closed with </h1>.
  await page.setContent('<div><h1>hello world</h1></div>');
};
Just use this
// Append markup to the document that is already loaded instead of replacing it.
await page.evaluate(() => {
  // insertAdjacentHTML parses only the new fragment and appends it;
  // `innerHTML +=` would re-serialize and rebuild the entire body, discarding
  // existing element state and event listeners.
  document.body.insertAdjacentHTML('beforeend', '<div>Test</div>');
});
Edit: what about this?