How to download Csv from google trends using javascript apify puppeteer - csv

i use that code to download csv from google trends using apify js, but it doesn't work, could you help me?
the result is a csv with a wrong content.
i try to get all csv from google trends.
const path = require('path');
const downloadPath1 = path.resolve(__dirname, './downloads');
const fs = require('fs');
try{
const util = require('util');
await util.promisify(fs.mkdir)(downloadPath1);
}
catch( e){
}
await page.setRequestInterception(true);
//await page.click(csvSelector);
await page.waitForSelector(csvSelector)
const hrefElement = await page.$(csvSelector);
await hrefElement.click();
const xRequest = await new Promise(resolve => {
page.on('request', async interceptedRequest => {
const type = interceptedRequest.resourceType();
log.info(type)
if ( type == "xhr" ) {interceptedRequest.abort();
resolve(interceptedRequest);}
else interceptedRequest.continue();
});
});
log.info(xRequest._url);
const request = require('request-promise');
const options = {
encoding: null,
method: xRequest._method,
uri: xRequest._url,
body: xRequest._postData,
headers: xRequest._headers
}
/* add the cookies */
const cookies = await page.cookies();
options.headers.Cookie = cookies.map(ck => ck.name + '=' + ck.value).join(';');
const response = await request(options);
fs.writeFileSync(downloadPath1+'/binary.csv', response);
const fileObjs = fs.readdirSync(downloadPath1, { withFileTypes: true });
console.log("\nCurrent directory files:");
fileObjs.forEach(file => {
console.log(file);
});
// There won't be more files so let's pick the first
const fileData = fs.readFileSync(downloadPath1+`/${fileObjs[0].name}`);
log.info(fileData);
// Now we can use the data or save it into Key-value store.
await Apify.setValue('MY-Csv.csv', fileData, { contentType: 'application/csv'});
i hope that someone can propose a solution for this thanks

Related

How can I map the complete json data into my code? Right now I can only do it for one entry at a time

export default class FetchData extends React.Component{
state = {
loading: true,
currentPrice: null,
oneDayChange: null,
sevenDayChange: null
};
//these are the three values i need to get from the json result
async componentDidMount(){
const url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?CMC_PRO_API_KEY=xxxx'
const response = await fetch(url);
const info = await response.json();
this.setState({currentPrice:info.data[5].quote.USD.price})
this.setState({oneDayChange:info.data[5].quote.USD.percent_change_24h});
this.setState({sevenDayChange:info.data[5].quote.USD.percent_change_7d});
console.log(info.data[5].id);
}
//can only update states by giving array index of 1 entry
This might help
async componentDidMount(){
....
const info = await response.json();
const currentPrice = [];
const oneDayChange = [];
const sevenDayChange = [];
info.data.map(item => {
currentPrice.push(item.quote.USD.price);
oneDayChange.push(item.quote.USD.percent_change_24h);
sevenDayChange.push(item.quote.USD.ercent_change_7d);
});
this.setState({currentPrice, oneDayChange, sevenDayChange});
}
Multiple state changes might not be working as you might be trying to modify state object directly, below solution will replace state with new object copying all the old values and replacing mentioned properties.
This should work
async componentDidMount(){
const url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?CMC_PRO_API_KEY=xxxx'
const response = await fetch(url);
const [loading, newArray]
const info = await response.json();
const infoUSD = info.data[5].quote.USD;
this.setState(Object.assign({}, state , {currentPrice: infoUSD.price, oneDayChange: infoUSD.percent_change_24h, sevenDayChange: infoUSD.percent_change_7d});
console.log(info.data[5].id);
}

How to iterate through a supermarket website and getting the product name and prices?

Im trying to obtain all the product name and prices from all the categories from a supermarket website, all the tutorials that i have found do it just for one const url, i need to iterate through all of them. So far i have got this
const puppeteer = require('puppeteer');
async function scrapeProduct(url) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
const [el2] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
const text2 = await el2.getProperty('textContent');
const name = await text2.jsonValue();
const [el] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
const text = await el.getProperty('textContent');
const price = await text.jsonValue();
console.log({name,price});
await browser.close();
}
scrapeProduct('https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p');
which works just for one. Im using nodejs and puppeteer. How can i achieve this?
You can try for...of loop, using a single browser instance and a single page so that the scraper might not overload the server:
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch();
const [page] = await browser.pages();
const urls = [
'https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p',
// ...
];
for (const url of urls) {
await page.goto(url);
const [el2] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
const text2 = await el2.getProperty('textContent');
const name = await text2.jsonValue();
const [el] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
const text = await el.getProperty('textContent');
const price = await text.jsonValue();
console.log({name,price});
}
await browser.close();
} catch (err) {
console.error(err);
}
})();
You can use an array of urls and forEach:
const puppeteer = require('puppeteer');
const urls = [ 'https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p' ];
urls.forEach(scrapeProduct);
async function scrapeProduct(url) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
const [el2] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div');
const text2 = await el2.getProperty('textContent');
const name = await text2.jsonValue();
const [el] = await page.$x('//*[#id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()');
const text = await el.getProperty('textContent');
const price = await text.jsonValue();
console.log({name,price});
await browser.close();
}

How can I add html elements to the current page? puppeter/carlo

I'm trying to add html elements to the current page from
page.setContenet
but when it reaches:
await page.setContent('<div><h1>hello world<h1></div>')
Refresh the page and say goodbye to ./index.html
Is there a way that these 2 functions work in the same window at the same time?
full code:
'use strict'
const path = require('path');
const carlo = require('carlo');
const puppeteer = require('puppeteer-core');
const { getExecutablePath } = require('./utils');
const run = async () => {
const executablePath = await getExecutablePath({
// useLocalChromium: true
});
console.log('Executable path:', executablePath);
launchPuppeteer({ executablePath });
}
run();
const launchPuppeteer = async launchOptions => {
const test = path.join(__dirname, 'public')
const final = test + '/index.html';
const browser = await puppeteer.launch({
headless: false,
args: [`--app=${final}`, '--window-size=1280,1024'],
...launchOptions
});
const [page] = await browser.pages();
await page.setViewport({width: 1280, height: 1024});
await page.setContent('<div><h1>hello world<h1></div>')
}
Just use this
await page.evaluate(()=>{
document.body.innerHTML += '<div>Test</div>';
})
Edit: what about this?

Creating a QR-code containing a link in NodejS

I'm working on a project right now, i need to create a qrcode that contains a specific information in NodeJS. I have started by creating the canvas in HTML, and take it in NodeJS
<canvas id="canvas" width="300" height="300"></canvas>
And then in my NodeJS file, i'm launching my function
const fs = require('fs');
const qrcode = require('qrcode');
module.exports = {
generateQr: function(link){
var canvas = new qrcode(document.getElementById('canvas'));
qrcode.toCanvas(canvas, link, function (error) {
if (error) console.error(error)
console.log('success!');
});
}
};
Unfortunately, i got the error :
ReferenceError: document is not defined
From the above code, does it look correct ? Does the Qrcode get the data i'm passing, and then what should i do so the QR-code appear in my HTML ?
Thank you for your help
document is a browser-specific global object, you can't access it in node
In node environment, you could generate an image with QR code and use it.
Example:
Async/Await style:
const fs = require('fs');
const qrcode = require('qrcode');
module.exports = {
generateQr: async link => {
const qrCodeDataUrl = await qrcode.toDataURL(link);
// ...
}
};
Promise style:
const fs = require('fs');
const qrcode = require('qrcode');
module.exports = {
generateQr: link => {
qrcode.toDataURL(link)
.then(data => {
const qrCodeDataUrl = data;
// ...
})
.catch(err => {
console.error(error);
// ...
});
}
};
Callback style:
const fs = require('fs');
const qrcode = require('qrcode');
module.exports = {
generateQr: link => {
qrcode.toDataURL(link, function(err, data) {
if (error) console.error(error);
const qrCodeDataUrl = data;
// ...
});
}
};
To render it in an HTML-file you could use template-engine of your choice:
Example with ejs:
const ejs = require('ejs');
// In some route
const qrCodeDataUrl = await generateQr('some link');
const html = ejs.render('<img src="<%= qrCodeDataUrl %>" />', {qrCodeDataUrl});
res.header('Content-Type', 'text/html');
res.send(html);
// ...
Note: It's a simplified example. Please check ejs docs for more details

Why can't I patch, update, or delete an AppPackage that I created?

I am trying to change the required engine version of an AppPackage that I have posted using v2 of the Design Automation API.
I've tried using Postman and the Forge Node Client. I'm using the Forge documentation as a reference.
https://forge.autodesk.com/en/docs/design-automation/v2/reference/http/AppPackages(':id')-PATCH/
My credentials are correct and I have a valid token, but for some reason I keep getting a 404 Not Found status and an error that says "AppPackage with the name MyPlugin doesn't belong to you. You cannot operate on AppPackage you do not own." Also, I get the same message when I try to delete or update the AppPackage.
That's really weird because I definitely own this AppPackage. I uploaded it with these same credentials and I can view it by doing a GET request to view all of my AppPackages. Furthermore, the name of the AppPackage is correct and I specified the right scope (code:all) when I authenticated.
Why does Design Automation think this AppPackage doesn't belong to me and why can't I patch, update, or delete it?
UPDATE 3/28/2019: Setting the resource value still results in the same error
UPDATE 4/2/2019: Getting a fresh upload URL doesn't work either. I get an internal server error saying "Object reference not set to an instance of an object."
const ForgeSDK = require('forge-apis');
const oAuth2TwoLegged = new ForgeSDK.AuthClientTwoLegged(FORGE_CLIENT_ID, FORGE_CLIENT_SECRET, SCOPES);
const appPackageApi = new ForgeSDK.AppPackagesApi();
const getToken = () => {
return oAuth2TwoLegged.authenticate();
};
const getUploadURL = () => {
return appPackageApi.getUploadUrl(oAuth2TwoLegged, oAuth2TwoLegged.getCredentials());
};
const patchPackage = (id, url) => {
const appPack = {
Resource: url,
RequiredEngineVersion: APP_PACKAGE_REQUIRED_ENGINE
};
return appPackageApi.patchAppPackage(id, appPack, oAuth2TwoLegged, oAuth2TwoLegged.getCredentials());
};
(async () => {
try {
const token = await getToken();
const url = await getUploadURL();
const patchPackRes = await patchPackage(APP_PACKAGE_ID, url);
if (patchPackRes.statusCode == 201)
console.log('Patch package succeeded!');
else
console.log('Patch package failed!' + patchPackRes.statusCode);
} catch (ex) {
console.log('Exception :(');
console.log(ex);
}
})();
When calling PATCH the "Resource" property must be set. It can be set to the same URL as the one you receive from GET but it must be present and valid.
This should work:
const ForgeSDK = require('forge-apis');
const oAuth2TwoLegged = new ForgeSDK.AuthClientTwoLegged(FORGE_CLIENT_ID, FORGE_CLIENT_SECRET, SCOPES);
const appPackageApi = new ForgeSDK.AppPackagesApi();
const getToken = () => {
return oAuth2TwoLegged.authenticate();
};
const getUploadURL = async (id) => {
const app = await appPackageApi.getAppPackage(id, oAuth2TwoLegged, oAuth2TwoLegged.getCredentials());
return app.body.Resource;
};
const patchPackage = (id, url) => {
const appPack = {
Resource: url,
RequiredEngineVersion: APP_PACKAGE_REQUIRED_ENGINE
};
return appPackageApi.patchAppPackage(id, appPack, oAuth2TwoLegged, oAuth2TwoLegged.getCredentials());
};
(async () => {
try {
const token = await getToken();
const url = await getUploadURL(APP_PACKAGE_ID);
const patchPackRes = await patchPackage(APP_PACKAGE_ID, url);
if (patchPackRes.statusCode == 201)
console.log('Patch package succeeded!');
else
console.log('Patch package failed!' + patchPackRes.statusCode);
} catch (ex) {
console.log('Exception :(');
console.log(ex);
}
})();