Node.js + bluebird + JSON. JSON is not valid after modification - json

Trying to get API data.
I have a problem creating valid JSON after the modification.
The data should look like this: [{"1"},{"2"},{"3"}, ... ,{"201"},{"202"},{"203"}, ...]
but currently it looks like this: [{"1"},{"2"},{"3"}, ...],[{"201"},{"202"},{"203"}, ...]
Where is my mistake?
var Promise = require("bluebird");
var request = require('bluebird').promisifyAll(require('request'));
var fs = Promise.promisifyAll(require('fs'));
var ladders = {"hardcore":"hardcore", "standard":"standard"};
// Requests every URL in `urls` (at most 10 requests in flight) and writes the
// collected results to file.json.
function getJSONsync(urls) {
var ladder = [];
Promise.map(urls, function(url) {
return request
.getAsync(url)
.spread(function (res, body) {
if (res.statusCode != 200) {
throw new Error('Unsuccessful attempt. Code: '+ res.statusCode);
}
// NOTE(review): concat returns a new array and `ladder` itself is never
// updated, so each response is stringified on its own. Promise.map therefore
// resolves to one JSON string per URL rather than a single merged array.
return JSON.stringify(ladder.concat(JSON.parse(body).entries), "", 4);
})
// console.error returns undefined, so a failed URL maps to undefined.
.catch(console.error);
},{ concurrency: 10 })
.then(function(arr) {
// NOTE(review): `arr` is an array of strings at this point, not one JSON document.
fs.writeFileAsync('file.json', arr);
})
}
/**
 * Builds the list of ladder API page URLs, one per page of `limit` entries.
 *
 * @param {string} ladderName - Ladder identifier placed in the URL path.
 * @param {number} offset - Index of the first entry to request.
 * @param {number} limit - Page size; also the distance between page offsets.
 * @param {number} [total=15000] - Offsets at or beyond this value are not requested.
 * @returns {string[]} URLs in ascending offset order.
 * @throws {RangeError} If `limit` is not a positive number (the loop could not advance).
 */
function setUrls(ladderName, offset, limit, total = 15000) {
    const step = Number(limit);
    if (!(step > 0)) {
        throw new RangeError('limit must be a positive number, got: ' + limit);
    }
    const arr = [];
    // Advance by the page size so consecutive pages neither overlap nor leave
    // gaps (the step used to be fixed at 200 regardless of `limit`).
    for (let start = Number(offset); start < total; start += step) {
        arr.push('http://api.pathofexile.com/ladders/' + ladderName + '?offset=' + start + '&limit=' + limit);
    }
    return arr;
}
getJSONsync(setUrls(ladders.hardcore, 0, 200));
Thx for help.
Sorry for my Eng.
Finally:
var Promise = require("bluebird");
var request = require('bluebird').promisifyAll(require('request'));
var fs = Promise.promisifyAll(require('fs'));
var ladders = {"hardcore":"hardcore","standard":"standard"};
/**
 * Downloads every ladder page in `urls` (at most 10 concurrent requests),
 * merges the `entries` arrays of all pages into one array and writes it to
 * file.json as pretty-printed JSON.
 *
 * A page that fails to download or parse is logged and contributes no
 * entries; the remaining pages are still written.
 *
 * @param {string[]} urls - Page URLs to fetch.
 * @returns {Promise} Settles once file.json has been written.
 */
function getJSONsync(urls) {
    return Promise.map(urls, function (url) {
        return request
            .getAsync(url)
            .spread(function (res, body) {
                if (res.statusCode != 200) {
                    throw new Error('Unsuccessful attempt. Code: ' + res.statusCode);
                }
                // Fall back to [] so a response without `entries` cannot
                // inject `undefined` into the merged list.
                return JSON.parse(body).entries || [];
            })
            .catch(function (err) {
                // Best effort: report the failed page and contribute nothing,
                // instead of handing `undefined` to the reducer below.
                console.error(err);
                return [];
            });
    }, { concurrency: 10 })
        // Seeded with [] so an empty `urls` list still produces an array.
        .reduce(function (all, entries) { return all.concat(entries); }, [])
        .then(function (arr) {
            console.log(arr.length);
            // Return the write promise so callers can wait for it and observe failures.
            return fs.writeFileAsync('file.json', JSON.stringify(arr, null, 4));
        });
}
/**
 * Builds the list of ladder API page URLs, one per page of `limit` entries.
 *
 * @param {string} ladder - Ladder identifier placed in the URL path.
 * @param {number} offset - Index of the first entry to request.
 * @param {number} limit - Page size; also the distance between page offsets.
 * @param {number} [total=15000] - Offsets at or beyond this value are not requested.
 * @returns {string[]} URLs in ascending offset order.
 * @throws {RangeError} If `limit` is not a positive number (the loop could not advance).
 */
function setUrls(ladder, offset, limit, total = 15000) {
    const step = Number(limit);
    if (!(step > 0)) {
        throw new RangeError('limit must be a positive number, got: ' + limit);
    }
    const arr = [];
    // Advance by the page size so consecutive pages neither overlap nor leave
    // gaps (the step used to be fixed at 200 regardless of `limit`).
    for (let start = Number(offset); start < total; start += step) {
        arr.push('http://api.pathofexile.com/ladders/' + ladder + '?offset=' + start + '&limit=' + limit);
    }
    return arr;
}
getJSONsync(setUrls(ladders.hardcore, 0, 200));

Promise.map returns an array, so when you call ladder.concat you return another array for each URL, and the result becomes an array of arrays: [[{"1"}, ...], [{"201"}, ...]].
You should just remove the concat:
return JSON.stringify(JSON.parse(body).entries, "", 4);
But if you want to use the ladder variable, you can call ladder.push(JSON.stringify(JSON.parse(body).entries, "", 4)) and use ladder instead of the arr value passed to the final callback.

Related

Data being hidden when importing data from a website via script

When the data is copied, every row should in theory show a result of the form "-" or "X:Y Half time results: Z:A", but some values are hidden, as shown below. I would like to know what in the script causes this and how the script should be changed so that it no longer happens. Currently the values only appear when there is a green check mark (V) next to the result of the game.
SITE:
CURRENT RESULTS:
EXPECTED RESULTS:
THE COLUMN THAT IS MISSING DATA TO COLLECT IS THIS:
/**
 * Fetches the predictions index page, follows the `placeOfUrl`-th link found
 * in the extracted cell and collects per-row values from the linked page.
 *
 * Relies on the Apps Script services UrlFetchApp and XmlService and on a
 * `Parser` object offering data().from().to().build()/iterate() — presumably
 * the Apps Script "Parser" library; confirm against the project's libraries.
 *
 * @param {number} placeOfUrl - 1-based position of the link to follow.
 * @returns {string[][]|undefined} One array of cell values per table row, or
 *     undefined when there is no link at `placeOfUrl`.
 * @throws {Error} If either page answers with a status other than 200.
 */
function sample(placeOfUrl) {
  // Retrieve URL.
  var baseUrl = "http://old.statarea.com/predictions.php";
  var res1 = UrlFetchApp.fetch(baseUrl);
  // Was `new Erro(...)`, which raised a ReferenceError instead of this message.
  if (res1.getResponseCode() != 200) throw new Error("URL cannot be used.");
  const from = '<td style="padding-top: 10px; text-align: center;">';
  const to = ' </td>';
  const htmlData1 = (from + Parser.data(res1.getContentText()).from(from).to(to).build() + to).replace(/\ /g, "");
  const xmlRoot = XmlService.parse(htmlData1).getRootElement();
  const c = xmlRoot.getChildren()[placeOfUrl - 1];
  if (!c) return;
  const url = c.getAttribute("href").getValue();
  // Parse HTML data.
  const res2 = UrlFetchApp.fetch(url);
  if (res2.getResponseCode() != 200) throw new Error("URL for retrieving data cannot be used.");
  const htmlData2 = res2.getContentText();
  const parsedData1 = Parser.data(htmlData2).from('<table class="style_1" cellspacing="0" cellpadding="0" width="918" border="0">').to('</table>').build();
  const parsedData2 = Parser.data(parsedData1).from("<tr>").to("</tr>").iterate();
  // Keep only rows starting with the 35px centered cell, strip the listed
  // markup/entities from each cell and wrap every row in <content>.
  const data = parsedData2
    .filter(function(e) {return /^<td width="35" align="center">/.test(e)})
    .map(function(e) {return "<content>" + e.match(/<td.+?\/td>/g).map(function(f) {return f.replace(/\&nbsp\;|<div.+?>|<\/div>|<img.+?>|<input.+?>|\&team_guest|<\/h.+?>|\&/g, "")}).join("") + "</content>"})
    .join("");
  const xmlRootContent = XmlService.parse("<root>" + data + "</root>").getRootElement();
  // Retrieve result values.
  const content = xmlRootContent.getChildren();
  const values = content.reduce((ar1, e) => {
    const temp = e.getChildren().reduce((ar2, f, j) => {
      if (f) {
        if (f.getChild("a")) {
          // Cells containing a link contribute the link text (when non-empty).
          const t = f.getChild("a").getValue()
          if (t) ar2.push(t);
        } else {
          // Other cells count only when they carry a style attribute and sit
          // at child index 5, 6, 7, 8 or 15.
          if (f.getAttribute("style")) {
            const v = f.getValue();
            if (v && [5, 6, 7, 8, 15].includes(j)) {
              ar2.push(v);
            }
          }
        }
      }
      return ar2;
    }, []);
    ar1.push(temp);
    return ar1;
  }, []);
  return values;
}
LINK TO SPREADSHEET:
https://docs.google.com/spreadsheets/d/1ZdU05slyWXHeOVnoRzhpUNCzb8lBuAsJlVys2bmdh20/edit?usp=sharing
In order to achieve your goal, for example, how about modifying values as follows?
From:
// Builds one array of cell values per <content> row.
const values = content.reduce((ar1, e) => {
const temp = e.getChildren().reduce((ar2, f, j) => {
if (f) {
if (f.getChild("a")) {
// Cell contains a link: keep the link text when it is non-empty.
const t = f.getChild("a").getValue()
if (t) ar2.push(t);
} else {
// Cell without a link: kept only if it has a style attribute, a non-empty
// value, and its child index is one of 5, 6, 7, 8, 15.
if (f.getAttribute("style")) {
const v = f.getValue();
if (v && [5, 6, 7, 8, 15].includes(j)) {
ar2.push(v);
}
}
}
}
return ar2;
}, []);
ar1.push(temp);
return ar1;
}, []);
To:
// Builds one array of cell values per <content> row.
const values = content.reduce((ar1, e) => {
const temp = e.getChildren().reduce((ar2, f, j) => {
if (f) {
if (f.getChild("a")) {
// Cell contains a link: keep the link text when it is non-empty.
const t = f.getChild("a").getValue();
if (t) ar2.push(t);
} else if (f.getChild("span")) {
// Cell wraps its content in a <span>: always keep the cell's value.
ar2.push(f.getValue());
} else {
// Any other cell: kept when non-empty and at child index 5, 6, 7, 8 or 15.
const v = f.getValue();
if (v && [5, 6, 7, 8, 15].includes(j)) {
ar2.push(v);
}
}
}
return ar2;
}, []);
ar1.push(temp);
return ar1;
}, []);

"Debugging connection was closed: Render process was gone", when trying to download a 7gb from cdn

We are trying to download a 7 GB file from a CDN using JSZip.js. Chrome seems to close the connection every time the download reaches about 3.5 GB, which takes roughly 15 minutes. Is there a way to increase the timeout, say to 1 hour?
// Click handler: downloads every file listed in `result`, packs them into one
// zip (each under its own folder) and offers the archive for saving.
// NOTE(review): each file is fetched in full via getBinaryContent and the
// archive is generated as a single Blob, so memory use presumably grows with
// the total download size — confirm whether that is what kills the renderer.
$("#downloadJSZip").on('click', function () {
    var result = [{ "cdn": "url....", "filename": "7.84 gb.zip", "size": 4194304, "path": "7.84 gb" }];
    var Promise = window.Promise;
    if (!Promise) {
        Promise = JSZip.external.Promise;
    }

    // Wraps JSZipUtils.getBinaryContent in a promise resolving to the file data.
    function urlToPromise(url) {
        return new Promise(function (resolve, reject) {
            JSZipUtils.getBinaryContent(url, function (err, data) {
                if (err) {
                    reject(err);
                } else {
                    resolve(data);
                }
            });
        });
    }

    var fileNameArray = [];

    // Returns `fileName` with "--<j>" inserted before its extension, bumping
    // <j> until the result is not already present in fileNameArray.
    function changeFileName(fileName, j) {
        var i = fileName.lastIndexOf('.');
        if (i === -1) {
            // No extension: append the suffix instead of splitting the last character off.
            i = fileName.length;
        }
        var newfilename = fileName.slice(0, i) + "--" + j + fileName.slice(i);
        if (fileNameArray.indexOf(newfilename) != -1) {
            // Propagate the retry's result; it used to be discarded, so the
            // colliding name was returned anyway.
            return changeFileName(fileName, j + 1);
        }
        return newfilename;
    }

    var zip = new JSZip();
    // find every checked item
    result.forEach(function (file) {
        var filename = file.filename;
        if (fileNameArray.indexOf(filename) != -1) {
            filename = changeFileName(filename, 1);
        }
        fileNameArray.push(filename);
        var url = file.cdn;
        var folder = (file.path);
        zip.folder(folder).file(filename, urlToPromise(url), {binary:true});
    });

    // when everything has been downloaded, we can trigger the dl
    zip.generateAsync({type:"blob"}, function updateCallback(metadata) {
        var msg = "progression : " + metadata.percent.toFixed(2) + " %";
        if (metadata.currentFile) {
            msg += ", current file = " + metadata.currentFile;
        }
        console.log(msg);
        console.log(metadata.percent|0);
    })
    .then(function callback(blob) {
        // see FileSaver.js
        saveAs(blob, $scope.folderName+".zip");
    }, function (e) {
        // Surface download/zip failures instead of silently dropping them.
        console.error("Failed to build the zip archive:", e);
    });
});
Is this a Chrome browser configuration issue?

nodejs: parsing chunks of json

I created a test server that sends chunks of stringified JSON. When I connect to the server it sends invalid JSON and for the life of me I can't figure out why. The output adds an extra double quotation mark.
Server code:
// Test server: sends one message split across two writes, one second apart.
// NOTE(review): the first fragment ends with a closing quote after "targ" and
// the second one starts with et.txt", so the joined line contains
// "targ"et.txt" and is not valid JSON.
const net = require('net');

const PORT = 5432;

// Per-connection logic: first fragment immediately, remainder after 1 s.
function handleSubscriber(connection) {
  console.log('subscriber connected.');
  connection.write('{"type":"changed","file":"targ"');
  const pendingWrite = setTimeout(() => {
    connection.write('et.txt","timestamp":1358175758495}' + '\n');
    connection.end();
  }, 1000);
  // Cancel the delayed write when the connection ends.
  connection.on('end', () => {
    clearTimeout(pendingWrite);
    console.log('subscriber disconnected');
  });
}

const server = net.createServer(handleSubscriber);

server.listen(PORT, () => {
  console.log('test server listening for subs...');
});
ldj.js
'use strict';

const events = require('events');
const util = require('util');

/**
 * Client for newline-delimited messages arriving on `stream`.
 *
 * Incoming data is appended to an internal text buffer, the buffer is logged,
 * and every complete line (up to and including '\n') is removed from its front.
 *
 * @param {object} stream - Any object exposing on('data', listener).
 */
function LDJClient(stream) {
  events.EventEmitter.call(this);
  let pending = '';
  stream.on('data', (chunk) => {
    pending += chunk;
    console.log(pending);
    for (let nl = pending.indexOf('\n'); nl !== -1; nl = pending.indexOf('\n')) {
      // Message emission is switched off here while the raw buffer is inspected:
      //this.emit('message', JSON.parse(pending.slice(0, nl)));
      pending = pending.slice(nl + 1);
    }
  });
}
util.inherits(LDJClient, events.EventEmitter);

// Public API of the module.
exports.LDJClient = LDJClient;
exports.connect = (stream) => new LDJClient(stream);
Output:
{"type":"changed","file":"targ"
{"type":"changed","file":"targ"et.txt","timestamp":1358175758495}
That extra " should not be in "target.txt" value. Any ideas?
TIA
Rather than splitting the string manually, try taking the whole string, splitting it into chunks, and then sending them:
// Sends `data` to `connection` in 10-character pieces, one piece per second,
// then closes the connection.
var data = '{"type":"changed","file":"target.txt","timestamp":1358175758495}';
var chunks = data.match(/.{1,10}/g); // 10 symbol chunks
// One timer per chunk. map() gives every callback its own `chunk`/`index`;
// with a shared `var` loop variable each timer saw only the final values, so
// the last chunk was written repeatedly and end() was never reached.
var timers = chunks.map(function (chunk, index) {
    var isLast = index === chunks.length - 1;
    return setTimeout(function () {
        // Only the final piece carries the '\n' terminator, so the receiver
        // sees one complete line instead of one line per fragment.
        connection.write(isLast ? chunk + '\n' : chunk);
        if (isLast) {
            connection.end();
        }
    }, index * 1000);
});
connection.on('end', function () {
    // Stop pending writes once the connection has ended.
    timers.forEach(function (timer) {
        clearTimeout(timer);
    });
    console.log('subscriber disconnected');
});

JSON document mysteriously "empties"

I have written a small script in Node.js to scrape a web page and get some links. The scraping part is done with Cheerio. My code is here (simplified for space):
// Loads base_url + 'mtfCompanies' and stores the text and href of every
// matched link in mutuals.companies.
var request = require('request');
var cheerio = require('cheerio');
var base_url = 'http://www.naftemporiki.gr/finance/';
var mutuals = {};
mutuals.date = new Date();
mutuals.companies = [];
var company = {};
// The function passed to request() runs only once the response is available.
request(base_url + 'mtfCompanies', function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('.blueRow.texttd.name a').each(function (i, element) {
var a = $(this);
// A fresh object per link; the shared `company` variable is just reassigned.
company = {};
company.name = a.text();
company.link = a.attr('href');
mutuals.companies.push(company);
});
}
//console.log(mutuals); // 1st place
});
// NOTE(review): this statement executes right after request() has been
// issued, i.e. before the callback above has filled mutuals.companies.
console.log(mutuals); // 2nd place
Here comes the fun part: When I try to output the JSON document from the "1st place", inside the 'request' block, it comes out nice and true. An example is here:
{ date: Wed Nov 26 2014 10:35:09 GMT+0200 (EET),
companies:
[ { name: ' J.P. MORGAN ASSET MANAGEMENT',
link: 'mtfCompany?id=J.P.+MORGAN+ASSET+MANAGEMENT' },
{ name: ' BNP PARIBAS INVESTMENT PARTNERS',
link: 'mtfCompany?id=BNP+PARIBAS+INVESTMENT+PARTNERS' },
{ name: ' PICTET', link: 'mtfCompany?id=PICTET' },
{ name: ' ALLIANZ ΑΕΔΑΚ',
link: 'mtfCompany?id=ALLIANZ+%ce%91%ce%95%ce%94%ce%91%ce%9a' },
{ name: ' ALLIANZ ΑΕΔΑΚ (ΑΝΤΙΠΡ.)',
link: 'mtfCompany?id=ALLIANZ+%ce%91%ce%95%ce%94%ce%91%ce%9a+(%ce%91%ce%9d%ce%a4%ce%99%ce%a0%ce%a1.)' },
{ name: ' ALLIANZ ΕΛΛΑΣ Α.Ε.',
link: 'mtfCompany?id=ALLIANZ+%ce%95%ce%9b%ce%9b%ce%91%ce%a3+%ce%91.%ce%95.' }]}
When I try to output the JSON document from the "2nd place", outside of ANY block and at the end of execution, this is what I get:
{ date: Wed Nov 26 2014 10:35:09 GMT+0200 (EET), companies: [] }
It looks like the 'companies' array inside the JSON document gets emptied. I have a suspicion that the 'mutuals.companies = [];' line gets executed again for some reason.
Can anyone help with this?
UPDATE 1:
Changed my code as suggested to use 'async.series...'. This is the updated version:
// Same scrape as before, arranged as two async.series tasks: fetch, then print.
var request = require('request'),
async = require('async'),
cheerio = require('cheerio');
var base_url = 'http://www.naftemporiki.gr/finance/';
var mutuals = {};
mutuals.date = new Date();
mutuals.companies = [];
var company = {};
async.series([
function(callback) {
request(base_url + 'mtfCompanies', function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('.blueRow.texttd.name a').each(function (i, element) {
var a = $(this);
company = {};
company.name = a.text();
company.link = a.attr('href');
mutuals.companies.push(company);
});
}
});
// NOTE(review): this call sits outside the request callback, so the task is
// reported as finished immediately after the request has been issued.
callback(null, 'one');
},
function (callback) {
console.log(mutuals);
callback(null, 'two');
}
]);
Still does not work. Still the JSON outputted is this:
{ date: Wed Nov 26 2014 10:35:09 GMT+0200 (EET), companies: [] }
Your "2nd place" is printing the variable before the request finishes.
Your "1st place" works because it's located inside the callback of the request. The request is made, the data is pulled, the callback then gets called and is successfully printed.
This is the way asynchronous code works. Nothing blocks. So when you issue your request, node stores the callback function so that it can execute code with the results of the request.
Update 1:
The issue with your update is mostly the same. In your first function in the series, the callback gets called before request has finished. If you move callback into the function passed to request then it gets called after request finishes.
function(callback) {
request(base_url + 'mtfCompanies', function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('.blueRow.texttd.name a').each(function (i, element) {
var a = $(this);
company = {};
company.name = a.text();
company.link = a.attr('href');
mutuals.companies.push(company);
});
callback(null, 'one');
}
});
},
Suggestion 1
Developing in node.js with callbacks can leave you with a deep nested structure. Don't let your if statements make nesting worse. Use early returns instead of deeper nesting. Example:
function(callback) {
request(base_url + 'mtfCompanies', function (error, response, html) {
if(error) return callback(error);
if(response.statusCode !== 200) return callback('status code not 200');
var $ = cheerio.load(html);
$('.blueRow.texttd.name a').each(function (i, element) {
var a = $(this);
company = {};
company.name = a.text();
company.link = a.attr('href');
mutuals.companies.push(company);
});
callback(null, 'one');
});
},
Suggestion 2
When using async it can help to simplify things by using named functions. Example:
var request = require('request'),
    async = require('async'),
    cheerio = require('cheerio');
var base_url = 'http://www.naftemporiki.gr/finance/';
// Result container: fetch date plus one {name, link} record per matched link.
var mutuals = {};
mutuals.date = new Date();
mutuals.companies = [];

/**
 * Series task: fetches base_url + 'mtfCompanies' and appends the text and
 * href of every matched link to mutuals.companies.
 *
 * @param {function(?Error, string=)} callback - Called with an Error on a
 *     request failure or non-200 status, otherwise with (null, 'one').
 */
function getPage(callback) {
    request(base_url + 'mtfCompanies', function (error, response, html) {
        if (error) return callback(error);
        // Report a real Error (not a bare string) and include the status received.
        if (response.statusCode !== 200) {
            return callback(new Error('status code not 200 (got ' + response.statusCode + ')'));
        }
        var $ = cheerio.load(html);
        $('.blueRow.texttd.name a').each(function (i, element) {
            var a = $(this);
            // Local record per link; no shared mutable `company` variable needed.
            mutuals.companies.push({ name: a.text(), link: a.attr('href') });
        });
        callback(null, 'one');
    });
}

/**
 * Series task: prints the collected data.
 *
 * @param {function(?Error, string=)} callback - Called with (null, 'two').
 */
function printMutuals(callback) {
    console.log(mutuals);
    callback(null, 'two');
}

async.series([
    getPage,
    printMutuals
], function (err) {
    // Without a final callback, an error from getPage would vanish silently.
    if (err) console.error(err);
});

Use slice function to slice JSON object

I want to slice a JSON array, but get the following error:
Object #<Object> has no method 'slice'
The following is my code:
// Queries programService for the program identified by $routeParams.id and
// passes the result to $scope.setPagingData. The query is started after a
// 100 ms timeout.
// NOTE(review): `searchText` is accepted but not used in this function.
$scope.getPagedDataAsync = function (pageSize, page, searchText) {
setTimeout(function () {
var data;
programService.query({
id: $routeParams.id
}, function (result) {
// Wrap the service result in an object under the key 'program'.
data = {
'program': result
};
// JSON round-trip: replaces `data` with a copy rebuilt from its JSON text.
data = JSON.stringify(data);
data = JSON.parse(data);
// NOTE(review): `data` is a plain object here (one 'program' key), not an array.
$scope.setPagingData(data,page,pageSize);
});
}, 100);
};
// Publishes the first three items of `data` as $scope.myData, records
// data.length as $scope.totalServerItems and calls $scope.$apply() unless
// $scope.$$phase is set.
// NOTE(review): relies on data.slice and data.length, so `data` needs to be an
// array (or string); a plain object has no slice method. `page` and `pageSize`
// are not used here.
$scope.setPagingData = function(data, page, pageSize){
var pagedData = data.slice(0, 3);
$scope.myData = pagedData;
$scope.totalServerItems = data.length;
if (!$scope.$$phase) {
$scope.$apply();
}
};
JSON data:
{programId:1,
programName:project1,
programContent:content1,
programStartDate:2012-01-01,
templateId: '1'}
I want to slice the array as follows: programId, 1, programName, project1, ...
I am so confused , please help.
Try
// Sample record whose properties are flattened below.
const data = {
  programId: 1,
  programName: 'project1',
  programContent: 'content1',
  programStartDate: '2012-01-01',
  templateId: '1',
};

// Flatten the record's own enumerable properties into
// [key1, value1, key2, value2, ...], after which slice() works as on any array.
const array = Object.entries(data).flatMap(([key, value]) => [key, value]);

console.log(array, array.slice(0, 4));