Node.js request starvation during large IO - json

I have a Node application that uses streams to read and process data from a DB.
I have a reader stream that makes a request via ZeroMQ and, as it receives the data, pushes it out to the next stream in the pipe.
The second stream writes the JSON data to a file and then passes the data on.
The final stream converts the JSON data to CSV and writes out a CSV file.
What I'm noticing is that when I receive a "large" amount of data from the DB (over 10k rows, about 2 MB of uncompressed raw data), this process takes a considerable amount of time (~20 seconds), and during those 20 seconds other requests are starved and cannot complete.
Does this sound right? Is there a way to relinquish the thread so it can do other work while the stream of data is being read/written? Or is there a better approach to handling this file I/O?
EDIT FOR CODE
function FirstStream(msg) {
    stream.Readable.call(this, { objectMode: true });
    this.request = msg;
    this._read = function() {
        var self = this;
        DBRequest.send(self.request).then(function(json) {
            json.Body.messages.forEach(function(item) {
                self.push(JSON.stringify(item));
            });
            self.push(null);
        });
    };
}
util.inherits(FirstStream, stream.Readable);
function SecondStream(filename, maxFileSize) {
    stream.Transform.call(this, { objectMode: true });
    this.filename = filename;
    this.jsonArray = [];
    this.buf = '';
    this.bufferTooBig = false;
    this.id = 0;
    this.maxFileSize = maxFileSize || MB;
    // Buffers JSON data; if the buffer gets too large, then don't bother writing the JSON file
    this._write = function(chunk, encoding, done) {
        // If our buffer is too large, don't worry about caching more data
        if(!this.bufferTooBig) {
            var json = JSON.parse(chunk);
            this.jsonArray.push(json);
            this.buf = new Buffer(JSON.stringify(this.jsonArray));
            // If the file size is going to be over our max file size, then forget about it
            if(this.buf.length > this.maxFileSize) {
                fs.unlink(filename, function(err) { });
                this.jsonArray = [];
                this.buf = '';
                this.bufferTooBig = true;
            }
        }
        // Pass the data on to the next stream
        this.push(chunk);
        done();
    };
    this._flush = function(done) {
        // If the file size is within reason, then write out the file
        if(!this.bufferTooBig) {
            fs.writeFile(filename, this.buf.toString(), function(err) {
                if(err) {
                    throw err;
                }
                done();
            });
        } else {
            done();
        }
    };
}
util.inherits(SecondStream, stream.Transform);
function ThirdStream(filename) {
    stream.Transform.call(this, { objectMode: true });
    this.fileStream = fs.createWriteStream(filename);
    this._write = function(chunk, encoding, done) {
        // (the conversion of chunk into csvMessage is omitted in the question)
        this.fileStream.write(csvMessage);
        this.push(csvMessage);
        done();
    };
    this._flush = function(done) {
        this.fileStream.end();
        done();
    };
}
util.inherits(ThirdStream, stream.Transform);
// USE CASE
var backendStream = new FirstStream(request)
    .pipe(new SecondStream(jsonFileName))
    .pipe(new ThirdStream(csvFileName))
    .on('finish', function() { /* write response back to client */ });
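For reference, one common way to address the question above about relinquishing the event loop is to push rows in batches and yield between batches with setImmediate, rather than pushing every row in one synchronous loop (batching alone will not help, though, if each _write does heavy synchronous work, such as re-stringifying the growing jsonArray on every chunk). Below is a minimal sketch of the batched readable side, not the original code; BatchedStream and the batch size are illustrative, and DBRequest is assumed to behave as in the question.

function BatchedStream(msg, batchSize) {
    stream.Readable.call(this, { objectMode: true });
    this.request = msg;
    this.batchSize = batchSize || 100;
    this.rows = null;        // filled in once the DB responds
    this.index = 0;
    this.fetching = false;
    this.ended = false;
}
util.inherits(BatchedStream, stream.Readable);

BatchedStream.prototype._read = function() {
    var self = this;

    // First call: fetch the rows once, then start pushing
    if (self.rows === null) {
        if (!self.fetching) {
            self.fetching = true;
            DBRequest.send(self.request).then(function(json) {
                self.rows = json.Body.messages;
                pushBatch();
            });
        }
        return;
    }

    // Defer the next batch so other pending requests get a turn
    setImmediate(pushBatch);

    function pushBatch() {
        var end = Math.min(self.index + self.batchSize, self.rows.length);
        while (self.index < end) {
            self.push(JSON.stringify(self.rows[self.index++]));
        }
        if (self.index >= self.rows.length && !self.ended) {
            self.ended = true;
            self.push(null);    // no more rows
        }
    }
};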

Related

Read individual JSON from log file containing multiple lines of JSON

I'm trying to read a log file where each entry is a line of JSON (JSON-structured text).
What I ultimately hope to do is iterate over each line and, if
"Event":"SparkListenerTaskEnd"
is found, parse that JSON line for the values of the keys "Finish Time" and "Executor CPU Time".
I'm new to node.js, so I may be completely wrong, but so far I've got this block of code for iterating through the file:
exports.getId(function(err, id){
    console.log(id);
    var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
        var content = data.split('\n');
        async.map(content, function (item, callback) {
            callback(null, JSON.parse(item));
        }, function (err, content) {
            console.log(content);
        });
    });
    //console.log(data);
});
This doesn't seem to be doing anything though. However, I know the log file can be read as I can see it if I uncomment //console.log(data);.
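(A likely reason nothing happens: fs.readFileSync takes only a path and options and returns the file contents directly, so the callback passed as the third argument is never invoked and the code inside it never runs. A minimal sketch of the same extraction done synchronously, using the keys from the example line below; the blank-line guard is just for a trailing newline:)

exports.getId(function(err, id) {
    console.log(id);
    // readFileSync returns the contents; there is no callback involved
    var data = fs.readFileSync('../PC Files/' + id, 'utf8');
    data.split('\n').forEach(function(line) {
        if (line.trim() === '') return;    // skip blank lines (e.g. trailing newline)
        var obj = JSON.parse(line);
        if (obj.Event === 'SparkListenerTaskEnd') {
            console.log(obj['Task Info']['Finish Time'],
                        obj['Task Metrics']['Executor CPU Time']);
        }
    });
});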
Below is an example JSON line that I'm talking about:
{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1514983570810,"Executor ID":"0","Host":"192.168.111.123","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1514983574496,"Failed":false,"Killed":false,"Accumulables":[{"ID":22,"Name":"internal.metrics.input.recordsRead","Update":99171,"Value":99171,"Internal":true,"Count Failed Values":true},{"ID":20,"Name":"internal.metrics.shuffle.write.writeTime","Update":5893440,"Value":5893440,"Internal":true,"Count Failed Values":true},{"ID":19,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":3872,"Value":3872,"Internal":true,"Count Failed Values":true},{"ID":18,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":1468516,"Value":1468516,"Internal":true,"Count Failed Values":true},{"ID":10,"Name":"internal.metrics.peakExecutionMemory","Update":16842752,"Value":16842752,"Internal":true,"Count Failed Values":true},{"ID":9,"Name":"internal.metrics.diskBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":8,"Name":"internal.metrics.memoryBytesSpilled","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":7,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":6,"Name":"internal.metrics.jvmGCTime","Update":103,"Value":103,"Internal":true,"Count Failed Values":true},{"ID":5,"Name":"internal.metrics.resultSize","Update":2597,"Value":2597,"Internal":true,"Count Failed Values":true},{"ID":4,"Name":"internal.metrics.executorCpuTime","Update":1207164005,"Value":1207164005,"Internal":true,"Count Failed Values":true},{"ID":3,"Name":"internal.metrics.executorRunTime","Update":2738,"Value":2738,"Internal":true,"Count Failed Values":true},{"ID":2,"Name":"internal.metrics.executorDeserializeCpuTime","Update":542927064,"Value":542927064,"Internal":true,"Count Failed Values":true},{"ID":1,"Name":"internal.metrics.executorDeserializeTime","Update":835,"Value":835,"Internal":true,"Count Failed Values":true}]},"Task Metrics":{"Executor Deserialize Time":835,"Executor Deserialize CPU Time":542927064,"Executor Run Time":2738,"Executor CPU Time":1207164005,"Result Size":2597,"JVM GC Time":103,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":1468516,"Shuffle Write Time":5893440,"Shuffle Records Written":3872},"Input Metrics":{"Bytes Read":0,"Records Read":99171},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[{"Block ID":"broadcast_1_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":5941,"Disk Size":0}},{"Block ID":"broadcast_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":9568,"Disk Size":0}},{"Block ID":"broadcast_0_piece0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":false,"Replication":1},"Memory Size":25132,"Disk Size":0}},{"Block ID":"broadcast_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Deserialized":true,"Replication":1},"Memory Size":390808,"Disk Size":0}}]}}
Update
Here is my whole code. I'm sure it's not pretty but it works. I'll now look at improving it.
var http = require("http");
var fs = require('fs');
var async = require('async');
var readline = require('readline');

// get file name
var options = {
    "method" : "GET",
    "hostname" : "xxx.xxx.xxx.xxx",
    "port" : "18080",
    "path" : "/api/v1/applications/"
};

exports.getId = function(callback) {
    var req = http.request(options, function (res) {
        var chunks = [];
        res.on("data", function (chunk) {
            chunks.push(chunk);
        });
        res.on("end", function () {
            var body = JSON.parse(Buffer.concat(chunks));
            var arrFound = Object.keys(body).filter(function(key) {
                if (body[key].name.indexOf("TestName") > -1) {
                    return body[key].name;
                }
            }).reduce(function(obj, key){
                obj = body[key].id;
                return obj;
            }, {});
            //console.log("ID: ", arrFound);
            callback(null, arrFound);
        });
    });
    req.end();
};
// Parse the log file one line at a time and only use lines where Event = SparkListenerTaskEnd
exports.getId(function(err, id){
    console.log(id);
    var lineReader = readline.createInterface({
        input: fs.createReadStream('../PC Files/' + id, 'utf8')
    });
    lineReader.on('line', function (line) {
        var obj = JSON.parse(line);
        if(obj.Event == "SparkListenerTaskEnd") {
            console.log('Line from file:', obj['Task Info']['Finish Time']);
        }
    });
});
Adam, I tried your suggested code but got the following error:
null
fs.js:646
return binding.open(pathModule._makeLong(path), stringToFlags(flags), mode);
^
Error: ENOENT: no such file or directory, open '../PC Files/null'
at Object.fs.openSync (fs.js:646:18)
at Object.fs.readFileSync (fs.js:551:33)
at /test-runner/modules/getEventLog.js:61:19
at IncomingMessage.<anonymous> (/test-runner/modules/getEventLog.js:35:13)
at emitNone (events.js:111:20)
at IncomingMessage.emit (events.js:208:7)
at endReadableNT (_stream_readable.js:1056:12)
at _combinedTickCallback (internal/process/next_tick.js:138:11)
at process._tickCallback (internal/process/next_tick.js:180:9)
At first glance, it appears you are using callbacks incorrectly.
I assume you are using the getId function like:
getId(function(error, data) {
    // Do something with data
});
In which case, the callback function should be returned like:
// Remove the error; it will not be entered as a parameter
// Add callback as a parameter
exports.getId(function(id, callback){
    console.log(id);
    var data = fs.readFileSync('../PC Files/' + id, 'utf8', function(err, data) {
        var content = data.split('\n');
        // Removed the callback parameter from here;
        // we will not have access to it here
        async.map(content, function (item) {
            callback(null, JSON.parse(item));
        // Add the callback with the error in place of null
        }, function (err, content) {
            callback(err);
            console.log(content);
        });
    });
    //console.log(data);
});

How to dynamically read external json files in node.js?

I am creating a website that reads externally hosted JSON files and then uses node.js to populate the site's content.
Just to demonstrate what I'm after, here is a really simplified version of what I'm trying to do in node.js:
var ids = [111, 222, 333];
ids.forEach(function(id){
    var json = getJSONsomehow('http://www.website.com/'+id+'.json');
    buildPageContent(json);
});
Is what I want to do possible?
(Marked as a duplicate of "How do I return the response from an asynchronous call?"; see my comment below for my rebuttal.)
You are trying to get it synchronously. What you should aim for instead is not a function used like this:
var json = getJSONsomehow('http://www.website.com/'+id+'.json');
but more like this:
getJSONsomehow('http://www.website.com/'+id+'.json', function (err, json) {
    if (err) {
        // error
    } else {
        // your json can be used here
    }
});
or like this:
getJSONsomehow('http://www.website.com/'+id+'.json')
    .then(function (json) {
        // you can use your json here
    })
    .catch(function (err) {
        // error
    });
You can use the request module to get your data with something like this:
var request = require('request');

var url = 'http://www.website.com/'+id+'.json';
request.get({url: url, json: true}, (err, res, data) => {
    if (err) {
        // handle error
    } else if (res.statusCode === 200) {
        // you can use data here - already parsed as json
    } else {
        // response other than 200 OK
    }
});
For a working example see this answer.
For more info see: https://www.npmjs.com/package/request
I think the problem is in the async request. The function will return its result before the request has finished.
AJAX_req.open( "GET", url, true );
The third parameter specifies an async request.
You should add a handler and do whatever you need after the request finishes.
For example:
function AJAX_JSON_Req( url ) {
    var AJAX_req = new XMLHttpRequest();
    AJAX_req.open( "GET", url, true );
    AJAX_req.setRequestHeader("Content-type", "application/json");
    AJAX_req.onreadystatechange = function() {
        if (AJAX_req.readyState == 4 && AJAX_req.status == 200) {
            console.log(AJAX_req.responseText);
        }
    };
    AJAX_req.send();
}

How to write and read a json file in ionic

I'm using Ionic to make an app and I need to store some data and read it back; I don't know how to do this, can you give me some directions? To be more specific: the app has an option to see the order history (it is a shopping app), so every time a user places an order I need to save it, and whenever they wish they can see all the orders they have made. To do this I need to load all the saved JSON from storage and display it, and I don't know the right way to save it so that I can read it dynamically later.
You can use the $cordovaFile service:
const fileName = "orders.json";

var getUserOrders = function () {
    var d = $q.defer(),
        userOrders;
    $cordovaFile.checkFile(cordova.file.dataDirectory, fileName).then(
        function (success) {
            $cordovaFile.readAsText(cordova.file.dataDirectory, fileName).then(
                function (data) {
                    d.resolve(JSON.parse(data));
                }, function (error) {
                    ...
                });
        }, function (error) {
            // No orders saved
            d.resolve([]);
        }
    );
    return d.promise;
};

var saveAnOrder = function (order) {
    var d = $q.defer(),
        orderToSave = order;
    getUserOrders().then(
        function (data) {
            var userOrders = data;
            userOrders.push(orderToSave);
            $cordovaFile.writeFile(cordova.file.dataDirectory, fileName, JSON.stringify(userOrders), true).then(
                function (success) {
                    d.resolve(userOrders);
                }, function (error) {
                    ...
                });
        }, function (error) {
            ...
        }
    );
    return d.promise;
};
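A usage sketch, assuming these functions live in an Angular/Ionic controller or service with $q and $cordovaFile available, and that each returns its deferred's promise (the return d.promise lines above); $scope and the sample order are illustrative:

// Save a new order, then refresh the history view with the updated list
saveAnOrder({ id: 42, items: ['coffee', 'cake'], total: 7.5 }).then(
    function (orders) {
        $scope.orderHistory = orders;    // all orders, including the new one
    },
    function (error) {
        console.error('Could not save the order', error);
    }
);

// Load the saved history when the screen opens
getUserOrders().then(function (orders) {
    $scope.orderHistory = orders;
});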

Call multiple JSON data/files in one getJson request

I have this code:
var graphicDataUrl = 'graphic-data.json';
var webDataUrl = 'web-data.json';
var templateHtml = 'templating.html';
var viewG = $('#view-graphic');
var viewW = $('#view-web');

$.getJSON(dataUrls, function(data) {
    $.get(templateHtml, function(template) {
        template = Handlebars.compile(template);
        var example = template({ works: data });
        viewG.html(example);
        viewW.html(example);
    });
});
What is the best way to call both the webDataUrl and graphicDataUrl JSONs and use their data in order to display them in two different divs (#viewG and #viewW)?
The best way is to do each one individually, and to handle error conditions:
$.getJSON(graphicDataUrl)
    .then(function(data) {
        // ...worked, put it in #view-graphic
    })
    .fail(function() {
        // ...didn't work, handle it
    });
$.getJSON(webDataUrl)
    .then(function(data) {
        // ...worked, put it in #view-web
    })
    .fail(function() {
        // ...didn't work, handle it
    });
That allows the requests to happen in parallel, and updates the page as soon as possible when each request completes.
If you want to run the requests in parallel but wait to update the page until they both complete, you can do that with $.when:
var graphicData, webData;
$.when(
    $.getJSON(graphicDataUrl, function(data) {
        graphicData = data;
    }),
    $.getJSON(webDataUrl, function(data) {
        webData = data;
    })
).then(function() {
    if (graphicData) {
        // Worked, put graphicData in #view-graphic
    }
    else {
        // Request for graphic data didn't work, handle it
    }
    if (webData) {
        // Worked, put webData in #view-web
    }
    else {
        // Request for web data didn't work, handle it
    }
});
...but the page may seem less responsive since you're not updating when the first request comes back, but only when both do.
Just in case it is useful to anyone else who may come across this — and thanks to the Promise advances in jQuery — T.J. Crowder's answer can now be improved into one succinct and general function:
/**
 * Load multiple JSON files.
 *
 * Example usage:
 *
 *   jQuery.getMultipleJSON('file1.json', 'file2.json')
 *     .fail(function(jqxhr, textStatus, error){})
 *     .done(function(file1, file2){})
 *   ;
 */
jQuery.getMultipleJSON = function(){
    return jQuery.when.apply(jQuery, jQuery.map(arguments, function(jsonfile){
        return jQuery.getJSON(jsonfile);
    })).then(function(){
        var def = jQuery.Deferred();
        return def.resolve.apply(def, jQuery.map(arguments, function(response){
            return response[0];
        }));
    });
};
However the point about not giving any feedback to the user — whilst waiting for the full load — is a good one. So for those that prefer to give responsive feedback, here's a slightly more complicated version that supports progress.
/**
 * Load multiple JSON files, with progress.
 *
 * Example usage:
 *
 *   jQuery.getMultipleJSON('file1.json', 'file2.json')
 *     .progress(function(percent, count, total){})
 *     .fail(function(jqxhr, textStatus, error){})
 *     .done(function(file1, file2){})
 *   ;
 */
jQuery.getMultipleJSON = function(){
    var
        num = 0,
        def = jQuery.Deferred(),
        map = jQuery.map(arguments, function(jsonfile){
            return jQuery.getJSON(jsonfile).then(function(){
                def.notify(1/map.length * ++num, num, map.length);
                return arguments;
            });
        })
    ;
    jQuery.when.apply(jQuery, map)
        .fail(function(){ def.rejectWith(def, arguments); })
        .done(function(){
            def.resolveWith(def, jQuery.map(arguments, function(response){
                return response[0];
            }));
        })
    ;
    return def;
};
This code is simple and lets you access both responses together in one function:
$.when(
    $.getJSON(graphicDataUrl),
    $.getJSON(webDataUrl)
).done(function(data1, data2) {
    console.log(data1[0]);
    console.log(data2[0]);
});

Node.js - Can I store writeable streams as JSON in Redis?

I am still working on fully understanding streams in node.js. If I create a writable stream, would I be able to store the stream object as JSON in Redis, and then access it later and continue writing to it (after JSON.parse)?
example:
var fs = require( 'fs' );
var redis = require( 'redis' );

var streamName = fs.createWriteStream(upfilePath, streamopts);
streamName = JSON.stringify(streamName);
rclient.set('streamJSON', streamName);
....
var myNewdata = 'whatever';
rclient.get('streamJSON', function (err, streamJSON) {
    var recoveredStream = JSON.parse(streamJSON);
    recoveredStream.write(myNewdata, function (err, written, buffer) {
        //write successful??
    });
});
You can't store variable references in Redis. You would only need to store the filename, then reopen the stream with the 'a' flag, which allows you to append data to it.
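A minimal sketch of that simpler approach, assuming rclient and upfilePath from the question; only the path string is stored in Redis, and the file is reopened in append mode when more data needs to be written:

var fs = require('fs');

// Store only the file path, not the stream object
rclient.set('streamPath', upfilePath);

// Later, when more data arrives, reopen the file with the 'a' (append) flag
rclient.get('streamPath', function (err, path) {
    if (err) throw err;
    var appendStream = fs.createWriteStream(path, { flags: 'a' });
    appendStream.write(myNewdata);
    appendStream.end();
});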
I thought this was a pretty interesting question and created this module that allows you to save the state of a stream and then use it later. But I don't see the point if you can just use the 'a' flag. It might be useful for ReadableStreams, though.
var fs = require('fs');

exports.stringify = function(stream) {
    var obj = {
        path: stream.path
        , writable: stream.writable
        , fd: stream.fd
        , options: {
            encoding: stream.encoding
            , mode: stream.mode
        }
    };
    if (stream.writable) {
        obj.bytesWritten = stream.bytesWritten;
    } else {
        obj.options.bufferSize = stream.bufferSize;
        obj.bytesRead = stream.bytesRead;
    }
    return JSON.stringify(obj);
};

exports.parse = function(json, callback) {
    var obj = JSON.parse(json);
    var stream;
    if (obj.writable) {
        obj.options.flags = 'a';
        stream = fs.createWriteStream(obj.path, obj.options);
        stream.bytesWritten = obj.bytesWritten;
    } else {
        stream = fs.createReadStream(obj.path, obj.options);
        stream.bytesRead = obj.bytesRead;
    }
    // if the stream was already opened, wait until it is open again
    if (obj.fd !== null) {
        stream.on('open', function() {
            callback(null, stream);
        });
    } else {
        process.nextTick(function() {
            callback(null, stream);
        });
    }
    return stream;
};
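A usage sketch for the module above, assuming it is saved as stream-state.js next to the calling code (the filename is illustrative) and that rclient is a connected Redis client as in the question:

var fs = require('fs');
var streamState = require('./stream-state');   // the exports.stringify / exports.parse module above

// Persist the state of a write stream in Redis
var out = fs.createWriteStream(upfilePath);
rclient.set('streamJSON', streamState.stringify(out));

// Later: rebuild an equivalent stream (reopened with the 'a' flag) and keep writing
rclient.get('streamJSON', function (err, json) {
    if (err) throw err;
    streamState.parse(json, function (err, recovered) {
        if (err) throw err;
        recovered.write('more data\n');
        recovered.end();
    });
});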