I am writing a backend API in node.js and need the functionality for users to be able to upload files with data and then calling stored procedures for inserting data into MySQL. I'm thinking of using fast-csv as parser, however I am struggling with how to set up the call to stored procedure in csv stream. the idea is something like this:
var fs = require("fs");
var csv = require("fast-csv");
// Readable stream over the CSV file that is going to be parsed.
var stream1 = fs.createReadStream("files/testCsvFile.csv");
// Number of rows seen so far; declared here so the "data" handler does not
// create an implicit global.
var numlines = 0;
csv
  // Parse the stream opened above (the original referenced an undefined `stream2`).
  .fromStream(stream1, { headers: true })
  .on("data", function(data) {
    //CALL TO SP with params from "data"//
    numlines++;
  })
  .on("end", function() {
    console.log("done");
  });
In other parts of application I have set up routes as follows:
// POST /verified: sends the role credentials of the passwordless session user,
// or a bare 401 when the session carries no such user.
auth.post("/verified", async (req, res) => {
  const sessionUser = req.session.passwordless;
  if (!sessionUser) {
    res.sendStatus(401);
    return;
  }

  // Build the credentials query for this user and run it through admin.raw.
  const credentialsQuery = getUserRoleCredentials(sessionUser);
  const rawCredentials = await admin.raw(credentialsQuery);
  const normalized = await normalizeCredentials(rawCredentials);

  // Only these two fields are sent back to the client.
  res.send({ user_end: normalized.user_end, role: normalized.role });
});
..that is - routes are written in async/await way with queries (all are Stored Procedures called) being defined as Promises.. I would like to follow this pattern in upload/parse csv/call SP for every line function
This is doing the job for me. Can you please describe how to achieve the same with your framework? I believe it can be done somehow; I just need to configure it correctly.
// Parses the uploaded CSV row by row, runs one query per row, collects the rows
// in `fileRows`, and on "end" deletes the temp file and replies with the rows.
//use fast-csv to stream data from a file
csv
.fromPath(form.FileName, { headers: true })
// NOTE(review): an event emitter does not wait for the promise returned by an
// async listener, so "end" may fire while queries for earlier rows are still
// pending and `fileRows` may be incomplete at that point - confirm.
.on("data", async data => {
// NOTE(review): `data` itself is not passed to queryBuilder here; presumably
// `parameters` is derived from it somewhere not shown - confirm.
const query = await queryBuilder({
schema,
routine,
parameters,
request
}); //here we prepare query for calling the SP with parameters from data
winston.info(query + JSON.stringify(data));
const rawResponse = await session.raw(query); //here the query gets executed
fileRows.push(data); // push each row - for testing only
})
.on("end", function() {
console.log(fileRows);
fs.unlinkSync(form.FileName); // remove temp file
//process "fileRows" and respond
res.end(JSON.stringify(fileRows)) // - for testing
});
As mentioned in the comment, I made my scramjet to handle such a use case with ease... Please correct me if I understood it wrong, but I understand you want to call the two await lines for every CSV row in the test.
If so, your code would look like this (updated to match your comment/answer):
var fs = require("fs");
var csv = require("fast-csv");
// Readable stream over the CSV file that is going to be parsed.
var stream1 = fs.createReadStream("files/testCsvFile.csv");
var {DataStream} = require("scramjet");

// Runs the query for every parsed CSV row, collects the rows and replies with
// them; any failure in the chain is answered with a 500.
DataStream
  // the following line will convert any stream to scramjet.DataStream
  // (it parses `stream1` opened above; the original referenced an undefined `stream2`)
  .from(csv.fromStream(stream1, { headers: true }))
  // the next lines controls how many simultaneous operations are made
  // I assumed 16, but if you're fine with 40 or you want 1 - go for it.
  .setOptions({maxParallel: 16})
  // the next line will call your async function and wait until it's completed
  // and control the back-pressure of the stream
  .do(async (data) => {
    const query = await queryBuilder({
      schema,
      routine,
      parameters,
      request
    }); //here we prepare query for calling the SP with parameters from data
    winston.info(query + JSON.stringify(data));
    const rawResponse = await session.raw(query); //here the query gets executed
    return data; // push each row - for testing only)
  })
  // next line will run the stream until end and return a promise
  .toArray()
  .then(fileRows => {
    console.log(fileRows);
    fs.unlinkSync(form.FileName); // remove temp file
    //process "fileRows" and respond
    res.end(JSON.stringify(fileRows)); // - for testing
  })
  .catch(e => {
    res.writeHead(500); // some error handling
    res.end(e.message);
  });
// you may want to put an await statement before this, or call then to check
// for errors, which I assume is your use case.
To answer your comment question - if you were to use an async function in the on("data") event - you would need to create an array of promises and await Promise.all of that array on stream end - but that would need to be done synchronously - so async function in an event handler won't do it.
In scramjet this happens under the hood, so you can use the function.
Related
I'm very new to Redis, but it seems like something my program needs in order to work faster.
I have built my whole database with mongoose/MongoDB Atlas.
But is there a way to update one item in the object I got from the database and stored in the cache? I want to update the location under the Redis key many times, and only need to save the last updated location to the actual database.
So far I have some code to get 1 object from the database and store it in redis but I want to implement the updating part in this function as it is used for the PUT request to update a persons location every second
/**
 * Express handler that returns a person, using Redis as a read-through cache.
 *
 * On a cache hit the cached JSON is parsed and sent back. On a miss the person
 * is loaded with `Person.findById`, stored in Redis as a JSON string and sent
 * back with status 200. When no person exists for the id, 404 is sent and
 * nothing is cached.
 *
 * @param {object} req - request; `req.params.id` identifies the person
 * @param {object} res - Express response used to send the reply
 * @returns {Promise<void>}
 */
const updateLocation = async (req, res) => {
  const { id } = req.params;
  const cacheKey = `person-${id}`;

  if (!redisClient.isOpen) {
    await redisClient.connect();
    console.log('connected');
  }

  const cached = await redisClient.get(cacheKey);
  if (cached) {
    // The cache holds a JSON string (see the set below), so turn it back into
    // an object before replying.
    res.json(JSON.parse(cached));
    // Here I would like to update the documents location everytime
    // this endpoint is called from frontend
    return;
  }

  // Named `person`, not `res`: the original shadowed the response object here
  // and then called `res.status(...)` on the database document.
  const person = await Person.findById(id);
  if (!person) {
    res.sendStatus(404);
    return;
  }

  // The value is serialised explicitly before being written to the cache.
  await redisClient.set(cacheKey, JSON.stringify(person));
  console.log("from source data");
  res.status(200).json(person);
};
/**
 * Default gulp task: prompts for a JSON file path, reads the file, tags the
 * parsed object with main = 'admin' and drains the resulting stream.
 * `done` is called when the stream ends, or with the error if any step fails.
 */
gulp.task('default', function(done) {
  inquirer.prompt([{
    type: `input`,
    message: `Enter the path`,
    default: `./admin/admin.json`,
    name: `path`
  }]).then(function(answers) {
    console.log(answers.path);
    console.log('answers');
    // mydefaultTaskTwo returns the file contents as a string, so the text is
    // handed to pipedFunction directly instead of calling .pipe() on it.
    const data = mydefaultTaskTwo(null, answers.path);
    const readable = pipedFunction(data);
    readable.on('error', done);
    readable.on('end', function() {
      done();
    });
    // Nothing consumes the stream here, so switch it to flowing mode to let
    // it run to its end.
    readable.resume();
  }).catch(done); // prompt failures and JSON parse errors reach gulp
});

/**
 * Reads a file synchronously as UTF-8 text.
 * @param {*} cb - unused; kept so existing call sites keep working
 * @param {string} path - file to read
 * @returns {string} the file contents, or '' when reading fails (the error is logged)
 */
function mydefaultTaskTwo(cb, path) {
  let data = '';
  try {
    data = fs.readFileSync(path, 'utf-8');
  } catch (e) {
    console.log(`Error: ${e}`);
  }
  return data;
}

/**
 * Parses JSON text, sets its `main` property to 'admin' and wraps the
 * re-serialised text in a readable stream.
 * @param {string} data - JSON text; the original read an out-of-scope `data`
 *   variable here, which raised "ReferenceError: data is not defined"
 * @returns {Readable} stream over the updated JSON text
 * @throws {SyntaxError} when `data` is not valid JSON
 */
function pipedFunction(data) {
  const object = JSON.parse(data);
  object['main'] = 'admin';
  return Readable.from(JSON.stringify(object));
}
I understand that src returns a stream and pipe takes that stream and return a stream, but how do you feed in the stream into the pipedFunction called inside of pipe? I am unsure how it works. I get the following error:
ReferenceError: data is not defined.
Is there something I am misunderstanding about gulp scripts?
Basically you define data as a local scope-level variable and try to reach it from a different scope, where it's undefined. So, you need to make use of the fact that data is returned and pass it, like:
// Keep the value returned by mydefaultTaskTwo and pass it to pipedFunction explicitly.
// NOTE(review): mydefaultTaskTwo returns the file contents as a string, and a
// string has no .pipe() method - confirm what .pipe() should be called on.
var data = mydefaultTaskTwo(null, answers.path);
data.pipe(pipedFunction(data));
I am trying to insert couple of millions records (with approximately 6 fields/columns) by receiving in requests from clients 10,000 records per bulk insert attempt (using sequelize.js and bulkCreate())
This obviously was a bad idea, so I tried looking into node-pg-copy-streams
However, I do not want to initiate a change on the client side, where a json array is sent as such
# python
# Client side: the rows are collected in a list of dicts and posted as one
# JSON-encoded array.
data = [
{
"column a":"a values",
"column b":"b values",
},
...
# 10,000 items
...
]
# NOTE(review): `request` is presumably the `requests` library - confirm.
request.post(data=json.dumps(data), url=url)
On the Server side in nodejs, how would I stream the received request.body in the following skeleton ?
// POST handler skeleton: the sequelize bulkCreate version is commented out and
// the pg-copy-streams version opens a COPY ... FROM STDIN stream that still
// needs a source piped into it.
.post(function(req, res){
// old sequelize code
/* table5.bulkCreate(
req.body, {raw:true}
).then(function(){
return table5.findAll();
}).then(function(result){
res.json(result.count);
});*/
// new pg-copy-streams code
pg.connect(function(err, client, done) {
// NOTE(review): `err` is not checked before `client` is used.
var stream = client.query(copyFrom('COPY my_table FROM STDIN'));
// My question is here, how would I stream or pipe the request body ?
// ?.on('error', done);
// ?.pipe(stream).on('finish', done).on('error', done);
});
});
Here's how I solved my problem,
First a function to convert my req.body dict to a TSV (not a part of the initial problem)
/**
 * Converts dictionaries and a set of keys to a Tab Separated Value blob of text.
 * The implementation is elided in this snippet.
 * @param {Object} dicts - dictionary data to convert; NOTE(review): the caller
 *   passes req.body, presumably an array of row objects - confirm
 * @param {Array} keys - keys to extract
 * @return {String} concatenated Tab Separated Values
 */
function convertDictsToTSV(dicts, keys){
// ...
}
Second the rest of my original .post function
.post(function(req, res){
// ...
/* requires 'stream' as
* var stream = require('stream');
* var copyFrom = require('pg-copy-streams').from;
*/
var read_stream_string = new stream.Readable();
read_stream_string.read = function noop() {};
var keys = [...]; // set of dictionary keys to extract from req.body
read_stream_string.push(convertDictsToTSV(req.body, keys));
read_stream_string.push(null);
pg.connect(connectionString, function(err, client, done) {
// ...
// error handling
// ...
var copy_string = 'Copy tablename (' + keys.join(',') + ') FROM STDIN'
var pg_copy_stream = client.query( copyFrom( copy_string ) );
read_stream_string.pipe(pg_copy_stream).on('finish', function(finished){
// handle finished and done appropriately
}).on('error', function(errored){
// handle errored and done appropriately
});
});
pg.end();
});
Technically, there is no streaming here, not in terms of how NodeJS streaming works.
You are sending a chunk of 10,000 records each time and expect your server-side to insert those and return an OK to the client to send another 10,000 records. That's throttling/paging data in, not streaming.
Once your server has received the next 10,000 records, insert them (usually as a transaction), and then respond with OK back to the client so it can send the next 10,000 records.
Writing transactions with node-postgres isn't an easy task, as it is too low-level for that.
Below is an example of how to do that with the help of pg-promise:
/**
 * Queues one INSERT per record inside a single `db.tx` callback and resolves
 * them together through `batch`.
 * @param {Array<object>} records - rows to insert; each one is passed as the
 *   values object of its query
 * @returns {*} whatever `db.tx` returns for the batched queries
 */
function insertRecords(records) {
  return db.tx(function (tx) {
    // One pending query per record, in input order.
    const pending = records.map(function (record) {
      return tx.none("INSERT INTO table(fieldA, ...) VALUES(${propA}, ...)", record);
    });
    return tx.batch(pending);
  });
}
Then inside your HTTP handler, you would write:
/**
 * HTTP handler skeleton: inserts the request's records via insertRecords and
 * reports success or failure. The response handling is left as placeholders.
 * @param {object} req - incoming request (source of the records)
 * @param {object} res - response to fill in
 */
function myPostHandler(req, res) {
// var records = get records from the request;
// NOTE(review): `records` is only declared in the placeholder comment above;
// it must be defined before this call can run.
insertRecords(records)
.then(data=> {
// set response as success;
})
.catch(error=> {
// set response as error;
});
}
I am trying to create a simple web application which fires a http.request call, get the data and display it over to the html(ejs here). I am able to fire the request, get the data, massage it etc.. but unable to pass it to the view. Sample code is as below:
router.post('/', requesthandler);

/**
 * Handles POST /: fires the upstream GET request and renders the 'result' view
 * only after the upstream response has been fully received and parsed.
 * Any failure is logged through errorhandler and answered with a 500.
 * @param {object} req - Express request
 * @param {object} res - Express response
 */
function requesthandler(req, res) {
  var options = {
    host: url,
    port: 9999,
    path: qstring,
    method: 'GET'
  };

  // Logs the error and replies once; rendering may already have started.
  function fail(e) {
    errorhandler(e);
    if (!res.headersSent) {
      res.sendStatus(500);
    }
  }

  var reqget = http.request(options, function (ress) {
    responsehandler(ress, function (err, searchData) {
      if (err) {
        fail(err);
        return;
      }
      console.log('Rendering now:............................ ');
      res.render('result', {title: 'Results Returned', searchdata: searchData});
    });
  });
  // Connection-level failures (DNS, refused connection, ...) are reported here.
  reqget.on('error', fail);
  reqget.end();
}

/**
 * Collects the body of the upstream response and hands the parsed result on.
 * @param {object} ress - upstream response (http.IncomingMessage)
 * @param {function(Error=, Array=)} [callback] - called once with an error, or
 *   with null and the search data built by parseresponse
 */
function responsehandler(ress, callback) {
  if (typeof callback !== 'function') {
    callback = function () {};
  }
  console.log('STATUS: ' + ress.statusCode);
  // Per-response buffer; the original appended to an undeclared variable.
  var output = '';
  // Decode as text so multi-byte characters split across chunks stay intact.
  ress.setEncoding('utf8');
  ress.on('data', function (chunk) {
    output += chunk;
    console.log('BODY: ' );
  });
  ress.on('error', callback);
  ress.on('end', function () {
    var searchData;
    try {
      searchData = parseresponse(output);
    } catch (e) {
      callback(e);
      return;
    }
    callback(null, searchData);
  });
}

/**
 * Parses the upstream JSON body and builds the list passed to the view.
 * @param {string} output - complete response body
 * @returns {Array} search data for this request only (not shared between requests)
 * @throws {SyntaxError} when the body is not valid JSON
 */
function parseresponse(output) {
  var data = JSON.parse(output);
  console.log(data.responseHeader);
  var searchData = [];
  // populate searchData here from data object
  searchData.push({ /* some data */ });
  return searchData;
}

/**
 * Logs an error raised while talking to the upstream service.
 * @param {Error} e - the error to log
 */
function errorhandler(e) {
  console.error(e);
}

module.exports = router;
The problem is that I am unable to pass the object searchData to the view via res.render();
'Rendering now: ...........' gets executed before execution starts in parseresponse(), so the page is displayed without the data. This seems to be a consequence of using callbacks. So how can I pass the data object to the view once searchData is loaded in parseresponse()?
PS: I am able to print all console statements
define res variable globally:
// Module-level slot that holds the response object of the current request.
// NOTE(review): this single variable is overwritten on every call to
// requesthandler, so overlapping requests would share it - confirm this is
// acceptable.
var res;
function requesthandler(req,resObj){
res = resObj;//set it to the resObj
}
wrap res.render inside a function like this:
// Renders the 'result' view with the current contents of searchData, using
// the `res` and `searchData` variables from the enclosing scope.
function renderPage(){
res.render('result',{title: 'Results Returned',searchdata : searchData});
}
then in parseresponse function do this:
// Parses the collected response body, fills searchData and only then renders,
// so the view is produced after the data is available.
function parseresponse(){
var data = JSON.parse(output);
// `{some data}` below is a placeholder for the real object to push.
searchData.push({some data});
renderPage();
}
Hope this solves your problem.
I'm pretty new to Angular so maybe I'm asking the impossible but anyway, here is my challenge.
As our server cannot paginate JSON data, I would like to stream the JSON and add it page by page to the controller's model. The user doesn't have to wait for the entire stream to load, so I refresh the view for every X (pagesize) records.
I found oboe.js for parsing the JSON stream and added it using bower to my project. (bower install oboe --save).
I want to update the controller's model during the streaming. I did not use the $q implementation of promises, because only one .resolve(...) is possible and I want multiple pages of data loaded via the stream, so the $digest needs to be called with every page. The RESTful service that is called is /service/tasks/search
I created a factory with a search function which I call from within the controller:
'use strict';
angular.module('myStreamingApp')
  .factory('Stream', function() {
    return {
      /**
       * Streams /service/<schema>/search with oboe and pushes every matching
       * record onto scope.data, running a digest after each full page and
       * once more when the stream is done.
       * @param {string} schema - path segment of the service to query
       * @param {object} scope - scope whose `data` array receives the records
       */
      search: function(schema, scope) {
        var loaded = 0;
        var pagesize = 100;
        // JSON streaming parser oboe.js
        oboe({
          url: '/service/' + schema + '/search'
        })
          // process every node which has a schema
          .node('{schema}', function(rec) {
            // push the record to the model data
            scope.data.push(rec);
            loaded++;
            // if there is another page received then refresh the view
            if (loaded % pagesize === 0) {
              scope.$digest();
            }
          })
          .fail(function(err) {
            // Parenthesised so the ternary only picks the message suffix; without
            // the parentheses `+` binds first, the condition is always truthy and
            // a missing err.thrown raises a TypeError.
            console.log('streaming error' + (err.thrown ? err.thrown.message : ''));
          })
          .done(function() {
            scope.$digest();
          });
      }
    };
  });
My controller:
'use strict';

// Controller: starts with an empty data list and lets the Stream service fill
// it for the 'tasks' schema.
function MyCtrl($scope, Stream) {
  $scope.data = [];
  Stream.search('tasks', $scope);
}

angular.module('myStreamingApp').controller('MyCtrl', MyCtrl);
It all seems to work. After a while, however, the system gets slow and the HTTP call doesn't terminate after refreshing the browser. Also, the browser (Chrome) crashes when too many records are loaded.
Maybe I'm on the wrong track because passing the scope to the factory search function doesn't "feel" right and I suspect that calling the $digest on that scope is giving me trouble. Any ideas on this subject are welcome. Especially if you have an idea on implementing it where the factory (or service) could return a promise and I could use
$scope.data = Stream.search('tasks');
in the controller.
I dug a little further and came up with the following solution. It might help someone:
The factory (named Stream) has a search function which is passed parameters for the Ajax request and a callback function. The callback is called for every page of data loaded by the stream. The callback function is called via a deferred.promise so the scope can be updated automatically with every page. To access the search function I use a service (named Search) which initially returns an empty array of data. As the stream progresses, the factory calls the callback function passed by the service and the page is added to the data.
I can now call the Search service from within a controller and assign the return value to the scope's data array.
The service and the factory:
'use strict';
angular.module('myStreamingApp')
  .service('Search', function(Stream) {
    /**
     * Starts a streamed search and returns an array that is filled page by
     * page as the stream progresses.
     * @param {object} params - search parameters; `params.schema` selects the API path
     * @returns {Array} initially empty array that receives the records
     */
    return function(params) {
      // initialize the data
      var data = [];
      // add the data page by page using a stream
      Stream.search(params, function(page) {
        // a page of records is received.
        // add each record to the data
        _.each(page, function(record) {
          data.push(record);
        });
      });
      return data;
    };
  })
  .factory('Stream', function($q) {
    return {
      /**
       * Calls the oboe module to get the JSON data in a stream and reports
       * the records to `callback` one page at a time.
       * @param {object} params - `params.schema` selects the API path
       * @param {function(Array)} callback - receives each page of records
       * @returns {object} promise that resolves once the stream has started
       */
      search: function(params, callback) {
        // the defer will be resolved as soon as the stream starts
        var defer = $q.defer();
        var promise = defer.promise;
        // counter for the received records
        var counter = 0;
        // I use an arbitrary page size.
        var pagesize = 100;
        // initialize the page of records
        var page = [];

        // Hands one page to the callback through the promise. A function is
        // passed to then(); the original wrote promise.then(callback(page)),
        // which ran the callback immediately and gave then() its return value.
        function deliver(records) {
          promise.then(function() {
            callback(records);
          });
        }

        // call the oboe function to start the stream
        oboe({
          url: '/api/' + params.schema + '/search',
          method: 'GET'
        })
          // once the stream starts we can resolve the defer.
          .start(function() {
            defer.resolve();
          })
          // for every node containing an _id
          .node('{_id}', function(node) {
            // we push the node to the page
            page.push(node);
            counter++;
            // if the pagesize is reached return the page using the promise
            if (counter % pagesize === 0) {
              deliver(page);
              // start a fresh page; the delivered one keeps its own array
              page = [];
            }
          })
          .done(function() {
            // when the stream is done make sure the last page of nodes is returned
            deliver(page);
          });
        return promise;
      }
    };
  });
Now I can call the service from within a controller and assign the response of the service to the scope:
$scope.mydata = Search({schema: 'tasks'});
Update august 30, 2014
I have created an angular-oboe module with the above solution a little bit more structured.
https://github.com/RonB/angular-oboe