How to do a very large query on sails-mongo?

I'm using Sails 0.11.2 with the latest sails-mongo adapter.
I have a very large database (gigabytes of data), mainly timestamps and values, and I query it using the blueprint API.
If I query localhost:1337/datatable?limit=100000000000, Node.js hangs with heavy CPU usage on 0.12 and crashes on v4. It crashes in the toJSON function.
I've found out that I need to split the work into multiple queries against my API, but I don't know how to proceed.
How can I make multiple queries that don't blow up my server?
Update:
On the newer version 0.12.3, with the latest Waterline and sails-mongo, the queries run much more smoothly. The crashes in the cloud happened because I didn't have enough RAM to run Sails.js and MongoDB on the same T2.micro instance.
I've moved the MongoDB server to an M3.medium instance. Now the server doesn't crash anymore, but it freezes. I'm using skip/limit, which works nicely on the Sails.js side, but for MongoDB it is a huge waste of resources!
MongoDB runs the internal query with limit = skip + limit, then moves the cursor to the desired data and returns it. For example, asking for skip=1,000,000 with limit=100 still makes MongoDB walk 1,000,100 documents. When you paginate deep into the data you end up issuing many of these internal scans, and they get bigger as the skip grows.

As this article explains, the way to get around the waste of resources in MongoDB is to avoid using skip and cleverly use _id as part of your query.
I did not use sails-mongo, but I did implement the idea above using the MongoDB driver in Node.js:
/**
 * Motivation:
 * Wanted to put together some code that used:
 *  - BlueBird (promises)
 *  - MongoDB NodeJS Driver
 *  - and paging that did not rely on skip()
 *
 * References:
 * Based on articles such as:
 * https://scalegrid.io/blog/fast-paging-with-mongodb/
 * and GitHub public code searches such as:
 * https://github.com/search?utf8=%E2%9C%93&q=bluebird+MongoClient+_id+find+limit+gt+language%3Ajavascript+&type=Code&ref=searchresults
 * which yielded sample code hits such as:
 * https://github.com/HabitRPG/habitrpg/blob/28f2e9c356d7053884107d90d04e28dde75fa81b/migrations/api_v3/coupons.js#L71
 */
var Promise = require('bluebird'); // jshint ignore:line
var _ = require('lodash');
var MongoClient = require('mongodb').MongoClient;
var dbHandleForShutDowns;
// option a: great for debugging
var logger = require('tracer').console();
// option b: general purpose use
//var logger = console;
//...
var getPage = function getPage(db, collectionName, query, projection, pageSize, processPage) {
  //console.log('DEBUG', 'filter:', JSON.stringify(query,null,2));
  projection = projection || {};
  projection['_id'] = true; // always project _id so we can page on it
  return db
    .collection(collectionName)
    .find(query)
    .project(projection)
    .sort({'_id': 1}).limit(pageSize)
    .toArray() // cursor methods return promises: http://mongodb.github.io/node-mongodb-native/2.1/api/Cursor.html#toArray
    .then(function processPagedResults(documents) {
      if (!documents || documents.length < 1) {
        // stop - no data left to traverse
        return Promise.resolve();
      }
      else {
        if (documents.length < pageSize) {
          // stop - last page
          return processPage(documents);
        }
        else {
          return processPage(documents) // process the results of the current page
            .then(function getNextPage() { // then go get the next page
              var last_id = documents[documents.length - 1]['_id'];
              query['_id'] = {'$gt': last_id};
              return getPage(db, collectionName, query, projection, pageSize, processPage);
            });
        }
      }
    });
};
//...
return MongoClient
  .connect(params.dbUrl, {
    promiseLibrary: Promise
  })
  .then(function(db) {
    dbHandleForShutDowns = db;
    return getPage(db, collectionName, {}, {}, 5, function processPage(pagedDocs){ console.log('do something with', pagedDocs); })
      .finally(db.close.bind(db));
  })
  .catch(function(err) {
    console.error("ERROR", err);
    dbHandleForShutDowns.close();
  });
The following two sections show how the code manipulates _id and makes it part of the query:
.sort({'_id':1}).limit(pageSize)
// [...]
var last_id = documents[documents.length-1]['_id'];
query['_id'] = {'$gt' : last_id};
Overall code flow:
Let getPage() handle the work; you can set the pageSize and query to your liking:
return getPage(db, collectionName, {}, {}, 5, function processPage(pagedDocs){console.log('do something with', pagedDocs);})
Method signature:
var getPage = function getPage(db, collectionName, query, projection, pageSize, processPage) {
Process pagedResults as soon as they become available:
return processPage(documents) // process the results of the current page
Move on to the next page:
return getPage(db, collectionName, query, projection, pageSize, processPage);
The code will stop when there is no more data left:
// stop - no data left to traverse
return Promise.resolve();
Or it will stop when working on the last page of data:
// stop - last page
return processPage(documents);
I hope this offers some inspiration, even if it's not an exact solution for your needs.
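If you prefer to stay inside Waterline rather than dropping to the raw driver, the same "last _id" idea can be sketched with criteria. This is only a rough sketch under assumptions: a model named Datatable, an _id-backed primary key exposed as id, Waterline's '>' modifier translating to $gt, and a Waterline version whose queries are thenable (otherwise use .exec()).
// Rough sketch, not tested against your schema: page through a Waterline model
// by remembering the last id instead of using skip.
function pageThroughDatatable(lastId, pageSize, processPage) {
  var criteria = lastId ? { id: { '>': lastId } } : {};
  return Datatable.find(criteria)
    .sort('id ASC')
    .limit(pageSize)
    .then(function (docs) {
      if (!docs.length) return;                  // no data left
      return Promise.resolve(processPage(docs))  // hand the page to the caller
        .then(function () {
          if (docs.length < pageSize) return;    // last page
          // recurse with the last id of this page as the new lower bound
          return pageThroughDatatable(docs[docs.length - 1].id, pageSize, processPage);
        });
    });
}
// usage: pageThroughDatatable(null, 10000, function (docs) { /* stream or write out docs */ });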

1. Run an aggregate to collect the matching ids
const _ = require('lodash')
const ObjectID = require('mongodb').ObjectID
const SailsMongoQuery = require('sails-mongo/lib/query/index.js')
const SailsMongoMatchMongoId = require('sails-mongo/lib/utils.js').matchMongoId

// Build the same criteria the blueprint/Waterline query would use
const fn = model.find(query).paginate(paginate)
const criteria = fn._criteria
const queryLib = new SailsMongoQuery(criteria, {})
const queryOptions = _.omit(queryLib.criteria, 'where')
const where = queryLib.criteria.where || {}
const queryWhere = Object.keys(where).reduce((acc, key) => {
  const val = where[key]
  acc[key] = SailsMongoMatchMongoId(val) ? new ObjectID(val) : val
  return acc
}, {})

// Translate the criteria into an aggregation pipeline: $match plus $sort/$skip/$limit
const aggregate = [
  { $match: queryWhere }
].concat(Object.keys(queryOptions).map(key => ({ [`$${key}`]: queryOptions[key] })))

// console.log('aggregate --->', JSON.stringify(aggregate, null, 2))
model.native((err, collection) => {
  if (err) return callback(err)
  collection.aggregate(aggregate, { allowDiskUse: true }).toArray(function (err, docs) {
    if (err) return callback(err)
    const pk = primaryKey === 'id' ? '_id' : primaryKey
    ids = docs.reduce((acc, doc) => [...acc, doc[pk]], [])
    callback()
  })
})
2. Run a Sails find by ids
query = Object.assign({}, query, { [primaryKey]: ids }) // check primary key in sails model
fn = model.find(query) // .populate or another method
fn.exec((err, results) => { console.log('result ->>>>', err, results) })
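To show how the two steps fit together, here is a hypothetical glue function; runAggregate stands for step 1 above wrapped as a function, and model, query, paginate and primaryKey are the same variables the snippets assume.
function findLargeResult(model, query, paginate, primaryKey, done) {
  // step 1: the aggregation collects only the ids (cheap, with allowDiskUse on the server)
  runAggregate(model, query, paginate, primaryKey, function (err, ids) {
    if (err) return done(err)
    // step 2: re-query through Waterline so .populate() and friends still work
    const byId = Object.assign({}, query, { [primaryKey]: ids })
    model.find(byId).exec(done)
  })
}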

Related

Algolia search with Firebase Cloud Functions returns null

I am currently writing a multi-tenancy app. The app stores data in Firestore/Algolia, and I have a cloud function to query the index so that a user can only access data of the tenant they belong to.
The search works fine, but the cloud function keeps returning null, although the logs show the search results.
I suspect that the object returned by the Algolia JS client cannot be converted to JSON, hence the callable functions framework is returning null. Below is a snippet of the code.
I have tried to stringify the object using JSON.stringify, but no luck. Any ideas?
exports.getCustomerSuggestions = functions.region('europe-central2').https.onCall(async (data, context) => {
  if (!context.auth) {
    throw new functions.https.HttpsError(
      'unauthenticated',
      'Authentication required.'
    );
  }
  const uid = context.auth.uid;
  functions.logger.info("Processing request from user " + uid + " with data: " + JSON.stringify(data));
  const client = algoliasearch(algoliaAppId, algoliaApiKey);
  const index = client.initIndex(angoliaIndex);
  userData = await getUserData(uid);
  userData = prepForFirestore(userData.data()); // strip object properties
  const searchTerm = data.query;
  const filters = 'serviceProviderId:' + userData.tenantId;
  var result;
  index.search(searchTerm, {
    filters: filters
  }).then(({hits}) => {
    result = hits;
    functions.logger.info("Returning: " + JSON.stringify(result)); //The log shows the search results
  });
  return {status: true, data: result}; // data is returned as null
});
Since index.search is asynchronous, your function returns before the search finishes. Await the index.search call so the function waits for the result before returning:
const result = await index.search(searchTerm, {
  filters: filters
})
return {status: true, data: result.hits}
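Putting it together, here is a sketch of the corrected callable; it reuses the question's helpers and config variables (getUserData, prepForFirestore, algoliaAppId, algoliaApiKey, angoliaIndex), which are assumed to exist as shown above.
exports.getCustomerSuggestions = functions.region('europe-central2').https.onCall(async (data, context) => {
  if (!context.auth) {
    throw new functions.https.HttpsError('unauthenticated', 'Authentication required.');
  }
  const uid = context.auth.uid;
  const client = algoliasearch(algoliaAppId, algoliaApiKey);
  const index = client.initIndex(angoliaIndex);
  let userData = await getUserData(uid);
  userData = prepForFirestore(userData.data()); // strip object properties
  // await the search so the function only returns once the hits are available
  const { hits } = await index.search(data.query, {
    filters: 'serviceProviderId:' + userData.tenantId
  });
  return { status: true, data: hits }; // hits are plain objects, so they serialize cleanly
});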

Calling stored procedures within fast-csv asynchronously

I am writing a backend API in Node.js and need the functionality for users to upload files with data, which then calls stored procedures to insert the data into MySQL. I'm thinking of using fast-csv as the parser, but I'm struggling with how to set up the call to the stored procedure inside the CSV stream. The idea is something like this:
var fs = require("fs");
var csv = require("fast-csv");
var stream1 = fs.createReadStream("files/testCsvFile.csv");
var numlines = 0;
csv
  .fromStream(stream1, { headers: true })
  .on("data", function(data) {
    //CALL TO SP with params from "data"//
    numlines++;
  })
  .on("end", function() {
    console.log("done");
  });
In other parts of the application I have set up routes as follows:
auth.post("/verified", async (req, res) => {
  var user = req.session.passwordless;
  if (user) {
    const rawCredentials = await admin.raw(getUserRoleCredentials(user));
    const { user_end, role } = await normalizeCredentials(rawCredentials);
    const user_data = { user_end, role };
    res.send(user_data);
  } else {
    res.sendStatus(401);
  }
});
...that is, routes are written in the async/await style, with all queries (stored procedure calls) defined as promises. I would like to follow the same pattern in the upload/parse-CSV/call-SP-for-every-line function.
This is doing the job for me. Can you please describe how to achieve that with your framework? I believe it can be done, I just need to configure it correctly.
//use fast-csv to stream data from a file
csv
  .fromPath(form.FileName, { headers: true })
  .on("data", async data => {
    const query = await queryBuilder({
      schema,
      routine,
      parameters,
      request
    }); //here we prepare query for calling the SP with parameters from data
    winston.info(query + JSON.stringify(data));
    const rawResponse = await session.raw(query); //here the query gets executed
    fileRows.push(data); // push each row - for testing only
  })
  .on("end", function() {
    console.log(fileRows);
    fs.unlinkSync(form.FileName); // remove temp file
    //process "fileRows" and respond
    res.end(JSON.stringify(fileRows)); // - for testing
  });
As mentioned in the comment, I wrote scramjet to handle use cases like this with ease... Please correct me if I understood it wrong, but I understand you want to run the two await lines for every CSV row.
If so, your code would look like this (updated to match your comment/answer):
var fs = require("fs");
var csv = require("fast-csv");
var stream1 = fs.createReadStream("files/testCsvFile.csv");
var {DataStream} = require("scramjet");

DataStream
  // the following line will convert any stream to scramjet.DataStream
  .from(csv.fromStream(stream1, { headers: true }))
  // the next line controls how many simultaneous operations are made
  // I assumed 16, but if you're fine with 40 or you want 1 - go for it.
  .setOptions({maxParallel: 16})
  // the next line will call your async function and wait until it's completed
  // and control the back-pressure of the stream
  .do(async (data) => {
    const query = await queryBuilder({
      schema,
      routine,
      parameters,
      request
    }); //here we prepare query for calling the SP with parameters from data
    winston.info(query + JSON.stringify(data));
    const rawResponse = await session.raw(query); //here the query gets executed
    return data; // push each row - for testing only
  })
  // next line will run the stream until end and return a promise
  .toArray()
  .then(fileRows => {
    console.log(fileRows);
    fs.unlinkSync(form.FileName); // remove temp file
    //process "fileRows" and respond
    res.end(JSON.stringify(fileRows)); // - for testing
  })
  .catch(e => {
    res.writeHead(500); // some error handling
    res.end(e.message);
  });
// you may want to put an await statement before this, or call then to check
// for errors, which I assume is your use case.
To answer your comment question: if you were to use an async function in the on("data") handler, you would need to collect an array of promises and await Promise.all of that array on stream end. The data events themselves fire synchronously and won't wait for your handler, so an async function in an event handler alone won't do it.
In scramjet this happens under the hood, so you can use the function.
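For comparison, here is a sketch of that Promise.all approach without scramjet. It reuses the queryBuilder and session helpers from the earlier snippet (assumed, not shown here), and note that it gives you no back-pressure or concurrency control.
var pending = [];
csv
  .fromPath(form.FileName, { headers: true })
  .on("data", function (data) {
    // collect one promise per row instead of awaiting inside the handler
    pending.push(
      Promise.resolve(queryBuilder({ schema, routine, parameters, request }))
        .then(function (query) { return session.raw(query); })
        .then(function () { return data; })
    );
  })
  .on("end", function () {
    Promise.all(pending)
      .then(function (fileRows) {
        fs.unlinkSync(form.FileName); // remove temp file
        res.end(JSON.stringify(fileRows));
      })
      .catch(function (e) {
        res.writeHead(500);
        res.end(e.message);
      });
  });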

Using KnexJS to query X number of tables?

I have a unique situation here which I am having trouble solving in an elegant fashion.
A user passes up an array of signals which they want to export data for. This array can contain anywhere from one to any number of signals, so first I fetch the table names (each signal stores data in a separate table) based on the signals passed and store those in an object.
The next step is to iterate over that object (which contains the table names I need to query), execute the query per table, and store the results in an object which will be passed to the next link in the promise chain. I haven't seen any examples online of good ways to handle this, but I know it's a fairly unique scenario.
My code prior to attempting to add support for arrays of signals was simply the following:
exports.getRawDataForExport = function(data) {
  return new Promise(function(resolve, reject) {
    var getTableName = function() {
      return knex('monitored_parameter')
        .where('device_id', data.device_id)
        .andWhere('internal_name', data.param)
        .first()
        .then(function(row) {
          if(row) {
            var resp = {"table" : 'monitored_parameter_data_' + row.id, "param" : row.display_name};
            return resp;
          }
        });
    }
    var getData = function(runningResult) {
      return knexHistory(runningResult.table)
        .select('data_value as value', 'unit', 'created')
        .then(function(rows) {
          runningResult.data = rows;
          return runningResult;
        });
    }
    var createFile = function(runningResult) {
      var fields = ['value', 'unit', 'created'],
          csvFileName = filePathExport + runningResult.param + '_export.csv',
          zipFileName = filePathExport + runningResult.param + '_export.gz';
      var csv = json2csv({data : runningResult.data, fields : fields, doubleQuotes : ''});
      fs.writeFileSync(csvFileName, csv);
      // create streams for gZipping
      var input = fs.createReadStream(csvFileName);
      var output = fs.createWriteStream(zipFileName);
      // gZip
      input.pipe(gzip).pipe(output);
      return zipFileName;
    }
    getTableName()
      .then(getData)
      .then(createFile)
      .then(function(zipFile) {
        resolve(zipFile);
      });
  });
}
Obviously that works fine for a single table and I have gotten the getTableName() and createFile() methods updated to handle arrays of data so this question only pertains to the getData() method.
Cheers!
This kind of problem is far from unique and, approached the right way, is very simply solved.
Don't rewrite any of the three internal functions.
Just purge the explicit promise construction antipattern from .getRawDataForExport() such that it returns a naturally occurring promise and propagates asynchronous errors to the caller.
return getTableName()
  .then(getData)
  .then(createFile);
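The general shape of that fix, as a generic sketch with stand-in steps (stepOne/stepTwo/stepThree are placeholders, not the project's real helpers):
function stepOne()    { return Promise.resolve(1); }
function stepTwo(n)   { return Promise.resolve(n + 1); }
function stepThree(n) { return Promise.resolve(n * 2); }

// Before: explicit promise construction around code that already returns promises;
// a rejection inside the chain never reaches the caller.
function beforeFix() {
  return new Promise(function (resolve) {
    stepOne().then(stepTwo).then(stepThree).then(resolve);
  });
}

// After: just return the chain; the result and any error propagate naturally.
function afterFix() {
  return stepOne().then(stepTwo).then(stepThree);
}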
Now, .getRawDataForExport() is the basic building-block for your multiple "gets".
Then, a design choice; parallel versus sequential operations. Both are very well documented.
Parallel:
exports.getMultiple = function(arrayOfSignals) {
  return Promise.all(arrayOfSignals.map(getRawDataForExport));
};
Sequential:
exports.getMultiple = function(arrayOfSignals) {
  return arrayOfSignals.reduce(function(promise, signal) {
    return promise.then(function() {
      return getRawDataForExport(signal);
    });
  }, Promise.resolve());
};
In the first instance, for best potential performance, try parallel.
If the server chokes, or is likely ever to choke, on parallel operations, choose sequential.
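For instance, the parallel variant resolves with one zip file path per signal; the device_id/param values below are hypothetical. (Note that the sequential variant, as written, resolves with only the last signal's zip path.)
exports.getMultiple([
  { device_id: 42, param: 'temperature' },
  { device_id: 42, param: 'humidity' }
]).then(function (zipFiles) {
  console.log('created archives:', zipFiles); // e.g. two .gz paths
}).catch(function (err) {
  console.error('export failed', err);
});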

When I yield a function call (yield client.query), why doesn't the code inside its callback execute?

I tried to use Node.js to write some code that reads from MySQL. My code looks like this:
exports.selectport = function*(projectname){
  var strsql = "SELECT DISTINCT portname FROM invoketable where projectname = '"+projectname+"'";
  var rest;
  yield 1;
  yield client.query(strsql, function(err, results) {
    if(err) {
      console.log(err);
      return err;
    }
    rest = results[0].portname;
    console.log(rest);
    return rest;
  });
  yield rest;
}
and I used code to call the function:
var gen = mysqlinsertp.selectport(dataproject);
var ret = gen.next();
console.log(ret.value); // 1
console.log(ret.done); // false
var ret2 = gen.next();
console.log(ret2.value); // Query{...}
console.log(ret2.done); // false
var ret3 = gen.next();
console.log(ret3.value); // undefined
console.log(ret3.done); // false
var ret4 = gen.next();
console.log(ret4.value); // undefined
console.log(ret4.done); // true
I want to know why rest = results[0].portname; console.log(rest); did not execute.
It appears you're mixing generators and callbacks.
You would only use yield if this: client.query(sql) returns a promise or a thunk, in which case you wouldn't use a callback as you've shown above.
If the library you're using doesn't return promises or thunks, you'd need to either:
1. Choose a library that supports promises, so the call becomes yieldable
2. Write your own promise wrapper around the callback (easy to look up; see the sketch below)
3. Just use callbacks and don't try to yield (the straight callback style)
The two call styles look like this:
// yieldable
let results = yield client.query(sql);
// straight callback
client.query(sql, function(err, results){...});
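Here is a sketch of option 2, a hand-rolled promise wrapper around the callback API. It assumes the same client object from the question and a node-mysql-style (err, results) callback signature.
function queryAsync(sql, params) {
  return new Promise(function (resolve, reject) {
    client.query(sql, params, function (err, results) {
      if (err) return reject(err);
      resolve(results);
    });
  });
}

// With a generator runner such as co, the call becomes yieldable:
// co(function* () {
//   var results = yield queryAsync('SELECT DISTINCT portname FROM invoketable WHERE projectname = ?', [projectname]);
//   return results[0].portname;
// });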
Also, your SQL is susceptible to SQL injection attacks. If the user specifies the project name, they could send a project name like junk'; DROP TABLE users; in which case you'd lose your users table, or other tables...
You should escape your parameters or use parameterized queries: connection.query('SELECT DISTINCT portname FROM invoketable where projectname = ?', [project])

Call multiple JSON data/files in one getJSON request

I have this code:
var graphicDataUrl = 'graphic-data.json';
var webDataUrl = 'web-data.json';
var templateHtml = 'templating.html';
var viewG = $('#view-graphic');
var viewW = $('#view-web');
$.getJSON(dataUrls, function(data) {
  $.get(templateHtml, function(template) {
    template = Handlebars.compile(template);
    var example = template({ works: data });
    viewG.html(example);
    viewW.html(example);
  });
});
What is the best way to call both the webDataUrl and graphicDataUrl JSON files and use their data to display them in the two different divs (viewG and viewW)?
The best way is to do each one individually, and to handle error conditions:
$.getJSON(graphicDataUrl)
  .then(function(data) {
    // ...worked, put it in #view-graphic
  })
  .fail(function() {
    // ...didn't work, handle it
  });
$.getJSON(webDataUrl)
  .then(function(data) {
    // ...worked, put it in #view-web
  })
  .fail(function() {
    // ...didn't work, handle it
  });
That allows the requests to happen in parallel, and updates the page as soon as possible when each request completes.
If you want to run the requests in parallel but wait to update the page until they both complete, you can do that with $.when:
var graphicData, webData;
$.when(
  $.getJSON(graphicDataUrl, function(data) {
    graphicData = data;
  }),
  $.getJSON(webDataUrl, function(data) {
    webData = data;
  })
).then(function() {
  if (graphicData) {
    // Worked, put graphicData in #view-graphic
  }
  else {
    // Request for graphic data didn't work, handle it
  }
  if (webData) {
    // Worked, put webData in #view-web
  }
  else {
    // Request for web data didn't work, handle it
  }
});
...but the page may seem less responsive since you're not updating when the first request comes back, but only when both do.
Just in case it is useful to anyone else who may come across this — and thanks to the Promise advances in jQuery — T.J. Crowder's answer can now be improved into one succinct and general function:
/**
 * Load multiple JSON files.
 *
 * Example usage:
 *
 *   jQuery.getMultipleJSON('file1.json', 'file2.json')
 *     .fail(function(jqxhr, textStatus, error){})
 *     .done(function(file1, file2){})
 *   ;
 */
jQuery.getMultipleJSON = function(){
  return jQuery.when.apply(jQuery, jQuery.map(arguments, function(jsonfile){
    return jQuery.getJSON(jsonfile);
  })).then(function(){
    var def = jQuery.Deferred();
    return def.resolve.apply(def, jQuery.map(arguments, function(response){
      return response[0];
    }));
  });
};
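Applied to the question's two files, reusing the variables and template code from the question, the usage could look like this:
jQuery.getMultipleJSON(graphicDataUrl, webDataUrl)
  .fail(function(jqxhr, textStatus, error){
    console.error('Failed to load JSON:', textStatus, error);
  })
  .done(function(graphicData, webData){
    jQuery.get(templateHtml, function(template){
      template = Handlebars.compile(template);
      viewG.html(template({ works: graphicData }));
      viewW.html(template({ works: webData }));
    });
  });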
However the point about not giving any feedback to the user — whilst waiting for the full load — is a good one. So for those that prefer to give responsive feedback, here's a slightly more complicated version that supports progress.
/**
 * Load multiple json files, with progress.
 *
 * Example usage:
 *
 *   jQuery.getMultipleJSON('file1.json', 'file2.json')
 *     .progress(function(percent, count, total){})
 *     .fail(function(jqxhr, textStatus, error){})
 *     .done(function(file1, file2){})
 *   ;
 */
jQuery.getMultipleJSON = function(){
  var
    num = 0,
    def = jQuery.Deferred(),
    map = jQuery.map(arguments, function(jsonfile){
      return jQuery.getJSON(jsonfile).then(function(){
        def.notify(1/map.length * ++num, num, map.length);
        return arguments;
      });
    })
  ;
  jQuery.when.apply(jQuery, map)
    .fail(function(){ def.rejectWith(def, arguments); })
    .done(function(){
      def.resolveWith(def, jQuery.map(arguments, function(response){
        return response[0];
      }));
    })
  ;
  return def;
};
This code is simple and lets you access both responses together in one callback:
$.when(
  $.getJSON(graphicDataUrl),
  $.getJSON(webDataUrl)
).done(function(data1, data2) {
  console.log(data1[0]);
  console.log(data2[0]);
});
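One caveat with this approach: $.when rejects as soon as either request fails, so a single .fail handler covers both requests. A small sketch:
$.when(
  $.getJSON(graphicDataUrl),
  $.getJSON(webDataUrl)
).done(function(data1, data2) {
  console.log(data1[0]);
  console.log(data2[0]);
}).fail(function(jqxhr, textStatus, error) {
  // runs if either request fails; the done callback is not called
  console.error('Loading JSON failed:', textStatus, error);
});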