How do I transform my SQL query to a MongoDB query?

I migrated timeseries data from SQL to MongoDB. I'll give you an example:
Let's say we have a measurement device with an ID, where once per minute a value gets read. So per day, we have 24 hours * 60 minutes = 1440 values for that device.
In SQL, we have 1440 single rows for this device per day:
ID Timestamp Value
400001 01.01.2017 00:00:00 ...
"" 01.01.2017 00:01:00 ...
"" ... ...
"" 01.01.2017 23:59:00 ...
I migrated the data to MongoDB, where I now have one document per day. The values are distributed across a 24-element Hours array whose elements are arrays of 60 minute fields containing the values (and only one Timestamp with the date XX-XX-XXXX 00:00:00):
{ ID: 400001,
Timestamp: 01.01.2017 00:00:00,
Hours:
[ 0: [0: ..., 1: ..., 2: ..., ....... 59: ... ],
1: [0: ..., 1: ..., 2: ..., ....... 59: ... ],
.
.
23: [0: ..., 1: ..., 2: ..., ....... 59: ... ]
]
}
My problem is:
I want to transform the following SQL statement to MongoDB:
SELECT (Val) AS Val,
       (UNIX_TIMESTAMP(DATE_FORMAT(ArrivalTime, '%Y-%m-%d %H:%i:00'))) * 1000 AS timestmp
FROM database
WHERE ID = 400001
  AND ArrivalTime BETWEEN FROM_UNIXTIME(1470002400) AND FROM_UNIXTIME(1475272800)
ORDER BY ArrivalTime ASC
Since in MongoDB I only save the day Timestamp and then split the values into arrays, I don't have a Timestamp for each value like in SQL. So if I want, for example, to get the values between 01.01.2017 02:14:00 and 01.01.2017 18:38:00, how would I do that?
I made a MongoDB query that can give me the Values between two whole days:
db.getCollection('test').aggregate([
    { $match: { ID: '400001', $and: [
        { Timestamp_day: { $gte: new ISODate("2016-08-01 00:00:00.000Z") } },
        { Timestamp_day: { $lte: new ISODate("2016-10-01 00:00:00.000Z") } }
    ] } },
    { $unwind: "$Hours" },
    { $unwind: "$Hours" },
    { $group: { _id: '$Timestamp_day', Value: { $push: "$Hours" } } },
    { $sort: { _id: 1 } }
]);
But I need it to work like in SQL, so that I can also return the values for just a few hours, with the correct Timestamp given for each value.
I hope you can help me.

This should get you going:
db.collection.aggregate([{
$match: {
"ID": '400001',
"Timestamp_day": {
$gte: new ISODate("2017-01-01T00:00:00.000Z"),
$lte: new ISODate("2017-01-01T00:00:00.000Z")
}
}
}, {
$unwind: {
path: "$Hours",
includeArrayIndex: "Hour"
}
}, {
$unwind: {
path: "$Hours",
includeArrayIndex: "Minute"
}
}, {
$project: {
"_id": 0, // remove the "_id" field
"Val": "$Hours", // rename "Hours" to "Val"
"Timestamp": { // "resolve" our timestamp...
$add: // ...by adding
[
{ $multiply: [ "$Hour", 60 * 60 * 1000 ] }, // ...the number of hours in milliseconds
{ $multiply: [ "$Minute", 60 * 1000 ] }, // ...plus the number of minutes in milliseconds
"$Timestamp_day", // to the "Timestamp_day" value
]
}
}
}, {
$sort: {
"Timestamp": 1 // oh well, sort by timestamp ascending
}
}]);
With an input document of
{
"_id" : ObjectId("5a0e7d096216d24dd605cdec"),
"ID" : "400001",
"Timestamp_day" : ISODate("2017-01-01T00:00:00.000Z"),
"Hours" : [
[
0.0,
0.1,
2.0
],
[
1.0,
1.1,
2.1
],
[
2.0,
2.1,
2.2
]
]
}
the results look like this:
/* 1 */
{
"Val" : 0.0,
"Timestamp" : ISODate("2017-01-01T00:00:00.000Z")
}
/* 2 */
{
"Val" : 0.1,
"Timestamp" : ISODate("2017-01-01T00:01:00.000Z")
}
/* 3 */
{
"Val" : 2.0,
"Timestamp" : ISODate("2017-01-01T00:02:00.000Z")
}
/* 4 */
{
"Val" : 1.0,
"Timestamp" : ISODate("2017-01-01T01:00:00.000Z")
}
/* 5 */
{
"Val" : 1.1,
"Timestamp" : ISODate("2017-01-01T01:01:00.000Z")
}
/* 6 */
{
"Val" : 2.1,
"Timestamp" : ISODate("2017-01-01T01:02:00.000Z")
}
/* 7 */
{
"Val" : 2.0,
"Timestamp" : ISODate("2017-01-01T02:00:00.000Z")
}
/* 8 */
{
"Val" : 2.1,
"Timestamp" : ISODate("2017-01-01T02:01:00.000Z")
}
/* 9 */
{
"Val" : 2.2,
"Timestamp" : ISODate("2017-01-01T02:02:00.000Z")
}
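To restrict the output to an arbitrary sub-day range such as the 02:14:00 to 18:38:00 example from the question, one option would be to append another $match on the computed Timestamp after the $project stage (the first $match on Timestamp_day still has to cover every whole day touched by the range). A minimal sketch, assuming the same collection and field names as above:
db.collection.aggregate([
    // ...the $match, both $unwind stages and the $project from above...
    {
        $match: {
            "Timestamp": {
                $gte: new ISODate("2017-01-01T02:14:00.000Z"),
                $lte: new ISODate("2017-01-01T18:38:00.000Z")
            }
        }
    },
    {
        $sort: { "Timestamp": 1 }
    }
]);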
UPDATE:
Based on your comment you need to calculate the difference between any value and its respective preceding value. This can be done the following way - there might be nicer ways of achieving the same thing, though... The first part is almost identical to the solution above except it has an added $match stage to remove null values as per your specification.
db.collection.aggregate([{
$match: {
"ID": '400001',
"Timestamp_day": {
$gte: new ISODate("2017-01-01T00:00:00.000Z"),
$lte: new ISODate("2017-01-01T00:00:00.000Z")
}
}
}, {
$unwind: {
path: "$Hours",
includeArrayIndex: "Hour"
}
}, {
$unwind: {
path: "$Hours",
includeArrayIndex: "Minute"
}
}, {
$match: {
"Hours": { $ne: null } // get rid of all null values
}
}, {
$project: {
"_id": 0, // remove the "_id" field
"Val": "$Hours", // rename "Hours" to "Val"
"Timestamp": { // "resolve" our timestamp...
$add: // ...by adding
[
{ $multiply: [ "$Hour", 60 * 60 * 1000 ] }, // ...the number of hours in milliseconds
{ $multiply: [ "$Minute", 60 * 1000 ] }, // ...plus the number of minutes in milliseconds
"$Timestamp_day", // to the "Timestamp_day" value
]
}
}
}, {
$sort: {
"Timestamp": 1 // oh well, sort by timestamp ascending
}
}, {
$group: {
"_id": null, // throw all documents in the same aggregated document
"Docs": {
$push: "$$ROOT" // and store our documents in an array
}
}
}, {
$unwind: {
path: "$Docs", // we flatten the "values" array
includeArrayIndex: "Docs.Index", // this will give us the index of every element - there might be more elegant solutions using $map and $let...
}
}, {
$group: { // YES, unfortunately a second time... but this time we have the array index for each element
"_id": null, // throw all documents in the same aggregated document
"Docs": {
$push: "$Docs" // and store our documents in an array
}
}
}, {
$addFields: {
"Docs": {
$let: {
vars: { "shiftedArray": { $concatArrays: [ [ null ], "$Docs.Val" ] } }, // shift value array by one to the right and put a null object at the start
in: {
$map: {
input: "$Docs",
as: "d",
in: {
"Timestamp" : "$$d.Timestamp",
"Val": { $ifNull: [ { $abs: { $subtract: [ "$$d.Val", { $arrayElemAt: [ "$$shiftedArray", "$$d.Index" ] } ] } }, 0 ] }
}
}
}
}
}
}
}, {
$unwind: "$Docs"
}, {
$replaceRoot: {
newRoot: "$Docs"
}
}]);
The results using your sample data set look like this:
/* 1 */
{
"Timestamp" : ISODate("2017-01-01T00:00:00.000Z"),
"Val" : 0.0
}
/* 2 */
{
"Timestamp" : ISODate("2017-01-01T00:01:00.000Z"),
"Val" : 0.0
}
/* 3 */
{
"Timestamp" : ISODate("2017-01-01T00:02:00.000Z"),
"Val" : 2.0
}
/* 4 */
{
"Timestamp" : ISODate("2017-01-01T00:04:00.000Z"),
"Val" : 3.0
}
/* 5 */
{
"Timestamp" : ISODate("2017-01-01T00:05:00.000Z"),
"Val" : 0.0
}
/* 6 */
{
"Timestamp" : ISODate("2017-01-01T00:06:00.000Z"),
"Val" : 1.0
}
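As a side note: if you happen to be on MongoDB 5.0 or newer, the difference-to-previous-value part can probably be expressed more directly with a $setWindowFields stage using the $shift window operator, instead of the double $group/$unwind dance. A rough, untested sketch of the stages that would follow the first $project (assuming all values belong to a single device, so no partitioning is needed):
{
    $setWindowFields: {
        sortBy: { Timestamp: 1 },                                           // order the values by their computed timestamp
        output: {
            Prev: { $shift: { output: "$Val", by: -1, default: null } }     // value of the preceding document
        }
    }
}, {
    $project: {
        "Timestamp": 1,
        "Val": { $ifNull: [ { $abs: { $subtract: [ "$Val", "$Prev" ] } }, 0 ] } // absolute difference, 0 if there is no predecessor
    }
}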

Maybe you could help me once more, even a hint would be enough, #dnickless.. I would need a query that gives me the absolute value of the difference to the previously measured value (within a certain time range, for a certain ID).
So, as an example:
Timestamp_day: ISODate("2017-01-01T01:00:00.000Z"),
Hours: [
[ 1.0, 1.0, -1.0, null, 2.0, 2.0, 3.0, ... ],
[ ... ],
...
]
And then as output:
{
'Timestamp' : ISODate("2017-01-01T00:00:00.000Z"),
'Val' : 0.0 /* nothing - 1.0 */
}
{
'Timestamp' : ISODate("2017-01-01T00:01:00.000Z"),
'Val' : 0.0 /* 1.0 - 1.0 */
}
{
'Timestamp' : ISODate("2017-01-01T00:02:00.000Z"),
'Val' : 2.0 /* 1.0 - -1.0 */
}
{
'Timestamp' : ISODate("2017-01-01T00:04:00.000Z"),
'Val' : 3.0 /* -1.0 - (null) - 2.0 */
}
{
'Timestamp' : ISODate("2017-01-01T00:05:00.000Z"),
'Val' : 0.0 /* 2.0 - 2.0 */
}
{
'Timestamp' : ISODate("2017-01-01T00:06:00.000Z"),
'Val' : 1.0 /* 2.0 - 3.0 */
}
I hope it's reasonably clear what I mean.

Related

Can this JSON with "timestamp" : Double format be aggregated for SUM, AVG in MongoDB

I have imported the JSON data below into its own collection in a MongoDB database. I'm trying to aggregate the values (i.e. 40, 30, 30) and SUM and AVG them, as they reside in the innermost embedded document. I'm having a problem doing this: when I try using dot notation I cannot get any values. I suspect the unique timestamps (i.e. 1567544426000, 1567541464000, 1567541475000) are the problem. Is this JSON formatted correctly for aggregation, and how would I do it? Thanks for any help, or if you can point me in the right direction to find out how to do SUM, AVG etc. on the data.
I've tried using NoSQLBooster and Query Assist for MongoDB.
{
"Barcode": "97-1908-577-1032-BE1-332",
"IP": "192.162.656.111",
"VFD": {
"CurrentPV": {
"Type": "Speed",
"Data": {
"1567544426000": 40,
"1567541464000": 30
"1567541475000": 30
}
},
"CurrentSP": {
"Type": "Speed",
"Data": {
"1567544426000": 55,
"1567541464000": 5
"1567541488000": 10
}
},
"Program_Running": {
"Type": "Active",
"Data": {
"1567544426000": 1,
"1567541464000": 0
"1567541475000": 3
}
}
},
"Equipment": "PieceOfEquipment",
"Location": "Garage",
"RunEnd": "NA",
"RunStart": 1533541438
}
I can't seem to reach the values even when I use dot notation down to the "Data" branch object (i.e. Equipment.VFD.CurrentPV.Data); no result sets are returned.
We can convert VFD.CurrentPV.Data into an array of key-value pairs using $objectToArray and then perform SUM and AVG on the values themselves.
The following query can get us the expected output:
db.collection.aggregate([
{
$addFields:{
"data":{
$objectToArray: "$VFD.CurrentPV.Data"
}
}
},
{
$project:{
"sum":{
$sum:"$data.v"
},
"avg":{
$avg:"$data.v"
}
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d830f3afb35a835fbd8638e"),
"Barcode" : "97-1908-577-1032-BE1-332",
"IP" : "192.162.656.111",
"VFD" : {
"CurrentPV" : {
"Type" : "Speed",
"Data" : {
"1567544426000" : 40,
"1567541464000" : 30,
"1567541475000" : 30
}
},
"CurrentSP" : {
"Type" : "Speed",
"Data" : {
"1567544426000" : 55,
"1567541464000" : 5,
"1567541488000" : 10
}
},
"Program_Running" : {
"Type" : "Active",
"Data" : {
"1567544426000" : 1,
"1567541464000" : 0,
"1567541475000" : 3
}
}
},
"Equipment" : "PieceOfEquipment",
"Location" : "Garage",
"RunEnd" : "NA",
"RunStart" : 1533541438
}
Output:
{
"_id" : ObjectId("5d830f3afb35a835fbd8638e"),
"sum" : 100,
"avg" : 33.333333333333336
}
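If, rather than a single sum/avg, you ever need one document per reading with a proper date, the same $objectToArray result can be unwound and the millisecond string keys converted. A hedged sketch, assuming MongoDB 4.0+ for $toLong/$toDate:
db.collection.aggregate([
    { $addFields: { "data": { $objectToArray: "$VFD.CurrentPV.Data" } } },
    { $unwind: "$data" },
    {
        $project: {
            "_id": 0,
            "timestamp": { $toDate: { $toLong: "$data.k" } }, // e.g. "1567544426000" -> an ISODate
            "value": "$data.v"
        }
    }
]).pretty()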

Get the nth item of JSON array in MongoDB

Using MongoDB, how do you get back the date and the 3rd "obs" value from the document below?
{ "data" : [
{ "val" : [
{ "obs" : "2/3/2016"
},
{ "obs" : 41.8599992990494
},
{ "obs" : 41.3111999630928
},
{ "obs" : 5.048
}
]
},
{ "val" : [
{ "obs" : "2/4/2016"
},
{ "obs" : 39.394998550415
},
{ "obs" : 41.8486998975277
},
{ "obs" : NumberInt(0)
}
]
},
{ "val" : [
{ "obs" : "2/5/2016"
},
{ "obs" : NumberInt(0)
},
{ "obs" : 40.2090013027191
},
{ "obs" : 24.2410004138947
},
{ "obs" : 3.629
}
]
}
]
}
Started with this:
db.myColl.find({},{"_id":0, "data.val.obs": 1, })
would like:
["2/3/2016", 41.3111], ["2/4/2016", 41.8486]
Here is how you could do this in MongoDB, starting from v3.4:
db.getCollection('test').aggregate([
{
$addFields: {
data: {
$map: {
input: "$data",
as: "item",
in: {$concatArrays: [{$slice: ['$$item.val', 1]}, {$slice: ['$$item.val', 2, 1]}]}
}
}
}
}
]);
So basically I'm using $addFields so as not to lose other properties of the root document (as you might need them). If you don't need them you can switch to $project.
Example: collection records look like this: {_id: ..., data: [...], data_2: [...]}.
If you run the query as is you'll have the 'data' array filtered, but data_2 will still be unchanged. If you replace $addFields with $project you'll lose data_2 (or you need to explicitly tell mongo to keep it by passing data_2: true).
Then I'm mapping each element of the 'data' array and assigning the result back to 'data', so in effect the data property is overridden by the filtered array.
To get the 1st and 3rd elements I use $slice (each $slice returns an array of one document), and then I join them into a single array with $concatArrays.
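If you would rather end up with plain value pairs like ["2/3/2016", 41.3111999630928] instead of the wrapped {"obs": ...} documents, a variation of the same $map can pull the obs fields out directly with $arrayElemAt. A sketch under the same v3.4 assumption:
db.getCollection('test').aggregate([
    {
        $project: {
            _id: 0,
            data: {
                $map: {
                    input: "$data",
                    as: "item",
                    in: [
                        { $arrayElemAt: [ "$$item.val.obs", 0 ] }, // the date string, e.g. "2/3/2016"
                        { $arrayElemAt: [ "$$item.val.obs", 2 ] }  // the 3rd "obs" value
                    ]
                }
            }
        }
    }
]);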

Retrieve item list by checking multiple attribute values in MongoDB in golang

This question is about MongoDB: how to retrieve selected items by selecting on multiple conditions, like the IN condition in MySQL:
SELECT * FROM venuelist WHERE venueid IN (venueid1, venueid2)
I have attached the JSON data structure that I have used [Ref: JSON STRUCTURE OF MONGODB].
As an example, it has a venueList; inside the venue list, each entry has a venue id and a sum of user agent names with the total count as the value. User agents mean user OS, browser and device information; in this case I used the OS distribution, so I count linux and ubuntu per venueid.
It looks like this:
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
Finally, I want to get the count of all linux users by selecting on a list of venue ids in one find query in MongoDB.
As an example, I want to select the count of all linux users where the venue id is VID1212 or VID4343.
Ref: JSON STRUCTURE OF MONGODB
{
"_id" : ObjectId("57f940c4932a00aba387b0b0"),
"tenantID" : 1,
"date" : "2016-10-09 00:23:56",
"venueList" : [
{
"id" : “VID1212”,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
"ssidList" : [ // this is the list of ssids in the venue
{
"id" : “SSID1212”,
"sum" : [
{
"name" : "linux",
"value" : 8
},
{
"name" : "ubuntu",
"value" : 6
}
],
"macList" : [ // this is the mac list inside a particular ssid, e.g. inside SSID1212
{
"id" : “12:12:12:12:12:12”,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 1
}
]
}
]
}
]
},
{
"id" : “VID4343”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"ssidList" : [
{
"id" : “SSID4343”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"macList" : [
{
"id" : “43:43:43:43:43:34”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
]
}
]
}
]
}
]
}
I am using golang to manipulate the data in MongoDB via the mgo.v2 package.
The expected output is:
linux : 12+2 = 14
ubuntu : 4+0 = 4
Don't consider the inner lists in venueList.
You'd need to use the aggregation framework where you would run an aggregation pipeline that first filters the documents in the collection based on
the venueList ids using the $match operator.
The next stage would entail flattening the venueList and sum subdocument arrays so that the data in the documents can be processed further down the pipeline as denormalised entries. The $unwind operator is useful here.
A further filter using $match is necessary after unwinding so that only the documents you want to aggregate are allowed into the next stage.
The main stage would be the $group operator stage, which aggregates the filtered documents to create the desired sums using the accumulator operator $sum. For the desired result, you would need a ternary operator like $cond to create the independent count fields, since it feeds the appropriate value to the $sum expression depending on the name value.
Putting this altogether, consider running the following pipeline:
db.collection.aggregate([
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList" },
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList.sum" },
{
"$group": {
"_id": null,
"linux": {
"$sum": {
"$cond": [
{ "$eq": [ "$venueList.sum.name", "linux" ] },
"$venueList.sum.value", 0
]
}
},
"ubuntu": {
"$sum": {
"$cond": [
{ "$eq": [ "$venueList.sum.name", "ubuntu" ] },
"$venueList.sum.value", 0
]
}
}
}
}
])
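With the sample document from the question, this pipeline should return something along the lines of:
{ "_id" : null, "linux" : 14, "ubuntu" : 4 }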
For usage with mgo, you can convert the above pipeline using the guidance in http://godoc.org/labix.org/v2/mgo#Collection.Pipe
For a more flexible and more performant alternative, which executes much faster than the above and also takes into consideration unknown values in the sum list, run the following pipeline instead:
db.collection.aggregate([
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList" },
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList.sum" },
{
"$group": {
"_id": "$venueList.sum.name",
"count": { "$sum": "$venueList.sum.value" }
}
},
{
"$group": {
"_id": null,
"counts": {
"$push": {
"name": "$_id",
"count": "$count"
}
}
}
}
])
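For the same sample document, this alternative should produce a single document with the per-name totals pushed into an array, roughly like this (the order of the entries is not guaranteed):
{
    "_id" : null,
    "counts" : [
        { "name" : "linux", "count" : 14 },
        { "name" : "ubuntu", "count" : 4 }
    ]
}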

Query for: How many elements of an array match within a string in MongoDB

Suppose my JSON is like following:
{ "id":0,"keywords":"amount,debited,account,ticket,not,generated,now" }
{ "id":1,"keywords":"how,safe,gocash" }
{ "id":2,"keywords":"how,referral,program,gocash,works" }
If my array is like
array =["how","safe","gocash"];
then how do I get the count of matches for each document: checking against the first the count should be zero, against the second three, and against the third two? (That means: how many elements of the array are present in the string.)
Is it possible, and what approach should I adopt?
One way of solving this would require some form of modification to your schema by adding an extra field that holds the keywords in an array. This field becomes quite handy when running an aggregation pipeline to return the desired count of elements of an array that match the original string.
To add the additional field you would need the Bulk API operations to update the collection as follows:
var bulk = db.collection.initializeOrderedBulkOp(),
count = 0;
db.collection.find({"keywords": { "$exists": true, "$type": 2 }}).forEach(function(doc) {
var keywordsArray = doc.keywords.split(',');
bulk.find({ "_id": doc._id }).updateOne({
"$set": { "keywordsArray": keywordsArray }
});
count++;
if (count % 100 == 0) {
bulk.execute();
bulk = db.collection.initializeUnorderedBulkOp();
}
});
if (count % 100 != 0) { bulk.execute(); }
The above creates an additional field "keywordsArray" that is a result of splitting the keywords string to an array.
After the operation your sample collection would have the documents:
/* 0 */
{
"_id" : ObjectId("561e24e9ba53a16c763eaab4"),
"id" : 0,
"keywords" : "amount,debited,account,ticket,not,generated,now",
"keywordsArray" : [
"amount",
"debited",
"account",
"ticket",
"not",
"generated",
"now"
]
}
/* 1 */
{
"_id" : ObjectId("561e24e9ba53a16c763eaab5"),
"id" : 1,
"keywords" : "how,safe,gocash",
"keywordsArray" : [
"how",
"safe",
"gocash"
]
}
/* 2 */
{
"_id" : ObjectId("561e24e9ba53a16c763eaab6"),
"id" : 2,
"keywords" : "how,referral,program,gocash,works",
"keywordsArray" : [
"how",
"referral",
"program",
"gocash",
"works"
]
}
On to the next stage, the aggregation framework pipeline: run the following pipeline operation, which uses the $let, $size and $setIntersection operators to work out the desired count result:
var array = ["how","safe","gocash"];
db.collection.aggregate([
{
"$project": {
"id": 1, "keywords": 1,
"count": {
"$let": {
"vars": {
"commonToBoth": { "$setIntersection": [ "$keywordsArray", array ] }
},
"in": { "$size": "$$commonToBoth" }
}
}
}
}
])
Sample Output:
/* 0 */
{
"result" : [
{
"_id" : ObjectId("561e24e9ba53a16c763eaab4"),
"id" : 0,
"keywords" : "amount,debited,account,ticket,not,generated,now",
"count" : 0
},
{
"_id" : ObjectId("561e24e9ba53a16c763eaab5"),
"id" : 1,
"keywords" : "how,safe,gocash",
"count" : 3
},
{
"_id" : ObjectId("561e24e9ba53a16c763eaab6"),
"id" : 2,
"keywords" : "how,referral,program,gocash,works",
"count" : 2
}
],
"ok" : 1
}
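As a side note: on MongoDB 3.4+ you could probably skip the schema modification altogether and split the keywords string on the fly inside the pipeline using $split. A hedged sketch of that variant:
var array = ["how", "safe", "gocash"];
db.collection.aggregate([
    {
        "$project": {
            "id": 1,
            "keywords": 1,
            "count": {
                "$size": {
                    "$setIntersection": [ { "$split": [ "$keywords", "," ] }, array ]
                }
            }
        }
    }
])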

Return a field that has an array in mongoDB, and return the first and last value in that array

Scenario: Consider the document present in the MongoDB in collection named twitCount.
{
"_id" : ObjectId("53d1340478441a1c0d25c40c"),
"items" : [
{
"date" : ISODate("2014-07-22T22:18:05.000Z"),
"value" : 4,
"_id" : ObjectId("53d134048b3956000063aa72")
},
{
"date" : ISODate("2014-07-21T22:09:20.000Z"),
"value" : 10,
"_id" : ObjectId("53d134048b3956000063aa71")
}
...
],
"ticker" : "OM:A1M"
}
I only want to fetch the first and last date inside "items". I've tried a lot of different queries, but I cannot get it right. The "ticker" is unique.
The following query is the only one that returns something, but it returns everything(that is expected).
twitCount.aggregate([{ $match : { ticker: theTicker}} ], function(err, result){
if (err) {
console.log(err);
return;
}
console.log(result)
})
So, in the end I want the query to return something like this: [2013-02-01, 2014-07-24].
I really need help with this; all the links on manual/core/aggregation are purple already and I don't know where to get more information.
Hard to tell if your intent here is to work with a single document or multiple documents that match your condition. As suggested, a single document would really just involve using the shift and pop methods native to JavaScript on the singular result to get the first and last elements of the array. You might also need to employ an array sort here:
twitCount.findOne({ "ticker": "OM:A1M" },function(err,doc) {
doc.items = doc.items.sort(function(a,b) {
return ( a.date.valueOf() > b.date.valueOf() ) ? 1
: ( a.date.valueOf() < b.date.valueOf() ) ? -1 : 0;
});
doc.items = [doc.items.shift(),doc.items.pop()];
console.log( doc );
})
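For the sample document in the question (ignoring any further array elements hidden behind the "..."), the logged document should then look roughly like this:
{
    "_id" : ObjectId("53d1340478441a1c0d25c40c"),
    "items" : [
        { "date" : ISODate("2014-07-21T22:09:20Z"), "value" : 10, "_id" : ObjectId("53d134048b3956000063aa71") },
        { "date" : ISODate("2014-07-22T22:18:05Z"), "value" : 4, "_id" : ObjectId("53d134048b3956000063aa72") }
    ],
    "ticker" : "OM:A1M"
}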
The other suggestions don't really apply, as operators like $pop permanently modify the array in updates. And the $slice operator that can be used in a query would really only be of use to you if the array contents were already sorted; additionally, you would be making two queries to return first and last, which is not what you want.
But if you really are looking to do this over multiple documents then the aggregation framework is the answer. The key area to understand when working with arrays is that you must use an $unwind pipeline stage on the array first. This "de-normalizes" to a form where a copy of the document is effectively produced for each array element:
twitCount.aggregate([
// Match your "documents" first
{ "$match": { "ticker": "OM:A1M" } },
// Unwind the array
{ "$unwind": "$items" },
// Sort the values
{ "$sort": { "items.date": 1 } },
// Group with $first and $last items
{ "$group": {
"_id": "$ticker",
"first": { "$first": "$items" },
"last": { "$last": "$items" }
}}
],function(err,result) {
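Based on just the two array entries shown in the question, result should come back roughly as:
[{
    "_id" : "OM:A1M",
    "first" : { "date" : ISODate("2014-07-21T22:09:20Z"), "value" : 10, "_id" : ObjectId("53d134048b3956000063aa71") },
    "last" : { "date" : ISODate("2014-07-22T22:18:05Z"), "value" : 4, "_id" : ObjectId("53d134048b3956000063aa72") }
}]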
If you really want "items" back as an array then you can just do things a little differently:
twitCount.aggregate([
// Match your "documents" first
{ "$match": { "ticker": "OM:A1M" } },
// Unwind the array
{ "$unwind": "$items" },
// Sort the values
{ "$sort": { "items.date": 1 } },
// Group with $first and $last items
{ "$group": {
"_id": "$ticker",
"first": { "$first": "$items" },
"last": { "$last": "$items" },
"type": { "$first": { "$const": [true,false] } }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Conditionally push to the array
{ "$group": {
"_id": "$_id",
"items": {
"$push": {
"$cond": [
"$type",
"$first",
"$last"
]
}
}
}}
],function(err,result) {
Or if your $match statement is just intended to select and you want the "first" and "last" from each document "_id" then you just change the key in the initial $group to "$_id" rather than the "$ticker" field value:
twitCount.aggregate([
// Match your "documents" first
{ "$match": { "ticker": "OM:A1M" } },
// Unwind the array
{ "$unwind": "$items" },
// Sort the values
{ "$sort": { "items.date": 1 } },
// Group with $first and $last items
{ "$group": {
"_id": "$_id",
"ticker": { "$first": "$ticker" },
"first": { "$first": "$items" },
"last": { "$last": "$items" },
"type": { "$first": { "$const": [true,false] } }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Conditionally push to the array
{ "$group": {
"_id": "$_id",
"ticker": { "$first": "$ticker" },
"items": {
"$push": {
"$cond": [
"$type",
"$first",
"$last"
]
}
}
}}
],function(err,result) {
In that last case, you would get something like this, based on the data you have provided:
{
"_id" : ObjectId("53d1340478441a1c0d25c40c"),
"ticker" : "OM:A1M",
"items" : [
{
"date" : ISODate("2014-07-21T22:09:20Z"),
"value" : 10,
"_id" : ObjectId("53d134048b3956000063aa71")
},
{
"date" : ISODate("2014-07-22T22:18:05Z"),
"value" : 4,
"_id" : ObjectId("53d134048b3956000063aa72")
}
]
}
You can find the Full List of Aggregation Operators in the documentation. It is worth getting to know how these function as depending on what you are doing the aggregation framework can be a very useful tool.