Count links in Arrays in MongoDB collection - json

I have a collection with objects, which are linking to other objects in the array:
{
"_id" : ObjectId("53f75bedc5489f86666d305e"),
"id" : "2",
"links_to" : [
{
"id" : 1,
"label" : null,
},
{
"id" : 3,
"label" : null,
},
{
"id" : 60,
"label" : null,
},
{
"id" : 23,
"label" : null,
},
]
},
{
"_id" : ObjectId("53f75bedc5489f86666d305e"),
"id" : "3",
"links_to" : [
{
"id" : 4,
"label" : null,
},
{
"id" : 8,
"label" : null,
},
{
"id" : 23,
"label" : null,
},
{
"id" : 2,
"label" : null,
},
]
},
...
Now I would like to write a query, which gives as an output for each id the number of links. Eg.:
{"id": 1, "numberOfLinks": 21},
{"id": 2, "numberOfLinks": 15},
...
Thanks in advance.

The best approach is to keep the count on the document and update it when you either $push or $pull elements of the array using the $inc operator. In this way the field is maintained on the document itself:
{
"links_to": [],
"linkCount": 0
}
When you "push"
// Append a link and bump the stored counter in one update operation.
// The empty filter {} is a placeholder: replace it with a query that selects
// the document you want to modify.
db.collection.update(
  {},
  { "$push": { "links_to": newLink }, "$inc": { "linkCount": 1 } }
)
And "pull":
// Remove a link and decrement the stored counter in one update operation.
// The empty filter {} is a placeholder: replace it with a query that selects
// the document you want to modify.
// NOTE(review): the counter is decremented by exactly 1 regardless of how many
// elements $pull actually removed - confirm that fits your data.
db.collection.update(
  {},
  { "$pull": { "links_to": newLink }, "$inc": { "linkCount": -1 } }
)
Without doing this, you can use the $size operator from the aggregation framework in modern MongoDB (2.6 and later) to get the array length:
// Report, for every document, how many entries its "links_to" array holds.
db.collection.aggregate([
  { "$project": {
    // Keep the document's own "id" so each count can be attributed.
    "id": 1,
    // $size fails on a missing/null field, so fall back to an empty array.
    "numberOfLinks": { "$size": { "$ifNull": [ "$links_to", [] ] } }
  }}
])
Or in versions prior to MongoDB 2.6 you can count the array members after $unwind and $group:
// Count array members without $size: emit one document per "links_to"
// element, then count those documents per original "id".
db.collection.aggregate([
  { "$unwind": "$links_to" },
  { "$group": {
    "_id": "$id",
    "numberOfLinks": { "$sum": 1 }
  }}
])
So usually unless you want something specifically "dynamic" then just maintain the count on the document. This avoids the overhead of calculation when you query.

Actually this is fairly simple to achieve using aggregation:
// Count how many times each id is referenced from any "links_to" array.
db.foo.aggregate([
  // One document per link entry.
  { $unwind: "$links_to" },
  // Group by the linked id; the key name matches the sample output below.
  { $group: { _id: { "linksToId": "$links_to.id" }, numberOfLinks: { $sum: 1 } } },
  // Optional reshaping: flatten the grouping key into a plain "id" field.
  { $project: { _id: 0, id: "$_id.linksToId", numberOfLinks: "$numberOfLinks" } }
])
produces the desired output, though in reversed order of fields, at least in the shell output:
{ "numberOfLinks" : 3, "id" : 3 }
{ "numberOfLinks" : 3, "id" : 2 }
{ "numberOfLinks" : 1, "id" : 5 }
{ "numberOfLinks" : 2, "id" : 4 }
{ "numberOfLinks" : 3, "id" : 1 }
If you can live with an output like:
{ "_id" : { "linksToId" : 3 }, "numberOfLinks" : 3 }
{ "_id" : { "linksToId" : 2 }, "numberOfLinks" : 3 }
{ "_id" : { "linksToId" : 5 }, "numberOfLinks" : 1 }
{ "_id" : { "linksToId" : 4 }, "numberOfLinks" : 2 }
{ "_id" : { "linksToId" : 1 }, "numberOfLinks" : 3 }
you can skip the $project step of the aggregation pipeline.
This is extremely efficient. I did a test basically doing the same thing over a collection of 5M documents with roughly 17M relations. Takes 18 seconds on a not exactly high performance server.

Related

jq add element to each object inside array of objects

If I have an array of objects like this:
[
{
"remote" : [
{
"id" : 1
},
{
"id" : 2
},
{
"id" : 3
}
],
"text_id" : 1
},
{
"remote" : [
{
"id" : 4
},
{
"id" : 5
},
{
"id" : 6
}
],
"text_id" : 2
}
]
How would you add "text_id" field to every object inside .[].remote[] array so it would become
[
{
"remote" : [
{
"id" : 1,
"text_id" : 1
},
{
"id" : 2,
"text_id" : 1
},
{
"id" : 3,
"text_id" : 1
}
]
},
{
"remote" : [
{
"id" : 4,
"text_id" : 2
},
{
"id" : 5,
"text_id" : 2
},
{
"id" : 6,
"text_id" : 2
}
]
}
]
I have already spent several hours trying to figure this out. It looks like there has to be a way to do this using the foreach directive, but when I checked the manual it seemed pretty obscure to me, so I thought maybe someone could give an example.
Thanks.
# For every top-level object: remember its text_id, drop it from the object,
# and merge it into each element of the object's "remote" array.
jq 'map(
  .text_id as $tid
  | del(.text_id)
  | .remote[] |= (. + {text_id: $tid})
)'
You don't need map for that.
# For each top-level object: merge {text_id} (read from that object) into every element of .remote, then delete the object's own text_id.
.[] |= (.remote[] += {text_id} | del(.text_id))
Online demo
I think you should use array.map()
You have two levels of array so it should look like this :
// Sample input: each entry owns a `text_id` and a `remote` list of objects.
const fatArray = [
  {
    remote: [{ id: 1 }, { id: 2 }, { id: 3 }],
    text_id: 1,
  },
  {
    remote: [{ id: 4 }, { id: 5 }, { id: 6 }],
    text_id: 2,
  },
];

// Build a new array (the input is left untouched) in which the parent's
// `text_id` is copied into every `remote` element and removed from the parent,
// keeping the `remote` wrapper so the shape matches the requested output.
const finalArray = fatArray.map(({ text_id, remote, ...rest }) => ({
  ...rest,
  // Spread each element so properties other than `id` are preserved as well.
  remote: remote.map((elem) => ({ ...elem, text_id })),
}));

// Stringify so the nested objects are printed in full instead of `[Object]`.
console.log(JSON.stringify(finalArray, null, 2));

Mix data from MySQL in MongoDB query?

I have the following query in MongoDB, which selects the last 10 conversations a user has participated in. They are sorted based on the last message in a conversation. I have a way to paginate between the results using $gt in the match statement. For now, this is commented out.
I limited the number of members to 3 (using slice), because a group conversation can contain, for example, 30, 40, 50, ... members.
The problem: I have a MySQL database to store relational information. So I have a table users and a table followers, to show who is following each other. I want to include this information in a way into MongoDB, so I can find 3 members I am following, to show relevant profile pictures of known people. Let's say I have a conversation with 50 members, then I want to retrieve 3 people I follow (if any), so I see familiar profile pictures. Is this possible in a way, or is my wish not possible?
query
// List conversations of user "1" together with their latest message and up to
// three other members.
db.getCollection('conversations').aggregate([
  // Filter on membership first: "members" is not modified by the stages below,
  // so the result is unchanged, but $lookup/$unwind/$sort/$group now only
  // process conversations the user actually takes part in.
  {
    $match: { "members.uID": "1" }
  },
  // Attach all messages that belong to the conversation.
  {
    $lookup: {
      from: "messages",
      localField: "_id",
      foreignField: "c_ID",
      as: "messages"
    }
  },
  // One document per (conversation, message).
  {
    $unwind: "$messages"
  },
  // Newest message first, so $first below picks the latest message.
  {
    $sort: { "messages.t": -1 }
  },
  {
    $group: {
      _id: "$_id",
      lastMessage: { $first: "$messages" },
      allFields: { $first: "$$ROOT" }
    }
  },
  // Restore the original conversation fields and add "lastMessage".
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [ "$allFields", { lastMessage: "$lastMessage" } ]
      }
    }
  },
  // Drop the unwound message copy carried over from $$ROOT.
  {
    $project: { messages: 0 }
  },
  // Pagination (currently disabled): enable this stage to page through results.
  // { $match: { "lastMessage.t": { $gt: ISODate("2020-02-04 20:38:02.154Z") } } },
  // NOTE(review): ascending order returns the conversations with the oldest
  // last message first - confirm this is the intended direction.
  {
    $sort: { "lastMessage.t": 1 }
  },
  {
    $limit: 10
  },
  // Keep at most three members, excluding the user themself.
  {
    $project: {
      members: {
        $slice: [
          {
            $filter: {
              input: "$members",
              as: "member",
              cond: { $ne: [ "$$member.uID", "1" ] }
            }
          },
          3
        ]
      }
    }
  },
])
conversations document
{
"_id" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"members" : [
{
"uID" : "1",
"j" : 1580580922
},
{
"uID" : "4",
"j" : 1580580922
},
{
"uID" : "5",
"j" : 1580580922
}
]
}
messages document
{
"_id" : ObjectId("5e35ee5f40713a43aeeeb1c5"),
"c_ID" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"fromID" : "1",
"msg" : "What's up?",
"t" : 1580591922,
"d" : {
"4" : 1580592039
},
"r" : {
"4" : 1580592339
}
}

Can this JSON with "timestamp" : Double format be aggregated for SUM, AVG in MongoDB

I have imported the JSON data below into its own collection in a MongoDB database. I'm trying to aggregate the values (i.e. 40, 30, 30) and compute their SUM and AVG; they reside in the innermost embedded document. I'm having a problem doing this when I use dot notation: I cannot get any values. I suspect the unique timestamps (i.e. 1567544426000, 1567541464000, 1567541475000) used as keys are the problem. Is this JSON file formatted correctly for aggregation, and if so, how would I do it? Thanks for any help, or even a pointer in the right direction on where I can find out how to apply SUM, AVG, etc. to the data.
I've tried using NoSQLBooster and Query Assist for MongoDB.
{
"Barcode": "97-1908-577-1032-BE1-332",
"IP": "192.162.656.111",
"VFD": {
"CurrentPV": {
"Type": "Speed",
"Data": {
"1567544426000": 40,
"1567541464000": 30,
"1567541475000": 30
}
},
"CurrentSP": {
"Type": "Speed",
"Data": {
"1567544426000": 55,
"1567541464000": 5,
"1567541488000": 10
}
},
"Program_Running": {
"Type": "Active",
"Data": {
"1567544426000": 1,
"1567541464000": 0,
"1567541475000": 3
}
}
},
"Equipment": "PieceOfEquipment",
"Location": "Garage",
"RunEnd": "NA",
"RunStart": 1533541438
}
I can't seem to reach the values even when I use dot notation down to the "Data" branch object (ie Equipment.VFD.CurrentPV.Data) but no result sets are returned.
We can convert the VFD.CurrentPV.Data into an array of key-value pairs using $objectToArray and then perform SUM and AVG on the values itself.
The following query can get us the expected output:
// Sum and average the readings stored as timestamp-keyed values under
// VFD.CurrentPV.Data.
db.collection.aggregate([
  // Turn the { timestamp: value } object into a plain array of its values.
  {
    $project: {
      readings: {
        $map: {
          input: { $objectToArray: "$VFD.CurrentPV.Data" },
          as: "entry",
          in: "$$entry.v"
        }
      }
    }
  },
  // Reduce the array of values to the two requested statistics.
  {
    $project: {
      sum: { $sum: "$readings" },
      avg: { $avg: "$readings" }
    }
  }
]).pretty()
Data set:
{
"_id" : ObjectId("5d830f3afb35a835fbd8638e"),
"Barcode" : "97-1908-577-1032-BE1-332",
"IP" : "192.162.656.111",
"VFD" : {
"CurrentPV" : {
"Type" : "Speed",
"Data" : {
"1567544426000" : 40,
"1567541464000" : 30,
"1567541475000" : 30
}
},
"CurrentSP" : {
"Type" : "Speed",
"Data" : {
"1567544426000" : 55,
"1567541464000" : 5,
"1567541488000" : 10
}
},
"Program_Running" : {
"Type" : "Active",
"Data" : {
"1567544426000" : 1,
"1567541464000" : 0,
"1567541475000" : 3
}
}
},
"Equipment" : "PieceOfEquipment",
"Location" : "Garage",
"RunEnd" : "NA",
"RunStart" : 1533541438
}
Output:
{
"_id" : ObjectId("5d830f3afb35a835fbd8638e"),
"sum" : 100,
"avg" : 33.333333333333336
}

Retrieve item list by checking multiple attribute values in MongoDB in golang

This question is about MongoDB: how can I retrieve items by matching an attribute against multiple values? It is like the IN condition in MySQL:
SELECT * FROM venuelist WHERE venueid IN (venueid1, venueid2)
I have attached the JSON data structure that I use (see "JSON structure of MongoDB" below).
As an example, each document has a venueList; every venue in that list has a venue id and a sum array, which holds user-agent names with their total counts as values. User agent here means the user's OS, browser and device information. In this case I used the OS distribution, i.e. I counted the linux and ubuntu users for a particular venue id.
it is like that,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
Finally I want to get count of all linux user count by selecting venueid list in one find query in MongoDB.
As example, I want to select all count of linux users by conditioning if venue id VID1212 or VID4343
Ref: JSON structure of MongoDB
{
"_id" : ObjectId("57f940c4932a00aba387b0b0"),
"tenantID" : 1,
"date" : "2016-10-09 00:23:56",
"venueList" : [
{
"id" : "VID1212",
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
"ssidList" : [ // this is the list of SSIDs in the venue
{
"id" : "SSID1212",
"sum" : [
{
"name" : "linux",
"value" : 8
},
{
"name" : "ubuntu",
"value" : 6
}
],
"macList" : [ // this is the MAC list inside a particular SSID, e.g. the MAC list inside SSID1212
{
"id" : "12:12:12:12:12:12",
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 1
}
]
}
]
}
]
},
{
"id" : "VID4343",
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"ssidList" : [
{
"id" : "SSID4343",
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"macList" : [
{
"id" : "43:43:43:43:43:34",
"sum" : [
{
"name" : "linux",
"value" : 2
}
]
}
]
}
]
}
]
}
I am using Go to manipulate the data in MongoDB through the mgo.v2 package.
The expected output is:
output
linux : 12+2 = 14
ubuntu : 4+0 = 4
Don't consider inner list in venuelist.
You'd need to use the aggregation framework where you would run an aggregation pipeline that first filters the documents in the collection based on
the venueList ids using the $match operator.
The second pipeline stage flattens the venueList and sum subdocument arrays so that the data in the documents can be processed further down the pipeline as denormalised entries. The $unwind operator is useful here.
A further filter using $match is necessary after unwinding so that only the documents you want to aggregate are allowed into the next pipeline.
The main pipeline stage is $group, which aggregates the filtered documents to create the desired sums using the accumulator operator $sum. For the desired result, you need a ternary operator like $cond to create the independent count fields, since it feeds either the entry's value or 0 to the $sum expression depending on the name value.
Putting this altogether, consider running the following pipeline:
// Total the "linux" and "ubuntu" counts over the selected venues.
db.collection.aggregate([
  // Cheap pre-filter: documents containing at least one requested venue.
  { $match: { "venueList.id": { $in: ["VID1212", "VID4343"] } } },
  // One document per venue, then discard the venues that were not requested.
  { $unwind: "$venueList" },
  { $match: { "venueList.id": { $in: ["VID1212", "VID4343"] } } },
  // One document per (venue, sum entry).
  { $unwind: "$venueList.sum" },
  // Single result document; each field adds the entry's value only when the
  // entry's name matches, and 0 otherwise.
  {
    $group: {
      _id: null,
      linux: {
        $sum: {
          $cond: {
            if: { $eq: ["$venueList.sum.name", "linux"] },
            then: "$venueList.sum.value",
            else: 0
          }
        }
      },
      ubuntu: {
        $sum: {
          $cond: {
            if: { $eq: ["$venueList.sum.name", "ubuntu"] },
            then: "$venueList.sum.value",
            else: 0
          }
        }
      }
    }
  }
])
For usage with mGo, you can convert the above pipeline using the guidance in http://godoc.org/labix.org/v2/mgo#Collection.Pipe
For a more flexible and better-performing alternative, which executes much faster than the above and also handles names in the sum list that are not known in advance, run the following pipeline:
// Total the counts per name over the selected venues, without hard-coding
// the names.
db.collection.aggregate([
  // Cheap pre-filter: documents containing at least one requested venue.
  { $match: { "venueList.id": { $in: ["VID1212", "VID4343"] } } },
  // One document per venue, then discard the venues that were not requested.
  { $unwind: "$venueList" },
  { $match: { "venueList.id": { $in: ["VID1212", "VID4343"] } } },
  // One document per (venue, sum entry).
  { $unwind: "$venueList.sum" },
  // Add up the values for each distinct name.
  {
    $group: {
      _id: "$venueList.sum.name",
      count: { $sum: "$venueList.sum.value" }
    }
  },
  // Collapse the per-name totals into a single document with a "counts" list.
  {
    $group: {
      _id: null,
      counts: { $push: { name: "$_id", count: "$count" } }
    }
  }
])

Finding JSON objects in mongoDB

I'm trying to find objects using the built-in queries and it just doesn't work.
My JSON file is something like this:
{ "Text1":
{
"id":"2"
},
"Text2":
{
"id":"2,3"
},
"Text3":
{
"id":"1"
}
}
And I write this db.myCollection.find({"id":2})
And it doesn't find anything.
When I write db.myCollection.find() it shows all the data as it should.
Anyone knows how to do it correctly?
It's hard to change the data structure, but since you want only the matching sub-document and you don't know in advance which key holds it (the query could be on Text1, Text2, ...), a better data structure for this is:
{
"_id" : ObjectId("548dd9261a01c68fab8d67d7"),
"pair" : [
{
"id" : "2",
"key" : "Text1"
},
{
"id" : [
"2",
"3"
],
"key" : "Text2"
},
{
"id" : "1",
"key" : "Text3"
}
]
}
and your query is:
// The positional projection 'pair.$' returns only the first matching array
// element; _id: 0 excludes the _id field from the result.
// (There are better ways to get every match, such as an aggregation pipeline.)
db.myCollection.findOne({ 'pair.id': "2" }, { 'pair.$': 1, _id: 0 }).pair
as result you will have:
{
"0" : {
"id" : "2",
"key" : "Text1"
}
}
Update 1 (newbie way)
If you want all the document not just one use this
// Collect the first matching "pair" element of every matching document.
var result = [];
// 'pair.$' keeps only the first matching array element; _id: 0 excludes _id.
db.myCollection.find({ 'pair.id': "2" }, { 'pair.$': 1, _id: 0 }).forEach(function (item) {
  // item.pair is the (single-element) projected array of this document.
  result.push(item.pair);
});
// the output will be in result
Update 2
Use this query to get all sub-documents
// Return every "pair" sub-document whose id matches "2", one per result.
db.myCollection.aggregate([
  // Skip documents with no matching entry before paying for $unwind.
  { $match: { 'pair.id': "2" } },
  // One document per "pair" element.
  { $unwind: '$pair' },
  // Keep only the elements that match.
  { $match: { 'pair.id': "2" } }
]).toArray() // aggregate() returns a cursor in current shells; `.result` only existed in old versions
It produces the following output:
{
"0" : {
"_id" : ObjectId("548deb511a01c68fab8d67db"),
"pair" : {
"id" : "2",
"key" : "Text1"
}
},
"1" : {
"_id" : ObjectId("548deb511a01c68fab8d67db"),
"pair" : {
"id" : [
"2",
"3"
],
"key" : "Text2"
}
}
}
Since your query specifies a field in a subdocument, this is what will work. See the .find() documentation.
// Match on the nested field with dot notation and return only that field (plus _id).
db.myCollection.find({ "Text1.id": "2" }, { "Text1.id": 1 })
{ "_id" : ObjectId("548dd798e2fa652e675af11d"), "Text1" : { "id" : "2" } }
If the query can be on either "Text1" or "Text2", the best thing to do here, as mentioned in the accepted answer, is to change your document structure. This can easily be done using the "Bulk" API.
// One-off migration: rewrite every document from the
// { "Text1": { "id": "2,3" }, ... } shape into
// { "pair": [ { "key": "Text1", "id": ["2", "3"] }, ... ] }.
var bulk = db.mycollection.initializeOrderedBulkOp();
var count = 0;

db.mycollection.find().forEach(function (doc) {
  var pair = [];
  for (var key in doc) {
    if (key !== "_id") {
      // Split on runs of commas/spaces and drop empty pieces, so "2, 3"
      // becomes ["2", "3"] rather than ["2", "", "3"].
      var id = doc[key]["id"].split(/[, ]+/).filter(function (part) {
        return part !== "";
      });
      pair.push({ "key": key, "id": id });
    }
  }
  // Replace the whole document (except _id) with the new structure.
  bulk.find({ "_id": doc._id }).replaceOne({ "pair": pair });
  count++;
  if (count % 300 === 0) {
    // Execute per 300 operations and re-init the batch.
    bulk.execute();
    bulk = db.mycollection.initializeOrderedBulkOp();
  }
});

// Flush whatever is left in the final, partially filled batch.
if (count % 300 !== 0) {
  bulk.execute();
}
Your documents now look like this:
{
"_id" : ObjectId("55edddc6602d0b4fd53a48d8"),
"pair" : [
{
"key" : "Text1",
"id" : [
"2"
]
},
{
"key" : "Text2",
"id" : [
"2",
"3"
]
},
{
"key" : "Text3",
"id" : [
"1"
]
}
]
}
Running the following query:
// Keep, per document, only the "pair" entries whose "id" array contains "2".
db.mycollection.aggregate([
{ "$project": {
"pair": {
// $setDifference strips the `false` placeholders that $map emits for non-matching entries.
"$setDifference": [
{ "$map": {
"input": "$pair",
"as": "pr",
"in": {
"$cond": [
// True when every element of ["2"] is present in this entry's "id" array.
{ "$setIsSubset": [ ["2"], "$$pr.id" ]},
"$$pr",
false
]
}
}},
[false]
]
}
}}
])
returns:
{
"_id" : ObjectId("55edddc6602d0b4fd53a48d8"),
"pair" : [
{
"key" : "Text1",
"id" : [
"2"
]
},
{
"key" : "Text2",
"id" : [
"2",
"3"
]
}
]
}