MongoDB query distinct in subdocuments - node.js

I'm using Mongoose with NodeJS (typescript).
I'm trying to sum the count per location. Example output :
[
{ name : "Bronx", count : 6 },
{ name : "Brooklyn", count : 6 },
{ name : "Manhattan", count : 6 },
{ name : "Queens", count : 6 }
]
Current data model:
data:
[
{
"news": {
"_id": "5c7615a4ef5238a6c47cbcb9",
"locations": [
{
"_id": "5c7615a4ef5238a6c47cbcc6",
"id": "1",
"name": "Manhattan",
"children": [
{
"_id": "5c7615a4ef5238a6c47cbcc8",
"count": 3
},
{
"_id": "5c7615a4ef5238a6c47cbcc7",
"count": 2
}
]
}
]
}
},
{
....
}
]
The last query that I build was :
DataModel.aggregate([
{ "$unwind": "$data.news.locations" },
{
"$group": {
"_id": "$data.news.locations",
"count": { "$sum": "$$data.news.locations.zipcodes.count" }
}
}]).exec(function(err, results){
if (err) throw err;
console.log(JSON.stringify(results, null, 4));
});
But I'm new handle queries in MongoDB with Mongoose, so any help I really appreciate. thanks.

You were kind of close, just a few changes:
DataModel.aggregate([
// Each array needs $unwind separately
{ "$unwind": "$data" },
// And then down to the next one
{ "$unwind": "$data.news.locations" },
// Group on the grouping key
{ "$group": {
"_id": "$data.news.locations.name",
"count": { "$sum": { "$sum": "$data.news.locations.children.count" } }
}}
],(err,results) => {
// remaining handling
})
So since you have arrays inside an array and you want to get down to the "name" property inside the "locations" you need to $unwind to that point. You must $unwind each array level separately.
Technically there is still the children array as well, but $sum can be used to "sum an array of values" as well as "accumulate for a grouping key". Hence the $sum: { $sum statement within the $group.
Returns:
{ "_id" : "Manhattan", "count" : 5 }
From the detail supplied in the question.

Related

Use Aggregate with $group in mongodb

I have data in worksheets collection like below:
/* 1 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6506"),
"isBilling" : true,
"hours" : 6,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 2 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6507"),
"isBilling" : true,
"hours" : 4,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 3 */
{
"_id" : ObjectId("5c21e10fae07cc1204a5b647"),
"isBilling" : false,
"hours" : 8,
"userId" : ObjectId("5c1f388fd7537d1444738492"),
}
I have to create a aggregate query to sum the hours, where isBilling equals to true, and where isBilling equals to false.I want the below output:
{
"billingHours":10,
"fixContract":8
}
I have to get data with the particular userId. I tried the below:
Worksheet.aggregate([
{
$match: conditions
},
{
$lookup:{
"from": "worksheets",
"let": {},
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$isBilling",false] } } },
{
"$group": { "_id": null, "totalHours": { "$sum": "$hours" } }
},
],
"as": "billingHours"
}
},
{
"$project":{"billingHours":1}
}
])
I am getting the below result:
[
{
"_id": "5c21d780f82aa31334ab6506",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
},
{
"_id": "5c21d780f82aa31334ab6507",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
}
]
I don't know why it is giving me 16 hours instead of 10 and giving me two objects instead of 1.
You don't need to use $lookup here. Simple $group with $cond will do the job.
db.collection.aggregate([
{ "$group": {
"_id": null,
"billingHours": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0]
}
},
"fixContract": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"]
}
}
}}
])

How to distinct (count) value when value is nested in array?

I have a data sample something like this:
"diagnostics" : {
"_ID" : "554bbf7b761e06f02fef3561",
"tests" : [
{
"_id" : "59d678064e4645ec562a37e2",
"name" : "RBC",
},
{
"_id" : "59d678064e4645ec562a37e1",
"name" : "Calcium",
}
]
}
I want to get all distinct _ID and count of all test groups in with there names
which is something like this:
"_ID" : "554bbf7b761e06f02fef3561"{ {"name" : "Calcium", count :(count of Calcium)},{"name" : "RBC", count :(count of RBC)}
Thing to keep in mind are tests is inside diagnostics and contain any number of $name field it can be two or one or any number of times and I want individual count of each distinct name .
db.collection('transactions').aggregate([
{ $unwind : '$diagnostics.tests' },
{ $group : {
_id: {
"Test_Name" : '$diagnostics.tests.name',
"ID" : '$diagnostics._id'
},
test_count: { $sum: 1 }
}
}
])
and I am getting result something like this
[
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 76
},
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "566726c35dc18d13242fffcc"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "CBC - 7 Part",
"ID": "566726c35dc18d13242fffcc"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "RBC",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "5a2c9edfe0d0ec71aef1e526"
},
"test_count": 6
},
{
"_id": {
"Test_Name": "Calcium",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 77
}
]
Can anybody help me with the query?
You need to use mulitple $group stages here.
First $unwind the tests and $group it by "name" and then resize it to original and lastly then $group by "diagnostics_ID" and for the tests count you can check the $size of the "tests" array.
db.collection.aggregate([
{ "$unwind": "$diagnostics.tests" },
{ "$group": {
"_id": {
"_id": "$diagnostics.tests.name",
"diagnosticID": "$diagnostics._ID"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": {
"_ID": "$_id.diagnosticID"
},
"tests": {
"$push": {
"name": "$_id._id",
"count": "$count"
}
}
}},
{ "$project": {
"diagnostics._ID": "$_id._ID",
"diagnostics.tests": "$tests",
"_id": 0,
"testCount": { "$size": "$tests" }
}}
])
Code snippet

how get count from mongodb with different status from one collection

I have appointment collection in that i have status codes like upcoming, cancelled, completed. i want to write an api to get count of each status using mongoose or mongodb methods.
output should be like below
[{
group : "grp1",
appointments_completed :4
appointments_upcoming :5
appointments_cancelled : 7
}]
thanks in advance.
I hope it help you
db.getCollection('codelist').aggregate([
{
$group:{
_id:{status:"$status"},
count:{$sum:1}
}
}
])
The result will be
[{
"_id" : {
"status" : "cancelled"
},
"count" : 13.0
},
{
"_id" : {
"status" : "completed"
},
"count" : 20.0
}
]
I think you can process it with nodejs
Using Aggregation Pipeline $group we can get this count
db.collection_name.aggregate([
{ $group: {
_id:null,
appointments_completed: {$sum : "$appointments_completed" },
appointments_upcoming:{$sum :"$appointments_upcoming"},
appointments_cancelled:{$sum: "$appointments_cancelled"}
}
}
]);
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator and a $replaceRoot pipeline to get the desired result. You would need to run the following aggregate pipeline:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$concat": ["appointments_", { "$toLower": "$status" }] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"k": "$_id.status",
"v": "$count"
}
}
} },
{ "$addFields": {
"counts": {
"$setUnion": [
"$counts", [
{
"k": "group",
"v": "$_id"
}
]
]
}
} },
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$counts" }
} }
])
For older versions, a more generic approach though with a different output format would be to group twice and get the counts as an array of key value objects as in the following:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
} }
])
which spits out:
{
"_id": "grp1"
"counts":[
{ "status": "completed", "count": 4 },
{ "status": "upcoming", "count": 5 }
{ "status": "cancelled", "count": 7 }
]
}
If the status codes are fixed then the $cond operator in the $group pipeline step can be used effectively to evaluate the counts based on the status field value. Your overall aggregation pipeline can be constructed as follows to produce the result in the desired format:
db.appointments.aggregate([
{ "$group": {
"_id": <group_by_field>,
"appointments_completed": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "completed" ] }, 1, 0 ]
}
},
"appointments_upcoming": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "upcoming" ] }, 1, 0 ]
}
},
"appointments_cancelled": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "cancelled" ] }, 1, 0 ]
}
}
} }
])

MongoDB aggregate count of items in two arrays across different documents?

Here is my MongoDB collection schema:
company: String
model: String
cons: [String] // array of tags that were marked as "cons"
pros: [String] // array of tags that were marked as "pros"
I need to aggregate it so I get the following output:
[{
"_id": {
"company": "Lenovo",
"model": "T400"
},
"tags": {
tag: "SomeTag"
pros: 124 // number of times, "SomeTag" tag was found in "pros" array in `Lenovo T400`
cons: 345 // number of times, "SomeTag" tag was found in "cons" array in `Lenovo T400`
}
}...]
I tried to do the following:
var aggParams = {};
aggParams.push({ $unwind: '$cons' });
aggParams.push({ $unwind: '$pros' });
aggParams.push({$group: {
_id: {
company: '$company',
model: '$model',
consTag: '$cons'
},
consTagCount: { $sum: 1 }
}});
aggParams.push({$group: {
_id: {
company: '$_id.company',
model: '$_id.model',
prosTag: '$pros'
},
prosTagCount: { $sum: 1 }
}});
aggParams.push({$group: {
_id: {
company:'$_id.company',
model: '$_id.model'
},
tags: { $push: { tag: { $or: ['$_id.consTag', '$_id.prosTag'] }, cons: '$consTagCount', pros: '$prosTagCount'} }
}});
But I got the following result:
{
"_id": {
"company": "Lenovo",
"model": "T400"
},
"tags": [
{
"tag": false,
"pros": 7
}
]
}
What is the right way to do this with aggregation?
Yes this is a bit harder considering that there are multiple arrays, and if you try both at the same time you end up with a "cartesian condition" where one arrray multiplies the contents of the other.
Therefore, just combine the array content at the beginning, which probably indicates how you should be storing the data in the first place:
Model.aggregate(
[
{ "$project": {
"company": 1,
"model": 1,
"data": {
"$setUnion": [
{ "$map": {
"input": "$pros",
"as": "pro",
"in": {
"type": { "$literal": "pro" },
"value": "$$pro"
}
}},
{ "$map": {
"input": "$cons",
"as": "con",
"in": {
"type": { "$literal": "con" },
"value": "$$con"
}
}}
]
}
}},
{ "$unwind": "$data" }
{ "$group": {
"_id": {
"company": "$company",
"model": "$model",
"tag": "$data.value"
},
"pros": {
"$sum": {
"$cond": [
{ "$eq": [ "$data.type", "pro" ] },
1,
0
]
}
},
"cons": {
"$sum": {
"$cond": [
{ "$eq": [ "$data.type", "con" ] },
1,
0
]
}
}
}
],
function(err,result) {
}
)
So via the first $project stage the $map operators are adding the "type" value to each item of each array. Not that it really matters here as all items should process "unique" anyway, the $setUnion operator "contatenates" each array into a singular array.
As mentioned earlier, you probably should be storing in this way in the first place.
Then process $unwind followed by $group, wherein each "pros" and "cons" is then evaluated via $cond to for it's matching "type", either returning 1 or 0 where the match is respectively true/false to the $sum aggregation accumulator.
This gives you a "logical match" to count each respective "type" within the aggregation operation as per the grouping keys specified.

MongoDB sort by relevance

I am using trying to get documents from MongoDB on node. Let's say documents have the following structure:
{ "_id": ObjectId, "title" : String, "tags" : Array<String> }
I'd like to sort them by relevance - so when I'm looking for documents that have either "blue" or "yellow" tag I'd like to get ones with both tags first. So far I managed by google, trial and error:
var tags = [ "yellow", "blue" ];
db.collection('files').aggregate([
{ $project : { tags: 1 } },
{ $unwind : "$tags" },
{ $match : { "tags": { "$in": tags } } },
{ $group : { _id: "$_id", relevance: { $sum:1 } } },
{ $sort : { relevance : -1 } },
], function(err, success) {
console.log(success);
});
It works just fine, I get sorted collection of ids:
[{"_id":"5371355045002fc820a09566","relevance":2},{"_id":"53712fc6c8fcd124216de6cd","relevance":2},{"_id":"5371302ebd4725dc1b908316","relevance":1}]
Now I would make another query and ask for documents with those ids - but here's my question: can it be done in one query?
Yes you can as is always the case when you are actually grouping on _id then that value is essentially equivalent to the whole document. So it is just a matter of storing the whole document under the _id field.
You have a couple of approaches to this depending on your MongoDB version, and in versions prior to MongoDB 2.6 you must specify the whole document structure in an initial $project stage ( which may optionally come after a $match which is generally a good idea ) in your pipeline before you actually manipulate the document:
var tags = ["yellow","blue"];
db.collection.aggregate([
{ "$project" : {
"_id": {
"_id": "$_id",
"title": "$title",
"tags": "$tags"
},
"tags": 1
}},
{ "$unwind": "$tags" },
{ "$match": { "tags": { "$in": tags } } },
{ "$group": { "_id": "$_id", "relevance": { "$sum":1 } } },
{ "$sort": { "relevance" : -1 } },
{ "$project": {
_id: "$_id._id",
"title": "$_id.title",
"tags": "$_id.tags"
}}
])
And of course, at the end of the pipeline you extract the information from the _id field in order to get back your original structure. That is optional, but you usually want that.
For MongoDB 2.6 and above there is a variable available to the pipeline stages that holds the structure of the document at that stage of the pipeline known as $$ROOT, and you can access this as a kind of shortcut to the above form like so:
var tags = ["yellow","blue"];
db.collection.aggregate([
{ "$project" : {
"_id": "$$ROOT",
"tags": 1
}},
{ "$unwind": "$tags" },
{ "$match": { "tags": { "$in": tags } } },
{ "$group": { "_id": "$_id", "relevance": { "$sum":1 } } },
{ "$sort": { "relevance" : -1 } },
{ "$project": {
"_id": "$_id._id",
"title": "$_id.title",
"tags": "$_id.tags"
}}
])
Keeping in mind that in order to restore the document you still need to specify all the required fields.
I would note that as you are "filtering" documents with your match condition in this case and as was mentioned earlier, you should actually be filtering with a $match statement at the "head" of your pipeline. This is the only place where the aggregation framework can select an index in order to optimize the query, and it also reduces the number of documents that do not meet your conditions ( presuming that not everything has the tags "yellow" or "blue" ) that go through the remaining pipeline stages:
db.collection.aggregate([
{ "$match": { "tags": { "$in": tags } } },
{ "$project" : {
"_id": {
"_id": "$_id",
"title": "$title",
"tags": "$tags"
},
"tags": 1
}},
{ "$unwind": "$tags" },
{ "$match": { "tags": { "$in": tags } } },
{ "$group": { "_id": "$_id", "relevance": { "$sum":1 } } },
{ "$sort": { "relevance" : -1 } },
{ "$project": {
_id: "$_id._id",
"title": "$_id.title",
"tags": "$_id.tags"
}}
])
At any rate that should be generally more effective than trying to do another query which of course would not maintain your sort order in the way that you have done.

Resources