My schema looks like this
{
_id: ObjectID,
gender: "MALE", // MALE or FEMALE
status: "ACTIVE", // ACTIVE or INACTIVE
method: "A" // A or B
}
The API needs to return a total document count, total count by gender, total count by status and total count by method. My current approach is making multiple aggregate calls and one count method.
As such,
const genderCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$gender", count: { $sum: 1 }}}
]);
const statusCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$status", count: { $sum: 1 }}}
]);
const methodCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$method", count: { $sum: 1 }}}
]);
const total = await db.collection(Collection.Sample).count();
await genderCursor.forEach(x => gender.push({ name: x._id, count: x.count}))
await statusCursor.forEach(x => statuses.push({ name: x._id, count: x.count}))
await methodCursor.forEach(x => methods.push({ name: x._id, count: x.count}))
Results,
{
"total": 100,
"gender": [
{
"name": "MALE",
"count": 30
},
{
"name": "FEMALE",
"count": 70
},
],
"statuses": [
{
"name": "APPROVED",
"count": 81
},
{
"name": "CREATED",
"count": 19
},
],
"methods": [
{
"name": "A",
"count": 50
},
{
"name": "B",
"count": 50
},
],
}
Is there a better and cost effective method to achieve the same thing as above?
You should combine all the queries into a single Aggregation Query since it will reduce your network roundtrip times and load on MongoDB servers.
There are two methods in doing this.
Method-1: Using null Group
You can group with _id null and apply $cond Operator. This is much
faster than the second method, but you have to apply all the outcomes required in the $cond.
Choose whichever method works best for your use case.
db.collection.aggregate([
{
"$group": {
"_id": null,
"male": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$gender",
"MALE"
]
},
"then": 1,
"else": 0,
},
},
},
"female": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$gender",
"FEMALE"
]
},
"then": 1,
"else": 0,
},
}
},
"active": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$status",
"ACTIVE"
]
},
"then": 1,
"else": 0,
},
}
},
"inactive": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$status",
"INACTIVE"
]
},
"then": 1,
"else": 0,
}
},
},
"methodA": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$method",
"A"
]
},
"then": 1,
"else": 0,
},
}
},
"methodB": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$method",
"B"
]
},
"then": 1,
"else": 0,
},
},
}
}
},
])
Mongo Playground Sample Execution
Method-2: Using $facet
You can also use the $facet stage, but it requires more computation on MongoDB compared with $group, but you don't have to write all the outcomes manually.
db.collection.aggregate([
{
"$facet": {
"gender": [
{
"$group": {
"_id": "$gender",
"count": {
"$sum": 1
}
}
},
],
"status": [
{
"$group": {
"_id": "$status",
"count": {
"$sum": 1
}
}
},
],
"method": [
{
"$group": {
"_id": "$method",
"count": {
"$sum": 1
}
}
},
],
}
}
])
Mongo Playground Sample Execution
Related
i have a problem with aggregation framework in MongoDB (mongoose) this is the problem. i have the following database scheme.so what i want to do is count number of people who has access through Mobile only , Card only, or both. with out any order,
{
'_id': ObjectId,
'user_access_type': ['Mobile' , 'Card']
}
{
'_id': ObjectId,
'user_access_type': ['Card' , 'Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Card']
}
Now i am using this but it only groups by the order of the user_access_type array,
[ { "$group" : { "_id": {"User" : "$user_access_type"} , "count": {"$sum" : 1} }]
this is the output:
{
"_id": {
"User": [
"Card",
"Mobile"
]
},
"count": 1
},
{
"_id": {
"_id": "5f7dce2359aaf004985f98eb",
"User": [
"Mobile",
"Card"
]
},
"count": 1
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
vs what i want:
{
"_id": {
"User": [
"Card",
"Mobile" // we can say both
]
},
"count": 2 // does not depend on order
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
You can use other option as well using $function,
$function can allow to add javascript code, you can use sort() to sort the array
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$function: {
body: function(user_access_type){
return user_access_type.sort();
},
args: ["$user_access_type"],
lang: "js"
}
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Second option,
If user_access_type array having always unique elements then you can use $setUnion operator on user_access_type array as self union, some how this will re-order array in same order,
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$setUnion: "$user_access_type"
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Playground
I have been trying to come up with a query for these (simplified) documents below. My database consists of several data similar as these.
Since there is no nested querying in Mongo shell, is there another possible way to get what I want?
I am trying to get a list of Medicines that are owned by more than 30% of the pharmacies in my DB (regardless of quantity).
[
{
"Pharmacy": "a",
"Medicine": [
{
"MedName": "MedA",
"Quantity": 55
},
{
"MedName": "MedB",
"Quantity": 34
},
{
"MedName": "MedD",
"Quantity": 25
}
]
},
{
"Pharmacy": "b",
"Medicine": [
{
"MedName": "MedB",
"Quantity": 60
},
{
"MedName": "MedC",
"Quantity" : 34
}
]
}
]
How can I do this (if possible)?
Please check the answer here: https://mongoplayground.net/p/KVZ4Ee9Qhu-
var PharmaCount = db.collection.count();
db.collection.aggregate([
{
"$unwind": "$Medicine"
},
{
"$project": {
"medName": "$Medicine.MedName",
"Pharmacy": "$Pharmacy"
}
},
{
"$group": {
"_id": {
"medName": "$medName"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"count": 1,
"percentage": {
"$concat": [
{
"$substr": [
{
"$multiply": [
{
"$divide": [
"$count",
{
"$literal": 2 // Your total number of pharmacies i.e PharmaCount
}
]
},
100
]
},
0,
3
]
},
"",
"%"
]
}
}
}
])
You should get results like:
[
{
"_id": {
"medName": "MedC"
},
"count": 1,
"percentage": "50%"
},
{
"_id": {
"medName": "MedD"
},
"count": 1,
"percentage": "50%"
},
{
"_id": {
"medName": "MedB"
},
"count": 2,
"percentage": "100%"
},
{
"_id": {
"medName": "MedA"
},
"count": 1,
"percentage": "50%"
}
]
Hope this helps.
You can not do this in a single query, but here is a way :
size = (db['01'].distinct("Pharmacy")).length;
minPN = Math.ceil(size*0.3);
db['01'].aggregate(
// Pipeline
[
// Stage 1
{
$unwind: {
path : "$Medicine",
}
},
// Stage 2
{
$group: {
_id:"$Medicine.MedName",
pharmacies:{$addToSet:"$Pharmacy"}
}
},
// Stage 3
{
$project: {
pharmacies:1,
pharmacies_count:{$size:"$pharmacies"}
}
},
{
$match:{pharmacies_count:{$gte:minPN}}
}
]
);
I have appointment collection in that i have status codes like upcoming, cancelled, completed. i want to write an api to get count of each status using mongoose or mongodb methods.
output should be like below
[{
group : "grp1",
appointments_completed :4
appointments_upcoming :5
appointments_cancelled : 7
}]
thanks in advance.
I hope it help you
db.getCollection('codelist').aggregate([
{
$group:{
_id:{status:"$status"},
count:{$sum:1}
}
}
])
The result will be
[{
"_id" : {
"status" : "cancelled"
},
"count" : 13.0
},
{
"_id" : {
"status" : "completed"
},
"count" : 20.0
}
]
I think you can process it with nodejs
Using Aggregation Pipeline $group we can get this count
db.collection_name.aggregate([
{ $group: {
_id:null,
appointments_completed: {$sum : "$appointments_completed" },
appointments_upcoming:{$sum :"$appointments_upcoming"},
appointments_cancelled:{$sum: "$appointments_cancelled"}
}
}
]);
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator and a $replaceRoot pipeline to get the desired result. You would need to run the following aggregate pipeline:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$concat": ["appointments_", { "$toLower": "$status" }] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"k": "$_id.status",
"v": "$count"
}
}
} },
{ "$addFields": {
"counts": {
"$setUnion": [
"$counts", [
{
"k": "group",
"v": "$_id"
}
]
]
}
} },
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$counts" }
} }
])
For older versions, a more generic approach though with a different output format would be to group twice and get the counts as an array of key value objects as in the following:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
} }
])
which spits out:
{
"_id": "grp1"
"counts":[
{ "status": "completed", "count": 4 },
{ "status": "upcoming", "count": 5 }
{ "status": "cancelled", "count": 7 }
]
}
If the status codes are fixed then the $cond operator in the $group pipeline step can be used effectively to evaluate the counts based on the status field value. Your overall aggregation pipeline can be constructed as follows to produce the result in the desired format:
db.appointments.aggregate([
{ "$group": {
"_id": <group_by_field>,
"appointments_completed": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "completed" ] }, 1, 0 ]
}
},
"appointments_upcoming": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "upcoming" ] }, 1, 0 ]
}
},
"appointments_cancelled": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "cancelled" ] }, 1, 0 ]
}
}
} }
])
I have following collection
[
{
"setting": "Volume",
"_id": ObjectId("5a934e000102030405000000"),
"counting": 1
},
{
"setting": "Brightness",
"_id": ObjectId("5a934e000102030405000001"),
"counting": 1
},
{
"setting": "Contrast",
"_id": ObjectId("5a934e000102030405000002"),
"counting": 1
},
{
"setting": "Contrast",
"_id": ObjectId("5a934e000102030405000003"),
"counting": 1
},
{
"setting": "Contrast",
"_id": ObjectId("5a934e000102030405000004"),
"counting": 0
},
{
"setting": "Sharpness",
"_id": ObjectId("5a934e000102030405000005"),
"counting": 1
},
{
"setting": "Sharpness",
"_id": ObjectId("5a934e000102030405000006"),
"counting": 1
},
{
"setting": "Language",
"_id": ObjectId("5a934e000102030405000007"),
"counting": 1
},
{
"setting": "Language",
"_id": ObjectId("5a934e000102030405000008"),
"counting": 0
}
]
Now I want to group by setting and want only top most two data in result rest in useless
So my output should be after sort by counting
[
{
"setting": "Contrast",
"counting": 2
},
{
"setting": "Sharpness",
"counting": 2
},
{
"setting": "Useless",
"counting": 3
}
]
If you can get away with it, then it's probably best to "stuff" the reduced results into a single document and then $slice the top two and $sum the rest:
Model.aggregate([
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$group": {
"_id": null,
"data": { "$push": "$$ROOT" }
}},
{ "$addFields": {
"data": {
"$let": {
"vars": { "top": { "$slice": ["$data", 0, 2 ] } },
"in": {
"$concatArrays": [
"$$top",
{ "$cond": {
"if": { "$gt": [{ "$size": "$data" }, 2] },
"then":
[{
"_id": "Useless",
"counting": {
"$sum": {
"$map": {
"input": {
"$filter": {
"input": "$data",
"cond": { "$not": { "$in": [ "$$this._id", "$$top._id" ] } }
}
},
"in": "$$this.counting"
}
}
}
}],
"else": []
}}
]
}
}
}
}},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" } }
])
If it's potentially a very "large" result even reduced, then $limit use a $facet for the "rest":
Model.aggregate([
{ "$facet": {
"top": [
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$limit": 2 }
],
"rest": [
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$skip": 2 },
{ "$group": {
"_id": "Useless",
"counting": { "$sum": "$counting" }
}}
]
}},
{ "$project": {
"data": {
"$concatArrays": [
"$top","$rest"
]
}
}},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" } }
])
Or even $lookup with MongoDB 3.6:
Model.aggregate([
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$limit": 2 },
{ "$group": {
"_id": null,
"top": { "$push": "$$ROOT" }
}},
{ "$lookup": {
"from": "colllection",
"let": { "settings": "$top._id" },
"pipeline": [
{ "$match": {
"$expr": {
"$not": { "$in": [ "$setting", "$$settings" ] }
}
}},
{ "$group": {
"_id": "Useless",
"counting": { "$sum": "$counting" }
}}
],
"as": "rest"
}},
{ "$project": {
"data": {
"$concatArrays": [ "$top", "$rest" ]
}
}},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" } }
])
All pretty much the same really, and all return the same result:
{ "_id" : "Contrast", "counting" : 2 }
{ "_id" : "Sharpness", "counting" : 2 }
{ "_id" : "Useless", "counting" : 3 }
Optionally $project right at the end of each instead of the $replaceRoot if control over the field names is really important to you. Generally I just stick with the $group defaults
In the event that your MongoDB predates 3.4 and the resulting "Useless" remainder is actually too large to use any variant of the first approach, then simple Promise resolution is basically the answer, being one for the aggregate and the other for a basic count and simply do the math:
let [docs, count] = await Promise.all([
Model.aggregate([
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$limit": 2 },
]),
Model.count().exec()
]);
docs = [
...docs,
{
"_id": "Useless",
"counting": count - docs.reduce((o,e) => o + e.counting, 0)
}
];
Or without the async/await:
Promise.all([
Model.aggregate([
{ "$group": {
"_id": "$setting",
"counting": { "$sum": "$counting" }
}},
{ "$sort": { "counting": -1 } },
{ "$limit": 2 },
]),
Model.count().exec()
]).then(([docs, count]) => ([
...docs,
{
"_id": "Useless",
"counting": count - docs.reduce((o,e) => o + e.counting, 0)
}
]).then( result => /* do something */ )
Which is basically a variation on the age old "total pages" approach by simply running the separate query to count the collection items.
Running separate requests is generally the age old way of doing this and it often performs best. The rest of the solutions are essentially aimed at "aggregation tricks" since that was what you were asking for, and that's the answer you got by showing different variations on the same thing.
One variant put's all results into a single document ( where possible, due to the BSON limit of course ) and the others basically vary on the "age old" approach by running the query again in a different form. $facet in parallel and $lookup in series.
I'm trying to modify the second pipeline from this query (which I got from here nodejs + mongoose - query aggregate
db.todos.aggregate([
{
"$group": {
"_id": "$pic",
"open_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "open" ] }, 1, 0 ]
}
},
"progress_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "progress" ] }, 1, 0 ]
}
},
"done_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "done" ] }, 1, 0 ]
}
},
"archive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "archive" ] }, 1, 0 ]
}
}
}
},
{
"$group": {
"_id": "$_id",
"detail": {
"$push": {
"name": "open",
"$todos": "$open_count"
},
"$push": {
"name": "progress",
"$todos": "$progress_count"
},
"$push": {
"name": "done",
"$todos": "$done_count"
},
"$push": {
"name": "archive",
"$todos": "$archive_count"
}
}
}
},
{
"$project": {
"_id": 0, "pic": "$_id", "detail": 1
}
}
])
I want this kind of JSON structure so I can put it on google chart, which the format is like this:
[
{
"pic": "A",
"detail": [
{
"name": "open",
"todos": 2
},
{
"name": "progress",
"todos": 1
},
{
"name": "done",
"todos": 8
},
{
"name": "archive",
"todos": 20
}
],
"pic": "B",
"detail": [
{
"name": "open",
"todos": 5
},
{
"name": "progress",
"todos": 2
},
{
"name": "done",
"todos": 5
},
{
"name": "archive",
"todos": 10
}
],
}
]
But I got this error
exception: FieldPath 'progress' doesn't start with $
Try with this aggregation query:
db.todos.aggregate([
{
"$group": {
"_id": {
"pic": "$pic",
"name": "$status"
},
"todos": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"pic": "$_id.pic",
"detail": {
"name": "$_id.name",
"todos": "$todos"
}
}
},
{
"$group": {
"_id": "$pic",
"detail": {
"$push": "$detail"
}
}
},
{
"$project": {
"_id": 0, "pic": "$_id", "detail": 1
}
}])