I am writing a small aggregation in mongoose to filter values from db and return them in order of frequency they occur.
For example:
Say multiple documents have partners array as a field, which is an array of objects, each having two values "partner_id" and "passed_tests". I want to return all the unique partners from all the documents sorted according to their frequency in the collection in decreasing order.
Here is a sample document:
{
"location": "eindhoven",
"partners": [
{
"partner_id": 3,
"passed_tests": true
},
{
"partner_id": 2,
"passed_tests": false
}
],
"_id": "3136323031333066306d4438",
"uid": "d95f2e446c052514c097e6c925408774",
"__v": 0,
"is_approved": true
}
My code is as follows:
function returnAll(callback) {
TestService.aggregate([
{
$match: { "is_approved": true }
},
{
$unwind: "$partners"
}, {
$group: {
"_id": {
partner: { $objectToArray: "$partners" },
partner_id: { $arrayElemAt: ["$partner", 0] }
},
"count": { "$sum": 1 }
}
},
{
$sort: { "count": -1 }
},
{
$project: {
"partner_values": {
$map: {
input: "$_id.partner",
as: "el",
in: {
$cond: {
if: {
$or: [{ $eq: ["$$el.v", true] },
{ $eq: ["$$el.v", false] }]
}, then: {
"passed_tests": "$$el.v"
}, else: {
"id": "$$el.v"
}
}
}
}
},
"count": "$count,
"_id": 0
}
},
], function (error, data) {
if (error) {
logger.error(error);
callback(null);
} else {
callback(data);
}
});
}
Which returns this JSON to my node app:
"data": [
{
"partner_values": [
{
"id": 2
},
{
"passed_tests": false
}
],
"count": 3
},
{
"partner_values": [
{
"id": 6
},
{
"passed_tests": true
}
],
"count": 1
},
{
"partner_values": [
{
"id": 3
},
{
"passed_tests": true
}
],
"count": 1
},
{
"partner_values": [
{
"id": 1
},
{
"passed_tests": true
}
],
"count": 1
}
]
If I don't use the projection pipeline in my aggregation, I get this:
"data": [
{
"_id": {
"partner": [
{
"k": "partner_id",
"v": 2
},
{
"k": "passed_tests",
"v": false
}
],
"partner_id": null
},
"count": 3
},
{
"_id": {
"partner": [
{
"k": "partner_id",
"v": 6
},
{
"k": "passed_tests",
"v": true
}
],
"partner_id": null
},
"count": 1
},
{
"_id": {
"partner": [
{
"k": "partner_id",
"v": 3
},
{
"k": "passed_tests",
"v": true
}
],
"partner_id": null
},
"count": 1
},
{
"_id": {
"partner": [
{
"k": "partner_id",
"v": 1
},
{
"k": "passed_tests",
"v": true
}
],
"partner_id": null
},
"count": 1
}
]
which is quite understandable as I am introducing "_id" and other extra fields to look into the array and find the value. However, the output I want is:
"data": {
"partners": [{
"id": 2,
"passed_tests": false,
"count": 3
}, {
"id": 6,
"passed_tests": false,
"count": 1
}, {
"id": 3,
"passed_tests": false,
"count": 1
}, {
"id": 1,
"passed_tests": false,
"count": 1
}]
}
Can I get some help on this, please? Thanks.
You can use below aggregation query.
TestService.aggregate([
{"$match":{"is_approved":true}},
{"$unwind":"$partners"},
{"$group":{
"_id":{"partner_id":"$partners.partner_id","passed_tests":"$partners.passed_tests"},
"count":{"$sum":1}
}},
{"$sort":{"count":-1}},
{"$group":{
"_id":null,
"partners":{"$push":{"id":"$_id.partner_id","passed_tests":"$_id.passed_tests","count":"$count"}}
}},
{"$project":{"partners":1}}
])
Related
I am fetching for category and subcategory i am using the following pipeline
Category:
{
"$match": {
"category": {
"$in": ["list of my categories"]
}
}
},
{
"$group": {
"category": "$category",
"count": {
"$sum": 1
}
}
},
This gives me:
{category: category name,
count: totalcount}
Subcategory pipeline
{
"$match": {
"category": {
"$in": ["list of my categories"]
}
}
},
{
"$group": {
"_id": { subCategory: "$subCategory", category: "$category" },
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.category",
"counts": {
"$push": {
"k": "$_id.subCategory",
"v": "$count"
},
},
"count":{
$sum: "$counts"
}
}
},
{
"$project": {
"counts": { "$arrayToObject": "$counts" },
}
},
This gives me
category: name {
subcategory1 : total count,
...
}
How do I join the two to get a single call to return something like
{category: categoryname,
count: totalcountforcategory,
subcategories: {
subcategory: totalcount,
subcategory2:totalcount}
Update
Here is my sample JSON
{
"category": "Category one",
"name": "Sample name",
"subCategory": "subCategory one",
},
{
"category": "Category one",
"name": "Sample name",
"subCategory": "subCategory two",
},
{
"category": "Category two",
"name": "Sample name",
"subCategory": "subCategory one",
},
{
"category": "Category one",
"name": "Sample name",
"subCategory": "subCategory two",
}
Expected OutPut
{
"Category one": 3,
subCategories: {
"subCategoryone": 2,
"subCategorytwo":3,
}
}
{
"Category two": 5,
subCategories: {
"subCategoryone": 2,
"subCategorytwo":3,
}
}
db.collection.aggregate([
{
"$group": {
"_id": {
cat: "$category",
sub: "$subCategory"
},
"count": {
"$sum": 1
},
"subCategory": {
$push: "$$ROOT"
}
}
},
{
"$group": {
"_id": "$_id.cat",
"counts": {
"$push": {
"k": "$_id.sub",
"v": "$count"
},
},
"count": {
$sum: "$counts"
}
}
},
{
"$project": {
"counts": {
"$arrayToObject": "$counts"
},
}
},
])
Playground
I used $facet to join pipelines i don't know if it is best practice or not
const pipeline = [
{
"$facet": {
"GroupAll":
[{ $project: { name: 1, category: 1,subCategory:1,votes:1 } }]
,
"GroupTotal": [
{
"$match": {
"nominationYear": {
"$eq": "2022"
}
}
},
{
$count: "total"
}
],
"GroupCategories": [
{
"$match": {
"category": {
"$in": ["Categories"]
}
}
},
{
"$group": {
"_id": { category: "$category" },
"count": {
"$sum": 1
}
}
},
],
"GroupSubCategories": [
{
"$match": {
"category": {
"$in": ["Categories"]
}
}
},
{
"$group": {
"_id": { subCategory: "$subCategory", category: "$category" },
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.category",
"counts": {
"$push": {
"k": "$_id.subCategory",
"v": "$count"
},
},
"count": {
$sum: "$counts"
}
}
},
{
"$project": {
"counts": { "$arrayToObject": "$counts" },
}
},
]
}
}
]
And here is my output
{
"nominations": [
{
"GroupAll": [""],
"GroupTotal": [""],
"GroupCategories": [""],
"GroupSubCategories":[""]
}
],
"total": ""
}
I achieved it using $facet to join multiple pipelines
My schema looks like this
{
_id: ObjectID,
gender: "MALE", // MALE or FEMALE
status: "ACTIVE", // ACTIVE or INACTIVE
method: "A" // A or B
}
The API needs to return a total document count, total count by gender, total count by status and total count by method. My current approach is making multiple aggregate calls and one count method.
As such,
const genderCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$gender", count: { $sum: 1 }}}
]);
const statusCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$status", count: { $sum: 1 }}}
]);
const methodCursor = db.collection(Collection.Sample).aggregate([
{"$group": { _id: "$method", count: { $sum: 1 }}}
]);
const total = await db.collection(Collection.Sample).count();
await genderCursor.forEach(x => gender.push({ name: x._id, count: x.count}))
await statusCursor.forEach(x => statuses.push({ name: x._id, count: x.count}))
await methodCursor.forEach(x => methods.push({ name: x._id, count: x.count}))
Results,
{
"total": 100,
"gender": [
{
"name": "MALE",
"count": 30
},
{
"name": "FEMALE",
"count": 70
},
],
"statuses": [
{
"name": "APPROVED",
"count": 81
},
{
"name": "CREATED",
"count": 19
},
],
"methods": [
{
"name": "A",
"count": 50
},
{
"name": "B",
"count": 50
},
],
}
Is there a better and cost effective method to achieve the same thing as above?
You should combine all the queries into a single Aggregation Query since it will reduce your network roundtrip times and load on MongoDB servers.
There are two methods in doing this.
Method-1: Using null Group
You can group with _id null and apply $cond Operator. This is much
faster than the second method, but you have to apply all the outcomes required in the $cond.
Choose whichever method works best for your use case.
db.collection.aggregate([
{
"$group": {
"_id": null,
"male": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$gender",
"MALE"
]
},
"then": 1,
"else": 0,
},
},
},
"female": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$gender",
"FEMALE"
]
},
"then": 1,
"else": 0,
},
}
},
"active": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$status",
"ACTIVE"
]
},
"then": 1,
"else": 0,
},
}
},
"inactive": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$status",
"INACTIVE"
]
},
"then": 1,
"else": 0,
}
},
},
"methodA": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$method",
"A"
]
},
"then": 1,
"else": 0,
},
}
},
"methodB": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$method",
"B"
]
},
"then": 1,
"else": 0,
},
},
}
}
},
])
Mongo Playground Sample Execution
Method-2: Using $facet
You can also use the $facet stage, but it requires more computation on MongoDB compared with $group, but you don't have to write all the outcomes manually.
db.collection.aggregate([
{
"$facet": {
"gender": [
{
"$group": {
"_id": "$gender",
"count": {
"$sum": 1
}
}
},
],
"status": [
{
"$group": {
"_id": "$status",
"count": {
"$sum": 1
}
}
},
],
"method": [
{
"$group": {
"_id": "$method",
"count": {
"$sum": 1
}
}
},
],
}
}
])
Mongo Playground Sample Execution
i have a problem with aggregation framework in MongoDB (mongoose) this is the problem. i have the following database scheme.so what i want to do is count number of people who has access through Mobile only , Card only, or both. with out any order,
{
'_id': ObjectId,
'user_access_type': ['Mobile' , 'Card']
}
{
'_id': ObjectId,
'user_access_type': ['Card' , 'Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Card']
}
Now i am using this but it only groups by the order of the user_access_type array,
[ { "$group" : { "_id": {"User" : "$user_access_type"} , "count": {"$sum" : 1} }]
this is the output:
{
"_id": {
"User": [
"Card",
"Mobile"
]
},
"count": 1
},
{
"_id": {
"_id": "5f7dce2359aaf004985f98eb",
"User": [
"Mobile",
"Card"
]
},
"count": 1
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
vs what i want:
{
"_id": {
"User": [
"Card",
"Mobile" // we can say both
]
},
"count": 2 // does not depend on order
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
You can use other option as well using $function,
$function can allow to add javascript code, you can use sort() to sort the array
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$function: {
body: function(user_access_type){
return user_access_type.sort();
},
args: ["$user_access_type"],
lang: "js"
}
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Second option,
If user_access_type array having always unique elements then you can use $setUnion operator on user_access_type array as self union, some how this will re-order array in same order,
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$setUnion: "$user_access_type"
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Playground
I have been trying to come up with a query for these (simplified) documents below. My database consists of several data similar as these.
Since there is no nested querying in Mongo shell, is there another possible way to get what I want?
I am trying to get a list of Medicines that are owned by more than 30% of the pharmacies in my DB (regardless of quantity).
[
{
"Pharmacy": "a",
"Medicine": [
{
"MedName": "MedA",
"Quantity": 55
},
{
"MedName": "MedB",
"Quantity": 34
},
{
"MedName": "MedD",
"Quantity": 25
}
]
},
{
"Pharmacy": "b",
"Medicine": [
{
"MedName": "MedB",
"Quantity": 60
},
{
"MedName": "MedC",
"Quantity" : 34
}
]
}
]
How can I do this (if possible)?
Please check the answer here: https://mongoplayground.net/p/KVZ4Ee9Qhu-
var PharmaCount = db.collection.count();
db.collection.aggregate([
{
"$unwind": "$Medicine"
},
{
"$project": {
"medName": "$Medicine.MedName",
"Pharmacy": "$Pharmacy"
}
},
{
"$group": {
"_id": {
"medName": "$medName"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"count": 1,
"percentage": {
"$concat": [
{
"$substr": [
{
"$multiply": [
{
"$divide": [
"$count",
{
"$literal": 2 // Your total number of pharmacies i.e PharmaCount
}
]
},
100
]
},
0,
3
]
},
"",
"%"
]
}
}
}
])
You should get results like:
[
{
"_id": {
"medName": "MedC"
},
"count": 1,
"percentage": "50%"
},
{
"_id": {
"medName": "MedD"
},
"count": 1,
"percentage": "50%"
},
{
"_id": {
"medName": "MedB"
},
"count": 2,
"percentage": "100%"
},
{
"_id": {
"medName": "MedA"
},
"count": 1,
"percentage": "50%"
}
]
Hope this helps.
You can not do this in a single query, but here is a way :
size = (db['01'].distinct("Pharmacy")).length;
minPN = Math.ceil(size*0.3);
db['01'].aggregate(
// Pipeline
[
// Stage 1
{
$unwind: {
path : "$Medicine",
}
},
// Stage 2
{
$group: {
_id:"$Medicine.MedName",
pharmacies:{$addToSet:"$Pharmacy"}
}
},
// Stage 3
{
$project: {
pharmacies:1,
pharmacies_count:{$size:"$pharmacies"}
}
},
{
$match:{pharmacies_count:{$gte:minPN}}
}
]
);
I'm trying to modify the second pipeline from this query (which I got from here nodejs + mongoose - query aggregate
db.todos.aggregate([
{
"$group": {
"_id": "$pic",
"open_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "open" ] }, 1, 0 ]
}
},
"progress_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "progress" ] }, 1, 0 ]
}
},
"done_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "done" ] }, 1, 0 ]
}
},
"archive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "archive" ] }, 1, 0 ]
}
}
}
},
{
"$group": {
"_id": "$_id",
"detail": {
"$push": {
"name": "open",
"$todos": "$open_count"
},
"$push": {
"name": "progress",
"$todos": "$progress_count"
},
"$push": {
"name": "done",
"$todos": "$done_count"
},
"$push": {
"name": "archive",
"$todos": "$archive_count"
}
}
}
},
{
"$project": {
"_id": 0, "pic": "$_id", "detail": 1
}
}
])
I want this kind of JSON structure so I can put it on google chart, which the format is like this:
[
{
"pic": "A",
"detail": [
{
"name": "open",
"todos": 2
},
{
"name": "progress",
"todos": 1
},
{
"name": "done",
"todos": 8
},
{
"name": "archive",
"todos": 20
}
],
"pic": "B",
"detail": [
{
"name": "open",
"todos": 5
},
{
"name": "progress",
"todos": 2
},
{
"name": "done",
"todos": 5
},
{
"name": "archive",
"todos": 10
}
],
}
]
But I got this error
exception: FieldPath 'progress' doesn't start with $
Try with this aggregation query:
db.todos.aggregate([
{
"$group": {
"_id": {
"pic": "$pic",
"name": "$status"
},
"todos": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"pic": "$_id.pic",
"detail": {
"name": "$_id.name",
"todos": "$todos"
}
}
},
{
"$group": {
"_id": "$pic",
"detail": {
"$push": "$detail"
}
}
},
{
"$project": {
"_id": 0, "pic": "$_id", "detail": 1
}
}])