Multiple Aggregate functions in one request - node.js

I have a data set which is the following:
{
item: '123',
array: [{
array2:[{
array3: [{
property1: 1234
}]
}],
anotherArray: [{
property2: 1234
}]
}]
}
I'm trying to aggregate the sum of property1 and property2 in the same request.
here is my current aggregate function:
Item.aggregate([
{$match: {itemId: 1234}},
{$unwind: "$array"},
{$unwind: "$array.array2"},
{$unwind: "$array.array2.array3"},
{$unwind: "$array.anotherArray"},
{$group: {
_id: 0,
property1: {$sum: '$array.array2.array3.property1'},
property2: {$sum: '$array.anotherArray.property2'}
}},
{$project: {
_id: 0,
property1: "$property1",
property2: "$property2",
}},
], function (err, aggregate) {
callback(null, aggregate);
});
The problem is that the aggregated results of property1 and property2 are always double the value they should be.
I guess the problem is with the $unwind of "anotherArray", because when I remove it I get the correct aggregation value.
Is it possible to make aggregation on multiple arrays with one aggregation function?
Currently I'm just making 2 different requests to the db with async.parallel, but I want to make more complex aggregations in the future without making extra db calls.

As noted, the structure is not a good one and should probably be reviewed as to its intent. It's really not clear why it is so structured, or whether anything else in the arrays in either case could mess up the results here.
But there is a general approach when you have multiple arrays in a document, that is basically to treat each array separately and get your "totals" per document first. Then sum the totals from all documents afterwards:
Item.aggregate([
// Unwind only 1 inner array first
{ "$unwind": "$array" },
{ "$unwind": "$array.array2" },
{ "$unwind": "$array.array2.array3" },
// Group back the sum of the element and the first of the other array
// and only per document
{ "$group": {
"_id": "$_id",
"property1": { "$sum": "$array.array2.array3.property1" },
"anotherArray": { "$first": "$array.anotherArray" }
}},
// Unwind the other array
{ "$unwind": "$anotherArray" },
// Group back the total and the first summed per document
{ "$group": {
"_id": "$_id",
"property1": { "$first": "$property1" },
"property2": { "$sum": "$anotherArray.property2" }
}},
// Total all documents and output
{ "$group": {
"_id": null,
"property1": { "$sum": "$property1" },
"property2": { "$sum": "$property2" },
}},
{ "$project": {
"_id": 0,
"property1": 1,
"property2": 1
}}
],callback);
So by confining yourself to only one array at a time and getting the totals only within the original document first, you avoid the duplication problems of creating multiple copies for each unwound item of the other array. With discrete per-document totals it is then simple to get the overall totals from your required selection.

Finally I've found a solution for my use case with MongoDB $setUnion.
Here is the code I used for my question:
Item.aggregate([
{$match: { itemID: '1234'}},
{$unwind: "$array1"},
{$unwind: "$array1.array2"},
{$project: {
_id: 0,
combined: {$setUnion: ['$array1.anotherArray', '$array1.array2.array3']},
}},
{$unwind: "$combined"},
{$group: {
_id: 0,
property1: {$sum: '$combined.property1'},
property2: {$sum: '$combined.property2'}
}},
], function (err, aggregate) {
cb(aggregate);
});

Related

Sum of subdocuments in Mongoose

Currently I have this schema.
var cartSchema = new Schema({
userPurchased: {type: Schema.Types.ObjectId, ref: 'users'},
products: [
{
product: {type: Schema.Types.ObjectId, ref: 'products'},
size: {type: String, required: true},
quantity: {type: Number, required: true},
subTotal: {type: Number, required: true}
}
],
totalPrice: {type: Number, default: 0, required: true}
});
Example of db record
{
"_id": {
"$oid": "586f4be94d3e481378469a08"
},
"userPurchased": {
"$oid": "586dca1f5f4a7810fc890f97"
},
"totalPrice": 0,
"products": [
{
"product": {
"$oid": "58466e8e734d1d2b0ceeae00"
},
"size": "m",
"quantity": 5,
"subTotal": 1995,
"_id": {
"$oid": "586f4be94d3e481378469a09"
}
},
{
"subTotal": 1197,
"quantity": 3,
"size": "m",
"product": {
"$oid": "58466e8e734d1d2b0ceeae01"
},
"_id": {
"$oid": "586f4ef64d3e481378469a0a"
}
}
],
"__v": 0
}
Is there any way to sum all the subTotal and put it in the total price field? Right now I am thinking about aggregate function but I doubt it will be the right approach in here. I guess I need an update query and sum method at the same time. Can anyone help me in here?
Using the aggregate() function, you can run the following pipeline which uses the $sum operator to get the desired results:
const results = await Cart.aggregate([
{ "$addFields": {
"totalPrice": {
"$sum": "$products.subTotal"
}
} },
]);
console.log(JSON.stringify(results, null, 4));
and the corresponding update operation follows:
db.carts.updateMany(
{ },
[
{ "$set": {
"totalPrice": {
"$sum": "$products.subTotal"
}
} },
]
)
Or if using MongoDB versions earlier than 3.2, where $sum is available in the $group stage only, you can do
const pipeline = [
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
}
]
Cart.aggregate(pipeline)
.exec(function(err, results){
if (err) throw err;
console.log(JSON.stringify(results, null, 4));
})
In the above pipeline, the first step is the $unwind operator
{ "$unwind": "$products" }
which comes in quite handy when the data is stored as an array. When the unwind operator is applied on a list data field, it will generate a new record for each and every element of the list data field on which unwind is applied. It basically flattens the data.
This is a necessary operation for the next pipeline stage, the $group step where you group the flattened documents by the _id field, thus effectively regrouping the denormalised documents back to their original schema.
The $group pipeline operator is similar to the SQL's GROUP BY clause. In SQL, you can't use GROUP BY unless you use any of the aggregation functions. The same way, you have to use an aggregation function in MongoDB (called accumulators) as well. You can read more about the accumulators here.
In this $group operation, the logic to calculate the totalPrice and returning the original fields is through the accumulators. You get the totalPrice by summing up each individual subTotal value per group with $sum as:
"totalPrice": { "$sum": "$products.subTotal" }
The other expression
"userPurchased": { "$first": "$userPurchased" },
will return a userPurchased value from the first document for each group using $first. Thus effectively rebuilding the original document schema before the $unwind
One thing to note here is when executing a pipeline, MongoDB pipes operators into each other. "Pipe" here takes the Linux meaning: the output of an operator becomes the input of the following operator. The result of each operator is a new collection of documents. So Mongo executes the above pipeline as follows:
collection | $unwind | $group => result
As a side note, to help with understanding the pipeline or to debug it should you get unexpected results, run the aggregation with just the first pipeline operator. For example, run the aggregation in mongo shell as:
db.cart.aggregate([
{ "$unwind": "$products" }
])
Check the result to see if the products array is deconstructed properly. If that gives the expected result, add the next:
db.cart.aggregate([
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
}
])
Repeat the steps till you get to the final pipeline step.
If you want to update the field then you can add the $out pipeline stage as the last step. This will write the resulting documents of the aggregation pipeline to the same collection, thus technically updating the collection.
var pipeline = [
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
},
{ "$out": "cart" } // write the results to the same underlying mongo collection
]
UPDATE
To do both the update and query, you could then issue a find() call in the aggregate callback to get the updated json i.e.
Cart.aggregate(pipeline)
.exec(function(err, results){
if (err) throw err;
Cart.find().exec(function(err, docs){
if (err) return handleError(err);
console.log(JSON.stringify(docs, null, 4));
})
})
Using Promises, you could do this alternatively as
Cart.aggregate(pipeline).exec().then(function(res) {
return Cart.find().exec();
}).then(function(docs){
console.log(JSON.stringify(docs, null, 4));
});
I can't really say whether this approach is better than the aggregation, but in case you want to do it with virtuals:
cartSchema.virtual('totalPrice').get(function () {
return this.products.map(p => p.subTotal).reduce((a, b) => a + b, 0);
});
But care:
If you use toJSON() or toObject() (or use JSON.stringify() on a mongoose document) mongoose will not include virtuals by default. Pass { virtuals: true } to either toObject() or toJSON()

Mongoose error on aggregate group

i have this model:
var Chat = new Schema({
from: String,
to: String,
satopId: String,
createdAt: Date
});
var Chat = mongoose.model('Chat', Chat);
I want to do a query that returns the max createdAt, grouping by the to and from fields. I tried with:
Chat.aggregate([
{
$group: {
_id: '$to',
from: '$from',
createdAt: {
$max: '$createdAt'
}
}
},
{
$project: {
_id: 1,
createdAt: 1,
from: 1,
to: 1
}
}
], function(err, docs){
})
But this generates this error:
the group aggregate field 'from' must be defined as an expression
inside an object
I don't understand what this means. How can I solve it?
Thanks
Anything "outside" of the _id expression in a $group statement requires a "grouping operator" of some sort.
Assuming you are happy with the idea of the "sole" grouping key to be the "to" field in your documents then you probably want something like $first:
Chat.aggregate([
{ "$group": {
"_id": "$to",
"from": { "$first": "$from" },
"createdAt": { "$max": "$createdAt" }
}},
function (err, docs) {
// do something here
}
])
Otherwise if you want "distinct" values on "both" fields then they both belong as a composite value of the grouping key:
Chat.aggregate([
{ "$group": {
"_id": {
"to": "$to",
"from": "$from"
},
"createdAt": { "$max": "$createdAt" }
}},
function (err, docs) {
// do something here
}
])
That's basically how it works. Either it's part of the key or something that needs a grouping operator just like with SQL.
Also note that your $project assertion is not really required as $group already does what you are asking there.

Finding only an item in an array of arrays by value with Mongoose

Here is an example of my Schema with some data:
client {
menus: [{
sections: [{
items: [{
slug: 'some-thing'
}]
}]
}]
}
And I am trying to select it like this:
Schema.findOne({ _id: id, 'menus.sections.items.slug': 'some-thing' }).select('menus.sections.items.$').exec(function(error, docs){
console.log(docs.menus[0].sections[0].items[0].slug);
});
Of course "docs.menus[0].sections[0].items[0].slug" only works if there is only one thing in each array. How can I make this work if there are multiple items in each array, without having to loop through everything to find it?
If you need more details let me know.
The aggregation framework is good for finding things in deeply nested arrays where the positional operator will fail you:
Model.aggregate(
[
// Match the "documents" that meet your criteria
{ "$match": {
"menus.sections.items.slug": "some-thing"
}},
// Unwind the arrays to de-normalize as documents
{ "$unwind": "$menus" },
{ "$unwind": "$menus.sections" },
{ "$unwind": "$menus.sections.items" },
// Match only the element(s) that meet the criteria
{ "$match": {
"menus.sections.items.slug": "some-thing"
}},
// Optionally group everything back to the nested array
// One step at a time
{ "$group": {
"_id": "$_id",
"items": { "$push": "$menus.sections.items.slug" }
}},
{ "$group": {
"_id": "$_id",
"sections": {
"$push": { "items": "$items" }
}
}},
{ "$group": {
"_id": "$_id",
"menus": {
"$push": { "sections": "$sections" }
}
}},
],
function(err,results) {
}
)
Also see the other aggregation operators such as $first for keeping other fields in your document when using $group.

Non strict behavior of $nin in mongodb

Is there a non strict $nin version in mongodb? for example
Let's say that we have a model called User and a Model called task
var TaskSchema = new Schema({
user_array: [{user: Schema.ObjectId}],
});
A quick sample would be this
task1 : [user1, user2, user4, user7]
task2 : [user2, user5, user7]
if I have a list of user
[user1, user7]
I want to select the task that has the least overlapping in the user_array, in this case task2, I know $nin strictly returns the task that contains neither user1 or user7, but I would like to know if there are operation where $nin is non strict.
Alternatively, I could have write a DP function to this for me
Any advice would be appreciated
Thanks
Well in MongoDB version 2.6 and upwards you have the $setIntersection and $size operators available so you can perform an .aggregate() statement like this:
db.collection.aggregate([
{ "$project": {
"user_array": 1,
"size": { "$size": {
"$setIntersection": [ "$user_array", [ "user1", "user7" ] ]
}}
}},
{ "$match": { "size": { "$gt": 0 } } },
{ "$sort": { "size": 1 }},
{ "$group": {
"_id": null,
"user_array": { "$first": "$user_array" }
}}
])
So those operators help to reduce the steps required to find the least matching document.
Basically the $setIntersection returns the matching elements in the array to the one it is being compared with. The $size operator returns the "size" of that resulting array. So later you filter out with $match any documents where neither of the items in the matching list were found in the array.
Finally you just sort and return the item with the "least" matches.
But it can still be done in earlier versions with some more steps. So basically your "non-strict" implementation becomes an $or condition. But of course you still need to count the matches:
db.collection.aggregate([
{ "$project": {
"_id": {
"_id": "$_id",
"user_array": "$user_array"
},
"user_array": 1
}},
{ "$unwind": "$user_array" },
{ "$match": {
"$or": [
{ "user_array": "user1" },
{ "user_array": "user7" }
]
}},
{ "$group": {
"_id": "$_id",
"size": { "$sum": 1 }
}},
{ "$sort": { "size": 1 } },
{ "$group": {
"_id": null,
"user_array": { "$first": "$_id.user_array" }
}}
])
And that would do the same thing.

Count(field) on Mongo

I have a collection of songs and its metadata with the following structure:
[{
title:"title",
artist:"artist",
album:"album",
...
},...
Now I want to get a list of every artist with the number of songs and the number of albums it has using Node.js. So far, using the aggregation framework, I've been able to get an array of objects with each artist, its number of songs and an array with the album titles (instead of just the count), using the following pipeline:
collection.aggregate([
{ $project:{
artist:1,
album:1
}},
{ $group: {
_id: "$artist",
songs:{$sum: 1},
albums:{$addToSet:"$album"}
}},
{ $sort: { artist: 1 } }
])
If I replace $addToSet with $sum, I get albums:0 in every artist, because it expects numbers and not strings to sum.
I just can't get around it!
You need to add a couple of steps to your pipeline - the array of albums needs to be unwound and then counted. Here is what it would look like:
collection.aggregate([
{ $project:{
artist:1,
album:1
}},
{ $group: {
_id: "$artist",
songs:{$sum: 1},
albums:{$addToSet:"$album"}
}},
{ $unwind: "$albums"},
{ $group: {
_id: "$_id",
songs:{$first: "$songs"},
albums:{$sum: 1}
}},
{ $sort: { _id: 1 } }
]
)

Resources