Aggregate to get average from document and of array elements - node.js

I've got this array of these objects:
item1 = {
name:'item',
val:1,
list:[
{type:'a',value:1},
{type:'b',value:1},
{type:'c',value:1}
]
};
item2 = {
name:'item',
val:5,
list:[
{type:'a',value:3},
{type:'b',value:99},
{type:'c',value:1}
]
};
They all have the same array, same types 'a', 'b' & 'c', but different values.
How can I get the average value of type 'a', 'b' & 'c'?
How can I get the average value of all items?
I'm expecting
itemAvg = {
name:'item',
val:3,
list:[
{type:'a',value:2},
{type:'b',value:50},
{type:'c',value:1}
]
};
I thought grouping first the val by name and pushing list.
Then unwinding list.
Then grouping the list by types.
But this doesn't work
model.aggregate([
{ $match : <condition> },
{ $group : {
_id:{name:'$name'},
ValAvg:{$avg:'$val'}
List:{$push:'list'}
}},
{ $unwind:'$List'},
{ $group:{
_id:{type:'$List.type',
ValueAvg:{$avg:'$List.value'}
}}
])
I was hoping the last group after the unwind would group by the type and calculate the average of value for each distinct type... but no... I get ValueAvg=0
Thanks

You need two $unwind stages since you pushed arrays inside an array and then follow up with two $group stages:
model.aggregate([
{ "$match": { <condition> }},
{ "$group": {
"_id": "name",
"val": { "$avg": "$val" },
"list": { "$push": "$list" }
}},
{ "$unwind": "$list" },
{ "$unwind": "$list" },
{ "$group": {
"_id": { "name": "$_id", "type": "$list.type" },
"val": { "$avg": "$val" },
"valAvg": { "$avg": "$list.value" }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.name",
"val": { "$avg": "$val" },
"list": { "$push": {
"type": "$_id.type",
"value": "$valAvg"
}}
}}
])
So by grouping at the "type" level first, you get the averages across the elements, and then the original form is reconstructed by the final $group. Note the $sort to retain the order of elements, otherwise they will be reversed:
{
"_id" : "name",
"val" : 3,
"list" : [
{
"type" : "a",
"value" : 2
},
{
"type" : "b",
"value" : 50
},
{
"type" : "c",
"value" : 1
}
]
}
If you are tempted to $unwind first to avoid putting arrays inside arrays then don't do that. The averages you seek outside the array will be affected by the number of elements in the array when unwound. So a document whose array has more elements than another's would have its value "weighted" more heavily in determining the average there.

Related

How to combine two objects in mongodb aggregation project stage?

I have the following nested array structure in my aggregation pipeline. I tried the $mergeObjects and $setUnion operators.
{
combs:[
[
{
name:"A",
c_type:"A"
},
{
type:"visual",
severity:"Normal"
}
],
[
{
name:"B",
c_type:"B"
},
{
type:"visual",
severity:"Normal"
}
]
]
}
I am expecting the following results to produce some statistics. Please help me.
{
combs:[
{
name:"A",
c_type:"A",
type:"visual",
severity:"Normal"
}
{
name:"B",
c_type:"B",
type:"visual",
severity:"Normal"
}
]
}
"Is it possible to achieve without $unwind operation?"
Well YES. As long as your structure of arrays of arrays is consistently mapped that way then you really only need a single stage in the pipeline:
db.collection.aggregate([
{ "$addFields": {
"combs": {
"$map": {
"input": "$combs",
"in": { "$mergeObjects": "$$this" }
}
}
}}
])
So really the $map operator is used here as a much more efficient method than $unwind for processing each array element. Also, $mergeObjects expects "an array of objects", which is exactly what each element of your array of arrays is. So simply apply { "$mergeObjects": "$$this" } to each outer member of the array.
Produces the output from your supplied data:
{
"_id" : ObjectId("5d8865c273375a6a4cc9e76a"),
"combs" : [
{
"name" : "A",
"c_type" : "A",
"type" : "visual",
"severity" : "Normal"
},
{
"name" : "B",
"c_type" : "B",
"type" : "visual",
"severity" : "Normal"
}
]
}
Generally you should always prefer an inline processor like $map or other array operators in preference to $unwind where applicable.
You can use this aggregation query
db.collection.aggregate([
{ $unwind: "$combs" },
{ $addFields: { combs: { $mergeObjects: "$combs" }}},
{ $group: { _id: "$_id", combs: { $push: "$combs" }} }
])

Find duplicate inside array without $unwind

I have below users collection
[{
"_id": 1,
"adds": ["111", "222", "333", "111"]
}, {
"_id": 2,
"adds": ["555", "666", "777", "555"]
}, {
"_id": 3,
"adds": ["888", "999", "000", "888"]
}]
I need to find the duplicates inside the adds array
The expected output should be
[{
"_id": 1,
"adds": ["111"]
}, {
"_id": 2,
"adds": [ "555"]
}, {
"_id": 3,
"adds": ["888"]
}]
I have tried using many operators such as $setUnion and $setDifference, but none of them did the trick.
Please help!!!
You can use $range to generate an array of numbers from 1 to n - 1, where n is the $size of adds. Then you can "loop" through those numbers and check whether the element of adds at that index ($arrayElemAt) exists somewhere before the index; if it does, it should be considered a duplicate. You can use $indexOfArray to check if an element exists in the array, specifying 0 and index as the search range.
Then you just need to use $project and $map to replace indexes with actual elements. You can also add $setUnion to avoid duplicated duplicates in final result set.
db.users.aggregate([
{
$addFields: {
duplicates: {
$filter: {
input: { $range: [ 1, { $size: "$adds" } ] },
as: "index",
cond: {
$ne: [ { $indexOfArray: [ "$adds", { $arrayElemAt: [ "$adds", "$$index" ] }, 0, "$$index" ] }, -1 ]
}
}
}
}
},
{
$project: {
_id: 1,
adds: {
$setUnion: [ { $map: { input: "$duplicates", as: "d", in: { $arrayElemAt: [ "$adds", "$$d" ] } } }, [] ]
}
}
}
])
Prints:
{ "_id" : 1, "adds" : [ "111" ] }
{ "_id" : 2, "adds" : [ "555" ] }
{ "_id" : 3, "adds" : [ "888" ] }
Here is another version that you might want to compare in terms of performance:
db.users.aggregate({
$project:{
"adds":{
$reduce:{
"input":{$range:[0,{$size:"$adds"}]}, // loop variable from 0 to max. index of $adds array
//"input":{$range:[0,{$subtract:[{$size:"$adds"},1]}]}, // this would be enough but looks more complicated
"initialValue":[],
"in":{
$let:{
"vars":{
"curr": { $arrayElemAt: [ "$adds", "$$this"] } // the element we're looking at
},
"in":{
// if there is another identical element after the current one then we have a duplicate
$cond:[
{$ne:[{$indexOfArray:["$adds","$$curr",{$add:["$$this",1]}]},-1]},
{$setUnion:["$$value",["$$curr"]]}, // combine duplicates found so far with new duplicate
"$$value" // continue with current value
]
}
}
}
}
}
}
})
The logic is based on a loop variable which we get through the $range operator. This loop variable allows for sequential access of the adds array. For every item that we look at, we check if there is another identical one after the current index. If yes, we have a duplicate, otherwise not.
You can try the aggregation below. The idea is to collect the distinct values, iterate over them, and check whether each value occurs more than once in the adds array; if it does, keep the value, otherwise ignore it.
db.users.aggregate({
"$project":{
"adds":{
"$reduce":{
"input":{"$setUnion":["$adds",[]]},
"initialValue":[],
"in":{
"$concatArrays":[
"$$value",
{"$let":{
"vars":{
"match":{
"$filter":{"input":"$adds","as":"a","cond":{"$eq":["$$a","$$this"]}}
}},
"in":{
"$cond":[{"$gt":[{"$size":"$$match"},1]},["$$this"],[]]
}
}}
]
}
}
}
}
})

Undo Unwind in aggregate in mongodb

I have multiple data something like this
{
"_id" : ObjectId("57189fcd72b6e0480ed7a0a9"),
"venueId" : ObjectId("56ce9ead08daba400d14edc9"),
"companyId" : ObjectId("56e7d62ecc0b8fc812b2aac5"),
"cardTypeId" : ObjectId("56cea8acd82cd11004ee67a9"),
"matchData" : [
{
"matchId" : ObjectId("57175c25561d87001e666d12"),
"matchDate" : ISODate("2016-04-08T18:30:00.000Z"),
"matchTime" : "20:00:00",
"_id" : ObjectId("57189fcd72b6e0480ed7a0ab"),
"active" : 3,
"cancelled" : 0,
"produced" : 3
},
{
"matchId" : ObjectId("57175c25561d87001e666d13"),
"matchDate" : ISODate("2016-04-09T18:30:00.000Z"),
"matchTime" : "20:00:00",
"_id" : ObjectId("57189fcd72b6e0480ed7a0aa"),
"active" : null,
"cancelled" : null,
"produced" : null
}
],
"__v" : 0
}
I am grouping by companyId and it works fine, but I want to search in matchData based on matchTime and matchId. For that purpose I $unwind matchData, and after the unwind I apply my search query like this:
db.getCollection('matchWiseData').aggregate([
{"$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}
}},
{"$unwind":"$matchData"},
{"$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}}
}])
It gives me the proper result, but after applying the unwind is there any way to undo it? I am using unwind just to search inside the subdocument. Or is there any other way to search inside a subdocument?
Well you can of course just use $push and $first in a $group to get the document back to what it was:
db.getCollection('matchWiseData').aggregate([
{ "$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}
}},
{ "$unwind":"$matchData"},
{ "$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}
}},
{ "$group": {
"_id": "$_id",
"venueId": { "$first": "$venueId" },
"companyId": { "$first": "$companyId" },
"cardTypeId": { "$first": "$cardTypeId" },
"matchData": { "$push": "$matchData" }
}}
])
But you probably should have just used $filter with MongoDB 3.2 in the first place:
db.getCollection('matchWiseData').aggregate([
{ "$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}
}},
{ "$project": {
"venueId": 1,
"companyId": 1,
"cardTypeId": 1,
"matchData": {
"$filter": {
"input": "$matchData",
"as": "match",
"cond": {
"$or": [
{ "$eq": [ "$$match.matchId", ObjectId("57175c25561d87001e666d12") ] }
]
}
}
}
}}
])
And if you had at least MongoDB 2.6, you still could have used $map and $setDifference instead:
db.getCollection('matchWiseData').aggregate([
{ "$match":{
"matchData.matchId":{"$in":[ObjectId("57175c25561d87001e666d12")]}
}},
{ "$project": {
"venueId": 1,
"companyId": 1,
"cardTypeId": 1,
"matchData": {
"$setDifference": [
{ "$map": {
"input": "$matchData",
"as": "match",
"in": {
"$cond": [
{ "$or": [
{ "$eq": [ "$$match.matchId", ObjectId("57175c25561d87001e666d12") ] }
]},
"$$match",
false
]
}
}},
[false]
]
}
}}
])
That's perfectly fine when every array element already has a "unique" identifier, so the "set" operation just removes the false values from $map.
Both of those are ways to "filter" content from an array without actually using $unwind.
N.B: Not sure if you really grasp that $in is used to match a "list of conditions" rather than being required to match on arrays. So generally the condition can just be:
"matchData.matchId": ObjectId("57175c25561d87001e666d12")
Where you only actually have a single value to match on. You use $in and $or when you have a "list" of conditions. Arrays themselves make no difference to the operator required.

MongoDB: Concatenate Multiple Arrays

I have 3 arrays of ObjectIds I want to concatenate into a single array, and then sort by creation date. $setUnion does precisely what I want, but I'd like to try without using it.
Schema of object I want to sort:
var chirpSchema = new mongoose.Schema({
interactions: {
_liked : ["55035390d3e910505be02ce2"] // [{ type: $oid, ref: "interaction" }]
, _shared : ["507f191e810c19729de860ea", "507f191e810c19729de860ea"] // [{ type: $oid, ref: "interaction" }]
, _viewed : ["507f1f77bcf86cd799439011"] // [{ type: $oid, ref: "interaction" }]
}
});
Desired result: Concatenate _liked, _shared, and _viewed into a single array, and then sort them by creation date using aggregate pipeline. See below
["507f1f77bcf86cd799439011", "507f191e810c19729de860ea", "507f191e810c19729de860ea", "55035390d3e910505be02ce2"]
I know I'm supposed to use $push, $each, $group, and $unwind in some combination or other, but I'm having trouble piecing together the documentation to make this happen.
Update: Query
model_user.aggregate([
{ $match : { '_id' : { $in : following } } }
, { $project : { 'interactions' : 1 } }
, { $project : {
"combined": { $setUnion : [
"$interactions._liked"
, "$interactions._shared"
, "$interactions._viewed"
]}
}}
])
.exec(function (err, data) {
if (err) return next(err);
next(data); // Combined is returning null
})
If all the Object _id values are "unique" then $setUnion is your best option. It is of course not "ordered" in any way as it works with a "set", and that does not guarantee order. But you can always unwind and $sort.
[
{ "$project": {
"combined": { "$setUnion": [
{ "$ifNull": [ "$interactions._liked", [] ] },
{ "$ifNull": [ "$interactions._shared", [] ] },
{ "$ifNull": [ "$interactions._viewed", [] ] }
]}
}},
{ "$unwind": "$combined" },
{ "$sort": { "combined": 1 } },
{ "$group": {
"_id": "$_id",
"combined": { "$push": "$combined" }
}}
]
Of course again since this is a "set" of distinct values you can do the old way instead with $addToSet, after processing $unwind on each array:
[
{ "$unwind": "$interactions._liked" },
{ "$unwind": "$interactions._shared" },
{ "$unwind": "$interactions._viewed" },
{ "$project": {
"interactions": 1,
"type": { "$const": [ "liked", "shared", "viewed" ] }
}},
{ "$unwind": "$type" },
{ "$group": {
"_id": "$_id",
"combined": {
"$addToSet": {
"$cond": [
{ "$eq": [ "$type", "liked" ] },
"$interactions._liked",
{ "$cond": [
{ "$eq": [ "$type", "shared" ] },
"$interactions._shared",
"$interactions._viewed"
]}
]
}
}
}},
{ "$unwind": "$combined" },
{ "$sort": { "combined": 1 } },
{ "$group": {
"_id": "$_id",
"combined": { "$push": "$combined" }
}}
]
But still the same thing applies to ordering.
Future releases even have the ability to concatenate arrays without reducing to a "set":
[
{ "$project": {
"combined": { "$concatArrays": [
"$interactions._liked",
"$interactions._shared",
"$interactions._viewed"
]}
}},
{ "$unwind": "$combined" },
{ "$sort": { "combined": 1 } },
{ "$group": {
"_id": "$_id",
"combined": { "$push": "$combined" }
}}
]
But still there is no way to re-order the results without processing $unwind and $sort.
You might therefore consider that unless you need this grouped across multiple documents, the basic "concatenate and sort" operation is best handled in client code. MongoDB has no way to do this "in place" on the array at present, so per document in client code is your best bet.
But if you do need to do this grouping over multiple documents, then the sort of approaches as shown here are for you.
Also note that "creation" here means creation of the ObjectId value itself and not other properties from your referenced objects. If you need those, then you perform a populate on the id values after the aggregation or query instead, and of course sort in client code.

MongoDb - $match filter not working in subdocument

This is Collection Structure
[{
"_id" : "....",
"name" : "aaaa",
"level_max_leaves" : [
{
level : "ObjectIdString 1",
max_leaves : 4,
}
]
},
{
"_id" : "....",
"name" : "bbbb",
"level_max_leaves" : [
{
level : "ObjectIdString 2",
max_leaves : 2,
}
]
}]
I need to find the subdocuments whose level_max_leaves.level value matches a given input value.
And this is how I tried,
For example,
var empLevelId = 'ObjectIdString 1' ;
MyModel.aggregate(
{$unwind: "$level_max_leaves"},
{$match: {"$level_max_leaves.level": empLevelId } },
{$group: { "_id": "$level_max_leaves.level",
"total": { "$sum": "$level_max_leaves.max_leaves" }}},
function (err, res) {
console.log(res);
});
But here the $match filter is not working. I can't find out exact results of ObjectIdString 1
If I filter with the name field, it's working fine, like this,
{$match: {"$name": "aaaa" } },
But at the subdocument level it returns 0.
{$match: {"$level_max_leaves.level": "ObjectIdString 1"} },
My expected result was,
{
"_id" : "ObjectIdString 1",
"total" : 4,
}
You have typed the $match incorrectly. Fields with $ prefixes are either for the implemented operators or for "variable" references to field content. So you just type the field name:
MyModel.aggregate(
[
{ "$match": { "level_max_leaves.level": "ObjectIdString 1" } },
{ "$unwind": "$level_max_leaves" },
{ "$match": { "level_max_leaves.level": "ObjectIdString 1" } },
{ "$group": {
"_id": "$level_max_leaves.level",
"total": { "$sum": "$level_max_leaves.max_leaves" }
}}
],
function (err, res) {
console.log(res);
}
);
Which on the sample you provide produces:
{ "_id" : "ObjectIdString 1", "total" : 4 }
It is also good practice to $match first in your pipeline. That is in fact the only time an index can be used. But not only for that, as without the initial $match statement, your aggregation pipeline would perform an $unwind operation on every document in the collection, whether it met the conditions or not.
So generally what you want to do here is
Match the documents that contain the required elements in the array
Unwind the array of the matching documents
Match the required array content excluding all others

Resources