Having issue with aggregation in MongoDB - node.js

I'm using MongoDb with Node.js. And I'm having trouble with aggregation.This is the Example of data in Collection in Database:
{
"name": "abcdef",
"address": ghijk,
"reli":"A",
"prov:"a" ,
}
{
"name": "xyz",
"address": "vwz",
"reli":"B",
"prov:"b" ,
}
{
"name": "qwe",
"address": "rty",
"reli":'C',
"prov:"c" ,
},
{
"name": "abcdef",
"address": ghijk,
"reli":"A",
"prov:"a" ,
}
{
"name": "hat",
"address": "ate",
"reli":'C',
"prov:"c" ,
},
This is my query to count:
const count = await db.aggregate([
{
$facet: {
"reli": [
{ $group: { _id: '$reli', count: { $sum: 1 } } }
],
"prov": [
{ $group: { _id: '$prov', count: { $sum: 1 } } }
],
This is my result of the query:
[
{
"reli": [
{
"_id": "A",
"count": 2
},
{
"_id": "B",
"count": 1
},
{
"_id": "C",
"count": 2
}
"prov": [
{
"_id": "a",
"count": 2
},
{
"_id": "b",
"count": 1
},
{
"_id": "c",
"count": 2
}
}]
I want to aggregate this data. And only want values for every reli and prov and it's count in my results.
Expecting Output:
[
"reli":{
A:2 // As my Collection has 2 "A" And 2 is it's count
C:2 // As there is 2 "C" in Collection.
}
"prov":{
c:2 //As there are 2 "c" in Collection.
b:1 //As there are 1 "b" in Collection
}
]

You can use $arrayToObject to get what you want here, creating keys from your values. For example:
db.collection.aggregate([
{
$facet: {
reli: [
{$group: { _id: "$reli", v: {$sum: 1}}},
{$project: {v: 1, k: "$_id", _id: 0}}
],
prov: [
{$group: {_id: "$prov", v: {$sum: 1}}},
{$project: {v: 1, k: "$_id", _id: 0}}
]
}
},
{
$project: {
prov: {$arrayToObject: "$prov"},
reli: {$arrayToObject: "$reli"}
}
}
])
Playground example
If there is a correlation between the reli and prov group, as in your example, you can avoid the $facet, group once, count once and only project twice.

You may try to use $group like this, the output isn't exactly like expected, but it may help you :
const reli = await db.collection.aggregate([
{
'$group': {
'_id': '$reli',
'count': {
'$sum': 1
}
}
}, {
'$project': {
'_id': 0,
'reli': '$_id',
'count': 1
}
}
])
const prov = await db.collection.aggregate([
{
'$group': {
'_id': '$prov',
'count': {
'$sum': 1
}
}
}, {
'$project': {
'_id': 0,
'prov': '$_id',
'count': 1
}
}
])
const res = {
reli,
prov
};
output :
{
"prov": [
{ "count": 2, "prov": "a" },
{ "count": 1, "prov": "b" },
{ "count": 2, "prov": "c" }
],
"reli": [
{ "count": 2, "reli": "A" },
{ "count": 2, "reli": "C" },
{ "count": 1, "reli": "B" }
]
}
From this data, if you need it, you could re write key:value, for exact data model as you asked.

Related

Mongodb aggregation to pass both a matched array and an unmatched array

I've got a MongoDB / Nodes aggregation that looks a little like this (there are other values in there, but this is the basic idea).
[
{
'$unwind': {
'path': '$Vehicles'
}
},
{
'$match': {
'Vehicles.Manufacturer': 'FORD'
}
},
{
'$facet': {
'makes': [
{
'$group': {
'_id': '$Vehicles.Manufacturer',
'count': {
'$sum': 1
}
}
}
]
}
},
{
'$project': {
'makes': {
'$sortArray': {
'input': '$makes',
'sortBy': 1
}
}
}
}
]
This works fine. But I would also like to pass an unmatched list through. IE an an array of vehicles whose Manufacturer = FORD and an other list of all Manufacturer.
Can't get it to work. Any ideas please?
Thanks in advance.
Edit:-
The current output looks like this:
[{
"makes": [
{
"_id": "FORD",
"count": 285
}
]
}]
and ideally it would look something like this:
[{
"makes": [
{
"_id": "FORD",
"count": 285
}
],
"unfiltered_makes": [
{
"_id": "ABARTH",
"count": 1
},
{
"_id": "AUDI",
"count": 7
},
{
"_id": "BMW",
"count": 2
},
{
"_id": "CITROEN",
"count": 4
},
{
"_id": "DS",
"count": 1
},
{
"_id": "FIAT",
"count": 1
}.... etc
]
}]
The data looks a bit like this:
"Vehicles": [
{
"Id": 1404908,
"Manufacturer": "MG",
"Model": "3",
"Price": 11995 .... etc
},{
"Id": 1404909,
"Manufacturer": "FORD",
"ManufacturerId": 34,
"Model": "Focus",
"Price": 12000 .... etc
} ... etc
]
In this case you can do something like:
db.collection.aggregate([
{$unwind: "$Vehicles"},
{$group: {
_id: "$Vehicles.Manufacturer",
count: {$sum: 1}}
},
{$facet: {
makes: [{$match: {_id: "FORD"}}],
unfiltered_makes: [{$group: {_id: 0, data: {$push: "$$ROOT"}}}]
}
},
{$project: {makes: 1, unfiltered_makes: "$unfiltered_makes.data"}}
])
See how it works on the playground example
Another option is:
db.collection.aggregate([
{$unwind: "$Vehicles"},
{$group: {
_id: "$Vehicles.Manufacturer",
count: {$sum: 1}}
},
{$group: {
_id: 0,
unfiltered_makes: {$push: "$$ROOT"},
makes: {$push: {$cond: [{$eq: ["$_id", "FORD"]}, "$$ROOT", "$$REMOVE"]}}
}
}
])
See how it works on the playground example
Here's another way to do it using "$function" to generate a histogram of "Manufacturer" and format the returned array. The javascript function only traverses the "Vehicles" array once, so this may be fairly efficient, although I did not do algorithm timing comparisons on a large collection.
N.B.: I'm a javascript noob and there may be a better way to do this.
db.collection.aggregate([
{
"$set": {
"unfiltered_makes": {
"$function": {
// generate histogram of manufacturers and format output
"body": "function(makes) {const m = new Object();makes.forEach((elem) => {m[elem.Manufacturer] = m[elem.Manufacturer] + 1 || 1});return Object.entries(m).map(([make, count]) => {return {'_id':make, 'count':count}})}",
"args": ["$Vehicles"],
"lang": "js"
}
}
}
},
{
"$project": {
"_id": 0,
"unfiltered_makes": 1,
"makes": {
"$filter": {
"input": "$unfiltered_makes",
"as": "make",
"cond": {
"$eq": [
"$$make._id",
// your search "Manufacturer" goes here
"FORD"
]
}
}
}
}
}
])
Try it on mongoplayground.net.

Group by the content of array of string with out order in mongo aggregation

i have a problem with aggregation framework in MongoDB (mongoose) this is the problem. i have the following database scheme.so what i want to do is count number of people who has access through Mobile only , Card only, or both. with out any order,
{
'_id': ObjectId,
'user_access_type': ['Mobile' , 'Card']
}
{
'_id': ObjectId,
'user_access_type': ['Card' , 'Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Mobile']
}
{
'_id': ObjectId,
'user_access_type': ['Card']
}
Now i am using this but it only groups by the order of the user_access_type array,
[ { "$group" : { "_id": {"User" : "$user_access_type"} , "count": {"$sum" : 1} }]
this is the output:
{
"_id": {
"User": [
"Card",
"Mobile"
]
},
"count": 1
},
{
"_id": {
"_id": "5f7dce2359aaf004985f98eb",
"User": [
"Mobile",
"Card"
]
},
"count": 1
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
vs what i want:
{
"_id": {
"User": [
"Card",
"Mobile" // we can say both
]
},
"count": 2 // does not depend on order
},
{
"_id": {
"User": [
"Mobile"
]
},
"count": 1
},
{
"_id": {
"User": [
"Card"
]
},
"count": 1
},
You can use other option as well using $function,
$function can allow to add javascript code, you can use sort() to sort the array
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$function: {
body: function(user_access_type){
return user_access_type.sort();
},
args: ["$user_access_type"],
lang: "js"
}
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Second option,
If user_access_type array having always unique elements then you can use $setUnion operator on user_access_type array as self union, some how this will re-order array in same order,
db.collection.aggregate([
{
$addFields: {
user_access_type: {
$setUnion: "$user_access_type"
}
}
},
{
$group: {
_id: "$user_access_type",
count: { $sum: 1 }
}
}
])
Playground

How to sum values of every document based of filed name using Mongodb and NodeJs

based on other solution
I have the following data stored on my mongo instance and i am trying to sum from each document the occurrences of each word looping on all documents, so final result should be aggregation by name with sum of all occurrences.
[
{
"_id": {
"$oid": "5f972f0a7c38a0f412d88ad0"
},
"cars": 1,
"collision": 1,
"crash": 1,
"eleven": 1,
"injured": 1,
"involving": 1
},
{
"_id": {
"$oid": "5f972f0a7c38a0f412d88b96"
},
"injured": 1,
"and": 1,
"man": 1,
"attack": 1,
"ashfield": 1,
"dog": 1,
"killed": 1,
"labrador": 1,
"sutton": 1
},
{
"_id": {
"$oid": "5f972f0a7c38a0f412d88ad2"
},
"the": 1,
"'var": 1,
"accountable'": 1,
"day": 1,
"goal": 1,
"have": 1,
"lacazette's": 1,
"match": 1,
"should": 1,
"stood": 1
},
....
and i execute the following Nodejs execution using MongoDB driver:
await db.collection('occurrences').aggregate([{
$project: {
_id: 0,
fields: {
$filter: {
input: { $objectToArray: "$$ROOT" },
cond: { $eq: [
{ $type: "$$this.v"
},
"double"
] }
}
}
}
},
{
$unwind: "$fields"
},
{
$group: {
_id: "$fields.k",
total: {
$sum: "$fields.v"
}
}
},
{
$group: {
_id: null,
aggregates: {
$push:
{ k: "$_id",
v: "$total"
}
}
}
},
{
$replaceRoot: {
newRoot: { $arrayToObject: "$aggregates" }
}
}]).toArray(function (err, docs) {
console.log(docs)
})
now my wish is to sum all word occurrences into object as follows:
{injured: 2, attack: 1, ....}
currently i am getting an empty array while i am printing console.log
I found the mistake, in my case all i need to change is
cond: { $eq: [
{ $type: "$$this.v"
},
"double"
] }
into
cond: { $eq: [
{ $type: "$$this.v"
},
"int"
] }

Mongoose aggregate to get Average rating, count each rate and return the actual ratings

Im trying to to get avg rating for a product, plus the count of each rating and also return the actual ratings and use pagination to limit amount that is returned without affecting the avg or count.
So I'm trying achieve something like this:
this is my rating collection:
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
}
and this is the end result I want
{
"_id" : 1,
"avgRating": "3.6"
"counts" : [
{
"rating" : 5,
"count" : 8
},
{
"rating" : 3,
"count" : 2
},
{
"rating" : 4,
"count" : 4
},
{
"rating" : 1,
"count" : 4
}
],
"ratings": [
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
},
{
"productId": "3"
"userid" : 12,
"rating" : 4
"comment": "this is okay"
}
]
}
I have this so far which give me the count for each rating:
db.votes.aggregate([
{ $match: { postId: {$in: [1,2]} } },
{
$group: { _id: { post: "$postId", rating: "$vote" }, count: { $sum: 1 } }
},
{
$group: {
_id: "$_id.post",
counts: { $push: { rating: "$_id.rating", count: "$count" } }
}
}
])
You're not far off, we just have to adjust some things:
db.votes.aggregate([
{
$match:
{
postId: {$in: [1, 2]}
}
},
{
$group: {
_id: {post: "$postId", rating: "$vote"},
count: {$sum: 1},
reviews: {$push : "$$ROOT" } //keep the original document
}
},
{
$group: {
_id: "$_id.post",
counts: {$push: {rating: "$_id.rating", count: "$count"}},
reviews: {$push: "$reviews"},
totalItemCount: {$sum: "$count"}, //for avg calculation
totalRating: {$sum: "$_id.rating"} // //for avg calculation
}
},
{
$project: {
_id: "$_id",
avgRating: {$divide: ["$totalRating", "$totalItemCount"]},
counts: "$counts",
reviews: {
$slice: [
{
$reduce: {
input: "$reviews",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
},
0, //skip
10 //limit
]
}
}
}
])
Note that I preserved the current pipeline structure for clarity, however I feel that using a pipeline that utilizes $facet might be more efficient as we won't have to hold the entire collection in memory while grouping.
we'll split it into two, one the current pipeline minus the review section and one with just $skip and $limit stages.
EDIT:
$facet version:
db.votes.aggregate([
{
"$match": {
"postId": {"$in": [1, 2]}
}
},
{
"$facet": {
"numbers": [
{
"$group": {
"_id": {
"post": "$postId",
"rating": "$vote"
},
"count": {
"$sum": 1.0
}
}
},
{
"$group": {
"_id": "$_id.post",
"counts": {
"$push": {
"rating": "$_id.rating",
"count": "$count"
}
},
"totalItemCount": {
"$sum": "$count"
},
"totalRating": {
"$sum": "$_id.rating"
}
}
}
],
"reviews": [
{
"$skip": 0.0
},
{
"$limit": 10.0
}
]
}
},
{
"$unwind": "$numbers"
},
{
"$project": {
"_id": "$numbers._id",
"reviews": "$reviews",
"avgRating": {"$divide": ["$numbers.totalRating", "$numbers.totalItemCount"]},
"counts": "$numbers.counts"
}
}
]);

How to get most repeated string array in Pymongo?

How can I get most repeated value for gender and age respectively?
My data:
[{ "_id": ObjectId("5dff27c0ac2d1547d87a1fe7"), "time": "2019-12-20 21:09:53",
"object": [{"Id": 1,"gender": "female","age": "0-10"},
{"Id": 2,"gender": "female","age": "20-30"}]
},
{ "_id": ObjectId("5dff27c0ac2d1547d87a1fe8"), "time": "2019-12-20 21:09:53",
"object": [{"Id": 1,"gender": "male","age": "0-10"},
{"Id": 2,"gender": "female","age": "30-40"}]
} ,
{ "_id": ObjectId("5dff27c0ac2d1547d87a1fe9"), "time": "2019-12-20 21:09:53",
"object": [{"Id": 1,"gender": "male","age": "10-15"},
{"Id": 2,"gender": "female","age": "30-40"},
{"Id": 3,"gender": "male","age": "0-10"}]
},
{ "_id": ObjectId("5dff27c0ac2d1547d87a1fea"), "time": "2019-12-20 21:09:53",
"object": [{"Id": 2,"gender": "male","age": "40-50"},
{"Id": 3,"gender": "male","age": "0-10"},
{"Id": 4,"gender": "male","age": "0-10"}]
}]
I have written below query,
mongo.db.xyz.aggregate([
{ "$unwind" : "$object"},
{"$group" : {"_id" : "$object.Id","_gen":{"$push":"$object.gender"},"_age":{"$push":"$object.age"}}},
{ "$project": { "_id" : "$_id", "gender":"$_gen","age":"$_age"}}
])
Below is the result I am getting,
[{"_id": 3,"age": ["0-10","0-10"],"gender": ["male","male"]},
{"_id": 2,"age": ["20-30","30-40","30-40","40-50"],"gender": ["female","female","female","male"]},
{"_id": 4,"age": ["0-10"],"gender": ["male"]},
{"_id": 1,"age": ["0-10","0-10","10-15"],"gender": ["female","male","male"]}
]
But I want the output to be ,
[{"_id": 3,"age": "0-10","gender": "male"},
{"_id": 2,"age": "30-40","gender": "female"},
{"_id": 4,"age": "0-10","gender": "male"},
{"_id": 1,"age": "0-10","gender": "male"}
]
Thinking about this problem I realized it was not so simple to get the mode of some fields independently in an array of objects with just one db query. To solve that I created a query to do it generically, based on your sample data.
db.collection.aggregate([
{
$unwind: {
path: "$arr"
}
},
{
$project: {
arr: {
$objectToArray: "$arr"
}
}
},
{
$unwind: {
path: "$arr"
}
},
{
$group: {
_id: {
_id: "$_id",
k: "$arr.k",
v: "$arr.v"
},
count: {
$sum: 1
}
}
},
{
$sort: {
count: -1
}
},
{
$group: {
_id: {
_id: "$_id._id",
k: "$_id.k"
},
v: {
$first: "$_id.v"
},
count: {
$first: "$count"
}
}
},
{
$group: {
_id: {
_id: "$_id._id"
},
arr: {
$push: {
k: "$_id.k",
v: {
mode: "$v",
count: "$count"
}
}
}
}
},
{
$project: {
arr: {
$arrayToObject: "$arr"
}
}
}
])
The secret to do that is to use the $objectToArray and the $arrayToObject operations, along with the $unwind and the correct $group stages. If you need a more detailed response on any stage please ask in the comments.
The output of the sample data is:
[
{
"_id": {
"_id": ObjectId("5dff27c0ac2d1547d87a1fea")
},
"arr": {
"Id": {
"count": 1,
"mode": 3
},
"age": {
"count": 2,
"mode": "0-10"
},
"gender": {
"count": 3,
"mode": "male"
}
}
},
{
"_id": {
"_id": ObjectId("5dff27c0ac2d1547d87a1fe7")
},
"arr": {
"Id": {
"count": 1,
"mode": 2
},
"age": {
"count": 1,
"mode": "0-10"
},
"gender": {
"count": 2,
"mode": "female"
}
}
},
{
"_id": {
"_id": ObjectId("5dff27c0ac2d1547d87a1fe9")
},
"arr": {
"Id": {
"count": 1,
"mode": 2
},
"age": {
"count": 1,
"mode": "30-40"
},
"gender": {
"count": 2,
"mode": "male"
}
}
},
{
"_id": {
"_id": ObjectId("5dff27c0ac2d1547d87a1fe8")
},
"arr": {
"Id": {
"count": 1,
"mode": 2
},
"age": {
"count": 1,
"mode": "30-40"
},
"gender": {
"count": 1,
"mode": "female"
}
}
}
]
After running it for a collection that has an array of objects named "arr" (edit the query if in your collection it has a different name) it will return the mode value and the number of occurrences of that value for each field. Objects that has that field unset will be not considered, but the ones with "null" will.

Resources