How to return the max occurrence of a value inside a MongoDB array - node.js

I have an array of documents in MongoDB, and I want to find the devDependenciesList values that occur most often across the given array:
[{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}]
The output should be :
[value1:3,value4:3,value3:2]

Basically you need to $unwind the array content and then $group on each value as the grouping key with a $sum to count:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}}
])
Which would return:
{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }
That's the basic data right there, but if you really want the "key/count" form you can do:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$sort": { "count": -1 } },
{ "$group": {
"_id": null,
"items": { "$push": { "k": "$_id", "v": "$count" } }
}},
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$items" }
}}
])
Which would return:
{
"value1" : 3,
"value4" : 3,
"value3" : 2,
"value23" : 1,
"value93" : 1,
"value7" : 1,
"value2" : 1
}
The additional $group and $push are to collect all the results into a single document with an array named with "k" and "v" elements. You want this form for the $arrayToObject operator which is used in the next $replaceRoot stage returning the final output.
Those latter operators ($arrayToObject and $replaceRoot) require a MongoDB version which supports them, but you don't really need them. This reshaping is actually most efficiently done in client code, such as with JavaScript in the shell:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$sort": { "count": -1 } }
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
And that produces the same results as above.
And of course if you wanted to exclude all of the singular results or something similar, just add a $match after the $group:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$sort": { "count": -1 } }
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
Or using the node native driver that would be something like:
let result = (await db.collection('collection').aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$sort": { "count": -1 } }
]).toArray()).reduce((o,{ _id, count }) => ({ ...o, [_id]: count }),{})
This relies on async/await to obtain the actual array of results, and on modern JavaScript features such as object spread and destructuring.
Which of course is just:
{ "value1" : 3, "value4" : 3, "value3" : 2 }
Just for reference, here's a fully reproducible listing:
const { MongoClient } = require('mongodb');
const uri = 'mongodb://localhost:27017';
const opts = { useNewUrlParser: true };
const data = [
{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}
];
const log = data => console.log(JSON.stringify(data, undefined, 2));
// Self-invoking async entry point for the reproducible listing: connects,
// resets the sample collection, runs the counting aggregation, then prints the
// key/count object followed by the "explain" output for $sortByCount.
(async function() {
  let client;
  try {
    client = await MongoClient.connect(uri, opts);
    const db = client.db('test');

    // Clean data
    await db.collection('collection').deleteMany();

    // Insert data
    await db.collection('collection').insertMany(data);

    // Count every array value, keep those seen more than once, order by
    // descending count, then fold the documents into a { value: count } object.
    const result = (await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$group": {
        "_id": "$devDependenciesList",
        "count": { "$sum": 1 }
      }},
      { "$match": { "count": { "$gt": 1 } } },
      { "$sort": { "count": -1 } }
    ]).toArray()).reduce((o, { _id, count }) => ({ ...o, [_id]: count }),{});
    log(result);

    // Ask the server to explain the $sortByCount pipeline instead of running it.
    const sample = await db.collection('collection').aggregate([
      { "$unwind": "$devDependenciesList" },
      { "$sortByCount": "$devDependenciesList" },
    ],{ "explain": true }).toArray();
    log(sample);
  } catch(e) {
    console.error(e);
  } finally {
    // Await the close so a failure while closing surfaces here rather than
    // as an unhandled promise rejection.
    if (client)
      await client.close();
  }
})();
And output showing the expected result and the "explain" output to show that $sortByCount is not a "real" aggregation stage and is just a shorter way of typing things that existed way back with MongoDB 2.2:
{
"value1": 3,
"value4": 3,
"value3": 2
}
[
{
"stages": [
{
"$cursor": {
"query": {},
"fields": {
"devDependenciesList": 1,
"_id": 0
},
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.collection",
"indexFilterSet": false,
"parsedQuery": {},
"winningPlan": {
"stage": "COLLSCAN",
"direction": "forward"
},
"rejectedPlans": []
}
}
},
{
"$unwind": {
"path": "$devDependenciesList"
}
},
{
"$group": {
"_id": "$devDependenciesList",
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$sort": {
"sortKey": {
"count": -1
}
}
}
],
"ok": 1,
"operationTime": "6674186995377373190",
"$clusterTime": {
"clusterTime": "6674186995377373190",
"signature": {
"hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId": 0
}
}
}
]

Please try using $unwind followed by $sortByCount, as below:
db.getCollection("test").aggregate([
{
$unwind: "$devDependenciesList"
},
{
$sortByCount: "$devDependenciesList"
}
]).map((obj)=>{return {[obj._id]:obj.count}})
This is the simple and short solution i could think of.

Related

Mongoose aggregate to get Average rating, count each rate and return the actual ratings

I'm trying to get the average rating for a product, plus the count of each rating, and also return the actual ratings, using pagination to limit the number returned without affecting the average or the counts.
So I'm trying achieve something like this:
this is my rating collection:
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
}
and this is the end result I want
{
"_id" : 1,
"avgRating": "3.6"
"counts" : [
{
"rating" : 5,
"count" : 8
},
{
"rating" : 3,
"count" : 2
},
{
"rating" : 4,
"count" : 4
},
{
"rating" : 1,
"count" : 4
}
],
"ratings": [
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
},
{
"productId": "3"
"userid" : 12,
"rating" : 4
"comment": "this is okay"
}
]
}
I have this so far which give me the count for each rating:
db.votes.aggregate([
{ $match: { postId: {$in: [1,2]} } },
{
$group: { _id: { post: "$postId", rating: "$vote" }, count: { $sum: 1 } }
},
{
$group: {
_id: "$_id.post",
counts: { $push: { rating: "$_id.rating", count: "$count" } }
}
}
])
You're not far off, we just have to adjust some things:
// Per post: count of each rating, weighted average rating, and one page of the
// original vote documents.
db.votes.aggregate([
    // Restrict to the posts of interest.
    {
        $match:
            {
                postId: {$in: [1, 2]}
            }
    },
    // One document per (post, rating) pair with the number of votes for it.
    {
        $group: {
            _id: {post: "$postId", rating: "$vote"},
            count: {$sum: 1},
            reviews: {$push : "$$ROOT" } //keep the original document
        }
    },
    // Collapse to one document per post.
    {
        $group: {
            _id: "$_id.post",
            counts: {$push: {rating: "$_id.rating", count: "$count"}},
            reviews: {$push: "$reviews"},
            totalItemCount: {$sum: "$count"}, //for avg calculation
            // For avg calculation: each rating value must be weighted by the
            // number of votes it received, otherwise every distinct rating is
            // only added once while the divisor counts every vote.
            totalRating: {$sum: {$multiply: ["$_id.rating", "$count"]}}
        }
    },
    {
        $project: {
            _id: "$_id",
            avgRating: {$divide: ["$totalRating", "$totalItemCount"]},
            counts: "$counts",
            // "reviews" is an array of arrays (one per rating); flatten it and
            // then take a single page.
            reviews: {
                $slice: [
                    {
                        $reduce: {
                            input: "$reviews",
                            initialValue: [],
                            in: { $concatArrays: ["$$value", "$$this"] }
                        }
                    },
                    0, //skip
                    10 //limit
                ]
            }
        }
    }
])
Note that I preserved the current pipeline structure for clarity, however I feel that using a pipeline that utilizes $facet might be more efficient as we won't have to hold the entire collection in memory while grouping.
we'll split it into two, one the current pipeline minus the review section and one with just $skip and $limit stages.
EDIT:
$facet version:
// $facet variant: the statistics and the paginated documents are computed in
// two independent sub-pipelines over the same matched input.
db.votes.aggregate([
    {
        "$match": {
            "postId": {"$in": [1, 2]}
        }
    },
    {
        "$facet": {
            // Rating counts and the totals needed for the average, per post.
            "numbers": [
                {
                    "$group": {
                        "_id": {
                            "post": "$postId",
                            "rating": "$vote"
                        },
                        "count": {
                            "$sum": 1.0
                        }
                    }
                },
                {
                    "$group": {
                        "_id": "$_id.post",
                        "counts": {
                            "$push": {
                                "rating": "$_id.rating",
                                "count": "$count"
                            }
                        },
                        "totalItemCount": {
                            "$sum": "$count"
                        },
                        // Weight each rating value by its number of votes so
                        // that totalRating / totalItemCount is the true mean.
                        "totalRating": {
                            "$sum": {"$multiply": ["$_id.rating", "$count"]}
                        }
                    }
                }
            ],
            // One page of the matched documents.
            // NOTE(review): this facet is not partitioned by post, so when
            // several postIds are matched the same page is attached to every
            // output document below.
            "reviews": [
                {
                    "$skip": 0.0
                },
                {
                    "$limit": 10.0
                }
            ]
        }
    },
    // One output document per entry of "numbers" (i.e. per post).
    {
        "$unwind": "$numbers"
    },
    {
        "$project": {
            "_id": "$numbers._id",
            "reviews": "$reviews",
            "avgRating": {"$divide": ["$numbers.totalRating", "$numbers.totalItemCount"]},
            "counts": "$numbers.counts"
        }
    }
]);

Is there any way to get date from ObjectId from mongoose using aggregate?

I have Users Collection. devices are all in array of Objects.
[{
"_id" : ObjectId("5c66a979e109fe0f537c7e37"),
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5ccc0fa5f7778412173d22bf")
}]
},{
"_id" : ObjectId("5c66b6382b18fc4ff0276dcc"),
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be")
}]
}]
I need to query the documents with adding the new field date in devices like
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be"),
"date": ISODate("2012-10-15T21:26:17Z")
}]
date key from devices._id.getTimestamp()
I tried using aggregate this one, donno how to use getTimestamp()
db.getCollection('users').aggregate([ {
"$unwind": "$devices"
}, {
"$group": {
"_id": "$_id",
"devices": {
"$push": "$devices._id.getTimestamp()"
}
}
}])
I use $devices._id.getTimestamp(), this could be error.. Here how I handle this one.. Thanks for advance
You can use $toDate to get Timestamp from the _id field.
Add date field to each devices element after unwind stage, using $addFields
Try this :
// Adds a "date" field to every element of "devices", derived from the
// creation time embedded in that element's own ObjectId.
db.getCollection('users').aggregate([ {
    "$unwind": "$devices"
},{
    $addFields : {
        // After $unwind, "devices" is a single sub-document; use its _id
        // (not the parent document's _id) as the source of the date.
        "devices.date": { $toDate: "$devices._id" }
    }
}, {
    // Rebuild one document per user with the enriched devices array.
    "$group": {
        "_id": "$_id",
        "devices": {
            "$push": "$devices"
        }
    }
}])
You can check the result at Mongo Playground (just press "run")
Using MongoDb 3.6
The $dateFromParts operator comes in handy here where you can use it in conjunction with the other date operators. You won't need
to $unwind the array as you can use $map to map over the devices array documents and add the extra date field with the above expression.
This can be followed with an example pipeline below :
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": {
"$dateFromParts": {
'year': { "$year": "$$this._id"},
'month': { "$month": "$$this._id"},
'day':{ "$dayOfMonth": "$$this._id"},
'hour': { "$hour": "$$this._id"},
'minute': { "$minute": "$$this._id"},
'second': { "$second": "$$this._id"},
'millisecond': { "$millisecond": "$$this._id"}
}
}
}
}
}
} }
])
Output
/* 1 */
{
"_id" : ObjectId("5c66a979e109fe0f537c7e37"),
"devices" : [
{
"dev_token" : "XXXX",
"_id" : ObjectId("5ccc0fa5f7778412173d22bf"),
"date" : ISODate("2019-05-03T09:53:41.000Z")
}
]
}
/* 2 */
{
"_id" : ObjectId("5c66b6382b18fc4ff0276dcc"),
"devices" : [
{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be"),
"date" : ISODate("2019-03-21T06:38:36.000Z")
}
]
}
Using MongoDb 4.0 and newer:
The pipeline can be tweaked slightly to use the new $toDate or $convert operators. Their respective uses follow:
$toDate
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": { "$toDate": "$$this._id" }
}
}
}
} }
])
$convert
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": {
"$convert": { "input": "$$this._id", "to": "date" }
}
}
}
}
} }
])

Use Aggregate with $group in mongodb

I have data in worksheets collection like below:
/* 1 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6506"),
"isBilling" : true,
"hours" : 6,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 2 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6507"),
"isBilling" : true,
"hours" : 4,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 3 */
{
"_id" : ObjectId("5c21e10fae07cc1204a5b647"),
"isBilling" : false,
"hours" : 8,
"userId" : ObjectId("5c1f388fd7537d1444738492"),
}
I have to create a aggregate query to sum the hours, where isBilling equals to true, and where isBilling equals to false.I want the below output:
{
"billingHours":10,
"fixContract":8
}
I have to get data with the particular userId. I tried the below:
Worksheet.aggregate([
{
$match: conditions
},
{
$lookup:{
"from": "worksheets",
"let": {},
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$isBilling",false] } } },
{
"$group": { "_id": null, "totalHours": { "$sum": "$hours" } }
},
],
"as": "billingHours"
}
},
{
"$project":{"billingHours":1}
}
])
I am getting the below result:
[
{
"_id": "5c21d780f82aa31334ab6506",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
},
{
"_id": "5c21d780f82aa31334ab6507",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
}
]
I don't know why it is giving me 16 hours instead of 10 and giving me two objects instead of 1.
You don't need to use $lookup here. Simple $group with $cond will do the job.
db.collection.aggregate([
{ "$group": {
"_id": null,
"billingHours": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0]
}
},
"fixContract": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"]
}
}
}}
])

how get count from mongodb with different status from one collection

I have appointment collection in that i have status codes like upcoming, cancelled, completed. i want to write an api to get count of each status using mongoose or mongodb methods.
output should be like below
[{
group : "grp1",
appointments_completed :4
appointments_upcoming :5
appointments_cancelled : 7
}]
thanks in advance.
I hope it help you
db.getCollection('codelist').aggregate([
{
$group:{
_id:{status:"$status"},
count:{$sum:1}
}
}
])
The result will be
[{
"_id" : {
"status" : "cancelled"
},
"count" : 13.0
},
{
"_id" : {
"status" : "completed"
},
"count" : 20.0
}
]
I think you can process it with nodejs
Using Aggregation Pipeline $group we can get this count
db.collection_name.aggregate([
{ $group: {
_id:null,
appointments_completed: {$sum : "$appointments_completed" },
appointments_upcoming:{$sum :"$appointments_upcoming"},
appointments_cancelled:{$sum: "$appointments_cancelled"}
}
}
]);
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator and a $replaceRoot pipeline to get the desired result. You would need to run the following aggregate pipeline:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$concat": ["appointments_", { "$toLower": "$status" }] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"k": "$_id.status",
"v": "$count"
}
}
} },
{ "$addFields": {
"counts": {
"$setUnion": [
"$counts", [
{
"k": "group",
"v": "$_id"
}
]
]
}
} },
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$counts" }
} }
])
For older versions, a more generic approach though with a different output format would be to group twice and get the counts as an array of key value objects as in the following:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
} }
])
which spits out:
{
"_id": "grp1",
"counts":[
{ "status": "completed", "count": 4 },
{ "status": "upcoming", "count": 5 },
{ "status": "cancelled", "count": 7 }
]
}
If the status codes are fixed then the $cond operator in the $group pipeline step can be used effectively to evaluate the counts based on the status field value. Your overall aggregation pipeline can be constructed as follows to produce the result in the desired format:
db.appointments.aggregate([
{ "$group": {
"_id": <group_by_field>,
"appointments_completed": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "completed" ] }, 1, 0 ]
}
},
"appointments_upcoming": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "upcoming" ] }, 1, 0 ]
}
},
"appointments_cancelled": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "cancelled" ] }, 1, 0 ]
}
}
} }
])

MongoDB group by type and by date

I have a Collection with a simple Document to store impressions and conversions with the following structure:
/* 1 */
{
"_id" : ObjectId("566f1ef857c1e6dd3123050a"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T19:56:40.100Z"),
"__v" : 0
}
/* 2 */
{
"_id" : ObjectId("566f1fc9ac964e6f327c55d6"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "conversion",
"created_at" : ISODate("2015-12-14T20:00:09.972Z"),
"__v" : 0
}
/* 3 */
{
"_id" : ObjectId("566f2896739f6afa4485f327"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:37:42.139Z"),
"__v" : 0
}
/* 4 */
{
"_id" : ObjectId("566f28e5739f6afa4485f328"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:39:01.233Z"),
"__v" : 0
}
I'm able to group and count by data_type, but what I need to do is group by date and then count the data_type in order to get the following result:
[
{
'_id': 'Y',
'conversions': 20,
'impressions': 2703,
'date': '2015-12-14'
},
{
'_id': 'Z',
'conversions': 10,
'impressions': 1703,
'date': '2015-12-13'
}
]
The code I have right now is the following, but it only groups by data_type. I'm trying to add a project to regroup by date with no luck so far.
var path_id = new mongoose.Types.ObjectId( req.body.path_id );
var match = {
'path_id': {
$eq: path_id
}
};
var group = {
'_id': '$data_type',
'count': {
'$sum': 1
}
}
Hit.aggregate( [ {
$match: match
}, {
$group: group
} ], function( err, res ) {
console.log( res );
} );
The result is
POST /api/hits/bypath 200 30ms - 15b
[ { _id: 'conversion', count: 2 },
{ _id: 'impression', count: 2703 } ]
To do a nested group by date, you have to use the date aggregation operator $dateToString.
Here is query
db.hits.aggregate([
{
"$project": {
"created_at": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"data_type": true
}
},
{
"$group": {
"_id": {
"data_type": "$data_type",
"created_at": "$created_at"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": {
"data_type": "$_id.data_type"
},
"data":{ "$addToSet" : { count: "$count", date: "$_id.created_at" } }
}
}
])
If you want to match before group by operation based on condition, Add as following in the query
{
"$match": {
"path_id": {
"$eq": "<path_id>"
}
}
}
you can use Date Aggregation Operators to project the day/month/year fields and then group by them
{
"$project": {
"y": {
"$year": "$created_at"
},
"m": {
"$month": "$created_at"
},
"d": {
"$dayOfMonth": "$created_at"
},
"data_type" : 1
}
},
{
"$group": {
"_id": {
"year": "$y",
"month": "$m",
"day": "$d",
"data_type": "$data_type"
},
count: {
"$sum": 1
}
}
}
and will output in this format:
"_id": {
"year": 2015,
"month": 10,
"day": 5,
"data_type": "impression"
},
count: 10
and then group again by date to combine the types in one document
{
"$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month",
"day": "$_id.day"
},
types: {"$push":"$_id.data_type"},
counters: {"$push":"$count"}
}
}
which will result in this:
"_id": {
"year": 2015,
"month": 10,
"day": 5
},
types: ["impression", "conversion"]
counters: [10, 5]
there might be a more elegant or faster (with 1 group) way to do this though, i am not sure.

Resources