Use Aggregate with $group in mongodb

Use Aggregate with $group in mongodb - node.js

I have data in worksheets collection like below:
/* 1 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6506"),
"isBilling" : true,
"hours" : 6,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 2 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6507"),
"isBilling" : true,
"hours" : 4,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
}
/* 3 */
{
"_id" : ObjectId("5c21e10fae07cc1204a5b647"),
"isBilling" : false,
"hours" : 8,
"userId" : ObjectId("5c1f388fd7537d1444738492"),
}
I have to create a aggregate query to sum the hours, where isBilling equals to true, and where isBilling equals to false.I want the below output:
{
"billingHours":10,
"fixContract":8
}
I have to get data with the particular userId. I tried the below:
Worksheet.aggregate([
{
$match: conditions
},
{
$lookup:{
"from": "worksheets",
"let": {},
"pipeline": [
{ "$match": { "$expr": { "$eq": [ "$isBilling",false] } } },
{
"$group": { "_id": null, "totalHours": { "$sum": "$hours" } }
},
],
"as": "billingHours"
}
},
{
"$project":{"billingHours":1}
}
])
I am getting the below result:
[
{
"_id": "5c21d780f82aa31334ab6506",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
},
{
"_id": "5c21d780f82aa31334ab6507",
"billingHours": [
{
"_id": null,
"totalHours": 16
}
]
}
]
I don't know why it is giving me 16 hours instead of 10 and giving me two objects instead of 1.

You don't need to use $lookup here. Simple $group with $cond will do the job.
db.collection.aggregate([
{ "$group": {
"_id": null,
"billingHours": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0]
}
},
"fixContract": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"]
}
}
}}
])

Related

Is there any way to get date from ObjectId from mongoose using aggregate?

I have Users Collection. devices are all in array of Objects.
[{
"_id" : ObjectId("5c66a979e109fe0f537c7e37"),
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5ccc0fa5f7778412173d22bf")
}]
},{
"_id" : ObjectId("5c66b6382b18fc4ff0276dcc"),
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be")
}]
}]
I need to query the documents with adding the new field date in devices like
"devices": [{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be"),
"date": ISODate("2012-10-15T21:26:17Z")
}]
date key from devices._id.getTimestamp()
I tried using aggregate this one, donno how to use getTimestamp()
db.getCollection('users').aggregate([ {
"$unwind": "$devices"
}, {
"$group": {
"_id": "$_id",
"devices": {
"$push": "$devices._id.getTimestamp()"
}
}
}])
I use $devices._id.getTimestamp(), this could be error.. Here how I handle this one.. Thanks for advance

You can use $toDate to get Timestamp from the _id field.
Add date field to each devices element after unwind stage, using $addFields
Try this :
db.getCollection('users').aggregate([ {
"$unwind": "$devices"
},{
$addFields : {
"devices.date": { $toDate: "$_id" }
}
}, {
"$group": {
"_id": "$_id",
"devices": {
"$push": "$devices"
}
}
}])
You can check the result at Mongo Playground (just press "run")

Using MongoDb 3.6
The $dateFromParts operator comes in handy here where you can use it in conjunction with the other date operators. You won't need
to $unwind the array as you can use $map to map over the devices array documents and add the extra date field with the above expression.
This can be followed with an example pipeline below :
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": {
"$dateFromParts": {
'year': { "$year": "$$this._id"},
'month': { "$month": "$$this._id"},
'day':{ "$dayOfMonth": "$$this._id"},
'hour': { "$hour": "$$this._id"},
'minute': { "$minute": "$$this._id"},
'second': { "$second": "$$this._id"},
'millisecond': { "$millisecond": "$$this._id"}
}
}
}
}
}
} }
])
Output
/* 1 */
{
"_id" : ObjectId("5c66a979e109fe0f537c7e37"),
"devices" : [
{
"dev_token" : "XXXX",
"_id" : ObjectId("5ccc0fa5f7778412173d22bf"),
"date" : ISODate("2019-05-03T09:53:41.000Z")
}
]
}
/* 2 */
{
"_id" : ObjectId("5c66b6382b18fc4ff0276dcc"),
"devices" : [
{
"dev_token" : "XXXX",
"_id" : ObjectId("5c93316cc33c622bdcfaa4be"),
"date" : ISODate("2019-03-21T06:38:36.000Z")
}
]
}
Using MongoDb 4.0 and newer:
The pipeline can be tweaked slightly to use the new $toDate or $convert operators. Their respective uses follow:
$toDate
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": { "$toDate": "$$this._id" }
}
}
}
} }
])
$convert
db.getCollection('users').aggregate([
{ "$addFields": {
"devices": {
"$map": {
"input": "$devices",
"in": {
"dev_token": "$$this.dev_token",
"_id": "$$this._id",
"date": {
"$convert": { "input": "$$this._id", "to": "date" }
}
}
}
}
} }
])

How to retun max occurence of value inside mongoDB array

I have an array in mongodb: I want to max occured devDependenciesList value from given array
[{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}]
The output should be :
[value1:3,value4:3,value3:2]

Basically you need to $unwind the array content and then $group on each value as the grouping key with a $sum to count:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}}
])
Which would return:
{ "_id" : "value23", "count" : 1 }
{ "_id" : "value93", "count" : 1 }
{ "_id" : "value7", "count" : 1 }
{ "_id" : "value2", "count" : 1 }
{ "_id" : "value3", "count" : 2 }
{ "_id" : "value1", "count" : 3 }
{ "_id" : "value4", "count" : 3 }
That's the basic data right there, but if you really want the "key/count" form you can do:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$sort": { "count": -1 } },
{ "$group": {
"_id": null,
"items": { "$push": { "k": "$_id", "v": "$count" } }
}},
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$items" }
}}
])
Which would return:
{
"value1" : 3,
"value4" : 3,
"value3" : 2,
"value23" : 1,
"value93" : 1,
"value7" : 1,
"value2" : 1
}
The additional $group and $push are to collect all the results into a single document with an array named with "k" and "v" elements. You want this form for the $arrayToObject operator which is used in the next $replaceRoot stage returning the final output.
You need a MongoDB version which supports those latter operators, but really you don't. This is actually most efficiently done in client code. Such as with JavaScript in the shell:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$sort": { "count": -1 } }
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
And that produces the same results as above.
And of course if you wanted to exclude all of the singular results or something similar, just add a $match after the $group:
db.collection.aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$sort": { "count": -1 } }
]).toArray().reduce((o,e) => Object.assign(o, { [e._id]: e.count }),{})
Or using the node native driver that would be something like:
let result = (await db.collection('collection').aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$sort": { "count": -1 } }
]).toArray()).reduce((o,{ _id, count }) => ({ ...o, [_id]: count }),{})
Given usage of async/await on the return of an actual array and the use of ES6 features like Object spread and destructuring.
Which of course is just:
{ "value1" : 3, "value4" : 3, "value3" : 2 }
Just for reference, here's a fully reproducible listing:
const { MongoClient } = require('mongodb');
const uri = 'mongodb://localhost:27017';
const opts = { useNewUrlParser: true };
const data = [
{
"_id" : 0,
"repoId" : 460078,
"devDependenciesList" : [
"value1",
"value2",
"value3",
"value4"
]
},{
"_id" : 1,
"repoId" : 1232,
"devDependenciesList" : [
"value1",
"value4",
"value7",
"value93"
]
},{
"_id" : 2,
"repoId" : 5423,
"devDependenciesList" : [
"value1",
"value23",
"value3",
"value4"
]
}
];
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
let client;
try {
client = await MongoClient.connect(uri, opts);
const db = client.db('test');
// Clean data
await db.collection('collection').deleteMany();
// Insert data
await db.collection('collection').insertMany(data);
let result = (await db.collection('collection').aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$group": {
"_id": "$devDependenciesList",
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$sort": { "count": -1 } }
]).toArray()).reduce((o, { _id, count }) => ({ ...o, [_id]: count }),{});
log(result);
let sample = await db.collection('collection').aggregate([
{ "$unwind": "$devDependenciesList" },
{ "$sortByCount": "$devDependenciesList" },
],{ "explain": true }).toArray();
log(sample);
} catch(e) {
console.error(e);
} finally {
if (client)
client.close();
}
})()
And output showing the expected result and the "explain" output to show that $sortByCount is not a "real" aggregation stage and is just a shorter way of typing things that existed way back with MongoDB 2.2:
{
"value1": 3,
"value4": 3,
"value3": 2
}
[
{
"stages": [
{
"$cursor": {
"query": {},
"fields": {
"devDependenciesList": 1,
"_id": 0
},
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.collection",
"indexFilterSet": false,
"parsedQuery": {},
"winningPlan": {
"stage": "COLLSCAN",
"direction": "forward"
},
"rejectedPlans": []
}
}
},
{
"$unwind": {
"path": "$devDependenciesList"
}
},
{
"$group": {
"_id": "$devDependenciesList",
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$sort": {
"sortKey": {
"count": -1
}
}
}
],
"ok": 1,
"operationTime": "6674186995377373190",
"$clusterTime": {
"clusterTime": "6674186995377373190",
"signature": {
"hash": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId": 0
}
}
}
]

Please try to use $sortByCount and $unwind like as below :
db.getCollection("test").aggregate([
{
$unwind: "$devDependenciesList"
},
{
$sortByCount: "$devDependenciesList"
}
]).map((obj)=>{return {[obj._id]:obj.count}})
This is the simple and short solution i could think of.

How to distinct (count) value when value is nested in array?

I have a data sample something like this:
"diagnostics" : {
"_ID" : "554bbf7b761e06f02fef3561",
"tests" : [
{
"_id" : "59d678064e4645ec562a37e2",
"name" : "RBC",
},
{
"_id" : "59d678064e4645ec562a37e1",
"name" : "Calcium",
}
]
}
I want to get all distinct _ID and count of all test groups in with there names
which is something like this:
"_ID" : "554bbf7b761e06f02fef3561"{ {"name" : "Calcium", count :(count of Calcium)},{"name" : "RBC", count :(count of RBC)}
Thing to keep in mind are tests is inside diagnostics and contain any number of $name field it can be two or one or any number of times and I want individual count of each distinct name .
db.collection('transactions').aggregate([
{ $unwind : '$diagnostics.tests' },
{ $group : {
_id: {
"Test_Name" : '$diagnostics.tests.name',
"ID" : '$diagnostics._id'
},
test_count: { $sum: 1 }
}
}
])
and I am getting result something like this
[
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 76
},
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "566726c35dc18d13242fffcc"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "CBC - 7 Part",
"ID": "566726c35dc18d13242fffcc"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "RBC",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 1
},
{
"_id": {
"Test_Name": "Fasting Blood Sugar",
"ID": "5a2c9edfe0d0ec71aef1e526"
},
"test_count": 6
},
{
"_id": {
"Test_Name": "Calcium",
"ID": "554bbf7b761e06f02fef3561"
},
"test_count": 77
}
]
Can anybody help me with the query?

You need to use mulitple $group stages here.
First $unwind the tests and $group it by "name" and then resize it to original and lastly then $group by "diagnostics_ID" and for the tests count you can check the $size of the "tests" array.
db.collection.aggregate([
{ "$unwind": "$diagnostics.tests" },
{ "$group": {
"_id": {
"_id": "$diagnostics.tests.name",
"diagnosticID": "$diagnostics._ID"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": {
"_ID": "$_id.diagnosticID"
},
"tests": {
"$push": {
"name": "$_id._id",
"count": "$count"
}
}
}},
{ "$project": {
"diagnostics._ID": "$_id._ID",
"diagnostics.tests": "$tests",
"_id": 0,
"testCount": { "$size": "$tests" }
}}
])
Code snippet

how get count from mongodb with different status from one collection

I have appointment collection in that i have status codes like upcoming, cancelled, completed. i want to write an api to get count of each status using mongoose or mongodb methods.
output should be like below
[{
group : "grp1",
appointments_completed :4
appointments_upcoming :5
appointments_cancelled : 7
}]
thanks in advance.

I hope it help you
db.getCollection('codelist').aggregate([
{
$group:{
_id:{status:"$status"},
count:{$sum:1}
}
}
])
The result will be
[{
"_id" : {
"status" : "cancelled"
},
"count" : 13.0
},
{
"_id" : {
"status" : "completed"
},
"count" : 20.0
}
]
I think you can process it with nodejs

Using Aggregation Pipeline $group we can get this count
db.collection_name.aggregate([
{ $group: {
_id:null,
appointments_completed: {$sum : "$appointments_completed" },
appointments_upcoming:{$sum :"$appointments_upcoming"},
appointments_cancelled:{$sum: "$appointments_cancelled"}
}
}
]);

With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator and a $replaceRoot pipeline to get the desired result. You would need to run the following aggregate pipeline:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$concat": ["appointments_", { "$toLower": "$status" }] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"k": "$_id.status",
"v": "$count"
}
}
} },
{ "$addFields": {
"counts": {
"$setUnion": [
"$counts", [
{
"k": "group",
"v": "$_id"
}
]
]
}
} },
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$counts" }
} }
])
For older versions, a more generic approach though with a different output format would be to group twice and get the counts as an array of key value objects as in the following:
db.appointments.aggregate([
{ "$group": {
"_id": {
"group": <group_by_field>,
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.group",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
} }
])
which spits out:
{
"_id": "grp1"
"counts":[
{ "status": "completed", "count": 4 },
{ "status": "upcoming", "count": 5 }
{ "status": "cancelled", "count": 7 }
]
}
If the status codes are fixed then the $cond operator in the $group pipeline step can be used effectively to evaluate the counts based on the status field value. Your overall aggregation pipeline can be constructed as follows to produce the result in the desired format:
db.appointments.aggregate([
{ "$group": {
"_id": <group_by_field>,
"appointments_completed": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "completed" ] }, 1, 0 ]
}
},
"appointments_upcoming": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "upcoming" ] }, 1, 0 ]
}
},
"appointments_cancelled": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "cancelled" ] }, 1, 0 ]
}
}
} }
])

MongoDB group by type and by date

I have a Collection with a simple Document to store impressions and conversions with the following structure:
/* 1 */
{
"_id" : ObjectId("566f1ef857c1e6dd3123050a"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T19:56:40.100Z"),
"__v" : 0
}
/* 2 */
{
"_id" : ObjectId("566f1fc9ac964e6f327c55d6"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "conversion",
"created_at" : ISODate("2015-12-14T20:00:09.972Z"),
"__v" : 0
}
/* 3 */
{
"_id" : ObjectId("566f2896739f6afa4485f327"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:37:42.139Z"),
"__v" : 0
}
/* 4 */
{
"_id" : ObjectId("566f28e5739f6afa4485f328"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:39:01.233Z"),
"__v" : 0
}
I'm able to group and count by data_type, but what I need to do is group by date and then count the data_type in order to get the following result:
[
{
'_id': 'Y',
'conversions': 20,
'impressions': 2703,
'date': '2015-12-14'
},
{
'_id': 'Z',
'conversions': 10,
'impressions': 1703,
'date': '2015-12-13'
}
]
The code I have right now is the following, but it only groups by data_type. I'm trying to add a project to regroup by date with no luck so far.
var path_id = new mongoose.Types.ObjectId( req.body.path_id );
var match = {
'path_id': {
$eq: path_id
}
};
var group = {
'_id': '$data_type',
'count': {
'$sum': 1
}
}
Hit.aggregate( [ {
$match: match
}, {
$group: group
} ], function( err, res ) {
console.log( res );
} );
The result is
POST /api/hits/bypath 200 30ms - 15b
[ { _id: 'conversion', count: 2 },
{ _id: 'impression', count: 2703 } ]

To do nested group by date, you have to use to Date aggregation operator $dateToString.
Here is query
db.hits.aggregate([
{
"$project": {
"created_at": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"data_type": true
}
},
{
"$group": {
"_id": {
"data_type": "$data_type",
"created_at": "$created_at"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": {
"data_type": "$_id.data_type"
},
"data":{ "$addToSet" : { count: "$count", date: "$_id.created_at" } }
}
}
])
If you want to match before group by operation based on condition, Add as following in the query
{
"$match": {
"path_id": {
"$eq": "<path_id>"
}
}
}

you can use Date Aggregation Operators to project the day/month/year fields and then group by them
{
"$project": {
"y": {
"$year": "$created_at"
},
"m": {
"$month": "$created_at"
},
"d": {
"$dayOfMonth": "$created_at"
},
"data_type" : 1
}
},
{
"$group": {
"_id": {
"year": "$y",
"month": "$m",
"day": "$d",
"data_type": "$data_type"
},
count: {
"$sum": 1
}
}
}
and will output in this format:
"_id": {
"year": 2015,
"month": 10,
"day": 5,
"data_type": "impression"
},
count: 10
and then group again by date to combine the types in one document
{
"$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month",
"day": "$_id.day"
},
types: {"$push":"$_id.data_type"},
counters: {"$push":"$count"}
}
}
which will result in this:
"_id": {
"year": 2015,
"month": 10,
"day": 5
},
types: ["impression", "conversion"]
counters: [10, 5]
there might be a more elegant or faster (with 1 group) way to do this though, i am not sure.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Use Aggregate with $group in mongodb - node.js

Related

Is there any way to get date from ObjectId from mongoose using aggregate?

How to retun max occurence of value inside mongoDB array

How to distinct (count) value when value is nested in array?

how get count from mongodb with different status from one collection

MongoDB group by type and by date

Categories

Resources