groups by month and year using mongoose.js - node.js

my collection in mongodb looks like below:
{
"AccountID" : "87f7fd60-d1ad-11e2-98bb-795730bce125",
"userId" : ObjectId("51b59fbec46916e60d00000c"),
"_id" : ObjectId("51b6e603e3efef161b000003"),
"accessDate" : ISODate("2013-06-11T08:55:31.957Z"),
"__v" : 0
}
{
"AccountID" : "47f7fd60-d1ad-11e2-98bb-795730bce125",
"userId" : ObjectId("51b59fbec46916e60d00000d"),
"_id" : ObjectId("51b6e603e3efef161b000003"),
"accessDate" : ISODate("2013-05-1T08:05:31.957Z"),
"__v" : 0
}
i what to write a query which results the below result:
this is result as grouped by month and year and the count per day.
{
"usage": [
{
"year": 2013,
"monthlyusage": [
{
"month": 1,
"dailyusage": [
{
"day": 1,
"count": 205
},
{
"day": 2,
"count": 1109
},
{
"day": 4,
"count": 455
}
]
},
{
"month": 2,
"dailyusage": [
{
"day": 11,
"count": 256
},
{
"day": 2,
"count": 1001
},
{
"day": 5,
"count": 65
}
]
}
]
},
{
"year": 2012,
"monthlyusage": [
{
"month": 12,
"dailyusage": [
{
"day": 1,
"count": 78
},
{
"day": 2,
"count": 7009
},
{
"day": 28,
"count": 55
}
]
},
{
"month": 11,
"dailyusage": [
{
"day": 11,
"count": 800
},
{
"day": 2,
"count": 5094
},
{
"day": 25,
"count": 165
}
]
}
]
}
]
}
How can i do this using mongoose.js framework

Mongoose provides a lightweight wrapper around the MongoDB aggregation framework. If you're new to aggregation, you can learn more about in from the MongoDB docs: http://docs.mongodb.org/manual/aggregation/
To massage your data into the form you've described above, you can use an aggregation pipeline with a series of $group operations. Here it is using the mongoose framework:
var dateSchema = mongoose.Schema({…});
var DateItem = mongoose.model('DateItem', dateSchema);
DateItem.aggregate(
{ $group : {
_id : { year: { $year : "$accessDate" }, month: { $month : "$accessDate" },day: { $dayOfMonth : "$accessDate" }},
count : { $sum : 1 }}
},
{ $group : {
_id : { year: "$_id.year", month: "$_id.month" },
dailyusage: { $push: { day: "$_id.day", count: "$count" }}}
},
{ $group : {
_id : { year: "$_id.year" },
monthlyusage: { $push: { month: "$_id.month", dailyusage: "$dailyusage" }}}
},
function (err, res)
{ if (err) ; // TODO handle error
console.log(res);
});
});
The first $group will result in documents of this form, one for each day:
{
"_id" : { "year" : 2013, "month" : 8, "day" : 15 },
"count" : 1
}
The second $group will result in documents grouped by month:
{
"_id" : { "year" : 2012, "month" : 11 },
"dailyusage" : [
{ "day" : 6, "count" : 1 },
{ "day" : 9, "count" : 1 },
... ]
},
And the third $group will result in even larger documents, one for each year.
This query will aggregate your data into large, hierarchical documents. If you plan to run queries on this data after aggregation, however, this might not be the most useful form for your data to be in. Consider how you'll be using the aggregated data. A schema involving more smaller documents, perhaps one per month or even one per day, might be more convenient.

Related

Mongoose aggregate to get Average rating, count each rate and return the actual ratings

Im trying to to get avg rating for a product, plus the count of each rating and also return the actual ratings and use pagination to limit amount that is returned without affecting the avg or count.
So I'm trying achieve something like this:
this is my rating collection:
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
}
and this is the end result I want
{
"_id" : 1,
"avgRating": "3.6"
"counts" : [
{
"rating" : 5,
"count" : 8
},
{
"rating" : 3,
"count" : 2
},
{
"rating" : 4,
"count" : 4
},
{
"rating" : 1,
"count" : 4
}
],
"ratings": [
{
"productId": "3"
"userid" : 5,
"rating" : 5
"comment": "this is nice"
},
{
"productId": "3"
"userid" : 2,
"rating" :4
"comment": "this is very nice"
},
{
"productId": "3"
"userid" : 12,
"rating" : 4
"comment": "this is okay"
}
]
}
I have this so far which give me the count for each rating:
db.votes.aggregate([
{ $match: { postId: {$in: [1,2]} } },
{
$group: { _id: { post: "$postId", rating: "$vote" }, count: { $sum: 1 } }
},
{
$group: {
_id: "$_id.post",
counts: { $push: { rating: "$_id.rating", count: "$count" } }
}
}
])
You're not far off, we just have to adjust some things:
db.votes.aggregate([
{
$match:
{
postId: {$in: [1, 2]}
}
},
{
$group: {
_id: {post: "$postId", rating: "$vote"},
count: {$sum: 1},
reviews: {$push : "$$ROOT" } //keep the original document
}
},
{
$group: {
_id: "$_id.post",
counts: {$push: {rating: "$_id.rating", count: "$count"}},
reviews: {$push: "$reviews"},
totalItemCount: {$sum: "$count"}, //for avg calculation
totalRating: {$sum: "$_id.rating"} // //for avg calculation
}
},
{
$project: {
_id: "$_id",
avgRating: {$divide: ["$totalRating", "$totalItemCount"]},
counts: "$counts",
reviews: {
$slice: [
{
$reduce: {
input: "$reviews",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
},
0, //skip
10 //limit
]
}
}
}
])
Note that I preserved the current pipeline structure for clarity, however I feel that using a pipeline that utilizes $facet might be more efficient as we won't have to hold the entire collection in memory while grouping.
we'll split it into two, one the current pipeline minus the review section and one with just $skip and $limit stages.
EDIT:
$facet version:
db.votes.aggregate([
{
"$match": {
"postId": {"$in": [1, 2]}
}
},
{
"$facet": {
"numbers": [
{
"$group": {
"_id": {
"post": "$postId",
"rating": "$vote"
},
"count": {
"$sum": 1.0
}
}
},
{
"$group": {
"_id": "$_id.post",
"counts": {
"$push": {
"rating": "$_id.rating",
"count": "$count"
}
},
"totalItemCount": {
"$sum": "$count"
},
"totalRating": {
"$sum": "$_id.rating"
}
}
}
],
"reviews": [
{
"$skip": 0.0
},
{
"$limit": 10.0
}
]
}
},
{
"$unwind": "$numbers"
},
{
"$project": {
"_id": "$numbers._id",
"reviews": "$reviews",
"avgRating": {"$divide": ["$numbers.totalRating", "$numbers.totalItemCount"]},
"counts": "$numbers.counts"
}
}
]);

Mongodb query join two collections

I´m trying to make a query to my bbdd in order to get some info that involve two collections.
First, I have one collecction, called Collectables that is a collection that store all available items that One user can get using an App.
For example, this collection can have... 100 items. This is the maximum number of items.
This is one document of this collection (called collectables)
{
"_id" : ObjectId("5d387ecfbb676b173aa57fe3"),
"img" : "some url",
"name" : "La 5",
"__v" : 0,
"amount" : 17,
"available" : 16,
"collec" : ObjectId("5d36c0c34c86991db93bd7c8"),
"gen" : 3,
"metadata" : {},
"position" : 1
}
Then, I have another collection called AppUsers. In this collection I store all info related to the user. Each user of the App has his own record here. The point is that besides meta info of the user such alias, avatar, age... I have one field called collectables. Is an array. Here I store what collectable have each user.
For example, if one user have 10 collectables (from the other collection) I have 10 entries in this array with that info. Is possible that one user "win" the same item (collectable) twice or more.. so in this collection I store a count field with the total. For example, a user win the collectable with Id 1 the first time, so I add the entry in the array with count 1. If then the user win the same collectable, the count is 2... and so on.
This is an example of one user... with 3 collectables, but several number of each item.
{
"_id" : ObjectId("5d36dc9445526a215c4eff52"),
"twitter" : "1",
"alias" : "ViktorCrowley",
"__v" : 25,
"collectables" : [
{
"count" : 12,
"collectable" : ObjectId("5d36c1ba4c86991db93bd7e7")
},
{
"count" : 25,
"collectable" : ObjectId("5d36c13d4c86991db93bd7c9")
},
{
"count" : 8,
"collectable" : ObjectId("5d381e122f25221126a98f9c")
}
]
}
So, in this case, this user, for example have 3 differents items (collectables). But imagine that the total of collectables from the first collection is 100.
Now... what I´m looking for. I need a query that give me (paginated) the items from the first collection (collectables) and in the case that the user already have one of this items, marked with the total count. I mean, I want all the items from the first collection, with a new field, called count. If the user doens´t have any entry in his array, count will be 0, and if the user for that item, has for example 4 collectables, the count will be 4.
Some thing like this:
[
{
"_id" : ObjectId("5d387ecfbb676b173aa57fe3"),
"img" : "some url",
"name" : "THe one",
"__v" : 0,
"amount" : 17,
"available" : 16,
"collec" : ObjectId("5d36c0c34c86991db93bd7c8"),
"gen" : 3,
"metadata" : {},
"position" : 1,
"count" : 0
},
{
"_id" : ObjectId("5d387ecfbb676b173aa57fe3"),
"img" : "some url",
"name" : "The two",
"__v" : 0,
"amount" : 17,
"available" : 16,
"collec" : ObjectId("5d36c0c34c86991db93bd7c8"),
"gen" : 3,
"metadata" : {},
"position" : 2,
"count" : 1
},
{
"_id" : ObjectId("5d387ecfbb676b173aa57fe4"),
"img" : "some url",
"name" : "The Three",
"__v" : 0,
"amount" : 17,
"available" : 16,
"collec" : ObjectId("5d36c0c34c86991db93bd7c8"),
"gen" : 3,
"metadata" : {},
"position" : 3,
"count" : 0
},
{
"_id" : ObjectId("5d387ecfbb676b173aa57fe4"),
"img" : "Some url",
"name" : "La 5",
"__v" : 0,
"amount" : 17,
"available" : 16,
"collec" : ObjectId("5d36c0c34c86991db93bd7c8"),
"gen" : 3,
"metadata" : {},
"position" : 4,
"count" : 12
}
I tried several things using aggregate and lookup but I can´t get make it work.
The only I could get, was retrieve the info from AppUser and the total count..
Something like this (with mongoose):
AppUser.aggregate([
{ $match: matchQuery },
{$unwind: "$collectables"},
{
$lookup:
{
from: "collectables",
localField: "collectables.collectable",
foreignField: "_id",
as: "result"
}
},
{ $sort: { "result.position": 1 } },
{$unwind: "$result"},
{ $addFields : { "result.count" : "$collectables.count" }
},
{ $replaceRoot: { newRoot: "$result" } },
{ $skip: size * (page - 1) },
{ $limit: size }
]).exec((err, result) =>
{
if (err)
{
console.log(err);
return res.status(401).send({ success: false });
}
else {
return res.status(200).send({ success: true, result });
}
});
So I need help because i have three days with this and I can´t get something nice...
Thanks in advance...
Below query will bring us the expected output
db.appuser.aggregate([
{ $unwind: "$collectables" },
{
$lookup: {
from: "collectables",
localField: "collectables.collectable",
foreignField: "_id",
as: "result"
}
},
{
$project: {
result: 1,
flag: { "$gt": [ {"$size": "$result"}, 0 ] },
"collectables.count": 1,
alias: 1,
_id: 0
}
},
{
$match: { flag: true }
}
]);
Unwind the appuser.collectables array, then do a lookup between the collections collectables and appuser, then use $project and $match to get the desired result.
In the $project stage added a flag to filter out the desired documents, by checking the size of the result array of the $lookup output.
Sample Data used
db.collectables.find()
{
"_id": "5d387ecfbb676b173aa57fe3",
"img": "some url",
"name": "La 5",
"__v": 0,
"amount": 17,
"available": 16,
"collec": "5d36c0c34c86991db93bd7c8",
"gen": 3,
"metadata": {
},
"position": 1
}
db.appuser.find()
[{
"_id": "5d36dc9445526a215c4eff52",
"twitter": "1",
"alias": "ViktorCrowley",
"__v": 25,
"collectables": [
{
"count": 12,
"collectable": "5d36c1ba4c86991db93bd7e7"
},
{
"count": 25,
"collectable": "5d36c13d4c86991db93bd7c9"
},
{
"count": 8,
"collectable": "5d381e122f25221126a98f9c"
}
]
},
{
"_id": "5d36dc9445526a215c4efga1",
"twitter": "1",
"alias": "John",
"__v": 11,
"collectables": [
{
"count": 10,
"collectable": "5d387ecfbb676b173aa57fe3"
}
]
},
{
"_id": "6dc2dc9445526a215c4efga1",
"twitter": "1",
"alias": "Alice",
"__v": 11,
"collectables": [
{
"count": 25,
"collectable": "5d387ecfbb676b173aa57fe3"
},
{
"count": 3,
"collectable": "5d36c13d4c86991db9312349"
},
{
"count": 5,
"collectable": "5d381e122f25221126a9711c"
}
]
}]
Final Output
{
"alias": "John",
"collectables": {
"count": 10
},
"result": [
{
"_id": "5d387ecfbb676b173aa57fe3",
"img": "some url",
"name": "La 5",
"__v": 0,
"amount": 17,
"available": 16,
"collec": "5d36c0c34c86991db93bd7c8",
"gen": 3,
"metadata": {
},
"position": 1
}
],
"flag": true
}
{
"alias": "Alice",
"collectables": {
"count": 25
},
"result": [
{
"_id": "5d387ecfbb676b173aa57fe3",
"img": "some url",
"name": "La 5",
"__v": 0,
"amount": 17,
"available": 16,
"collec": "5d36c0c34c86991db93bd7c8",
"gen": 3,
"metadata": {
},
"position": 1
}
],
"flag": true
}
Hope it helps!

Finding top 3 values from array mongodb [duplicate]

This question already has an answer here:
Finding top N entries from the Array
(1 answer)
Closed 3 years ago.
I have an array of objects in the following collection having levels i want
only the top 3 levels from an array in descending order of levels
{
"_id" : ObjectId("5ce10ffb0a9531d98ad11819"),
"_course" : ObjectId("593107b273790c30c4e08b02"),
"users" : [
{
"level" : 17,
"_user" : ObjectId("5c08be10d1a0c91e4c739629")
},
{
"level" : 16,
"_user" : ObjectId("593107b273790c30c4e08b07")
},
{
"level" : 5,
"_user" : ObjectId("593107b273790c30c4e08b08")
},
{
"level" : 17,
"_user" : ObjectId("593107b273790c30c4e08b05")
}
],
"acr" : "MH"
}
/* 2 */
{
"_id" : ObjectId("5ce10ffb0a9531d98ad1181a"),
"_course" : ObjectId("593107b273790c30c4e08b02"),
"users" : [
{
"level" : 9,
"_user" : ObjectId("5c08be10d1a0c91e4c739629")
},
{
"level" : 6,
"_user" : ObjectId("593107b273790c30c4e08b02")
},
{
"level" : 5,
"_user" : ObjectId("593107b273790c30c4e08b07")
},
{
"level" : 4,
"_user" : ObjectId("593107b273790c30c4e08b07")
}
],
"acr" : "MA"
}
Expected output
{
"_id": "5ce10ffb0a9531d98ad11819",
"users": [
{
"level": 17,
"_user": "593107b273790c30c4e08b09"
},
{
"level": 17,
"_user": "593107b273790c30c4e08b05"
},
{
"level": 16,
"_user": "593107b273790c30c4e08b07"
}
]
},
{
"_id": "5ce10ffb0a9531d98ad11819",
"users": [
{
"level": 9,
"_user": "593107b273790c30c4e08b09"
},
{
"level": 6,
"_user": "593107b273790c30c4e08b02"
},
{
"level": 5,
"_user": "593107b273790c30c4e08b07"
}
]
}
I want the expected output like above i am using aggregation but i cant find a way. I used other way also but still i am not getting the proper output something is missing that i cant find
You can use below aggregation
db.collection.aggregate([
{ "$unwind": "$users" },
{ "$sort": { "users.level": -1 }},
{ "$group": {
"_id": "$_id",
"users": { "$push": "$users" }
}},
{ "$addFields": { "users": { "$slice": ["$users", 3] }}}
])
MongoPlayground

MongoDB group by type and by date

I have a Collection with a simple Document to store impressions and conversions with the following structure:
/* 1 */
{
"_id" : ObjectId("566f1ef857c1e6dd3123050a"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T19:56:40.100Z"),
"__v" : 0
}
/* 2 */
{
"_id" : ObjectId("566f1fc9ac964e6f327c55d6"),
"path_id" : ObjectId("55944c1ebe244fd19cbf510b"),
"data_type" : "conversion",
"created_at" : ISODate("2015-12-14T20:00:09.972Z"),
"__v" : 0
}
/* 3 */
{
"_id" : ObjectId("566f2896739f6afa4485f327"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:37:42.139Z"),
"__v" : 0
}
/* 4 */
{
"_id" : ObjectId("566f28e5739f6afa4485f328"),
"path_id" : ObjectId("562e594315ef3d8c3f05d219"),
"data_type" : "impression",
"created_at" : ISODate("2015-12-14T20:39:01.233Z"),
"__v" : 0
}
I'm able to group and count by data_type, but what I need to do is group by date and then count the data_type in order to get the following result:
[
{
'_id': 'Y',
'conversions': 20,
'impressions': 2703,
'date': '2015-12-14'
},
{
'_id': 'Z',
'conversions': 10,
'impressions': 1703,
'date': '2015-12-13'
}
]
The code I have right now is the following, but it only groups by data_type. I'm trying to add a project to regroup by date with no luck so far.
var path_id = new mongoose.Types.ObjectId( req.body.path_id );
var match = {
'path_id': {
$eq: path_id
}
};
var group = {
'_id': '$data_type',
'count': {
'$sum': 1
}
}
Hit.aggregate( [ {
$match: match
}, {
$group: group
} ], function( err, res ) {
console.log( res );
} );
The result is
POST /api/hits/bypath 200 30ms - 15b
[ { _id: 'conversion', count: 2 },
{ _id: 'impression', count: 2703 } ]
To do nested group by date, you have to use to Date aggregation operator $dateToString.
Here is query
db.hits.aggregate([
{
"$project": {
"created_at": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"data_type": true
}
},
{
"$group": {
"_id": {
"data_type": "$data_type",
"created_at": "$created_at"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": {
"data_type": "$_id.data_type"
},
"data":{ "$addToSet" : { count: "$count", date: "$_id.created_at" } }
}
}
])
If you want to match before group by operation based on condition, Add as following in the query
{
"$match": {
"path_id": {
"$eq": "<path_id>"
}
}
}
you can use Date Aggregation Operators to project the day/month/year fields and then group by them
{
"$project": {
"y": {
"$year": "$created_at"
},
"m": {
"$month": "$created_at"
},
"d": {
"$dayOfMonth": "$created_at"
},
"data_type" : 1
}
},
{
"$group": {
"_id": {
"year": "$y",
"month": "$m",
"day": "$d",
"data_type": "$data_type"
},
count: {
"$sum": 1
}
}
}
and will output in this format:
"_id": {
"year": 2015,
"month": 10,
"day": 5,
"data_type": "impression"
},
count: 10
and then group again by date to combine the types in one document
{
"$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month",
"day": "$_id.day"
},
types: {"$push":"$_id.data_type"},
counters: {"$push":"$count"}
}
}
which will result in this:
"_id": {
"year": 2015,
"month": 10,
"day": 5
},
types: ["impression", "conversion"]
counters: [10, 5]
there might be a more elegant or faster (with 1 group) way to do this though, i am not sure.

Aggregate mongodb by latest timestamp

I'd like to get the "population" of each city's last timestamp using the aggregate function.
In a MongoDB like this:
{
"_id": {"$oid": "55354bc97b5dfd021f2be661"},
"timestamp": {"$date": "2015-04-20T18:56:09.000Z"},
"city": "Roma",
"population": [
{"age": 90,"count": 1000},
{"age": 25,"count": 25}
]
},
{
"_id": {"$oid": "55354c357b5dfd021f2be663"},
"timestamp": {"$date": "2015-04-20T18:57:57.000Z"},
"city": "Madrid",
"population": [
{"age": 90,"count": 10},
{"age": 75,"count": 2343},
{"age": 50,"count": 500},
{"age": 70,"count": 5000}
]
},
{
"_id": {"$oid": "55362da541c37aef07d4ea9a"},
"timestamp": {"$date": "2015-04-21T10:59:49.000Z"},
"city": "Roma",
"population": [
{"age": 90,"count": 5}
]
}
I'd like to retrieve all the cities, but for each one only the latest timestamp:
{
"city": "Roma",
"population": [
{"age": 90,"count": 5}
]
},
{
"city": "Madrid",
"population": [
{"age": 90,"count": 10},
{"age": 75,"count": 2343},
{"age": 50,"count": 500},
{"age": 70,"count": 5000}
]
}
I have tried something like this answer, but I don't know how to "unwind" the populations after getting the latest timestamp for each city:
db.collection('population').aggregate([
{ $unwind: '$population' },
{ $group: { _id: '$city', timestamp: { $max: '$timestamp' } } },
{ $sort: { _id : -1 } }
], function(err, results) {
res.send(results)
});
The following aggregation pipeline will give you the desired result. The first step in the pipeline orders the documents by the timestamp field (descending) and then groups the ordered documents by the city field in the next $group stage. Within the $group operator, you can extract the population array field by way of the $$ROOT operator. The $first operator returns the value that results from applying the $$ROOT expression to the first document in a group of documents that share the same city key. The final pipeline stage involves projecting the fields from the previous pipeline into the desired fields:
db.population.aggregate([
{
"$sort": { "timestamp": -1 }
},
{
"$group": {
"_id": "$city",
"doc": { "$first": "$$ROOT" }
}
},
{
"$project": {
"_id": 0,
"city": "$_id",
"population": "$doc.population"
}
}
]);
Output:
/* 0 */
{
"result" : [
{
"city" : "Madrid",
"population" : [
{
"age" : 90,
"count" : 10
},
{
"age" : 75,
"count" : 2343
},
{
"age" : 50,
"count" : 500
},
{
"age" : 70,
"count" : 5000
}
]
},
{
"city" : "Roma",
"population" : [
{
"age" : 90,
"count" : 5
}
]
}
],
"ok" : 1
}
I think that you want to use $project instead of $unwind:
db.collection('population').aggregate([{
$group: {
_id: '$city',
timestamp: {$max: '$timestamp'}
}
}, {
$project: {
population: '$doc.population'
}
}, {
$sort: {
_id : -1
}
}], function(err, results) {
res.send(results)
});
I use this to sort any timestamp field using aggregation, I am sorting it by the latest update time of the document. If you need you can group it later. You can learn more about [aggregate sorting here.][1]
aggregate.push({ $sort: { updated_at: -1 } });
What I do is I make blocks of aggregate actions push them into an array and execute it all together. I find it easier to debug if something is not working properly.
[1]: https://www.mongodb.com/docs/manual/reference/operator/aggregation/sort/

Resources