Mongoose/MongoDB: Get the most viewed articles grouped by day - node.js

I am trying to clone a Reddit-like community board API using MongoDB + Mongoose on Node.js.
My sample JSON data looks like below:
{
"genre": "free",
"viewCount": 90,
"isDeleted": false,
"commentCount": 0,
"voteCount": 0,
"_comments": [],
"_vote": [],
"_id": "ObjectId",
"title": "blahblah",
"contents": "blah",
"createdAt": "2020-01-24T08:50:28.409Z",
"__v": 0,
"id": "5e2aafd4395bf593aa94b623"
},
To solve this problem, I simply sorted using .sort({ viewCount:-1, createdAt: -1 }).
However, when I sorted this way, the most recently created Post always comes first, even though other posts have larger viewCount values...
The next thing I'm thinking of is trying to group Posts data by each day (i.e. All posts created today is grouped together; All posts created yesterday is grouped together).
After grouping, then maybe I can sort the rest of data by viewCount.
I believe the method using aggregate would be the one possible solution, but I'd like to know if there would be the simplest and the best solution for this problem!
The output I'd like to get is something like this:
// viewcount in Descending Order
{ '2020-01-24':
{ post1: { viewcount: 999, contents: ...},
{ post2: { viewcount: 998, contents:... },
... } },
'2020-01-23':
{ post1: { viewcount: 999, contents: ...},
{ post2: { viewcount: 998, contents:... },
... },
'2020-01-22':
{ post1: { viewcount: 999, contents: ...},
{ post2: { viewcount: 998, contents:... },
... }, ...}
Please help me out here...
Thank you :)

This aggregation gives something similar to the output you are expecting:
// Groups posts by the calendar day they were created (UTC) and lists each
// day's posts from most viewed to least viewed, newest day first.
// NOTE(review): assumes createdAt is stored as a BSON Date (Mongoose
// timestamps); $dateToString rejects plain strings - confirm against the schema.
db.test.aggregate( [
  // Order by popularity before grouping so $push collects each day's posts
  // in descending viewCount order.
  { $sort: { viewCount: -1 } },
  // Group on the day portion only; grouping on the raw timestamp would put
  // almost every post in its own group.
  { $group: {
    _id: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },
    post: { $push: "$$ROOT" }
  } },
  { $project: { post: 1, date: "$_id", _id: 0 } },
  // $group does not order its output, so sort the day buckets explicitly.
  // "YYYY-MM-DD" strings sort chronologically.
  { $sort: { date: -1 } }
] )

Related

mongodb - Is it possible to group with the $bucket operator by date?

Is it possible to group by date with the MongoDB $bucket operator?
{
$bucket: {
groupBy: // date field?
boundaries: [0, 5, 10, 15, 20, 30, 50, 75, 100, 500],
default: 'overBucket',
output: {
count: {
$sum: 1,
},
localities: {
$push: '$$ROOT',
},
},
},
},
Thanks in advance for the suggestions :)
If I understand correctly, I think the below code is what you are looking for:
// Stage 1: keep only EVENT / SUBEVENT documents whose verification is
// APPROVED and public.
const approvedPublicEvents = {
  $match: {
    type: { $in: ["EVENT", "SUBEVENT"] },
    "verification.status": "APPROVED",
    "verification.public": true,
  },
};

// Stage 2: bucket the remaining documents by the day of baseData.startDate
// (formatted as YYYY-MM-DD), counting them and keeping the full documents.
const groupByStartDay = {
  $group: {
    _id: {
      $dateToString: { date: "$baseData.startDate", format: "%Y-%m-%d" },
    },
    count: { $sum: 1 },
    events: { $push: "$$ROOT" },
  },
};

db.events.aggregate([approvedPublicEvents, groupByStartDay])
Let me know if you are trying to achieve $bucket stage with days as boundaries
Mongo Playground Sample Execution

Sorting date in mongodb using mm-dd-yyyy format

Here is my sample document:
_id:5c10fd9def1d420ef80007af,
date_claimed:"01-14-2018"
I'm trying to sort my date_claimed in ascending order, it is working properly using 2018 as end year but after I put some 2019 value, it doesn't follow the order. How can I fix this? I should follow the mm-dd-yyyy format
Code:
Bloodrequest.aggregate([{$group: {_id : "$date_claimed" , count:{$sum:1}}},{$sort: {_id: 1}}]
console.log(date2);
Yields:
[ { _id: '01-01-2019', count: 1 }, //this should be the most bottom
{ _id: '01-14-2018', count: 1 },
{ _id: '01-20-2018', count: 1 },
{ _id: '02-13-2018', count: 2 },
{ _id: '03-13-2018', count: 3 },
{ _id: '04-25-2018', count: 1 }]
Since mm-dd-yyyy is not naturally sortable, any solution based on this field content would require a full collection scan and/or full collection manipulation to be able to do this, since you're essentially requiring a custom sort method on a string field.
This will be a major performance killer, and not practical to do in the long term.
I would suggest you store your date_claimed field into a proper ISODate() format, put an index on it for sorting purposes, and do $project (or similar method) into the required mm-dd-yyyy format for output purposes.
For example, if your document is structured like:
> db.test.find()
{
"_id": 0,
"date_claimed": ISODate("2018-01-01T00:00:00Z")
}
{
"_id": 1,
"date_claimed": ISODate("2018-01-02T00:00:00Z")
}
{
"_id": 2,
"date_claimed": ISODate("2019-01-01T00:00:00Z")
}
You can then create an index on the date_claimed field:
> db.test.createIndex({date_claimed:1})
You can display the sorted date in descending order as expected:
> db.test.aggregate([ {$sort: {date_claimed: -1}} ])
{ "_id": 2, "date_claimed": ISODate("2019-01-01T00:00:00Z") }
{ "_id": 1, "date_claimed": ISODate("2018-01-02T00:00:00Z") }
{ "_id": 0, "date_claimed": ISODate("2018-01-01T00:00:00Z") }
You can also use $project to display the date in mm-dd-yyyy format as required. Note that the documents are sorted properly:
> db.test.aggregate([
{$sort: {date_claimed: -1}},
{$project: {date_string: {$dateToString: {format: '%m-%d-%Y', date:'$date_claimed'}}}}
])
{ "_id": 2, "date_string": "01-01-2019" }
{ "_id": 1, "date_string": "01-02-2018" }
{ "_id": 0, "date_string": "01-01-2018" }
See the $dateToString and the $project manual page for more information.
There are a couple of good things from this method:
You can put an index on the date field, removing the necessity to perform ad-hoc manipulation on the whole collection.
By using the ISODate() field type, the whole Date Expression Operators is now available for your use.
I think your best bet would be to use moment.js to convert each date into milliseconds and then sort with .sort(). Something like this perhaps:
// Sort the grouped results chronologically by their mm-dd-yyyy `_id` key.
// The format is passed explicitly because moment cannot reliably parse
// non-ISO date strings on its own. valueOf() yields milliseconds since the
// epoch, so subtracting gives an ascending comparator.
// A copy is sorted so that date2 itself is not reordered in place.
const sortedByDate = [...date2].sort(
  (a, b) =>
    moment(a._id, 'MM-DD-YYYY').valueOf() - moment(b._id, 'MM-DD-YYYY').valueOf()
);
Try something like below:
// Counts documents per claimed day and returns the days in chronological order.
Bloodrequest.aggregate([
  {
    // date_claimed is stored as an "mm-dd-yyyy" string, and the date part
    // operators below only accept real dates, so convert it first
    // ($dateFromString with `format` requires MongoDB 4.0+).
    $addFields: {
      claimedDate: {
        $dateFromString: { dateString: "$date_claimed", format: "%m-%d-%Y" }
      }
    }
  },
  {
    $group: {
      _id: {
        "day": { "$dayOfMonth": "$claimedDate" },
        "month": { "$month": "$claimedDate" },
        "year": { "$year": "$claimedDate" }
      },
      count: { $sum: 1 }
    }
  },
  {
    // Year, then month, then day gives ascending chronological order.
    $sort: {
      "_id.year": 1,
      "_id.month": 1,
      "_id.day": 1
    }
  }
]);
Kevin's answer is the ideal solution. But if you're stuck not being able to change the schema for any reason, then you can use this solution:
// Counts documents per date_claimed string, then sorts the groups by the
// real date parsed from that string. Called on the Bloodrequest model
// directly, as in the question; the model has no `dates` property.
Bloodrequest.aggregate([
  {
    $group: { _id: "$date_claimed", count: { $sum: 1 } }
  },
  {
    // Parse the "mm-dd-yyyy" group key into a Date so it can be sorted
    // chronologically instead of lexicographically.
    $addFields: {
      date: {
        $dateFromString: {
          dateString: "$_id",
          format: "%m-%d-%Y"
        }
      }
    }
  },
  {
    $sort:
      { date: 1 }
  }
])
As highlighted by Kevin, this is a performance killer. Should be used only when the data size is small and/or the frequency of this query being used is very low.

Node.js calling MongoDB with aggregate find

I have used SQL Server for a long time and just really learning MongoDB. I am trying to figure out how to do the aggregate finds to get just the data I want. Here is a sample from the database:
{
"_id": "1",
"user_id": "123-88",
"department_id": "1",
"type": "start",
"time": "2017-04-20T19:40:15.329Z"
}
{
"_id": "2",
"user_id": "123-88",
"department_id": "1",
"type": "stop",
"time": "2017-04-20T19:47:15.329Z"
}
What I want to do is find each unique user_id of department 1, only take the record with the latest time and tell me if they are oncall or not. So in the example above user 123-88 is not oncall now. How would you make this query? I know you will need something like this:
TimeCard.aggregate([
{ $match: {department_id: req.query.department_id}},
{ $sort: { user_id: 1, time: 1 }},
{ $group: { _id: "$user_id", current_type: "$type", lastTime: { $last: "$time" }}}
], function(err, docs){
if(err){console.log(err);}
else {res.json(docs);}
});
But I keep erroring so I know I am not correct in my logic. I know if I just have the match it works and if I add the sort it matches and sorts but the final group is not working. Also what would I add to then only show the people that are oncall. Thanks again for your help.
You can count how many "types" per user_id you have, this can be done by $sum, if it's odd, the user is oncall, because there is a start without a stop. This approach is correct only if you always have a stop for a start.
// Pipeline: restrict to the requested department, order each user's records
// by time, then count the records per user and remember the latest time.
// The final $match keeps users whose record count is odd (count % 2 === 1).
const onCallPipeline = [
  { $match: { department_id: req.query.department_id } },
  { $sort: { user_id: 1, time: 1 } },
  {
    $group: {
      _id: "$user_id",
      count_types: { $sum: 1 },
      lastTime: { $last: "$time" },
    },
  },
  { $match: { count_types: { $mod: [2, 1] } } },
];

TimeCard.aggregate(onCallPipeline, (err, docs) => {
  if (err) {
    console.log(err);
    return;
  }
  res.json(docs);
});

How to $match by _id provided in $project with mongodb aggregate?

I'm wanting to be able to match / filter for a specific style from whiskey.style.
I'm wondering if it's not matching due to the formatting of the OID. I tried toString() as the documentation seems to suggest - may need to investigate this more..
Here is my query / $match object
var qObj.whiskeyFilter = { whiskey: { style: '57953144abfaa62383341a72' },
_id:
{ '$in':
[ 57a115304d124a4d1ad12d81,
57a114d64d124a4d1ad12d7f,
57a1152a4d124a4d1ad12d80,
57a9049906f3733623826538 ] } }
my pipeline:
var pipeline = [
{
"$project": {
"weight": stack[0],
"whiskey": "$$ROOT",
"collection":
collect[0]
}
},
{
"$match": qObj.whiskeyFilter
}, {
"$sort": {
"weight": 1
}
}, {
"$limit": 12
}, {
"$skip": qObj.skip
}];
this works if I only include the _id / $in for the $match, but it will not $match with whiskey.style.
Here is an example of what would return from the aggregate:
[ { _id: 57a115304d124a4d1ad12d81,
weight: 1,
whiskey:
{ _id: 57a115304d124a4d1ad12d81,
name: 'sample whiskey 2',
distiller: 578c04f76091bcd618f26e04,
style: 57953144abfaa62383341a72,
userVote: 0,
timestamp: Tue Aug 02 2016 16:48:32 GMT-0500 (CDT),
total: 1,
vote: 2,
__v: 0 },
collection:
{ _id: 57acb4ff093360bee276aae6,
user: 57919ac16fa0390856a9998f,
whiskey: 57a115304d124a4d1ad12d81,
__v: 0,
collected: true,
vote: 1,
timestamp: Thu Aug 11 2016 12:25:19 GMT-0500 (CDT),
favorite: true } }
]
Update
I'm converting it into an objectId, as i've read the aggregation may have issues casting to string, but I still am not getting any returns for an expected match:
mongoose.Types.ObjectId(obj.style);
now, you can see that the style Id is no longer a string, but still $match does not seem to work:
match query : { whiskey: { style: 57953144abfaa62383341a72 },
_id:
{ '$in':
[ 57a115304d124a4d1ad12d81,
57a114d64d124a4d1ad12d7f,
57a1152a4d124a4d1ad12d80,
57a9049906f3733623826538 ] } }
Figured it out:
I had to modify and add my style to filter by in the main object / not nested
{
"$project": {
"weight": stack[0],
"whiskey": "$$ROOT",
"style": "$style", <--- added
"collection":
collect[0]
}
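As written, it was not capable of filtering on the nested $$ROOT object. The likely reason is that a condition of the form { whiskey: { style: ... } } is an exact match on the whole embedded document, so it only matches when whiskey contains nothing but style. Addressing the nested field with dot notation, i.e. { "whiskey.style": ... } (with the value as an ObjectId), should work as well.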
I'll add more detail if I can find it in the docs. Or, if anyone else would like to expand on the answer -- i'd prefer to accept that than my own.

Mongoose/mongodb - get only latest records per id

I have an Inspection model in mongoose:
var InspectionSchema = new Schema({
business_id: {
type: String,
required: true
},
score: {
type: Number,
min: 0,
max: 100,
required: true
},
date: {
type: Number, // in format YYYYMMDD
required: true
},
description: String,
type: String
});
InspectionSchema.index({business_id: 1, date: 1}, {unique: true});
It is possible for there to be multiple inspections on the same Business (each Business is represented by a unique business_id). However, there is a limit of one inspection per business per day, which is why there is a unique index on business_id + date.
I also created a static method on the Inspection object which, given a list of business_ids, retrieves all of the inspections for the underlying businesses.
InspectionSchema.statics.getAllForBusinessIds = function(ids, callback) {
this.find({'business_id': {$in: ids}}, callback);
};
This function fetches all of the inspections for the requested businesses. However, I want to also create a function that fetches only the latest inspection per business_id.
InspectionSchema.statics.getLatestForBusinessIds = function(ids, callback) {
// query to get only the latest inspection per business_id in "ids"?
};
How might I go about implementing this?
You can use the .aggregate() method in order to get all the latest data in one request:
// Returns one document per business_id holding the fields of its most
// recent inspection.
Inspection.aggregate(
  [
    // Sort so that, within each business_id, the newest inspection comes
    // first (date is a YYYYMMDD number, so descending means newest first).
    { "$sort": { "business_id": 1, "date": -1 } },
    // $first then picks the values of that newest inspection for each group.
    { "$group": {
      "_id": "$business_id",
      "score": { "$first": "$score" },
      "date": { "$first": "$date" },
      "description": { "$first": "$description" },
      "type": { "$first": "$type" }
    }}
  ],
  function(err,result) {
    // Placeholder: handle err / use result here.
  }
);
Just $sort then $group with the "business_id" as the grouping key. The $first gets the first results from the grouping boundary, where we already sorted by date within each id.
If you just want the date then do this using $max:
// Latest inspection date per business: $max over "date" within each
// business_id group, so no prior $sort is needed.
const latestDatePerBusiness = {
  $group: {
    _id: "$business_id",
    date: { $max: "$date" },
  },
};

Inspection.aggregate([latestDatePerBusiness], (err, result) => {
  // Callback body left empty, as in the original snippet.
});
Also see $match if you want to "pre-filter" the business id values or any other conditions when doing this.
try this:
// Returns the latest inspection date for each of the requested business ids.
Inspection.aggregate(
  [
    // Filter on business_id (the ids passed in are business ids, not document _ids).
    { $match : { business_id : { "$in" : ids } } },
    // $last is only meaningful on ordered input, so sort by date ascending first.
    { $sort : { business_id : 1, date : 1 } },
    { $group: { "_id" : "$business_id", lastInspectionDate: { $last: "$date" } } }
  ],
  function(err,result) {
    // Placeholder: handle err / use result here.
  }
);

Resources