Mongoose/mongodb - get only latest records per id - node.js

I have an Inspection model in mongoose:
var InspectionSchema = new Schema({
business_id: {
type: String,
required: true
},
score: {
type: Number,
min: 0,
max: 100,
required: true
},
date: {
type: Number, // in format YYYYMMDD
required: true
},
description: String,
type: String
});
InspectionSchema.index({business_id: 1, date: 1}, {unique: true});
It is possible for there to be multiple inspections on the same Business (each Business is represented by a unique business_id). However, there is a limit of one inspection per business per day, which is why there is a unique index on business_id + date.
I also created a static method on the Inspection object which, given a list of business_ids, retrieves all of the inspections for the underlying businesses.
InspectionSchema.statics.getAllForBusinessIds = function(ids, callback) {
this.find({'business_id': {$in: ids}}, callback);
};
This function fetches all of the inspections for the requested businesses. However, I want to also create a function that fetches only the latest inspection per business_id.
InspectionSchema.statics.getLatestForBusinessIds = function(ids, callback) {
// query to get only the latest inspection per business_id in "ids"?
};
How might I go about implementing this?

You can use the .aggregate() method in order to get all the latest data in one request:
Inspection.aggregate(
[
{ "$sort": { "buiness_id": 1, "date": -1 } },
{ "$group": {
"_id": "$business_id",
"score": { "$first": "$score" },
"date": { "$first": "$date" },
"description": { "$first": "$description" },
"type": { "$first": "$type" }
}}
],
function(err,result) {
}
);
Just $sort then $group with the "business_id" as the grouping key. The $first gets the first results from the grouping boundary, where we already sorted by date within each id.
If you just want the date then do this using $max:
Inspection.aggregate(
[
{ "$group": {
"_id": "$business_id",
"date": { "$max": "$date" }
}}
],
function(err,result) {
}
);
Also see $match if you want to "pre-filter" the business id values or any other conditions when doing this.

try this:
Inpection.aggregate(
[
{ $match : { _id : { "$in" : ids} } },
{ $group: { "_id" : "$business_id", lastInspectionDate: { $last: "$date" } } }
],
function(err,result) {
}
);

Related

MongoDB aggregate lookup match not working the same without lookup

Can you please help? I'm trying to aggregate data over the past 12 months by both ALL publication data specific to a certain publisher and per publication to return a yearly graph data analysis based on the subscription type.
Here's a snapshot of the Subscriber model:
const SubscriberSchema = new Schema({
publication: { type: Schema.Types.ObjectId, ref: "publicationcollection" },
subType: { type: String }, // Print, Digital, Bundle
subStartDate: { type: Date },
subEndDate: { type: Date },
});
Here's some data for the reader (subscriber) collection:
{
_id: ObjectId("5dc14d3fc86c165ed48b6872"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-20T00:00:00.000Z",
subtype: "print"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6871"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-19T00:00:00.000Z",
subtype: "print"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6870"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-18T00:00:00.000Z",
subtype: "digital"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6869"),
publication: ObjectId("5d8b36c3148c1e5aec64662c"),
subStartDate: "2019-11-19T00:00:00.000Z",
subtype: "print"
}
The publication model has plenty of fields but the _id and user fields are the only point of reference in the following queries.
Here's some data for the publication collection:
// Should use
{ "_id": {
"$oid": "5d8b36c3148c1e5aec64662c"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": false
},
// Should use
{ "_id": {
"$oid": "5d89db9d82273f1d18970deb"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": false
},
// Shouldn't use as deleted === true
{ "_id": {
"$oid": "5d89db9d82273f1d18970dec"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": true
},
// Shouldn't use as different user ID
{ "_id": {
"$oid": "5d89db9d82273f1d18970dfc"
},
"user": {
"$oid": "5d24bbd89f09024590db9efd"
},
"isDeleted": true
}
When I do a lookup on a publication ID with the following, I'm getting perfect results:
Subscriber.aggregate([
{
$match: {
$and: [
{ 'publication': mongoose.Types.ObjectId(req.params.id) },
],
"$expr": { "$eq": [{ "$year": "$subStartDate" }, new Date().getFullYear()] }
}
},
{
/* group by year and month of the subscription event */
$group: {
_id: { year: { $year: "$subStartDate" }, month: { $month: "$subStartDate" }, subType: "$subType" },
count: { $sum: 1 }
},
},
{
/* sort descending (latest subscriptions first) */
$sort: {
'_id.year': -1,
'_id.month': -1
}
},
{
$limit: 100,
},
])
However, when I want to receive data from the readercollections (Subscriber Model) for ALL year data, I'm not getting the desired results (if any) from all of the things I'm trying (I'm posting the best attempt result below):
Publication.aggregate([
{
$match:
{
user: mongoose.Types.ObjectId(id),
isDeleted: false
}
},
{
$project: {
_id: 1,
}
},
{
$lookup: {
from: "readercollections",
let: { "id": "$_id" },
pipeline: [
{
$match:
{
$expr: {
$and: [
{ $eq: ["$publication", "$$id"] },
{ "$eq": [{ "$year": "$subStartDate" }, new Date().getFullYear()] }
],
}
}
},
{ $project: { subStartDate: 1, subType: 1 } }
],
as: "founditem"
}
},
// {
// /* group by year and month of the subscription event */
// $group: {
// _id: { year: { $year: "$founditem.subStartDate" }, month: { $month: "$foundtitem.subStartDate" }, subType: "$founditem.subType" },
// count: { $sum: 1 }
// },
// },
// {
// /* sort descending (latest subscriptions first) */
// $sort: {
// '_id.year': -1,
// '_id.month': -1
// }
// },
], function (err, result) {
if (err) {
console.log(err);
} else {
res.json(result);
}
})
Which returns the desired data without the $group (commented out) but I need the $group to work or I'm going to have to map a dynamic array based on month and subtype which is completely inefficient.
When I'm diagnosing, it looks like this $group is the issue but I can't see how to fix as it works in the singular $year/$month group. So I tried the following:
{
/* group by year and month of the subscription event */
$group: {
_id: { year: { $year: "$subStartDate" }, month: { $month: "$subStartDate" }, subType: "$founditem.subType" },
count: { $sum: 1 }
},
},
And it returned the $founditem.subType fine, but any count or attempt to get $year or $month of the $founditem.subStartDate gave a BSON error.
The output from the single publication ID lookup in the reader collection call that works (and is plugging into the line graph perfectly) is:
[
{
"_id": {
"year": 2019,
"month": 11,
"subType": "digital"
},
"count": 1
},
{
"_id": {
"year": 2019,
"month": 11,
"subType": "print"
},
"count": 3
}
]
This is the output I'd like for ALL publications rather than just a single lookup of a publication ID within the reader collection.
Thank you for any assistance and please let me know if you need more details!!

How to get aggregated sum of values in an array of mongoose subdocuments when query parent?

I'm trying to build some advanced hello world app on top of express and mongoose. Assume I have next Schemas:
const pollOptionsSchema = new Schema({
name: String,
votes: {
type: Number,
default: 0
}
});
const pollSchema = new Schema({
name: String,
dateCreated: { type: Date, default: Date.now },
author: { type: Schema.Types.ObjectId },
options: [pollOptionsSchema]
});
And when I simply call
Poll.findOne({_id: req.params.id}).exec((err, data) => {
if (err) console.log(err);
// I receive next data:
// { _id: 58ef3d2c526ced15688bd1ea,
// name: 'Question',
// author: 58dcdadfaea29624982e2fc6,
// __v: 0,
// options:
// [ { name: 'stack', _id: 58ef3d2c526ced15688bd1ec, votes: 5 },
// { name: 'overflow', _id: 58ef3d2c526ced15688bd1eb, votes: 3 } ],
// dateCreated: 2017-04-13T08:56:12.044Z }
});
The question is how I could receive same data + aggregated number of votes (i.e 8 in case above) after calling some method on Model level, for example:
// I want to receive:
// { _id: 58ef3d2c526ced15688bd1ea,
// name: 'Question',
// author: 58dcdadfaea29624982e2fc6,
// __v: 0,
// totalNumberOfVotes: 8,
// options:
// [ { name: 'stack', _id: 58ef3d2c526ced15688bd1ec, votes: 5 },
// { name: 'overflow', _id: 58ef3d2c526ced15688bd1eb, votes: 3 } ],
// dateCreated: 2017-04-13T08:56:12.044Z }
Or maybe I need to implement some extra method on document level i.e (data.aggregate)?
I've already reviewed:
http://mongoosejs.com/docs/api.html#model_Model.mapReduce
http://mongoosejs.com/docs/api.html#aggregate_Aggregate
https://docs.mongodb.com/manual/core/map-reduce/
https://docs.mongodb.com/manual/tutorial/map-reduce-examples/
But can't utilize it for my case :(
Any advice will be much appreciated. Thanks!
Use $reduce operator within an $addFields pipeline to create the totalNumberOfVotes field. In your aggregate pipeline, the first step is the $match which filters the document stream to allow only matching documents to pass unmodified into the next pipeline stage and uses standard MongoDB queries.
Consider running the following aggregate operation to get the desired result:
Poll.aggregate([
{ "$match": { "_id": mongoose.Types.ObjectId(req.params.id) } },
{
"$addFields": {
"totalNumberOfVotes": {
"$reduce": {
"input": "$options",
"initialValue": 0,
"in": { "$add" : ["$$value", "$$this.votes"] }
}
}
}
}
]).exec((err, data) => {
if (err) console.log(err);
console.log(data);
});
NB: The above will work for MongoDB 3.4 and greater.
For other earlier versions you would need to $unwind the options array first before grouping the denormalised documents within a $group pipeline step and aggregating with the accumulators $sum, $push and $first.
The following example shows this approach:
Poll.aggregate([
{ "$match": { "_id": mongoose.Types.ObjectId(req.params.id) } },
{ "$unwind": { "path": "$options", "preserveNullAndEmptyArrays": true } },
{
"$group": {
"_id": "$_id",
"totalNumberOfVotes": { "$sum": "$options.votes" },
"options": { "$push": "$options" },
"name": { "$first": "$name" },
"dateCreated": { "$first": "$dateCreated" },
"author": { "$first": "$author" }
}
}
]).exec((err, data) => {
if (err) console.log(err);
console.log(data);
});

Sum of subdocuments in Mongoose

Currently I have this schema.
var cartSchema = new Schema({
userPurchased: {type: Schema.Types.ObjectId, ref: 'users'},
products: [
{
product: {type: Schema.Types.ObjectId, ref: 'products'},
size: {type: String, required: true},
quantity: {type: Number, required: true},
subTotal: {type: Number, required: true}
}
],
totalPrice: {type: Number, default: 0, required: true}
});
Example of db record
{
"_id": {
"$oid": "586f4be94d3e481378469a08"
},
"userPurchased": {
"$oid": "586dca1f5f4a7810fc890f97"
},
"totalPrice": 0,
"products": [
{
"product": {
"$oid": "58466e8e734d1d2b0ceeae00"
},
"size": "m",
"quantity": 5,
"subTotal": 1995,
"_id": {
"$oid": "586f4be94d3e481378469a09"
}
},
{
"subTotal": 1197,
"quantity": 3,
"size": "m",
"product": {
"$oid": "58466e8e734d1d2b0ceeae01"
},
"_id": {
"$oid": "586f4ef64d3e481378469a0a"
}
}
],
"__v": 0
}
Is there any way to sum all the subTotal and put it in the total price field? Right now I am thinking about aggregate function but I doubt it will be the right approach in here. I guess I need an update query and sum method at the same time. Can anyone help me in here?
Using the aggregate() function, you can run the following pipeline which uses the $sum operator to get the desired results:
const results = await Cart.aggregate([
{ "$addFields": {
"totalPrice": {
"$sum": "$products.subTotal"
}
} },
]);
console.log(JSON.stringify(results, null, 4));
and the corresponding update operation follows:
db.carts.updateMany(
{ },
[
{ "$set": {
"totalPrice": {
"$sum": "$products.subTotal"
}
} },
]
)
Or if using MongoDB 3.2 and earlier versions, where $sum is available in the $group stage only, you can do
const pipeline = [
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
}
]
Cart.aggregate(pipeline)
.exec(function(err, results){
if (err) throw err;
console.log(JSON.stringify(results, null, 4));
})
In the above pipeline, the first step is the $unwind operator
{ "$unwind": "$products" }
which comes in quite handy when the data is stored as an array. When the unwind operator is applied on a list data field, it will generate a new record for each and every element of the list data field on which unwind is applied. It basically flattens the data.
This is a necessary operation for the next pipeline stage, the $group step where you group the flattened documents by the _id field, thus effectively regrouping the denormalised documents back to their original schema.
The $group pipeline operator is similar to the SQL's GROUP BY clause. In SQL, you can't use GROUP BY unless you use any of the aggregation functions. The same way, you have to use an aggregation function in MongoDB (called accumulators) as well. You can read more about the accumulators here.
In this $group operation, the logic to calculate the totalPrice and returning the original fields is through the accumulators. You get thetotalPrice by summing up each individual subTotal values per group with $sum as:
"totalPrice": { "$sum": "$products.subTotal }
The other expression
"userPurchased": { "$first": "$userPurchased" },
will return a userPurchased value from the first document for each group using $first. Thus effectively rebuilding the original document schema before the $unwind
One thing to note here is when executing a pipeline, MongoDB pipes operators into each other. "Pipe" here takes the Linux meaning: the output of an operator becomes the input of the following operator. The result of each operator is a new collection of documents. So Mongo executes the above pipeline as follows:
collection | $unwind | $group => result
As a side note, to help with understanding the pipeline or to debug it should you get unexpected results, run the aggregation with just the first pipeline operator. For example, run the aggregation in mongo shell as:
db.cart.aggregate([
{ "$unwind": "$products" }
])
Check the result to see if the products array is deconstructed properly. If that gives the expected result, add the next:
db.cart.aggregate([
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
}
])
Repeat the steps till you get to the final pipeline step.
If you want to update the field then you can add the $out pipeline stage as the last step. This will write the resulting documents of the aggregation pipeline to the same collection, thus technically updating the collection.
var pipeline = [
{ "$unwind": "$products" },
{
"$group": {
"_id": "$_id",
"products": { "$push": "$products" },
"userPurchased": { "$first": "$userPurchased" },
"totalPrice": { "$sum": "$products.subTotal" }
}
},
{ "$out": "cart" } // write the results to the same underlying mongo collection
]
UPDATE
To do both the update and query, you could then issue a find() call in the aggregate callback to get the updated json i.e.
Cart.aggregate(pipeline)
.exec(function(err, results){
if (err) throw err;
Cart.find().exec(function(err, docs){
if (err) return handleError(err);
console.log(JSON.stringify(docs, null, 4));
})
})
Using Promises, you could do this alternatively as
Cart.aggregate(pipeline).exec().then(function(res)
return Cart.find().exec();
).then(function(docs){
console.log(JSON.stringify(docs, null, 4));
});
I can't really say whether this approach is better than the aggregation, but in case you want to do it with virtuals:
cartSchema.virtual('totalPrice').get(function () {
return this.products.map(p => p.subTotal).reduce((a, b) => a + b);
});
But care:
If you use toJSON() or toObject() (or use JSON.stringify() on a mongoose document) mongoose will not include virtuals by default. Pass { virtuals: true } to either toObject() or toJSON()

Mongoose Virtuals in MongoDB Aggregate

My Mongoose Schema is as follows:
var DSchema = new mongoose.Schema({
original_y: {type: Number},,
new_y: {type: Number},,
date: {type: Date},
dummy: [dummyEmbeddedDocuments]
}, toObject: { virtuals: true }, toJSON: { virtuals: true}
});
DSchema.virtual('dateformatted').get(function () {
return moment(this.date).format('YYYY-MM-DD HH:mm:ss');
});
module.exports = mongoose.model('D', DSchema);
A document in my schema would be the following:
{
id:1,
original_y: 200,
new_y: 140,
date: 2015-05-03 00:00:00.000-18:30,
dummy: [
{id:1, storage:2, cost: 10},
{id:2, storage:0, cost: 20},
{id:3, storage:5, cost: 30},
]
}
My Query:
Item.aggregate([
{
"$match": {
"dummy.storage": {"$gt": 0}
}
},
{
"$unwind": "$dummy"
},
{
"$project": {
"original_y": 1,
"new_y": 1,
"dateformatted": 1,
"dummy.id": "$dummy.id",
"dummy.storage": "$dummy.storage",
"dummy.cost": "$dummy.cost",
"dummy.tallyAmount": {
"$divide": [
{ "$add": ["$new_y","$original_y"] },
"$dummy.cost"
]
}
}
},
{
"$group": {
"_id": "_$id",
"original_y": { "$first": "$original_y" },
"dateformatted": { "$first": "$dateformatted" },
"new_y": { "$first": "$new_y" },
"dummy": {
"$addToSet": "$dummy"
}
}
}
]).exec(callback);
This query however returns the VIRTUAL dateformatted attribute as NULL. Any thoughts as to why this is happening?
A couple notes in the docs touch on why this is so:
Arguments are not cast to the model's schema because $project operators allow redefining the "shape" of the documents at any stage
of the pipeline, which may leave documents in an incompatible format.
The documents returned are plain javascript objects, not mongoose documents (since any shape of document can be returned).
But it goes beyond this because the aggregate operation is performed server-side, where any client-side Mongoose concepts like virtuals do not exist.
The result is that you'll need to include the date field in your $project and $group stages and add your own dateformatted field to the results in code based on the date values.
This is an old question but I've come up with a useful hack to get back the virtuals and thought it might be useful for those searching for this problem.
You can easily convert the objects back to mongoose models:
documents = documents.map(d => {
return new Document(d);
});
var virtual = documents[0].virtualProperty;
the <field>: <1 or true> form is used to include an existing field which is not the case here since the dateformatted field doesn't exist and you have to create it using an expression, $dateToString can be used:
"$project": {
"original_y": 1,
"new_y": 1,
"dateformatted": { "$dateToString": { "format": "%Y-%m-%d %H:%M:%S", "date": "$date" } },
...
Another option is to use it with $addFields:
{
"$project": {
...
}
},
{
"$addFields": {
"dateformatted": { "$dateToString": {"format": "%Y-%m-%d %H:%M:%S", "date": "$date"} }
}
},
...
Here's a solution that works!
Aggregate queries return js objects which is not an instance of mongoose Document.
You may use Model.hydrate
const documents = docs.map(doc => myModel.hydrate(doc))

Mongoose error on aggregate group

i have this model:
var Chat = new Schema({
from: String,
to: String,
satopId: String,
createdAt: Date
});
var Chat = mongoose.model('Chat', Chat);
I want do a query to do a query that returns the max created at grouping by to and from field. I tried with:
Chat.aggregate([
{
$group: {
_id: '$to',
from: '$from',
createdAt: {
$max: '$createdAt'
}
}
},
{
$project: {
_id: 1,
createdAt: 1,
from: 1,
to: 1
}
}
], function(err, docs){
})
But this generates this error:
the group aggregate field 'from' must be defined as an expression
inside an object
I don't understand what does it mean. How can i solve it?
Thanks
Anything "outside" if the _id expression in a $group statement requires a "grouping operator" of some sort.
Assuming you are happy with the idea of the "sole" grouping key to be the "to" field in your documents then you probably want something like $first:
Chat.aggregate([
{ "$group": {
"_id": "$to",
"from": { "$first": "$from" },
"createdAt": { "$max": "$createdAt" }
}},
function (err, docs) {
// do something here
}
])
Otherwise if you want "distinct" values on "both" fields then they both belong as a composite value of the grouping key:
Chat.aggregate([
{ "$group": {
"_id": {
"to": "$to",
"from": "$from"
},
"createdAt": { "$max": "$createdAt" }
}},
function (err, docs) {
// do something here
}
])
That's basically how it works. Either it's part of the key or something that needs a grouping operator just like with SQL.
Also note that your $project assertion is not really required as $group already does what you are asking there.

Resources