MongoDB aggregation group average rating by last 7 days with empty values - node.js

I'm attempting to query a collection and retrieve an average value for each of the last 7 days, excluding the current day. On some or all of the days there may not be an average.
Here's what I have so far:
var dateTill = moment({hour:0,minute:0}).subtract(1, 'days')._d
var dateSevenDaysAgo = moment({hour:0,minute:0}).subtract(7, 'days')._d;
Rating.aggregate([
{
$match:{
userTo:facebookId,
timestamp:{$gt:dateSevenDaysAgo,$lt:dateTill}
}
},
{
$group:{
_id:{day:{'$dayOfMonth':'$timestamp'}},
average:{$avg:'$rating'}
}
},
{
$sort:{
'_id.day':1
}
}
])
This gives me
[ { _id: { day: 20 }, average: 1 },
{ _id: { day: 22 }, average: 3 },
{ _id: { day: 24 }, average: 5 } ]
What I'm trying to get is something like:
[1,,3,,5,,]
Which represents the last 7 days of averages in order and has an empty element where there is no average for that day.
I could try to make a function that detects where the gaps are, but this won't work when the averages are spread across two different months, e.g. (July 28, 29, 30, 31, Aug 1, 2) - the days in August will be sorted to the front of the array I want.
Is there an easier way to do this?
Thanks!

People ask about "empty results" quite often, and the thinking usually comes from how they would have approached the problem with a SQL query.
But whilst it is "possible" to produce a set of "empty results" for grouping keys that are not present in the data, it is a convoluted process and, much like the SQL approach people use, it just injects those values into the statement artificially; it really isn't a very performance-driven alternative. Think "join" with a manufactured set of keys. Not efficient.
The smarter approach is to have those results ready in the client API directly, without sending them to the server. Then the aggregation output can be "merged" with those results to create a complete set.
How you store the set to merge with is up to you; it just requires a basic "hash table" and lookups. But here is an example using nedb, which lets you keep the MongoDB way of thinking for queries and updates:
var async = require('async'),
mongoose = require('mongoose'),
DataStore = require('nedb'),
Schema = mongoose.Schema,
db = new DataStore();
mongoose.connect('mongodb://localhost/test');
var Test = mongoose.model(
'Test',
new Schema({},{ strict: false }),
"testdata"
);
var testdata = [
{ "createDate": new Date("2015-07-20"), "value": 2 },
{ "createDate": new Date("2015-07-20"), "value": 4 },
{ "createDate": new Date("2015-07-22"), "value": 4 },
{ "createDate": new Date("2015-07-22"), "value": 6 },
{ "createDate": new Date("2015-07-24"), "value": 6 },
{ "createDate": new Date("2015-07-24"), "value": 8 }
];
var startDate = new Date("2015-07-20"),
endDate = new Date("2015-07-27"),
oneDay = 1000 * 60 * 60 * 24;
async.series(
[
function(callback) {
Test.remove({},callback);
},
function(callback) {
async.each(testdata,function(data,callback) {
Test.create(data,callback);
},callback);
},
function(callback) {
async.parallel(
[
function(callback) {
var tempDate = new Date( startDate.valueOf() );
async.whilst(
function() {
return tempDate.valueOf() <= endDate.valueOf();
},
function(callback) {
var day = tempDate.getUTCDate();
db.update(
{ "day": day },
{ "$inc": { "average": 0 } },
{ "upsert": true },
function(err) {
tempDate = new Date(
tempDate.valueOf() + oneDay
);
callback(err);
}
);
},
callback
);
},
function(callback) {
Test.aggregate(
[
{ "$match": {
"createDate": {
"$gte": startDate,
"$lt": new Date( endDate.valueOf() + oneDay )
}
}},
{ "$group": {
"_id": { "$dayOfMonth": "$createDate" },
"average": { "$avg": "$value" }
}}
],
function(err,results) {
if (err) return callback(err);
async.each(results,function(result,callback) {
db.update(
{ "day": result._id },
{ "$inc": { "average": result.average } },
{ "upsert": true },
callback
)
},callback);
}
);
}
],
callback
);
}
],
function(err) {
if (err) throw err;
db.find({},{ "_id": 0 }).sort({ "day": 1 }).exec(function(err,result) {
console.log(result);
mongoose.disconnect();
});
}
);
Which gives this output:
[ { day: 20, average: 3 },
{ day: 21, average: 0 },
{ day: 22, average: 5 },
{ day: 23, average: 0 },
{ day: 24, average: 7 },
{ day: 25, average: 0 },
{ day: 26, average: 0 },
{ day: 27, average: 0 } ]
In short, a "datastore" is created with nedb, which basically acts the same as any MongoDB collection ( with stripped down functionality ). You then insert your expected range of "keys" with default values for the results.
Then running your aggregation statement, which is only going to return the keys that exist in the queried collection, you simply "update" the created datastore at the same key with the aggregated values.
To make that a bit more efficient, I am running both the empty result "creation" and the "aggregation" operations in parallel, utilizing "upsert" functionality and the $inc operator for the values. These will not conflict, which means the creation can happen at the same time as the aggregation is running, so no delays.
This is very simple to integrate into your API, so you can have all the keys you want, including those with no data for aggregation in the collection for output.
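If you would rather not pull in nedb, the same "merge" can be done with a plain object as the hash table. This is just a minimal sketch, assuming the same Test model, startDate, endDate and oneDay set up as above:
// Minimal sketch: seed a plain object with the expected day keys, then
// overwrite the defaults with whatever the aggregation actually returns.
var totals = {};

for ( var d = startDate.valueOf(); d <= endDate.valueOf(); d += oneDay ) {
    totals[ new Date(d).getUTCDate() ] = 0;
}

Test.aggregate(
    [
        { "$match": {
            "createDate": {
                "$gte": startDate,
                "$lt": new Date( endDate.valueOf() + oneDay )
            }
        }},
        { "$group": {
            "_id": { "$dayOfMonth": "$createDate" },
            "average": { "$avg": "$value" }
        }}
    ],
    function(err,results) {
        if (err) throw err;
        results.forEach(function(result) {
            totals[result._id] = result.average;
        });
        console.log(totals); // e.g. { '20': 3, '21': 0, '22': 5, ... '27': 0 }
    }
);
The principle is identical: seed the defaults first, then overwrite only the keys the aggregation actually returns.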
The same approach adapts well to using another actual collection on your MongoDB server for very large result sets. But if they are very large, then you should be pre-aggregating results anyway, and just using standard queries to sample.
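On the note of pre-aggregating, a minimal sketch of that idea is to bump a per-day summary document at write time instead of recomputing later. The RatingStats model, its fields and the ratingValue variable here are just assumed names for illustration:
// Minimal sketch of pre-aggregation: maintain a running total and count per
// day as each rating is written. RatingStats, "total", "count" and
// ratingValue are assumed names, not part of the original code.
RatingStats.update(
    { "day": new Date(new Date().setUTCHours(0,0,0,0)) },
    { "$inc": { "total": ratingValue, "count": 1 } },
    { "upsert": true },
    function(err) {
        if (err) throw err;
        // the day's average is then simply total / count at read time
    }
);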

Related

How can I implement an auto-calculated field in MongoDB?

I have a collection of workorders which has time_started and time_completed values. I want an auto-calculated field called duration that is automatically calculated as time_completed - time_started. What is the best way?
Essentially what I want is: when the app sends a POST request with a completed time, the duration is auto-calculated.
Example data
router.post('/completed', function (req, res) {
const time_completed = req.body.time_completed
const workorder_id = req.body.workorder_id
db.collection(workorder).updateOne(
{ _id: ObjectId(workorder_id) },
{
$set: {
time_completed: time_completed,
}
},
function (err, result) {
if (err) throw err
res.send('Updated')
}
)
});
Query
pipeline update requires >= MongoDB 4.2
Add the time_completed
Add the duration as well
*Replace the 6 with the JavaScript variable that holds the time_completed Date (see the driver sketch after the query below)
*duration will be in milliseconds
Test code here
db.collection.update(
{"_id": 1},
[
{
"$set": {
"time_completed": 6,
"duration": {
"$subtract": [
6,
"$time_started"
]
}
}
}
])
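As referenced in the list above, here is a minimal sketch of the same pipeline update issued from the route, where completedAt is assumed to be the JavaScript Date standing in for the literal 6; db, workorder, workorder_id, req and res all come from the route in the question, and time_started is assumed to be stored as a Date (see the Edit below if it is a string):
// Minimal sketch (MongoDB >= 4.2): the same $set pipeline from the query above,
// with `completedAt` standing in for the literal 6.
const completedAt = new Date(req.body.time_completed)

db.collection(workorder).updateOne(
  { _id: ObjectId(workorder_id) },
  [
    {
      $set: {
        time_completed: completedAt,
        // duration comes out in milliseconds
        duration: { $subtract: [completedAt, '$time_started'] }
      }
    }
  ],
  function (err, result) {
    if (err) throw err
    res.send('Updated')
  }
)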
Edit
You have strings in your database; I thought they were dates. The best thing to do is to convert all those string dates to Date with $dateFromString like the code below, and then use the first query.
If you need the string back from a Date you can use $dateToString when you need it.
Query
Same as above, but it converts the string dates to Date to do the subtraction (and keeps the dates as strings inside the database).
Test code here
db.collection.update({
"_id": 1
},
[
{
"$set": {
"time_completed": "2021-11-21T00:00:00.000Z",
"duration": {
"$subtract": [
ISODate("2021-11-21T00:00:00Z"),
{
"$dateFromString": {
"dateString": "$time_started"
}
}
]
}
}
}
])
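And for the reverse direction, a minimal sketch of getting the string form back out of an actual Date field with $dateToString (the format string here is just an example):
db.collection.aggregate([
  {
    "$project": {
      "time_completed_str": {
        "$dateToString": {
          "format": "%Y-%m-%dT%H:%M:%S.%LZ",
          "date": "$time_completed"
        }
      }
    }
  }
])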

How to sum all amounts paid by users

I am finding it difficult to add up all the amounts paid by customers that ordered items.
Order Schema
const orderschema = new Mongoose.Schema({
created: { type: Date, default: Date.now() },
amount: { type: Number, default: 0 },
User: [{ type: Mongoose.Schema.ObjectId, ref: 'Users'}]
...
})
Route
Get('/total-amount', total-amount)
Controller
Exports.total-amount = () => {
Order.find()...
}
I don't know what to add here to get the total amount made by all customers.
Using NodeJS and MongoDB.
Thank you for your help
You can use $sum in an aggregation stage like this:
First $group all documents (grouping with _id: null groups all values).
Then create the field total, which is the sum of all amount values.
And an optional stage, $project, to output only the total field.
db.order.aggregate([
{
"$group": {
"_id": null,
"total": {
"$sum": "$amount"
}
}
},
{
"$project": {
"_id": 0
}
}
])
Example here
To add it into a controller using Node.js and Mongoose you can use something like this piece of code:
exports.totalAmount = (req, res) => {
Order.aggregate([{
"$group": {
"_id": null,
"total": {
"$sum": "$amount"
}
}
}, {
"$project": {
"_id": 0
}
}]).then(response => {
res.status(200).send(response)
}).catch(e => res.status(400).send())
}
Note how the operation is done using your Mongoose model (in this case Order). You call the aggregate method in the same way you call the find method, for example: instead of doing
yourModel.find()
you do
yourModel.aggregate()
And the response will be:
[
{
"total": 6
}
]
So you can even update your controller to add an if/else block like this:
if(response[0].total)
res.status(200).send(response[0].total)
else
res.status(404).send()

How can I select a particular field in my MongoDB array and sum it up

I have a MongoDB schema where sometimes there is a single element in an array and sometimes more than 20 values in it. Each array element has a field whose value I want to sum up and insert into another field in my MongoDB document.
Here is what I am trying to say; here is my schema. How can I add the weight values together for every package inserted into the packages array and make that my total weight field?
{
"status": "In warehouse",
"paymentStatus": "incomplete",
"_id": "5d8b56476a3e4ae2d01f2953",
"unitNo": "0002",
"warehouseNo": "0001",
"trackingNo": "FPZ848505936",
"packages": [
{
"date": "2019-09-26T06:30:39.561Z",
"_id": "5d8c5b0f756838f78d5205d7",
"category": "chil",
"quantity": "177",
"description": "a valueablegoods",
"width": 15,
"itemweight": 123,
"length": 17,
"height": 21,
"dimension": 31.25903614457831,
"weight": 32.25903614457831
},
{
"date": "2019-09-26T06:30:39.561Z",
"_id": "5d8c5b0f756838f78d5202dd,
"category": "chil",
"quantity": "177",
"description": "a valueablegoods",
"width": 15,
"itemweight": 123,
"length": 17,
"height": 21,
"dimension": 35.25903614457831,
"weight": 30
},
{
"date": "2019-09-26T06:30:39.561Z",
"_id": "5d8c5b0f756838f78d51aeq",
"category": "chil",
"quantity": "177",
"description": "a valueablegoods",
"width": 15,
"itemweight": 123,
"length": 17,
"height": 21,
"dimension": 32.25903614457831,
"weight": 44
}
],
"totalWeigth": "This should add all weight value in my packages array together and if it is only 1 it should brings only the one"
"date": "2019-09-25T11:57:59.359Z",
"__v": 0
}
This is the API route that adds the packages to the packages array field, and I want the totalWeight to be updated anytime a new package is added or updated.
// #route POST api/admins/addshipment/:unitNo
// #desc Add shipment for each customer
// #access Private
router.post(
'/addshipment/:unitNo',
passport.authenticate('jwt', { session: false }),
(req, res) => {
Shipments.findOne({unitNo: req.params.unitNo}, {paymentStatus: "incomplete"})
.then(shipments => {
if(shipments === null || shipments.paymentStatus === "complete"){
const errwarehouse = "This user doesn't have an existing warehouse";
return res.status(404).json(errwarehouse);
}else{
if (shipments.paymentStatus === "incomplete") {
function getPrice(){
if (initial > dimension){
return initial
}else if(initial === dimension) {
return initial
}else{
return dimension
}
}
const newPackages = {
category: req.body.category,
quantity: req.body.quantity,
description: req.body.description,
width: req.body.width,
itemweight: req.body.itemweight,
length: req.body.length,
height: req.body.height,
dimension,
weight: getPrice(),
};
Shipments.findOneAndUpdate({unitNo: req.params.unitNo ,paymentStatus: "incomplete"},
{"$push": {"packages": newPackages}}, {totalWeight: {"$sum" : {"packages.weight"}}}) //Here is were i add the package to the package array and here is where i tried sumup packages.weight for every time i add new package
.then(shipments=> res.json(shipments))
}
}
});
});
Thank you
var users =db.users.aggregate([
{$unwind:"$packages"},
{
$group:
{
_id: "$_id",
totalWeigth: { $sum: "$packages.weight" }
}
}
]).toArray()
users.forEach((ele)=>{
db.users.update({_id:ele._id},{$set:{totalWeigth:ele.totalWeigth}})
})
If you actually have MongoDB 4.2 or greater then you can use the new aggregation syntax available for updates. This essentially means adding one of the valid aggregation pipeline stages of either $addFields, $set ( an alias of $addFields to make "updates" easier to read ), $project or $replaceRoot, and then actual aggregation operators in order to do the manipulation. In this case $sum:
let writeResult = await db.collection("collection").updateMany(
{},
[{ "$set": { "totalWeight": { "$sum": "$packages.weight" } } }]
);
That adds a new field of totalWeight to every document based on the values present in the whole array of each document.
The main benefit here is that this is a single request to the server which actually performs ALL updating on the server and requires no information from the collection to be sent back to the client for iteration.
If you have an earlier version ( I suggest you don't use anything earlier than 3.4, but even 3.2 would do here ), then you could use bulkWrite() in a loop:
async function updateCollection() {
let cursor = db.collection("collection").aggregate([
{ "$project": {
"totalWeight": { "$sum": "$packages.weight" }
}}
]);
let batch = [];
while ( await cursor.hasNext() ) {
let { _id, totalWeight } = await cursor.next();
batch.push({
"updateOne": {
"filter": { _id },
"update": { "$set": { totalWeight } }
}
});
if ( batch.length > 1000 ) {
await db.collection("collection").bulkWrite(batch);
batch = [];
}
}
if ( batch.length != 0 ) {
await db.collection("collection").bulkWrite(batch);
batch = [];
}
}
And that would do the same thing, but of course actually requires some interaction back and forth with the server in both reading and writing the result back. Though using bulkWrite() you are only sending back writes in batches rather than per document of the collection.
If you have an even older MongoDB, then Update MongoDB field using value of another field has some references to the same techniques in loop iteration that may also apply. But I really do recommend that you should not have any older MongoDB version than those mentioned in the answer here.
N.B. You would probably want to add some try/catch handlers in such update code as well, in case of errors. Or, on the other hand, such one-off operations are probably better executed in something like the MongoDB shell.
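As a minimal sketch of the try/catch approach, simply wrapping the updateCollection() call from above:
// Minimal sketch: basic error handling around the bulkWrite loop above
async function runUpdate() {
  try {
    await updateCollection();
    console.log("totalWeight updated");
  } catch (e) {
    console.error("update failed", e);
  }
}

runUpdate();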
Maintenance
The better solution overall however is to actually keep the total up to date on every addition to the array. As an example, this is basically what you would want when using $push for a new array element and $inc to add to the existing total:
let package = {
"date": "2019-09-26T06:30:39.561Z",
"_id": "5d8c5b0f756838f78d5205d7",
"category": "chil",
"quantity": "177",
"description": "a valueablegoods",
"width": 15,
"itemweight": 123,
"length": 17,
"height": 21,
"dimension": 31.25903614457831,
"weight": 32.25903614457831
};
let writeResult = await db.collection('collection').updateOne(
{ "_id": myIdValue },
{
"$push": { "packages": package },
"$inc": { "totalWeight": package.weight
}
);
In that way you are actually making sure the total is adjusted with every change you make and therefore it does not need constant reprocessing of another statement in order to keep that total in the document. Similar concepts apply for other types of updates other than adding a new item to an array.
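As a minimal sketch of that, the reverse operation of removing an array element can decrement the total in the same statement, assuming package here is the same element that was previously pushed:
// Minimal sketch: remove a package and keep totalWeight in sync in one statement.
let writeResult = await db.collection('collection').updateOne(
  { "_id": myIdValue },
  {
    "$pull": { "packages": { "_id": package._id } },
    "$inc": { "totalWeight": -package.weight }
  }
);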
I rewrote the code so that for each of the package weights a sum is done on them, and here is the code:
// #route POST api/admins/addshipment/:unitNo
// #desc Add shipment for each customer
// #access Private
router.post(
'/addshipment/:unitNo',
passport.authenticate('jwt', { session: false }),
(req, res) => {
Shipments.findOne({unitNo: req.params.unitNo}, {paymentStatus: "incomplete"})
.then(shipments => {
const newPackages = {
category: req.body.category,
quantity: req.body.quantity,
description: req.body.description,
width: req.body.width,
itemweight: req.body.itemweight,
length: req.body.length,
height: req.body.height,
dimension,
weight: getPrice(),
};
let total = 0;
Shipments.findOne({unitNo: req.params.unitNo ,paymentStatus: "incomplete"})
.then(shipments=> {
shipments.packages.push(newPackages)
shipments.packages.forEach(i=>{ total += i.weight})
shipments.totalWeight = total
shipments.save()
res.json(shipments)
})
})
});

How to make a query using Mongoose that gets N results, but combines any documents it finds that meet certain criteria?

I have a Comments collection in Mongoose, and a query that returns the most recent five (an arbitrary number) Comments.
Every Comment is associated with another document. What I would like to do is make a query that returns the most recent 5 comments, with comments associated with the same other document combined.
So instead of a list like this:
results = [
{ _id: 123, associated: 12 },
{ _id: 122, associated: 8 },
{ _id: 121, associated: 12 },
{ _id: 120, associated: 12 },
{ _id: 119, associated: 17 }
]
I'd like to return a list like this:
results = [
{ _id: 124, associated: 3 },
{ _id: 125, associated: 19 },
[
{ _id: 123, associated: 12 },
{ _id: 121, associated: 12 },
{ _id: 120, associated: 12 },
],
{ _id: 122, associated: 8 },
{ _id: 119, associated: 17 }
]
Please don't worry too much about the data format: it's just a sketch to try to show the sort of thing I want. I want a result set of a specified size, but with some results grouped according to some criterion.
Obviously one way to do this would be to just make the query, crawl and modify the results, then recursively make the query again until the result set is as long as desired. That way seems awkward. Is there a better way to go about this? I'm having trouble phrasing it in a Google search in a way that gets me anywhere near anyone who might have insight.
Here's an aggregation pipeline query that will do what you are asking for:
db.comments.aggregate([
{ $group: { _id: "$associated", maxID: { $max: "$_id"}, cohorts: { $push: "$$ROOT"}}},
{ $sort: { "maxID": -1 } },
{ $limit: 5 }
])
Lacking any other fields from the sample data to sort by, I used $_id.
If you'd like results that are a little closer in structure to the sample result set you provided you could add a $project to the end:
db.comments.aggregate([
{ $group: { _id: "$associated", maxID: { $max: "$_id"}, cohorts: { $push: "$$ROOT"}}},
{ $sort: { "maxID": -1 } },
{ $limit: 5 },
{ $project: { _id: 0, cohorts: 1 }}
])
That will print only the result set. Note that even comments that do not share an association object will be in an array. It will be an array of length 1.
If you are concerned about limiting the results in the grouping as Neil Lunn is suggesting, perhaps a $match in the beginning is a smart idea.
db.comments.aggregate([
{ $match: { createDate: { $gte: new Date(new Date() - 5 * 60000) } } },
{ $group: { _id: "$associated", maxID: { $max: "$_id"}, cohorts: { $push: "$$ROOT"}}},
{ $sort: { "maxID": -1 } },
{ $limit: 5 },
{ $project: { _id: 0, cohorts: 1 }}
])
That will only include comments made in the last 5 minutes assuming you have a createDate type field. If you do, you might also consider using that as the field to sort by instead of "_id". If you do not have a createDate type field, I'm not sure how best to limit the comments that are grouped as I do not know of a "current _id" in the way that there is a "current time".
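As a sketch of that variation, assuming a createDate field does exist, you could carry the latest date through the $group and sort on that instead of _id:
db.comments.aggregate([
  { $group: { _id: "$associated", maxDate: { $max: "$createDate" }, cohorts: { $push: "$$ROOT" }}},
  { $sort: { "maxDate": -1 } },
  { $limit: 5 },
  { $project: { _id: 0, cohorts: 1 }}
])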
I honestly think you are asking a lot here and cannot really see the utility myself, but I'm always happy to have that explained to me if there is something useful I have missed.
Bottom line is you want comments from the last five distinct users by date, and then some sort of grouping of additional comments by those users. The last part is where I see difficulty in rules no matter how you want to attack this, but I'll try to keep this to the most brief form.
No way this happens in a single query of any sort. But there are things that can be done to make it an efficient server response:
var async = require('async'),
DataStore = require('nedb'),
store = new DataStore();
async.waterfall(
[
function(callback) {
Comment.aggregate(
[
{ "$match": { "postId": thisPostId } },
{ "$sort": { "associated": 1, "createdDate": -1 } },
{ "$group": {
"_id": "$associated",
"date": { "$first": "$createdDate" }
}},
{ "$sort": { "date": -1 } },
{ "$limit": 5 }
],
callback);
},
function(docs,callback) {
async.each(docs,function(doc,callback) {
Comment.aggregate(
[
{ "$match": { "postId": thisPostId, "associated": doc._id } },
{ "$sort": { "createdDate": -1 } },
{ "$limit": 5 },
{ "$group": {
"_id": "$associated",
"docs": {
"$push": {
"_id": "$_id", "createdDate": "$createdDate"
}
},
"firstDate": { "$first": "$createdDate" }
}}
],
function(err,results) {
if (err) return callback(err);
async.each(results,function(result,callback) {
store.insert( result, function(err, result) {
callback(err);
});
},function(err) {
callback(err);
});
}
);
},
callback);
}
],
function(err) {
if (err) throw err;
store.find({}).sort({ "firstDate": - 1 }).exec(function(err,docs) {
if (err) throw err;
console.log( JSON.stringify( docs, undefined, 4 ) );
});
}
);
Now I stuck more document properties in both the document and the array, but the simplified form based on your sample would then come out like this:
results = [
{ "_id": 3, "docs": [124] },
{ "_id": 19, "docs": [125] },
{ "_id": 12, "docs": [123,121,120] },
{ "_id": 8, "docs": [122] },
{ "_id": 17, "docs": [119] }
]
So the essential idea is to first find your distinct "users" who were the last to comment, by basically chopping off the last 5. Without filtering on some kind of range here, that would go over the entire collection to get those results, so it would be best to restrict this in some way, as in the last hour or last few hours or something sensible as required. Just add those conditions to the $match along with the current post that is associated with the comments.
Once you have those 5, then you want to get any possible "grouped" details for multiple comments by those users. Again, some sort of limit is generally advised for a timeframe, but as a general case this is just looking for the most recent comments by the user on the current post and restricting that to 5.
The execution here is done in parallel, which will use more resources but is fairly effective considering there are only 5 queries to run anyway. In contrast to your example output, the array here is inside the document result, and it contains the original document id values for each comment for reference. Any other content related to the document would be pushed into the array as well as required (ie The content of the comment).
The other little trick here is using nedb as a means for storing the output of each query in an "in memory" collection. This need only really be a standard hash data structure, but nedb gives you a way of doing that while maintaining the MongoDB statement form that you may be used to.
Once all results are obtained you just return them as your output, and sorted as shown to retain the order of who commented last. The actual comments are grouped in the array for each item and you can traverse this to output how you like.
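As a minimal sketch, flattening those grouped results back toward the shape shown in the question could look like this, where docs is the sorted output of the store.find() call above:
// Minimal sketch: single-comment groups become plain entries, multi-comment
// groups stay as arrays, mirroring the layout asked for in the question.
var output = docs.map(function(doc) {
    var comments = doc.docs.map(function(d) {
        return { "_id": d._id, "associated": doc._id };
    });
    return comments.length === 1 ? comments[0] : comments;
});

console.log( JSON.stringify( output, undefined, 4 ) );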
Bottom line here is that you are asking for a compounded version of the "top N results problem", which is something often asked of MongoDB. I've written about ways to tackle this before to show how it's possible in a single aggregation pipeline stage, but it really is not practical for anything more than a relatively small result set.
If you really want to join in the insanity, then you can look at Mongodb aggregation $group, restrict length of array for one of the more detailed examples. But for my money, I would run on parallel queries any day. Node.js has the right sort of environment to support them, so you would be crazy to do it otherwise.

MongoDB greaterThan many values

I have an API built with Node + Mongoose, and now I want to filter the data using some fields. Everything works fine, but how can I get the data like: 1 < age < 3 AND 8 < age < 11
I am trying this:
query.where('age').gte(1).lte(3);
query.where('age').gte(8).lte(11);
but this code only gets me 8 < age < 11
Thanks!
You mean $or, since with $and the two conditions could never both be true at the same time (the ranges do not overlap):
{
"$or": [
{ "age": { "$gte": 1, "$lte": 3 } },
{ "age": { "$gte": 8, "$lte": 11 } }
]
}
Also you are using JavaScript which has a nice free flowing object notation, so the helper methods are really overkill here. Use the standard query operator syntax instead.
Just a demo to show different forms of representing the same query:
var mongoose = require('mongoose'),
Schema = mongoose.Schema;
var testSchema = new Schema({
"age": Number
});
var Test = mongoose.model( 'Test', testSchema, 'testcol' );
var query = Test.where({
"$or": [
{ "age": { "$gte": 1, "$lte": 3 } },
{ "age": { "$gte": 8, "$lte": 11 } }
]
});
console.log( JSON.stringify( query._conditions, undefined, 2 ) );
var query2 = Test.where().or([
{ "age": { "$gte": 1, "$lte": 3 } },
{ "age": { "$gte": 8, "$lte": 11 } }
]);
console.log( JSON.stringify( query2._conditions, undefined, 2 ) );
var query3 = Test.where().or(
[
Test.where("age").gte(1).lte(3)._conditions,
Test.where("age").gte(8).lte(11)._conditions
]
);
console.log( JSON.stringify( query3._conditions, undefined, 2 ) );
Which should also demonstrate that the "helpers" are not really adding much value to how the query is basically formed.
