Date operation insede and array (aggreagation) - node.js

I want to build online test application using mongoDB and nodeJS. And there is a feature which admin can view user test history (with date filter option).
How to do the query, if I want to display only user which the test results array contains date specified by admin.
The date filter will be based on day, month, year from scheduledAt.startTime, and I think I must use aggregate framework to achieve this.
Let's say I have users document like below:
{
"_id" : ObjectId("582a7b315c57b9164cac3295"),
"username" : "lalalala#gmail.com",
"displayName" : "lalala",
"testResults" : [
{
"applyAs" : [
"finance"
],
"scheduledAt" : {
"endTime" : ISODate("2016-11-15T16:00:00.000Z"),
"startTime" : ISODate("2016-11-15T01:00:00.000Z")
},
"results" : [
ObjectId("582a7b3e5c57b9164cac3299"),
ObjectId("582a7cc25c57b9164cac329d")
],
"_id" : ObjectId("582a7b3e5c57b9164cac3296")
},
{
.....
}
],
"password" : "andi",
}
testResults document:
{
"_id" : ObjectId("582a7cc25c57b9164cac329d"),
"testCategory" : "english",
"testVersion" : "EAX",
"testTakenTime" : ISODate("2016-11-15T03:10:58.623Z"),
"score" : 2,
"userAnswer" : [
{
"answer" : 1,
"problemId" : ObjectId("581ff74002bb1218f87f3fab")
},
{
"answer" : 0,
"problemId" : ObjectId("581ff78202bb1218f87f3fac")
},
{
"answer" : 0,
"problemId" : ObjectId("581ff7ca02bb1218f87f3fad")
}
],
"__v" : 0
}
What I have tried until now is like below. If I want to count total document, which part of my aggregation framework should I change. Because in query below, totalData is being summed per group not per whole returned document.
User
.aggregate([
{
$unwind: '$testResults'
},
{
$project: {
'_id': 1,
'displayName': 1,
'testResults': 1,
'dayOfTest': { $dayOfMonth: '$testResults.scheduledAt.startTime' },
'monthOfTest': { $month: '$testResults.scheduledAt.startTime' },
'yearOfTest': { $year: '$testResults.scheduledAt.startTime' }
}
},
{
$match: {
dayOfTest: date.getDate(),
monthOfTest: date.getMonth() + 1,
yearOfTest: date.getFullYear()
}
},
{
$group: {
_id: {id: '$_id', displayName: '$displayName'},
testResults: {
$push: '$testResults'
},
totalData: {
$sum: 1
}
}
},
])
.then(function(result) {
res.send(result);
})
.catch(function(err) {
console.error(err);
next(err);
});

You can try something like this. Added the project stage to keep the test results if any of result element matches on the date passed. Add this as the first step in the pipeline and you can add the grouping stage the way you want.
$map applies an equals comparison between the date passed and start date in each test result element and generates an array with true and false values. $anyElementTrue inspects this array and returns true only if there is atleast one true value in the array. Match stage to include only elements with matched value of true.
aggregate([{
"$project": {
"_id": 1,
"displayName":1,
"testResults": 1,
"matched": {
"$anyElementTrue": {
"$map": {
"input": "$testResults",
"as": "result",
"in": {
"$eq": [{ $dateToString: { format: "%Y-%m-%d", date: '$$result.scheduledAt.startTime' } }, '2016-11-15']
}
}
}
}
}
}, {
"$match": {
"matched": true
}
}])
Alternative Version:
Similar to the above version but this one combines both the project and match stage into one. The $cond with $redact accounts for match and when match is found it keeps the complete tree or else discards it.
aggregate([{
"$redact": {
"$cond": [{
"$anyElementTrue": {
"$map": {
"input": "$testResults",
"as": "result",
"in": {
"$eq": [{
$dateToString: {
format: "%Y-%m-%d",
date: '$$result.scheduledAt.startTime'
}
}, '2016-11-15']
}
}
}
},
"$$KEEP",
"$$PRUNE"
]
}
}])

Related

How to fill missing documents with values 0 in mongoDB?

I have a collection where I'm storing water dispensed for a particular day. Now for some days when the device isn't operated the data isn't stored in the database and I won't be getting the data in the collection. For example, I am querying water dispensed for the last 7 days where the device only operated for two day gives me something like this:
[{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645381800),
"waterDispensed" : NumberInt(53)
},
{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645641000),
"waterDispensed" : NumberInt(30)
}]
Converting the above two timestamps gives me data for Monday 21st February and Thursday 24th February. Now if I run the query for 21st Feb to 27th Feb something like this,
db.getCollection("analytics").find({ uID: "12345678", midNightTimeStamp: {"$in": [1645381800, 1645468200, 1645554600, 1645641000, 1645727400, 1645813800, 1645900200]}})
This returns me above two documents only, how to fill missing values for midNightTimeStamp supplied to get the document list like this which doesn't exists:
[{
"uID" : "12345678",
"midNightTimeStamp" : 1645381800,
"waterDispensed" : 53
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645468200,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645554600,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645641000,
"waterDispensed" : 30
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645727400,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645813800,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645900200,
"waterDispensed" : 0
}
Maybe something like this:
db.collection.aggregate([
{
$group: {
_id: null,
ar: {
$push: "$$ROOT"
},
mind: {
"$min": "$midNightTimeStamp"
},
maxd: {
"$max": "$midNightTimeStamp"
}
}
},
{
$project: {
ar: {
$map: {
input: {
$range: [
"$mind",
{
"$sum": [
"$maxd",
86400
]
},
86400
]
},
as: "dateInRange",
in: {
$let: {
vars: {
dateIndex: {
"$indexOfArray": [
"$ar.midNightTimeStamp",
"$$dateInRange"
]
}
},
in: {
$cond: {
if: {
$ne: [
"$$dateIndex",
-1
]
},
then: {
$arrayElemAt: [
"$ar",
"$$dateIndex"
]
},
else: {
midNightTimeStamp: "$$dateInRange",
"waterDispensed": NumberInt(0)
}
}
}
}
}
}
}
}
},
{
$unwind: "$ar"
},
{
$project: {
_id: 0,
"waterDispensed": "$ar.waterDispensed",
midNightTimeStamp: "$ar.midNightTimeStamp",
"Date": {
$toDate: {
"$multiply": [
"$ar.midNightTimeStamp",
1000
]
}
}
}
}
])
Explained:
$group the documents to find max & min for the timestamps and $push all elements in temporary array named "ar"
$project the array $mapping with a $range of generated dated between max & min with 1x day step ( 86400 ) , fill the empty elements with waterDispanced:0
$unwind the array $ar
$project only the fields we need in the final output.
playground
This is just a little different than the other answer, and it takes care to just grab the "uID" desired. Comments in the MQL explain the process.
db.collection.aggregate([
{ // The uID we want
"$match": { "uID": "12345678" }
},
{ // grab all the uID docs as "water"
// keep uID
"$group": {
"_id": null,
"water": { "$push": "$$CURRENT" },
"uID": { "$first": "$uID" }
}
},
{ // create outArray
"$set": {
"outArray": {
// by mapping time vals
"$map": {
"input": {
"$range": [ NumberInt(1645381800), NumberInt(1645900200), 86400 ]
},
"in": {
"$cond": [
{ // already have doc?
"$in": [ "$$this", "$water.midNightTimeStamp" ]
},
{ // yes! Get it!
"$arrayElemAt": [
"$water",
{ "$indexOfArray": [ "$water.midNightTimeStamp", "$$this" ] }
]
},
{ // no, create it
"uID": "$uID",
"midNightTimeStamp": "$$this",
"waterDispensed": 0
}
]
}
}
}
}
},
{ // only need outArray now
"$project": {
"_id": 0,
"outArray": 1
}
},
{ // create docs
"$unwind": "$outArray"
},
{ // hoist them
"$replaceWith": "$outArray"
},
{ // don't need _id
"$unset": "_id"
}
])
Try it on mongoplayground.net.
As of MongoDB 5.1 you can use the $densify aggregation operator to fill in missing time series data with an average or default value.
https://www.mongodb.com/docs/rapid/reference/operator/aggregation/densify/
In your case, you may need to convert your timestamp field to a date while aggregating so that you can use $densify.
You can also watch a quick explanation of $densify in this presentation from MongoDB World 2022.

Find upcoming documents based on subdocument date

Suppose I have some MongoDB Event documents, each of which has a number of sessions which take place on different dates. We might represent this as:
db.events.insert([
{
_id: '5be9860fcb16d525543cafe1',
name: 'Past',
host: '5be9860fcb16d525543daff1',
sessions: [
{ date: new Date(Date.now() - 1e8 ) },
{ date: new Date(Date.now() + 1e8 ) }
]
}, {
_id: '5be9860fcb16d525543cafe2',
name: 'Future',
host: '5be9860fcb16d525543daff2',
sessions: [
{ date: new Date(Date.now() + 2e8) },
{ date: new Date(Date.now() + 3e8) }
]
}
]);
I'd like to find all Events which have not yet had their first session. So I'd like to find 'Future' but not 'Past'.
At the moment I'm using Mongoose and Express to do:
Event.aggregate([
{ $unwind: '$sessions' }, {
$group: {
_id: '$_id',
startDate: { $min: '$sessions.date' }
}
},
{ $sort:{ startDate: 1 } }, {
$match: { startDate: { $gte: new Date() } }
}
])
.then(result => Event.find({ _id: result.map(result => result._id) }))
.then(event => Event.populate(events, 'host'))
.then(events => res.json(events))
But I feel like I'm making heavy weather of this. Two hits on the database (three if you include the populate statement) and a big, complicated aggregate statement.
Is there a simpler way to do this? Ideally one which only involves one trip to the database.
You could use $reduce to fold the array and find if any of of the elements have a past session.
To illustrate this, consider running the following aggregate pipeline:
db.events.aggregate([
{ "$match": { "sessions.date": { "$gte": new Date() } } },
{ "$addFields": {
"hasPastSession": {
"$reduce": {
"input": "$sessions.date",
"initialValue": false,
"in": {
"$or" : [
"$$value",
{ "$lt": ["$$this", new Date()] }
]
}
}
}
} },
//{ "$match": { "hasPastSession": false } }
])
Based on the above sample, this will yield the following documents with the extra field
/* 1 */
{
"_id" : "5be9860fcb16d525543cafe1",
"name" : "Past",
"host" : "5be9860fcb16d525543daff1",
"sessions" : [
{
"date" : ISODate("2019-01-03T12:04:36.174Z")
},
{
"date" : ISODate("2019-01-05T19:37:56.174Z")
}
],
"hasPastSession" : true
}
/* 2 */
{
"_id" : "5be9860fcb16d525543cafe2",
"name" : "Future",
"host" : "5be9860fcb16d525543daff2",
"sessions" : [
{
"date" : ISODate("2019-01-06T23:24:36.174Z")
},
{
"date" : ISODate("2019-01-08T03:11:16.174Z")
}
],
"hasPastSession" : false
}
Armed with this aggregate pipeline, you can then leverage $expr and use the pipeline expression as your query in the find() method (or using the aggregate operation above but with the $match pipeline step at the end enabled) as
db.events.find(
{ "$expr": {
"$eq": [
false,
{ "$reduce": {
"input": "$sessions.date",
"initialValue": false,
"in": {
"$or" : [
"$$value",
{ "$lt": ["$$this", new Date()] }
]
}
} }
]
} }
)
which returns the document
{
"_id" : "5be9860fcb16d525543cafe2",
"name" : "Future",
"host" : "5be9860fcb16d525543daff2",
"sessions" : [
{
"date" : ISODate("2019-01-06T23:24:36.174Z")
},
{
"date" : ISODate("2019-01-08T03:11:16.174Z")
}
]
}
You don't need to use $unwind and $group to find the $min date from the array. You can directly use $min to extract the min date from the session array and then use $lookup to populate the host key
db.events.aggregate([
{ "$match": { "sessions.date": { "$gte": new Date() }}},
{ "$addFields": { "startDate": { "$min": "$sessions.date" }}},
{ "$match": { "startDate": { "$gte": new Date() }}},
{ "$lookup": {
"from": "host",
"localField": "host",
"foreignField": "_id",
"as": "host"
}},
{ "$unwind": "$host" }
])
Is it possible you can just reach into the sessions of each event, and pull back each event where all session dates are only in the future? Something like this? Might need tweaking..
db.getCollection("events").aggregate(
[
{$match:{'$and':
[
{'sessions.date':{'$gt': new Date()}},
{'sessions.date':{ '$not': {'$lt': new Date()}}}
]
}}
]
);

GroupBy date + mongodb

I have to aggregate the result based on the month of the given document. Consider the following as my document:
{
"_id" : ObjectId("5b3314a12b05b1b247366f48"),
"email" : "abc#gmail.com",
"qwerty":[{
"id" : "5ba4ebbad1b5eaf038841302",
"status" : "inprogress",
"Date" : "2018-08-20"
},
{
"id" : "5ba4ebbad1b5eaf038841303",
"status" : "inprogress",
"Date" : "2018-08-20"
}]
Following is my query:
var query =[
{ $match: {"email":email} },
{$unwind: "$courses" },
{$group:{_id:{$substrCP: ["$qwerty.Date", 5, 2]},count:{$sum:1}}}
];
Its working properly. But i $substrCP: ["$qwerty.Date", 5, 2] is based on the date format is "2018-08-20", what if "20-08-2018"?? So its possible to change the above query to accomodate of nay type.
Also i tried with new Date("").getMonth() but its showing as "NaN", i get to know that its not possible to use inside group.
Please suggest your ideas.
You can utilize $month in combination with $dateFromString to get what you need:
db.collection.aggregate([
{
$match: {
"email": "abc#gmail.com"
}
},
{
$unwind: "$qwerty"
},
{
$group: {
_id: {
$month: {
$dateFromString: {
dateString: "$qwerty.Date"
}
}
},
count: {
$sum: 1
}
}
}
])
You can see it here with the two different date formats.
To group per the date you can do the same without the $month:
db.collection.aggregate([
{
$match: {
"email": "abc#gmail.com"
}
},
{
$unwind: "$qwerty"
},
{
$group: {
_id: {
$dateFromString: {
dateString: "$qwerty.Date"
}
},
count: {
$sum: 1
}
}
}
])
See this version here

How to include empty array when filtering an array

Here is my item model.
const itemSchema = new Schema({
name: String,
category: String,
occupied: [Number],
active: { type: Boolean, default: true },
});
I want to filter 'occupied' array. So I use aggregate and unwind 'occupied' field.
So I apply match query. And group by _id.
But if filtered 'occupied' array is empty, the item disappear.
Here is my code
Item.aggregate([
{ $match: {
active: true
}},
{ $unwind:
"$occupied",
},
{ $match: { $and: [
{ occupied: { $gte: 100 }},
{ occupied: { $lt: 200 }}
]}},
{ $group : {
_id: "$_id",
name: { $first: "$name"},
category: { $first: "$category"},
occupied: { $addToSet : "$occupied" }
}}
], (err, items) => {
if (err) throw err;
return res.json({ data: items });
});
Here is example data set
{
"_id" : ObjectId("59c1bced987fa30b7421a3eb"),
"name" : "printer1",
"category" : "printer",
"occupied" : [ 95, 100, 145, 200 ],
"active" : true
},
{
"_id" : ObjectId("59c2dbed992fb91b7421b1ad"),
"name" : "printer2",
"category" : "printer",
"occupied" : [ ],
"active" : true
}
The result above query
[
{
"_id" : ObjectId("59c1bced987fa30b7421a3eb"),
"name" : "printer1",
"category" : "printer",
"occupied" : [ 100, 145 ],
"active" : true
}
]
and the result I want
[
{
"_id" : ObjectId("59c1bced987fa30b7421a3eb"),
"name" : "printer1",
"category" : "printer",
"occupied" : [ 100, 145 ],
"active" : true
},
{
"_id" : ObjectId("59c2dbed992fb91b7421b1ad"),
"name" : "printer2",
"category" : "printer",
"occupied" : [ ],
"active" : true
}
]
how could I do this??
Thanks in advance.
In the simplest form, you keep it simply by not using $unwind in the first place. Your conditions applied imply that you are looking for the "unique set" of matches to specific values.
For this you instead use $filter, and a "set operator" like $setUnion to reduce the input values to a "set" in the first place:
Item.aggregate([
{ "$match": { "active": true } },
{ "$project": {
"name": 1,
"category": 1,
"occupied": {
"$filter": {
"input": { "$setUnion": [ "$occupied", []] },
"as": "o",
"cond": {
"$and": [
{ "$gte": ["$$o", 100 ] },
{ "$lt": ["$$o", 200] }
]
}
}
}
}}
], (err, items) => {
if (err) throw err;
return res.json({ data: items });
});
Both have been around since MongoDB v3, so it's pretty common practice to do things this way.
If for some reason you were still using MongoDB 2.6, then you could apply $map and $setDifference instead:
Item.aggregate([
{ "$match": { "active": true } },
{ "$project": {
"name": 1,
"category": 1,
"occupied": {
"$setDifference": [
{ "$map": {
"input": "$occupied",
"as": "o",
"in": {
"$cond": {
"if": {
"$and": [
{ "$gte": ["$$o", 100 ] },
{ "$lt": ["$$o", 200] }
]
},
"then": "$$o",
"else": false
}
}
}},
[false]
]
}
}}
], (err, items) => {
if (err) throw err;
return res.json({ data: items });
});
It's the same "unique set" result as pulling the array apart, filtering the items and putting it back together with $addToSet. The difference being that its far more efficient, and retains ( or produces ) an empty array without any issues.

Create Price Range in mongo with aggregation pipeline with Nodejs

Wan't create Price range Using mongodb aggregation pipeline..
while using elastic search or solr we can directly get price filter range value... How can i create price range according to my products price, if there is no product in that range then don't create that range...
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 1200
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 200
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 2000
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 2020
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 100
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 3500
},
{
"_id" : ObjectId("5657412ddb70397479575d1d"),"price" : 3900
}
From above i have to create price range as par product price like filter available in flipkart OR myntra using Mongo aggregation...
[
{
range : '100-200',
count : 2
},
{
range : '1200-2020',
count : 3
},
{
range : '3500-3900',
count : 2
}
]
Within the aggregation framework pipeline, you can take advantage of the $cond operator in the $project stage to create an extra field that denotes the range the price falls in, and then use the $group step to get the counts:
var pipeline = [
{
"$project": {
"price": 1,
"range": {
"$cond": [
{
"$and": [
{ "$gte": ["$price", 100] },
{ "$lte": ["$price", 200] }
]
},
"100-200",
{
"$cond": [
{
"$and": [
{ "$gte": ["$price", 1200] },
{ "$lte": ["$price", 2020] }
]
},
"1200-2020", "2021-above"
]
}
]
}
}
},
{
"$group": {
"_id": "$range",
"count": { "$sum": 1 }
}
},
{
"$project": {
"_id": 0,
"range": "$_id",
"count": 1
}
}
];
collection.aggregate(pipeline, function (err, result){
if (err) {/* Handle err */};
console.log(JSON.stringify(result, null, 4));
});

Resources