Mongodb : Get Documents between specified time irrespective of dates - node.js

Suppose the sample documents in Mongodb database are as follows:
{ "date" : ISODate("2015-11-09T05:58:19.474Z") }
{ "date" : ISODate("2014-10-25T07:30:00.241Z") }
{ "date" : ISODate("2015-11-30T15:24:00.251Z") }
{ "date" : ISODate("2012-01-10T18:36:00.101Z") }
Expected:
{ "date" : ISODate("2015-11-09T05:58:19.474Z") }
{ "date" : ISODate("2014-10-25T07:30:00.241Z") }
I am interested in finding the documents whose time in the "date" field is between 04:15 and 07:40, irrespective of day, month and year. In other words, the query has to match any "YYYY-MM-DDT" prefix in the date field.
My approach would be to query all the documents within a presumed range of dates from Node and then, for each document that matched the query, compare the document's "date" field with "YYYY-MM-DDT" + "required_time" (where "YYYY-MM-DD" is copied from each document's "date" field by converting it into a moment() and reading the month, date and year) using the moment.js module.
Is there any way to query to directly get the same results?
Note: I am using nodejs to connect to mongodb
This question is based on Mongodb : Query based on time in ISODate format.

The basic case here is to use math to work out the minutes. Noting you already have an answer on the basic principles of matching, but just seem to have trouble working out the "minutes" part:
// Keep only documents whose "date" has a time of day between 04:15 and 07:40,
// whatever the calendar date. The time of day is expressed as fractional
// hours: $hour plus $minute / 60 (seconds are not part of the value).
db.collection.aggregate([
{ "$redact": {
"$cond": {
"if": {
"$and": [
// lower bound: time of day >= 4.25 hours (04:15)
{ "$gte": [
{ "$add": [
{ "$hour": "$date" },
{ "$divide": [{ "$minute": "$date" }, 60] }
]},
4 + (15/60)
]},
// upper bound: time of day <= 7.666... hours (07:40)
{ "$lte": [
{ "$add": [
{ "$hour": "$date" },
{ "$divide": [{ "$minute": "$date" }, 60] }
]},
7 + (40/60)
]}
]
},
// $$KEEP returns the document, $$PRUNE drops it
"then": "$$KEEP",
"else": "$$PRUNE"
}
}}
])
So when you $add the $minute value divided by 60 to the $hour, you get a representation that is still between 0 and 24 but with a fractional component.
Or just use date math directly, and clean up duplication with $let:
// Same 04:15 - 07:40 time-of-day filter, computed with date arithmetic and
// with the value calculated once in a $let variable instead of twice.
db.collection.aggregate([
{ "$redact": {
"$cond": {
"if": {
"$let": {
"vars": {
// "hours" = time of day as fractional hours:
// (date - epoch) modulo one day's worth of milliseconds, then divided
// by one hour's worth of milliseconds.
"hours": {
"$divide": [
{ "$mod": [
{ "$subtract": [ "$date", new Date(0) ] },
1000 * 60 * 60 * 24
]},
1000 * 60 * 60
]
}
},
"in": {
"$and": [
// 4.25 hours = 04:15, 7.666... hours = 07:40
{ "$gte": [ "$$hours", 4 + (15/60) ] },
{ "$lte": [ "$$hours", 7 + (40/60) ] }
]
}
}
},
// $$KEEP returns the document, $$PRUNE drops it
"then": "$$KEEP",
"else": "$$PRUNE"
}
}}
])
Either way, it's just about working out the partial component and adding that into the value for selection.
If you really want to query this on a regular basis, I would strongly suggest you set a separate document property to just contain the "time" value and query directly on that instead of calculating on each pass.

Related

How to fill missing documents with values 0 in mongoDB?

I have a collection where I'm storing the water dispensed for a particular day. For days when the device isn't operated, no data is stored in the database, so there is no document for that day in the collection. For example, querying the water dispensed for the last 7 days, where the device only operated for two days, gives me something like this:
[{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645381800),
"waterDispensed" : NumberInt(53)
},
{
"uID" : "12345678",
"midNightTimeStamp" : NumberInt(1645641000),
"waterDispensed" : NumberInt(30)
}]
Converting the above two timestamps gives me data for Monday 21st February and Thursday 24th February. Now if I run the query for 21st Feb to 27th Feb something like this,
db.getCollection("analytics").find({ uID: "12345678", midNightTimeStamp: {"$in": [1645381800, 1645468200, 1645554600, 1645641000, 1645727400, 1645813800, 1645900200]}})
This returns only the above two documents. How can I fill in the missing values for the supplied midNightTimeStamp values, so that I get a document list like this, including entries for the days that don't exist in the collection:
[{
"uID" : "12345678",
"midNightTimeStamp" : 1645381800,
"waterDispensed" : 53
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645468200,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645554600,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645641000,
"waterDispensed" : 30
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645727400,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645813800,
"waterDispensed" : 0
},
{
"uID" : "12345678",
"midNightTimeStamp" : 1645900200,
"waterDispensed" : 0
}
Maybe something like this:
db.collection.aggregate([
{
$group: {
_id: null,
ar: {
$push: "$$ROOT"
},
mind: {
"$min": "$midNightTimeStamp"
},
maxd: {
"$max": "$midNightTimeStamp"
}
}
},
{
$project: {
ar: {
$map: {
input: {
$range: [
"$mind",
{
"$sum": [
"$maxd",
86400
]
},
86400
]
},
as: "dateInRange",
in: {
$let: {
vars: {
dateIndex: {
"$indexOfArray": [
"$ar.midNightTimeStamp",
"$$dateInRange"
]
}
},
in: {
$cond: {
if: {
$ne: [
"$$dateIndex",
-1
]
},
then: {
$arrayElemAt: [
"$ar",
"$$dateIndex"
]
},
else: {
midNightTimeStamp: "$$dateInRange",
"waterDispensed": NumberInt(0)
}
}
}
}
}
}
}
}
},
{
$unwind: "$ar"
},
{
$project: {
_id: 0,
"waterDispensed": "$ar.waterDispensed",
midNightTimeStamp: "$ar.midNightTimeStamp",
"Date": {
$toDate: {
"$multiply": [
"$ar.midNightTimeStamp",
1000
]
}
}
}
}
])
Explained:
$group the documents to find max & min for the timestamps and $push all elements in temporary array named "ar"
$project the array, $map-ping over a $range of dates generated between min & max with a 1-day step ( 86400 ), and fill the empty elements with waterDispensed: 0
$unwind the array $ar
$project only the fields we need in the final output.
playground
This is just a little different than the other answer, and it takes care to just grab the "uID" desired. Comments in the MQL explain the process.
// Return one document per day for the requested uID and date range, using the
// stored document when one exists and a zero-filled placeholder otherwise.
db.collection.aggregate([
  { // The uID we want
    "$match": { "uID": "12345678" }
  },
  { // grab all the uID docs as "water"
    // keep uID
    "$group": {
      "_id": null,
      "water": { "$push": "$$CURRENT" },
      "uID": { "$first": "$uID" }
    }
  },
  { // create outArray
    "$set": {
      "outArray": {
        // by mapping time vals
        "$map": {
          "input": {
            // $range excludes its end value, so go one day (86400 s) past the
            // last requested timestamp to keep that final day in the output.
            "$range": [ NumberInt(1645381800), NumberInt(1645900200 + 86400), 86400 ]
          },
          "in": {
            "$cond": [
              { // already have doc?
                "$in": [ "$$this", "$water.midNightTimeStamp" ]
              },
              { // yes! Get it!
                "$arrayElemAt": [
                  "$water",
                  { "$indexOfArray": [ "$water.midNightTimeStamp", "$$this" ] }
                ]
              },
              { // no, create it
                "uID": "$uID",
                "midNightTimeStamp": "$$this",
                "waterDispensed": 0
              }
            ]
          }
        }
      }
    }
  },
  { // only need outArray now
    "$project": {
      "_id": 0,
      "outArray": 1
    }
  },
  { // create docs
    "$unwind": "$outArray"
  },
  { // hoist them
    "$replaceWith": "$outArray"
  },
  { // don't need _id
    "$unset": "_id"
  }
])
Try it on mongoplayground.net.
As of MongoDB 5.1 you can use the $densify aggregation operator to fill in missing time series data with an average or default value.
https://www.mongodb.com/docs/rapid/reference/operator/aggregation/densify/
In your case, you may need to convert your timestamp field to a date while aggregating so that you can use $densify.
You can also watch a quick explanation of $densify in this presentation from MongoDB World 2022.

Mongo DB How to find common in two arrays and sort in descending order in a single query

Here is my data saves in database
0 _id:5e4d18bd10e5482eb623c6e4
name:'John singh',
cars_owned:[
{car_id:'1'},
{car_id:'5'},
{car_id:'7'},
{car_id:'8'}
],
1 _id:5e4d18bd10e5482eb6g57f5rt
name:'Josh kumar',
cars_owned:[
{car_id:'7'},
{car_id:'9'},
{car_id:'1'},
{car_id:'3'}
],
2 _id:5e4d18bd10e5482eb6r67222
name:'Jesse nigam',
cars_owned:[
{car_id:'6'},
{car_id:'7'},
{car_id:'9'},
{car_id:'3'}
],
3 _id:5e4d18bd10e5482eb6467ii46
name:'Jordan khan',
cars_owned:[
{car_id:'3'},
{car_id:'1'},
{car_id:'4'},
{car_id:'5'}
]
Now I want to search a user with its starting name that is 'J' and also cars_owned by me the input will be
'J',cars_owned['3','7','9','12','10']
and the output will be
1 _id:5e4d18bd10e5482eb6g57f5rt
name:'Josh kumar',
cars_owned:[
{car_id:'7'},
{car_id:'9'},
{car_id:'1'},
{car_id:'3'}
],
2 _id:5e4d18bd10e5482eb6r67222
name:'Jesse nigam',
cars_owned:[
{car_id:'6'},
{car_id:'7'},
{car_id:'9'},
{car_id:'3'}
],
0 _id:5e4d18bd10e5482eb623c6e4
name:'John singh',
cars_owned:[
{car_id:'1'},
{car_id:'5'},
{car_id:'7'},
{car_id:'8'}
],
3 _id:5e4d18bd10e5482eb6467ii46
name:'Jordan khan',
cars_owned:[
{car_id:'3'},
{car_id:'1'},
{car_id:'4'},
{car_id:'5'}
]
As you can see, the result contains all the users whose name starts with 'J' and who own at least one of the cars 3, 7, 9, 12, 10, in descending order of matches: the user with the most matching cars_owned is on top, followed by the others accordingly. I want the result to be sorted by the number of matched cars_owned, in a single MongoDB query. So far I have written one simple find query.
User_data.find({name: { $regex: "^" + search_name },
cars_owned:{$elemMatch:{car_id:'3',car_id:'7',car_id:'9',car_id:'12',car_id:'10'}
}},function(err,resp){
console.log(JSON.stringify(resp,null,4));
});
But it only returns one document. I want all documents which have any of the given car_id values, sorted by the number of matches. If you don't understand anything in this question, feel free to ask in a comment, but please give the answer as a single MongoDB query. I am also OK with the aggregation framework. Thanks in advance.
You have to use aggregation for this.
First you compute how many of the user's cars are in common with your list, then filter the results to those matching the name and having at least one car in common, and finally sort your result by the number of common cars.
Here's the query :
db.collection.aggregate([
{
$addFields: {
commonCars: {
$size: {
$setIntersection: [
[
"3",
"7",
"9",
"12",
"10"
],
"$cars_owned.car_id"
]
},
}
}
},
{
$match: {
$expr: {
$and: [
{
$eq: [
{
$regexMatch: {
input: "$name",
regex: "^J"
}
},
true
]
},
{
$gt: [
"$commonCars",
0
]
}
]
}
}
},
{
$sort: {
"commonCars": -1
}
}
])
And you can test it here
EDIT
if you don't need to sort your result, you can achieve this in one single match stage :
db.collection.aggregate([
{
$match: {
$expr: {
$and: [
{
$eq: [
{
$regexMatch: {
input: "$name",
regex: "^J"
}
},
true
]
},
{
$gt: [
{
$size: {
$setIntersection: [
[
"3",
"7",
"9",
"12",
"10"
],
"$cars_owned.car_id"
]
},
},
0
]
}
]
}
}
},
])
Test it here

Mongo aggregate - Split interval value into months affected

I have a tricky aggregation of data in Mongo and I have no idea how to achieve it directly in Mongo without any later data processing.
Here is a simplified example of the documents in my collection
[
{
"from" : ISODate("2017-01-15T00:00:00.000Z"),
"to" : ISODate("2017-02-15T00:00:00.000Z"),
"value" : 1000
},
{
"from" : ISODate("2017-02-01T00:00:00.000Z"),
"to" : ISODate("2017-02-28T00:00:00.000Z"),
"value" : 2000
},
{
"from" : ISODate("2017-02-20T00:00:00.000Z"),
"to" : ISODate("2017-03-14T00:00:00.000Z"),
"value" : 1000
}
]
Now I would like to get the monthly sum of the values belonging to each specific month.
[
{january: 500}, /* 1/2 of interval id 1 is January so take half the value */
{february: 2833}, /* 500 + 2000 + 333 */
{march: 666}, /* 2/3 of interval id 3 is March */
]
Calculation has to be precise so I can't simplify things by saying all months have exactly 30 days. But what I can do is provide this information from code for each month of the interval. So it should be possible to provide this query information january2017 = 31 days, february2017 = 28 days, march2017 = 31 days
I know I could do this in my node.js code but there might be A LOT of documents in that DB so I would rather not fetch all of these to server to perform the calculation.
Pah, I hope somebody else comes up with a nicer answer but here is one way of getting there:
db.collection.aggregate({
$addFields: {
dayFrom: { $dayOfMonth: "$from" },
dayTo: { $dayOfMonth: "$to" },
monthFrom: { $month: "$from" },
monthTo: { $month: "$to" },
numberOfDays: { $subtract: [ { $dayOfMonth: "$to" }, { $dayOfMonth: "$from" } ] },
numberOfMonths: { $subtract: [ { $month: "$to" }, { $month: "$from" } ] },
}
}, {
$addFields: {
numberOfDaysInFromMonth: { $dayOfMonth: { $subtract: [ { $dateFromParts : { year: { $year: "$from" }, month: { $add: [ "$monthFrom", 1 ] }, day: 1 } }, 1 ] } },
}
}, {
$addFields: {
numberOfDaysAccountingForFromMonth: { $subtract: [ { $add: [ "$numberOfDaysInFromMonth", 1 ] }, "$dayFrom" ] },
numberOfDaysAccountingForToMonth: { $subtract: [ "$dayTo", 1 ] }, // assuming the "to" day does not count anymore
}
}, {
$addFields: {
totalNumberOfDays: { $add: [ "$numberOfDaysAccountingForFromMonth", "$numberOfDaysAccountingForToMonth" ] }
}
}, {
$addFields: {
percentageAccountingForFromMonth: { $divide: [ "$numberOfDaysAccountingForFromMonth", "$totalNumberOfDays" ] },
percentageAccountingForToMonth: { $divide: [ "$numberOfDaysAccountingForToMonth", "$totalNumberOfDays" ] },
}
}, {
$facet: {
"from": [{
$group: {
_id: "$monthFrom",
sum: { $sum: { $multiply: [ "$value", "$percentageAccountingForFromMonth" ] } }
}
}],
"to": [{
$group: {
_id: "$monthTo",
sum: { $sum: { $multiply: [ "$value", "$percentageAccountingForToMonth" ] } }
}
}]
}
}, {
$project: {
total: { $concatArrays: [ "$from", "$to" ] }
}
}, {
$unwind: "$total"
}, {
$group: {
_id: "$total._id",
sum: { $sum: "$total.sum" }
}
})
Some remarks:
You will need to refine that to match your precise definition of
what forms part of a date range and how to count the number of days
("is 2018-01-30 to 2018-01-31 one day or is it two days?").
You might be able to beautify that query using $let and
some nesting. I thought it would be easier to use subsequent $addFields stages to make the beast easier to follow through.
The code does not support from and to values that touch more than two months (e.g. 2018-01-01 to 2018-03-01).

Date operation inside an array (aggregation)

I want to build an online test application using MongoDB and Node.js. There is a feature where an admin can view a user's test history (with a date filter option).
How do I write the query if I want to display only the users whose test results array contains the date specified by the admin?
The date filter will be based on the day, month and year from scheduledAt.startTime, and I think I must use the aggregation framework to achieve this.
Let's say I have users document like below:
{
"_id" : ObjectId("582a7b315c57b9164cac3295"),
"username" : "lalalala#gmail.com",
"displayName" : "lalala",
"testResults" : [
{
"applyAs" : [
"finance"
],
"scheduledAt" : {
"endTime" : ISODate("2016-11-15T16:00:00.000Z"),
"startTime" : ISODate("2016-11-15T01:00:00.000Z")
},
"results" : [
ObjectId("582a7b3e5c57b9164cac3299"),
ObjectId("582a7cc25c57b9164cac329d")
],
"_id" : ObjectId("582a7b3e5c57b9164cac3296")
},
{
.....
}
],
"password" : "andi",
}
testResults document:
{
"_id" : ObjectId("582a7cc25c57b9164cac329d"),
"testCategory" : "english",
"testVersion" : "EAX",
"testTakenTime" : ISODate("2016-11-15T03:10:58.623Z"),
"score" : 2,
"userAnswer" : [
{
"answer" : 1,
"problemId" : ObjectId("581ff74002bb1218f87f3fab")
},
{
"answer" : 0,
"problemId" : ObjectId("581ff78202bb1218f87f3fac")
},
{
"answer" : 0,
"problemId" : ObjectId("581ff7ca02bb1218f87f3fad")
}
],
"__v" : 0
}
What I have tried until now is like below. If I want to count total document, which part of my aggregation framework should I change. Because in query below, totalData is being summed per group not per whole returned document.
User
.aggregate([
{
$unwind: '$testResults'
},
{
$project: {
'_id': 1,
'displayName': 1,
'testResults': 1,
'dayOfTest': { $dayOfMonth: '$testResults.scheduledAt.startTime' },
'monthOfTest': { $month: '$testResults.scheduledAt.startTime' },
'yearOfTest': { $year: '$testResults.scheduledAt.startTime' }
}
},
{
$match: {
dayOfTest: date.getDate(),
monthOfTest: date.getMonth() + 1,
yearOfTest: date.getFullYear()
}
},
{
$group: {
_id: {id: '$_id', displayName: '$displayName'},
testResults: {
$push: '$testResults'
},
totalData: {
$sum: 1
}
}
},
])
.then(function(result) {
res.send(result);
})
.catch(function(err) {
console.error(err);
next(err);
});
You can try something like this. Added the project stage to keep the test results if any of result element matches on the date passed. Add this as the first step in the pipeline and you can add the grouping stage the way you want.
$map applies an equality comparison between the date passed and the start date in each test result element, and generates an array of true and false values. $anyElementTrue inspects this array and returns true only if there is at least one true value in the array. The $match stage then includes only documents whose matched value is true.
aggregate([{
"$project": {
"_id": 1,
"displayName":1,
"testResults": 1,
"matched": {
"$anyElementTrue": {
"$map": {
"input": "$testResults",
"as": "result",
"in": {
"$eq": [{ $dateToString: { format: "%Y-%m-%d", date: '$$result.scheduledAt.startTime' } }, '2016-11-15']
}
}
}
}
}
}, {
"$match": {
"matched": true
}
}])
Alternative Version:
Similar to the above version but this one combines both the project and match stage into one. The $cond with $redact accounts for match and when match is found it keeps the complete tree or else discards it.
aggregate([{
"$redact": {
"$cond": [{
"$anyElementTrue": {
"$map": {
"input": "$testResults",
"as": "result",
"in": {
"$eq": [{
$dateToString: {
format: "%Y-%m-%d",
date: '$$result.scheduledAt.startTime'
}
}, '2016-11-15']
}
}
}
},
"$$KEEP",
"$$PRUNE"
]
}
}])

MongoDB aggregation pipeline with loop

I have the aggregation pipeline code below that I would like to run for every day of the year, essentially calculating the minimum, maximum and average temperature ("TEMP" field) for every day of the year. At the moment I am calling this piece of code 365 times, passing the start date and the end date of a day.
Obviously this is very inefficient. Is there any way to loop this within Mongo so that it's faster, and return an array of 365 average values, 365 min values and 365 max values, or something like that? I'm using a timezone library to derive the start date and end date.
// Compute the average, minimum and maximum TEMP for one UID at one site
// within the [START_DATE_ARG, END_DATE_ARG] window and pass the outcome to cb.
collection.aggregate([
  {
    $match: {$and: [
      {"UID": uid},
      {"TEMP": {$exists: true}},
      {"site": "SITE123"},
      {"updatedAt": {$gte: new Date(START_DATE_ARG), $lte: new Date(END_DATE_ARG)}}
    ]}
  },
  { "$group": {
      "_id": "$UID",
      // field paths must be quoted strings; a bare $TEMP is a JS identifier
      "avg": { $avg: "$TEMP" },
      "min": { $min: "$TEMP" },
      "max": { $max: "$TEMP" }
    }
  }
], function(err, result){
  // cb receives 1 plus the error on failure, 0 plus the result on success
  if (err){
    cb(1, err);
  }
  else{
    cb(0, result);
  }
});
});
The datasets look like this
....
{UID: "123", TEMP: 11, site: "SITE123", updatedAt: ISODate("2014-09-12T21:55:19.326Z")}
{UID: "123", TEMP: 10, site: "SITE123", updatedAt: ISODate("2014-09-12T21:55:20.491Z")}
....
Any ideas? Maybe we can pass all the timestamps of all the days of the year in the aggregation pipeline?
Thank you!!
Why run this for every day when you can simply make the date part of the grouping key? This is what the date aggregation operators exist for, so you can aggregate by time frames in a whole period at once without looping:
// Compute avg/min/max TEMP per calendar day in a single pass by making the
// year, month and day of "updatedAt" part of the grouping key.
// NOTE(review): the date operators work in UTC unless a timezone is supplied;
// confirm this matches how the day boundaries should be drawn.
collection.aggregate([
  // Conditions in $match are implicitly AND-ed. The date range now spans the
  // whole period of interest rather than a single day.
  { "$match": {
    "UID": uid,
    "TEMP": { "$exists": true },
    "site": "SITE123",
    "updatedAt": {
      "$gte": new Date(START_DATE_ARG),
      "$lte": new Date(END_DATE_ARG)
    }
  }},
  { "$group": {
    "_id": {
      "uid": "$UID",
      "year": { "$year": "$updatedAt" },
      "month": { "$month": "$updatedAt" },
      "day": { "$dayOfMonth": "$updatedAt" }
    },
    "avg": { "$avg": "$TEMP" },
    "min": { "$min": "$TEMP" },
    "max": { "$max": "$TEMP" }
  }}
])
Or possibly just condensing the date to a timestamp value instead. A little trick of date math with date objects:
// Compute avg/min/max TEMP per day in a single pass, using a numeric
// "start of day" value as the grouping key instead of year/month/day parts.
collection.aggregate([
  // Conditions in $match are implicitly AND-ed. The date range spans the
  // whole period of interest rather than a single day.
  { "$match": {
    "UID": uid,
    "TEMP": { "$exists": true },
    "site": "SITE123",
    "updatedAt": {
      "$gte": new Date(START_DATE_ARG),
      "$lte": new Date(END_DATE_ARG)
    }
  }},
  { "$group": {
    "_id": {
      "uid": "$UID",
      // (updatedAt - epoch) gives milliseconds; subtracting the remainder
      // modulo one day rounds that value down to the start of the day.
      "date": {
        "$subtract": [
          { "$subtract": [ "$updatedAt", new Date("1970-01-01") ] },
          { "$mod": [
            { "$subtract": [ "$updatedAt", new Date("1970-01-01") ] },
            1000 * 60 * 60 * 24
          ]}
        ]
      }
    },
    "avg": { "$avg": "$TEMP" },
    "min": { "$min": "$TEMP" },
    "max": { "$max": "$TEMP" }
  }}
])
Of course your "date range" here is now all of the dates you require to be in the results, so the start and the end dates for all the things where you intended to loop. The grouping is done in either case to reflect "one day", but of course you could change that to any interval you want to.
Also note that your use of $and here is not necessary. Queries in MongoDB "and" conditions by default. The only time you need that operator is for multiple conditions on the same field that would otherwise not be valid JSON/BSON.

Resources