MongoDB aggregation pipeline with loop - node.js

I am having this aggregation pipeline code below that I would like to run for every day of the year! Essentially calculating the minimum, maximum and average temperature ("TEMP" field) for every day of the year. At the moment I am calling this piece of code 365 times, passing the start date and the end date of a day.
Obviously this is very inefficient. Is there any way to loop this within Mongo so that it's faster, and return an array of 365 average values, 365 min values and 365 max values, or something like that? I'm using a timezone library to derive the start date and end date.
// Computes the average, minimum and maximum TEMP for one UID at one site
// within a single [START_DATE_ARG, END_DATE_ARG] window and hands the
// outcome to cb (first argument 1 on error, 0 on success).
collection.aggregate([
  {
    $match: { $and: [
      { "UID": uid },
      { "TEMP": { $exists: true } },
      { "site": "SITE123" },
      { "updatedAt": { $gte: new Date(START_DATE_ARG), $lte: new Date(END_DATE_ARG) } }
    ]}
  },
  {
    "$group": {
      "_id": "$UID",
      // Field paths are string values: a bare $TEMP would be read by
      // JavaScript as an (undefined) variable before the query is ever sent.
      "avg": { $avg: "$TEMP" },
      "min": { $min: "$TEMP" },
      "max": { $max: "$TEMP" }
    }
  }
], function(err, result){
  if (err){
    cb(1, err);
  }
  else{
    cb(0, result);
  }
});
The datasets look like this
....
{UID: "123", TEMP: 11, site: "SITE123", updatedAt: ISODate("2014-09-12T21:55:19.326Z")}
{UID: "123", TEMP: 10, site: "SITE123", updatedAt: ISODate("2014-09-12T21:55:20.491Z")}
....
Any ideas? Maybe we can pass all the timestamps of all the days of the year in the aggregation pipeline?
Thank you!!

Why run this for every day when you can simply make the date part of the grouping key? This is what the date aggregation operators exist for, so you can aggregate by time frames in a whole period at once without looping:
// One pass over the whole date range: the year, month and day of
// "updatedAt" are part of the grouping key, so every output document holds
// the avg/min/max TEMP for one UID on one calendar day.
collection.aggregate([
  { "$match": {
    "UID": uid,
    "TEMP": { "$exists": true },
    "site": "SITE123",
    "updatedAt": {
      "$gte": new Date(START_DATE_ARG),
      "$lte": new Date(END_DATE_ARG)
    }
  }},
  { "$group": {
    "_id": {
      "uid": "$UID",
      "year": { "$year": "$updatedAt" },
      "month": { "$month": "$updatedAt" },
      "day": { "$dayOfMonth": "$updatedAt" }
    },
    "avg": { "$avg": "$TEMP" },
    "min": { "$min": "$TEMP" },
    "max": { "$max": "$TEMP" }
  }}
])
Or possibly just condensing the date to a timestamp value instead. A little trick of date math with date objects:
// Same per-day grouping, but the day is expressed as a single numeric
// timestamp instead of separate year/month/day fields.
collection.aggregate([
  { "$match": {
    "UID": uid,
    "TEMP": { "$exists": true },
    "site": "SITE123",
    "updatedAt": {
      "$gte": new Date(START_DATE_ARG),
      "$lte": new Date(END_DATE_ARG)
    }
  }},
  { "$group": {
    "_id": {
      "uid": "$UID",
      "date": {
        // Subtracting one date from another yields the difference in
        // milliseconds, so the first operand is "ms since the epoch".
        // Taking away the remainder modulo one day leaves the ms value
        // of the start of that day.
        "$subtract": [
          { "$subtract": [ "$updatedAt", new Date("1970-01-01") ] },
          { "$mod": [
            { "$subtract": [ "$updatedAt", new Date("1970-01-01") ] },
            1000 * 60 * 60 * 24
          ]}
        ]
      }
    },
    "avg": { "$avg": "$TEMP" },
    "min": { "$min": "$TEMP" },
    "max": { "$max": "$TEMP" }
  }}
])
Of course your "date range" here is now all of the dates you require to be in the results, so the start and the end dates for all the things where you intended to loop. The grouping is done in either case to reflect "one day", but of course you could change that to any interval you want to.
Also note that your use of $and here is not necessary. Queries in MongoDB "and" conditions by default. The only time you need that operator is for multiple conditions on the same field that would otherwise not be valid JSON/BSON.

Related

How to split date from datetime in mongodb?

I have written the code below. I am trying to split the date and time, but I can't find a correct solution. I wrote the service in Node.js. If I try to get records between two dates, it works. But if I try to fetch an exact date, it does not work.
exports.screenLog = function(req,res){
console.log(req.query);
Timesheet.find({userId: req.params.id,startTime: $match:{$gte : new Date(req.query.startTime),$lte: new Date(req.query.endTime)}}, function (err, timesheet) {
console.log(timesheet);
var timesheetIdArray = timesheet.map(function(ele){return ele._id});
Screenshot.find()
.where('timesheetId')
.in(timesheetIdArray)
.exec(function(err,data){
//console.log('ScreenData:',data);
if(err) {
res.send(err);
}
res.send(data);
});
});
This is My input format below:
[
{
"_id": "5963653e6b43611240189ea2",
"timesheetId": "595f4f2ec456a422bc291169",
"imageUrl": "/images/2017-07-10_05_00_06_PM.jpg",
"__v": 0,
"createdTime": "2017-07-07T09:06:54.000Z"
},
{
"_id": "5964bef37302792b0864009e",
"timesheetId": "595f4f2ec456a422bc291169",
"imageUrl": "/images/2017-07-11_05_35_07_PM.jpg",
"__v": 0,
"createdTime": "2017-07-11T12:05:07.687Z"
},
{
"_id": "5964bf897302792b086400ad",
"timesheetId": "595f4f2ec456a422bc291169",
"imageUrl": "/images/2017-07-11_05_37_37_PM.jpg",
"__v": 0,
"createdTime": "2017-07-11T12:07:37.446Z"
},
{
"_id": "5964ddf0ee77e90288d26eec",
"timesheetId": "5964ddf0ee77e90288d26eeb",
"imageUrl": "/images/2017-07-11_07_47_20_PM.jpg",
"__v": 0,
"createdTime": "2017-07-11T14:17:20.651Z"
}]
Date and time can't be split in MongoDB. They are stored together as a Date object in the database.
But you can compare dates using $gte, $lte, $eq, etc.
For the comparisons, you can only compare against UTC dates; the moment library can help you construct them,
eg : date:{ $gte : moment(YOUR_DATE_IN_STRING) }
I am not too familiar with Node.Js, but I think the project aggregation may help you out here:
db.MyCollection.aggregate(
[
{
$project:
{
year: { $year: "$MyDateField" },
month: { $month: "$MyDateField" },
day: { $dayOfMonth: "$MyDateField" },
hour: { $hour: "$MyDateField" },
minutes: { $minute: "$MyDateField" },
seconds: { $second: "$MyDateField" },
milliseconds: { $millisecond: "$MyDateField" },
dayOfYear: { $dayOfYear: "$MyDateField" },
dayOfWeek: { $dayOfWeek: "$MyDateField" },
week: { $week: "$MyDateField" }
}
}
]
)
That will return this:
{
"_id" : "5897697667f26827dc9c9028",
"year" : 2017,
"month" : 2,
"day" : 5,
"hour" : 18,
"minutes" : 5,
"seconds" : 41,
"milliseconds" : 822,
"dayOfYear" : 36,
"dayOfWeek" : 1,
"week" : 6
}
Is that getting you any closer to a solution?
You can do something like this in the mongo shell:
// Rebuilds a date-only value (time of day dropped) from a datetime field,
// then filters on it.
db.foo_collection.aggregate([
    {'$addFields': {
        'extracted_date': {'$dateFromParts': {
            'year': {'$year': "$some_datetime_field"},
            'month': {'$month': "$some_datetime_field"},
            'day': {'$dayOfMonth': "$some_datetime_field"}
        }}
    }},
    // In $match the field is referenced by its plain name, without a "$"
    // prefix. The value below is only an example; put your own filter here.
    {'$match': {'extracted_date': ISODate("2017-07-11T00:00:00Z")}}
])
This query extracts date from datetime field and then applies some filter expression on this date.

Query on date or does not exist?

Trying to figure out if I can make this query work using mongoose and nodejs.
Product.find({
price: { $gt: 2, $lt: 3},
date: { $gt: new Date() || $exists: false}
}). exec(callback);
Does anyone know if it is possible to check if a date does not exist send it back or if the date is greater than today?
Thanks
Use $or:
Product.find({
"price": { "$gt": 2, "$lt": 3 },
"$or": [
{ "date": { "$gt": new Date() } },
{ "date": { "$exists": false } }
]
}). exec(callback);
All arguments are generally an implicit AND, so just like it is "price greater than 2 AND less than 3" you are saying in addition "AND the date is greater than this date OR date does not exist".
Just to spell out the logic in phrase form.
With multiple date fields like this, you THEN actually need an explicit $and, since a single query object cannot contain two "$or" keys:
Product.find({
"$and": [
{ "price": { "$gt": 2, "$lt": 3 } },
{ "$or": [
{ "date1": { "$gt": new Date() } },
{ "date1": { "$exists": false } }
]},
{ "$or": [
{ "date2": { "$gt": new Date() } },
{ "date2": { "$exists": false } }
]}
]
}). exec(callback);

Mongodb : Get Documents between specified time irrespective of dates

Suppose the sample documents in Mongodb database are as follows:
{ "date" : ISODate("2015-11-09T05:58:19.474Z") }
{ "date" : ISODate("2014-10-25T07:30:00.241Z") }
{ "date" : ISODate("2015-11-30T15:24:00.251Z") }
{ "date" : ISODate("2012-01-10T18:36:00.101Z") }
Expected:
{ "date" : ISODate("2015-11-09T05:58:19.474Z") }
{ "date" : ISODate("2014-10-25T07:30:00.241Z") }
I am interested in finding the documents whose time in the "date" field is between 04:15 and 07:40, irrespective of day, month and year. In other words, the query has to match any "YYYY-MM-DDT" prefix in the date field.
My approach would be, query all the documents within presumed duration of dates from node and then for each document that matched the query, compare the "date" field of document with "yyyy-MM-DDT"+"required_time" ("YYYY-MM-DD is copied from each document's "date field" to compare by converting into moment() and get month,date and year") using moment.js module.
Is there any way to query to directly get the same results?
Note: I am using nodejs to connect to mongodb
This question is based on Mongodb : Query based on time in ISODate format.
The basic case here is to use math to work out the minutes. Noting you already have an answer on the basic principles of matching, but just seem to have trouble working out the "minutes" part:
// Keeps only the documents whose time of day, taken from "date", lies
// between 04:15 and 07:40 inclusive, whatever the calendar day is.
db.collection.aggregate([
{ "$redact": {
"$cond": {
"if": {
"$and": [
// Lower bound: hour + minute/60 is the time of day as a fractional hour.
{ "$gte": [
{ "$add": [
{ "$hour": "$date" },
{ "$divide": [{ "$minute": "$date" }, 60] }
]},
4 + (15/60)
]},
// Upper bound: the same fractional-hour value compared against 07:40.
{ "$lte": [
{ "$add": [
{ "$hour": "$date" },
{ "$divide": [{ "$minute": "$date" }, 60] }
]},
7 + (40/60)
]}
]
},
// Documents that satisfy both bounds are kept, all others are dropped.
"then": "$$KEEP",
"else": "$$PRUNE"
}
}}
])
So when you $add the $minute value divided by 60 to the $hour, you get a representation that is still between 0 and 24 but with a fractional component.
Or just use date math directly, and clean up duplication with $let:
// Same 04:15-07:40 time-of-day filter, but the fractional hour is computed
// once with date math and bound to a variable via $let.
db.collection.aggregate([
{ "$redact": {
"$cond": {
"if": {
"$let": {
"vars": {
// hours = ((date - epoch) mod one day in ms) / one hour in ms
"hours": {
"$divide": [
{ "$mod": [
{ "$subtract": [ "$date", new Date(0) ] },
1000 * 60 * 60 * 24
]},
1000 * 60 * 60
]
}
},
"in": {
// "$$hours" refers to the variable declared in "vars" above.
"$and": [
{ "$gte": [ "$$hours", 4 + (15/60) ] },
{ "$lte": [ "$$hours", 7 + (40/60) ] }
]
}
}
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}}
])
Either way, it's just about working out the partial component and adding that into the value for selection.
If you really want to query this on a regular basis, I would strongly suggest you set a separate document property to just contain the "time" value and query directly on that instead of calculating on each pass.

Mongodb - query today's totals, week's totals, and month's totals in one query

I have objects in my db that look like this:
{
"_id": ObjectId("563f8c320ef987c122aeeb4a"),
"num": 1515,
"createdAt": ISODate("2015-10-29T21:14:26.477Z"),
}
I'd like to write an aggregation that groups all by a specific id and sums up the total for today, this week, and this month, and do it in one query. I've written three separate queries for this task, but I'm wondering if I can make this more efficient and get it done in one query.
Edit
Somebody mentioned mapReduce as a solution. It seems like a promising solution, but I'm not able to get anything to return from a simple query. The following is what I have tried:
var o = {};
o.map = function () { emit( this.num, this.createdAt ) }
o.reduce = function (k, vals) { return vals }
o.query = {
_id: req.user._id
}
Submission.mapReduce(o, function (err, results) {
console.log(results)
})
The console logs an empty array. I've also tried casting the _id to a mongoose object id, but it still returns an empty array.
This is more really a question of what you expect the output to look like, as any aggregated result essentially needs to group at the lowest level and then progressively group at higher "grains" until the largest level ( "month" ) is reached. This kind of implies data grouped by "month" ultimately, unless you break it down otherwise.
In essence, progressively $group:
db.collection.aggregate([
// First total per day. Rounding dates with math here
{ "$group": {
"_id": {
"$add": [
{ "$subtract": [
{ "$subtract": [ "$createdAt", new Date(0) ] },
{ "$mod": [
{ "$subtract": [ "$createdAt", new Date(0) ] },
1000 * 60 * 60 * 24
]}
]},
new Date(0)
]
},
"week": { "$first": { "$week": "$createdAt" } },
"month": { "$first": { "$month": "$createdAt" } },
"total": { "$sum": "$num" }
}},
// Then group by week
{ "$group": {
"_id": "$week",
"month": { "$first": "$month" },
"days": {
"$push": {
"day": "$_id",
"total": "$total"
}
},
"total": { "$sum": "$total" }
}},
// Then group by month
{ "$group": {
"_id": "$month",
"weeks": {
"$push": {
"week": "$_id",
"total": "$total",
"days": "$days"
}
},
"total": { "$sum": "$total" }
}}
])
So each level after the first, which sums up per day, is then progressively pushed into array content for its "round up" value, and the totals are then summed at that level as well.
If you want a flatter output with one record per day containing its weekly and monthly totals as well as the day total, then just append two $unwind statements to the end of the pipeline:
{ "$unwind": "$weeks" },
{ "$unwind": "$weeks.days" }
And optionally $project the "dotted" fields out to something flatter and readable if you must.
If you are spanning "years" with this, then include such an operation in the grouping key at least from the "week" level so you are not possibly combining data from different years and they are separated.
It is also my own general preference to use the "date math" approach when rounding dates as it returns a Date object, but as is used at the other levels than "day", you can just alternately use the date aggregation operators instead.
No need for mapReduce as this is fairly intuitive and there is a finite amount of days in a month that means the BSON limit when nesting arrays in the content while aggregating will not be broken.

How to count distinct date items in a timestamp field in Mongoose/NodeJS?

I use Mongoose in my NodeJS, and I have a collection with documents that look like this -
// Documents carry a timestamp ("date") and a string item ("some_item").
var SomeSchema = new Schema({
  date: {
    type: Date,
    // Pass the function itself, not its result: `new Date()` would be
    // evaluated once when the schema is defined, so every document would
    // share that same timestamp. Mongoose calls a function default for
    // each new document.
    default: Date.now
  },
  some_item: {
    type: String
  }
});
The date field contains date with the time component (for example, 2014-05-09 09:43:02.478Z). How do I get a count of distinct items for a given date, say 2014-05-08?
EDIT: Ideally, I would like to get a count of records for each distinct date.
What you want is usage of the aggregation framework with the aggregate() command, at least for finding one date as you ask:
SomeSchema.aggregate(
[
{ "$match": {
"date": {
"$gte": new Date("2014-05-08"), "$lt": new Date("2014-05-09")
}
}},
{ "$group": {
"_id": "$some_item",
"count": { "$sum": 1 }
}}
],
function(err,result) {
// do something with result
}
);
If you are specifically looking for "counts" over several dates then you can do something like this:
// Counts the documents for each calendar day within May 2014
// (start inclusive, end exclusive).
SomeSchema.aggregate(
  [
    { "$match": {
      "date": {
        "$gte": new Date("2014-05-01"), "$lt": new Date("2014-06-01")
      }
    }},
    { "$group": {
      // The year/month/day of "date" together form the per-day grouping key.
      "_id": {
        "year": { "$year": "$date" },
        "month": { "$month": "$date" },
        "day": { "$dayOfMonth": "$date" }
      },
      "count": { "$sum": 1 }
    }}
  ],
  function(err,result) {
    // do something with result
  }
);
And that gives you "per day" grouping. Look for the date aggregation operators in the documentation if you wish to take this further.

Resources