Insert or update object element in array [duplicate] - node.js

I'm new to MongoDB and Mongoose and I'm trying to use it to save stock ticks for daytrading analysis. So I imagined this Schema:
symbolSchema = Schema({
name:String,
code:String
});
quoteSchema = Schema({
date:{type:Date, default: now},
open:Number,
high:Number,
low:Number,
close:Number,
volume:Number
});
intradayQuotesSchema = Schema({
id_symbol:{type:Schema.Types.ObjectId, ref:"symbol"},
day:Date,
quotes:[quotesSchema]
});
From my link I receive information like this every minute:
date | symbol | open | high | low | close | volume
2015-03-09 13:23:00|AAPL|127,14|127,17|127,12|127,15|19734
I have to:
Find the ObjectId of the symbol (AAPL).
Discover if the intradayQuote document of this symbol already exists (symbol and date combination)
Discover if the minute OHLCV data of this symbol exists on the quotes array (because it could be repeated)
Update or create the document and update or create the quotes inside the array
I'm able to accomplish this task without veryfing if the quotes already exists, but this method can creates repeated entries inside quotes array:
symbol.find({"code":mySymbol}, function(err, stock) {
intradayQuote.findOneAndUpdate({
{ id_symbol:stock[0]._id, day: myDay },
{ $push: { quotes: myQuotes } },
{ upsert: true },
myCallback
});
});
I already tried:
$addToSet instead of $push, but unfortunatelly this doesn't seems to work with array of documents
{ id_symbol:stock[0]._id, day: myDay, 'quotes["date"]': myDate } on the conditions of findOneAndUpdate; but unfortunatelly if mongo doesn't find it, it creates a new document for the minute instead of appending to the quotes array.
Is there a way to get this working without using one more query (I'm already using 2)? Should I rethink my Schema to facilitate this job? Any help will be appreciated. Thanks!

Basically put an $addToSet operator cannot work for you because your data is not a true "set" by definition being a collection of "completely distinct" objects.
The other piece of logical sense here is that you would be working on the data as it arrives, either as a sinlge object or a feed. I'll presume its a feed of many items in some form and that you can use some sort of stream processor to arrive at this structure per document received:
{
"date": new Date("2015-03-09 13:23:00.000Z"),
"symbol": "AAPL",
"open": 127.14
"high": 127.17,
"low": 127.12
"close": 127.15,
"volume": 19734
}
Converting to a standard decimal format as well as a UTC date since any locale settings really should be the domain of your application once data is retrieved from the datastore of course.
I would also at least flatten out your "intraDayQuoteSchema" a little by removing the reference to the other collection and just putting the data in there. You would still need a lookup on insertion, but the overhead of the additional populate on read would seem to be more costly than the storage overhead:
intradayQuotesSchema = Schema({
symbol:{
name: String,
code: String
},
day:Date,
quotes:[quotesSchema]
});
It depends on you usage patterns, but it's likely to be more effective that way.
The rest really comes down to what is acceptable to
stream.on(function(data) {
var symbol = data.symbol,
myDay = new Date(
data.date.valueOf() -
( data.date.valueOf() % 1000 * 60 * 60 * 24 ));
delete data.symbol;
symbol.findOne({ "code": symbol },function(err,stock) {
intraDayQuote.findOneAndUpdate(
{ "symbol.code": symbol , "day": myDay },
{ "$setOnInsert": {
"symbol.name": stock.name
"quotes": [data]
}},
{ "upsert": true }
function(err,doc) {
intraDayQuote.findOneAndUpdate(
{
"symbol.code": symbol,
"day": myDay,
"quotes.date": data.date
},
{ "$set": { "quotes.$": data } },
function(err,doc) {
intraDayQuote.findOneAndUpdate(
{
"symbol.code": symbol,
"day": myDay,
"quotes.date": { "$ne": data.date }
},
{ "$push": { "quotes": data } },
function(err,doc) {
}
);
}
);
}
);
});
});
If you don't actually need the modified document in the response then you would get some benefit by implementing the Bulk Operations API here and sending all updates in this package within a single database request:
stream.on("data",function(data) {
var symbol = data.symbol,
myDay = new Date(
data.date.valueOf() -
( data.date.valueOf() % 1000 * 60 * 60 * 24 ));
delete data.symbol;
symbol.findOne({ "code": symbol },function(err,stock) {
var bulk = intraDayQuote.collection.initializeOrderedBulkOp();
bulk.find({ "symbol.code": symbol , "day": myDay })
.upsert().updateOne({
"$setOnInsert": {
"symbol.name": stock.name
"quotes": [data]
}
});
bulk.find({
"symbol.code": symbol,
"day": myDay,
"quotes.date": data.date
}).updateOne({
"$set": { "quotes.$": data }
});
bulk.find({
"symbol.code": symbol,
"day": myDay,
"quotes.date": { "$ne": data.date }
}).updateOne({
"$push": { "quotes": data }
});
bulk.execute(function(err,result) {
// maybe do something with the response
});
});
});
The point is that only one of the statements there will actually modify data, and since this is all sent in the same request there is less back and forth between the application and server.
The alternate case is that it might just be more simple in this case to have the actual data referenced in another collection. This then just becomes a simple matter of processing upserts:
intradayQuotesSchema = Schema({
symbol:{
name: String,
code: String
},
day:Date,
quotes:[{ type: Schema.Types.ObjectId, ref: "quote" }]
});
// and in the steam processor
stream.on("data",function(data) {
var symbol = data.symbol,
myDay = new Date(
data.date.valueOf() -
( data.date.valueOf() % 1000 * 60 * 60 * 24 ));
delete data.symbol;
symbol.findOne({ "code": symbol },function(err,stock) {
quote.update(
{ "date": data.date },
{ "$setOnInsert": data },
{ "upsert": true },
function(err,num,raw) {
if ( !raw.updatedExisting ) {
intraDayQuote.update(
{ "symbol.code": symbol , "day": myDay },
{
"$setOnInsert": {
"symbol.name": stock.name
},
"$addToSet": { "quotes": data }
},
{ "upsert": true },
function(err,num,raw) {
}
);
}
}
);
});
});
It really comes down to how important to you is it to have the data for quotes nested within the "day" document. The main distinction is if you want to query those documents based on the data some of those "quote" fields or otherwise live with the overhead of using .populate() to pull in the "quotes" from the other collection.
Of course if referenced and the quote data is important to your query filtering, then you can always just query that collection for the _id values that match and use an $in query on the "day" documents to only match days that contain those matched "quote" documents.
It's a big decision where it matters most which path you take based on how your application uses the data. Hopefully this should guide you on the general concepts behind doing what you want to achieve.
P.S Unless you are "sure" that your source data is always a date rounded to an exact "minute" then you probably want to employ the same kind of date rounding math as used to get the discrete "day" as well.

Related

mongoose query regex to ignore middle character

I am working with mongodb and my data is stored as day:"3/6/2020" i want to query the string that matches day:"3/2020" without the middle value or omitting the 6 in day:"3/6/2020".
such as
`myModel.find({"day": {$regex: "3/2020", $options:" what option to pass here to ignore the `
` middle value"}});`
or any better way
`model.find({"name": {$regex: search, $options:" regex options"}}, (err, users)=> {`
` res.status(200).send({users});`
"3/2020" to match any record with 3 and 2020 just as this "3/2020" matches with "3/6/2020"
I guess you could use aggregation framework to convert the string date in to proper date, fetch months and years and match against that properly. Something like this
model.aggregate([{
"$addFields": {
"convertedDate": {
"$toDate": "$myDate" // mydate is your field name which has string
}
}
}, {
"$addFields": {
"month": {
"$month": "$convertedDate"
},
"year": {
"$year": "$convertedDate"
}
}
}, {
"$match": {
"month": 3, // this is your search criteria
"year": 2020 // // this is your search criteria
}
}, {
"$project": {
"month": 0, // Do not take temp added fields month and year
"year": 0 // Do not take temp added fields month and year
}
}])
This might look like a big query but I guess much better than doing string comaprision using regex. If your field is saved in date format, you could also remove the first stage in which you are doing $toDate. Hope this helps
That might help you. We match the 3, than a number wich is one or two characters wide, then at the end 2020.
3\/\d{1,2}\/2020
https://regex101.com/r/jn4zwa/1
Solved. i solved it splitting my date as in my schema
```
qDay :{
type:Number, default: new Date().getDate(Date.now)
},
qMonth :{
type:Number, default: new Date().getMonth(Date.now) + 1
},
qYear :{
type:Number, default: new Date().getFullYear(Date.now)
}```
my query
```const getByDay = async (req, res)=> {
console.log(req.body);
merchantModel.find({$and:[{qMonth : req.body.month}
,{qYear: req.body.year},{qDay:req.body.day}]}).then((record)=> {
if(record.length == 0){
res.status(404).send({msg:'no record!'});
}else{
co`enter code here`nsole.log(record);
res.status(200).send({record:record});
}
});
}```

Aggregation Timing Out

I am using aggregates to query my schema for counts over date ranges, my problem is i am not getting any response from the server (Times out everytime), other mongoose queries are working fine (find, save, etc.) and when i call aggregates it depends on the pipeline (when i only use match i get a response when i add unwind i don't get any).
Connection Code:
var promise = mongoose.connect('mongodb://<username>:<password>#<db>.mlab.com:<port>/<db-name>', {
useMongoClient: true,
replset: {
ha: true, // Make sure the high availability checks are on
haInterval: 5000 // Run every 5 seconds
}
});
promise.then(function(db){
console.log('DB Connected');
}).catch(function(e){
console.log('DB Not Connected');
console.errors(e.message);
process.exit(1);
});
Schema:
var ProspectSchema = new Schema({
contact_name: {
type: String,
required: true
},
company_name: {
type: String,
required: true
},
contact_info: {
type: Array,
required: true
},
description:{
type: String,
required: true
},
product:{
type: Schema.Types.ObjectId, ref: 'Product'
},
progression:{
type: String
},
creator:{
type: String
},
sales: {
type: Schema.Types.ObjectId,
ref: 'User'
},
technical_sales: {
type: Schema.Types.ObjectId,
ref: 'User'
},
actions: [{
type: {type: String},
description: {type: String},
date: {type: Date}
}],
sales_connect_id: {
type: String
},
date_created: {
type: Date,
default: Date.now
}
});
Aggregation code:
exports.getActionsIn = function(start_date, end_date) {
var start = new Date(start_date);
var end = new Date(end_date);
return Prospect.aggregate([
{
$match: {
// "actions": {
// $elemMatch: {
// "type": {
// "$exists": true
// }
// }
// }
"actions.date": {
$gte: start,
$lte: end
}
}
}
,{
$project: {
_id: 0,
actions: 1
}
}
,{
$unwind: "actions"
}
,{
$group: {
_id: "actions.date",
count: {
$sum: 1
}
}
}
// ,{
// $project: {
// _id: 0,
// date: {
// $dateToString: {
// format: "%d/%m/%Y",
// date: "actions.date"
// }
// }
// // ,
// // count : "$count"
// }
// }
]).exec();
}
Calling the Aggregation:
router.get('/test',function(req, res, next){
var start_date = req.query.start_date;
var end_date = req.query.end_date;
ProspectCont.getActionsIn(start_date,end_date).then(function(value, err){
if(err)console.log(err);
res.json(value);
});
})
My Main Problem is that i get no response at all, i can work with an error message the issue is i am not getting any so i don't know what is wrong.
Mongoose Version: 4.11.8
P.s. I tried multiple variations of the aggregation pipeline, so this isn't my first try, i have an aggregation working on the main prospects schema but not the actions sub-document
You have several problems here, mostly by missing concepts. Lazy readers can skip to the bottom for the full pipeline example, but the main body here is in the explanation of why things are done as they are.
You are trying to select on a date range. The very first thing to check on any long running operation is that you have a valid index. You might have one, or you might not. But you should issue: ( from the shell )
db.prospects.createIndex({ "actions.date": 1 })
Just to be sure. You probably really should add this to the schema definition so you know this should be deployed. So add to your defined schema:
ProspectSchema.index({ "actions.date": 1 })
When querying with a "range" on elements of an array, you need to understand that those are "multiple conditions" which you are expecting to match elements "between". Whilst you generally can get away with querying a "single property" of an array using "Dot Notation", you are missing that the application of [$gte][1] and $lte is like specifying the property several times with $and explicitly.
Whenever you have such "multiple conditions" you always mean to use $elemMatch. Without it, you are simply testing every value in the array to see if it is greater than or less than ( being some may be greater and some may be lesser ). The $elemMatch operator makes sure that "both" are applied to the same "element", and not just all array values as "Dot notation" exposes them:
{ "$match": {
"actions": {
"$elemMatch": { "date": { "$gte": start, "$lte: end } }
}
}}
That will now only match documents where the "array elements" fall between the specified date. Without it, you are selecting and processing a lot more data which is irrelevant to the selection.
Array Filtering: Marked in Bold because it's prominence cannot be ignored. Any initial $match works just like any "query" in that it's "job" is to "select documents" valid to the expression. This however does not have any effect on the contents of the array in the documents returned.
Whenever you have such a condition for document selection, you nearly always intend to "filter" such content from the array itself. This is a separate process, and really should be performed before any other operations that work with the content. Especially [$unwind][4].
So you really should add a $filter in either an $addFields or $project as is appropriate to your intend "immediately" following any document selection:
{ "$project": {
"_id": 0,
"actions": {
"$filter": {
"input": "$actions",
"as": "a",
"in": {
"$and": [
{ "$gte": [ "$$a.date", start ] },
{ "$lte": [ "$$a.date", end ] }
]
}
}
}
}}
Now the array content, which you already know "must" have contained at least one valid item due to the initial query conditions, is "reduced" down to only those entries that actually match the date range that you want. This removes a lot of overhead from later processing.
Note the different "logical variants" of $gte and $lte in use within the $filter condition. These evaluate to return a boolean for expressions that require them.
Grouping It's probably just as an attempt at getting a result, but the code you have does not really do anything with the dates in question. Since typical date values should be provided with millisecond precision, you general want to reduce them.
Commented code suggests usage of $dateToString within a $project. It is strongly recommended that you do not do that. If you intend such a reduction, then supply that expression directly to the grouping key within $group instead:
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$actions.date"
}
},
"count": { "$sum": 1 }
}}
I personally don't like returning a "string" when a natural Date object serializes properly for me already. So I like to use the "math" approach to "round" dates instead:
{ "$group": {
"_id": {
"$add": [
{ "$subtract": [
{ "$subtract": [ "$actions.date", new Date(0) ] },
{ "$mod": [
{ "$subtract": [ "$actions.date", new Date(0) ] },
1000 * 60 * 60 * 24
]}
],
new Date(0)
]
},
"count": { "$sum": 1 }
}}
That returns a valid Date object "rounded" to the current day. Mileage may vary on preferred approaches, but it's the one I like. And it takes the least bytes to transfer.
The usage of Date(0) represents the "epoch date". So when you $subtract one BSON Date from another you end up with the milliseconds difference between the two as an integer. When $add an integer value to a BSON Date, you get a new BSON Date representing the sum of the milliseconds value between the two. This is the basis of converting to numeric, rounding to the nearest start of day, and then converting numeric back to a Date value.
By making that statement directly within the $group rather than $project, you are basically saving what actually gets interpreted as "go through all the data and return this calculated value, then go and do...". Much the same as working through a pile of objects, marking them with a pen first and then actually counting them as a separate step.
As a single pipeline stage it saves considerable resources as you do the accumulation at the same time as calculating the value to accumulate on. When you think it though much like the provided analogy, it just makes a lot of sense.
As a full pipeline example you would put the above together as:
Prospect.aggregate([
{ "$match": {
"actions": {
"$elemMatch": { "date": { "$gte": start, "$lte: end } }
}
}},
{ "$project": {
"_id": 0,
"actions": {
"$filter": {
"input": "$actions",
"as": "a",
"in": {
"$and": [
{ "$gte": [ "$$a.date", start ] },
{ "$lte": [ "$$a.date", end ] }
]
}
}
}
}},
{ "$unwind": "$actions" },
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$actions.date"
}
},
"count": { "$sum": 1 }
}}
])
And honestly if after making sure an index is in place, and following that pipeline you still have timeout problems, then reduce the date selection down until you get a reasonable response time.
If it's still taking too long ( or the date reduction is not reasonable ) then your hardware simply is not up to the task. If you really have a lot of data then you have to be reasonable with expectations. So scale up or scale out, but those things are outside the scope of any question here.
As it stands those improvements should make a significant difference over any attempt shown so far. Mostly due to a few fundamental concepts that are being missed.

how to combine array of object result in mongodb

how can i combine match document's subdocument together as one and return it as an array of object ? i have tried $group but don't seem to work.
my query ( this return array of object in this case there are two )
User.find({
'business_details.business_location': {
$near: coords,
$maxDistance: maxDistance
},
'deal_details.deals_expired_date': {
$gte: new Date()
}
}, {
'deal_details': 1
}).limit(limit).exec(function(err, locations) {
if (err) {
return res.status(500).json(err)
}
console.log(locations)
the console.log(locations) result
// give me the result below
[{
_id: 55 c0b8c62fd875a93c8ff7ea, // first document
deal_details: [{
deals_location: '101.6833,3.1333',
deals_price: 12.12 // 1st deal
}, {
deals_location: '101.6833,3.1333',
deals_price: 34.3 // 2nd deal
}],
business_details: {}
}, {
_id: 55 a79898e0268bc40e62cd3a, // second document
deal_details: [{
deals_location: '101.6833,3.1333',
deals_price: 12.12 // 3rd deal
}, {
deals_location: '101.6833,3.1333',
deals_price: 34.78 // 4th deal
}, {
deals_location: '101.6833,3.1333',
deals_price: 34.32 // 5th deal
}],
business_details: {}
}]
what i wanted to do is to combine these both deal_details field together and return it as an array of object. It will contain 5 deals in one array of object instead of two separated array of objects.
i have try to do it in my backend (nodejs) by using concat or push, however when there's more than 2 match document i'm having problem to concat them together, is there any way to combine all match documents and return it as one ? like what i mentioned above ?
What you are probably missing here is the $unwind pipeline stage, which is what you typically use to "de-normalize" array content, particularly when your grouping operation intends to work across documents in your query result:
User.aggregate(
[
// Your basic query conditions
{ "$match": {
"business_details.business_location": {
"$near": coords,
"$maxDistance": maxDistance
},
"deal_details.deals_expired_date": {
"$gte": new Date()
}},
// Limit query results here
{ "$limit": limit },
// Unwind the array
{ "$unwind": "$deal_details" },
// Group on the common location
{ "$group": {
"_id": "$deal_details.deals_location",
"prices": {
"$push": "$deal_details.deals_price"
}
}}
],
function(err,results) {
if (err) throw err;
console.log(JSON.stringify(results,undefined,2));
}
);
Which gives output like:
{
"_id": "101.6833,3.1333",
"prices": [
12.12,
34.3,
12.12,
34.78,
34.32
]
}
Depending on how many documents actually match the grouping.
Alternately, you might want to look at the $geoNear pipeline stage, which gives a bit more control, especially when dealing with content in arrays.
Also beware that with "location" data in an array, only the "nearest" result is being considered here and not "all" of the array content. So other items in the array may not be actually "near" the queried point. That is more of a design consideration though as any query operation you do will need to consider this.
You can merge them with reduce:
locations = locations.reduce(function(prev, location){
previous = prev.concat(location.deal_details)
return previous
},[])

mongoose subdocument sorting

I have an article schema that has a subdocument comments which contains all the comments i got for this particular article.
What i want to do is select an article by id, populate its author field and also the author field in comments. Then sort the comments subdocument by date.
the article schema:
var articleSchema = new Schema({
title: { type: String, default: '', trim: true },
body: { type: String, default: '', trim: true },
author: { type: Schema.ObjectId, ref: 'User' },
comments: [{
body: { type: String, default: '' },
author: { type: Schema.ObjectId, ref: 'User' },
created_at: { type : Date, default : Date.now, get: getCreatedAtDate }
}],
tags: { type: [], get: getTags, set: setTags },
image: {
cdnUri: String,
files: []
},
created_at: { type : Date, default : Date.now, get: getCreatedAtDate }
});
static method on article schema: (i would love to sort the comments here, can i do that?)
load: function (id, cb) {
this.findOne({ _id: id })
.populate('author', 'email profile')
.populate('comments.author')
.exec(cb);
},
I have to sort it elsewhere:
exports.load = function (req, res, next, id) {
var User = require('../models/User');
Article.load(id, function (err, article) {
var sorted = article.toObject({ getters: true });
sorted.comments = _.sortBy(sorted.comments, 'created_at').reverse();
req.article = sorted;
next();
});
};
I call toObject to convert the document to javascript object, i can keep my getters / virtuals, but what about methods??
Anyways, i do the sorting logic on the plain object and done.
I am quite sure there is a lot better way of doing this, please let me know.
I could have written this out as a few things, but on consideration "getting the mongoose objects back" seems to be the main consideration.
So there are various things you "could" do. But since you are "populating references" into an Object and then wanting to alter the order of objects in an array there really is only one way to fix this once and for all.
Fix the data in order as you create it
If you want your "comments" array sorted by the date they are "created_at" this even breaks down into multiple possibilities:
It "should" have been added to in "insertion" order, so the "latest" is last as you note, but you can also "modify" this in recent ( past couple of years now ) versions of MongoDB with $position as a modifier to $push :
Article.update(
{ "_id": articleId },
{
"$push": { "comments": { "$each": [newComment], "$position": 0 } }
},
function(err,result) {
// other work in here
}
);
This "prepends" the array element to the existing array at the "first" (0) index so it is always at the front.
Failing using "positional" updates for logical reasons or just where you "want to be sure", then there has been around for an even "longer" time the $sort modifier to $push :
Article.update(
{ "_id": articleId },
{
"$push": {
"comments": {
"$each": [newComment],
"$sort": { "$created_at": -1 }
}
}
},
function(err,result) {
// other work in here
}
);
And that will "sort" on the property of the array elements documents that contains the specified value on each modification. You can even do:
Article.update(
{ },
{
"$push": {
"comments": {
"$each": [],
"$sort": { "$created_at": -1 }
}
}
},
{ "multi": true },
function(err,result) {
// other work in here
}
);
And that will sort every "comments" array in your entire collection by the specified field in one hit.
Other solutions are possible using either .aggregate() to sort the array and/or "re-casting" to mongoose objects after you have done that operation or after doing your own .sort() on the plain object.
Both of these really involve creating a separate model object and "schema" with the embedded items including the "referenced" information. So you could work upon those lines, but it seems to be unnecessary overhead when you could just sort the data to you "most needed" means in the first place.
The alternate is to make sure that fields like "virtuals" always "serialize" into an object format with .toObject() on call and just live with the fact that all the methods are gone now and work with the properties as presented.
The last is a "sane" approach, but if what you typically use is "created_at" order, then it makes much more sense to "store" your data that way with every operation so when you "retrieve" it, it stays in the order that you are going to use.
You could also use JavaScript's native Array sort method after you've retrieved and populated the results:
// Convert the mongoose doc into a 'vanilla' Array:
const articles = yourArticleDocs.toObject();
articles.comments.sort((a, b) => {
const aDate = new Date(a.updated_at);
const bDate = new Date(b.updated_at);
if (aDate < bDate) return -1;
if (aDate > bDate) return 1;
return 0;
});
As of the current release of MongoDB you must sort the array after database retrieval. But this is easy to do in one line using _.sortBy() from Lodash.
https://lodash.com/docs/4.17.15#sortBy
comments = _.sortBy(sorted.comments, 'created_at').reverse();

Mongoose Birthday query

I store the date of birth (dob) within a user model and I would like to check if it is the users birthday today. How do I do query that?
I have started to try, but obviously failed
// User Model
var UserSchema = new Schema({
name: String,
email: { type: String, lowercase: true },
role: {
type: String,
default: 'user'
},
hashedPassword: String,
dob: { type: Date, default: new Date() },
joinedOn: { type: Date, default: new Date() },
leftOn: { type: Date },
position: { type: String },
provider: String,
salt: String,
google: {},
github: {}
});
// Birtdhays
exports.birthdays = function(req, res, next) {
var todayStart = moment().startOf('day');
var todayEnd = moment().endOf('day');
User
.find()
.where('dob').gt(todayStart).lt(todayEnd)
.limit(3)
.sort('dob')
.select('name dob')
.exec(function(err, users){
if(err) return res.send(500, err);
res.json(200, users);
});
};
Presuming that you have birthdays stored as a date object in your documents then they probably look something like this:
{
"name": "Neil",
"dob": ISODate("1971-09-22T00:00:00Z")
}
So it's not just the "day" but the full year as well from the originally selected day of birth. This probably seems like a logical way to store a date of birth and it is a useful date object for many purposes. But how to query on that? Any date derived from the current year is not going to match that value within a range and other users data on the same day can occur in different years.
There is some JavaScript date manipulation you can do in order to deal with this though, and also some functionality of MongoDB in the aggregation framework ( or alternately using JavaScript in mapReduce ), to get data out of the date that is useful for matching.
Firstly you can look at the $dayOfyear operator and code to get that from the current date to use as a match:
var now = new Date();
var start = new Date( now.getFullYear() + "-01-01" );
var diff = now - start;
var oneDay = 1000 * 60 * 60 * 24;
var dayOfYear = Math.floor(diff / oneDay);
User.aggregate(
[
{ "$project": {
"name": 1,
"dob": 1,
"dayOfYear": { "$dayOfYear": "$dob" }
}},
{ "$match": { "dayOfYear": dayOfYear }
],
function(err,users) {
// work here
}
)
Now that's all fine, or it seems. But of course what happens when there is a leap year? All days after February 28 are moved forward one. You could account for this in other ways, but how about just using the "day" and "month" to do the match on instead:
var now = new Date();
var day = now.getDate(); // funny method name but that's what it is
var month = now.getMonth() + 1; // numbered 0-11
User.aggregate(
[
{ "$project": {
"name": 1,
"dob": 1,
"day": "$dayOfMonth",
"month": "$month"
}},
{ "$match": {
"day": day,
"month": month
}}
],
function(err,users) {
// work here
}
)
And the aggregation operators for $dayOfMonth and $month help there.
So that's all better and you can query for the "birthday" now, but there is something still not really right. Whilst the query will work, it's clearly not really efficient. Since what you are doing here is running through all of the results in the collection and manipulating the existing dates in order to extract the parts to perform a match.
Ideally you don't want to do this, and have the "query" itself target the current "birthdays" in a simple stroke to avoid doing this manipulation to get a match. This is where modelling comes in, and where you should consider adding more data to your document where queries like this are common:
{
"name": "Neil",
"dob": ISODate("1971-09-22T00:00:00Z"),
"day": 22,
"month": 9
}
Then it's easy to query on this as both "day" and "month" fields can also be indexed to further improve performance and avoid scanning the whole collection for matches:
var now = new Date();
var day = now.getDate(); // funny method name but that's what it is
var month = now.getMonth() + 1; // numbered 0-11
User.find({ "day": day, "month": month },function(err,users) {
// work here
})
So those are the considerations. Either accept the manipulation with the aggregation framework ( or possibly mapReduce ), or where you are going to frequently use such a query and/or have many items in the collection then add additional fields to your document schema that can be used as a data point directly in the query itself.
I solved it in the following way:
Added the fields birthday and brithmonth to the user model and added a pre save hook to keep them updated on any changes to the birthday:
UserSchema
.pre('save', function(next) {
this.birthmonth = this.dob.getMonth() + 1;
this.birthday = this.dob.getDate()
next();
});
Thanks Neil for the inspiration!

Resources