I am using mongoose watch on a particular model as shown below:
const mongoose = require("mongoose");
const ScheduledEventSchema = new mongoose.Schema(
{
_id: mongoose.Schema.Types.ObjectId,
data: Object,
expireAt: { type: Date, expires: 0 }
},
{ timestamps: true }
);
ScheduledEventSchema.index({ "expireAt": 1 }, { expireAfterSeconds: 0 })
const ScheduledEvent = mongoose.model('ScheduledEvent', ScheduledEventSchema);
ScheduledEvent.watch().on('change', data => {
let { operationType, documentKey } = data;
if (operationType === "delete") {
//Do something
}
});
module.exports = ScheduledEvent;
The above model is part of a service which is hosted on Google Cloud Run.
Now, Google Cloud Run services don't run all the time; they only run when invoked by some trigger.
So, assuming the service using the above model is currently not running and MongoDB emits a document-delete event, will the service pick up that delete event the next time it starts, or will that event be lost permanently?
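For context, and not part of the original question: a change stream only delivers events to a cursor that is open at the time, so an event emitted while no listener is attached is not replayed automatically. The driver can, however, resume from a previously saved resume token, as long as the corresponding entries are still in the oplog. A minimal sketch of that mechanism, assuming a "resume_tokens" collection you maintain yourself and a Mongoose 5+ watch() that forwards options to the driver:
const mongoose = require("mongoose");
// Sketch only: every change event carries a resume token in data._id.
// Persist it after handling the event, and pass it back via resumeAfter on
// startup so events that occurred while the watcher was down can still be
// replayed (provided they are still in the oplog).
async function startScheduledEventWatcher() {
  const tokens = mongoose.connection.collection("resume_tokens");
  const saved = await tokens.findOne({ _id: "scheduled-events" });
  const options = saved ? { resumeAfter: saved.token } : {};
  ScheduledEvent.watch([], options).on("change", async data => {
    if (data.operationType === "delete") {
      // Do something
    }
    // Remember how far we got so a later restart can resume from here.
    await tokens.updateOne(
      { _id: "scheduled-events" },
      { $set: { token: data._id } },
      { upsert: true }
    );
  });
}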
Related question:
I have a NodeJS server that has the following Mongoose Schema:
const mongoose = require("mongoose");
const EventSchema = new mongoose.Schema(
{
_id: mongoose.Schema.Types.ObjectId,
type: String,
time: Date,
expireAt: { type: Date, expires: 0 }
},
{ timestamps: true }
);
EventSchema.index({ "expireAt": 1 }, { expireAfterSeconds: 0 })
const Event = mongoose.model('Event', EventSchema);
Event.watch().on('change', data => { //I want only 1 instance of my server to watch this
let { operationType } = data;
if (operationType === "delete") {
//Perform an operation based on deletion only once
}
});
module.exports = Event;
The server is hosted on Heroku. When I scale the server to 2 dynos, both instances end up listening for changes to the document via the Event.watch call.
How can I make sure that, regardless of how many dynos are running in production, only one instance listens for changes made to the document?
Thanks.
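Not from the original post, but for illustration: one minimal way to restrict this on Heroku is to key off the DYNO environment variable that Heroku sets for each dyno (its value is not guaranteed to stay stable across releases, so treat this as a sketch rather than robust leader election):
// Sketch: only the dyno named "web.1" attaches the change stream.
// A more robust alternative would be a lock document in MongoDB, or a
// dedicated worker dyno that is only ever scaled to 1.
if (process.env.DYNO === "web.1") {
  Event.watch().on("change", data => {
    const { operationType } = data;
    if (operationType === "delete") {
      // Perform the deletion-based operation exactly once
    }
  });
}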
Considering the code below, how can a transaction be implemented to ensure that someParentDocument doesn't get deleted, and that any operations performed inside the hooks are rolled back, when an error is thrown in any of the hooks?
const parentSchema = new mongoose.Schema({
name: String,
children: [{ type: mongoose.Schema.Types.ObjectId, ref: "Child" }],
});
const childSchema = new mongoose.Schema({
name: String,
parent: { type: mongoose.Schema.Types.ObjectId, ref: "Parent" },
});
parentSchema.pre("remove", async function(next){
// Add and remove documents to Parent and Child...
//...
next();
});
parentSchema.post("remove", async function(parent){
throw new Error("Exception!");
});
// (...)
try {
await someParentDocument.delete(); // <-- will trigger the hooks above
} catch {}
TLDR; Mongoose middleware was not designed for this.
This method of inserting transactions is effectively patching the middleware functionality, and you end up creating an API completely separate from the Mongoose middleware.
A better approach is to invert the logic: put the removal in a separate function.
Simple & Intended Solution
Allow a transaction-handling method to do its magic, and create a separate remove function for your parent model. Mongoose wraps mongodb.ClientSession.prototype.withTransaction with mongoose.Connection.prototype.transaction, so we don't even have to instantiate or manage a session. Compare the length of this approach with the one further below: you save the mental headache of remembering the internals of that middleware at the cost of one separate function.
const parentSchema = new mongoose.Schema({
name: String,
children: [{ type: mongoose.Schema.Types.ObjectId, ref: "Child" }],
});
const childSchema = new mongoose.Schema({
name: String,
parent: { type: mongoose.Schema.Types.ObjectId, ref: "Parent" },
});
// Assume `parent` is a parent document here
async function fullRemoveParent(parent) {
// The document's connection
const db = parent.db;
// This handles everything with the transaction for us, including retries,
// sessions, commits, aborts, etc.
await db.transaction(async function (session) {
// Make sure to associate all actions with the session
await parent.remove({ session });
await db
.model("Child")
.deleteMany({ _id: { $in: parent.children } })
.session(session);
});
// And done!
}
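Usage then replaces the try/catch around someParentDocument.delete() from the question, roughly:
// Any error thrown inside the transaction callback (including inside remove
// middleware) aborts the transaction, so parent and children stay untouched.
try {
  await fullRemoveParent(someParentDocument);
} catch (err) {
  // Handle / log the failed removal here.
}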
Small Extension
Another way to make this easy is to register a middleware which simply inherits a session if (and only if) the query has one registered. Optionally, throw an error if a transaction has not been started.
const parentSchema = new mongoose.Schema({
name: String,
children: [{ type: mongoose.Schema.Types.ObjectId, ref: "Child" }],
});
const childSchema = new mongoose.Schema({
name: String,
parent: { type: mongoose.Schema.Types.ObjectId, ref: "Parent" },
});
parentSchema.pre("remove", async function () {
// Look how easy!! Just make sure to pass a transactional
// session to the removal
await this.db
.model("Child")
.deleteMany({ _id: { $in: this.children } })
.session(this.$session());
// // If you want to: throw an error/warning if you forgot to add a session
// // and transaction
// if(!this.$session() || !this.$session().inTransaction()) {
// throw new Error("HEY YOU FORGOT A TRANSACTION.");
// }
});
// Assume `parent` is a parent document here
async function fullRemoveParent(parent) {
await parent.db.transaction(async function (session) {
await parent.remove({ session });
});
}
Risky & Complex Solution
This works, but it is horribly complex. Not recommended. It will likely break some day because it relies on intricacies of the Mongoose API. I don't know why I coded this; please don't include it in your projects.
import mongoose from "mongoose";
import mongodb from "mongodb";
const parentSchema = new mongoose.Schema({
name: String,
children: [{ type: mongoose.Schema.Types.ObjectId, ref: "Child" }],
});
const childSchema = new mongoose.Schema({
name: String,
parent: { type: mongoose.Schema.Types.ObjectId, ref: "Parent" },
});
// Choose a transaction timeout
const TRANSACTION_TIMEOUT = 120000; // milliseconds
// No need for next() callback if using an async function.
parentSchema.pre("remove", async function () {
// `this` refers to the document, not the query
let session = this.$session();
// Check if this op is already part of a session, and start one if not.
if (!session) {
// `this.db` refers to the documents's connection.
session = await this.db.startSession();
// Set the document's associated session.
this.$session(session);
// Note if you created the session, so post can clean it up.
this.$locals.localSession = true;
}
// Check if already in transaction.
if (!session.inTransaction()) {
await session.startTransaction();
// Note if you created transaction.
this.$locals.localTransaction = true;
// If you want a timeout
this.$locals.startTime = new Date();
}
// Let's assume that we need to remove all parent references in the
// children. (just add session-associated ops to extend this)
await this.db
.model("Child") // Child model of this connection
.updateMany(
{ _id: { $in: this.children } },
{ $unset: { parent: true } }
)
.session(session);
});
parentSchema.post("remove", async function (parent) {
if (this.$locals.localTransaction) {
// Here, there may be an error when we commit, so we need to check if it
// is a 'retryable' error, then retry if so.
try {
await this.$session().commitTransaction();
} catch (err) {
if (
err instanceof mongodb.MongoError &&
err.hasErrorLabel("TransientTransactionError") &&
new Date() - this.$locals.startTime < TRANSACTION_TIMEOUT
) {
await parent.remove({ session: this.$session() });
} else {
throw err;
}
}
}
if (this.$locals.localSession) {
await this.$session().endSession();
this.$session(null);
}
});
// Specific error-handling middleware for when it's really time to abort
// (clean up the injections)
parentSchema.post("remove", async function (err, doc, next) {
if (this.$locals.localTransaction) {
await this.$session().abortTransaction();
}
if (this.$locals.localSession) {
await this.$session().endSession();
this.$session(null);
}
next(err);
});
MongoDB client version: 3.1.10
I am using a hosted MongoDB server on mLab.com (Shared Production-Ready cluster).
var dbo = db.db(DB);
var collection = dbo.collection('schedule');
var query = {email:userEmail};
let availability = (availabilityArg==="offQ")?"no":"yes";
let note = (availabilityArg==="offQ")?"Off Queue":"";
var data = {available:availability,notes:note};
collection.update(query,{$set:data},function(err,data){
if(err)
console.log("DB find error.");
else{
console.log(userEmail+((availabilityArg==="offQ")?" off the queue":" back in queue"));
}
});
The above works 95% of the time. 5% of the time, the DB does not get updated at all.
95%: The DB gets updated based on availabilityArg. If it is offQ, the available attribute is set to no; if it is onQ, it is set to yes. The notes attribute also gets updated accordingly.
5%: The DB does not get updated at all; there is no change to the available or notes attributes, even though I still see the console.log statement with the email ID and the "off the queue"/"back in queue" message.
It just doesn't make sense.
PS: the code above runs inside a function with this signature:
function(db, userEmail, availabilityArg)
I took your code and re-imagined it as follows; I hope it helps.
/*
I am not sure what is going on behind here
so I'll just comment it out, and show you
an implementation that works, which you can
repurpose.
*/
// var dbo = db.db(DB);
// var collection = dbo.collection('schedule');
/*
NOTE: Below is an example model for handling data using Mongoose.
This should be in a separate file which you would import
*/
const mongoose = require('mongoose');
const schema = new mongoose.Schema({
email: {
required: true,
type: String
},
availability: {
required: true,
type: String,
default: ''
},
notes: {
required: true,
type: String,
default: ''
}
});
// If in the same file (not advisable)
const Schedule = mongoose.model('Schedule', schema, 'schedule_collection');
// Or export (advisable)
module.exports = mongoose.model('Schedule', schema, 'schedule_collection');
// NOTE: Below is a re-imagined version of your code
const findQuery = { email: userEmail };
const availability = availabilityArg === 'offQ' ? 'no' : 'yes';
const notes = availabilityArg === 'offQ' ? 'Off Queue' : '';
// NOTE: For updating the document
const updateQuery = {
availability: availability,
notes: notes
};
// NOTE: Using findOneAndUpdate of the Model
Schedule.findOneAndUpdate(findQuery, updateQuery, (err, _) => {
if (err)
console.log("DB error.");
else {
console.log(userEmail + ((availabilityArg === "offQ") ? " off the queue" : " back in queue"));
}
});
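A possible explanation for the original 5%, which I cannot confirm from here: the update may simply have matched no document (for example, a non-existent email), which is not an error, so the success message is still logged. A sketch that only reports success when a document was actually found and updated:
// Sketch: pass { new: true } so the updated document is returned, and treat
// a null result (no matching email) as its own case instead of success.
Schedule.findOneAndUpdate(findQuery, updateQuery, { new: true }, (err, doc) => {
  if (err) {
    console.log('DB error.', err);
  } else if (!doc) {
    console.log('No schedule document found for ' + userEmail);
  } else {
    console.log(userEmail + (availabilityArg === 'offQ' ? ' off the queue' : ' back in queue'));
  }
});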
I am facing memory issues with my Node app. I took some heap dumps and saw a lot of Mongo objects being held in memory, which is causing the Node app to run out of memory.
I have the following setup for my app.
MongoDB 3.4.13
Mongoose 4.11.10 (tried 4.13.11 and 5.0.7 also)
Node 8.9.4
config.js
const clientUID = require('./env').clientUID;
module.exports = {
// Secret key for JWT signing and encryption
secret: 'mysecret',
// Database connection information
database: `mongodb://localhost:27017/app_${clientUID}`,
// Setting port for server
port: process.env.PORT || 3000,
}
I have several models in the app. Every model is defined in the following manner (just listing one of the models here):
models/card.js
const mongoose = require('mongoose');
const Schema = mongoose.Schema;
const CardSchema = new Schema({
name: {
type: String,
unique: true,
required: true
},
macId: {
type: String,
unique: true,
required: true
},
cardTypeId: {
type: mongoose.Schema.Types.ObjectId,
ref: 'CardType',
required: true
},
},
{
timestamps: true
});
module.exports = mongoose.model('Card', CardSchema);
In the app I require the model and perform some actions as follows:
const Card = require('./models/card');
...require other models
const config = require('./config');
mongoose.connect(config.database);
function fetchCardByMacId(macId) {
return Card.findOne({ macId }).lean().exec();
}
function updateTrackerByMacId(macId, x, y, nodeId) {
const data = {x, y, lastNodeId: nodeId};
fetchCardByMacId(macId)
.then(card => {
Tracker.findOneAndUpdate({ cardId: card._id }, data, { upsert: true, new: true }).exec((error, tracker) => {
if (error) {
return console.log('update tracker error', error);
}
TrackerHistory.findOne({ trackerId: tracker._id }).exec((err, trackerHistory) => {
if (err) {
return console.log('fetch trackerHistory error', err);
}
if (trackerHistory) {
trackerHistory.trackers.push({ x, y, timestamp: moment().format(), nodeId });
TrackerHistory.findOneAndUpdate({_id: trackerHistory._id},trackerHistory,(er, trackerHis) => {
if (er) {
return console.log('trackerHistory change update error', er);
}
})
} else {
const trackerHistoryNew = new TrackerHistory({
trackerId: tracker._id,
trackers: [{ x, y, timestamp: moment().format(), nodeId }]
});
trackerHistoryNew.save((er, trackerHis) => {
if (er) {
return console.log('trackerHistory create error', er);
}
});
}
});
});
}).catch(error => {
console.log('updateTrackerByMacId error', error);
});
}
There are many other functions like this that read and update data.
Every 5 seconds I get new data that needs to be inserted into the DB (no more than a few hundred KB), and some of the old DB data also gets updated based on this new data (seemingly fairly straightforward DB ops: read, manipulate and update back).
From index.js I spawn 2 child processes that take the load of processing this new data and updating the DB based on the business logic. When new data is received in index.js using event listeners, I send it to child process 1 to insert/update the DB. Child process 2 runs on a 10s timer to read this updated data and then do some further updates to the DB.
Running this on my local MacBook Pro is no issue (the logged heap memory in use never goes above 40-50 MB). When I load it on a DigitalOcean Ubuntu 16.04 server (4 GB / 2 CPUs) I am facing memory issues. The child processes are exiting after hitting the memory threshold for the process (~1.5 GB), which seems very odd to me.
I also tried running this in Docker containers and see the same results: on the Mac it runs without issues, but on the server it is eating up memory.
Generating heap dumps shows a lot of Mongo objects in the heap.
I would like some help understanding what I am doing wrong here, and why Mongo is eating up this much memory on the server.
So there was a big issue with the way the TrackerHistory collection was modelled. TrackerHistory had an array, and every time a new object had to be added to that array the whole TrackerHistory document was loaded into memory; at the given frequency of real-time updates, memory was bloating faster than it was being GC'd.
Fixed it by moving the trackers array into a new collection whose documents hold a foreign-key reference back to TrackerHistory.
Reference article that helped me identify this issue: https://www.mongodb.com/blog/post/6-rules-of-thumb-for-mongodb-schema-design-part-1
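For illustration only (the names below are my assumptions, not code from the project): the fix amounts to storing each tracker point as its own document that references the history, so appending a point is a small insert instead of rewriting an ever-growing embedded array.
const mongoose = require('mongoose');
const Schema = mongoose.Schema;
// Each point is its own document with a reference back to TrackerHistory,
// instead of an element pushed onto a large embedded array.
const TrackerPointSchema = new Schema({
  trackerHistoryId: { type: Schema.Types.ObjectId, ref: 'TrackerHistory', required: true },
  x: Number,
  y: Number,
  nodeId: String,
  timestamp: Date
}, { timestamps: true });
module.exports = mongoose.model('TrackerPoint', TrackerPointSchema);
// Appending a point no longer loads the whole history into memory:
// new TrackerPoint({ trackerHistoryId, x, y, nodeId, timestamp: new Date() }).save();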
I would like to know the best approach to the following scenario.
I've got a Node API which uses Mongoose and Bluebird, and some Android clients will post "movement" entities to it.
(Question at the end.)
Let's say movement-model.js exports the Schema, and looks like this:
"use strict";
const mongoose = require('mongoose');
const _movementSchema = {
movementId: { type: Number, required: true },
relMovementId: Number,
_party: { type: mongoose.Schema.Types.ObjectId, required: true, ref: 'Party' }
}
module.exports = mongoose.Schema(_movementSchema, {collection: 'movement'});
And related exported Schema on party-model.js is as follows:
"use strict";
const mongoose = require('mongoose');
const _partySchema = {
active: { type: Boolean, default: true },
name: { type: String, trim: true, required: true },
logo: { type: Buffer },
coordenates: { lat: Number, long: Number },
startOn: { type: Date, required: true },
endOn: { type: Date, required: true }
}
module.exports = mongoose.Schema(_partySchema, {collection: 'party'});
The Android client sends the JSON with only the ObjectId, not a fully populated object. So when the POST comes in, I'm using the body directly (i.e. let _movement = req.body;), and in movement-dao.js I've got the createNew method and I'm exporting the Model:
"use strict";
const mongoose = require('mongoose');
const Promise = require('bluebird');
mongoose.Promise = Promise;
const _ = require('lodash'); // needed for the _.isObject check below
const movementSchema = require('../model/movement-model');
movementSchema.statics.createNew = (movement) => {
return new Promise((resolve, reject) => {
if (!_.isObject(movement)) {
return reject(new TypeError('Movement is not a valid object.'));
}
let _something = new Movement(movement);
_something.save((err, saved) => {
err ? reject(err)
: resolve(saved);
});
});
}
const Movement = mongoose.model('Movement', movementSchema);
module.exports = Movement;
What I want to accomplish: save the movement with _party holding the full Party document as it is at the moment of the save, i.e. an embedded copy of the Party document that will not be affected by future updates to that Party document.
I cannot change the Android client, so I will still be getting only the ObjectId from it.
JSON example of what the Android client will post: {"movementId":1, "relMovementId":4138, "_party":"58dbfe26194cfc5a9ec9b7c5"}
I'm now unsure whether, given the way Android posts the JSON, I need two schemas (one for the object received, with an ObjectId ref to Party, and a second for the object persisted, with the embedded schema _party: Party.Schema), or whether I could do something simpler, such as populating prior to save.
For the sake of closing this up:
I implemented one of the approaches I had in mind while writing the question. The Movement schema was changed so that _party: Party.Schema.
When I get a POST to create a new movement, I do a getById on the Party and use the result of that exec to populate the value as an embedded document.
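A rough sketch of that final approach; Party is assumed to be registered as a model elsewhere, the require paths are illustrative, and createMovement is a hypothetical handler name:
// movement-model.js: _party now embeds a snapshot instead of holding a ref
const mongoose = require('mongoose');
const partySchema = require('./party-model'); // party-model.js exports the Party Schema
const _movementSchema = {
  movementId: { type: Number, required: true },
  relMovementId: Number,
  _party: partySchema // embedded copy, unaffected by later Party updates
};
module.exports = mongoose.Schema(_movementSchema, { collection: 'movement' });

// In the POST handler: resolve the ObjectId sent by the Android client into
// the current Party document and embed that copy before saving.
const Party = mongoose.model('Party'); // assumes the Party model is registered elsewhere
const Movement = require('./movement-dao'); // movement-dao.js exports the Movement model
async function createMovement(req, res) {
  const payload = req.body; // { movementId, relMovementId, _party: "<ObjectId>" }
  const party = await Party.findById(payload._party).lean().exec();
  const movement = Object.assign({}, payload, { _party: party });
  const saved = await Movement.createNew(movement);
  res.json(saved);
}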