MongoDB concurrency issue with findOne and updateOne - node.js

I am having an issue with concurrent requests that update the same document. I'm not using findAndModify() because I need to access the current state of the document to build the update, which I don't see supported by findAndModify(). I also would like to avoid db.fsyncLock(), since that locks the entire database and I only need to lock one document in one collection.
First I use findOne() to get a document, then I call updateOne() in the callback of findOne() to update that same document. When I queue up a bunch of actions and run them all at once, I believe they all read the same state when they call findOne() instead of waiting for the previous action's updateOne() to complete.
How should I handle this?
mongoDBPromise.then((db) => {
    db.collection("notes").findOne(
        {path: noteId},
        (err, result) => {
            if (err) {
                console.log(err);
                return;
            }
            if (!result.UndoableNoteList.future.length) {
                console.log("Nothing to redo");
                return;
            }
            let past = result.UndoableNoteList.past.concat(Object.assign({}, result.UndoableNoteList.present));
            let present = Object.assign({}, result.UndoableNoteList.future[0]);
            let future = result.UndoableNoteList.future.slice(1, result.UndoableNoteList.future.length);
            db.collection("notes").updateOne(
                {path: noteId},
                {
                    $set: {
                        UndoableNoteList: {
                            past: past,
                            present: present,
                            future: future
                        }
                    }
                },
                (err, result) => {
                    if (err) {
                        console.log(err);
                        return;
                    }
                }
            );
        }
    );
});

As updateOne() is an async call, findOne() won't wait for it to complete, and hence there can be situations where the same document is updated simultaneously, which is not allowed in MongoDB.
I think updateOne() is not necessary in this case. Note that you have already found the right instance of the document that needs to be updated with the findOne() query. Now you can modify that instance and save the document without calling updateOne(). I think the problem can be avoided this way:
mongoDBPromise.then((db) => {
    db.collection("notes").findOne(
        {path: noteId},
        (err, result) => {
            if (err) {
                console.log(err);
                return;
            }
            if (!result.UndoableNoteList.future.length) {
                console.log("Nothing to redo");
                return;
            }
            let past = result.UndoableNoteList.past.concat(Object.assign({}, result.UndoableNoteList.present));
            let present = Object.assign({}, result.UndoableNoteList.future[0]);
            let future = result.UndoableNoteList.future.slice(1, result.UndoableNoteList.future.length);
            result.UndoableNoteList.past = past;
            result.UndoableNoteList.present = present;
            result.UndoableNoteList.future = future;
            //save the document here and return
        }
    );
});
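For illustration, the save step that the comment refers to could be done with the native driver's replaceOne(), which writes the modified document back wholesale. This is a sketch of one possibility, not part of the original answer:
// A sketch of the save step; using replaceOne() here is my assumption,
// the answer above leaves this step open.
db.collection("notes").replaceOne(
    {path: noteId},
    result, // the document mutated above
    (err) => {
        if (err) console.log(err);
    }
);
Note that this still performs the read and the write in two separate steps, so by itself it does not remove the race the question describes.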
Hope this answer helps you!

I was not able to find a way to run the queries sequentially using purely MongoDB functions, so I've written some node.js logic that blocks MongoDB queries from running on the same document and adds those queries to a queue. Here's what the code currently looks like.
The Websocket Undo Listener
module.exports = (noteId, wsHelper, noteWebSocket) => {
    wsHelper.addMessageListener((msg, ws) => {
        if (msg.type === "UNDO") {
            noteWebSocket.broadcast(msg, noteWebSocket.getOtherClientsInPath(noteId, wsHelper));
            noteWebSocket.saveUndo(noteId);
        }
    });
};
The saveUndo function called from the listener
saveUndo(noteId) {
    this.addToActionQueue(noteId, {payload: noteId, type: "UNDO"});
    this.getNoteByIdAndProcessQueue(noteId);
}
The getNoteByIdAndProcessQueue function called from saveUndo
getNoteByIdAndProcessQueue(noteId) {
    if (this.isProcessing[noteId]) return;
    this.isProcessing[noteId] = true;
    mongoDBPromise.then((db) => {
        db.collection("notes").findOne(
            {path: noteId},
            (err, result) => {
                if (err) {
                    this.isProcessing[noteId] = false;
                    this.getNoteByIdAndProcessQueue(noteId);
                    return;
                }
                this.processQueueForNoteId(noteId, result.UndoableNoteList);
            });
    });
}
The processQueueForNoteId function
processQueueForNoteId(noteId, UndoableNoteList) {
    this.actionQueue[noteId].forEach((action) => {
        if (action.type === "UNDO") {
            UndoableNoteList = this.undoNoteAction(UndoableNoteList);
        } else if (action.type === "REDO") {
            UndoableNoteList = this.redoNoteAction(UndoableNoteList);
        } else if (action.type === "ADD_NOTE") {
            UndoableNoteList = this.addNoteAction(UndoableNoteList, action.payload);
        } else if (action.type === "REMOVE_NOTE") {
            UndoableNoteList = this.removeNoteAction(UndoableNoteList, action.payload);
        }
    });
    let actionsBeingSaved = this.actionQueue[noteId].concat();
    this.actionQueue[noteId] = [];
    mongoDBPromise.then((db) => {
        db.collection("notes").updateOne(
            {path: noteId},
            {
                $set: {
                    UndoableNoteList: UndoableNoteList
                }
            },
            (err, result) => {
                this.isProcessing[noteId] = false;
                // If the update failed then try again
                if (err) {
                    console.log("update error");
                    this.actionQueue[noteId] = actionsBeingSaved.concat(this.actionQueue[noteId]);
                }
                // If actions were queued during the save then save again
                if (this.actionQueue[noteId].length) {
                    this.getNoteByIdAndProcessQueue(noteId);
                }
            }
        );
    });
}
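As an aside, one way to guard a read-modify-write cycle with purely MongoDB operations is optimistic locking: keep a version counter on the document and make the update conditional on it. The sketch below is an illustration, not part of the code above; version is a hypothetical field that would have to be added to each note document:
// A sketch of optimistic locking. `version` is a hypothetical field.
function tryRedo(db, noteId, callback) {
    db.collection("notes").findOne({path: noteId}, (err, note) => {
        if (err) return callback(err);
        // ...compute past/present/future from `note` as in the question...
        db.collection("notes").updateOne(
            // Matches only if nobody updated the document since our read.
            {path: noteId, version: note.version},
            {$set: {/* UndoableNoteList: ... */}, $inc: {version: 1}},
            (err, result) => {
                if (err) return callback(err);
                // modifiedCount === 0 means another writer won the race:
                // re-read the document and try again.
                if (result.modifiedCount === 0) return tryRedo(db, noteId, callback);
                callback();
            }
        );
    });
}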

Related

Is there any easy way to copy collection data into another in MEAN app?

I'm currently implementing the admin dashboard of an online shopping app. I want to implement a method that deletes a user and temporarily stores the deleted user's data in another collection.
(Copy the user data -> save it in another collection -> delete the original data)
For example, my user data currently lives in a collection called users; after deleting a user, that user's data must be available in another collection, say deleted_users. Is there an easy way to do that? Thanks!
You will have to modify some of the code, but this is the basic logic:
Use aggregation to copy the collection over.
Refer here for the aggregate function using the mongo client.
So the function looks like this:
public aggregation(collectionName: string, pipelines: Object[]): Promise<Array<any>>
{
    return new Promise((resolve, reject) =>
    {
        let cursor: mongodb.AggregationCursor<any> = null;
        //Here you will use your own getCollection method to fetch the collection
        this.getCollection(collectionName)
            .then((collection: mongodb.Collection) =>
            {
                cursor = collection.aggregate(pipelines);
                return cursor.toArray();
            })
            .then((result: Array<any>) =>
            {
                return resolve(result);
            })
            .catch((error: any) =>
            {
                return reject(error);
            });
    });
}
public dropCollection(collectionName: string): Promise<any>
{
    return new Promise((resolve, reject) =>
    {
        this.getCollection(collectionName)
            .then((collection: mongodb.Collection) =>
            {
                collection.drop((err: Error, result: any) =>
                {
                    if (err)
                    {
                        return reject(DataDropError);
                    }
                    return resolve(result);
                });
            })
            .catch(reject);
    });
}
public async backupAndDrop()
{
    // $match: {} selects every document; $out writes them to the target collection.
    const pipeline = [ { $match: {} }, { $out: "DeletedCollection" } ];
    try
    {
        await this.aggregation("originalCollection", pipeline);
        await this.dropCollection("originalCollection");
    }
    catch (e)
    {
        throw e;
    }
}
Also, try running this in your mongo shell:
db.originalCollection.aggregate([ { $match: {} }, { $out: "Backup" } ])
Keep in mind that $out replaces the contents of the output collection if it already exists.
Why don't you add a flag like isDeleted which is false by default and then make it true when the user is deleted?
You can do something like this...
Client.connect(connection_string, function(err, db) {
    if (err) {
        console.log(err);
    }
    else {
        db.collection(CollectionA).find().forEach(function(d) {
            db.collection(CollectionB).insert(d);
        });
    }
});
Try it out and see if it works.
This can help too:
How to properly reuse connection to Mongodb across NodeJs application and modules
You can first find the record to be deleted, create a document with that data in the new collection, and then delete the original record.
db.collection(CollectionA).findOne({userIdTODelete}, function(err, res) {
    db.collection(CollectionB).insertOne(res, function() {
        db.collection(CollectionA).deleteOne({userIdTODelete});
    });
});
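One caveat on the snippet above: it has no error checks, so the original could be deleted even when the copy failed. A slightly more defensive version (a sketch, reusing the same names from the answer) might be:
db.collection(CollectionA).findOne({userIdTODelete}, function(err, res) {
    if (err || !res) return console.log(err || "user not found");
    db.collection(CollectionB).insertOne(res, function(err) {
        // Only delete the original once the copy has definitely been written.
        if (err) return console.log(err);
        db.collection(CollectionA).deleteOne({userIdTODelete});
    });
});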

Mongoose : Unable to upsert data in for loop

I want to upsert documents in the following way:
for (var i = 0; i < req.body.app_events.length; i++) {
    console.log(req.body.app_events[i].event_key);
    //delete upsertData._id;
    Appusers.update({app_key: req.body.app_key, e_key: req.body.app_events[i].event_key}, {
        $set: {
            app_key: req.body.app_key,
            e_key: req.body.app_events[i].event_key,
            e_name: req.body.app_events[i].event_name
        }
    }, {upsert: true}, function(err, data) {
        if (err) return console.log(err);
        console.log(data);
    });
}
It is creating a single document with _id only. I want to insert the document if it doesn't exist, otherwise update it, on the basis of e_key and app_key.
You really should not be calling asynchronous functions inside a synchronous loop. What you need is something that respects the callback on completion of each loop cycle and signals when the update is complete. This makes incrementing counters externally safe.
Use something like async.whilst for this:
var i = 0;
async.whilst(
    function() { return i < req.body.app_events.length; },
    function(callback) {
        console.log(req.body.app_events[i].event_key);
        //delete upsertData._id;
        Appusers.findOneAndUpdate(
            {app_key: req.body.app_key, e_key: req.body.app_events[i].event_key},
            {
                $set: {
                    app_key: req.body.app_key,
                    e_key: req.body.app_events[i].event_key,
                    e_name: req.body.app_events[i].event_name
                }
            },
            {upsert: true},
            function(err, data) {
                if (err) return callback(err);
                console.log(data);
                i++;
                callback();
            }
        );
    },
    function(err) {
        if (err) console.log(err);
        // else: done
    }
);
Now the loop is wrapped with a "callback", which is itself called within the callback of the update method. Also, if you expect a "document" back, then you should be using .findOneAndUpdate(), as .update() just modifies the content and returns the number affected.
When the loop is complete, or when an error is passed to the callback, handling moves to the last function block, where you complete your call or call other callbacks as required.
Better than the above: dig into the native driver methods for Bulk operations. You need to be careful that you already have an open connection to the database. If unsure about this, try to always wrap application logic in:
mongoose.connection.once('open', function() {
    // app logic here
});
This makes sure the connection has been made. The mongoose methods themselves "hide" this away, but the native driver methods have no knowledge of it.
But this is the fastest possible listing to update the data:
var i = 0;
var bulk = Appusers.collection.initializeOrderedBulkOp();
async.whilst(
    function() { return i < req.body.app_events.length; },
    function(callback) {
        console.log(req.body.app_events[i].event_key);
        bulk.find(
            {app_key: req.body.app_key, e_key: req.body.app_events[i].event_key}
        ).upsert().updateOne({
            $set: {
                app_key: req.body.app_key,
                e_key: req.body.app_events[i].event_key,
                e_name: req.body.app_events[i].event_name
            }
        });
        i++;
        if (i % 1000 == 0) {
            bulk.execute(function(err, response) {
                if (err) return callback(err);
                console.log(response);
                bulk = Appusers.collection.initializeOrderedBulkOp();
                callback();
            });
        } else {
            callback();
        }
    },
    function(err) {
        if (err) {
            console.log(err);
        } else if (i % 1000 != 0) {
            bulk.execute(function(err, response) {
                if (err) return console.log(err);
                console.log(response);
                // done
            });
        }
        // else: done
    }
);
The Bulk methods build up "batches" of results (in this case 1000 at a time) and send everything to the server in one request with one response (per batch). This is a lot more efficient than contacting the database once for every write.
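As a side note, newer Mongoose and driver versions expose the same batching through bulkWrite(). A rough sketch of the equivalent upserts (my assumption of the mapping, not part of the original answer):
// A sketch using Model.bulkWrite (available in newer Mongoose versions).
var ops = req.body.app_events.map(function(ev) {
    return {
        updateOne: {
            filter: {app_key: req.body.app_key, e_key: ev.event_key},
            update: {$set: {
                app_key: req.body.app_key,
                e_key: ev.event_key,
                e_name: ev.event_name
            }},
            upsert: true
        }
    };
});
Appusers.bulkWrite(ops)
    .then(function(result) { console.log(result); })
    .catch(function(err) { console.log(err); });
The driver splits the operations into batches for you, so the manual modulo-1000 bookkeeping goes away.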

How to make transactions with sails.js/waterline?

I am trying to put my queries into a transaction and I am failing at runtime. The error I am getting is:
Object #<bound> has no method 'transaction'
I tried to follow this "documentation".
In short, my model looks like this:
updateOrCreate: function (profile_id, positive, negative) {
    var deferred = Q.defer();
    Reputation.transaction().findOne().where({profile: profile_id}).then(function (rep) {
        if (rep) {
            // Reputation logic
            rep.save(function (err) { deferred.resolve(); });
        } else {
            // Reputation does not exist. Create.
            Reputation.create({profile: profile_id, positive: positive, negative: negative}).exec(function (e, rep) {
                deferred.resolve();
            });
        }
    }).fail(function (err) { deferred.reject(); });
    return deferred.promise;
}
Any ideas what I did wrong?
Thanks.
w.
This is now supported in Sails v1 (not yet officially released as of June 26, 2017).
You can follow this link to the documentation on next.sailsjs.com:
Datastore.transaction()
The doc above gives the following example:
sails.getDatastore()
    .transaction(function (db, proceed) {
        BankAccount.findOne({ owner: req.session.userId }).usingConnection(db)
            .exec(function (err, myAccount) {
                if (err) { return proceed(err); }
                if (!myAccount) { return proceed(new Error('Consistency violation: Database is corrupted-- logged in user record has gone missing')); }
                BankAccount.findOne({ owner: req.param('recipientId') }).usingConnection(db)
                    .exec(function (err, recipientAccount) {
                        if (err) { return proceed(err); }
                        if (!recipientAccount) {
                            err = new Error('There is no recipient with that id');
                            err.code = 'E_NO_SUCH_RECIPIENT';
                            return proceed(err);
                        }
                        // Do the math to subtract from the logged-in user's account balance,
                        // and add to the recipient's bank account balance.
                        var myNewBalance = myAccount.balance - req.param('amount');
                        // If this would put the logged-in user's account balance below zero,
                        // then abort. (The transaction will be rolled back automatically.)
                        if (myNewBalance < 0) {
                            err = new Error('Insufficient funds');
                            err.code = 'E_INSUFFICIENT_FUNDS';
                            return proceed(err);
                        }
                        // Update the current user's bank account
                        BankAccount.update({ owner: req.session.userId })
                            .set({
                                balance: myNewBalance
                            })
                            .usingConnection(db)
                            .exec(function (err) {
                                if (err) { return proceed(err); }
                                // Update the recipient's bank account
                                BankAccount.update({ owner: req.param('recipientId') })
                                    .set({
                                        balance: recipientAccount.balance + req.param('amount')
                                    })
                                    .usingConnection(db)
                                    .exec(function (err) {
                                        if (err) { return proceed(err); }
                                        return proceed();
                                    });
                            });
                    });
            });
    }).exec(function(err) {
        // At this point, we know that, if our code above passed through
        // an error to `proceed`, Sails took care of rolling back the
        // transaction. Otherwise, it committed it to the database.
        if (err && err.code === 'E_INSUFFICIENT_FUNDS') {
            return res.badRequest(err);
        }
        else if (err && err.code === 'E_NO_SUCH_RECIPIENT') {
            return res.notFound();
        }
        else if (err) {
            return res.serverError(err);
        }
        // All done!
        return res.ok();
    });
The "documentation" you're following is a proposal for how transaction support could be added to Sails. There is no native transaction support in Sails. See this answer for an example of how to use the .query method for the MySQL or Postgres adapters to perform transactions.
Seems they don't support this. You could use something like:
https://github.com/Shyp/pg-transactions
https://github.com/postmanlabs/sails-mysql-transactions

Iterating over a mongodb cursor serially (waiting for callbacks before moving to next document)

Using mongoskin, I can do a query like this, which will return a cursor:
myCollection.find({}, function(err, resultCursor) {
    resultCursor.each(function(err, result) {
        // ...
    });
});
However, I'd like to call some async functions for each document, and only move on to the next item on the cursor after this has called back (similar to the eachSeries structure in the async.js module). E.g:
myCollection.find({}, function(err, resultCursor) {
    resultCursor.each(function(err, result) {
        externalAsyncFunction(result, function(err) {
            //externalAsyncFunction completed - now want to move to next doc
        });
    });
});
How could I do this?
Thanks
UPDATE:
I don't want to use toArray(), as this is a large batch operation and the results might not fit in memory in one go.
A more modern approach that uses async/await:
const cursor = db.collection("foo").find({});
while (await cursor.hasNext()) {
    const doc = await cursor.next();
    // process doc here
}
Notes:
This may become even simpler when async iterators arrive.
You'll probably want to add try/catch for error checking.
The containing function should be async or the code should be wrapped in (async function() { ... })() since it uses await.
If you want, add await new Promise(resolve => setTimeout(resolve, 1000)); (pause for 1 second) at the end of the while loop to show that it does process docs one after the other.
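For context, here is a minimal end-to-end sketch of the approach above, assuming the 3.x driver (where connect() resolves to a client); the connection string and names are placeholders:
const { MongoClient } = require("mongodb");
(async function () {
    // Placeholder connection string and database/collection names.
    const client = await MongoClient.connect("mongodb://localhost:27017");
    const db = client.db("test");
    try {
        const cursor = db.collection("foo").find({});
        while (await cursor.hasNext()) {
            const doc = await cursor.next();
            // process doc here, one at a time
        }
    } finally {
        await client.close();
    }
})();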
If you don't want to load all of the results into memory using toArray, you can iterate using the cursor with something like the following.
myCollection.find({}, function(err, resultCursor) {
    function processItem(err, item) {
        if (item === null) {
            return; // All done!
        }
        externalAsyncFunction(item, function(err) {
            resultCursor.nextObject(processItem);
        });
    }
    resultCursor.nextObject(processItem);
});
Since Node.js v10.3 you can use an async iterator:
const cursor = db.collection('foo').find({});
for await (const doc of cursor) {
    // do your thing
    // you can even use `await myAsyncOperation()` here
}
Jake Archibald wrote a great blog post about async iterators, that I came to know after reading #user993683's answer.
This works with large datasets by using setImmediate:
var cursor = collection.find({filter...}).cursor();
cursor.nextObject(function fn(err, item) {
    if (err || !item) return;
    setImmediate(fnAction, item, arg1, arg2, function() {
        cursor.nextObject(fn);
    });
});
function fnAction(item, arg1, arg2, callback) {
    // Here you can do whatever you want to do with your item.
    return callback();
}
If someone is looking for a Promise way of doing this (as opposed to using callbacks of nextObject), here it is. I am using Node v4.2.2 and mongo driver v2.1.7. This is kind of an asyncSeries version of Cursor.forEach():
function forEachSeries(cursor, iterator) {
    return new Promise(function(resolve, reject) {
        var count = 0;
        function processDoc(doc) {
            if (doc != null) {
                count++;
                return iterator(doc).then(function() {
                    return cursor.next().then(processDoc);
                });
            } else {
                resolve(count);
            }
        }
        cursor.next().then(processDoc);
    });
}
To use this, pass the cursor and an iterator that operates on each document asynchronously (like you would for Cursor.forEach). The iterator needs to return a promise, like most mongodb native driver functions do.
Say, you want to update all documents in the collection test. This is how you would do it:
var theDb;
MongoClient.connect(dbUrl).then(function(db) {
    theDb = db; // save it, we'll need to close the connection when done.
    var cur = db.collection('test').find();
    return forEachSeries(cur, function(doc) { // this is the iterator
        return db.collection('test').updateOne(
            {_id: doc._id},
            {$set: {updated: true}} // or whatever else you need to change
        );
        // updateOne returns a promise, if not supplied a callback. Just return it.
    });
})
.then(function(count) {
    console.log("All Done. Processed", count, "records");
    theDb.close();
});
You can do something like this using the async lib. The key point here is to check whether the current doc is null: if it is, it means you are finished. Note that this collects every document into an array, so it only suits result sets that fit in memory.
var array = []; // collects the docs; `callback` is the enclosing function's callback
async.series([
    function (cb) {
        cursor.each(function (err, doc) {
            if (err) {
                cb(err);
            } else if (doc === null) {
                cb();
            } else {
                console.log(doc);
                array.push(doc);
            }
        });
    }
], function (err) {
    callback(err, array);
});
You could use a Future (e.g. from the fibers package):
myCollection.find({}, function(err, resultCursor) {
    resultCursor.count(Meteor.bindEnvironment(function(err, count) {
        for (var i = 0; i < count; i++) {
            var itemFuture = new Future();
            resultCursor.nextObject(function(err, item) {
                itemFuture.return(item);
            });
            var item = itemFuture.wait();
            //do what you want with the item,
            //and continue with the loop if so
        }
    }));
});
You can get the results in an array and iterate using a recursive function, something like this:
myCollection.find({}).toArray(function (err, items) {
    var count = items.length;
    var fn = function () {
        // Note: this walks the array from the last item to the first.
        externalAsyncFunction(items[count - 1], function () {
            count -= 1;
            if (count) fn();
        });
    };
    fn();
});
Edit:
This is only applicable for small datasets; for larger ones you should use cursors, as mentioned in the other answers.
A more modern approach that uses for await:
const cursor = db.collection("foo").find({});
for await (const doc of cursor) {
    // process doc here with await
    await processDoc(doc);
}
You could use simple setTimeouts. This is an example in TypeScript running on Node.js (I am using promises via the 'when' module, but it can be done without them as well):
import mongodb = require("mongodb");
var dbServer = new mongodb.Server('localhost', 27017, {auto_reconnect: true}, {});
var db = new mongodb.Db('myDb', dbServer);
var util = require('util');
var when = require('when'); //npm install when
var dbDefer = when.defer();
db.open(function() {
    console.log('db opened...');
    dbDefer.resolve(db);
});
dbDefer.promise.then(function(db : mongodb.Db) {
    db.collection('myCollection', function (error, dataCol) {
        if (error) {
            console.error(error); return;
        }
        var doneReading = when.defer();
        var processOneRecordAsync = function(record) : When.Promise<string> {
            var result = when.defer();
            setTimeout(function() {
                //simulate a variable-length operation
                console.log(util.inspect(record));
                result.resolve('record processed');
            }, Math.random() * 5);
            return result.promise;
        };
        var runCursor = function (cursor : MongoCursor) {
            cursor.next(function(error : any, record : any) {
                if (error) {
                    console.log('an error occurred: ' + error);
                    return;
                }
                if (record) {
                    processOneRecordAsync(record).then(function(r) {
                        setTimeout(function() { runCursor(cursor); }, 1);
                    });
                }
                else {
                    //cursor exhausted
                    doneReading.resolve('done reading data.');
                }
            });
        };
        dataCol.find({}, function(error, cursor : MongoCursor) {
            if (!error) {
                setTimeout(function() { runCursor(cursor); }, 1);
            }
        });
        doneReading.promise.then(function(message : string) {
            //message='done reading data'
            console.log(message);
        });
    });
});

nested loops asynchronously in Node.js, next loop must start only after one gets completed

Check the algorithm below...
users = getAllUsers();
for (i = 0; i < users.length; i++) {
    contacts = getContactsOfUser(users[i].userId);
    contactsLength = contacts.length;
    for (j = 0; j < contactsLength; j++) {
        phones = getPhonesOfContacts(contacts[j].contactId);
        contacts[j].phones = phones;
    }
    users[i].contacts = contacts;
}
return users;
I want to implement the same logic in node.js.
I have tried using async with forEach, concat, and forEachSeries functions, but they all fail at the second level.
While the pointer is fetching the contacts of one user, the value of i increases and processing starts for the next user. It does not wait for the contacts and phones of one user to finish before starting on the next user, which is what I want to achieve. I want to end up with a properly populated users object; as it is, the sequencing gets ruined. Can anyone give me a general idea of how to achieve such a serial process? I am open to changing my algorithm as well.
In node.js you need to use the asynchronous way. Your code should look something like this:
var processUsers = function(callback) {
    getAllUsers(function(err, users) {
        async.forEach(users, function(user, callback) {
            getContactsOfUser(user.userId, function(err, contacts) {
                async.forEach(contacts, function(contact, callback) {
                    getPhonesOfContacts(contact.contactId, function(err, phones) {
                        contact.phones = phones;
                        callback();
                    });
                }, function(err) {
                    // All contacts are processed
                    user.contacts = contacts;
                    callback();
                });
            });
        }, function(err) {
            // All users are processed
            // Here is the finished result
            callback(undefined, users);
        });
    });
};
processUsers(function(err, users) {
    // users here
});
You could try this method without using async:
function getAllUserContacts(users, callback) {
    var index = 0;
    var getUserContacts = function() {
        getContactsOfUser(users[index].userId, function(contacts) {
            var index2 = 0;
            var getContactsPhones = function() {
                getPhonesOfContacts(contacts[index2].contactId, function(phones) {
                    contacts[index2].phones = phones;
                    if (index2 === (contacts.length - 1)) {
                        users[index].contacts = contacts;
                        if (index === (users.length - 1)) {
                            callback(users);
                        } else {
                            index++;
                            getUserContacts();
                        }
                    } else {
                        index2++;
                        getContactsPhones();
                    }
                });
            };
            getContactsPhones();
        });
    };
    getUserContacts();
}
//calling the function
getAllUsers(function(users) {
    getAllUserContacts(users, function(usersWithContacts) {
        console.log(usersWithContacts);
    });
});
//Asynchronous nested loop
async.eachSeries(allContact, function(item, cb) {
    async.eachSeries(item, function(secondItem, secondCb) {
        console.log(secondItem);
        return secondCb();
    }, function() {
        return cb();
    });
}, function() {
    console.log('after all process message');
});
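For comparison, on modern Node.js the same strictly sequential nesting can be written with async/await. A sketch, assuming the three getters return promises (e.g. wrapped with util.promisify):
// Assumes getAllUsers, getContactsOfUser and getPhonesOfContacts
// return promises; each await finishes before the next one starts.
async function processUsers() {
    const users = await getAllUsers();
    for (const user of users) {
        const contacts = await getContactsOfUser(user.userId);
        for (const contact of contacts) {
            contact.phones = await getPhonesOfContacts(contact.contactId);
        }
        user.contacts = contacts;
    }
    return users;
}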
