I have some problem with transactions in ArangoDB+nodejs. I need to do something like this:
but when the second insert failed, the first one didn't rollback!
please help me with an example!
here is my code:
var transaction = function (collections,params,callback)
function ()
var insertedDataId;
var relationsArrayIds=[];
console.log("ERROR: Arango--insert-->err: %j", err);
//throw "Error: "+err;
return false;
var relations=params.relations;
for(var i=0;i<relations.length;i++)
console.log("ERROR: Arango--insert.edge-->err:23232 %j", err);
console.log("after return");
return false
console.log("transaction before true");
function next(result)
console.log("transaction is ok:",result);
console.log("transaction is not OK:",result);
console.log("catch->error in -->Arango.transaction: ",e);
First of all, there seems to be a misunderstanding about how to write the action that is supposed to be executed. This action is executed directly on the database server, hence you can't use any functionality provided by the Arango JavaScript API.
If you want to design your action it has to run in the arango shell or on the server console (bin/arangod data --console)
I took a look into your code and assume you want to store relations between users and merchants. As Arango comes with a nice graph module you could follow the following approach :
// First we define a graph consisting of 2 document collections ("users" and "merchants") and 2 edge collections (one per relation type, in this example "contactRequested" and "boughtSomethingFrom").
// Note that in this definition the relation "boughtSomethingFrom" is only allowed from a user to a merchant. Of course this is just one way to design it, you have to do it the way it suits you the best.
var edgeDefinitions = [{
collection: "contactRequested",
from: ["users", "merchants"],
to: ["users", "merchants"]
}, {
collection: "boughtSomethingFrom",
from: ["users"],
to: ["merchants"]
// Now we create a graph called "user_merchant_graph" and in the callback function execute a transaction
db.graph.create("user_merchant_graph", edgeDefinitions, function(err, ret, message) {
// Lets define the action for the transaction, again this will be executed directly on the server ......
var action = function (params) {
// We have to require the database module ....
var db = require("internal").db;
var relationsArrayIds = [];
// now we store the user provided to the function
var insertedUserId = db["users"].insert(params.data)._id;
var relations = params.relations;
// Now we loop through the relations object, storing each merchant and its relation to the user
Object.keys(relations).forEach(function (relation) {
// store merchant
var insertedMerchantId = db["merchants"].insert({merchantName : relation})._id;
// store relation as edge from "insertedUserId" to "insertedMerchantId".
var edgeId = db[relations[relation].relation].insert(insertedUserId, insertedMerchantId, relations[relation].additionalData)._id;
// End of action
var options = {};
options.params = {
data: {
userName : "someUserName",
userSurname : "someUserSurname"
relations : {
merchantA : {relation : "contactRequested", additionalData : {data :"someData"}},
merchantB : {relation : "boughtSomethingFrom", additionalData : {data :"someData"}},
merchantC : {relation : "contactRequested", additionalData : {data :"someData"}}
// Now we call the transaction module ... a note to the collections parameter, it has to be an object containing the keys "write" and "read" which have a list of all collections as value into which the action is writing /reading from
// This collections object is NOT available within your action, the only thing passed as argument to your action is "options.params" !!
db.transaction.submit({write : ["users", "merchants", "contactRequested", "boughtSomethingFrom"]}, action, options, function(err, ret, message) {
//some callback
As for transactions, they do work: you can give this code a shot, and if you, for example, break the storing of the edges (change it to "var edgeId = db[relations[relation].relation].insert(relations[relation].additionalData)._id;")
you will see that your user and merchant have not been stored
I hope this helps
I fetched the records using
query. Those records contain the relation ID of another document (table). For each record in the result of the find query above, I want to get the matching records from the related table.
db.collection('serviceBooking').find({'request_to_sp_user_id': docs._id.toString()}).toArray(function (err, serviceBookingDocs) {
if (serviceBookingDocs.length) {
var asyncCalls = [];
serviceBookingDocs.forEach(function (bookingRecord, key) {
var temp = {};
temp.userDetails = {};
//Async call for getting the user details for all users
asyncCalls.push(function (callback) {
db.collection('userDetails').findOne({'user_id': new mongo.ObjectID(bookingRecord.booked_by_user_id)}, function (err, userDetailsDocs) {
db.collection('serviceBookingDetails').find({'serviceBookingId': bookingRecord._id.toString()}).toArray(function (err, bookingDetailsDocs) {
if (userDetailsDocs) {
if (bookingDetailsDocs.length) {
temp.bookingDetails = bookingDetailsDocs;
bookingDetailsDocs.forEach(function (bookDetailItems, key) {
db.collection('serviceCatalog').findOne({'_id': new mongo.ObjectID(bookDetailItems.catalogId), isDeleted: 0}, function (err, spCatalogs) {
db.collection('spServiceCatalog').findOne({'_id': new mongo.ObjectID(spCatalogs.serviceCategory)}, function (err, spServiceCatalogDocs) {
if (spCatalogs) {
spCatalogs.catalogName = spServiceCatalogDocs.name;
temp.bookingDetails[key].serviceCatalgs = spCatalogs;
} else {
spCatalogs.catalogName = null;
temp.bookingDetails[key].serviceCatalgs = spCatalogs;
callback(null, temp);
} else {
callback(null, null);
I tried using a callback function, but it does not get the category name values from the mainCategory document.
I also tried to read the internally fetched category name outside the forEach(), but it does not show up in the result in the temp array.
This may help you.
It says..
Functions are the only thing in JavaScript that "enclose" scope.
This means that the variable items in your inner callback function are not accessible on the outer scope.
You can define a variable in the outer scope so it will be visible to all the inner ones:
I have a huge collection of documents in my DB and I'm wondering how can I run through all the documents and update them, each document with a different value.
The answer depends on the driver you're using. All MongoDB drivers I know have cursor.forEach() implemented one way or another.
Here are some examples:
collection.find(query).forEach(function(doc) {
// handle
}, function(err) {
// done or error
db.collection.find(query).forEach(function(err, doc) {
// handle
collection.find(query, { stream: true })
// handle doc
// handle error
// final callback
.on('data', function(doc){
// handle doc
.on('error', function(err){
// handle error
.on('end', function(){
// final callback
Updating documents inside of .forEach callback
The only problem with updating documents inside of .forEach callback is that you have no idea when all documents are updated.
To solve this problem you should use some asynchronous control flow solution. Here are some options:
promises (when.js, bluebird)
Here is an example of using async, using its queue feature:
var q = async.queue(function (doc, callback) {
// code for your update
_id: doc._id
}, {
$set: {hi: 'there'}
}, {
w: 1
}, callback);
}, Infinity);
var cursor = collection.find(query);
cursor.each(function(err, doc) {
if (err) throw err;
if (doc) q.push(doc); // dispatching doc to async.queue
q.drain = function() {
if (cursor.isClosed()) {
console.log('all items have been processed');
Using the mongodb driver, and modern NodeJS with async/await, a good solution is to use next():
const collection = db.collection('things')
const cursor = collection.find({
bla: 42 // find all things where bla is 42
let document;
while ((document = await cursor.next())) {
await collection.findOneAndUpdate({
_id: document._id
}, {
$set: {
blu: 43
This results in only one document at a time being required in memory, as opposed to e.g. the accepted answer, where many documents get sucked into memory, before processing of the documents starts. In cases of "huge collections" (as per the question) this may be important.
If documents are large, this can be improved further by using a projection, so that only those fields of documents that are required are fetched from the database.
var MongoClient = require('mongodb').MongoClient,
assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
assert.equal(err, null);
console.log("Successfully connected to MongoDB.");
var query = {
"category_code": "biotech"
db.collection('companies').find(query).toArray(function(err, docs) {
assert.equal(err, null);
assert.notEqual(docs.length, 0);
docs.forEach(function(doc) {
console.log(doc.name + " is a " + doc.category_code + " company.");
Notice that the call to .toArray makes the application fetch the entire dataset.
var MongoClient = require('mongodb').MongoClient,
assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
assert.equal(err, null);
console.log("Successfully connected to MongoDB.");
var query = {
"category_code": "biotech"
var cursor = db.collection('companies').find(query);
function(doc) {
console.log(doc.name + " is a " + doc.category_code + " company.");
function(err) {
assert.equal(err, null);
return db.close();
Notice that the cursor returned by find() is assigned to var cursor. With this approach, instead of fetching all data into memory and consuming it at once, we're streaming the data to our application. find() can create a cursor immediately because it doesn't actually make a request to the database until we try to use some of the documents it will provide. The point of the cursor is to describe our query. The 2nd parameter to cursor.forEach specifies what to do when the cursor is exhausted or an error occurs.
In the initial version of the above code, it was toArray() which forced the database call. It meant we needed ALL the documents and wanted them to be in an array.
Also, MongoDB returns data in batches. The image below shows the requests from cursors (from the application) to MongoDB.
forEach is better than toArray because we can process documents as they come in until we reach the end. Contrast it with toArray - where we wait for ALL the documents to be retrieved and the entire array is built. This means we're not getting any advantage from the fact that the driver and the database system are working together to batch results to your application. Batching is meant to provide efficiency in terms of memory overhead and the execution time. Take advantage of it, if you can in your application.
None of the previous answers mentions batching the updates. That makes them extremely slow 🐌 - tens or hundreds of times slower than a solution using bulkWrite.
Let's say you want to double the value of a field in each document. Here's how to do that fast 💨 and with fixed memory consumption:
// Double the value of the 'foo' field in all documents
let bulkWrites = [];
const bulkDocumentsSize = 100; // how many documents to write at once
let i = 0;
db.collection.find({ ... }).forEach(doc => {
// Update the document...
doc.foo = doc.foo * 2;
// Add the update to an array of bulk operations to execute later
replaceOne: {
filter: { _id: doc._id },
replacement: doc,
// Update the documents and log progress every `bulkDocumentsSize` documents
if (i % bulkDocumentsSize === 0) {
bulkWrites = [];
print(`Updated ${i} documents`);
// Flush the last <100 bulk writes
And here is an example of using a Mongoose cursor async with promises:
new Promise(function (resolve, reject) {
.on('data', function(doc) {
// ...
.on('error', reject)
.on('end', resolve);
.then(function () {
// ...
Mongoose cursors
Streams and promises
Leonid's answer is great, but I want to reinforce the importance of using async/promises and to give a different solution with a promises example.
The simplest solution to this problem is to loop over each document with forEach and call an update. Usually, you don't need to close the db connection after each request, but if you do need to close the connection, be careful. You must only close it once you are sure that all updates have finished executing.
A common mistake here is to call db.close() after all updates are dispatched without knowing if they have completed. If you do that, you'll get errors.
Wrong implementation:
collection.find(query).each(function(err, doc) {
if (err) throw err;
if (doc) {
collection.update(query, update, function(err, updated) {
// handle
else {
db.close(); // if there is any pending update, it will throw an error there
However, as db.close() is also an async operation (its signature has a callback option), you may be lucky and this code can finish without errors. It may work only when you need to update just a few docs in a small collection (so, don't try).
Correct solution:
As a solution with async was already proposed by Leonid, below follows a solution using Q promises.
var Q = require('q');
var client = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';
client.connect(url, function(err, db) {
if (err) throw err;
var promises = [];
var query = {}; // select all docs
var collection = db.collection('demo');
var cursor = collection.find(query);
// read all docs
cursor.each(function(err, doc) {
if (err) throw err;
if (doc) {
// create a promise to update the doc
var query = doc;
var update = { $set: {hi: 'there'} };
var promise =
Q.npost(collection, 'update', [query, update])
console.log('Updated: ' + updated);
} else {
// close the connection after executing all promises
.then(function() {
if (cursor.isClosed()) {
console.log('all items have been processed');
The node-mongodb-native driver now supports an endCallback parameter to cursor.forEach, which lets you handle the event AFTER the whole iteration; refer to the official documentation for details: http://mongodb.github.io/node-mongodb-native/2.2/api/Cursor.html#forEach.
Also note that .each is deprecated in the nodejs native driver now.
You can now use (in an async function, of course):
for await (let doc of collection.find(query)) {
await updateDoc(doc);
// all done
which nicely serializes all updates.
let's assume that we have the below MongoDB data in place.
Database name: users
Collection name: jobs
{ "_id" : ObjectId("1"), "job" : "Security", "name" : "Jack", "age" : 35 }
{ "_id" : ObjectId("2"), "job" : "Development", "name" : "Tito" }
{ "_id" : ObjectId("3"), "job" : "Design", "name" : "Ben", "age" : 45}
{ "_id" : ObjectId("4"), "job" : "Programming", "name" : "John", "age" : 25 }
{ "_id" : ObjectId("5"), "job" : "IT", "name" : "ricko", "age" : 45 }
This code:
var MongoClient = require('mongodb').MongoClient;
var dbURL = 'mongodb://localhost/users';
MongoClient.connect(dbURL, (err, db) => {
if (err) {
throw err;
} else {
console.log('Connection successful');
var dataBase = db.db();
// loop forEach
console.log('There is a job called :'+ myDoc.job +'in Database')})
I looked for a solution with good performance and ended up creating a mix of what I found, which I think works well:
* This method will read the documents from the cursor in batches and invoke the callback
* for each batch in parallel.
* THE VALUE OF batchSize. This way the performance benefits are maxed out since
* the mongo instance will send into our process memory the same number of documents
* that we handle in concurrent each time, so no memory space is wasted
* and also the memory usage is limited.
* Example of usage:
* const cursor = await collection.aggregate([
{...}, ...],
cursor: {batchSize: BATCH_SIZE} // Limiting memory use
DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc) => ...)
* #param cursor - A cursor to batch process on.
* We can get this from our collection.js API by either using aggregateCursor/findCursor
* #param batchSize - The batch size, should match the batchSize of the cursor option.
* #param callback - Callback that should be async, will be called in parallel for each batch.
* #return {Promise<void>}
static async concurrentCursorBatchProcessing(cursor, batchSize, callback) {
let doc;
const docsBatch = [];
while ((doc = await cursor.next())) {
if (docsBatch.length >= batchSize) {
await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
return callback(currDoc);
// Emptying the batch array
docsBatch.splice(0, docsBatch.length);
// Checking if there is a last batch remaining since it was smaller than batchSize
if (docsBatch.length > 0) {
await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
return callback(currDoc);
An example of usage for reading many big documents and updating them:
const cursor = await collection.aggregate([
], {
cursor: {batchSize: BATCH_SIZE}, // Limiting memory use
allowDiskUse: true
const bulkUpdates = [];
await DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc: any) => {
const update: any = {
updateOne: {
filter: {
update: {
// Updating if we read too many docs to clear space in memory
await this.bulkWriteIfNeeded(bulkUpdates, collection);
// Making sure we updated everything
await this.bulkWriteIfNeeded(bulkUpdates, collection, true);
private async bulkWriteParametersIfNeeded(
bulkUpdates: any[], collection: any,
forceUpdate = false, flushBatchSize) {
if (bulkUpdates.length >= flushBatchSize || forceUpdate) {
// concurrentPromiseChunked is a method that loops over an array in a concurrent way using lodash.chunk and Promise.map
await PromiseUtils.concurrentPromiseChunked(bulkUpsertParameters, (upsertChunk: any) => {
return techniquesParametersCollection.bulkWrite(upsertChunk);
// Emptying the array
bulkUpsertParameters.splice(0, bulkUpsertParameters.length);
I have a list of users. I don't want to publish all user data to the client, especially emails. I have multiple publish methods where i can use:
Meteor.publish('usersData', function() {
return Users.find({}, {
fields: {
emails: 0
But what if I or other programmer forget to filter fields and just publish whole collection:
Meteor.publish('users', function() {
return Users.find();
That's a problem. There should be a global setting to filter the data in a collection. Is there any way to do this in current Meteor?
You can create a method you use instead of the normal collection.find method that you use anywhere you need to publish users. An example could be:
function findUsers(query) {
return Meteor.users.find(query || {}, { fields: { emails: 0 } });
And then you can just remind your programmers to use the findUsers method:
Meteor.publish('userData', function () {
return findUsers({ points: { $gt: 5 } });
How about writing a collection observer that throws an exception whenever a user with email fields present is published?
The observer runs independently for each connected user and triggers every time a user object has been pushed to the user collection. If it is not the current user, throw an error if the object contains the email field.
Your team should then notice these exceptions during development.
Meteor.publish("userCheck", function () {
var self = this;
var handle = Meteor.users.find({}).observeChanges({
added: function(id) {
var user = Meteor.users.findOne({_id: id});
if (user.emails && self.userId !== id) {
throw new Meteor.Error(500, "Must not publish other people's email!");
self.onStop(function () {
One of the advantages of NodeJS is its async and non-blocking I/O, which in my case is great on the one hand, but breaks my neck every day on the other hand.
I consider myself a NodeJS / Async novice and I often end up having such code:
function(req, res) {
req.assert("name", "Lobbyname is required").notEmpty();
req.assert("name", "Lobbyname length should be between 4 and 64 characters").len(4, 64);
req.assert("game", "Game not found").isInt();
var userId = req.user.id;
var errors = req.validationErrors();
var pg_errors = [];
var games = null;
if (errors) {
client.query("SELECT * FROM games", function(err, result) {
if (!err) {
games = result.rows;
res.render("lobby/create", {
title: "Create a new lobby",
games: games,
errors: errors.toString()
else {
else {
errors = null;
client.query("SELECT COUNT(*) as in_lobbies FROM users u RIGHT JOIN lobby_userlist ul ON ul.user_id = u.id WHERE u.id = $1", [userId], function(err, result) {
if (!err) {
if (result.rows[0].in_lobbies < 1) {
client.query("SELECT COUNT(*) as hosting_lobbies FROM lobbies WHERE owner = $1", [userId], function(err, result) {
if (!err) {
if (result.rows[0].hosting_lobbies < 1) {
client.query("INSERT INTO lobbies(name, game, owner) VALUES($1, $2, $3)", [req.param("name"), req.param("game"), userId], function(err, result) {
if (!err) {
else {
else {
errors = "You can only host one lobby at a time";
else {
client.query("SELECT * FROM games", function(err, result) {
if (!err) {
games = result.rows;
res.render("lobby/create", {
title: "Create a new lobby",
games: games,
errors: errors
else {
else {
console.log("pg_errors _end");
if (pg_errors.length < 1) {
console.log("no errors");
else {
res.send("error service operation failed");
This is an example I have written using the following npm packages:
pg (native)
express-validator (middleware of node-validator)
passport (auth middleware)
Checking whether the input given by the user is valid is the least of my problems: I have these checks where I assert the variables and give back a rendered version of the page, printing out the errors to the user.
BUT if we get past the validation errors in the first place, we assume the "lobby" is ready to be inserted into the database. Before that, I want to ensure that the user has no other lobby open and is not a member of another lobby.
Well, now I end up putting one query into another, and theoretically I would have to put my view render function (res.render()) into every query callback in case the query encounters an error or returns a result which indicates that the user is not allowed to create a lobby.
I don't want that and it doesn't seem very practicable.
I tried removing the render logic and every other logic from the query callbacks and instead let the query callbacks set error arrays or variables which would indicate a success or a failure and below my query code I would check if(errors) renderPageWithErrors.
This led to strange errors due to the async behaviour of nodejs, in which case res.redirect() was called after res.render() and stuff like that.
I had to move my res.render back into the query callbacks.
Is there a proper way of doing this?
You might want to look into an async library such as https://github.com/caolan/async. It helps structure async code so that it doesn't turn into a mess like this. There are different methods depending on your requirements from simple series and parallel execution to things like waterfall to auto which does dependency tracking.
get_data: function(callback){
// async code to get some data
make_folder: function(callback){
// async code to create a directory to store a file in
// this is run at the same time as getting the data
write_file: ['get_data', 'make_folder', function(callback){
// once there is some data and the directory exists,
// write the data to a file in the directory
callback(null, filename);
email_link: ['write_file', function(callback, results){
// once the file is written let's email a link to it...
// results.write_file contains the filename returned by write_file.
}, function(err) {
// everything is done or an error occurred
The other nice thing it does is consolidate all errors into a single callback. That way you only have to handle errors in one place instead of them sprinkled throughout your code.
You might want to check for https://github.com/0ctave/node-sync library as well. It's a syntax sugar for nodejs Fibers, a way to write asynchronous code in a traditional way without breaking nodejs event loop model. There are a lot of discussions about pros and cons of using Fibers, but I prefer code readability and ease of development over potential small resource usage increase.
I don't know all of your code logic, but function above can look something like this:
function(req, res) {
Sync(function() {
req.assert("name", "Lobbyname is required").notEmpty();
req.assert("name", "Lobbyname length should be between 4 and 64 characters").len(4, 64);
req.assert("game", "Game not found").isInt();
var userId = req.user.id;
var errors = req.validationErrors();
var pg_errors = [];
var games = null;
if (errors) {
var games = client.query.sync(client, "SELECT * FROM games").rows;
games = result;
res.render("lobby/create", {
title: "Create a new lobby",
games: games,
errors: errors.toString()
else {
errors = null;
var result = client.query.sync(client, "SELECT COUNT(*) as in_lobbies FROM users u RIGHT JOIN lobby_userlist ul ON ul.user_id = u.id WHERE u.id = $1", [userId]);
if (result.rows[0].in_lobbies < 1) {
var result = client.query.sync(client, "SELECT COUNT(*) as hosting_lobbies FROM lobbies WHERE owner = $1", [userId]);
if (result.rows[0].hosting_lobbies < 1) {
var res = client.query.sync(clien, "INSERT INTO lobbies(name, game, owner) VALUES($1, $2, $3)", [req.param("name"), req.param("game"), userId]);
else {
errors = "You can only host one lobby at a time";
else {
var games = client.query.sync(client, "SELECT * FROM games").rows;
res.render("lobby/create", {
title: "Create a new lobby",
games: games,
errors: errors
}, function(err) {
if(err) {
// do your error handling here
I have this simple nodejs application, which generates dummy data for my web application.
All it does is:
Drops the dummy database
Populates the inventory collection
Populates the invoices collection
Populates the const data collection
Of course, all the actions are asynchronous and I want to execute them sequentially, one after another. For me, it was simpler to write something to manage this kind of the flow, however, I would like a mainstream solution, which would support other kinds of flows. For instance, running in parallel and stopping all on the first failure.
For your reference, please, find below the skeleton, depicting my solution:
/*global require, console, process*/
var mongo, db, inventory, createChain;
function generateInventory(count) {
// returns the generated inventory
function generateInvoices(count, inventory) {
// returns the generated invoices
function generateConst() {
// returns the generated const data
mongo = require('mongojs');
db = mongo.connect('dummy', ['invoices', 'const', 'inventory']);
createChain = function () {
"use strict";
var chain = [false], i = 0;
return {
add: function (action, errMsg, resultCallback) {
chain[chain.length - 1] = {action: action, errMsg: errMsg, resultCallback: resultCallback};
return this;
invoke: function (exit) {
var str, that = this;
if (chain[i]) {
chain[i].action(function (err, o) {
if (err || !o) {
str = chain[i].errMsg;
if (err && err.message) {
str = str + ": " + err.message;
} else {
if (chain[i].resultCallback) {
i += 1;
} else {
if (exit) {
.add(function (callback) {
"use strict";
console.log("Dropping the dummy database.");
}, "Failed to drop the dummy database")
.add(function (callback) {
"use strict";
console.log("Populating the inventory.");
db.inventory.insert(generateInventory(100), callback);
}, "Failed to populate the inventory collection", function (res) {
"use strict";
inventory = res;
.add(function (callback) {
"use strict";
console.log("Populating the invoices.");
db.invoices.insert(generateInvoices(10, inventory), callback);
}, "Failed to populate the invoices collection")
.add(function (callback) {
"use strict";
console.log("Populating the const.");
db["const"].insert(generateConst(), callback);
}, "Failed to populate the const collection")
Can anyone suggest a relevant nodejs package, which would also be easy to use?
Thank you very much.
Use the async module to provide just about any type of flow control you're ever likely to need. In particular, the series method provides sequential flow control.
Actually, for sequential flow control, you should use waterfall
As an example:
// r=1
// r=2
// e=null
// r=3
This will execute sequentially.
If you callback an error early, (i.e. cb("error")), then it will directly go to the final function(e,r), with e="error" and r=undefined
Notice how function(r,cb){} can become precomposed in a util library to handle commonly reused blocks and make things in the future easier.