How to save data from mongodb into node.js cache?

I need help creating a simple function in Node.js that shows all rows in some table from MongoDB.
The second time the function runs, it should get the data from the Node.js cache instead of going to MongoDB.
Something like this idea:
function getData() {
  if (myCache == undefined) {
    // code that gets data from mongodb (i have it)
    // and inserts it into the node.js cache (TODO)
  } else {
    // code that gets data from the node.js cache (TODO)
  }
}

The general idea is to implement some form of asynchronous caching in which the cache object provides key-value storage. So, for example, extending your idea you could restructure your function to follow this pattern:
var myCache = {};

var getData = function (id, callback) {
  if (myCache.hasOwnProperty(id)) {
    if (myCache[id].hasOwnProperty("data")) { /* value is already in cache */
      return callback(null, myCache[id].data);
    }
    /* value is not yet in cache, so queue the callback */
    return myCache[id].queue.push(callback);
  }
  /* cache for the first time */
  myCache[id] = { "queue": [callback] };
  /* fetch data from MongoDB */
  collection.findOne({ "_id": id }, function (err, data) {
    var queue = myCache[id].queue;
    if (err) {
      /* on error, drop the entry so later calls can retry, and fail every waiter */
      delete myCache[id];
      return queue.forEach(function (cb) { cb(err); });
    }
    myCache[id].data = data;
    delete myCache[id].queue;
    queue.forEach(function (cb) {
      cb(null, data);
    });
  });
};
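For completeness, a call site might look like this (assuming collection is an open node-mongodb-native collection and the id exists; both names come from the example above):

getData("someId", function (err, data) {
  if (err) return console.error(err);
  // the first call for "someId" hits MongoDB; concurrent and later calls
  // are answered from myCache
  console.log(data);
});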


save updated models to mongodb

I have the following code to fetch some data from the db (mongo).
function getAllUsers() {
  var UsersPromise = Q.defer();
  UserSchema.find({}, function (err, data) {
    if (err) {
      UsersPromise.reject(err);
    } else {
      UsersPromise.resolve(data);
    }
  });
  return UsersPromise.promise;
}
Then I modify each of these models. I add certain fields to the model depending on the type of user. (This is working correctly).
function buildUsers(users) {
  // my code iterates over users and adds
  // properties as required.
  // Working fine.
  return users; // updated users.
}
Now I want to save these updated models back to mongo, and this is where it's making me pull my hair out.
function saveUsers(users) {
  // here, the users are received correctly. But the following line to save the users fails.
  var SaveUsersPromise = Q.defer();
  UserSchema.save(users, function (err, data) {
    if (err) {
      SaveUsersPromise.reject(err);
    } else {
      SaveUsersPromise.resolve(data);
    }
  });
  return SaveUsersPromise.promise;
}
Lastly I call these functions like:
DB.connect()
.then(getAllUsers)
.then(buildUsers)
.then(saveUsers)
.catch(errorHandler);
Everything works correctly until I call UserSchema.save. What could be the problem?
PS: I am using mongoose.
TIA.
save is a method on Mongoose document instances, not on the model, so you have to loop through users and save each one. Mongoose doesn't have bulk inserts implemented yet (see issue #723).
Here's a simple implementation using async.eachSeries:
function saveUsers(users) {
  var async = require('async'); // <== npm install async --save
  var SaveUsersPromise = Q.defer();
  async.eachSeries(users, function (user, done) {
    user.save(done); // user must be a Mongoose document object
  }, function (err) {
    if (err) {
      SaveUsersPromise.reject(err);
    } else {
      SaveUsersPromise.resolve();
    }
  });
  return SaveUsersPromise.promise;
}
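As a side note, recent Mongoose versions return a promise from document.save() when no callback is passed, so a sketch without the async module could look like this (assuming each element of users is a Mongoose document):

function saveUsers(users) {
  // Q.all resolves once every save() promise has resolved,
  // and rejects on the first failure
  return Q.all(users.map(function (user) {
    return user.save();
  }));
}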

How can I use a cursor.forEach() in MongoDB using Node.js?

I have a huge collection of documents in my DB and I'm wondering how I can run through all the documents and update them, each document with a different value.
The answer depends on the driver you're using. All MongoDB drivers I know have cursor.forEach() implemented one way or another.
Here are some examples:
node-mongodb-native
collection.find(query).forEach(function (doc) {
  // handle
}, function (err) {
  // done or error
});
mongojs
db.collection.find(query).forEach(function (err, doc) {
  // handle
});
monk
collection.find(query, { stream: true })
  .each(function (doc) {
    // handle doc
  })
  .error(function (err) {
    // handle error
  })
  .success(function () {
    // final callback
  });
mongoose
collection.find(query).stream()
  .on('data', function (doc) {
    // handle doc
  })
  .on('error', function (err) {
    // handle error
  })
  .on('end', function () {
    // final callback
  });
Updating documents inside of .forEach callback
The only problem with updating documents inside of a .forEach callback is that you have no idea when all the documents have been updated.
To solve this problem you should use some asynchronous control flow solution. Here are some options:
async
promises (when.js, bluebird)
Here is an example of using async, using its queue feature:
var q = async.queue(function (doc, callback) {
  // code for your update
  collection.update({
    _id: doc._id
  }, {
    $set: { hi: 'there' }
  }, {
    w: 1
  }, callback);
}, Infinity);

var cursor = collection.find(query);
cursor.each(function (err, doc) {
  if (err) throw err;
  if (doc) q.push(doc); // dispatching doc to async.queue
});

q.drain = function () {
  if (cursor.isClosed()) {
    console.log('all items have been processed');
    db.close();
  }
};
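Note that in async v3 the drain handler is registered with a method call rather than by assignment; if you are on the newer major version, the last block would instead be:

q.drain(function () {
  if (cursor.isClosed()) {
    console.log('all items have been processed');
    db.close();
  }
});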
Using the mongodb driver, and modern NodeJS with async/await, a good solution is to use next():
const collection = db.collection('things');
const cursor = collection.find({
  bla: 42 // find all things where bla is 42
});

let document;
while ((document = await cursor.next())) {
  await collection.findOneAndUpdate({
    _id: document._id
  }, {
    $set: {
      blu: 43
    }
  });
}
This results in only one document at a time being held in memory, as opposed to e.g. the accepted answer, where many documents get sucked into memory before processing of the documents starts. In cases of "huge collections" (as per the question) this may be important.
If documents are large, this can be improved further by using a projection, so that only those fields of documents that are required are fetched from the database.
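For instance, with the 3.x+ driver the projection is passed in the options object of find(); a minimal sketch using the fields from the example above:

const cursor = collection.find(
  { bla: 42 },
  { projection: { _id: 1, bla: 1 } } // fetch only what the update loop uses
);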
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

MongoClient.connect('mongodb://localhost:27017/crunchbase', function (err, db) {
  assert.equal(err, null);
  console.log("Successfully connected to MongoDB.");
  var query = {
    "category_code": "biotech"
  };
  db.collection('companies').find(query).toArray(function (err, docs) {
    assert.equal(err, null);
    assert.notEqual(docs.length, 0);
    docs.forEach(function (doc) {
      console.log(doc.name + " is a " + doc.category_code + " company.");
    });
    db.close();
  });
});
Notice that the call to .toArray makes the application fetch the entire dataset.
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

MongoClient.connect('mongodb://localhost:27017/crunchbase', function (err, db) {
  assert.equal(err, null);
  console.log("Successfully connected to MongoDB.");
  var query = {
    "category_code": "biotech"
  };
  var cursor = db.collection('companies').find(query);
  cursor.forEach(
    function (doc) {
      console.log(doc.name + " is a " + doc.category_code + " company.");
    },
    function (err) {
      assert.equal(err, null);
      return db.close();
    }
  );
});
Notice that the cursor returned by find() is assigned to var cursor. With this approach, instead of fetching all the data into memory and consuming it at once, we stream the data to our application. find() can create a cursor immediately because it doesn't actually make a request to the database until we try to use some of the documents it will provide. The point of the cursor is to describe our query. The second parameter to cursor.forEach shows what to do when the cursor is exhausted or an error occurs.
In the initial version of the above code, it was toArray() which forced the database call. It meant we needed ALL the documents and wanted them to be in an array.
Also, MongoDB returns data in batches: each request from the application's cursor fetches the next batch of results from the server.
forEach is better than toArray because we can process documents as they come in until we reach the end. Contrast it with toArray, where we wait for ALL the documents to be retrieved and the entire array is built, which means we get no advantage from the fact that the driver and the database system work together to batch results to your application. Batching is meant to provide efficiency in terms of memory overhead and execution time. Take advantage of it if you can in your application.
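If the default batch size doesn't suit your workload, it can be set explicitly on the cursor (the value below is an arbitrary illustration):

var cursor = db.collection('companies')
  .find(query)
  .batchSize(100); // the server returns up to 100 documents per round trip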
None of the previous answers mentions batching the updates. That makes them extremely slow 🐌 - tens or hundreds of times slower than a solution using bulkWrite.
Let's say you want to double the value of a field in each document. Here's how to do that fast 💨 and with fixed memory consumption:
// Double the value of the 'foo' field in all documents
let bulkWrites = [];
const bulkDocumentsSize = 100; // how many documents to write at once
let i = 0;

db.collection.find({ ... }).forEach(doc => {
  i++;

  // Update the document...
  doc.foo = doc.foo * 2;

  // Add the update to an array of bulk operations to execute later
  bulkWrites.push({
    replaceOne: {
      filter: { _id: doc._id },
      replacement: doc,
    },
  });

  // Update the documents and log progress every `bulkDocumentsSize` documents
  if (i % bulkDocumentsSize === 0) {
    db.collection.bulkWrite(bulkWrites);
    bulkWrites = [];
    print(`Updated ${i} documents`);
  }
});

// Flush the last <100 bulk writes (bulkWrite errors on an empty array)
if (bulkWrites.length > 0) {
  db.collection.bulkWrite(bulkWrites);
}
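If the order of the writes doesn't matter, passing { ordered: false } lets the server apply the operations without stopping at the first failure, which is often faster:

db.collection.bulkWrite(bulkWrites, { ordered: false });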
And here is an example of using a Mongoose cursor asynchronously with promises:
new Promise(function (resolve, reject) {
  collection.find(query).cursor()
    .on('data', function (doc) {
      // ...
    })
    .on('error', reject)
    .on('end', resolve);
})
.then(function () {
  // ...
});
Reference:
Mongoose cursors
Streams and promises
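Mongoose query cursors also expose eachAsync(), which resolves once iteration completes; a sketch equivalent to the stream example above:

collection.find(query).cursor()
  .eachAsync(function (doc) {
    // handle doc; returning a promise here makes eachAsync wait for it
  })
  .then(function () {
    // all documents processed
  });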
Leonid's answer is great, but I want to reinforce the importance of using async/promises and to give a different solution with a promises example.
The simplest solution to this problem is to loop over each document and call an update. Usually you don't need to close the db connection after each request, but if you do need to close it, be careful: only close it once you are sure that all updates have finished executing.
A common mistake here is to call db.close() after all updates are dispatched without knowing if they have completed. If you do that, you'll get errors.
Wrong implementation:
collection.find(query).each(function (err, doc) {
  if (err) throw err;
  if (doc) {
    collection.update(query, update, function (err, updated) {
      // handle
    });
  } else {
    db.close(); // if there is any pending update, it will throw an error there
  }
});
However, as db.close() is also an async operation (its signature has a callback option) you may get lucky, and this code may finish without errors. It tends to work only when you need to update just a few docs in a small collection (so don't rely on it).
Correct solution:
Since a solution with async was already proposed by Leonid, below follows a solution using Q promises.
var Q = require('q');
var client = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';

client.connect(url, function (err, db) {
  if (err) throw err;

  var promises = [];
  var query = {}; // select all docs
  var collection = db.collection('demo');
  var cursor = collection.find(query);

  // read all docs
  cursor.each(function (err, doc) {
    if (err) throw err;

    if (doc) {
      // create a promise to update the doc
      var query = doc;
      var update = { $set: { hi: 'there' } };
      var promise =
        Q.npost(collection, 'update', [query, update])
          .then(function (updated) {
            console.log('Updated: ' + updated);
          });
      promises.push(promise);
    } else {
      // close the connection after executing all promises
      Q.all(promises)
        .then(function () {
          if (cursor.isClosed()) {
            console.log('all items have been processed');
            db.close();
          }
        })
        .fail(console.error);
    }
  });
});
node-mongodb-native now supports an endCallback parameter to cursor.forEach for handling what happens AFTER the whole iteration; refer to the official documentation for details: http://mongodb.github.io/node-mongodb-native/2.2/api/Cursor.html#forEach.
Also note that .each is deprecated in the nodejs native driver now.
You can now use (in an async function, of course):
for await (let doc of collection.find(query)) {
  await updateDoc(doc);
}
// all done
which nicely serializes all updates.
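If strict serialization is slower than you need, a small variation processes the documents in bounded concurrent batches (the batch size of 10 is an arbitrary illustration):

const pending = [];
for await (const doc of collection.find(query)) {
  pending.push(updateDoc(doc));
  if (pending.length >= 10) {
    await Promise.all(pending); // wait for the current batch to finish
    pending.length = 0;
  }
}
await Promise.all(pending); // flush the remainder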
Let's assume that we have the below MongoDB data in place.
Database name: users
Collection name: jobs
===========================
Documents
{ "_id" : ObjectId("1"), "job" : "Security", "name" : "Jack", "age" : 35 }
{ "_id" : ObjectId("2"), "job" : "Development", "name" : "Tito" }
{ "_id" : ObjectId("3"), "job" : "Design", "name" : "Ben", "age" : 45}
{ "_id" : ObjectId("4"), "job" : "Programming", "name" : "John", "age" : 25 }
{ "_id" : ObjectId("5"), "job" : "IT", "name" : "ricko", "age" : 45 }
==========================
This code:
var MongoClient = require('mongodb').MongoClient;
var dbURL = 'mongodb://localhost/users';

MongoClient.connect(dbURL, (err, db) => {
  if (err) {
    throw err;
  } else {
    console.log('Connection successful');
    var dataBase = db.db();
    // loop with forEach
    dataBase.collection('jobs').find().forEach(function (myDoc) {
      console.log('There is a job called ' + myDoc.job + ' in the database');
    });
  }
});
I looked for a solution with good performance and ended up creating a mix of what I found, which I think works well:
/**
 * This method will read the documents from the cursor in batches and invoke the callback
 * for each batch in parallel.
 * IT IS STRONGLY RECOMMENDED TO CREATE THE CURSOR WITH A batchSize OPTION THAT MATCHES
 * THE VALUE OF batchSize. This way the performance benefits are maxed out, since
 * the mongo instance will send into our process memory the same number of documents
 * that we handle concurrently each time, so no memory space is wasted
 * and the memory usage is also limited.
 *
 * Example of usage:
 * const cursor = await collection.aggregate([
 *     {...}, ...],
 *     {
 *         cursor: {batchSize: BATCH_SIZE} // Limiting memory use
 *     });
 * DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc) => ...)
 *
 * @param cursor - A cursor to batch process on.
 * We can get this from our collection.js API by either using aggregateCursor/findCursor
 * @param batchSize - The batch size, should match the batchSize of the cursor option.
 * @param callback - Callback that should be async, will be called in parallel for each batch.
 * @return {Promise<void>}
 */
static async concurrentCursorBatchProcessing(cursor, batchSize, callback) {
  let doc;
  const docsBatch = [];

  while ((doc = await cursor.next())) {
    docsBatch.push(doc);

    if (docsBatch.length >= batchSize) {
      await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
        return callback(currDoc);
      });

      // Emptying the batch array
      docsBatch.splice(0, docsBatch.length);
    }
  }

  // Checking if there is a last batch remaining since it was smaller than batchSize
  if (docsBatch.length > 0) {
    await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
      return callback(currDoc);
    });
  }
}
An example of usage for reading many big documents and updating them:
const cursor = await collection.aggregate([
{
...
}
], {
cursor: {batchSize: BATCH_SIZE}, // Limiting memory use
allowDiskUse: true
});
const bulkUpdates = [];
await DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc: any) => {
const update: any = {
updateOne: {
filter: {
...
},
update: {
...
}
}
};
bulkUpdates.push(update);
// Updating if we read too many docs to clear space in memory
await this.bulkWriteIfNeeded(bulkUpdates, collection);
});
// Making sure we updated everything
await this.bulkWriteIfNeeded(bulkUpdates, collection, true);
...

private async bulkWriteIfNeeded(
    bulkUpdates: any[], collection: any,
    forceUpdate = false, flushBatchSize) {

  if (bulkUpdates.length >= flushBatchSize || forceUpdate) {
    // concurrentPromiseChunked is a method that loops over an array in a concurrent way using lodash.chunk and Promise.map
    await PromiseUtils.concurrentPromiseChunked(bulkUpdates, (updatesChunk: any) => {
      return collection.bulkWrite(updatesChunk);
    });

    // Emptying the array
    bulkUpdates.splice(0, bulkUpdates.length);
  }
}
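PromiseUtils.concurrentPromiseAll is the author's own helper and isn't shown; a minimal, hypothetical stand-in consistent with how it is called might be:

class PromiseUtils {
  // Run fn for every item and wait for all of them; the docsBatch array
  // above already bounds concurrency at batchSize, so no throttling is needed here
  static concurrentPromiseAll(items: any[], fn: (item: any) => Promise<any>) {
    return Promise.all(items.map((item) => fn(item)));
  }
}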

How to make synchronous call with mongoose and nodejs

I'm designing a client/server synchronization feature. The client sends a bunch of change events to the server. The server will do creation, deletion or modification depending on each item's status. After the database operations, the server needs to send a summary back to the client.
Below is an excerpt from my server-side code, built with mongoose and restify.
var EventModel = mongoose.model('Event', eventSchema);

server.post("/sync", function (req, res, next) {
  var events = req.params.events;
  var created = [], deleted = [], updated = [];

  events.forEach(function (elem) {
    if (elem.status == 0) {
      // Delete
      EventModel.remove({ _id: elem.uuid }, function (err, event) {
        if (!err) deleted.push({ uuid: elem.uuid });
      });
    } else if (elem.status == 1) {
      // Create and update uuid
      var event = new EventModel(elem);
      event.save(function (err, doc) {
        if (!err) {
          elem.uuid = event._doc._id;
          created.push(elem);
        }
      });
    } else if (elem.status == 2) {
      // Update
      EventModel.findOne({ _id: elem.uuid }, function (err, event) {
        event.save(function (err, doc) {
          if (!err) updated.push({ uuid: elem.uuid });
        });
      });
    }
  });

  // Notify client what are processed.
  // PROBLEM: created, deleted, updated are always empty!
  res.send({ processed: { created: created, deleted: deleted, updated: updated } });
});
Since mongoose does CRUD asynchronously, the created, deleted and updated arrays in the response are always empty.
Is there any way to run the mongoose operations in series?
As stated in the comments you could use the npm async module.
Alternatively, you may prefer to nest callbacks (but this might lead to what is known as callback hell, viz many nested callbacks) or take advantage of the mongoose .then() method - see http://mongoosejs.com/docs/promises.html
Here you can do:
EventModel.remove(args).then((removeResponse) => {
  return EventModel.findOne(args);
}).then((findResponse) => {
  // etc
});
These requests will run sequentially, one after another.
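Applied to the original /sync handler, a minimal sketch could collect the per-event promises and respond once they all settle (this relies on Mongoose queries being thenable when no callback is passed; only the delete branch is shown, the other statuses follow the same pattern):

server.post("/sync", function (req, res, next) {
  var summary = { created: [], deleted: [], updated: [] };

  var ops = req.params.events.map(function (elem) {
    if (elem.status == 0) {
      return EventModel.remove({ _id: elem.uuid }).then(function () {
        summary.deleted.push({ uuid: elem.uuid });
      });
    }
    // ... status 1 (create) and 2 (update) follow the same pattern ...
  });

  Promise.all(ops).then(function () {
    res.send({ processed: summary });
  }).catch(next);
});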

How do I return error header if a db call throws an error?

I have a small data gathering web app running with NodeJS and Couchbase. The requirement is, that when a 3rd party pushes some data to us and we are able to process it, we return the 200 header, but if there are any problems with storing that data, we return 500. This means that they can re-try with the failed data batch.
I'm having an issue where the 200 is always returned (because the DB calls are completed asynchronously). Here's an example:
...
var app = express();

function create(req, res) {
  var error = false;

  // Parse all the entries in request
  for (var i = 0; i < req.body.length; i++) {
    var event = req.body[i];

    if (!event.email) {
      // log error to file
      error = true;
      res.send("Event object does not have an email address!", 500);
    }

    // Create the id index value
    var event_id = 'blah';

    // See if record already exists
    db.get(event_id, function (err, result) {
      var doc = result.value;
      if (doc === undefined) {
        // Add a new record
        db.add(event_id, event, function (err, result) {
          if (err) {
            error = true;
            res.send('There were processing errors', 500);
          }
        });
      }
    });
  }

  if (error)
    res.send("Try again", 500);
  else
    res.send("OK", 200);
}

app.post('/create', create);
Is there a way of making the app wait for those DB calls to complete, i.e. for this function to be synchronous? Or am I using the wrong tech for this? :(
I decided to go with NodeJS+Couchbase because we are likely to have a very high number of calls, where the data (small JSON objects) must be written, read and deleted. EDIT: Ah, the data structure is likely to change for various events, so being able to store non-uniformly shaped documents is of great advantage!
This is a typical use case for the async library, which is a utility-belt library with lots of patterns to work with asynchronous functions.
Since you need to call an asynchronous function for each record, you can use async.each, which executes an asynchronous function for all elements of an array. A last callback is called when all asynchronous tasks are finished.
var async = require('async');
var app = express();

function handleEvent(event, callback) {
  if (!event.email) {
    return callback(new Error('Event object does not have an email address!'));
  }

  var event_id = 'blah';

  db.get(event_id, function (err, result) {
    var doc = result.value;
    if (doc === undefined) {
      // Add a new record
      db.add(event_id, event, function (err, result) {
        if (err) {
          callback(new Error('There were processing errors'));
        } else {
          callback(null);
        }
      });
    } else {
      // record already exists, nothing to store
      callback(null);
    }
  });
}

function create(req, res) {
  // https://github.com/caolan/async#each
  async.each(req.body, handleEvent, function (err) {
    if (err)
      res.send(err.message, 500);
    else
      res.send('OK', 200);
  });
}

app.post('/create', create);

How to add an array of objects to parse.com db?

I'm developing an application and need to add many items at a time.
How can I do that with node.js?
This is the npm module for parse.com but there is no method like
insertAll("Foo", [objs...], ...)
I don't want to insert single object every time.
Write a convenience function that interfaces between your application and parse.com. You will have to write the iteration code once (or debug mine).
var async = require('async');
var Parse = require('node-parse-api').Parse;

var APP_ID = "";
var MASTER_KEY = "";
var parseApp = new Parse(APP_ID, MASTER_KEY);

function insertAll(className, objs, callback) {
  // an iterator function(obj, done) that inserts the object
  // with the given class and calls done() upon completion.
  var insertOne = function (obj, done) {
    parseApp.insert(className, obj, function (err, response) {
      if (err) { return done(err); }
      // maybe do other stuff here before calling done?
      var res = JSON.parse(response);
      if (!res.objectId) { return done(new Error('No object id')); }
      done(null, res.objectId);
    });
  };

  // async.map calls insertOne with each obj in objs. the callback is executed
  // once every iterator function has called back `done(null, data)` or any one
  // has called back `done(err)`. use async.mapLimit if throttling is needed
  async.map(objs, insertOne, function (err, objectIds) {
    if (err) { return callback(err); }
    // no errors
    callback(null, objectIds);
  });
}

// Once you've written this and made the function accessible to your other code,
// you only need this outer interface.
insertAll('Foo', [{ a: 'b' }, { a: 'd' }], function (err, ids) {
  if (err) {
    console.log('Error inserting all the Foos');
    console.log(err);
  } else {
    console.log('Success!');
  }
});
