Mongoose with async queue & waterfall - node.js

I am trying to import a large amount of data with Mongoose. As a newbie, I have failed to set up the flow control properly with the various mechanisms offered by async. Glad if someone could point me to an appropriate solution. Thanks.
var async = require('async'),
    mongoose = require('mongoose');

mongoose.connect('mongodb://localhost/test');
var Cat = mongoose.model('Cat', { name: String });

// Imagine this is a huge array with a million items.
var content = ['aaa', 'bbb', 'ccc'];

var queries = [];
content.forEach(function(name) {
  queries.push(function(cb) {
    var obj = new Cat({ name: name });
    obj.save(function(err) {
      console.log("SAVED: " + name);
      console.log(err);
    });
    return true;
  });
});
// FAILED: async.parallel adds all content to db,
// but it would exhaust the resource with too many parallel tasks.
async.parallel(queries, function(err, result) {
  if (err)
    return console.log(err);
  console.log(result);
});

// FAILED: save the first item but not the rest
async.waterfall(queries, function(err, result) {
  if (err)
    return console.log(err);
  console.log(result);
});

// FAILED: same as async.waterfall, async.queue saves the first item only
var q = async.queue(function(name, cb) {
  var obj = new Cat({ name: name });
  obj.save(function(err) {
    console.log("SAVED: " + name);
    console.log(err);
  });
});

q.push(content, function (err) {
  console.log('finished processing queue');
});

I think eachLimit or eachSeries fits your situation best:
var content = ['aaa', 'bbb', 'ccc'];

async.eachLimit(content, 10, function(name, done) {
  var obj = new Cat({ name : name });
  obj.save(done);
  // if you want to print some status info, use this instead:
  //
  // obj.save(function(err) {
  //   console.log("SAVED: " + name);
  //   console.log(err);
  //   done(err);
  // });
  //
}, function(err) {
  // handle any errors;
});
With eachLimit, you can run X queries 'in parallel' (10 in the example above) to speed things up without exhausting resources. eachSeries waits for the previous save to finish before it continues with the next, effectively saving one object at a time.
Notice that with each*, you won't get a list of the (saved) objects back (it's a bit of a fire-and-forget mechanism where you're not interested in the outcome, bar any errors). If you do want a list of the saved objects in the end, you can use the equivalent map* functions: mapLimit and mapSeries.
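For example, a minimal sketch with mapLimit that collects the saved documents (assuming the same Cat model as above; Mongoose's save passes the saved document to its callback):

async.mapLimit(content, 10, function(name, done) {
  var obj = new Cat({ name: name });
  // save calls done(err, savedDoc), so the saved cat lands in the results array
  obj.save(done);
}, function(err, savedCats) {
  if (err) return console.log(err);
  console.log(savedCats.length + ' cats saved');
});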

Related

page renders before getting all the values sorted

I think the rendering takes place before the searching of the string in the files has finished. I have tried different methods but can't seem to get this working. Any help will be appreciated; I'm a noob at Node.js. I'm trying to get the id of the user, query to get all the data, then see if he is in any of the given lists, and finally render the page.
const j = [];
let name = '';
const filename = [];
var ext = '';

module.exports = function(app, express) {
  app.use(bodyParser.urlencoded({ extended: false }));
  app.use(bodyParser.json());

  app.post('/cusdetails', isLoggedIn, function (req, res) {
    var cusid = req.body.cusid;
    var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
    connection.query(insertQuerys, [cusid], function(err, rows) {
      rows.forEach((row) => {
        name = row.fncus;
      });
      fs.readdir('./views/iplist', function(err, files) {
        if (err)
          throw err;
        for (var index in files) {
          j.push(files[index]);
        }
        j.forEach(function(value) {
          var k = require('path').resolve(__dirname, '../views/iplist/', value);
          fs.exists(k, function(fileok) {
            if (fileok) {
              fs.readFile(k, function(err, content) {
                if (err) throw err;
                if (content.indexOf(name) > -1) {
                  ext = path.extname(k);
                  filename.push(path.basename(k, ext));
                }
              });
            }
            else {
              console.log(" FileNotExist ");
            }
          });
        });
      });
      console.log(filename);
      res.render('cusdetails.ejs', { rows: rows, user: req.user, aml: filename });
    });
  });
};
You can create a simple Promise wrapper and then use it inside an async/await function to pause execution until it resolves.
// use the mysql2 package as it provides promises, less work than writing promise wrappers
const mysql = require('mysql2/promise');

// create the connection to the database
// (note: with mysql2/promise, createConnection returns a promise)
const connectionPromise = mysql.createConnection({
  host: 'localhost',
  user: 'root',
  database: 'test'
});

// sample wrapper
function some(k) {
  // more advisable to have local variables; why do you need this to be an array?
  var filename = [];
  return new Promise((resolve, reject) => {
    // doing this is also not recommended, check the nodejs documentation on **fs.exists** for more info
    fs.exists(k, function(fileok) {
      if (fileok) {
        fs.readFile(k, function(err, content) {
          if (err) return reject(err);
          if (content.indexOf(name) > -1) {
            ext = path.extname(k);
            filename.push(path.basename(k, ext));
          }
          // resolve whether or not the name was found,
          // otherwise the promise would never settle
          resolve(filename);
        });
      }
      else {
        // reject(new Error("FileNotExist"))
        console.log(" FileNotExist ");
      }
    });
  });
}
// note the use of async
app.post('/cusdetails', isLoggedIn, async function (req, res) {
  var cusid = req.body.cusid;
  var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
  // using await to pause execution until the connection is ready and the query is finished
  const connection = await connectionPromise;
  const [rows] = await connection.query(insertQuerys, [cusid]);
  rows.forEach((row) => {
    name = row.fncus;
  });
  // then you can
  var result = await some(k)
  ...
Note however that this way you lose the advantage of concurrent execution, as it's kind of blocking. If the result of one call is not used in the other, you can start both in parallel and await the results to achieve sequencing, like:
const rowsPromise = connection.query(insertQuerys, [cusid]);
const resultPromise = some(k);
const [rows] = await rowsPromise;   // do something
const result = await resultPromise; // do something
JavaScript is asynchronous. This means that if you have a function with a callback (i.e. your query), the callback will be called asynchronously, at an unknown time, while the other code executes.
You need to look up some tutorials on how to deal with callbacks to get a proper understanding of them. Another method is using async/await and/or promises.
Basically, if you take the following code:
console.log("this will print first");
setTimeout(function () {
console.log("this will print last");
}, 1000);
console.log("this will print second");
If you run the code above, the top level is executed synchronously: it first calls console.log, then it executes setTimeout, which itself is synchronous. It sets a timer, then says "I'm ready", and the code continues to the other console.log. After 1 second (1000 milliseconds), the callback passed to setTimeout is executed, and only then is that console.log called. You cannot make the rest of the code wait this way; you need to restructure your code or read up on promises.
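For contrast, a minimal sketch of the promise approach mentioned above: wrap setTimeout in a Promise so that an async function can actually wait for it:

function delay(ms) {
  // resolve the promise once the timer fires
  return new Promise(function(resolve) {
    setTimeout(resolve, ms);
  });
}

async function run() {
  console.log("this will print first");
  await delay(1000); // run() is suspended here; nothing below executes until the timer fires
  console.log("this will print second");
}

run();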

async foreach inside async series nodejs

I'm working with the node async library, but I'm not able to execute things in the sequence I want, and I don't know where I'm going wrong.
Here is the code; in comments I have numbered the order.
Currently it executes in the order 2, 3, 4, 5, 1, but I want the order 1, 2, 3, 4, 5. Kindly help.
function getAsExhibitors(req, res) {
  //getting all exhibitors against an event
  var exhibitors = [];
  var eac_app_names = [];
  async.series([function(callback) {
    models.EacExhibitorsExt.find({ deleted: false, userid: req.user._id }).sort({ modified: -1 }).exec(function(err, myExhibitors) {
      exhibitors = myExhibitors;
      callback();
    });
  }, function(callback) {
    async.forEach(exhibitors, function(exhibitor, callback) {
      models.Eac.findById(exhibitor.eventid).exec(function(err, eac) {
        eac_app_names[exhibitors.indexOf(exhibitor)] = eac;
        console.log("-----------------1--------------" + eac_app_names);
      });
      console.log("-----------------2--------------" + eac_app_names);
      callback();
    }, function(err) {
      console.log("-----------------3--------------" + eac_app_names);
      callback();
    });
  }], function(err) { // This function gets called after the two tasks have called their "task callbacks"
    if (err) return next(err);
    // Here locals will be populated with 'exhibitors' and 'apps'
    console.log("-------------------------4------" + eac_app_names);
    console.log("-------------------------5------" + eac_app_names.name);
    res.locals.exhibitors = exhibitors;
    res.locals.eac_app_names = eac_app_names;
    res.render('eac/eac_reg_as_exhibitor', { title: "My Event Exhibitors", asexhibitor: exhibitors, app_names: eac_app_names });
  });
};
All Mongoose methods are asynchronous. In your scenario, try it this way (note that callback() is now invoked inside exec, once the query has finished):
function getAsExhibitors(req, res) {
  //getting all exhibitors against an event
  var exhibitors = [];
  var eac_app_names = [];
  async.series([function(callback) {
    models.EacExhibitorsExt.find({ deleted: false, userid: req.user._id }).sort({ modified: -1 }).exec(function(err, myExhibitors) {
      exhibitors = myExhibitors;
      callback();
    });
  }, function(callback) {
    async.forEach(exhibitors, function(exhibitor, callback) {
      models.Eac.findById(exhibitor.eventid).exec(function(err, eac) {
        eac_app_names[exhibitors.indexOf(exhibitor)] = eac;
        console.log("-----------------1--------------" + eac_app_names);
        console.log("-----------------2--------------" + eac_app_names);
        callback();
      });
    }, function(err) {
      console.log("-----------------3--------------" + eac_app_names);
      callback();
    });
  }], function(err) { // This function gets called after the two tasks have called their "task callbacks"
    if (err) return next(err);
    // Here locals will be populated with 'exhibitors' and 'apps'
    console.log("-------------------------4------" + eac_app_names);
    console.log("-------------------------5------" + eac_app_names.name);
    res.locals.exhibitors = exhibitors;
    res.locals.eac_app_names = eac_app_names;
    res.render('eac/eac_reg_as_exhibitor', { title: "My Event Exhibitors", asexhibitor: exhibitors, app_names: eac_app_names });
  });
};
You are welcome to use ES6 with generators.
Try out co-foreach-series to take each array element and execute the async functions one by one.
ForEach Series Example
foreach(yourArray, function(element, index) {
  // Each of these functions will be executed one after another
  co(function*() {
    // Do some async task, and wait until this task is finished
    yield yourAsyncFunc();
    yield doOtherAsyncTasks();
  });
});
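Note that yield only works on "yieldables" such as promises or thunks, so yourAsyncFunc needs to return one. A sketch, where someCallbackStyleTask is a hypothetical callback-style function:

function yourAsyncFunc() {
  // wrap the callback API in a promise so co can yield it
  return new Promise(function(resolve, reject) {
    someCallbackStyleTask(function(err, result) {
      if (err) return reject(err);
      resolve(result);
    });
  });
}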

NodeJS and MongoDB send response when finish all [duplicate]

I want to drop some mongodb collections, but that's an asynchronous task. The code will be:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/xxx');
var conn = mongoose.connection;

['aaa', 'bbb', 'ccc'].forEach(function(name) {
  conn.collection(name).drop(function(err) {
    console.log('dropped');
  });
});
console.log('all dropped');
The console displays:
all dropped
dropped
dropped
dropped
What is the simplest way to make sure all dropped is printed after all collections have been dropped? Any third-party library can be used to simplify the code.
Use Promises.
var mongoose = require('mongoose');
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;

var promises = ['aaa', 'bbb', 'ccc'].map(function(name) {
  return new Promise(function(resolve, reject) {
    var collection = conn.collection(name);
    collection.drop(function(err) {
      if (err) { return reject(err); }
      console.log('dropped ' + name);
      resolve();
    });
  });
});

Promise.all(promises)
  .then(function() { console.log('all dropped'); })
  .catch(console.error);
This drops each collection, printing “dropped” after each one, and then prints “all dropped” when complete. If an error occurs, it is displayed to stderr.
Previous answer (this pre-dates Node’s native support for Promises):
Use Q promises or Bluebird promises.
With Q:
var Q = require('q');
var mongoose = require('mongoose');
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;

var promises = ['aaa', 'bbb', 'ccc'].map(function(name) {
  var collection = conn.collection(name);
  return Q.ninvoke(collection, 'drop')
    .then(function() { console.log('dropped ' + name); });
});

Q.all(promises)
  .then(function() { console.log('all dropped'); })
  .fail(console.error);
With Bluebird:
var Promise = require('bluebird');
var mongoose = Promise.promisifyAll(require('mongoose'));
mongoose.connect('your MongoDB connection string');
var conn = mongoose.connection;

var promises = ['aaa', 'bbb', 'ccc'].map(function(name) {
  return conn.collection(name).dropAsync().then(function() {
    console.log('dropped ' + name);
  });
});

Promise.all(promises)
  .then(function() { console.log('all dropped'); })
  .error(console.error);
I see you are using mongoose, so you are talking about server-side JavaScript. In that case I advise looking at the async module and using async.parallel(...). You will find this module really helpful; it was developed to solve the problem you are struggling with. Your code may look like this:
var async = require('async');

var calls = [];
['aaa', 'bbb', 'ccc'].forEach(function(name) {
  calls.push(function(callback) {
    conn.collection(name).drop(function(err) {
      if (err)
        return callback(err);
      console.log('dropped');
      callback(null, name);
    });
  });
});

async.parallel(calls, function(err, result) {
  /* this code will run after all calls finished the job or
     when any of the calls passes an error */
  if (err)
    return console.log(err);
  console.log(result);
});
The way to do it is to pass the tasks a callback that updates a shared counter. When the shared counter reaches zero you know that all tasks have finished so you can continue with your normal flow.
var ntasks_left_to_go = 4;

var callback = function() {
  ntasks_left_to_go -= 1;
  if (ntasks_left_to_go <= 0) {
    console.log('All tasks have completed. Do your stuff');
  }
};

task1(callback);
task2(callback);
task3(callback);
task4(callback);
Of course, there are many ways to make this kind of code more generic or reusable, and any of the many async programming libraries out there should have at least one function to do this kind of thing.
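For instance, a minimal reusable sketch of that counter pattern (a hypothetical helper, essentially what libraries like async do under the hood), reusing task1 through task4 from above:

function whenAllDone(n, onDone) {
  // returns a callback that fires onDone after being called n times
  return function() {
    n -= 1;
    if (n <= 0) onDone();
  };
}

var done = whenAllDone(4, function() {
  console.log('All tasks have completed. Do your stuff');
});
task1(done);
task2(done);
task3(done);
task4(done);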
Expanding upon @freakish's answer, async also offers an each method, which seems especially suited for your case:
var async = require('async');

async.each(['aaa', 'bbb', 'ccc'], function(name, callback) {
  conn.collection(name).drop(callback);
}, function(err) {
  if (err) { return console.log(err); }
  console.log('all dropped');
});
IMHO, this makes the code both more efficient and more legible. I've taken the liberty of removing the console.log('dropped') - if you want it, use this instead:
var async = require('async');

async.each(['aaa', 'bbb', 'ccc'], function(name, callback) {
  // if you really want the console.log('dropped'),
  // replace the 'callback' here with an anonymous function
  conn.collection(name).drop(function(err) {
    if (err) { return callback(err); }
    console.log('dropped');
    callback();
  });
}, function(err) {
  if (err) { return console.log(err); }
  console.log('all dropped');
});
I do this without external libraries:
var yourArray = ['aaa', 'bbb', 'ccc'];
var counter = [];

yourArray.forEach(function(name) {
  conn.collection(name).drop(function(err) {
    counter.push(true);
    console.log('dropped');
    if (counter.length === yourArray.length) {
      console.log('all dropped');
    }
  });
});
All these answers are quite old. Since the beginning of 2013, Mongoose has gradually added promise support for all queries, so that would be the recommended way of structuring several async calls in the required order going forward, I guess.
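For instance, with a Mongoose version where connection.dropCollection returns a promise when no callback is passed, something along these lines should work (a sketch; check your Mongoose/driver version before relying on it):

var drops = ['aaa', 'bbb', 'ccc'].map(function(name) {
  return conn.dropCollection(name).then(function() {
    console.log('dropped ' + name);
  });
});

Promise.all(drops)
  .then(function() { console.log('all dropped'); })
  .catch(console.error);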
With deferred (another promise/deferred implementation) you can do:
// Setup 'pdrop', promise version of the 'drop' method
var deferred = require('deferred');
mongoose.Collection.prototype.pdrop =
    deferred.promisify(mongoose.Collection.prototype.drop);

// Drop collections:
deferred.map(['aaa', 'bbb', 'ccc'], function(name) {
  return conn.collection(name).pdrop()(function() {
    console.log("dropped");
  });
}).end(function() {
  console.log("all dropped");
}, null);
If you are using Babel or a similar transpiler with async/await support, you could do:
function onDrop() {
  console.log("dropped");
}

async function dropAll(collections) {
  // drop() with no callback returns a promise; collect one promise per collection
  const drops = collections.map(col => conn.collection(col).drop().then(onDrop));
  // wait for every drop to finish
  await Promise.all(drops);
  console.log("all dropped");
}
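A hypothetical call site; the async function returns a promise, so errors can be caught on it:

dropAll(['aaa', 'bbb', 'ccc']).catch(console.error);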

strange Mongoose behaviour - document will not save into DB

In the following code, I try to populate my dev DB with some test data. I would like to first delete all documents and then add new test ones:
var mongoose = require('mongoose')
  , Plan = mongoose.model('Plan')
  , Async = require('async')

Async.series([
  function(callback) {
    // delete all records
    Plan.find(function(err, docs) {
      for (d in docs)
        Plan.remove(d, function(err) {
          if (err) console.log("error removing records " + err)
        });
    });
    callback();
  },
  function(callback) {
    var planArray = [
      { title: 'Plan A', body: 'Restaurant Financial Plan' },
      { title: 'Plan B', body: 'Coffeeshop Financial Plan' },
      { title: 'Plan C', body: 'bar Financial Plan' }
    ]
    var arrayLength = planArray.length;
    for (var i = 0; i < arrayLength; i++) {
      var p = new Plan(planArray[i])
      p.save(function(err, saved) {
        if (err)
          { console.log("error creating fixture " + err) }
        else {
          console.log(saved)
        }
      })
    }
    callback();
  }
])
The interesting (strange) behaviour is this:
- the code runs and removes all documents, but does not add the new test ones.
- no errors in the console; console.log(saved) prints each new document to the console successfully.
- if I remove the first Async function (delete all records), then the new docs are saved into the DB.
A Mongoose quirk, or my misunderstanding of async flow?
There were a few problems. Firstly, you have a for loop that is firing off async removes, but these were likely not completing before your first callback was called. Better to use Async.each instead.
There also seems to be some function-name collision happening. So for a complete example, see the following:
var mongoose = require('mongoose'),
    Async = require('async'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/test');

var planSchema = new Schema({
  title: String,
  body: String
});

var Plan = mongoose.model('Plan', planSchema);

Async.series([
  function(call1) {
    Plan.find(function(err, docs) {
      if (err)
        throw err;
      if (docs.length > 0) {
        Async.each(docs, function(d, call2) {
          Plan.remove(d, function(err) {
            if (err)
              throw err;
            console.log("deleting: " + d);
            call2();
          });
        }, function(err) {
          // only continue once every remove has finished
          call1();
        });
      } else {
        call1();
      }
    });
  },
  function(call3) {
    var planArray = [
      { title: 'Plan A', body: 'Plan A' },
      { title: 'Plan B', body: 'Plan B' }
    ];
    var arrayLength = planArray.length;
    for (var i = 0; i < arrayLength; i++) {
      var p = new Plan(planArray[i]);
      p.save(function(err, saved) {
        if (err)
          throw err;
        console.log("saving: " + saved);
      });
    }
    call3();
  }
]);
My guess is the latter: a misunderstanding of async flow. Your callback in the first function is invoked before you finish finding and removing documents. So while you're still finding and removing them, you are already adding some more in the second function, but those will then be found and removed by the first.
You need to call the first callback only after deleting all documents. Try putting an async.each within your Plan.find callback:
Async.series([
  function(callback) {
    // delete all records
    Plan.find(function(err, docs) {
      Async.each(
        docs,                          // your array
        function removeDoc(d, cb) {    // iterator function
          Plan.remove(d, function(err) {
            if (err) console.log("error removing records " + err);
            return cb();
          });
        },
        callback                       // executed after the iterator is done
      );
    });
  },
  ...
Incidentally, I believe Plan.remove({}, function(err){...}) deletes all documents; no need to iterate over each document unless of course you're doing something else.
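That would collapse the whole first task to something like this sketch:

Async.series([
  function(callback) {
    // the empty condition {} matches every document
    Plan.remove({}, callback);
  },
  // ...then insert the fixtures as in the second function above
]);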
The second function starts execution right after you call callback(), i.e. before the find and remove calls have finished. You have to wait until find and remove are done, and only then call callback().
Have a look at queue method from async: https://github.com/caolan/async#queue

nested loops asynchronously in Node.js, next loop must start only after one gets completed

Check the algorithm below...
users = getAllUsers();
for (i = 0; i < users.length; i++) {
  contacts = getContactsOfUser(users[i].userId);
  contactsLength = contacts.length;
  for (j = 0; j < contactsLength; j++) {
    phones = getPhonesOfContacts(contacts[j].contactId);
    contacts[j].phones = phones;
  }
  users[i].contacts = contacts;
}
return users;
I want to implement this same logic in Node.js.
I have tried using async with the forEach, concat, and forEachSeries functions, but all of them fail at the second level.
While the contacts of one user are still being fetched, the value of i increases and processing starts for the next users. It does not wait for the contacts and phones of one user to finish before starting on the next user, which is what I want to achieve: the users object populated properly.
As it is, all the sequencing gets ruined. Can anyone give me a general idea of how I can achieve such a series process? I am open to changing my algorithm as well.
In Node.js you need to do it the asynchronous way. Your code should look something like this:
var processUsers = function(callback) {
  getAllUsers(function(err, users) {
    async.forEach(users, function(user, callback) {
      getContactsOfUser(user.userId, function(err, contacts) {
        async.forEach(contacts, function(contact, callback) {
          getPhonesOfContacts(contact.contactId, function(err, phones) {
            contact.phones = phones;
            callback();
          });
        }, function(err) {
          // All contacts are processed
          user.contacts = contacts;
          callback();
        });
      });
    }, function(err) {
      // All users are processed
      // Here is the finished result
      callback(undefined, users);
    });
  });
};

processUsers(function(err, users) {
  // users here
});
You could try this method without using async:
function getAllUserContacts(users, callback) {
  var index = 0;
  var results = [];
  var getUserContacts = function() {
    getContactsOfUser(users[index].userId, function(contacts) {
      var index2 = 0;
      var getContactsPhones = function() {
        getPhonesOfContacts(contacts[index2].contactId, function(phones) {
          contacts[index2].phones = phones;
          if (index2 === (contacts.length - 1)) {
            users[index].contacts = contacts;
            if (index === (users.length - 1)) {
              callback(users);
            } else {
              index++;
              getUserContacts();
            }
          } else {
            index2++;
            getContactsPhones();
          }
        });
      };
      getContactsPhones();
    });
  };
  getUserContacts();
}
// calling the function
getAllUsers(function(users) {
  getAllUserContacts(users, function(usersWithContacts) {
    console.log(usersWithContacts);
  });
});
// Asynchronous nested loop
async.eachSeries(allContact, function(item, cb) {
  async.eachSeries(item, function(secondItem, secondCb) {
    console.log(secondItem);
    return secondCb();
  }, function() {
    return cb();
  });
}, function() {
  console.log('after all process message');
});
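For completeness, here is a sketch of the same sequential nesting with async/await, assuming promise-returning versions of getContactsOfUser and getPhonesOfContacts (the originals above are callback-style, so that is an assumption):

async function getAllUserContactsSequential(users) {
  for (const user of users) {
    // wait for this user's contacts before moving to the next user
    const contacts = await getContactsOfUser(user.userId);
    for (const contact of contacts) {
      // wait for this contact's phones before the next contact
      contact.phones = await getPhonesOfContacts(contact.contactId);
    }
    user.contacts = contacts;
  }
  return users;
}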
