Refactoring nested callbacks, node.js, async - node.js

function indexArticles(callback) {
fs.readdir("posts/", function(err, files) {
async.map(files, readPost, function(err, markdown) {
async.map(markdown, parse, function(err, results) {
async.sortBy(results, function(obj, callback) {
callback(err, obj.date);
}, function(err, sorted) {
callback( {"articles": sorted.reverse()} );
});
});
});
});
}
I'm trying to figure out how to make this prettier -- as you can tell I'm using caolan's async library, but I'm not sure which of the control flow structures to use. It seems like if I use async.waterfall, for example, that results in quite a bit more code, with each step having to be wrapped in an anonymous function. For example, this is just the first two lines of the nested version with waterfall:
function indexArticles(callback) {
async.waterfall([
function(callback) {
fs.readdir("posts/", function(err, files) {
callback(err, files)
})
},
function(files, callback) {
async.map(files, readPost, function(err, markdown) {
callback(err, markdown)
})
}])
}
How would you improve this?
If there were a way to partially apply arguments NOT only from the left, then I could see doing, for example,
function indexArticles(callback) {
async.waterfall([
async.apply(fs.readdir, "posts/"),
async.apply(async.map, __, readPost),
async.apply(async.map, __, parse),
// etc...
])
}

This is an interesting problem, as you need to bind arguments both to the left and to the right of your iterator functions, so neither bind nor bindRight (of which there are a few implementations on StackOverflow) will work for you. There are a few options for you here:
(1) First, in your async.waterfall example, you have:
function(callback) {
fs.readdir("posts/", function(err, files) {
callback(err, files)
})
}
which is the same as:
function(callback) {
fs.readdir("posts/", callback)
}
Using Function.bind and this method, your entire function indexArticles could be written:
function indexArticles(callback) {
async.waterfall([
fs.readdir.bind(this, 'posts/'),
function(files, cb) { async.map(files, readPost, cb); },
function(text, cb) { async.map(text, parse, cb); },
function(results, cb) { async.sortBy(results, function(obj, callback) {
callback(null, obj.date);
}, cb) }
], function(err, sorted) {
callback( {"articles": sorted.reverse()} );
});
};
Which is a bit shorter.
(2) If you really want to avoid the wrapping functions, you can use a type of partial function application. First, at the top of your file (or in a module, etc), define a function called partial:
/**
 * Builds a partially applied version of `fn` in which `undefined` marks a
 * placeholder slot.
 *
 * @param {Function} fn - function to wrap.
 * @param {...*} boundArgs - argument template; every `undefined` entry is a
 *   slot to be filled, left to right, by the arguments of the returned function.
 * @returns {Function} wrapper that calls `fn` (with the wrapper's own `this`)
 *   using the template with its slots filled, and returns `fn`'s result.
 */
var partial = function(fn) {
  var boundArgs = Array.prototype.slice.call(arguments, 1);
  return function() {
    // Work on a fresh copy for every call: filling the shared template in
    // place would make the slots keep the first call's values forever.
    var args = boundArgs.slice();
    var currentArg = 0;
    for (var i = 0; i < args.length && currentArg < arguments.length; i++) {
      if (args[i] === undefined) {
        args[i] = arguments[currentArg++];
      }
    }
    return fn.apply(this, args);
  };
};
This function takes a function and any number of arguments, and replaces undefined values in the arguments list with the actual arguments when the function is called. You would then use it like this:
function indexArticles(callback) {
async.waterfall([
fs.readdir.bind(this, 'posts/'),
partial(async.map, undefined, readPost, undefined),
partial(async.map, undefined, parse, undefined),
partial(async.sortBy, undefined, function(obj, callback) {
callback(null, obj.date);
}, undefined)
], function(err, sorted) {
callback( {"articles": sorted.reverse()} );
});
}
So, partial(async.map, undefined, readPost, undefined) returns a function that, when called by the Async library as fn(files, callback), fills in files for the first undefined and callback for the second undefined, ending in a call to async.map(files, readPost, callback).
(3) There is also a version of partial for Function.prototype at this StackOverflow answer, allowing you to use the syntax: async.map.partial(undefined, readPost, undefined); however, I would probably recommend against modifying Function.prototype in this way, and just use partial as a function.
In the end, it's up to you which method is the most readable and maintainable.

Looks like I have some overlap with Brandon's answer, but here's my take:
var async = require("async")
//dummy function
function passThrough(arg, callback){
callback(null, arg)
}
//your code rewritten to only call the dummy.
//same structure, didn't want to think about files and markdown
function indexArticles(callback) {
passThrough("posts/", function(err, files) {
async.map(files, passThrough, function(err, markdown) {
async.map(markdown, passThrough,
function(err, results) {
async.sortBy(results, function(obj, callback) {
callback(err, obj);
},
function(err, sorted) {
callback( {"articles": sorted.reverse()} );
});
});
});
});
}
indexArticles(console.log)
// Variant of apply that inserts the pre-applied arguments between the
// call-time arguments and the trailing callback, i.e. it ends up calling
// fn(arg, arg, appliedArg, appliedArg, callback)
function coolerApply(fn) {
  var applied = [].slice.call(arguments, 1);
  return function () {
    var leading = [].slice.call(arguments);
    // splice(-1) detaches the last call-time argument (the callback) and
    // leaves everything before it in `leading`.
    var trailing = leading.splice(-1);
    return fn.apply(null, leading.concat(applied, trailing));
  };
}
//my version of your code that uses coolerApply
function indexArticles2(callback){
async.waterfall([
async.apply(passThrough, "posts/"),
coolerApply(async.map, passThrough),
coolerApply(async.map, passThrough),
coolerApply(async.sortBy, function(obj, callback){callback(null,obj)})
],
function(err, sorted){
callback({"articles": sorted.reverse()})
})
}
//does the same thing as indexArticles!
indexArticles2(console.log)

Here's what I've ended up with so far.
function indexArticles(callback) {
var flow = [
async.apply(fs.readdir, "posts/"),
function(data, callback) { async.map(data, readPost, callback); },
function sortByDate(parsed, callback) {
var iterator = function(obj, callback) {
if (obj.date) { callback(null, obj.date); }
else { callback("Article has no date.") }
}
// Note that this sorts in reverse lexicographical order!
async.sortBy(parsed, iterator,
function(err, sorted) { callback(err, {"articles": sorted.reverse()} ); }
);
}
];
async.waterfall(flow, async.apply(callback))
}

I've recently created a simple abstraction named WaitFor to call async functions in sync mode (based on Fibers): https://github.com/luciotato/waitfor
I've not tested it with the async package, but it should work. If you run into problems, contact me.
Using wait.for and async your code will be:
var wait = require('waitfor');
var async = require('async');
function indexArticles(callback) {
var files = wait.for(fs.readdir,"posts/");
var markdown = wait.for(async.map, files, readPost);
var results = wait.for(async.map, markdown, parse);
var sorted = wait.for(async.sortBy, results, function(obj, callback) {
callback(null, obj.date);
});
callback( null, {"articles": sorted.reverse()} );
}
to call your fn (async-mode):
//execute in a fiber
wait.launchFiber(indexArticles,function(err,data){
// do something with err,data
});
to call your fn (sync-mode):
//execute in a fiber
function handleRequest(req,res){
try{
...
data = wait.for(indexArticles); //call indexArticles and wait for results
// do something with data
res.end(data.toString());
}
catch(err){
// handle errors
}
}
// express framework
app.get('/posts', function(req, res) {
// handle request in a Fiber, keep node spinning
wait.launchFiber(handleRequest,req,res);
});

Related

Node async waterfall callback was already called

I'm trying to read some files from dir with async.waterfall, it seems to me that I'm doing stuff right, but I get the specified error and the readData function is never called. What's wrong?
var fs = require("fs");
var async = require("async");
var folder = "./files/";
try {
async.waterfall([
function readDir(cb) {
fs.readdir(folder, function(err, files) {
cb(err, files);
});
},
function loopFiles(files, cb) {
files.forEach(function(fn) {
console.log("loop " + fn);
cb(null, fn);
});
},
function check(fn, cb) {
console.log("check "+fn);
fs.stat(folder + fn, function(err, stats) {
console.log(stats.isFile());
cb(err, stats, fn);
});
},
function readData(stats, fn, cb) {
console.log("read "+fn);
if (stats.isFile()) {
fs.readFile(folder + fn, "utf-8", function(err, data) {
cb(err, data);
});
}
}
], function(err, result) {
if (err) {
throw err;
}
console.log(result);
});
} catch (err) {
console.log(err);
}
The problem is that you're calling cb(null, fn) in loopFiles() multiple times if files.length > 1. You will probably need to perform a separate async.waterfall() or use some other async.* method for each file.
One other problem is in readData() where you aren't calling cb() in the case that stats.isFile() evaluates to false.

Need to call two APIs using node.js asynchronously and aggregate the responses from both APIs

The code I wrote so far is as below.
I don't need the whole response but just part of it.
var request = require('request');
var async = require('async');
var asyncTasks = [];
var install;
async.series([
function (callback) {
setTimeout(function () {
request('URL', function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body); // Show the HTML for the Google homepage.
}
});
}, 5000);
},
function (callback) {
setTimeout(function () {
request('URL', function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(body); // Show the HTML for the Google homepage.
}
});
}, 5000);
}
],
function (error, results) {
console.log(results);
});
One approach to do the above concurrently would be to use async.parallel - of the form of:
async.parallel([
function(){ ... },
function(){ ... }
], callback);
Another approach is to use a Promises library - BlueBird or Q are good choices.
Q.all is of the form:
return Q.all([
promise1,
promise2
]).spread(function (resultFromPromise1, resultFromPromise2) {
// do something with the results...
});
You could use one of these approaches to parallelise the two calls. The outputs of each will give you an array containing the results of each call respectively.
Here is a simple illustration of each approach:
Using Async.js
var async = require('async');
var task = function (cb, count) {
setTimeout(function () {
cb(null, "complete: " + count);
}, 1000);
};
async.parallel([
function (cb) {
task(cb, 'one');
},
function (cb) {
task(cb, 'two');
}
], function (err, results) {
console.log(results);
//[ 'complete: one', 'complete: two' ]
});
Using Q:
var Q = require('q');
function task1(cb, count) {
var deferred = Q.defer();
setTimeout(function () {
return deferred.resolve(cb(null, count));
}, 1000);
return deferred.promise;
}
var myCb = function (err, count) {
return "complete: " + count;
};
Q.all([task1(myCb, 'one'), task1(myCb, 'two')])
.then(function (results) {
console.log(results);
//[ 'complete: one', 'complete: two' ]
});
Let me know if you are unclear.
Promises are there to help you out in such a case.
I would prefer to use 'Q' library.
I have modified your code to use Q library
var Q = require('q');
var request = require('request');
function makeCall() {
Q.all([req1(), req2()])
.spread(function (res1, res2) {
// This block is executed once all the functions( Provided in Q.all() ) are finished its execution.
// Use responses from called functions
}, function (err) {
// Error, If any
});
}
function req1() {
var defer = Q.defer();
var url = ''; // Specify URL
var options = {
method: 'get', // Method to use
url: url
}
request(options, function (err, res, body) {
if (err) {
return defer.reject(err);
}
return defer.resolve(body);
});
return defer.promise;
}
function req2() {
var defer = Q.defer();
var url = ''; // Specify URL
var options = {
method: 'get', // Method to use
url: url
}
request(options, function (err, res, body) {
if (err) {
return defer.reject(err);
}
return defer.resolve(body);
});
return defer.promise;
}
You can find docs for Q library here : Q docs

Async Waterfall function only call the first callback

I'm trying to play a bit with node and async waterfall function. This is my code:
var async = require('async');
var waterfall = function (req, res) {
async.waterfall([
_function1(req),
_function2,
_function3
], function (error, success) {
if (error) { alert('Something is wrong!'); }
console.log("success");
return alert('Done!');
});
};
function _function1 (req) {
return function (callback) {
var something = req.body;
console.log("first");
callback (null, something);
}
}
function _function2 (something, callback) {
return function (callback) {
console.log("second");
var somethingelse = function () { };
callback (err, somethingelse);
}
}
function _function3 (something, callback) {
return function (callback) {
console.log("third");
var somethingmore = function () { };
callback (err, somethingmore);
}
}
waterfall(function(){}, function(){});
But in the output I get only 'first'. Why are the other callbacks not called?
You shouldn't return a function from _function2 and _function3. The reason it's being done for _function1 is to pass req in (which can be done in better ways, see below):
/**
 * Second waterfall step: logs "second" and passes a placeholder function on
 * to the next step.
 *
 * @param {*} something - value produced by the previous step (unused here).
 * @param {Function} callback - node-style `callback(err, result)`.
 */
function _function2 (something, callback) {
  console.log("second");
  var somethingelse = function () { };
  // Nothing can fail in this step, so report success with an explicit null;
  // the previous bare `err` was never declared and threw a ReferenceError.
  callback(null, somethingelse);
}
/**
 * Third waterfall step: logs "third" and passes a placeholder function to the
 * final waterfall callback.
 *
 * @param {*} something - value produced by the previous step (unused here).
 * @param {Function} callback - node-style `callback(err, result)`.
 */
function _function3 (something, callback) {
  console.log("third");
  var somethingmore = function () { };
  // Nothing can fail in this step, so report success with an explicit null;
  // the previous bare `err` was never declared and threw a ReferenceError.
  callback(null, somethingmore);
}
To get req into the waterfall, you could use this:
async.waterfall([
function(callback) { return callback(null, req); },
_function1,
_function2,
_function3
], ...);
Or (which I would prefer):
async.waterfall([
_function1.bind(this, req),
_function2,
_function3
], ...);
In both cases, you wouldn't need to return a function from _function1 either:
function _function1 (req, callback) {
var something = req.body;
console.log("first");
callback (null, something);
}

Call same function many times and process combined result set

I have a requirement to make several API requests and then do some processing on the combined result sets. In the example below, you can see that 3 requests are made (to /create) by duplicating the same request code; however, I would like to be able to specify how many to make. For example, I may wish to run the same API call 50 times.
How can I make n calls without duplicating the API call function n times?
async.parallel([
function(callback){
request.post('http://localhost:3000/create')
.send(conf)
.end(function (err, res) {
if (err) {
callback(err, null);
}
callback(null, res.body.id);
});
},
function(callback){
request.post('http://localhost:3000/create')
.send(conf)
.end(function (err, res) {
if (err) {
callback(err, null);
}
callback(null, res.body.id);
});
},
function(callback){
request.post('http://localhost:3000/api/store/create')
.send(conf)
.end(function (err, res) {
if (err) {
callback(err, null);
}
callback(null, res.body.id);
});
}
],
function(err, results){
if (err) {
console.log(err);
}
// do stuff with results
});
First, wrap the code that you want to call many times in a function:
/**
 * Posts `conf` to the /create endpoint and reports the id of the created
 * resource.
 *
 * @param {Function} callback - node-style callback; receives `(err)` when the
 *   request fails, otherwise `(null, res.body.id)`.
 */
var doRequest = function (callback) {
  request.post('http://localhost:3000/create')
    .send(conf)
    .end(function (err, res) {
      if (err) {
        // Stop here: falling through would invoke the callback a second time
        // and dereference `res`, which may be undefined on failure.
        return callback(err);
      }
      callback(null, res.body.id);
    });
};
Then, use the async.times function:
// Invoke doRequest 50 times; `next` already has the (err, result) shape that
// doRequest reports, so it can be handed over directly without a wrapper.
async.times(50, function (n, next) {
  doRequest(next);
}, function (error, results) {
  // do something with your results
});
Create an array with as many references to the function as you need tasks in your workload. Then pass them to async.parallel. For example:
var async = require("async");
var slowone = function (callback) {
setTimeout(function () {
callback(null, 1);
}, 1000);
};
async.parallel(
dd(slowone, 100),
function (err, r) {
console.log(JSON.stringify(r));
}
);
// Builds an array that holds `count` references to the very same `value`
// (the value is not copied).
function dd(value, count) {
  var copies = [];
  while (copies.length < count) {
    copies.push(value);
  }
  return copies;
}
Note again that there is only one instance of the slow running function, in spite of there being many references to it.

nodejs async execute callbacks in series

I have a situation like:
function smth(data) {
// save data to db.
Object.findOne({ _id: ceva['id']}, function(obj) {
obj.save();
});
}
This function is called from various async calls. There is a race condition where the second findOne call runs before a previous save() runs.
Is there a way to work around this? Maybe using the async library to run things in series?
You can make use of one of the async control flows to ensure every iteration of smth() happens in order.
If you're not in favor of using a flow control library, you can easily achieve series execution of each event. Consider following code snippet:
function smth(data, cb) {
// save data to db.
findOne({
id: data.id
}, function (err, obj) {
if (!err && obj) {
savedb(obj, cb);
} else {
cb(err);
}
});
}
function findOne(filter, cb) {
// simulate find
setTimeout(function () {
cb(null, {
id: filter.id,
name: 'test',
role: 'test'
});
}, 500);
}
function savedb(obj, cb) {
//simulate db save
setTimeout(function () {
cb(null, obj);
}, 500);
}
// iterations count: first CLI argument, falling back to 3 when it is missing,
// not a number, or 0
var count = parseInt(process.argv[2], 10) || 3;
// Runs smth() strictly one call at a time: the next iteration is started only
// from inside the previous call's callback.
(function iterate(i) {
  console.log(i);
  if (i === count) {
    // iterations complete: exit with status 0, because a non-zero status
    // would tell the shell that the run failed
    process.exit(0);
  }
  var data = {
    id: 123 + i
  };
  smth(data, function (err, res) {
    console.log(err || res);
    iterate(++i);
  });
})(0);
//make this follow async conventions with callback argument last
/**
 * Saves `data`, then looks up the stored object by `data.id` and saves it,
 * reporting the outcome through `callback`.
 *
 * @param {Object} data - record to persist; its `id` is used for the lookup.
 * @param {Function} callback - node-style callback, invoked with an error as
 *   soon as any step fails, otherwise handed to the final `obj.save`.
 */
function smth(data, callback) {
  //pseudocode database API here
  db.save(data, function (error) {
    if (error) {
      callback(error);
      return;
    }
    // Look up by the id of the record passed in (`ceva` was never declared),
    // and use an error-first callback so `obj` is not handed the error slot.
    Object.findOne({ _id: data.id }, function (err, obj) {
      if (err || !obj) {
        // Nothing to save: report the lookup error (if any) and stop.
        callback(err);
        return;
      }
      obj.save(callback);
    });
  });
}
That's the basic callback approach. You can use async.js if you like to clean it up a bit or study callbackhell.com for more ways to avoid the nested functions.

Resources