Async forEach in Node.js

In node.js, I know array.forEach is blocking.
I need to loop through an array and build a string like this:
var rarray = ['test', 'test2'];
var rstring = '';
rarray.forEach(function(e, i) {
    rstring = rstring + i + e;
});
return rstring;
How do I do it asynchronously?

In such a case, you probably don't need to transform this code into its async version.
Hypothetically, or for very large arrays, here's a way to transform your code into something asynchronous, or at least something that will get back to the main event loop periodically.
var rarray = ['test', 'test2'];
var rstring = '';
var max = rarray.length;
var current = 0;
process.nextTick(function next() {
    rstring = rstring + current + rarray[current]; // index + element, as in the forEach version
    current++;
    if (current >= max) {
        console.log("done", rstring);
    } else {
        process.nextTick(next);
    }
});
In practice you'd wrap this in a function, and replace console.log by calling the completion callback.
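For example, a minimal sketch of that wrapping (the name buildString and its callback signature are hypothetical, not from the question):
function buildString(rarray, done) {
    var rstring = '';
    var max = rarray.length;
    var current = 0;
    if (max === 0) return done(null, rstring); // nothing to do
    process.nextTick(function next() {
        rstring = rstring + current + rarray[current];
        current++;
        if (current >= max) {
            done(null, rstring); // completion callback instead of console.log
        } else {
            process.nextTick(next);
        }
    });
}

buildString(['test', 'test2'], function (err, result) {
    console.log("done", result); // done 0test1test2
});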

I might add that what you are trying to do is actually called reduce.
You can write it this way:
var res = array.reduce(function(prev, current, index) {
    return prev + index + current;
}, '');
Doing it asynchronously could be done this way:
var array = ['one', 'two'];
function reduceAsync(collection, initial, process, callback) {
    var i = 0;
    var res = initial;
    function DO(err, result) {
        if (err) return callback(err);
        res = result; // carry the accumulated value forward
        if (i >= collection.length) return callback(null, res);
        var index = i++;
        var value = collection[index];
        process(res, value, index, collection, DO);
    }
    DO(null, res);
}
reduceAsync(array, '', function(previous, current, index, collection, callback) {
    setTimeout(function() {
        callback(null, previous + index + current);
    }, 10); // wait 10 ms
}, function finalResult(err, result) {
    console.log(result);
});
or, you know, you could use async.reduce
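For completeness, a minimal sketch using async.reduce (note that, unlike Array.prototype.reduce, its iterator receives no index argument, so track one yourself if you need it):
var async = require('async');

async.reduce(['one', 'two'], '', function (memo, item, callback) {
    // defer to the event loop, standing in for real async work
    setImmediate(function () {
        callback(null, memo + item);
    });
}, function (err, result) {
    console.log(result); // "onetwo"
});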

Pascal's answer is essentially a form of cooperative multithreading (See Wikipedia: Thread).
It's hard to say without measuring, but I would guess that preemptive multithreading would have better throughput for things of this nature, because the compiler has the opportunity to do loop optimizations (not sure that would happen specifically with the code above), and the operating system is likely better at deciding exactly how often to task-switch. It looks like you can do Web Worker Threads in node.js.
Anybody have benchmarks?

If you want to use the async module with the code above, you would still end up with synchronous code. The async module allows you to avoid callback hell and manage your callbacks. It doesn't make something synchronous async. To achieve that you need to use process.nextTick like in Pascal's answer.
If you were doing some extra async processing on each item in your array and you want to aggregate the result from each operation while preserving order, then you could use async.eachSeries like this:
var async = require('async');
var rarray = ['test', 'test2'];
var rstring = '', i = 0;
async.eachSeries(rarray, function(item, callback) {
    rstring += (i++) + item;
    callback();
}, function(err) {
    console.log(rstring);
});
If you are not fussed about the order of things, then you could use async.each and it will execute your async processing function in parallel.
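A minimal sketch of that parallel variant, with a hypothetical processItem standing in for your real async work:
var async = require('async');
var rarray = ['test', 'test2'];

function processItem(item, callback) {
    // hypothetical async operation per item
    setTimeout(function () {
        callback(null);
    }, 10);
}

async.each(rarray, processItem, function (err) {
    // all items processed; completion order was not guaranteed
    console.log('all done');
});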

Related

Can Node.js stream be made as coroutine?

Is there a way to make a Node.js stream work as a coroutine?
Example
A Fibonacci number stream.
fibonacci.on('data', cb);
// The callback (cb) is like
function cb(data) {
    // something done with data here ...
}
Expectation
function* fibonacciGenerator() {
    fibonacci.on('data', cb);
    // Don't know what has to be done further...
}
var fibGen = fibonacciGenerator();
fibGen.next().value(cb);
fibGen.next().value(cb);
fibGen.next().value(cb);
.
.
.
Take the desired numbers from the generator. The Fibonacci series here is just an example; in reality the stream could be of anything: a file, a MongoDB query result, etc.
Maybe something like this:
1. Make the 'stream.on' function a generator.
2. Place yield inside the callback function.
3. Obtain the generator object.
4. Call next and take the next value in the stream.
Is it at least possible? If yes, how, and if not, why? Maybe a dumb question :)
If you don't want to use a transpiler (e.g. Babel) or wait until async/await make it to Node.js, you can implement it yourself using generators and promises.
The downside is that your code must live inside a generator.
First, you can make a helper that receives a stream and returns a function that, when called, returns a promise for the next "event" of the stream (e.g. data).
function streamToPromises(stream) {
    return function() {
        if (stream.isPaused()) {
            stream.resume();
        }
        return new Promise(function(resolve) {
            stream.once('data', function() {
                resolve.apply(stream, arguments);
                stream.pause();
            });
        });
    };
}
It pauses the stream when you're not using it, and resumes it when you ask for the next value.
Next, you have a helper that receives a generator as an argument, and every time it yields a promise, it resolves it and passes its result back to the generator.
function run(fn) {
    var gen = fn();
    var promise = gen.next().value;
    var tick = function() {
        promise.then(function(value) {
            var step = gen.next(value);
            if (step.done) return; // stop once the generator finishes
            promise = step.value;
            tick();
        }).catch(function(err) {
            // TODO: Handle error (e.g. forward it with gen.throw(err)).
        });
    };
    tick();
}
Finally, you would do your own logic inside a generator, and run it with the run helper, like this:
run(function*() {
    var nextFib = streamToPromises(fibonacci);
    var n;
    n = yield nextFib();
    console.log(n);
    n = yield nextFib();
    console.log(n);
});
Your own generator will yield promises, pausing its execution and passing the control to the run function.
The run function will resolve the promise and pass its value back to your own generator.
That's the gist of it. You'd need to modify streamToPromises to check for other events as well (e.g. end or error).
Another approach is to implement the Fibonacci stream itself as a Readable that pulls its values from a generator:
const { Readable } = require('stream');

class FibonacciGeneratorReader extends Readable {
    _isDone = false;
    _fibCount = null;
    _gen = function *() {
        let prev = 0, curr = 1, count = 1;
        while (this._fibCount === -1 || count++ < this._fibCount) {
            yield curr;
            [prev, curr] = [curr, prev + curr];
        }
        return curr;
    }.bind(this)();
    constructor(fibCount) {
        super({
            objectMode: true,
            read: size => {
                if (this._isDone) {
                    this.push(null);
                } else {
                    let fib = this._gen.next();
                    this._isDone = fib.done;
                    this.push(fib.value.toString() + '\n');
                }
            }
        });
        this._fibCount = fibCount || -1;
    }
}

new FibonacciGeneratorReader(10).pipe(process.stdout);
Output should be:
1
1
2
3
5
8
13
21
34
55

Bluebird async series call

I have the code below, but as soon as widgetsAddCall is pushed into the array it gets executed, so Promise.each is of no use.
The function widgetsAddCall makes an async request to the API server and returns a Bluebird promise. I want the API calls to be made one after another, so that each call sends its chunk of data in turn.
var chunkCnt = Math.ceil(widgetsIds.length / 4000);
var responseT = Array();
var Promise = require('bluebird');

for (var cntTemp = 0; cntTemp < chunkCnt; cntTemp++) {
    var tempWidgs = widgetsIds.slice(cntTemp, cntTemp + 4000);
    var query = {
        id: tempWidgs.join(',')
    };
    responseT.push(widgetsAddCall(tempWidgs, query, campRemoteId, campaign));
}
return Promise.each(responseT, function(responses) {
    // Use the responses here
    return getWidgets(campRemoteId, campaign).then((ids) => {
        var toRemove = [];
        for (var id of ids) {
            if (widgetsIds.indexOf(id) === -1) {
                toRemove.push(id);
            }
        }
        if (toRemove.length) {
            return removeWidgets(campaign, campRemoteId, toRemove);
        }
    });
});
I want that API call is made one after another, so that each call
sends chunk of data one after another.
The simplest way I can think of to serialize all your calls is to prebuild your chunks into an array and then use Bluebird's Promise.mapSeries() to serially iterate through the array:
var Promise = require('bluebird');

// pre-build chunks into an array
var chunks = [];
var chunkCnt = Math.ceil(widgetsIds.length / 4000);
for (var chunkIndex = 0; chunkIndex < chunkCnt; chunkIndex++) {
    // each chunk holds the next 4000 ids
    chunks.push(widgetsIds.slice(chunkIndex * 4000, (chunkIndex + 1) * 4000));
}

// now serially iterate the array
Promise.mapSeries(chunks, function(item) {
    return widgetsAddCall(item, {id: item.join(',')}, campRemoteId, campaign);
}).then(function(results) {
    // now process the results of widgetsAddCall() here
});
FYI, your original use of Promise.each() was not making any sense to me because you were iterating an array of promises, but then not using any info from the iteration. There seemed to be no point to the iteration. Plus, you weren't properly serializing your calls to widgetsAddCall() anyway since you were launching all those calls in parallel.
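If you did want to keep Promise.each, a sketch under the same assumptions would iterate the chunks themselves (plain data), not an array of already-started promises:
Promise.each(chunks, function (chunk) {
    // the next chunk is not sent until the promise returned here resolves
    return widgetsAddCall(chunk, {id: chunk.join(',')}, campRemoteId, campaign);
}).then(function () {
    // all chunks sent, strictly one after another
});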

nodejs event loop, how to use nextTick correctly

I'm trying to follow the exercises from NodeSchool. There is an exercise where one needs to collect three streams and only print the output when all three streams are done. Without using any 3rd party module.
Can somebody please point out why my approach is not working? It gets stuck in an infinite loop:
var http = require('http');
var concat = require('concat-stream');

var count = 3;
var str1, str2, str3;

http.get(process.argv[2], function (response) {
    response.pipe(concat(function (data) {
        str1 = data.toString();
        --count;
    }));
});

http.get(process.argv[3], function (response) {
    response.pipe(concat(function (data) {
        str2 = data.toString();
        --count;
    }));
});

http.get(process.argv[4], function (response) {
    response.pipe(concat(function (data) {
        str3 = data.toString();
        --count;
    }));
});

function foo() {
    if (count > 0) {
        process.nextTick(foo);
    } else {
        console.log(str1);
        console.log(str2);
        console.log(str3);
    }
}
foo();
http.get() callbacks can't run until the next tick of the event loop or later. process.nextTick() puts something right at the front of the event loop, ahead of the callbacks that are already there.
Your recursive routine never stops recursing because it's waiting for those callbacks to decrement the counter, but they never get a chance to fire: the nextTick queue is drained before the event loop moves on to I/O.
It might work if you swap out process.nextTick() for setImmediate(). (I didn't test that, and if you do, hey, let me know if it works or not.)
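For reference, the swap would look like this (equally untested, per the caveat above); setImmediate queues the callback behind pending I/O events, so the http callbacks get a chance to run between iterations:
function foo() {
    if (count > 0) {
        setImmediate(foo); // unlike process.nextTick, lets pending I/O callbacks run first
    } else {
        console.log(str1);
        console.log(str2);
        console.log(str3);
    }
}
foo();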
But I would say just get rid of the recursion altogether. It's not needed. You can (for example) do something like this instead:
var count = 0;
var httpGet = function (index) {
    http.get(process.argv[2 + index], function (response) {
        // Do stuff here
        // This next bit will probably end up inside the callback provided to concat
        count++;
        if (count === 3) {
            // Print results here
        }
    });
};
for (var i = 0; i < 3; i++) {
    httpGet(i);
}
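A fuller sketch of that idea (untested), with the counter inside the concat callback and an array to keep the output in argument order:
var http = require('http');
var concat = require('concat-stream');

var results = [];
var count = 0;

var httpGet = function (index) {
    http.get(process.argv[2 + index], function (response) {
        response.pipe(concat(function (data) {
            results[index] = data.toString(); // keeps output order independent of timing
            count++;
            if (count === 3) {
                results.forEach(function (str) {
                    console.log(str);
                });
            }
        }));
    });
};

for (var i = 0; i < 3; i++) {
    httpGet(i);
}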

Nodejs: Resolving promises with generator function

I know there are a lot of good examples on the web and I have read a lot of them, but currently I'm stuck resolving promises with the new generator functionality in nodejs 0.11.x.
For e.g. I have the following function:
SolrBaseDomain.prototype.promisedQuery = function(query, callback) {
    var solrClient = solr.createClient(this.configuration);
    var defer = Q.defer();
    solrClient.search(query, function(err, obj) {
        if (!err) {
            if (obj.response.numFound > 0) {
                defer.resolve(obj.response.docs);
            } else {
                defer.resolve(null);
            }
        } else {
            defer.reject(err);
        }
    });
    var promise = defer.promise;
    return Q.async(function* () {
        var result = yield promise;
        return result;
    });
};
I expected that every call to this method would wait until the promise is fulfilled and the return statement gives back the result of the promise.
But currently it seems that the code inside "Q.async..." is not executed, or that the async call arrives only after the method's return statement has already run.
It's strange: in every example I know, this is one of the recommended ways to wait for async calls in nodejs, but currently it does not work for me.
I've tried a lot of different variations of the above example, but the result is the same every time: I do not get back a valid result.
I have nodejs installed in version 0.11.10 and the --harmony flag is set when the code is executed.
Can anyone point me to right direction? I'm wondering if I oversee something ... :)
Thanks for your feedback.
Best regards
Udo
I expected that every call to this method would wait until the promise is fulfilled and the return statement gives back the result of the promise.
No. Generators will not make functions synchronous, you cannot (and don't want to) block while waiting for a result. When calling a generator function and running sequentially through the async steps that it yields, the result you will get back in the end is still asynchronous - and therefore a promise. Only inside of the generator, your code can use synchronous control flow and yield.
This means that the (then-) callback-based code
SolrBaseDomain.prototype.promisedQuery = function(query) {
    var promise = Q.ninvoke(solr.createClient(this.configuration), "search", query);
    return promise.then(function(obj) {
        if (obj.response.numFound > 0) {
            return obj.response.docs;
        } else {
            return null;
        }
    });
};
becomes
SolrBaseDomain.prototype.promisedQuery = Q.async(function* (query) {
    var promise = Q.ninvoke(solr.createClient(this.configuration), "search", query);
    var obj = yield promise;
    //        ^^^^^
    if (obj.response.numFound > 0) {
        return obj.response.docs;
    } else {
        return null;
    }
});
Try this
SolrBaseDomain.prototype.promisedQuery = Q.async(function* (query) {
    var solrClient = solr.createClient(this.configuration);
    var obj = yield Q.ninvoke(solrClient, "search", query);
    return obj.response.numFound > 0 ? obj.response.docs : null;
});
This does the same thing for promises as this does for callbacks:
SolrBaseDomain.prototype.query = function (query, callback) {
    var solrClient = solr.createClient(this.configuration);
    solrClient.search(query, function(err, obj) {
        if (err) return callback(err);
        callback(null, obj.response.numFound > 0 ? obj.response.docs : null);
    });
};
Therefore, if the first version returns a promise that resolves to undefined, the callback version will likewise call the callback with undefined.
Following your suggestions, my code now looks like this:
...
SolrBaseDomain.prototype.query = Q.async(function* (query) {
    var solrClient = solr.createClient(this.configuration);
    var obj = yield Q.ninvoke(solrClient, "search", query);
    return obj.response.numFound > 0 ? obj.response.docs : null;
});
...
I share the above query function across all data access layers in order to have one central method that queries the different indexes asynchronously.
For example, in the domain data access layer, the code that uses this function looks like this:
SolrHostDomain.prototype.getByName = Q.async(function* (domain) {
    var queryObject = {
        "domain": domain
    };
    var query = this.getQuery("byName", queryObject);
    var docs = yield this.query(query);
    var domain = null;
    if (docs != null && docs.length > 0) {
        domain = this.dataMapper.merge(docs[0]);
    }
    return domain;
});
Currently I'm not sure if the generator in the getByName function is necessary, but it seems to work. Dealing with promises is still a somewhat unclear concept for me, since I'm new to nodejs.
So maybe, if you can help me on that topic and point me in the right direction, that would be helpful.
The main question for me is: how can I ensure that a synchronous method can call an asynchronous method and get back not a promise, but the final result of that promise?
I've searched a long time, but I could not find good documentation describing the use of generator functions or promises in conjunction with synchronous calls. Even the examples focus only on using the mechanism, not on working together with synchronous functions.
Best regards and many thanks for your help
Udo
Got it!!!
After some trial and error, I think I got it now and I have a working solution:
Query function:
SolrBaseDomain.prototype.query = Q.async(function* (query) {
    var solrClient = solr.createClient(this.configuration);
    var obj = yield Q.ninvoke(solrClient, "search", query);
    return obj.response.numFound > 0 ? obj.response.docs : null;
});
Calling method:
SolrHostDomain.prototype.getByName = function(domain) {
    var queryObject = {
        "domain": domain
    };
    var query = this.getQuery("byName", queryObject);
    var docsPromise = this.query(query);
    var _self = this;
    return docsPromise.then(function(docs) {
        var domain = null;
        if (docs != null && docs.length > 0) {
            domain = _self.dataMapper.merge(docs[0]);
        }
        return domain;
    });
};
The solution was to understand that the query method still returns a promise rather than the concrete result, even though yield is used inside it.
So I have to put every piece of code that works on the result of the promise inside the "then" functions (or "done", if no other caller further up the call hierarchy will follow).
After the promise settles, the code inside the "then" functions is processed.
BR
Udo

How to chain a variable number of promises in Q, in order?

I have seen Chaining an arbitrary number of promises in Q ; my question is different.
How can I make a variable number of calls, each of which returns asynchronously, in order?
The scenario is a set of HTTP requests, the number and type of which is determined by the results of the first HTTP request.
I'd like to do this simply.
I have also seen this answer which suggests something like this:
var q = require('q'),
    itemsToProcess = ["one", "two", "three", "four", "five"];

function getDeferredResult(prevResult) {
    return (function (someResult) {
        var deferred = q.defer();
        // any async function (setTimeout for now will do, $.ajax() later)
        setTimeout(function () {
            var nextResult = (someResult || "Initial_Blank_Value ") + ".." + itemsToProcess[0];
            itemsToProcess = itemsToProcess.splice(1);
            console.log("tick", nextResult, "Array:", itemsToProcess);
            deferred.resolve(nextResult);
        }, 600);
        return deferred.promise;
    }(prevResult));
}

var chain = q.resolve("start");
for (var i = itemsToProcess.length; i > 0; i--) {
    chain = chain.then(getDeferredResult);
}
...but it seems awkward to loop through the itemsToProcess in that way. Or to define a new function called "loop" that abstracts the recursion. What's a better way?
There's a nice clean way to do this with [].reduce.
var chain = itemsToProcess.reduce(function (previous, item) {
    return previous.then(function (previousValue) {
        // do what you want with previous value
        // return your async operation
        return Q.delay(100);
    });
}, Q.resolve(/* set the first "previousValue" here */));

chain.then(function (lastResult) {
    // ...
});
reduce iterates through the array, passing in the returned value of the previous iteration. In this case you're returning promises, and so each time you are chaining a then. You provide an initial promise (as you did with q.resolve("start")) to kick things off.
At first it can take a while to wrap your head around what's going on here but if you take a moment to work through it then it's an easy pattern to use anywhere, without having to set up any machinery.
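For instance, a concrete toy version (with Q.delay standing in for a real async call such as an HTTP request) that builds up a string one async step per item:
var Q = require('q');
var itemsToProcess = ["one", "two", "three"];

var chain = itemsToProcess.reduce(function (previous, item) {
    return previous.then(function (previousValue) {
        // Q.delay stands in for the real async operation
        return Q.delay(100).then(function () {
            return previousValue + ".." + item;
        });
    });
}, Q.resolve("start"));

chain.then(function (lastResult) {
    console.log(lastResult); // start..one..two..three
});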
I like this way better:
var q = require('q'),
    itemsToProcess = ["one", "two", "three", "four", "five"];

function getDeferredResult(a) {
    return (function (items) {
        var deferred;
        // end
        if (items.length === 0) {
            return q.resolve(true);
        }
        deferred = q.defer();
        // any async function (setTimeout for now will do, $.ajax() later)
        setTimeout(function () {
            var a = items[0];
            console.log(a);
            // pop one item off the array of work items
            deferred.resolve(items.splice(1));
        }, 600);
        return deferred.promise.then(getDeferredResult);
    }(a));
}

q.resolve(itemsToProcess)
    .then(getDeferredResult);
The key here is to call .then() on deferred.promise with a spliced version of the work-item array. This then-handler runs after the initial deferred promise resolves, which happens in the setTimeout callback. In a more realistic scenario, the deferred promise would be resolved in the HTTP client callback.
The initial q.resolve(itemsToProcess) kicks things off by passing in the work items to the first call of the work fn.
I added this in hopes it would help others.
Here is a concept of a state machine defined with Q.
Suppose you have the HTTP function defined, so it returns a Q promise object:
var Q_http = function (url, options) {
    return Q.when($.ajax(url, options));
};
You can define a recursive function nextState as following:
var states = [...]; // an array of states in the system.

// this is a state machine to control what url to get data from
// at the current state
function nextState(current) {
    if (is_terminal_state(current))
        return Q(true);
    return Q_http(current.url, current.data).then(function (result) {
        var next = process(current, result);
        return nextState(next);
    });
}
Where function process(current, result) is a function to find out what the next step would be according to the current state and the result from the HTTP call.
When you use it, use it like:
nextState(initial).then(function () {
    // all requests are successful.
}, function (reason) {
    // for some unexpected reason the request sequence fails in the middle.
});
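To make that concrete, here is a hypothetical sketch of the two pieces this answer leaves abstract:
// Hypothetical: treat "no next state" as terminal
function is_terminal_state(state) {
    return state == null;
}

// Hypothetical: the server's response names the next url to fetch
function process(current, result) {
    if (result && result.nextUrl) {
        return { url: result.nextUrl, data: current.data };
    }
    return null; // no further requests
}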
I propose another solution, which looks easier to understand to me.
You do the same as you would when chaining promises directly:
promise.then(doSomethingFunction).then(doAnotherThingFunction);
If we put that into a loop, we get this:
var chain = Q.when();
for (...) {
    chain = chain.then(functionToCall.bind(this, arg1, arg2));
}

chain.then(function() {
    console.log("whole chain resolved");
});

var functionToCall = function(arg1, arg2, resultFromPreviousPromise) {
    // ...
};
We use function currying to use multiple arguments. In our example
functionToCall.bind(this, arg1, arg2) will return a function with one argument: functionToCall(resultFromPreviousPromise)
You do not need to use the result from the previous promise.
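A small sketch of the pattern in action (all names hypothetical):
var Q = require('q');

var appendLabel = function (label, resultFromPreviousPromise) {
    return Q.delay(50).then(function () {
        return (resultFromPreviousPromise || '') + label + ';';
    });
};

var chain = Q.when();
['first', 'second'].forEach(function (label) {
    chain = chain.then(appendLabel.bind(null, label));
});
chain.then(function (result) {
    console.log(result); // first;second;
});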
