underscore: async each loop waiting "inside" - node.js

How can I wait for callbacks inside a loop that changes a variable, while still running the work asynchronously?
The 2nd example uses async; in that case I don't know how to add a 2nd parameter sum to wait in order to avoid a global variable sum. I would like to call it like wait(sum,value); and get sum back as the return value.
wait stands in for a complex function that I use in my real problem, so it can't be rewritten as "inline" code and has to stay a function.
Example1:
// Example 1: tries to sum the array through an asynchronous helper using underscore.
var _ = require('underscore');
var arr = [1,2,3,4,5,6,7];
var sum = 0;
// Simulated asynchronous work: hands `item` back to `callback` from a timer
// whose delay is a random whole number of milliseconds (at most 1000).
function wait(item,callback) {
setTimeout(function() {
callback(item);
}, Math.ceil(Math.random()*1000));
}
// `done` is the logger wrapped by _.after(arr.length, ...); per the underscore
// documentation the wrapped function only runs once it has been called that many times.
var done = _.after(arr.length,function(value) {
console.log('sum ='+value);
})
_.each(arr,function(itm) {
// The callback below runs later, from wait's timer, not during this iteration.
wait(itm,function(value) {
console.log('waiting... '+value);
sum = sum + value;
})
// Please wait for the callback
console.log(itm);
// This call happens synchronously, right after wait() has only scheduled its
// timer, so `sum` has not been updated by the callback above yet.
done(sum);
});
Example2:
// Example 2: the same experiment using the async library; all state is local to this function.
function asyncExample2() {
var async = require('async');
var arr = [1,2,3,4,5,6,7];
// Inner asynchronous step: calls back with `item` from a randomly delayed timer.
function otherWait(item, callback) {
setTimeout(function() {
callback(item); // call this when you're done with whatever you're doing
}, Math.ceil(Math.random()*1000));
}
// Iterator handed to async.forEach below.
function wait(item, callback) {
setTimeout(function() {
otherWait(item,function() {
console.log(item);
});
// Runs immediately after otherWait() has been started, i.e. before
// otherWait's own timer has fired.
// NOTE(review): async iterator callbacks presumably treat their first argument
// as an error, so passing `item` here may end the iteration early — confirm
// against the async documentation.
callback(item);
}, Math.ceil(Math.random()*1000));
}
// Final callback: prints `sum`, which nothing in this example changes from 0.
function done() { console.log("sum = "+sum);};
var sum = 0;
async.forEach(arr, wait, done);
}
Desired Call:
sum = wait(sum,item)

The easiest way to do this is putting done in the function wait. It makes done called only after the last callback is executed.
var arr = [1,2,3,4,5,6,7];
var sum = 0;

// Simulates an asynchronous job: from a randomly delayed timer it first reports
// `item` to `callback` and then ticks the completion counter `done`.
function wait(item,callback) {
  var delay = Math.ceil(Math.random()*1000);
  setTimeout(function() {
    callback(item);
    // Tick only after this item's callback has had the chance to update `sum`.
    done(sum);
  }, delay);
}

// Logger wrapped with _.after so that it is armed by arr.length calls.
var done = _.after(arr.length, function(total) {
  console.log('sum ='+total);
});

_.each(arr, function(itm) {
  var accumulate = function(value) {
    console.log('waiting... '+value);
    sum += value;
  };
  wait(itm, accumulate);
  // Please wait for the callback
  console.log(itm);
  // done(sum) is intentionally not called here any more; wait() calls it.
});
Output:

Underscore is entirely synchronous so done(sum) would execute before wait has finished executing.
For asynchronous operation don't use underscore.
Something simple like this should do what you want:
var sum = 0;
// Processes arr one element at a time: wait() is called for arr[pos], and only
// when its callback fires is the next position started.
// Relies on `arr`, `wait` and `done` being defined by the surrounding code;
// `done` is called exactly once here, with the final sum.
var waitNext = function(pos) {
  wait(arr[pos], function(value) {
    if (pos < arr.length) {
      console.log('waiting... '+value);
      sum += value;
      waitNext(pos+1);
    } else {
      // Reached on the extra call made with pos === arr.length (arr[pos] is
      // undefined there); nothing is added, the total is just reported.
      done(sum);
    }
  });
};
waitNext(0);
You could of course avoid using waitNext and just modify wait but this will work if wait is not your code.
Not sure if you'd want sum += value inside the if or just before it since now there's an unnecessary waitNext call you could remove by tweaking the if condition's order.

You can really do a recursion version without global variable.
var arr = [1,2,3,4,5,6,7];
// Sums `arr` through randomly delayed timers without a global accumulator.
//   arr      - items to process; emptied by this function via shift()
//   max      - number of items that must be folded in before `done` is called
//   callback - (item, acc) => new acc
//   sum      - two-element state array: [running total, count of items folded in]
//   done     - called with the running total once the count equals `max`
function wait(arr, max, callback, sum, done) {
// Takes the next item off the front of the caller's array (undefined once it is empty).
var item = arr.shift();
setTimeout(function(){
if(item) {
sum[0] = callback(item, sum[0]);
sum[1]++;
}
else
// Timer armed without an item: finish if every item has been counted,
// otherwise call wait() again, which arms another timer and re-checks later.
sum[1] === max ? done(sum[0]) : wait(arr,max, callback, sum, done);
}, Math.random()*1000);
// Recurse synchronously while there was an item, so all timers are started
// up front; the recursion stops at the first falsy item.
item && wait(arr, max,callback, sum, done);
}
// Accumulator step: logs the item and returns it added to the running total.
function cb(item, acc) {
console.log('waiting....' + item);
return item + acc;
}
// Completion handler: prints the total.
function done(sum) {
console.log(sum);
}
// State starts as [total = 0, count = 0].
wait(arr, arr.length,cb, [0, 0], done);
Output

I am assuming that you are using setTimeout to implement the asynchronous behaviour; instead, you can use a library like async, which handles this much more easily for you.
Ex of using async's each
var async = require('async');
var arr = [1,2,3,4,5,6,7];
var sum = 0;

// Iterator: folds one item into the total, then signals completion.
function addItem(item, cb) {
  sum += item;
  cb();
}

// Final callback: prints the total (the error argument is not inspected).
function printSum(err) {
  console.log(sum);
}

async.forEach(arr, addItem, printSum);

Related

Return value in function from a promise block

I'm trying to write a function (using WebdriverJS lib) that iterates through a list of elements, checks the names and build an xpath locator that corresponds to that name. I simplified xpath locators here, so don't pay attention.
The issues I'm facing here are:
1) Calling this function returns undefined. As far as I understand, this is because the return statement is not in its place, but:
2) Placing it in the correct place where a synchronous code would normally work, doesn't work for async promises, hence calling this function will return the same undefined, but because the return statement fires before the "driver.findElement" statement.
How should I use the return statement here, if I want to get createdTask variable as a result of calling this function?
// Question code: looks for the task whose title is "testName" and builds an
// XPath locator for it. Only the XPath attribute tests were repaired (@id and
// @class); the asynchronous behaviour being asked about is left as posted.
// NOTE: findCreatedTask itself has no return statement, so callers receive
// undefined; `return createdTask` only returns from the innermost then-callback.
var findCreatedTask = function() {
  var createdTask;
  driver.findElements(By.xpath("//div[@id='Tasks_Tab']")).then(function(tasks) {
    for (var index = 1; index <= tasks.length; index++) {
      driver.findElement(By.xpath("//div[@id='Tasks_Tab'][" + index + "]//div[@class='task-title']")).getText().then(function(taskTitle) {
        if (taskTitle == "testName") {
          createdTask = "//div[@id='Tasks_Tab'][" + index + "]";
          return createdTask;
        }
      });
    }
  });
};
You could first get all the texts with promise.map and then get the position with indexOf :
var map = webdriver.promise.map;
// Returns a promise for the XPath of the task whose title is "testName".
// Resolves to undefined when no title matches, instead of an XPath with
// position 0 that could never select anything.
var findCreatedTask = function() {
  var elems = driver.findElements(By.xpath("//div[@id='Tasks_Tab']//div[@class='task-title']"));
  return map(elems, elem => elem.getText()).then(titles => {
    var found = titles.indexOf("testName");
    if (found === -1) {
      return undefined;
    }
    // XPath positions are 1-based, array indexes are 0-based.
    return "//div[@id='Tasks_Tab'][" + (found + 1) + "]";
  });
};
Here you go, I cleaned it up a bit. This will actually return an error if one is experienced in the nested promises:
// Returns a promise for the XPath of the task whose title is 'testName'
// (undefined when none matches). Failures of any lookup reject the promise.
var findCreatedTask = function() {
  var Promise = require('bluebird');
  return driver.findElements(By.xpath("//div[@id='Tasks_Tab']"))
    .then(function(tasks) {
      // The mapper's second argument is the 0-based index of the element;
      // XPath positions are 1-based, hence index + 1.
      return Promise.map(tasks, function(task, index) {
        return driver.findElement(By.xpath("//div[@id='Tasks_Tab'][" + (index + 1) + "]//div[@class='task-title']")).getText();
      });
    })
    .then(function(taskTitles) {
      for (let i = 0; i < taskTitles.length; i++) {
        if (taskTitles[i] === 'testName') {
          return "//div[@id='Tasks_Tab'][" + (i + 1) + "]";
        }
      }
    });
};
You call it using
// findCreatedTask is a function: call it to obtain the promise.
findCreatedTask().then(function(res){
//do your thing
}).catch(function(err){
console.error(err.stack);
});
You will not be able to return the value that you want from this function because when this function returns, the value is not defined yet.
This is not a problem that you try to return the value in the wrong place, but that you try to access it at the wrong time.
You have two options: you can either return a promise from this function, or this function can take a callback that would be called when the value is available.
Examples
This is not tested but should give you an idea on how to think about it.
Promise
Version with promise:
// Promise version: resolves with the XPath of the first task found to be titled
// "testName", resolves with undefined once every title has been checked without
// a match, and rejects if any WebDriver lookup fails.
var findCreatedTask = function () {
  return new Promise(function (resolve, reject) {
    driver.findElements(By.xpath("//div[@id='Tasks_Tab']")).then(function (tasks) {
      var pending = tasks.length;
      if (pending === 0) {
        resolve(undefined);
        return;
      }
      for (let index = 1; index <= tasks.length; index++) {
        driver.findElement(By.xpath("//div[@id='Tasks_Tab'][" + index + "]//div[@class='task-title']")).getText().then(function (taskTitle) {
          if (taskTitle === "testName") {
            resolve("//div[@id='Tasks_Tab'][" + index + "]");
          }
          pending--;
          if (pending === 0) {
            // No effect if a match already resolved the promise.
            resolve(undefined);
          }
        }, reject);
      }
    }, reject);
  });
};
and then you call it with:
findCreatedTask().then(function (createdTask) {
// you have your createdTask here
});
Callback
Version with callback:
// Callback version: `callback(err, createdTask)` is invoked exactly once —
// with the XPath of the first task found to be titled "testName", with
// undefined when no title matches, or with the error of a failed lookup.
var findCreatedTask = function (callback) {
  var finished = false;
  // Guards against reporting more than one outcome.
  function finish(err, createdTask) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, createdTask);
  }
  driver.findElements(By.xpath("//div[@id='Tasks_Tab']")).then(function (tasks) {
    var pending = tasks.length;
    if (pending === 0) {
      finish(null, undefined);
      return;
    }
    for (let index = 1; index <= tasks.length; index++) {
      driver.findElement(By.xpath("//div[@id='Tasks_Tab'][" + index + "]//div[@class='task-title']")).getText().then(function (taskTitle) {
        if (taskTitle === "testName") {
          finish(null, "//div[@id='Tasks_Tab'][" + index + "]");
        }
        pending--;
        if (pending === 0) {
          finish(null, undefined);
        }
      }, finish);
    }
  }, finish);
};
and then you call it with:
findCreatedTask(function (err, createdTask) {
// you have your createdTask here
});
More info
You can read some other answers that explain how promises and callbacks work if you're interested in knowing more about them:
A detailed explanation on how to use callbacks and promises
Explanation on how to use promises in complex request handlers
An explanation of what a promise really is, on the example of AJAX requests
An explanation of callbacks, promises and how to access data returned asynchronously

Function returning undefined Node JS

I am currently trying to iterate through an array of JSON elements, parse and add the data I need into a specially formatted string, and once conditions are met, initiate the uploading of this data.
The problem that I am running into, however, is my variable 'deviceOutString' is being returned as undefined, leaving me with a string of 'undefined' written as many time as there are JSON elements in the array. I know that the return from the 'checkDuplicates' function is correct because before returning the value, the logs show that the value is correct.
I have attached my code below, please let me know if you have any ideas.
Thanks!
Old Code (updated below)
var i=0;
var parsedJson = JSON.parse(storedData) ;
var storedDataSize = parsedJson.length;
console.log('Stored Data Size: '+storedDataSize);
var async = require('async');
async.each(parsedJson, function( subElemJson, callback1) {
async.series([
function(callback){
console.log('dstring: ' + deviceOutString);
console.log('i : ' + i);
var subElemJsonPretty = JSON.stringify(subElemJson,null,0) ;
var date = subElemJson['date'];
deviceOutString += checkDuplicates(subElemJson, deviceOutString);
console.log('theLoop*DString: ' + deviceOutString);
callback(null, 'one');
},
function(callback){
if((i == storedDataSize - 1 || count == 225) && storedDataSize > 0) {
writeDCC(deviceOutString);
count = 0;
makeList();
}
i++;
callback(null, 'two');
setTimeout(function () { callback1(); }, 500);
}
]);
}); }
Updated New Code
// Parses `storedData` (a JSON array), appends what checkDuplicates returns for
// each element to a '<list>' string, and hands the string to writeDCC when the
// last element is reached or the external `count` hits 225.
// Relies on checkDuplicates, writeDCC and count being defined elsewhere.
function theLoop(storedData) {
  var deviceOutString = '<list>';
  try {
    var i = 0;
    var parsedJson = JSON.parse(storedData);
    var storedDataSize = parsedJson.length;
    console.log('Stored Data Size: ' + storedDataSize);
    var async = require('async');
    async.each(parsedJson, function(subElemJson, callback1) {
      async.series([
        function(callback) {
          var date = subElemJson.date;
          console.log('THIS IS THE DATE: ' + date);
          var temp = checkDuplicates(subElemJson, deviceOutString);
          console.log('This is the temp: ' + temp);
          callback(null, temp);
        }
      ], function(err, results) {
        // The final async.series callback receives (err, results).
        if (err) {
          callback1(err);
          return;
        }
        console.log('*****Results are In*****: ' + results);
        // Append (+=); the earlier `=+` replaced the string with a number.
        deviceOutString += results[0];
        if ((i == storedDataSize - 1 || count == 225) && storedDataSize > 0) {
          writeDCC(deviceOutString);
          count = 0;
          deviceOutString = '<list>';
        }
        i++;
        // No argument: anything passed here is treated as an error by async.each.
        callback1();
      });
    },
    function(err) {
      if (err) {
        console.log('A file failed to process');
      } else {
        console.log('All files have been processed successfully');
      }
    });
  } catch (error) {
    console.info('Exception parsing ' + '\n\n' + error);
    return;
  }
}
So a few things
1: var date = subElemJson['date']; — accessing a fixed, known property name via bracket (array-style) syntax is generally considered poor style. Nit-picky, but hey :P Try var date = subElemJson.date; instead.
2: deviceOutString isn't defined anywhere in the code you provided.
3: Both async.series and async.each are going to want a callback function for when each is finished. that's the whole point of calling callback(null, 'one'); -- that you pass a value to the "results" array in the final async.series callback. You are calling setTimeout(function() { callback1(); }, 500); in the wrong place (also arbitrarily putting it behind a timeout?).
The proper async.series formatting is thus:
async.series([
function(callback) {
// do stuff
callback(null, someValue);
},
function(callback) {
// do other stuff
callback(null, someOtherValue);
}
], function(err, results) {
// all the stuffs are done (or one task reported an error)
if (err) {
return callback1(err);
}
console.log(results); // results is an array containing "someValue" and "someOtherValue" from the tasks above
callback1(); // no argument: the first argument of callback1 is reserved for an error
});
Also, async.each is in the same boat -- it expects you to pass a "every thing I'm looping through has completed now!" function at the end.
Async docs on .each() (scroll down for docs on .series()): https://github.com/caolan/async#each

How to Sync call in Node.js

I have following code snippet:
var array = [1, 2, 3];
var data = 0;
for(var i=0; i<array.length; i++){
asyncFunction(data++);
}
console.log(data);
executeOtherFunction(data);
I am expecting value of data as 3 but I see it as 0 due to asyncFunction. How do I call executeOtherFunction when all the asyncFunction calls are done?
Use async.each:
var async = require('async');
var data = 0;
var array = [ 1, 2, 3 ];
// Starts asyncFunction for every element; the final callback runs once all of
// them have called `done`, or as soon as one of them reports an error.
async.each(array, function(item, done) {
  asyncFunction(data++, done);
}, function(err) {
  if (err) {
    // handle error
    console.error(err);
    return;
  }
  console.log(data);
  executeOtherFunction(data);
});
(assuming that asyncFunction takes two arguments, a number (data) and a callback)
If asyncFunction is implemented like the following:
function asyncFunction(n) {
process.nextTick(function() { /* do some operations */ });
}
Then you'll have no way of knowing when asyncFunction is actually done executing because it's left the callstack. So it'll need to notify when execution is complete.
// Defers its work to the next tick and then notifies the caller through
// `callback` (called with no arguments).
function asyncFunction(n, callback) {
  var finish = function() {
    /* do some operations */
    callback();
  };
  process.nextTick(finish);
}
This is using the simple callback mechanism. If you want to use one of the freakishly many modules to have this handled for you, go ahead. Implementing something like this with basic callbacks might not be pretty, but it isn't difficult.
var array = [1, 2, 3];
var data = 0;
var cntr = 0;

// Shared completion callback: counts finished asyncFunction calls and runs
// executeOtherFunction once the count reaches array.length.
function countnExecute() {
  cntr += 1;
  if (cntr !== array.length) {
    return;
  }
  executeOtherFunction(data);
}

// Start one asyncFunction call per element; each receives the value of `data`
// before it is incremented.
var i = 0;
while (i < array.length) {
  asyncFunction(data++, countnExecute);
  i++;
}
Take a look at this module, I think this is what you're looking for:
https://github.com/caolan/async

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP Get request and I want to parse the response and save it to my database.
If i call crawl(i) independentely i get good results. But i have to call crawl() from 1 to 2000.
I get good results but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module queue function but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What i am crawling
My node functions :
// Builds the http.request options for the RSS feed of location id `i`.
function getOptions(i) {
  var feedPath = '/syndicate/rss/index.php?id=' + i + '&unit=uk';
  var options = {};
  options.host = 'magicseaweed.com';
  options.path = feedPath;
  options.method = 'GET';
  return options;
};
// Starts a GET request using the options from getOptions(i) and passes the
// payload of every 'data' event on the response to parseLocation.
// Returns nothing and takes no completion callback, so callers cannot tell
// when the response has been handled.
// NOTE(review): a response presumably can emit 'data' more than once (one
// chunk at a time), in which case parseLocation would be given partial
// documents — confirm against the Node http documentation.
function crawl(i){
var req = http.request(getOptions(i), function(res) {
res.on('data', function (body) {
parseLocation(body);
});
});
// Nothing is written to the request; end() is called right away.
req.end();
}
// Parses `body` with parser.parseString and, when the parsed result has an
// `rss` property, saves a Location built from the first channel.
function parseLocation(body){
parser.parseString(body, function(err, result) {
// `err` is not inspected; a missing result or one without `rss` is skipped silently.
if(result && typeof result.rss != 'undefined') {
var locationTitle = result.rss.channel[0].title;
var locationString = result.rss.channel[0].item[0].link[0];
var location = new Location({
// third '/'-separated segment of the first item's link
id: locationString.split('/')[2],
name: locationTitle
});
// The result of save() is not used here.
location.save();
}
});
}
// Queue set-up from the question: at most N workers, 100 tasks pushed.
N = 2 //# of simultaneous tasks
var q = async.queue(function (task, callback) {
// task.url is a full URL here, whereas crawl() forwards its argument to
// getOptions(), which inserts it after "id=" in the request path.
crawl(task.url);
// Called as soon as crawl() returns. crawl() only starts the request, so the
// task is reported finished before any response has arrived.
callback();
}, N);
q.drain = function() {
console.log('Crawling done.');
}
for(var i = 0; i < 100; i++){
q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id='+i+'&unit=uk'});
}
[EDIT] WELL, after a lot of testing it seems that the service I am crawling cannot handle so many request that fast. Because when I do each requests sequentially, I can get all the good responses.
Is there a way to SLOW DOWN ASYNC queue method?
You should have a look at this great module, async which simplifies async tasks like this. You can use queue, simple example:
var N = 5; // # of simultaneous tasks (pick a value that suits the site and connection)
var q = async.queue(function (task, callback) {
  // Signal completion only from the request's own callback, so the queue
  // frees the slot when the request has really finished.
  somehttprequestfunction(task.url, function(){
    callback();
  });
}, N);
q.drain = function() {
  console.log('all items have been processed');
};
for(var i = 0; i < 2000; i++){
  q.push({url:"http://somewebsite.com/"+i+"/feed/"});
}
It will have a window of ongoing actions, and a slot becomes available for a future task only when you invoke the callback function. The difference is that your code now opens 2000 connections immediately, and obviously the failure rate is high. Limiting it to a reasonable value, 5, 10, 20 (depends on site and connection), will result in a better success rate. If a request fails, you can always try it again, or push the task to another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot becomes available when the task is done.
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
You're executing the next task immediately after starting the previous one; used this way, the queue is meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done.
// then
var q = async.queue(function (task, next/* name this argument as 'next' is more meaningful */) {
crawl(task.url, function () {
// after this one is done, start next one.
next();
});
// or, more simple way, crawl(task.url, next);
}, N);
Another option if you want. Vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
incrementer++
if(incrementer == 100){
clearInterval(myInterval)
//when done parse results array
}
//make request here
//push request result to array here
}, 500);
Invokes the function every half second. Easy way to force sync and exit after x requests.
I know I am a little late to the question, however here is a solution I wrote to slow down the number of requests when testing an api endpoint, using node 4 or node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/")
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;
// Reads test.txt (JSON keyed by year, then make), requests the models for each
// year/make pair one at a time with a 3 second pause between requests, and
// writes the enriched structure back to test.txt when the queue is empty.
fs.readFile('test.txt', function(err, data){
  if (err) {
    // Nothing to process if the input file cannot be read.
    console.log(err);
    return;
  }
  var parsedItems = JSON.parse(data);
  var urlparts = [];
  // create a queue
  // NOTE(review): `range` is not defined in this snippet; presumably a helper
  // that yields each year to visit — confirm it is in scope.
  for (let year of range(1975, 2016)) {
    for (var make in parsedItems[year]){
      console.log(year, make, '/models/' + year + '/' + make);
      urlparts.push({urlpart:'/models/' + year + '/' + make, year: year, make: make});
    }
  }
  // start dequeue
  waitDequeue();
  // This function calls itself after the makeRequest promise completes
  function waitDequeue(){
    var item = urlparts.pop();
    if (item){
      makeRequest(item)
        .then(function(){
          // wait this time before next dequeue
          setTimeout(function() {
            waitDequeue();
          }, 3000);
        })
        .catch(function(requestErr){
          // A failed request stops the run; the file is left untouched.
          console.log(requestErr);
        });
    } else {
      write(parsedItems);
    }
  }
  // make a request, mutate parsedItems then resolve (rejects on request error)
  function makeRequest(item){
    return new Promise((resolve, reject)=>{
      request
        .get(item.urlpart)
        .set(options.auth[0], options.auth[1])
        .set(options.type[0], options.type[1])
        .end(function(err, res) {
          if (err) return reject(err);
          console.log(res.body);
          res.body.forEach(function(model){
            parsedItems[item.year][item.make][model] = {};
          });
          resolve();
        });
    });
  }
  // write the results back to the file
  function write(parsedItems){
    fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
      console.log(err);
    });
  }
});
A little late but I have found this works!
Using async you can slow down the queue by using whilst inside the task handler eg:
var q = async.priorityQueue(function(task, callback) {
  // your code process here for each task
  // when ready to complete the task delay it by calling
  var count = 0; // per-task tick counter, so every task gets the full delay
  // NOTE(review): written for the async API where the whilst test function is
  // synchronous; newer async versions presumably expect a callback-style test
  // — confirm against the installed version.
  async.whilst( // wait 10 seconds (10 ticks of 1000 ms)
    function() {
      return count < 10;
    },
    function(next) {
      count++;
      setTimeout(function() {
        next(null, count);
      }, 1000);
    },
    function (err, n) {
      // n seconds have passed
      callback(); //callback to q handler
    }
  ); //whilst
}, 5);

Using recursive pattern loop with node.js

I've been trying to use node.js to iterate through an array of cities and make a request to Google for directions for each one in turn (I then JSON.parse the response to extract the drive times). I need to find a way to do this sequentially, as otherwise I will just be requesting the info for every city from Google at once. I found a good pattern to use at http://tech.richardrodger.com/2011/04/21/node-js-%E2%80%93-how-to-write-a-for-loop-with-callbacks/ but cannot get the callback to work. As you can see, I'm using a 'show' function to test it. My code is as follows:
var request = require('request');
var fs = require('fs');
var arr = ['glasgow','preston','blackpool','chorley','newcastle','bolton','paris','york','doncaster'];
//the function I want to call on each city from [arr]
// Requests directions from `a` to `b` and logs the duration text of the first
// leg of the first route. `callback` is accepted but never invoked here, and
// `err` is not checked before parsing.
function getTravelTime(a, b,callback){
request('https://maps.googleapis.com/maps/api/directions/json?origin='+a+'&destination='+b+'&region=en&sensor=false',function(err,res,data){
var foo = JSON.parse(data);
var duration = foo.routes[0].legs[0].duration.text;
console.log(duration);
});
};
// Test stand-in: writes `b` to testing.txt. It declares a single parameter, so
// the function that uploader passes as a second argument is ignored and never called.
function show(b){
fs.writeFile('testing.txt',b);
};
// Intended to handle arr[i] and continue with i+1 from the completion
// callback; the recursion only advances if that callback is actually called.
function uploader(i){
if( i < arr.length ){
show( arr[i],function(){
uploader(i+1);
});
}
}
uploader(0)
The problem I have is that only the first city from the array is output and the callback/iteration never proceeds. Any ideas where im going wrong please?
 
Thanks for the pointers, was clearly down to my poor understanding of callbacks in javascript. Just reading JavaScript patterns by O'Reilly and hit the 'Callback pattern' sections - doh!
For anyone who doesn't know, this is how the code will work:
var arr = ['glasgow','preston','blackpool','chorley','newcastle','bolton','paris','york','doncaster'];

// Prints one entry, then hands control back to the caller through `callback`.
function show(a,callback){
  console.log(a);
  callback();
}

// Shows arr[i] and, from show's callback, moves on to i+1 until the end of arr.
function uploader(i){
  if (!(i < arr.length)) {
    return;
  }
  var next = function(){
    uploader(i+1);
  };
  show(arr[i], next);
}

uploader(0)
I was also facing issues like this, so I've written a recursive callback function which will act as a for loop but you can control when to increment. The following is that module, name as syncFor.js and include this in your program
module.exports = function syncFor(index, len, status, func) {
func(index, status, function (res) {
if (res == "next") {
index++;
if (index < len) {
syncFor(index, len, "r", func);
} else {
return func(index, "done", function () {
})
}
}
});
}
//this will be your program if u include this module
var request = require('request');
var fs = require('fs');
var arr = ['glasgow', 'preston', 'blackpool', 'chorley', 'newcastle', 'bolton', 'paris', 'york', 'doncaster'];
var syncFor = require('./syncFor'); //syncFor.js is stored in same directory
//the following is how u implement it
syncFor(0, arr.length, "start", function (i, status, call) {
if (status === "done")
console.log("array iteration is done")
else
getTravelTime(arr[i], "whatever", function () {
call('next') // this acts as increment (i++)
})
})
// Requests driving directions from `a` to `b` and reports the duration text of
// the first leg of the first route.
//   callback(err)            - the request failed or the response had no usable route
//   callback(null, duration) - success; the duration is also logged
function getTravelTime(a, b, callback) {
  // Encode the place names so spaces and '&' cannot break the query string.
  var url = 'https://maps.googleapis.com/maps/api/directions/json?origin=' + encodeURIComponent(a) + '&destination=' + encodeURIComponent(b) + '&region=en&sensor=false';
  request(url, function (err, res, data) {
    if (err) {
      return callback(err);
    }
    var duration;
    try {
      duration = JSON.parse(data).routes[0].legs[0].duration.text;
    } catch (parseErr) {
      // Invalid JSON, or a response without routes/legs.
      return callback(parseErr);
    }
    console.log(duration);
    callback(null, duration); // call the callback when u get answer
  });
};

Resources