Using recursive pattern loop with node.js - node.js

ive been trying to use node.js to iterate through an array of cities and make an iterative request to google for directions on each (i then JSON.parse to abstract the drive times). I need to find a way to do this synchronously as otherwise i will just be requesting all the info from google on each city at once. I found a good pattern to use at http://tech.richardrodger.com/2011/04/21/node-js-%E2%80%93-how-to-write-a-for-loop-with-callbacks/ but cannot get the callback to work. As you can see, im using a 'show' function to test the same. My code is as follows:
var request = require('request');
var fs = require('fs');
var arr = ['glasgow','preston','blackpool','chorley','newcastle','bolton','paris','york','doncaster'];
//the function I want to call on each city from [arr]
function getTravelTime(a, b,callback){
request('https://maps.googleapis.com/maps/api/directions/json?origin='+a+'&destination='+b+'&region=en&sensor=false',function(err,res,data){
var foo = JSON.parse(data);
var duration = foo.routes[0].legs[0].duration.text;
console.log(duration);
});
};
function show(b){
fs.writeFile('testing.txt',b);
};
function uploader(i){
if( i < arr.length ){
show( arr[i],function(){
uploader(i+1);
});
}
}
uploader(0)
The problem I have is that only the first city from the array is output and the callback/iteration never proceeds. Any ideas where im going wrong please?
 

Thanks for the pointers, was clearly down to my poor understanding of callbacks in javascript. Just reading JavaScript patterns by O'Reilly and hit the 'Callback pattern' sections - doh!
For anyone who doesn't know, this is how the code will work:
var arr = ['glasgow','preston','blackpool','chorley','newcastle','bolton','paris','york','doncaster'];
function show(a,callback){
console.log(a);
callback();
}
function uploader(i){
if( i < arr.length ){
show(arr[i],
function(){
uploader(i+1)
});
};
}
uploader(0)

I was also facing issues like this, so I've written a recursive callback function which will act as a for loop but you can control when to increment. The following is that module, name as syncFor.js and include this in your program
module.exports = function syncFor(index, len, status, func) {
func(index, status, function (res) {
if (res == "next") {
index++;
if (index < len) {
syncFor(index, len, "r", func);
} else {
return func(index, "done", function () {
})
}
}
});
}
//this will be your program if u include this module
var request = require('request');
var fs = require('fs');
var arr = ['glasgow', 'preston', 'blackpool', 'chorley', 'newcastle', 'bolton', 'paris', 'york', 'doncaster'];
var syncFor = require('./syncFor'); //syncFor.js is stored in same directory
//the following is how u implement it
syncFor(0, arr.length, "start", function (i, status, call) {
if (status === "done")
console.log("array iteration is done")
else
getTravelTime(arr[i], "whatever", function () {
call('next') // this acts as increment (i++)
})
})
function getTravelTime(a, b, callback) {
request('https://maps.googleapis.com/maps/api/directions/json?origin=' + a + '&destination=' + b + '&region=en&sensor=false', function (err, res, data) {
var foo = JSON.parse(data);
var duration = foo.routes[0].legs[0].duration.text;
callback(); // call the callback when u get answer
console.log(duration);
});
};

Related

How to handle callbacks in a for loop(Node.JS)

I am trying to write a code with NodeJS where I grab data from an external API and then populate them in MongoDB using Mongoose. In between that, I'll check to see if that particular already exists in Mongo or not. Below is my code.
router.route('/report') // the REST api address
.post(function(req, res) // calling a POST
{
console.log('calling report API');
var object = "report/" + reportID; // related to the API
var parameters = '&limit=100' // related to the API
var url = link + object + apiKey + parameters; // related to the API
var data = "";
https.get(url, function callback(response)
{
response.setEncoding("utf8");
response.on("data", function(chunk)
{
data += chunk.toString() + "";
});
response.on("end", function()
{
var jsonData = JSON.parse(data);
var array = jsonData['results']; // data is return in array of objects. accessing only a particular array
var length = array.length;
console.log(length);
for (var i = 0; i < length; i++)
{
var report = new Report(array.pop()); // Report is the schema model defined.
console.log('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^');
console.log(i);
console.log('*****************************');
console.log(report);
console.log('*****************************');
// console.log(report['id']);
/*report.save(function(err)
{
if(err)
res.send(err);
});*/
Report.find({id:report['id']}).count(function(err, count) // checks if the id of that specific data already exists in Mongo
{
console.log(count);
console.log('*****************************');
if (count == 0) // if the count = 0, meaning not exist, then only save
{
report.save(function(err)
{
console.log('saved');
if(err)
res.send(err);
});
}
});
};
res.json({
message: 'Grabbed Report'
});
});
response.on("error", console.error);
});
})
My problem is that since NodeJS callbacks are parallel, it is not getting called sequentially. My end result would be something like this :
Calling report API
console.log(length) = 100
^^^^^^^^^^^^^^^^^^^^^^^^
console.log(i) = starts with 0
*******************************
console.log(report) = the data which will be stored inside Mongo
*******************************
number 3 - 7 repeats 100 times as the length is equals to 100
console.log(count) = either 0 or 1
number 9 repeats 100 times
console.log('saved')
number 11 repeats 100 times
Lastly, only the last out of 100 data is stored into Mongo
What I need is some sort of technique or method to handle these callbacks which are executing one after the other and not sequentially following the loop. I am pretty sure this is the problem as my other REST APIs are all working.
I have looked into async methods, promises, recursive functions and a couple others non which I could really understand how to solve this problem. I really hope someone can shed some light into this matter.
Feel free also to correct me if I did any mistakes in the way I'm asking the question. This is my first question posted in StackOverflow.
This problem is termed as the "callback hell".
There's lots of other approaches like using Promise and Async libraries you'll find.
I'm more excited about the native async ES7 will bring,
which you can actually start using today with transpiler library Babel.
But by far the simplest approach I've found is the following:
You take out the long callback functions and define them outside.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", response_on_end_callback); // --> take out
response.on("error", console.error);
});
}
function response_on_end_callback() { // <-- define here
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(Report_find_count_callback); // --> take out
};
res.json({
message: 'Grabbed Report'
});
}
function Report_find_count_callback(err, count) { // <-- define here
...
if (count == 0) {
report.save(function(err) { // !! report is undefined here
console.log('saved');
if (err)
res.send(err); // !! res is undefined here
});
}
}
A caveat is that you won't be able to access all the variables inside what used to be the callback,
because you've taken them out of the scope.
This could be solved with a "dependency injection" wrapper of sorts to pass the required variables.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", function(err, data){ // take these arguments
response_on_end(err, data, res); // plus the needed variables
});
response.on("error", console.error);
});
}
function response_on_end(err, data, res) { // and pass them to function defined outside
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(function(err, count){
Report_find_count(err, count, report, res); // same here
});
};
res.json({ // res is now available
message: 'Grabbed Report'
});
}
function Report_find_count(err, count, report, res) { // same here
...
if (count == 0) {
report.save(function(err) { // report is now available
console.log('saved');
if (err)
res.send(err); // res is now available
});
}
}
When I execute the response_on_end function, I am getting the undefined:1 unexpected token u error.
I am pretty much sure it has something to do with this line: var jsonData = JSON.parse(data)
My response_on_end is as below: var jsonData = JSON.parse(data); // problem here
I realize I made an error here:
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
//sponse.on("end", function(err, data){
response.on("end", function(err){ // data shouldn't be here
response_on_end(err, data, res);
});
response.on("error", console.error);
});
}
Another problem I could forsee, which actually may not arise here but still would be better to talk about anyways.
The data variable, since it's a string which is a primitive type unlike an object, it is "passed by value".
More info
It's better to wrap the variable in an object and pass the object, because objects in javascript are always "passed by reference".
function calling_a_POST(req, res) {
...
// var data = ""; //
var data_wrapper = {};
data_wrapper.data = {}; // wrap it in an object
https.get(url, function callback(response) {
...
response.on("data", function(chunk){
data_wrapper.data += chunk.toString() + ""; // use the dot notation to reference
});
response.on("end", function(err){
response_on_end(err, data_wrapper, res); // and pass that object
});
response.on("error", console.error);
});
}
function response_on_end_callback(err, data_wrapper, res) {
var data = data_wrapper.data; // later redefine the variable
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
You can use async library for controlling your execution flows. And there are also iterators for working with arrays.

JS Closures, Redis, loop, Async :: empty array

I give up on this. May some of the wise stackoverflow monks please fix my bugs?
Code is self explaining. Client sends room names, server does a redis lookup and pushes valid rooms to the array. After adding all the rooms, the list should be emitted to the client.
Problem is closure, async etc. based. I understand the problem but cannot get a workaround because the array needs to remain inside the function. Tricky.
Code:
function roomList(socket){
var roomlist = [], rooms = getRooms(), p = /pChannel_/;
redis.select(7, function(err,res){
for (var k in rooms){
if(rooms[k] != '' && p.test(rooms[k])){
var key = 'channel:'+rooms[k];
redis.hgetall(key, function (err, reply) {
if(reply){
var c = io.sockets.manager.rooms[rooms[k]];
roomlist.push( Array(reply['name'],c.length,reply['icon']) );
}
else { console.log('nothing found'); }
});
}
}
// here be dragons
console.log(roomlist);
socket.emit('roomList', roomlist);
});
}
Thanks.
C'mon guys. The OP explicitly said she/he is interested by understanding how things are supposed to work. And you don't need Q or async or any other 3rd party modules to implement this.
In the initial code, there are two problems:
with Javascript, the closure scope is at function level, not block level. A function must be introduced to define a proper closure. Here, a simple forEach can be used.
the final step (i.e. emit) is not run after the replies have been received from Redis. It must be called in the loop itself. In order to achieve it, it is required to count the items so that the inner callback can test whether the process is complete or not.
So here is another version:
function roomList(socket){
var roomlist = [], rooms = getRooms(), p = /pChannel_/;
redis.select(7, function(err,res){
var count = rooms.length
rooms.forEach( function(r) {
if( r != '' && p.test(r) ) {
var key = 'channel:'+r
redis.hgetall(key, function (err, reply) {
if(reply) {
var c = io.sockets.manager.rooms[r];
roomlist.push( Array(reply['name'],c.length,reply['icon']) );
} else {
console.log('nothing found');
}
if ( --count <= 0 ) {
// here be dragons
console.log(roomlist);
socket.emit('roomList', roomlist);
}
});
} else --count;
});
});
}
Looks like a job for async.map:
function roomList(socket){
var rooms = getRooms(), p = /pChannel_/;
redis.select(7, function(err, res) {
async.map(rooms, function(room, callback) {
if (room === '' || ! p.test(room))
return callback(null, null);
var key = 'channel:' + room;
var c = io.sockets.manager.rooms[room];
redis.hgetall(key, function (err, reply) {
if (err)
callback(err); // propagate Redis errors to final callback, don't know
// if you want that or not; use 'callback(null)' if not.
else
if (reply)
callback(err, Array(reply.name, c.length, reply.icon) );
else
callback(err, null);
});
}, function(err, roomlist) {
if (err)
// handle Redis errors...
// filter 'null' entries from roomlist
roomlist = roomlist.filter(function(room) { return room !== null });
console.log(roomlist);
socket.emit('roomList', roomlist);
});
});
}
(untested)
If you just want to wait for the room list to be fully built before emitting the response (as seems highly reasonable), and assuming Q to be available, then you should just need a few additional lines of Q magic plus a closure-forming wrapper around the inner code to maintain a reliable reference to a Q deferred at each pass of the for loop.
function roomList(socket) {
redis.select(7, function(err, res) {
var list = [],
rooms = getRooms(),
p = /pChannel_/,
promises = [];
for(var k in rooms) {
if(rooms[k] != '' && p.test(rooms[k])) {
(function(dfrd) {
promises.push(dfrd.promise);
var key = 'channel:' + rooms[k];
redis.hgetall(key, function(err, reply) {
if(reply) {
var c = io.sockets.manager.rooms[rooms[k]];
list.push( [reply['name'], c.length, reply['icon']] );
}
else {
console.log('nothing found');
}
dfrd.resolve();
});
})(Q.defer());
}
}
Q.all(promises).then(function() {
console.log(list);
socket.emit('roomList', list);
});
});
}

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP Get request and I want to parse the response and save it to my database.
If i call crawl(i) independentely i get good results. But i have to call crawl() from 1 to 2000.
I get good results but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module queue function but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What i am crawling
My node functions :
function getOptions(i) {
return {
host: 'magicseaweed.com',
path: '/syndicate/rss/index.php?id='+i+'&unit=uk',
method: 'GET'
}
};
function crawl(i){
var req = http.request(getOptions(i), function(res) {
res.on('data', function (body) {
parseLocation(body);
});
});
req.end();
}
function parseLocation(body){
parser.parseString(body, function(err, result) {
if(result && typeof result.rss != 'undefined') {
var locationTitle = result.rss.channel[0].title;
var locationString = result.rss.channel[0].item[0].link[0];
var location = new Location({
id: locationString.split('/')[2],
name: locationTitle
});
location.save();
}
});
}
N = 2 //# of simultaneous tasks
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
q.drain = function() {
console.log('Crawling done.');
}
for(var i = 0; i < 100; i++){
q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id='+i+'&unit=uk'});
}
[EDIT] WELL, after a lot of testing it seems that the service I am crawling cannot handle so many request that fast. Because when I do each requests sequentially, I can get all the good responses.
Is there a way to SLOW DOWN ASYNC queue method?
You should have a look at this great module, async which simplifies async tasks like this. You can use queue, simple example:
N = # of simultaneous tasks
var q = async.queue(function (task, callback) {
somehttprequestfunction(task.url, function(){
callback();
}
}, N);
q.drain = function() {
console.log('all items have been processed');
}
for(var i = 0; i < 2000; i++){
q.push({url:"http://somewebsite.com/"+i+"/feed/"});
}
It will have a window of ongoing actions and the tasks room will be available for a future task if you only invoke the callback function. Difference is, your code now opens 2000 connections immidiately and obviously the failure rate is high. Limiting it to a reasonable value, 5,10,20 (depends on site and connection) will result in a better sucess rate. If a request fails, you can always try it again, or push the task to another async queue for another trial. The key point is to invoke callback() in queue function, so that a room will be available when it is done.
var q = async.queue(function (task, callback) {
crawl(task.url);
callback();
}, N);
You'are executing next task immediately after starting the previous one, in this way, the queue is just meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done.
// then
var q = async.queue(function (task, next/* name this argument as 'next' is more meaningful */) {
crawl(task.url, function () {
// after this one is done, start next one.
next();
});
// or, more simple way, crawl(task.url, next);
}, N);
Another option if you want. Vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
incrementer++
if(incrementer == 100){
clearInterval(myInterval)
//when done parse results array
}
//make request here
//push request result to array here
}, 500);
Invokes the function every half second. Easy way to force sync and exit after x requests.
I know I am a little late to the question, however here is a solution I wrote to slow down the number of requests when testing an api endpoint, using node 4 or node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/")
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;
fs.readFile('test.txt', function(err, data){
var parsedItems = JSON.parse(data);
var urlparts = []
// create a queue
for (let year of range(1975, 2016)) {
for (var make in parsedItems[year]){
console.log(year, make, '/models/' + year + '/' + make)
urlparts.push({urlpart:'/models/' + year + '/' + make, year: year, make: make})
}
}
// start dequeue
waitDequeue();
// This function calls itself after the makeRequest promise completes
function waitDequeue(){
var item = urlparts.pop()
if (item){
makeRequest(item)
.then(function(){
// wait this time before next dequeue
setTimeout(function() {
waitDequeue();
}, 3000);
})
} else {
write(parsedItems)
}
}
// make a request, mutate parsedItems then resolve
function makeRequest(item){
return new Promise((resolve, reject)=>{
request
.get(item.urlpart)
.set(options.auth[0], options.auth[1])
.set(options.type[0], options.type[1])
.end(function(err, res) {
if (err) return done1(err);
console.log(res.body)
res.body.forEach(function(model){
parsedItems[item.year][item.make][model] = {}
});
resolve()
})
})
}
// write the results back to the file
function write(parsedItems){
fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
console.log(err)
})
}
})
A little late but I have found this works!
Using async you can slow down the queue by using whilst inside the task handler eg:
var q = async.priorityQueue(function(task, callback) {
// your code process here for each task
//when ready to complete the task delay it by calling
async.whilst( //wait 6 seconds
function() {
return count < 10;
},
function(callback) {
count++;
setTimeout(function() {
callback(null, count);
}, 1000);
},
function (err, n) {
// n seconds have passed
callback(); //callback to q handler
}
); //whilst
} , 5);

How to send mongodb data async from inner functions to outside callback function from a for loop?

NEW POST:
Here is the sample of the working async code without a db.
The problem is, if i replace the vars (data1_nodb,...) with the db.collection.find();
function, all needed db vars received at the end and the for() loop ends not
correct. I hope that explains my problem a bit better. OA
var calc = new Array();
function mach1(callback){
error_buy = 0;
// some vars
for(var x_c99 = 0; x_c99 < array_temp_check0.length;x_c99++){
// some vars
calc[x_c99] = new Array();
calc[x_c99][0]= new Array();
calc[x_c99][0][0] = "dummy1";
calc[x_c99][0][1] = "dummy2";
calc[x_c99][0][2] = "dummy3";
calc[x_c99][0][3] = "dummy4";
calc[x_c99][0][4] = "dummy5";
function start_query(callback) {
data1_nodb = "data1";
data2_nodb = "data2";
data3_nodb = "data3";
data4_nodb = "data4";
calc[x_c99][0][0] = data1_nodb;
calc[x_c99][0][1] = data2_nodb;
calc[x_c99][0][2] = data3_nodb;
callback(data1_nodb,data2_nodb,etc..);
}
start_query(function() {
console.log("start_query OK!");
function start_query2(callback) {
data4_nodb = "data5";
data5_nodb = "data6";
data6_nodb = "data7";
calc[x_c99][0][3] = data4_nodb;
calc[x_c99][0][4] = data5_nodb;
callback(data5_nodb,data6_nodb,etc..);
}
start_query2(function() {
console.log("start_query2 OK!");
function start_query3(callback) {
for(...){
// do something
}
callback(vars...);
}
start_query3(function() {
console.log("start_query3 OK!");
});
});
});
}
callback(calc);
};
function mach2(callback){
mach1(function() {
console.log("mach1 OK!");
for(...){
// do something
}
});
callback(calc,error_buy);
};
mach2(function() {
console.log("mach2 OK 2!");
});
OLD POST:
i try to read data from the mongodb and send them back with a callback to the next
function, that needs the infos from the db to proceed.
Without the mongodb read functions it works perfect but now i dont know how
i can send the db vars out of the two inner functions to the first callback function.
Hope someone can help me...
Thanks
var error = 0; var var1 = "yessir";
function start_query(callback) {
var db_name = "db1";
db[db_name].find({name:var1},{data1:1, data2:1, data3:1, data4:1}, function(err, data_catch,callback) {
if( err || !data_catch ) {
console.log("Problem finding data_catch:" + err);
} else {
data_catch.forEach( function(data_catch_finder,callback) {
data1_db = data_catch_finder.data1;
data2_db = data_catch_finder.data2;
data3_db = data_catch_finder.data3;
data4_db = data_catch_finder.data4;
if(data1_db == "" || data2_db == "" || data3_db == "" || data4_db == ""){error = 1; console.log("Error: data_catch_finder");}
callback(data1_db, data2_db, data3_db, data4_db, error);
});
}
});
callback(data1, data2, data3, data4, error);
}
//########################################################################
start_query(function() {
function start_query2(callback) {
console.log("DATA1 from callback:" + data1_db);
console.log("DATA2 from callback:" + data2_db);
console.log("DATA3 from callback:" + data3_db);
console.log("DATA4 from callback:" + data4_db);
var var_no_db = "testing";
//do something else and callback
callback(var_no_db);
}
start_query2(function() {
console.log("Var from callback start_query2:" + var_no_db);
console.log("The end");
});
});
your callback signature are issuing callback as a parameter.
As far as I can understand your code, you need to keep reference of the first callback, the one you receive here: function start_query(callback).
In every callback function you made the mistake to bind the variable name callback to the parameter from the mongo driver (a simple undefined i think).
You can fix it removing every reference of callback from the signature of your inner functions.
a simple example:
function async (cb) {
// params: Error, data
cb(null, 'moo');
}
function foo(callback) {
async(function(err, data, callback){
console.log(callback); // undefined
});
console.log(callback); // Moo
}
foo('Moo');
Take a look at Eloquent Javascript to better understand the JS context switching;
edit
The only way to wait the results of an async function is recall the first callback inside the last nested callback.
function ugly_nested (callback) {
dbquery('...', function(err, data_01) {
if (!! err) return callback(err);
dbquery('...', function(err, data_02) {
if (!! err) return callback(err);
dbquery('...', function(err, data_03) {
if (!! err) return callback(err);
callback(null, data_01, data_02, data_03);
});
});
});
}
ugly_nested(function(err, data01, data02, data03) {
if (!! err) throw err;
manage_data(data01, data02, data03);
});
The FOR loop is synchronous, but, the database calls are asynchronous, so, the for loop will end before the database returns his results. If you really need that for loop you can try out one of the nice flow control libraries out there

Async utility for Node.js Call back function not executing

I am trying to use the async utility for the node.js. Below is my code. The console prints "In my func1" and "In my func2". I also expect it to print "call back func" but it won'nt
var myfunc1 = function(callback) {
var a = 1;
console.log ("In my func1");
};
var myfunc2 = function(callback) {
var b = 2;
console.log ("In my func2");
};
models.Picture.prototype.relativeSort = function(viewer_location) {
console.log("Rel Sort");
var sortedPics = [];
var relsortedPics = [];
// Finds all the pictures
async.series([myfunc1(), myfunc2()],
function(err, results){
if(err) {
throw err;
}
console.log("call back func");
return a+b;
});
};
You need to use the callback argument, for example:
var myfunc1 = function(callback) {
var a = 1;
console.log ("In my func1");
callback(null, 'test');
};
The first argument of callback is an error, and the second the result you want to pass to the final handler.
EDIT
I've missed the other mistake. This line:
async.series([myfunc1(), myfunc2()],
should be
async.series([myfunc1, myfunc2],
You are not supposed to call functions. You are telling async: "Hey, take these functions and do something (asynchronous) with them!".

Resources