NodeJS + redis gives weird results - node.js

Maybe the results ain't weird, but I started using Node 1-2 months ago so for me they are...
I have a loop which sorts out every other value of the array returned by hgetall (Redis command) and in that loop I call a function to get all values from another table with keys stored in the sorted array. This was more difficult to explain than I thought. Here's my code:
Pastebin: http://pastebin.com/tAVhSUV1 (or see below)
/**
 * Fetch every field value of the Redis hash `chat_info:<cn>`.
 *
 * @param {string} cn - Suffix appended to the `chat_info:` key prefix.
 * @param {function(Array)} callback - Called with an array holding the
 *     hash's values once hgetall has replied.
 * @throws Rethrows (from inside the reply handler) any error hgetall reports.
 */
function getInfo (cn, callback) {
    var anArray = [];
    redis_client.hgetall('chat_info:' + cn, function (err, vals) {
        // Declared locally so the loop does not create an implicit global.
        var field;
        if (err) { throw err; }
        for (field in vals) {
            anArray.push(vals[field]);
        }
        return callback(anArray);
    });
}
// Read the chat-room hash, then start a getInfo lookup for every other field name.
redis_client.hgetall('chat_rooms:' + POST.chat_name, function (err, val) {
if(err) { throw err; }
var vars = [],
rArr = [];
// Collect the field names of the returned hash.
for (i in val) {
vars.push(i);
}
for(var i = 0; i < vars.length; i += 1) {
// Only even positions (0, 2, 4, ...) are looked up.
if(i%2 === 0) {
// getInfo only starts the lookup here. Per the discussion below, this callback
// fires after the loop has finished, so rArr is still empty right after the loop.
getInfo(vars[i], function (hej) {
rArr.push(hej);
});
}
}
});
The callback from the call to getInfo() is executed after the entire loop. Am I missing out on something here? Because it can't do that, right? (When I use rArr right after the loop it's empty, but if I log it in the callback, it gets logged after everything else written after the loop.)

Yes, that's probably normal.
Understand that callbacks are executed after the hgetall call returns, which means that the redis functions will call the callbacks only once they receive something. In other words, all the callbacks can be executed later.
As JavaScript only runs in one thread, the calls to hgetall would have to be blocking in order to be executed as they come in the for loop. But you're almost certainly using async IO, so the for loop ends first, and only then does it start calling each of the callbacks that were queued in the JavaScript event loop.
Edit
Unfortunately, to achieve what you're trying to do, you should wrap your code inside many other callbacks. You can use this project to make it easier: https://github.com/caolan/async
You should be able to install it using npm install async.
You'd have to do something like that:
/**
 * Build a task, suitable for async.series, that fetches every field value
 * of the Redis hash `chat_info:<cn>`.
 *
 * @param {string} cn - Suffix appended to the `chat_info:` key prefix.
 * @returns {function(function(Error, Array))} A task taking a Node-style
 *     callback: `callback(err)` on failure, `callback(null, values)` on success.
 */
function getInfo (cn) {
    return function (callback) {
        redis_client.hgetall('chat_info:' + cn, function (err, vals) {
            var anArray = [];
            var field;
            // Forward the error instead of throwing: async.series stops on a
            // truthy first argument and hands it to its final callback.
            if (err) { return callback(err); }
            for (field in vals) {
                anArray.push(vals[field]);
            }
            // The first argument is the error slot; passing the array there
            // would make async.series treat the result as a failure.
            return callback(null, anArray);
        });
    };
}
// Read the chat-room hash, queue one getInfo task for every other field name,
// and run the queued tasks one after another with async.series.
redis_client.hgetall('chat_rooms:' + POST.chat_name, function (err, val) {
    var fieldNames = [];
    var tasks = [];
    var field;
    var pos;

    if (err) { throw err; }

    for (field in val) {
        fieldNames.push(field);
    }

    // Step by two: only positions 0, 2, 4, ... get a task.
    for (pos = 0; pos < fieldNames.length; pos += 2) {
        tasks.push(getInfo(fieldNames[pos]));
    }

    async.series(tasks, function (err, results) {
        // Final code here
    });
});

Related

Issue with asynchronous mongodb query

I am trying to loop through an array and find the amount of tickets assigned to each person.
Unfortunately, I noticed that my taskcount is getting the same values but in different order, because of its asynchronous nature.
Some queries might take long and so the ones that gets finished first gets inserted and hence my array has the same values but in different order. Now, I want to avoid that and make it so, that once a query gets completed, only then the next value from the array is being picked up and pushed to search from the db. How can i modify my existing code.
// Look up the tickets assigned to each name and pass the per-name counts to `callback`.
exports.find_task_count = function(callback) {
var names = ['Evan', 'Surajit', 'Isis', 'Millie', 'Sharon', 'Phoebe', 'Angel', 'Serah']
var taskcount = []
// Number of find() callbacks that have fired so far.
var resultsCount = 0;
// The loop issues one find() per name without waiting for the previous one to report back.
for (var i = 0; i < names.length; i++) {
_tasks.find({'assignee': names[i]}, function (err, tickets) {
resultsCount++
if (err) {
console.log(err)
return callback(err)
} else {
// Counts are appended in the order the callbacks fire, which (as described above) need not match `names`.
taskcount.push(tickets.length)
if (resultsCount === names.length) {
return callback(taskcount);
// NOTE(review): unreachable, because it follows the return on the previous line.
taskcount=[]
}
}
})
}
}
You can use the async module designed to handle such scenarios.
I have updated the code as follows
var async = require('async');
exports.find_task_count = function (callback) {
var names = ['Evan', 'Surajit', 'Isis', 'Millie', 'Sharon', 'Phoebe', 'Angel', 'Serah'];
async.map(names, function (name, iterateeCallback) {
_tasks.find({ 'assignee': name }, function (err, tickets) {
if (err) {
return iterateeCallback(err);
}
return iterateeCallback(null, tickets.length);
});
}, function (error, results) {
if (error) {
return callback(error);
}
return callback(null, results);
});
}
As per the documentation of async
Note, that since this function applies the iteratee to each item in parallel, there is no guarantee that the iteratee functions will complete in order. However, the results array will be in the same order as the original coll.
if you still want to process the array in series use mapSeries instead of map in the above code

node.js svn update/commit synchronously?

I'm using svn-spawn library to update/commit files to svn. Problem is my app calls svn up/commit in a loop, and because of the async nature of the call, svn-up is called from the next iteration of the loop before the previous svn-up can finish.
How to handle this issue? Is there any way to prevent the next call from happening until the previous one is complete?
Figured out a way to do it using async module.
async.series can be used to execute async tasks in a serial fashion.
This is how I did it.
// Commit one file: ask svnClient for the status first, then commit from inside the status callback.
// `callback` is handed straight to svnClient.commit (presumably invoked when the commit finishes — confirm against svn-spawn).
// NOTE(review): `arg` is unused and `filePath` is not defined in the lines shown; the `//...` parts are elided.
function commitFile(arg, callback) {
svnClient.getStatus(filePath, function(err, data) {
//...
svnClient.commit(['Commit msg', filePath], callback);
//...
});
}
// Queue one commit task per request, then let async.series run them strictly one after another.
var toCommit = [];
for (var i = 0, len = requests.length; i < len; i++) {
    // async.apply pre-binds 'arg1', so async.series only has to supply the completion callback.
    toCommit.push(async.apply(commitFile, 'arg1'));
}

// Runs once, after the last task completes or as soon as one task reports an error.
function reportRun(err, result) {
    console.log('Final callback');
    if (!err) {
        console.log('result of this run: ' + result);
        return;
    }
    console.log('error', err);
}

async.series(toCommit, reportRun);
async.series needs an array of functions which must call a callback once they are done. It uses the callback to determine that the current function is done executing, and only then will it pick the next function to execute.

NodeJS and parallel flow

I'm new with NodeJS. An issue makes me confused is parallel flow. I read an example show this snippet as a technique for controlling parallel flow:
// Read every file in a directory: build one task per file, then start them all.
var fs = require('fs');
var filesDir = './files'; // name must match the `filesDir` references below
var tasks = [];           // task functions queued by the readdir callback
fs.readdir(filesDir, function (err, files) {
    if (err) throw err;
    for (var index in files) {
        // The immediately-invoked function binds the current path to `file`,
        // so each task keeps its own path even though `index` keeps changing.
        var task = (function (file) {
            return function () {
                fs.readFile(file, function (err, text) {
                    if (err) throw err;
                    doSomething();
                });
            };
        })(filesDir + '/' + files[index]);
        tasks.push(task);
    }
    // Start every queued read; none waits for another to finish.
    for (var index in tasks) {
        tasks[index]();
    }
});
This code work like a charm, but when I replace it with
// Each `task` below reads the function-scoped `index` when it is called, not when it is created.
for (var index in files) {
var task = function () {
console.log(files[index]);
fs.readFile(filesDir + '/' + files[index], function (err, text) {
if (err) throw err;
doSomething();
});
};
tasks.push(task);
}
// `var` is function-scoped, so this loop reuses the very same `index` variable the tasks read.
for (var index in tasks) {
tasks[index]();
}
It doesn't work as I expected, because the files[index] in loop is always the last file in directory. Could you please explain me what the real flow is?
In short, the functions you created hold a reference to the index variable (not its value), so by the time they are executed, index refers to the last file in the directory in your case.
Some links: Understanding variable capture by closures in Javascript/Node
It's because index will refer to the last file by then. Node.js is asynchronous, so it will not wait until the read-file operation is completed; it will keep incrementing the index value.
// No per-iteration copy is made here: every `task` reads the shared, function-scoped `index` when it runs.
for (var index in files) {
var task = function () {
console.log(files[index]);
fs.readFile(filesDir + '/' + files[index], function (err, text) {
if (err) throw err;
doSomething();
});
};
tasks.push(task);
}
Since first code uses closures and it passes the current indexed file to a function. It ll take the current indexed file and returns a function with the file as input.
Now that returned function will execute in parallel.

While loop to check uniqueness of custom id

I have a MongoDB database set up with some objects that have a unique code (not the primary key).
I should also note that I'm using NodeJS and this code is in my server.js to connect to the MongoDB database.
To generate a new code, I generate one randomly and I want to check if it already exists. If not then we use it no problem, but if it already exists I want to generate another code and check it again. This is the code I use to check if the id already exists:
// Attempt to pick a random party id in [min, max] that has no matching record yet.
function createPartyId(callback) {
var min = 10000, max = 99999;
var partyId = -1, count = -1;
async.whilst(
// Loop condition: keep going while the last candidate had matches (count starts at -1).
function () { return count != 0; },
function (callback) {
partyId = min + Math.floor(Math.random() * (max - min + 1));
// NOTE(review): this overwrites the random id generated on the previous line.
partyId = 88888;
getPartyIdCount(partyId, function(num) {
count = num;
// NOTE(review): the iteration's own `callback` is never invoked in this body.
});
},
// NOTE(review): empty completion handler — the outer `callback` is never called with a result.
function (err) {
}
);
}
// Look up records whose `id` equals partyId and pass the number found to `callback`.
function getPartyIdCount(partyId, callback) {
// No `var`: this assigns to a `count` that lives outside this function.
count = -1;
db.db_name.find({id: partyId}, function(err, records) {
if(err) {
console.log("There was an error executing the database query.");
callback(count);
// NOTE(review): no return here, so execution continues to `records.length` below and `callback` is reached again.
}
count = records.length;
callback(count);
});
}
First of all, is there any particular reason you're not using a simple number increment sequence? This type of code is prone to inefficiency, the more numbers you generate the more chance you have of collisions which means you're going to be spending more time on generating an ID for your data than you are on the rest of your processing. Not a good idea.
But I can still tell you what's going wrong.
OK, so getPartyIdCount() will only, ever, always, without fail, return undefined (or, basically, nothing).
Your mongo call processes the return value in a callback, and that callback doesn't assign its value to anything, so return records.length just gets lost into nothingness.
You've mixed up createPartyId(), which it appears you want to run synchronously, with your mongo call, which must run asynchronously.
return always goes with the nearest containing function, so in this case it goes with function(err, records), not function getPartyIdCount(partyId).
(Expanding my comment from above)
The issue is that createPartyId is an asynchronous function, but you're trying to return the value synchronously. That won't work. Once you touch an async operation, the rest of the call stack has to be async as well.
You don't include the code that's calling this, but I assume you want it to be something like:
// Anti-example: createPartyId delivers its result through a callback, so nothing useful is returned here.
var partyId = createPartyId();
// do stuff...
That's not going to work. Try this:
/**
 * Generate a random party id in [10000, 99999] that has no matching record.
 *
 * Candidates are drawn at random and checked with getPartyIdCount until one
 * comes back with a count of zero.
 *
 * @param {function(Error, number)} callback - Node-style callback; receives
 *     the lookup error, or `null` and the unused party id.
 */
function createPartyId(callback) {
    var min = 10000, max = 99999;
    var partyId = -1, count = -1;
    async.whilst(
        // Keep looping while the last candidate was taken. count starts at -1,
        // so the first iteration always runs.
        // NOTE(review): written for the synchronous test form; newer async
        // releases pass a callback to the test function instead — confirm
        // against the installed version.
        function () { return count !== 0; },
        function (next) {
            partyId = min + Math.floor(Math.random() * (max - min + 1));
            getPartyIdCount(partyId, function (err, num) {
                if (!err) {
                    count = num;
                }
                // Signal the end of this iteration; a truthy err ends the loop.
                next(err);
            });
        },
        function (err) {
            // Called once the loop ends, error or not.
            if (err) {
                return callback(err);
            }
            // Hand back the id that was found to be free, not the match count.
            callback(null, partyId);
        }
    );
}
/**
 * Count the records whose `id` equals the given party id.
 *
 * @param {number} partyId - Candidate id to look up.
 * @param {function(Error, number)} callback - Node-style callback; receives
 *     the query error, or `null` and the number of matching records.
 *     It is invoked exactly once.
 */
function getPartyIdCount(partyId, callback) {
    db.db_name.find({id: partyId}, function (err, records) {
        if (err) {
            console.log("There was an error executing the database query.");
            // Return so the success path below cannot run (and call back a
            // second time) after a failure.
            return callback(err);
        }
        callback(null, records.length);
    });
}
(I've also adopted the default node.js convention of always returning errors as the first argument to callback functions.)
So, to use this you would do:
// Call the function defined above by its actual name; the result arrives in the callback.
createPartyId(function (err, num) {
    if (err) { return aughItFellOver(err); }
    // do stuff
});

How can I break loop from callback function in node js?

I have asynchronous query in Node.js. Variable sq3 is a connection variable.
For example something like this:
// Issue the query only for the entry whose key loosely equals 1 (for...in yields keys as strings).
for (var i in res) {
if (i == 1) {
sq3.query("SELECT * from students;",
// Note: this callback's `res` parameter shadows the outer `res` being iterated.
function (err, res) {
if (err) {
throw err;
} else {
if (res.length == 1) {
//do something
} else {
//break for
// NOTE(review): a `break` cannot be written here — this line is inside the callback function, not directly inside the loop.
}
}
});
sq3.end();
}
}
How can I break from callback function?
Thanks
Just do it like this, using recursion instead of loops. Not only does this allow you to achieve the logic you want. It also doesn't spin up a bunch of async requests at once. They execute in turn, but asynchronously, so it's still performant.
/**
 * Walk the list `res` one item at a time. For each item loosely equal to 1,
 * run the students query; continue with the remaining items only when that
 * query returns exactly one row, otherwise stop (the equivalent of `break`).
 *
 * The list is consumed in place (items are shifted off the front).
 *
 * @param {Array} res - Items to examine.
 */
function lookatEntireResponse(res) {
    // Advance to the next item that needs a query.
    // Returns true when a query was scheduled, false when the list ran out.
    function lookAtNextItem() {
        var item;
        // A loop (not recursion) skips non-matching items, and the length
        // check guarantees termination on an empty list.
        while (res.length > 0) {
            item = res.shift();
            // Loose equality kept on purpose: items may be numbers or strings.
            if (item == 1) {
                queryFor(item);
                return true;
            }
        }
        return false;
    }

    // Run the query for one matching item and decide whether to go on.
    function queryFor(item) {
        sq3.query("SELECT * from students;",
            // Named `rows` so it does not shadow the outer work list `res`.
            function (err, rows) {
                if (err) {
                    throw err;
                }
                if (rows.length == 1) {
                    doSomething(item);
                    if (lookAtNextItem()) {
                        // Another query is pending; keep the connection open.
                        return;
                    }
                }
                // Either we "broke" out or the list is exhausted: end the
                // connection once, after the last query that will be issued.
                sq3.end();
            });
    }

    lookAtNextItem();
}
You can consider throttling simultaneous requests with similar logic (say, allowing 10 such requests per lookAtItem call). This way you can achieve a hybrid of the two methods, and then just optimize the number of simultaneous requests for performance. The Async library makes stuff like this easier.
In your code fragment, you can't break from within the callback function. In node.js, callback functions run at an unspecified later time on the same thread. This means that by the time your callback function executes, the for loop has long since finished.
To get the effect you want, you need to restructure your code quite significantly. Here's an example of how you could do it (untested!!). The idea is to keep calling doSomething() with the list of items, shrinking it by one element each time until the desired result is achieved (your break condition).
/**
 * Consume the list `res` from the front until an item loosely equal to 1 is
 * found, then run the students query for it. When the query returns exactly
 * one row, the remaining items are processed by calling doSomething again;
 * otherwise processing stops (the equivalent of `break`).
 *
 * @param {Array} res - Items to examine; shifted in place.
 * @returns {boolean} true when a query was scheduled, false when the list
 *     ran out without one.
 */
function doSomething(res)
{
    var item; // local, so no implicit global is created
    while (res.length > 0)
    {
        item = res.shift(); // remove the first element from the array and return it
        if (item == 1)
        {
            sq3.query("SELECT * from students;",
                // Named `rows` so the remaining list `res` stays reachable here.
                function (err, rows) {
                    if (err)
                    {
                        throw err;
                    }
                    if (rows.length == 1)
                    {
                        //do something
                        // continue with the remaining elements of the list;
                        // it shrinks by one each time an element is shifted off
                        if (doSomething(res))
                        {
                            // another query is pending; keep the connection open
                            return;
                        }
                    }
                    // stopped early or list exhausted: end the connection once,
                    // after the last query that will be issued
                    sq3.end();
                });
            // leave the loop BEFORE the query completes; the callback above
            // takes over when it does
            return true;
        }
    }
    return false;
}
// Variant that tries to stop the loop by pushing the loop counter to the end from inside the callback.
for (var i = 0; i < res.length; i++) {
if (i == 1) {
sq3.query("SELECT * from students;",
// Note: this callback's `res` parameter shadows the outer `res`, so `res.length` below is the callback argument's length.
function (err, res) {
if (err) {
throw err;
} else {
if (res.length == 1) {
//do something
} else {
// NOTE(review): per the explanation above, the loop has already finished by the time an asynchronous callback runs, so this assignment would come too late to stop it — confirm.
i = res.length
}
}
});
sq3.end();
}
}

Resources