is my code blocking node thread - node.js

I have some confusions over nodejs and would like some help. I have a table called camps, contacts and camp_contact. I have to show the contact list based on the camp the user belongs to. I used async to loop through the camps, which I save in the user session, and then grab the data from mysql.
var array_myData = [];
async.each(req.session.user.camps, function(camps, callback) {
database.getConnection(function(err, connection) {
// Use the connection
connection.query('SELECT contacts.*, contact_camp.* '+
' FROM contacts JOIN contact_camp '+
' ON contact_camp.contact_id = contacts.id '+
' WHERE contact_camp.camp_id = ?',
[camps.camp_id], function(err,data){
if(err) {
//this will call the err function
callback('error');
}
else {
array_myData = array_myData.concat(data);
callback();
}
connection.release();
});
});
//final function call.
}, function(err){
// if any error happened, this function fires.
if( err ) {
// All processing will now stop.
} else {
res.render('contacts',
{
page
});
}
});
The code works fine. Now the thing I'm wondering about is, does using array.concat block the thread? if so, how can I change that? I read around and according to what I understood, I/O operation that are not asynchronous blocks the thread like reading from file or database. Does having array like this var array = ['a','b', 'c'] and looping through it would block the thread?
Lastly, is there a way to know if a code that I have written has blocked the thread or not? Because I get worried every time I write a function of my own.
I also get confused when a create a function with a callback like:
function test(param, fn) { do something; fn(); }
I'm not sure if this kind of function without a timer would block the thread or not.

Ok, if it helps you somehow:
I read around and according to what I understood, I/O operation that are not asynchronous blocks the thread like reading from file or database.
In NodeJS the operations on file descriptors are asynchronous => non-blocking
The file descriptors would be:
database operations,
open/close files
network operations
ex.:
fs.readFile('<pathToTheFile>', (err, data) => {
if (err) throw err;
console.log(data);
});
// if your file is very big, until it is read, maybe other requests will be finished
Does having array like this var array = ['a','b', 'c'] and looping through it would block the thread?
Yes, it will block the thread, but operations like this are not resource intensive. NodeJs is an event-driven language (single-threaded), but also, in a multi threaded or multi process language, the kernel thread would still be blocked by such operations => the same thing ... this is maybe off topic.
// if you do something like this
while(true){
function test(param, fn) { do something; fn(); }
}
// you will see that you just blocked the thread
I'm not sure if this kind of function without a timer would block the thread or not
normally you would put that function in a error handler, and it shouldn't block the thread if you're not doing an infinite while.
try{}catch(err){
// do something with the error
}

Related

Is safe read and then write a file asynchronously on node.js?

I have a method that reads and write a log file, this method is called on every request by all users, then write the log the request path in a file. The questions are two:
Is safe read and then write a file in async mode considering concurrency questions?
If yes for the first question, the code bellow will work considering concurrency questions?
If yes for the first question how I have to do?
Please, disregard exceptions and performance questions, this is a didactic code.
var logFile = '/tmp/logs/log.js';
app.get("/", function(req){
var log = {path: req.path, date: new Date().getTime()};
log(log);
});
function log(data){
fs.exists(logFile, function(exists){
if(exists){
fs.readFile(logFile, function (err, data) {
if (err){
throw err
}
var logData = JSON.parse(data.toString());
logData.push(data);
writeLog(logData);
});
}else{
writeLog([data]);
}
});
}
function writeLog(base){
fs.writeFile(logFile, JSON.stringify(base, null, '\t'), function(err) {
if(err)
throw err;
});
}
I strongly suggest that you don't just "log asynchronously" because you want the log to be ordered based on the order things happened in your app, and there is no guarantee this will happen that way if you don't synchronize it somehow.
You can, for instance, use a promise chain to synchronize it:
var _queue = Promise.resolve();
function log(message){
_queue = _queue.then(function(){ // chain to the queue
return new Promise(function(resolve){
fs.appendFile("/tmp/logs/log.txt", new Date() + message + "\n", function(err, data){
if(err) console.log(err); // don't die on log exceptions
else resolve(); // signal to the queue it can progress
});
});
});
}
You can now call log and it will queue messages and write them some time asynchronously for you. It will never take more than a single file descriptor or exhaust the server either.
Consider using a logging solution instead of rolling your own logger btw.
In you're example you're already using the Asynchronous versions of those functions. If you're concerned about the order of your operations then you should use the synchronous versions of those functions.
readFileSync
writeFileSync
Also to note, JSON.parse() is a synchronous operation.You can make this "asynchronous" using the async module and doing a async.asyncify(JSON.parse(data.toString()));.
As noted by #BenjaminGruenbaum, async.asyncify(); doesn't actually make the operation of JSON.parse(); truly asynchronous but it does provide a more "async" style for the control flow of the operations.

Nodejs behaviour

I have been working on nodeJS + MongoDB, using the Express and Mongoose frameworks for a few months, and I wanted to ask you guys what is really happening in a situation such as the following:
Model1.find({}, function (err, elems) {
if (err) {
console.log('ERROR');
} else {
elems.forEach(function (el) {
Model2.find({[QUERY RELATED WITH FIELDS IN 'el']}, function (err, elems2) {
if (err) {
console.log('ERROR');
} else {
//DO STAFF.
}
});
});
}
});
My best guess is that there's a main thread looping over elems, and then different threads attending each query over Model2, but I'm not really sure.
Is that correct? And also, is this a good solution? And if not, how would you code in a situation such as this, where you need the information in each of the elements you get from Model1 to get elements from Model2, and perform the actual functionality you are looking for?
I know I could elaborate a more complex query where I could get all the elements each of the 'el' in elems would yield, but I¡d rather not do that, because in that case i would be worried about the memory expense.
Also, I've been thinking about changing the data model, but I've gone over it and I'm confident it is well thought, and I don't think that's the best solution for my aplication.
Thanks!
NodeJS is a single threaded environment and it works asynchronously for blocking function calls such as network requests in your case. So there is only one thread and your query results will be called asynchronously so that nothing will be blocked due to intensive network operation.
In your scenario if the first query returns quite a lot of records such as 100000 thousands you may exhaust your mongo server in your loop as you will query your server as many as the result of first query instantly. This will happen because node won't stop for receiving the results of each query as it works asynchronously.
So usually manually throttling the requests to network operations is a good practice. This is not trivial when working on asynchronous environment. One way to do is to use recursive function call. Basically you split your tasks into groups and do each group in batch, once you are done with one batch you start with your next group.
Here is a simple example on how to do it, I have used promises instead of callback functions, Q is a promise library that is very useful for handling promises:
var rows = [...]; // array of many
function handleRecursively(startIndex, batchSize){
var promises = [];
for(i = 0; i < batchSize && i + batchSize < rows.length; i++){
var theRow = rows[startIndex + i];
promises.push(doAsynchronousJobWithTheRow(theRow));
}
//you wait until you handle all tasks in this iteration
Q.all(promises).then(function(){
startIndex += batchSize;
if(startIndex < rows.length){ // if there is still task to do continue with next batch
handleRecursively(startIndex, batchSize); }
})
}
handleRecursively(0, 1000);
Here is the best solution :
Model1.find({}, function (err, elems) {
if (err) {
console.log('ERROR');
} else {
loopAllElements(0,elems);
}
});
function loopAllElements(startIndex,elems){
if (startIndex==elems.length) {
return "success";
}else{
Model2.find({[QUERY RELATED WITH FIELDS IN elems[startIndex] ]}, function (err, elems2) {
if (err) {
console.log('ERROR');
return "error";
} else {
//DO STAFF.
loopAllElements(startIndex+1, elems);
}
});
}
}

how to make this function async in node.js

Here is the situation:
I am new to node.js, I have a 40MB file containing multilevel json file like:
[{},{},{}] This is an array of objects (~7000 objects). Each object has properties and a one of those properties is also an array of objects
I wrote a function to read the content of the file and iterate it. I succeeded to get what I wanted in terms of content but not usability. I thought that I wrote an async function that would allow node to serve other web requests while iterating the array but that is not the case. I would be very thankful if anyone can point me to what I've done wrong and how to rewrite it so I can have a non-blocking iteration. Here's the function that handles the situation:
function getContents(callback) {
fs.readFile(file, 'utf8', function (err, data) {
if (err) {
console.log('Error: ' + err);
return;
}
js = JSON.parse(data);
callback();
return;
});
}
getContents(iterateGlobalArr);
var count = 0;
function iterateGlobalArr() {
if (count < js.length) {
innerArr = js.nestedProp;
//iterate nutrients
innerArr.forEach(function(e, index) {
//some simple if condition here
});
var schema = {
//.....get props from forEach iteration
}
Model.create(schema, function(err, post) {
if(err) {
console.log('\ncreation error\n', err);
return;
}
if (!post) {
console.log('\nfailed to create post for schema:\n' + schema);
return;
}
});
count++;
process.nextTick(iterateGlobalArr);
}
else {
console.log("\nIteration finished");
next();
}
Just so it is clear how I've tested the above situation. I open two tabs one loading this iteration which takes some time and second with another node route which does not load until the iteration is over. So essentially I've written a blocking code but not sure how to re-factor it! I suspect that just because everything is happening in the callback I am unable to release the event loop to handle another request...
Your code is almost correct. What you are doing is inadvertently adding ALL the items to the very next tick... which still blocks.
The important piece of code is here:
Model.create(schema, function(err, post) {
if(err) {
console.log('\ncreation error\n', err);
return;
}
if (!post) {
console.log('\nfailed to create post for schema:\n' + schema);
return;
}
});
// add EVERYTHING to the very same next tick!
count++;
process.nextTick(iterateGlobalArr);
Let's say you are in tick A of the event loop when getContents() runs and count is 0. You enter iterateGlobalArr and you call Model.create. Because Model.create is async, it is returning immediately, causing process.nextTick() to add processing of item 1 to the next tick, let's say B. Then it calls iterateGlobalArr, which does the same thing, adding item 2 to the next tick, which is still B. Then item 3, and so on.
What you need to do is move the count increment and process.nextTick() into the callback of Model.create(). This will make sure the current item is processed before nextTick is invoked... which means next item is actually added to the next tick AFTER the model item has been created... which will give your app time to handle other things in between. The fixed version of iterateGlobalArr is here:
function iterateGlobalArr() {
if (count < js.length) {
innerArr = js.nestedProp;
//iterate nutrients
innerArr.forEach(function(e, index) {
//some simple if condition here
});
var schema = {
//.....get props from forEach iteration
}
Model.create(schema, function(err, post) {
// schedule our next item to be processed immediately.
count++;
process.nextTick(iterateGlobalArr);
// then move on to handling this result.
if(err) {
console.log('\ncreation error\n', err);
return;
}
if (!post) {
console.log('\nfailed to create post for schema:\n' + schema);
return;
}
});
}
else {
console.log("\nIteration finished");
next();
}
}
Note also that I would strongly suggest that you pass in your js and counter with each call to iterageGlobalArr, as it will make your iterateGlobalArr alot easier to debug, among other things, but that's another story.
Cheers!
Node is single-threaded so async will only help you if you are relying on another system/subsystem to do the work (a shell script, external database, web service etc). If you have to do the work in Node you are going to block while you do it.
It is possible to create one node process per core. This solution would result in only blocking one of the node processes and leave the rest to service your requests, but this feature is still listed as experimental http://nodejs.org/api/cluster.html.
A single instance of Node runs in a single thread. To take advantage
of multi-core systems the user will sometimes want to launch a cluster
of Node processes to handle the load.
The cluster module allows you to easily create child processes that
all share server ports.

Is the following node.js code blocking or non-blocking?

I have the node.js code running on a server and would like to know if it is blocking or not. It is kind of similar to this:
function addUserIfNoneExists(name, callback) {
userAccounts.findOne({name:name}, function(err, obj) {
if (obj) {
callback('user exists');
} else {
// Add the user 'name' to DB and run the callback when done.
// This is non-blocking to here.
user = addUser(name, callback)
// Do something heavy, doesn't matter when this completes.
// Is this part blocking?
doSomeHeavyWork(user);
}
});
};
Once addUser completes the doSomeHeavyWork function is run and eventually places something back into the database. It does not matter how long this function takes, but it should not block other events on the server.
With that, is it possible to test if node.js code ends up blocking or not?
Generally, if it reaches out to another service, like a database or a webservice, then it is non-blocking and you'll need to have some sort of callback. However, any function will block until something (even if nothing) is returned...
If the doSomeHeavyWork function is non-blocking, then it's likely that whatever library you're using will allow for some sort of callback. So you could write the function to accept a callback like so:
var doSomHeavyWork = function(user, callback) {
callTheNonBlockingStuff(function(error, whatever) { // Whatever that is it likely takes a callback which returns an error (in case something bad happened) and possible a "whatever" which is what you're looking to get or something.
if (error) {
console.log('There was an error!!!!');
console.log(error);
callback(error, null); //Call callback with error
}
callback(null, whatever); //Call callback with object you're hoping to get back.
});
return; //This line will most likely run before the callback gets called which makes it a non-blocking (asynchronous) function. Which is why you need the callback.
};
You should avoid in any part of your Node.js code synchronous blocks which don't call system or I/O operations and which computation takes long time (in computer meaning), e.g iterating over big arrays. Instead move this type of code to the separate worker or divide it to smaller synchronous pieces using process.nextTick(). You can find explanation for process.nextTick() here but read all comments too.

How to wait for all async calls to finish

I'm using Mongoose with Node.js and have the following code that will call the callback after all the save() calls has finished. However, I feel that this is a very dirty way of doing it and would like to see the proper way to get this done.
function setup(callback) {
// Clear the DB and load fixtures
Account.remove({}, addFixtureData);
function addFixtureData() {
// Load the fixtures
fs.readFile('./fixtures/account.json', 'utf8', function(err, data) {
if (err) { throw err; }
var jsonData = JSON.parse(data);
var count = 0;
jsonData.forEach(function(json) {
count++;
var account = new Account(json);
account.save(function(err) {
if (err) { throw err; }
if (--count == 0 && callback) callback();
});
});
});
}
}
You can clean up the code a bit by using a library like async or Step.
Also, I've written a small module that handles loading fixtures for you, so you just do:
var fixtures = require('./mongoose-fixtures');
fixtures.load('./fixtures/account.json', function(err) {
//Fixtures loaded, you're ready to go
};
Github:
https://github.com/powmedia/mongoose-fixtures
It will also load a directory of fixture files, or objects.
I did a talk about common asyncronous patterns (serial and parallel) and ways to solve them:
https://github.com/masylum/i-love-async
I hope its useful.
I've recently created simpler abstraction called wait.for to call async functions in sync mode (based on Fibers). It's at an early stage but works. It is at:
https://github.com/luciotato/waitfor
Using wait.for, you can call any standard nodejs async function, as if it were a sync function, without blocking node's event loop. You can code sequentially when you need it.
using wait.for your code will be:
//in a fiber
function setup(callback) {
// Clear the DB and load fixtures
wait.for(Account.remove,{});
// Load the fixtures
var data = wait.for(fs.readFile,'./fixtures/account.json', 'utf8');
var jsonData = JSON.parse(data);
jsonData.forEach(function(json) {
var account = new Account(json);
wait.forMethod(account,'save');
}
callback();
}
That's actually the proper way of doing it, more or less. What you're doing there is a parallel loop. You can abstract it into it's own "async parallel foreach" function if you want (and many do), but that's really the only way of doing a parallel loop.
Depending on what you intended, one thing that could be done differently is the error handling. Because you're throwing, if there's a single error, that callback will never get executed (count won't be decremented). So it might be better to do:
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
And handle the error in the callback. It's better node-convention-wise.
I would also change another thing to save you the trouble of incrementing count on every iteration:
var jsonData = JSON.parse(data)
, count = jsonData.length;
jsonData.forEach(function(json) {
var account = new Account(json);
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
});
If you are already using underscore.js anywhere in your project, you can leverage the after method. You need to know how many async calls will be out there in advance, but aside from that it's a pretty elegant solution.

Resources