NodeJS Cluster Error : SIGSEGV - node.js

I use nodejs cluster
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

// Logs the signal reported by a worker's 'exit' event.
function reportWorkerExit(code, signal) {
  console.log("worker was killed by signal: " + signal);
}

// Only the master forks: one worker per CPU core, each watched for its exit.
if (cluster.isMaster) {
  for (var i = 0; i < numCPUs; i++) {
    cluster.fork().on('exit', reportWorkerExit);
  }
}
And sometimes, at irregular intervals, I get this error:
worker was killed by signal: SIGSEGV
What does this error mean, and what causes it?
node version v0.11.14-pre, Debian

Don't know exactly the answer but think this could help.
Using PhantomJS, I sometimes got the same error (with "phantomjs" in place of "worker" in the message). The situation: I was opening a page with PhantomJS; when the body was ready I wanted a callback to be called, and then I closed PhantomJS. Roughly, my code was:
// Open a PhantomJS session; `ph` is the handle passed to the create callback.
phantom.create(function (ph) {
...
// The result is handed to the caller first and PhantomJS is shut down afterwards;
// this is the ordering that intermittently ended in SIGSEGV.
callbackDone(result);
ph.exit();
});
Doing so, the exception was:
signal killed phantomjs: SIGSEGV
At this point I realized it was breaking only for heavy callback tasks. In other words, if the callback function called before exit() was a light one, everything was fine; under heavier conditions it crashed.
The solution: close the object before the callback:
// Shut PhantomJS down first, and only then run the (possibly heavy) callback.
ph.exit();
callbackDone(result);

Related

NodeJS bug in Linux when executing child_process.fork?

I cannot reliably get a forked child process to send back a message to the parent that exceeds 219262 bytes.
The issue occurs only on Linux; on Windows it works as expected. The issue seems to have been introduced between Node versions 1.0.1 and 1.0.2 - it works fine on Node versions prior to 1.0.1 but not after.
(The maxBuffer option is not relevant for child_process.fork; it only applies to child_process.exec and child_process.execFile.)
Below is the failing sample. Executing "node parent" on the command line will fail to output the child's "messageToParent" if it exceeds 219262 bytes on Linux.
parent.js is:
var cp = require('child_process');
var child = cp.fork('./child', [], {});

// Each handler below only logs the child-process event it is attached to.
function onChildMessage(msg) {
  console.log('>>>PARENT ---> MESSAGE RECEIVED FROM CHILD = ' + JSON.stringify(msg));
}

function onChildError(err) {
  console.log('>>>PARENT ---> ERROR FROM CHILD. err = '+ err);
}

function onChildExit(code, signal) {
  console.log('>>>PARENT ---> EXIT FROM CHILD. code='+code+' signal = '+ signal);
}

function onChildClose(code, signal) {
  console.log('>>>PARENT ---> CLOSE FROM CHILD. code='+code+' signal = '+signal);
}

function onChildDisconnect() {
  console.log('>>>PARENT ---> DISCONNECT FROM CHILD');
}

// Send an empty message to the child, then subscribe to everything it may report back.
console.log('>>>PARENT ---> SENDING MESSAGE TO CHILD');
child.send({});

child
  .on('message', onChildMessage)
  .on('error', onChildError)
  .on('exit', onChildExit)
  .on('close', onChildClose)
  .on('disconnect', onChildDisconnect);
child.js is
// Child side of the repro: on any message from the parent, send one string back and exit.
process.on('message', function(messageFromParent) {
console.log('>>>>>>CHILD ---> RECEIVED MESSAGE FROM PARENT');
var messageToParent = "It would be too long to post on stackoverflow, but if I make this string longer than 219262 bytes, it fails to return to the parent in Linux. There is no such issue in Windows";
var ret = process.send(messageToParent);
console.log('>>>>>>CHILD ---> SENDING MESSAGE TO PARENT process.send returned ' + ret);
// The process exits right after process.send() returns, without waiting for any
// confirmation that the message was delivered; this ordering is what the report is about.
process.exit(0);
});
// On an uncaught exception, send it to the parent wrapped in { output: { ERROR } } and exit with code -1.
process.on('uncaughtException', function(err) {
process.send({ output: {ERROR:err} });
process.exit(-1);
});
Posting an answer in case anyone else stumbles into this issue (https://github.com/nodejs/node/issues/36268)
The above child.js works perfectly in Node versions prior to 1.0.1 because process.send() used to be synchronous. So "process.send(messageToParent)" followed by "process.exit(0)" would always deliver messageToParent to parent.js.
In later versions of Node, however, process.send() is async. Therefore, the child must exit via process.exit() within a process.send callback, else a race condition is created between V8 javascript thread and IPC pipe.
Also - in Windows, the default IPC pipe buffer is large enough that the message is always returned to parent prior to child exiting. This is not the case in Linux. This explains why the above code works in Windows even with later versions of Node where process.send() is async.

Kill fork on windows throws "write EPIPE"

I would like to kill forks after a specific amount of time. However, in my codebase, I sometimes get the following error (only on windows) :
events.js:85
throw er; // Unhandled 'error' event
^
Error: write EPIPE
at exports._errnoException (util.js:746:11)
at ChildProcess.target._send (child_process.js:484:28)
at ChildProcess.target.send (child_process.js:416:12)
at sendHelper (cluster.js:676:8)
at send (cluster.js:512:5)
at cluster.js:488:7
at SharedHandle.add (cluster.js:99:3)
at queryServer (cluster.js:480:12)
at Worker.onmessage (cluster.js:438:7)
at ChildProcess.<anonymous> (cluster.js:692:8)
This error seems to happen whenever a worker is killed before it has completely started (e.g. it takes 1 second to start and is killed before it has finished starting).
Here's a minimal example so that you can reproduce.
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
var workers=[];

// Kills every worker forked so far.
function killAllWorkers() {
  workers.forEach(function(forked){
    forked.kill();
  });
}

if (!cluster.isMaster) {
  // Workers can share any TCP connection
  // In this case its a HTTP server
  http.createServer(function(req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  }).listen(8000);
} else {
  // Fork one worker per CPU core, remembering each one.
  for (var i = 0; i < numCPUs; i++) {
    workers.push(cluster.fork());
    console.log('forking');
  }

  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });

  // Kill the workers 1 ms after forking them.
  setTimeout(killAllWorkers, 1);
}
If I change the http.createServer to something like console.log, I don't have the problem, so I suspect it is because my worker hasn't finished "starting".
Strangely enough, I also sometimes get an AssertionError instead. There doesn't seem to be any pattern: sometimes I get the same error ten times in a row, sometimes it alternates between the two; it seems random whether I get the EPIPE error or the assertion error.
assert.js:86
throw new assert.AssertionError({
^
AssertionError: Resource leak detected.
at removeWorker (cluster.js:346:9)
at ChildProcess.<anonymous> (cluster.js:366:34)
at ChildProcess.g (events.js:199:16)
at ChildProcess.emit (events.js:110:17)
at Process.ChildProcess._handle.onexit (child_process.js:1074:12)
The reason for the error is that the worker is not yet listening when we send it the SIGTERM signal.
The solution is to wait for the 'listening' event before killing the fork:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  // Fork workers.
  for (var i = 0; i < numCPUs; i++) {
    cluster.fork();
    console.log('forking');
  }

  // The cluster 'listening' event delivers (worker, address), not (worker, code, signal).
  // Killing is deferred until the worker reports that it is listening, so the
  // worker is never killed while it is still starting up.
  cluster.on('listening', function(worker, address) {
    setTimeout(function() {
      worker.kill();
    }, 1);
  });

  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  // Workers can share any TCP connection
  // In this case its a HTTP server
  http.createServer(function(req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  }).listen(8000);
}
Here is my workaround...
As edi9999 said, the worker process is busy with something, and you can't safely kill it while it is not yet listening.
So instead of killing the worker from the main process, send a message from the main process to the worker and have the worker call process.exit() to terminate itself.
This way, the worker finishes its current work, receives the message from the main process, and then exits on its own.
UPDATE
in this Repository of mine, there is an example code which you can try. This is a command line tool. If you cd in to the folder and type node se and press enter. You will get instructions on how to use it.
If you just wanna see code. Look following two files simultaneously.
1. se.js
2. functions.js
hope it helps!!

Nodejs cluster isn't response to web browser

I am a newbie in Nodejs and I try something with cluster on nodejs. But I meet a problem:
- I use example on Nodejs API about Cluster:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

// Logs the pid of a worker that has exited.
function logWorkerDeath(worker, code, signal) {
  console.log('worker ' + worker.process.pid + ' died');
}

if (!cluster.isMaster) {
  // Workers can share any TCP connection
  // In this case its a HTTP server
  var server = http.createServer(function(req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  });
  server.listen(8000);
} else {
  // Fork workers.
  for (var i = 0; i < numCPUs; i++) {
    cluster.fork();
  }
  cluster.on('exit', logWorkerDeath);
}
I run the above code, but when I open the URL (localhost:8000) in a browser, the browser doesn't receive any response from Node.js (it keeps connecting... forever, until I kill Node.js). I am, however, getting the "online" event to fire.
How can I get this server to respond to requests?
P.S.: I tried handling the "exit" event to respawn a new worker. Sometimes, when I hit Enter in the browser, the console logs that worker x died and a new worker is respawned, but the browser is still connecting...
http://i.stack.imgur.com/RHSYY.png
Help me :) and sorry my bad english
I install the newest version of nodejs ( v0.10.24)
I had a similar issue with cluster. Try putting the HTTP server creation inside a setTimeout call (for example, delay it by 1000 ms); you should observe an improvement.
// Sketch only: the elided part stands for the rest of the http.createServer call; it runs after a 1000 ms delay.
setTimeout(function(){ http.createServer...... }, 1000);
Besides, you may also try to create your server using jxcore mt-keep to see if it works on the similar case.
$ jx mt-keep servercode.js

Is rotating child processes in node.js/cluster good idea?

Apache Web Server has a config parameter called MaxRequestsPerChild.
http://httpd.apache.org/docs/2.0/en/mod/mpm_common.html#maxrequestsperchild
"After MaxRequestsPerChild requests, the child process will die."
To avoid crashes caused by memory leaks, too many connections, or other unexpected errors, should I do the same thing when using the node.js Cluster module?
*I'm using Nginx in front of node.js, not Apache. I mention Apache only because it makes the idea easy to explain.
I just implemented it like this:
var maxReqsPerChild = 10; // Small number for debug
var numReqs = 0;

if (!cluster.isMaster) {
  // Worker: count every request and terminate once the count exceeds the limit.
  http.createServer(function(webReq, webRes) {
    // Count up
    numReqs += 1;
    // Doing something here
    if (numReqs <= maxReqsPerChild) {
      return;
    }
    // Kill myself
    process.kill(process.pid); // Or more simply, process.exit() is better?
  }).listen(1338);
} else {
  // Master: start one worker per CPU core.
  var numCPUs = require('os').cpus().length;
  for (var i = 0; i < numCPUs; i++) {
    cluster.fork();
  }
  // Fork another when one died
  cluster.on('death', function(worker) {
    cluster.fork();
  });
}
This has been working well up until now, but I'm wondering there is more proper way.
MaxRequestsPerChild is good to hide memory leak troubles, but shouldn't be used too often, because it just hides real trouble. First try to avoid the memory leaks.
It shouldn't be used to avoid other issues like too many connections, nor other unexpected errors.
When you do use MaxRequestsPerChild, you should use neither process.kill nor process.exit,
because that immediately closes all undergoing connections.
Instead, you should server.close, which will wait for all undergoing connections finish, and then fires 'close' event.
// `...` is a placeholder for the createServer arguments, elided in this snippet.
var server = http.createServer(...);
// Exit the process only once the server has emitted 'close'.
server.on( "close", function() {
process.exit(0);
});
// Count every request; once the configured limit is reached, stop the server.
// NOTE(review): requestCount and options are not defined in this snippet; presumably declared elsewhere.
server.on( "request", function () {
requestCount += 1;
if ( options.max_requests_per_child && (requestCount >= options.max_requests_per_child) ) {
// Report over process.send that the limit was reached (presumably to the cluster master).
process.send({ cmd: "set", key: "overMaxRequests", value: 1 });
// server.isClosed is a flag set by this code so that close() is called only once.
if ( ! server.isClosed ) {
server.close();
server.isClosed = 1;
}
}
});
see a complete working example here:
https://github.com/mash/node_angel

How to gracefully restart a NodeJS server?

Currently, my prod environment for a side project is a git repo, where I pull in some code, manually kill the server with Ctrl-C, and restart it manually.
I realize there are a lot of things wrong with this. For instance, what if a user is still in the middle of doing something important and the process is crunching sensitive data, and I just killed it?!
When I used node v0.4.x there was a nice Cluster module that could restart the server gracefully, when the application is in a quiet state. In v0.6.x the Cluster module is built into node, but it's really, really bare, and doesn't have the graceful restart ability.
Anyone know how I can gracefully restart a nodejs server in v0.6.x?
You can handle POSIX signals in node code.
The example code below handles SIGINT (Ctrl-C, for instance) as a STOP signal for all cluster workers, while SIGUSR2 just restarts all workers.
So issuing kill -SIGUSR2 PID, where PID is the node master's PID, will restart the whole cluster.
module.exports = function(app) {
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;
var workerList = new Array();
var sigkill = false;
if (cluster.isMaster) {
for (var i = 0; i < numCPUs; i++) {
var env = process.env;
var worker = cluster.fork(env);
workerList.push(worker);
}
process.on('SIGUSR2',function(){
console.log("Received SIGUSR2 from system");
console.log("There are " + workerList.length + " workers running");
workerList.forEach(function(worker){
console.log("Sending STOP message to worker PID=" + worker.pid);
worker.send({cmd: "stop"});
});
});
process.on('SIGINT',function(){
sigkill = true;
process.exit();
});
cluster.on('death', function(worker) {
if (sigkill) {
logger.warn("SIGKINT received - not respawning workers");
return;
}
var newWorker = cluster.fork();
console.log('Worker ' + worker.pid + ' died and it will be re-spawned');
removeWorkerFromListByPID(worker.pid);
workerList.push(newWorker);
});
} else {
process.on('message', function(msg) {
if (msg.cmd && msg.cmd == 'stop') {
console.log("Received STOP signal from master");
app.close();
process.exit();
}
});
app.listen(3000);
}
function removeWorkerFromListByPID(pid) {
var counter = -1;
workerList.forEach(function(worker){
++counter;
if (worker.pid === pid) {
workerList.splice(counter, 1);
}
});
}
}
There's a module named Forever.
That can gracefully restart the process. I suppose then you can somehow run several instances with cluster (one on each core) and use Forever to monitor / restart them.
This is just an option I found; I'm open to suggestions!
There's also a module named PM2. It has the ability to stop all processes in a cluster.

Resources