I would like to kill forks after a specific amount of time. However, in my codebase, I sometimes get the following error (only on windows) :
events.js:85
throw er; // Unhandled 'error' event
^
Error: write EPIPE
at exports._errnoException (util.js:746:11)
at ChildProcess.target._send (child_process.js:484:28)
at ChildProcess.target.send (child_process.js:416:12)
at sendHelper (cluster.js:676:8)
at send (cluster.js:512:5)
at cluster.js:488:7
at SharedHandle.add (cluster.js:99:3)
at queryServer (cluster.js:480:12)
at Worker.onmessage (cluster.js:438:7)
at ChildProcess.<anonymous> (cluster.js:692:8)
This error seems to happen whenever a worker is not yet completely started and is killed (eg takes 1 second to start and is killed before having started)
Here's a minimal example so that you can reproduce.
// Reproduction: fork one worker per CPU, then kill them all ~1 ms later,
// before they have finished starting.
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
var workers = [];

if (cluster.isMaster) {
  // Fork workers.
  var remaining = numCPUs;
  while (remaining-- > 0) {
    workers.push(cluster.fork());
    console.log('forking');
  }

  // Log every worker that goes away.
  cluster.on('exit', function (worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });

  // Kill all workers almost immediately — likely before the
  // master/worker handshake has completed.
  setTimeout(function () {
    for (var w = 0; w < workers.length; w++) {
      workers[w].kill();
    }
  }, 1);
} else {
  // Workers can share any TCP connection.
  // In this case it is an HTTP server.
  var server = http.createServer(function (req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  });
  server.listen(8000);
}
If I change the http.createServer to something like console.log, I don't have the problem, so I suspect it is because my worker hasn't finished "starting".
Strangely enough, I also sometimes get an AssertionError instead (there doesn't seem to be any kind of pattern: I have sometimes had tens of the same error in a row, and sometimes it toggles between the two — it seems random whether I get the EPIPE or the AssertionError).
assert.js:86
throw new assert.AssertionError({
^
AssertionError: Resource leak detected.
at removeWorker (cluster.js:346:9)
at ChildProcess.<anonymous> (cluster.js:366:34)
at ChildProcess.g (events.js:199:16)
at ChildProcess.emit (events.js:110:17)
at Process.ChildProcess._handle.onexit (child_process.js:1074:12)
The reason for the error is that the daemon is not yet listening when we send it the SIGTERM signal:
The solution is to wait for the listening event before killing the fork.
// Fix: only kill a worker once it has emitted 'listening' —
// by then the master/worker handshake is complete.
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
var workers = [];

if (cluster.isMaster) {
  // Fork workers.
  for (var n = 0; n < numCPUs; n += 1) {
    workers[n] = cluster.fork();
    console.log('forking');
  }

  // Kill each worker shortly after it starts listening.
  cluster.on('listening', function (worker, code, signal) {
    setTimeout(function () {
      worker.kill();
    }, 1);
  });

  // Log every worker that goes away.
  cluster.on('exit', function (worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  // Workers can share any TCP connection.
  // In this case it is an HTTP server.
  var server = http.createServer(function (req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  });
  server.listen(8000);
}
Here is my workaround...
As edi9999 said, the worker process is busy with something, and you can't kill it while it is not yet listening.
So instead of killing the worker from main process, send a message from main process to worker and use process.exit() statement in the worker to kill itself.
This way, worker would finish the current work and listen to the message from main process and kill itself.
UPDATE
in this Repository of mine, there is an example code which you can try. This is a command line tool. If you cd in to the folder and type node se and press enter. You will get instructions on how to use it.
If you just wanna see code. Look following two files simultaneously.
1. se.js
2. functions.js
hope it helps!!
Related
I use nodejs cluster
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  var forked = 0;
  while (forked < numCPUs) {
    var child = cluster.fork();
    // Report how each worker terminated.
    child.on('exit', function (code, signal) {
      console.log("worker was killed by signal: " + signal);
    });
    forked += 1;
  }
}
And sometimes, at varying time intervals, I get the following error:
worker was killed by signal: SIGSEGV
What does this error mean, and why is it raised?
node version v0.11.14-pre, Debian
Don't know exactly the answer but think this could help.
Using phantomJs I was getting sometimes the same error (changing worker for signal). The situation: I was opening a page using phantomJs; when the body was ready I want a callback to be called, then I was closing phantom. grosso modo my code was:
// Crashing order: the heavy callback runs while phantom is still open.
phantom.create(function (ph) {
...
callbackDone(result); // heavy work happens here
ph.exit();            // phantom is only closed after the callback finishes
});
Doing so, the exception was:
signal killed phantomjs: SIGSEGV
At this point I realized it was breaking only for heavy callback tasks. In other words, if the callback function before exit() was a light one, everything was fine; but under heavier conditions it crashed.
The solution: close the object before the callback:
ph.exit();            // close phantom first...
callbackDone(result); // ...then run the heavy callback
I was trying to understand how http requests are served by workers of a cluster. I started with the code at http://nodejs.org/api/cluster.html and added some logging around it.
I put the following code in server.js and executed the file with:
NODE_DEBUG=cluster node server.js
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  console.log('number of CPUs: ' + numCPUs);
  // Fork one worker per CPU.
  for (var idx = 0; idx < numCPUs; idx += 1) {
    console.log('forking worker ' + idx);
    cluster.fork();
  }
  cluster.on('exit', function (worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  var myPid = cluster.worker.process.pid;
  console.log('worker spawned: ' + myPid);
  // Per-worker state: each worker process gets its own copy.
  var someVar = {'b': 1};
  // Workers can share any TCP connection.
  // In this case it is an HTTP server.
  var handleRequest = function (req, res) {
    console.log('someVar.b current value: ' + someVar.b + ". someVar.b will be incremented");
    someVar.b += 1;
    console.log('Request being processed by worker: ' + myPid);
    res.writeHead(200);
    res.end("hello world\n");
    console.log('response ended by worker: ' + myPid);
  };
  http.createServer(handleRequest).listen(8000);
}
I then went to localhost:8000 once, and lo and behold, "hello world" was the response as I expected. What I didn't expect is some log statements being printed twice:
number of CPUs: 2
forking worker 0
forking worker 1
535,Master Worker 536 online
535,Master Worker 537 online
worker spawned: 536
worker spawned: 537
someVar.b current value: 1. someVar.b will be incremented
Request being processed by worker: 536
response ended by worker: 536
someVar.b current value: 2. someVar.b will be incremented
Request being processed by worker: 536
response ended by worker: 536
Specifically, I expected that for one request to my server, the following lines would only be printed once:
someVar.b current value: X. someVar.b will be incremented
Request being processed by worker: XXX
response ended by worker: XXX
Instead my functions are getting executed twice by the same worker. Could someone please shed some light on this?
Did you check req.url? I bet one of the two requests is for /favicon.ico.
I am trying to simulate 1000 websocket connections to a test server. I thought of implementing it using cluster.fork, but forking 1000 times would deplete my memory and crash the computer. Async seemed to be a solution to the problem; however, I still ran out of memory. How am I supposed to conduct such a test while being careful with my resources? If I use numCPUs instead of 500, I only get 4 connections.
edit :
if (cluster.isMaster) {
  // This is the master control process.
  console.log("Control process running: PID=" + process.pid);
  // Fork workers.
  for (var i = 0; i < 500; i++) {
    // `worker` was an implicit global in the original — declare it.
    var worker = cluster.fork();
    // Receive messages from this worker and handle them in the master process.
    worker.on('message', function (msg) {
      console.log('Master ' + process.pid + ' received message from worker ' + msg);
    });
  }
} // <-- this closing brace was missing: the isWorker branch below was
  //     nested inside the isMaster block, so the snippet was a syntax
  //     error and the worker code could never run as intended.

if (cluster.isWorker) {
  console.log('Worker ' + process.pid + ' has started.');
  websocketCreation(); // function that creates new WebSockets
  eventHandler();      // function that handles on error, close and message
}
I am a newbie in Nodejs and I try something with cluster on nodejs. But I meet a problem:
- I use example on Nodejs API about Cluster:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  // Fork workers — one per CPU core.
  var toFork = numCPUs;
  while (toFork > 0) {
    cluster.fork();
    toFork -= 1;
  }
  cluster.on('exit', function (worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  // Workers can share any TCP connection.
  // In this case it is an HTTP server.
  var respond = function (req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  };
  http.createServer(respond).listen(8000);
}
I run the above code, but when I enter the URL in a browser (localhost:8000), the browser doesn't receive any response from nodejs (it keeps connecting... forever, until I kill nodejs). I am, however, getting the "online" event to fire.
How can I get this server to respond to requests?
p/s: I try getting event "exit" to respawn new worker. Sometime when I hit enter on browser, console log worker x die and then respawn new worker. But browser still connecting...
http://i.stack.imgur.com/RHSYY.png
Help me :) — and sorry for my bad English.
I install the newest version of nodejs ( v0.10.24)
I had a similar issue with cluster. Try putting the http server creation part inside a setTimeout call. (for example delay it for 1000 ms) you should observe an improvement.
setTimeout(function(){ http.createServer...... }, 1000);
Besides, you may also try to create your server using jxcore mt-keep to see if it works on the similar case.
$ jx mt-keep servercode.js
Currently, my prod environment for a side project is a git repo, where I pull in some code, manually kill the server with Ctrl-C, and restart it manually.
I realize there are a lot of things wrong with this. For instance, what if a user is still in the middle of doing something important and the process is crunching sensitive data, and I just killed it?!
When I used node v0.4.x there was a nice Cluster module that could restart the server gracefully, when the application is in a quiet state. In v0.6.x the Cluster module is built into node, but it's really, really bare, and doesn't have the graceful restart ability.
Anyone know how I can gracefully restart a nodejs server in v0.6.x?
You can handle POSIX signals in node code.
See in the example code, that will handle SIGINT (Ctrl-C for instance) as a STOP signal for all cluster workers, and SIGUSR2 will just restart all workers
So, issuing kill -SIGUSR2 PID, where PID is node master PID will restart all cluster
module.exports = function(app) {
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;
var workerList = new Array();
var sigkill = false;
if (cluster.isMaster) {
for (var i = 0; i < numCPUs; i++) {
var env = process.env;
var worker = cluster.fork(env);
workerList.push(worker);
}
process.on('SIGUSR2',function(){
console.log("Received SIGUSR2 from system");
console.log("There are " + workerList.length + " workers running");
workerList.forEach(function(worker){
console.log("Sending STOP message to worker PID=" + worker.pid);
worker.send({cmd: "stop"});
});
});
process.on('SIGINT',function(){
sigkill = true;
process.exit();
});
cluster.on('death', function(worker) {
if (sigkill) {
logger.warn("SIGKINT received - not respawning workers");
return;
}
var newWorker = cluster.fork();
console.log('Worker ' + worker.pid + ' died and it will be re-spawned');
removeWorkerFromListByPID(worker.pid);
workerList.push(newWorker);
});
} else {
process.on('message', function(msg) {
if (msg.cmd && msg.cmd == 'stop') {
console.log("Received STOP signal from master");
app.close();
process.exit();
}
});
app.listen(3000);
}
function removeWorkerFromListByPID(pid) {
var counter = -1;
workerList.forEach(function(worker){
++counter;
if (worker.pid === pid) {
workerList.splice(counter, 1);
}
});
}
}
There's a module named Forever.
That can gracefully restart the process. I suppose then you can somehow run several instances with cluster (one on each core) and use Forever to monitor / restart them.
This is just an option I found; I'm open to suggestions!
There's also a module named PM2. It has the ability to stop all processes in a cluster.