How to gracefully restart a NodeJS server? - node.js

Currently, my prod environment for a side project is a git repo, where I pull in some code, manually kill the server with Ctrl-C, and restart it manually.
I realize there are a lot of things wrong with this. For instance, what if a user is still in the middle of doing something important and the process is crunching sensitive data, and I just killed it?!
When I used node v0.4.x there was a nice Cluster module that could restart the server gracefully, when the application is in a quiet state. In v0.6.x the Cluster module is built into node, but it's really, really bare, and doesn't have the graceful restart ability.
Anyone know how I can gracefully restart a nodejs server in v0.6.x?

You can handle POSIX signals in node code.
See in the example code, that will handle SIGINT (Ctrl-C for instance) as a STOP signal for all cluster workers, and SIGUSR2 will just restart all workers
So, issuing kill -SIGUSR2 PID, where PID is node master PID will restart all cluster
module.exports = function(app) {
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;
var workerList = new Array();
var sigkill = false;
if (cluster.isMaster) {
for (var i = 0; i < numCPUs; i++) {
var env = process.env;
var worker = cluster.fork(env);
workerList.push(worker);
}
process.on('SIGUSR2',function(){
console.log("Received SIGUSR2 from system");
console.log("There are " + workerList.length + " workers running");
workerList.forEach(function(worker){
console.log("Sending STOP message to worker PID=" + worker.pid);
worker.send({cmd: "stop"});
});
});
process.on('SIGINT',function(){
sigkill = true;
process.exit();
});
cluster.on('death', function(worker) {
if (sigkill) {
logger.warn("SIGKINT received - not respawning workers");
return;
}
var newWorker = cluster.fork();
console.log('Worker ' + worker.pid + ' died and it will be re-spawned');
removeWorkerFromListByPID(worker.pid);
workerList.push(newWorker);
});
} else {
process.on('message', function(msg) {
if (msg.cmd && msg.cmd == 'stop') {
console.log("Received STOP signal from master");
app.close();
process.exit();
}
});
app.listen(3000);
}
function removeWorkerFromListByPID(pid) {
var counter = -1;
workerList.forEach(function(worker){
++counter;
if (worker.pid === pid) {
workerList.splice(counter, 1);
}
});
}
}

There's a module named Forever.
That can gracefully restart the process. I suppose then you can somehow run several instances with cluster (one on each core) and use Forever to monitor / restart them.
This is just an option I found; I'm open to suggestions!

There's also a module named PM2. It has the ability to stop all processes in a cluster.

Related

How to restart if NodeJS API service failed?

I've the similar NodeJS code:
cluster.js
'use strict';
const cluster = require('cluster');
var express = require('express');
const metricsServer = express();
const AggregatorRegistry = require('prom-client').AggregatorRegistry;
const aggregatorRegistry = new AggregatorRegistry();
var os = require('os');
if (cluster.isMaster) {
for (let i = 0; i < os.cpus().length; i++) {
cluster.fork();
}
metricsServer.get('/metrics', (req, res) => {
aggregatorRegistry.clusterMetrics((err, metrics) => {
if (err) console.log(err);
res.set('Content-Type', aggregatorRegistry.contentType);
res.send(metrics);
});
});
metricsServer.listen(3013);
console.log(
'Cluster metrics server listening to 3013, metrics exposed on /metrics'
);
} else {
require('./app.js'); // Here it'll handle all of our API service and it'll run under port 3000
}
As you can see in the above code I'm using NodeJS Manual cluster method instead of PM2 cluster, because I need to monitor my API via Prometheus. I'm usually starting the cluster.js via pm2 start cluster.js, however due to some DB connection our app.js service failed but cluster.js didn't. It apparently looks like I've not handled the db connection error, even though I've not handle it. I want to know,
How can I make sure my app.js and cluster.js always restarts if it crashes?
Is there a Linux crontab can be place to check the certain ports are always running (i.e 3000 and 3013)? (If this a good idea, I appreciate if you could provide me the code, I'm not much familiar with Linux)
Or I can deploy another NodeJS api to check the certain services are running, but since my API's real-time and catching certain amount of load; I'm not much happy do this?
Any help would be appreciate, Thanks in advance.
You can use monit https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-monit in your server to regular monitor your process, if your project crashes it restart it again and can even notify you. but in this you have to do some configuration in server as monit regularly monitors a port, if it dosent get any reply from thta port then it restarts it.
otherwise you can use forever module. Easy to install and easy to use-https://www.npmjs.com/package/forever
it monitors and within 1 sec it restarts your application
I recently found out that, we can listen to the worker event if it's died/closed and restart it accordingly.
Here is the code:
'use strict';
const cluster = require('cluster');
var express = require('express');
const metricsServer = express();
var os = require('os');
if (cluster.isMaster) {
for (let i = 0; i < os.cpus().length; i++) {
cluster.fork();
}
cluster.on(
"exit",
function handleExit( worker, code, signal ) {
console.log( "Worker has died.", worker.process.pid );
console.log( "Death was suicide:", worker.exitedAfterDisconnect );
// If a Worker was terminated accidentally (such as by an uncaught
// exception), then we can try to restart it.
if ( ! worker.exitedAfterDisconnect ) {
var worker = cluster.fork();
// CAUTION: If the Worker dies immediately, perhaps due to a bug in the
// code, you can run [from what I have READ] into rapid CPU consumption
// as Master continually tries to create new Workers.
}
}
);
} else {
require('./app.js');
}

Kill fork on windows throws "write EPIPE"

I would like to kill forks after a specific amount of time. However, in my codebase, I sometimes get the following error (only on windows) :
events.js:85
throw er; // Unhandled 'error' event
^
Error: write EPIPE
at exports._errnoException (util.js:746:11)
at ChildProcess.target._send (child_process.js:484:28)
at ChildProcess.target.send (child_process.js:416:12)
at sendHelper (cluster.js:676:8)
at send (cluster.js:512:5)
at cluster.js:488:7
at SharedHandle.add (cluster.js:99:3)
at queryServer (cluster.js:480:12)
at Worker.onmessage (cluster.js:438:7)
at ChildProcess.<anonymous> (cluster.js:692:8)
This error seems to happen whenever a worker is not yet completely started and is killed (eg takes 1 second to start and is killed before having started)
Here's a minimal example so that you can reproduce.
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
var workers=[];
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
workers[i] = cluster.fork();
console.log('forking');
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
setTimeout(function(){
workers.forEach(function(worker){
worker.kill();
})
},1)
} else {
// Workers can share any TCP connection
// In this case its a HTTP server
http.createServer(function(req, res) {
res.writeHead(200);
res.end("hello world\n");
}).listen(8000);
}
If I change the http.createServer to something like console.log, I don't have the problem, so I suspect it is because my worker hasn't finished "starting".
Strangely enough, I also get sometimes an AssertionError instead (they doesn't seem to be any kind of pattern, I have sometimes had 10s in a row of the same error, sometimes it toggles between the two errors : it seems random between EPIPE and ASSERTION error).
assert.js:86
throw new assert.AssertionError({
^
AssertionError: Resource leak detected.
at removeWorker (cluster.js:346:9)
at ChildProcess.<anonymous> (cluster.js:366:34)
at ChildProcess.g (events.js:199:16)
at ChildProcess.emit (events.js:110:17)
at Process.ChildProcess._handle.onexit (child_process.js:1074:12)
The reason for the error is that the daemon is not yet listening when we send it the SIGTERM signal:
The solution is to wait for the listening event before killing the fork.
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
var workers=[];
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
workers[i] = cluster.fork();
console.log('forking');
}
cluster.on('listening', function(worker, code, signal) {
setTimeout(function(){
worker.kill();
},1)
});
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
} else {
// Workers can share any TCP connection
// In this case its a HTTP server
http.createServer(function(req, res) {
res.writeHead(200);
res.end("hello world\n");
}).listen(8000);
}
Here is my workaround...
As said by edi9999, worker process is busy in something and you can't kill it while it not listening.
So instead of killing the worker from main process, send a message from main process to worker and use process.exit() statement in the worker to kill itself.
This way, worker would finish the current work and listen to the message from main process and kill itself.
UPDATE
in this Repository of mine, there is an example code which you can try. This is a command line tool. If you cd in to the folder and type node se and press enter. You will get instructions on how to use it.
If you just wanna see code. Look following two files simultaneously.
1. se.js
2. functions.js
hope it helps!!

NodeJS Cluster Error : SIGSEGV

I use nodejs cluster
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;
if (cluster.isMaster) {
for (var i = 0; i < numCPUs; i++){
var worker = cluster.fork();
worker.on('exit', function(code, signal) {
console.log("worker was killed by signal: " + signal);
});
}
}
And sometimes with different time intervals i have a error
worker was killed by signal: SIGSEGV
What this error mean and why she called?
node version v0.11.14-pre, Debian
Don't know exactly the answer but think this could help.
Using phantomJs I was getting sometimes the same error (changing worker for signal). The situation: I was opening a page using phantomJs; when the body was ready I want a callback to be called, then I was closing phantom. grosso modo my code was:
phantom.create(function (ph) {
...
callbackDone(result);
ph.exit();
});
Doing so, the exception was:
signal killed phantomjs: SIGSEGV
At this point I realized it was breaking just for heavy callback tasks. In order words, if the callback function before exit() was a light one: everything was fine; but in different conditions it crashes.
The solution: close the object before the callback:
ph.exit();
callbackDone(result);

Nodejs cluster isn't response to web browser

I am a newbie in Nodejs and I try something with cluster on nodejs. But I meet a problem:
- I use example on Nodejs API about Cluster:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
} else {
// Workers can share any TCP connection
// In this case its a HTTP server
http.createServer(function(req, res) {
res.writeHead(200);
res.end("hello world\n");
}).listen(8000);
}
I run above code, but when hit url into browser (localhost:8000), browser doesn't receive any response from nodejs (it's connecting... forever until I kill nodejs ##), I am,however, getting the "online" event to fire.
How can I get this server to respond to requests?
p/s: I try getting event "exit" to respawn new worker. Sometime when I hit enter on browser, console log worker x die and then respawn new worker. But browser still connecting...
http://i.stack.imgur.com/RHSYY.png
Help me :) and sorry my bad english
I install the newest version of nodejs ( v0.10.24)
I had a similar issue with cluster. Try putting the http server creation part inside a setTimeout call. (for example delay it for 1000 ms) you should observe an improvement.
setTimeout(function(){ http.createServer...... }, 1000);
Besides, you may also try to create your server using jxcore mt-keep to see if it works on the similar case.
$ jx mt-keep servercode.js

Is rotating child processes in node.js/cluster good idea?

Apache Web Server has a config parameter called MaxRequestsPerChild.
http://httpd.apache.org/docs/2.0/en/mod/mpm_common.html#maxrequestsperchild
"After MaxRequestsPerChild requests, the child process will die."
To avoid crush caused by memory leaks, too many connections, or other unexpected errors, should I do the same thing when using node.js Cluster module?
*I'm using Nginx in front of node.js, not Apache. I mentioned to it so that I could easily explain.
I just implemented it like this:
var maxReqsPerChild = 10; // Small number for debug
var numReqs = 0;
if (cluster.isMaster) {
var numCPUs = require('os').cpus().length;
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('death', function(worker) {
// Fork another when one died
cluster.fork();
});
} else {
http.createServer(function(webReq, webRes) {
// Count up
numReqs++;
// Doing something here
// Kill myself
if (numReqs > maxReqsPerChild) {
process.kill(process.pid); // Or more simply, process.exit() is better?
}
}).listen(1338);
}
This has been working well up until now, but I'm wondering there is more proper way.
MaxRequestsPerChild is good to hide memory leak troubles, but shouldn't be used too often, because it just hides real trouble. First try to avoid the memory leaks.
It shouldn't be used to avoid other issues like too many connections, nor other unexpected errors.
When you do use MaxRequetsPerChild, you shouldn't process.kill neither process.exit,
because that immediately closes all undergoing connections.
Instead, you should server.close, which will wait for all undergoing connections finish, and then fires 'close' event.
var server = http.createServer(...);
server.on( "close", function() {
process.exit(0);
});
server.on( "request", function () {
requestCount += 1;
if ( options.max_requests_per_child && (requestCount >= options.max_requests_per_child) ) {
process.send({ cmd: "set", key: "overMaxRequests", value: 1 });
if ( ! server.isClosed ) {
server.close();
server.isClosed = 1;
}
}
});
see a complete working example here:
https://github.com/mash/node_angel

Resources