How to restart if NodeJS API service failed? - node.js

I have the following NodeJS code:
cluster.js
'use strict';
// cluster.js — master process forks one API worker per CPU core and exposes
// aggregated Prometheus metrics for all workers on port 3013; each worker
// runs the actual API service (app.js) on port 3000.
const cluster = require('cluster');
var express = require('express');
const metricsServer = express();
const AggregatorRegistry = require('prom-client').AggregatorRegistry;
const aggregatorRegistry = new AggregatorRegistry();
var os = require('os');
if (cluster.isMaster) {
  // One worker per available core.
  for (let i = 0; i < os.cpus().length; i++) {
    cluster.fork();
  }
  metricsServer.get('/metrics', (req, res) => {
    aggregatorRegistry.clusterMetrics((err, metrics) => {
      if (err) {
        // BUG FIX: the original only logged the error and then responded
        // with `undefined` metrics; answer with a 500 and stop instead.
        console.error(err);
        res.status(500).send('Failed to collect cluster metrics');
        return;
      }
      res.set('Content-Type', aggregatorRegistry.contentType);
      res.send(metrics);
    });
  });
  // BUG FIX: log only once the server is actually accepting connections,
  // not unconditionally after the (asynchronous) listen call.
  metricsServer.listen(3013, () => {
    console.log(
      'Cluster metrics server listening to 3013, metrics exposed on /metrics'
    );
  });
} else {
  require('./app.js'); // Here it'll handle all of our API service and it'll run under port 3000
}
As you can see in the above code, I'm using Node.js's manual cluster method instead of the PM2 cluster, because I need to monitor my API via Prometheus. I usually start cluster.js via `pm2 start cluster.js`; however, due to a DB connection issue our app.js service failed while cluster.js didn't — apparently because I haven't handled the DB connection error. I want to know:
How can I make sure my app.js and cluster.js always restarts if it crashes?
Is there a Linux crontab that can be put in place to check that certain ports are always serving (i.e. 3000 and 3013)? (If this is a good idea, I'd appreciate it if you could provide the code — I'm not very familiar with Linux.)
Or I could deploy another Node.js API to check that certain services are running, but since my API is real-time and handles a fair amount of load, I'm not too happy to do this.
Any help would be appreciated. Thanks in advance.

You can use monit (https://www.digitalocean.com/community/tutorials/how-to-install-and-configure-monit) on your server to regularly monitor your process; if your project crashes, monit restarts it and can even notify you. You do have to do some configuration on the server, as monit regularly monitors a port — if it doesn't get any reply from that port, it restarts the process.
Otherwise you can use the forever module — easy to install and easy to use: https://www.npmjs.com/package/forever
It monitors your application and restarts it within about a second.

I recently found out that we can listen to the worker's exit event — fired when a worker dies or is closed — and restart it accordingly.
Here is the code:
'use strict';
// Master forks one worker per core and automatically replaces any worker
// that dies unexpectedly (i.e. without having been deliberately disconnected).
const cluster = require('cluster');
var express = require('express');
const metricsServer = express();
var os = require('os');
if (cluster.isMaster) {
  for (let i = 0; i < os.cpus().length; i++) {
    cluster.fork();
  }
  cluster.on(
    "exit",
    function handleExit( worker, code, signal ) {
      console.log( "Worker has died.", worker.process.pid );
      console.log( "Death was suicide:", worker.exitedAfterDisconnect );
      // If a Worker was terminated accidentally (such as by an uncaught
      // exception), then we can try to restart it.
      if ( ! worker.exitedAfterDisconnect ) {
        // BUG FIX: the original declared `var worker = cluster.fork()`,
        // shadowing the `worker` parameter. The fork result was never used,
        // so just fork without assigning.
        cluster.fork();
        // CAUTION: If the Worker dies immediately, perhaps due to a bug in the
        // code, you can run [from what I have READ] into rapid CPU consumption
        // as Master continually tries to create new Workers.
      }
    }
  );
} else {
  require('./app.js');
}

Related

Node.js cluster for only a specific function within an Express app

I am trying to run a Node.js cluster within my Express app, but only for one specific function.
My app is a standard Express app generated with the express app generator.
My app initially scrapes an eCommerce website to get a list of categories in an array. I want to be able to then scrape each category's products, concurrently, using child processes.
I do not want to have the whole Express app inside the child processes. When the app starts up I want only one process to scrape for the initial categories. Once that is done I only want the function that scrapes the products to be run concurrently in the cluster.
I have tried the following:
delegation-controller.js
const { em } = require('./entry-controller');
const cluster = require('cluster');
const coreTotal = require('os').cpus().length;

// Waits for the 'PageLinks' event, stores the links, then starts the
// category crawl: the master forks one worker per CPU core, while each
// worker logs its pid and exits immediately.
class DelegationController {
  links = [];

  constructor() {
    em.on('PageLinks', (pageLinks) => {
      this.links = pageLinks;
      this.startCategoryCrawl();
    });
  }

  startCategoryCrawl() {
    if (!cluster.isMaster) {
      // Worker branch: announce ourselves and terminate right away.
      console.log(`Worker ${process.pid} started`);
      process.exit();
      return;
    }
    console.log(`Master ${process.pid} is running`);
    let spawned = 0;
    while (spawned < coreTotal) {
      cluster.fork();
      spawned += 1;
    }
    cluster.on('exit', (worker, code, signal) => {
      console.log(`worker ${worker.process.pid} died`);
    });
  }
}

module.exports = DelegationController;
But then I got an error:
/ecommerce-scraper/bin/www:58
throw error;
^
Error: bind EADDRINUSE null:3000
Which I am guessing is because it is trying to start the express server again, but it is in use.
Am I able to do what I am trying to do, or am I misunderstanding how Node.js clusters work?
I believe this is not the case where you make use of cluster module. Instead you need the child_process module. This module lets you create a separate process. Here is the documentation.
I typically create my own Worker bootstrap that sits on top of my application. For things that need to run once, I have a convenient runonce function that is given a name and callback. The function checks the primary process for an open (non-busy process) and sends back the PID. If the PID matches (because all processes will claim ownership) the callback executes. If not, the function returns.
Example:
https://gist.github.com/jonshipman/abe627c687a46e7f5ea4b36bb919666c
NodeJS clustering creates identical copies of your application (through cluster.fork()). It's up to your application to ensure that multiple actions aren't run twice (when they aren't expected to be).
I believe, when using Express or https.createServer, it's set up in a way that it doesn't listen to the same port multiple times. Instead, the primary process will distribute the load internally.

How to use redis with node js clusters

I have been using redis as an in-memory store with my nodejs server.
Sessions are also being managed with redis.
Currently what i have been doing is, i flush my redis whenever my server connects to it, so that no session is there whenever server starts
Like this:
// Flush all redis keys on connect so no stale sessions survive a server start.
redisClient.on('connect', function () {
  redisClient.flushdb(function (err, succeeded) {
    if (err) {
      // BUG FIX: the error was silently ignored; surface a failed flush.
      logger.error("redis flushdb failed on startup--", err);
      return;
    }
    logger.debug("redis db cleared on startup--", succeeded); // will be true if successful
  });
});
I also use redis for some other data storage like some queue.
But now i want to implement clustering on my server.
My problem is if i have 4 cores, 4 instances of node will be runnig on my server.
// BUG FIX: `num_processes` was an implicit global (no declaration).
const num_processes = require('os').cpus().length;

if (cluster.isMaster) {
  console.log(`Master ${process.pid} is running`);
  const workers = [];

  // Helper function for spawning worker at index 'i'.
  const spawn = function (i) {
    console.log("spawing at index---", i);
    workers[i] = cluster.fork();
    console.log("----worker id-------", workers[i].id, "-------");

    // Optional: Restart worker on exit
    workers[i].on('exit', function (code, signal) {
      console.log(`code is ${code} and signal is ${signal}`);
      console.log(`worker ${workers[i].process.pid} died array index is ---${i}`);
      console.log('respawning worker', i);
      spawn(i);
    });

    // Hand the worker its slot index once it is accepting connections.
    workers[i].on('listening', () => {
      workers[i].send({ 'index': i });
    });
  };

  // Spawn workers.
  for (let i = 0; i < num_processes; i++) {
    spawn(i);
  } // Code to run if we're in a worker process
} else {
  const redis = require('redis');
  const sio_redis = require('socket.io-redis'),
    redisClient = redis.createClient();

  // NOTE(review): every worker flushes the db on connect, so any worker
  // restart wipes all sessions — this is exactly the problem described in
  // the question above.
  redisClient.on('connect', function () {
    redisClient.flushdb(function (err, succeeded) {
      logger.debug("redis db cleared on startup--", succeeded); // will be true if successful
    });
  });

  const RedisStore = require('connect-redis')(session);
  const redisStore = new RedisStore({ 'host': 'localhost', 'port': 6379, 'client': redisClient });

  const server = require('http').Server(app);
  const listeningServer = server.listen(3002);
}
If any instance exits or dies for some reason, it will clear all the sessions and data in redis.
I don't want this to happen. How should I work with redis in this scenario, so that sessions and data don't get cleared whenever an instance restarts?
You can check if current process is master. That way it will only flush the first time you start your app. If any fork restarts then it wont flush the db.
// BUG FIX: `redisClient` was an implicit global (no declaration).
const redisClient = redis.createClient();

redisClient.on('connect', function () {
  if (cluster.isMaster) {
    // Only the master flushes, so a forked worker restart never wipes
    // session data.
    // NOTE(review): this only helps if the master process also creates a
    // redis client — in the question's code the client exists only inside
    // the worker branch. Confirm against the caller.
    redisClient.flushdb(function (err, succeeded) {
      if (err) {
        // BUG FIX: the error was silently ignored; surface a failed flush.
        logger.error("redis flushdb failed--", err);
        return;
      }
      logger.debug("redis db cleared on startup--", succeeded); // will be true if successful
    });
  } else {
    logger.debug("Forked instance no need to flush redis db"); //
  }
});
There are essentially two ways around this that I think are sane:
don't flush redis,
separate flush from instance startup.
don't flush
You say you want to clean up session data on startup. Well, one of the points of having sessions outside the app's memory is to persist them across restarts and keep your actual app server (Node.js app) stateless.
You can, e.g. set expire key on all session data. So every time you "save" a session, you also setex on that session, optionally even prolonging this TTL every time you access the session (so, a session is valid 12 hours from last touching it, or something). Or, depending on your usage, your session middleware could do that, e.g. https://stackoverflow.com/a/45019589/162070.
Or not expire it at all.
flush independently
Maybe you only flush sessions because you expect to have breaking changes and your old sessions won't work. Or have an explicit requirement, that on each new deploy, you clean the session data up. In any case, you separate this then. Have, for example, your server.js, as your app, and have a separate session-cleanup.js file that connects to redis, flushes, and disconnects. Then have npm setup like this:
"scripts": {
"session.cleanup": "node lib/session-cleanup.js",
"start": "npm run session.cleanup && node lib/server.js",
...
}
That way, before running your server.js and it runs cluster mode, you will cleanup sessions first. And if your cluster instances die and respawn, nothing happens.
Now, each time you redeploy, you clean it up. Or even remove this from "start", and have your deployment pipeline call npm run session.cleanup explicitly. You can even expose a button in an admin UI to do this on runtime (which would likely log you out :)), the point is, session cleanup is now a separate issue from server start.

cluster in mongodb and how does it picks up the cores

So I am new to cluster and mongodb and i came across this bunch of code.
#!/usr/bin/env node
// Master: fork one worker per CPU core and unconditionally replace any
// worker that exits. Worker: sync the DB models, then start the HTTP server.
const cluster = require('cluster');
const os = require('os');
const app = require('../main');
const models = require("../models");

if (cluster.isMaster) {
  const mCoreCount = os.cpus().length;
  console.log("Cores : ", mCoreCount);
  let forked = 0;
  while (forked < mCoreCount) {
    cluster.fork();
    forked += 1;
  }
  // A dead worker is always respawned, whatever the exit reason.
  cluster.on('exit', () => {
    cluster.fork();
  });
} else {
  models.sequelize.sync().then(() => {
    app.listen(app.get('port'), () => {
      console.log('api is live.' + app.get('port'));
    });
  });
}
So when I console I get cores as 4, I tried reading but I could not understand anything , If someone could point me whats going on here It will be a great help.
I understand that the more cores there are, the more node instances are created — but I guess right now it's picking that number up from my system. What happens in production?
This script tries to launch the NodeJS app in the most efficient way, by creating a fork for each core available on the hardware server.
It picks up the number of cores with os.cpus().length.
In production, the same thing will happen, and the number of forks will depend on the number of cores available on the production server.
Are you really sure the database is MongoDB in both environments? We can't really tell without seeing the whole app code.

How to increase event loop capacity in nodejs?

I know that Node.js uses a single thread and an event loop to process requests, handling only one at a time (which is non-blocking). But I am unable to determine the event loop's capacity to run 100k requests per second.
Here I want to do capacity planning for a Node.js server to handle 100k requests per second.
Please let me know how I can determine the capacity of the event loop in order to increase it.
A single instance of Node.js runs in a single thread. To take advantage of multi-core systems the user will sometimes want to launch a cluster of Node.js processes to handle the load.
More info here and here
For the reference check following code for simple implementation of cluster in node.js
// Minimal cluster example: the master forks one worker per core; every
// worker runs the same Express server, sharing port 8080.
const cluster = require('cluster');
const express = require('express');
const numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  // Create a worker per available CPU core.
  let created = 0;
  while (created < numCPUs) {
    cluster.fork();
    created += 1;
  }
} else {
  // Workers share the TCP connection in this server.
  const app = express();
  app.get('/', (req, res) => {
    res.send('Hello World!');
  });
  // All workers use this port.
  app.listen(8080);
}
Cluster is an extensible multi-core server manager for node.js for more source check here.

NodeJS on multiple processors (PM2, Cluster, Recluster, Naught)

I am investigating options for running node in a multi-core environment.
I'm trying to determine the best approach and so far I've seen these options
Use built in cluster library to spin up works and respond to signals
Use PM but, PM2 -i is listed as beta.
Naught
Recluster
Are there other alternatives? What are folks using in production?
I've been using the default cluster library, and it works very well. I've had over 10,000 concurrent connections (multiple clusters on multiple servers) and it works very well.
It is suggested to use clusters with domain for error handling.
This is lifted straight from http://nodejs.org/api/domain.html — I've made some changes to how it spawns new clusters for each core of your machine, got rid of the if/else, and added Express.
// Cluster + domain example (adapted from the Node.js domain docs): the
// master forks one worker per core; each worker runs Express inside a
// domain so that an unexpected error triggers a graceful shutdown, after
// which the master forks a replacement on 'disconnect'.
// NOTE(review): the 'domain' module is deprecated in modern Node.js; per the
// Node docs, prefer process-level error handling or a process manager.
var cluster = require('cluster'),
http = require('http'),
PORT = process.env.PORT || 1337,
os = require('os'),
server; // assigned in the worker branch below; closed by the domain handler
// Fork one worker for every CPU core reported by the OS.
function forkClusters () {
var cpuCount = os.cpus().length;
// Create a worker for each CPU
for (var i = 0; i < cpuCount ; i += 1) {
cluster.fork();
}
}
// Master Process
if (cluster.isMaster) {
// You can also of course get a bit fancier about logging, and
// implement whatever custom logic you need to prevent DoS
// attacks and other bad behavior.
//
// See the options in the cluster documentation.
//
// The important thing is that the master does very little,
// increasing our resilience to unexpected errors.
forkClusters ()
// When a worker disconnects (triggered by the worker's own error
// handler below), immediately fork a replacement.
cluster.on('disconnect', function(worker) {
console.error('disconnect!');
cluster.fork();
});
}
// Wires the domain's 'error' event: on any unexpected error the worker
// stops accepting requests, tells the master to replace it, and force-exits
// after 30 seconds if a clean shutdown hasn't completed.
function handleError (d) {
d.on('error', function(er) {
console.error('error', er.stack);
// Note: we're in dangerous territory!
// By definition, something unexpected occurred,
// which we probably didn't want.
// Anything can happen now!Be very careful!
try {
// make sure we close down within 30 seconds
var killtimer = setTimeout(function() {
process.exit(1);
}, 30000);
// But don't keep the process open just for that!
killtimer.unref();
// stop taking new requests.
server.close();
// Let the master know we're dead.This will trigger a
// 'disconnect' in the cluster master, and then it will fork
// a new worker.
cluster.worker.disconnect();
} catch (er2) {
// oh well, not much we can do at this point.
console.error('Error sending 500!', er2.stack);
}
});
}
// child Process
if (cluster.isWorker) {
// the worker
//
// This is where we put our bugs!
var domain = require('domain');
var express = require('express');
var app = express();
app.set('port', PORT);
// See the cluster documentation for more details about using
// worker processes to serve requests.How it works, caveats, etc.
var d = domain.create();
handleError(d);
// Now run the handler function in the domain.
//
// put all code here. any code included outside of domain.run will not handle errors on the domain level, but will crash the app.
//
d.run(function() {
// this is where we start our server
server = http.createServer(app).listen(app.get('port'), function () {
console.log('Cluster %s listening on port %s', cluster.worker.id, app.get('port'));
});
});
}
We use Supervisor to manage our Node.JS process's, to start them upon boot, and to act as a watchdog in case the process's crash.
We use Nginx as a reverse-proxy to load balance traffic between the process's that listen to different ports
this way each process is isolated from the other.
for example: Nginx listens on port 80 and forwards traffic to ports 8000-8003
I was using PM2 for quite a while, but their pricing is expensive for my needs — I have my own analytics environment and I don't require support — so I decided to experiment with alternatives. For my case, just forever did the trick; it's actually very simple:
forever -m 5 app.js
Another useful example is
forever start app.js -p 8080

Resources