I have been trying to figure this out for a while. I wrote a very simple http server in node to benchmark the effect of using cluster. Here is my code:
var cluster = require('cluster');
var http = require('http');

// Number of worker processes to fork; 0 disables clustering entirely.
var numCPUs = 0; //require('os').cpus().length;
if (process.argv.length >= 3) {
  // argv entries are strings — parse explicitly (radix 10) instead of
  // relying on implicit numeric coercion in the comparisons below.
  numCPUs = parseInt(process.argv[2], 10);
}

if (cluster.isMaster && numCPUs > 0) {
  console.log("launching " + numCPUs + " procs");
  // Fork workers.
  for (var i = 0; i < numCPUs; i++) {
    console.log("launching proc #" + i);
    cluster.fork();
  }
  // 'death' was the pre-0.8 event name and no longer fires; the
  // current event is 'exit', and the pid lives on worker.process.
  cluster.on('exit', function(worker) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  // Worker processes (or the single process when numCPUs === 0) each
  // run an http server; the master distributes the shared port 3000.
  http.Server(function(req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  }).listen(3000);
}
The problem is that I am not seeing any performance gain at all. 1 process has better performance most of the time. And, If I add more work, like retrieving data from redis or mongo then increasing the processes helps, but only modestly (around 15%). I've tried this on an i7 MBPr (quad-core with HT), and an i5 (quad-core) Win7 systems both with the same results.
Can someone please explain what's wrong with this code? Or, why am I not seeing an advantage/benefit in using cluster?
Your test appears to be almost purely I/O-oriented and in that situation using cluster provides little benefit (as you've seen) because I/O is concurrent regardless.
To see a significant benefit you'd need to have portions of your code that are CPU-bound because it's only then that you can provide additional parallelism between your cluster workers.
Related
So I am new to cluster and mongodb and i came across this bunch of code.
#!/usr/bin/env node
var cluster = require('cluster');
var os = require('os');
var app = require('../main');
var models = require("../models");

if (cluster.isMaster) {
  // One worker per available CPU core.
  var mCoreCount = os.cpus().length;
  console.log("Cores : ", mCoreCount);
  var forked = 0;
  while (forked < mCoreCount) {
    cluster.fork();
    forked += 1;
  }
  // Replace any worker that dies so the pool size stays constant.
  cluster.on('exit', function() {
    cluster.fork();
  });
} else {
  // Worker side: sync the DB schema first, then start the HTTP server.
  models.sequelize.sync().then(function() {
    app.listen(app.get('port'), function() {
      console.log('api is live.' + app.get('port'));
    });
  });
}
So when I log it I see 4 cores. I tried reading up on this but could not understand much — if someone could point out what is going on here, it would be a great help.
I understood that the greater the number of cores, the more node instances there will be. Right now it is picking the count up from my system — what happens in production?
This script is trying to get the more efficient way to launch the NodeJS app by creating a fork for each available core on the hardware server.
It picks up the number of core with os.cpus().length
In production, the same thing happens: the number of forks will depend on the number of cores available on the production server.
Are you really sure the database is MongoDB in both environments? We can't really tell without seeing the whole app code.
I'm attempting to implement a Node module which uses cluster. The problem is that the entire parent scope is forked alongside the intended cluster code. I discovered it while writing tests in Mocha for the module: the test suite will run many times, instead of once.
See below, myModule.js creates N workers, one for each CPU. These workers are http servers, or could be anything else.
Each time the test.js runs, the script runs N + 1 times. In the example below, console.log runs 5 times on my quad core.
Can someone explain if this is an implementation issue or cluster config issue? Is there any way to limit the scope of fork() without having to import a module ( as in this solution https://github.com/mochajs/mocha/issues/826 )?
/// myModule.js ////////////////////////////////////
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
// Starts one worker per CPU; each worker serves HTTP on port 8000.
// NOTE(review): cluster.fork() re-executes the *entry script*
// (process.argv[1] by default), not this module file — so whatever
// required this module has its top-level code run once per worker.
// That is exactly the N+1 executions the question reports.
var startCluster = function(){
if (cluster.isMaster) {
// CREATE A CLUSTER OF FORKED WORKERS, ONE PER CPU
//master does not listen to UDP messages.
for (var i = 0; i < numCPUs; i++) {
// the returned worker handle is never used
var worker = cluster.fork();
}
} else {
// Worker processes have an http server.
http.Server(function (req, res){
res.writeHead(200);
res.end('hello world\n');
}).listen(8000);
}
return
}
module.exports = startCluster;
/////////////////////////////////////////////////
//// test.js ////////////////////////////////////
// Requiring myModule and calling startCluster() makes cluster.fork()
// re-run THIS file once per worker (fork defaults to process.argv[1]),
// so the console.log below prints N+1 times instead of once.
var startCluster = require('./myModule.js')
startCluster()
console.log('hello');
////////////////////////////////////////////////////////
So I'll venture an answer. Looking closer at the node docs there is a cluster.setupMaster which can override defaults. The default on a cluster.fork() is to execute the current script, with "file path to worker file. (Default=process.argv[1])"
https://nodejs.org/docs/latest/api/cluster.html#cluster_cluster_settings
So if another module is importing a script with a cluster.fork() call, it will still use the path of process.argv[1], which may not be the path you expect, and have unintended consequences.
So we shouldn't initialize the cluster master and worker in the same file as the official docs suggest. It would be prudent to separate the worker into a new file and override the default settings. (Also for safety you can add the directory path with __dirname ).
cluster.setupMaster({ exec: __dirname + '/worker.js',});
So here would be the corrected implementation:
/// myModule.js ////////////////////////////////////
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

/**
 * Forks one worker per CPU core.
 *
 * Workers execute worker.js (configured via cluster.setupMaster),
 * NOT the entry script — this is what stops the caller's top-level
 * code from being re-run once per worker.
 */
var startCluster = function () {
  // Override the default exec path (process.argv[1], i.e. whatever
  // script required us) with an absolute path to the worker file.
  cluster.setupMaster({
    exec: __dirname + '/worker.js'
  });
  if (cluster.isMaster) {
    for (var i = 0; i < numCPUs; i++) {
      cluster.fork(); // worker handle intentionally unused
    }
  }
};
module.exports = startCluster;
/////////////////////////////////////////////////
//// worker.js ////////////////////////////////////
var http = require('http');
// Every worker runs its own HTTP listener on port 8000; the cluster
// master hands each incoming connection to one of the workers.
var handleRequest = function (request, response) {
  response.writeHead(200);
  response.end('hello world\n');
};
http.createServer(handleRequest).listen(8000);
////////////////////////////////////////////////////////
//// test.js ////////////////////////////////////
// With workers now executing worker.js (via setupMaster inside
// myModule), requiring this file no longer re-runs it per worker,
// so 'hello' prints exactly once.
var startCluster = require('./myModule.js')
startCluster()
console.log('hello');
////////////////////////////////////////////////////////
You should now see 'hello' exactly once, instead of N + 1 times (once per worker plus the master).
You need to have the "isMaster" stuff at the top of your code, not inside the function. The worker will run from the top of the module ( it's not like a C++ fork, where the worker starts at the fork() point ).
I assume you want the startCluster = require('./cluster-bug.js') line to evaluate only once? Well, that's because your whole script runs clustered. What you specify inside startCluster only makes behavior differ between the master and the worker processes; cluster spawns a fork of the file in which it was initialised.
I am programming a prototype application with the following components:
webinterface for admins
doing cron jobs(statistic generation, ..)
interact with other webservices over http
I started programming with nodejs(typescript) and i got the connection to the other services. Now i got a problem with cron-jobs in nodejs.
Iam using node-cron for executing the cronjob.
Inside one job I need to obtain the status of many PCs and produce a summary. Doing this directly would block the main thread.
So i think I need to this in a separate thread.
How can i do this in nodejs? Should i use webworker-threads?
Am I on the proper way?
Should i better use Java(Grails/Spring) for this?
I really like the simplicity of nodejs (for http clients, ..)
I hope someone can confirm that I am on the right track.
I would just use Node Cluster. Using cluster, a master can create multiple workers, which means your cron won't block incoming requests. Just make sure that only 1 worker is running the cron.
I have never worked with node-cron before, but I have experience with SyncedCron; it should be much the same.
For the http client there are a lot libraries doing this, you can check Request or httpclient.
Your code should look something like this :
var cluster = require('cluster');
var http = require('http');
var numWorkers = require('os').cpus().length-1; // just give 1 cpu for OS to use, or maybe 2
if (cluster.isMaster) {
console.log('Master cluster setting up ' + numWorkers + ' workers...');
var cronPID=null;
for(var i = 0; i < numWorkers; i++) {
var worker=cluster.fork();
if(i==0){
//instructing the first worker to assume role of SyncedCron.
worker.send('you do it!');
cronPID=worker.process.pid;
console.log("worker "+cronPID+" choosed as Cron worker!");
}
}
cluster.on('online', function(worker) {
console.log('Worker ' + worker.process.pid + ' is online');
});
cluster.on('exit', function(worker, code, signal) {
// have to revive the worker
console.log('Worker ' + worker.process.pid + ' died with code: ' + code + ', and signal: ' + signal);
console.log('Starting a new worker');
var newWorker=cluster.fork();
if(cronPID==worker.process.pid)
{ // need to re-elect a new cron worker!
newWorker.send('you do it!');
cronPID=newWorker.process.pid;
console.log("worker "+cronPID+" choosed as Cron worker!");
}
});
}else
{ // worker sides
process.on('message', (msg) => {
// validate your message that you get
// if validated, create a cron job here
});
// create your express below, I assume you use express instead of node's http library
var express = require('express');
var app = express();
app.post...........
}
Note :
To revive the master, use something like "forever"
Your server should have multiple cores, at least 4, though I recommend more (8 maybe?).
I am a newbie in Nodejs and I try something with cluster on nodejs. But I meet a problem:
- I use example on Nodejs API about Cluster:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
  // Fork one worker per CPU core.
  var forked;
  for (forked = 0; forked < numCPUs; forked += 1) {
    cluster.fork();
  }
  // Log whenever a worker dies.
  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
} else {
  // Workers all listen on port 8000; the master owns the socket and
  // round-robins incoming connections among them.
  var server = http.createServer(function(req, res) {
    res.writeHead(200);
    res.end("hello world\n");
  });
  server.listen(8000);
}
I run the above code, but when I hit the URL in a browser (localhost:8000), the browser doesn't receive any response from node (it keeps connecting... forever, until I kill node). I am, however, seeing the "online" event fire.
How can I get this server to respond to requests?
p/s: I try getting event "exit" to respawn new worker. Sometime when I hit enter on browser, console log worker x die and then respawn new worker. But browser still connecting...
http://i.stack.imgur.com/RHSYY.png
Help me :) and sorry my bad english
I install the newest version of nodejs ( v0.10.24)
I had a similar issue with cluster. Try putting the http server creation part inside a setTimeout call. (for example delay it for 1000 ms) you should observe an improvement.
setTimeout(function(){ http.createServer...... }, 1000);
Besides, you may also try to create your server using jxcore mt-keep to see if it works on the similar case.
$ jx mt-keep servercode.js
Apache Web Server has a config parameter called MaxRequestsPerChild.
http://httpd.apache.org/docs/2.0/en/mod/mpm_common.html#maxrequestsperchild
"After MaxRequestsPerChild requests, the child process will die."
To avoid a crash caused by memory leaks, too many connections, or other unexpected errors, should I do the same thing when using node.js's Cluster module?
*I'm using Nginx in front of node.js, not Apache. I mention Apache only so I can explain more easily.
I just implemented it like this:
// NOTE(review): this snippet assumes `cluster` and `http` were
// require()d earlier in the real file — they are not shown here.
var maxReqsPerChild = 10; // Small number for debug
var numReqs = 0;
if (cluster.isMaster) {
var numCPUs = require('os').cpus().length;
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
// NOTE(review): 'death' is the pre-0.8 event name; on current node
// this never fires — the event is 'exit'.
cluster.on('death', function(worker) {
// Fork another when one died
cluster.fork();
});
} else {
http.createServer(function(webReq, webRes) {
// Count up
numReqs++;
// Doing something here
// Kill myself
// NOTE(review): process.kill aborts all in-flight connections;
// the accepted approach is server.close(), which drains them first.
if (numReqs > maxReqsPerChild) {
process.kill(process.pid); // Or more simply, process.exit() is better?
}
}).listen(1338);
}
This has been working well up until now, but I'm wondering whether there is a more proper way.
MaxRequestsPerChild is good to hide memory leak troubles, but shouldn't be used too often, because it just hides real trouble. First try to avoid the memory leaks.
It shouldn't be used to avoid other issues like too many connections, nor other unexpected errors.
When you do use MaxRequestsPerChild, you shouldn't call process.kill nor process.exit, because either one immediately closes all in-flight connections.
Instead, you should server.close, which will wait for all undergoing connections finish, and then fires 'close' event.
// NOTE(review): `options`, `requestCount`, and the real request
// handler (the `...`) are defined elsewhere in the complete example
// (see the node_angel link below); this is an excerpt.
var server = http.createServer(...);
// 'close' fires only after every in-flight connection has finished,
// so exiting here never drops an active request.
server.on( "close", function() {
process.exit(0);
});
server.on( "request", function () {
requestCount += 1;
if ( options.max_requests_per_child && (requestCount >= options.max_requests_per_child) ) {
// Tell the master this worker is retiring so it can fork a fresh one.
process.send({ cmd: "set", key: "overMaxRequests", value: 1 });
if ( ! server.isClosed ) {
// close() stops accepting new connections; existing ones drain,
// then the 'close' handler above exits the process.
server.close();
server.isClosed = 1;
}
}
});
see a complete working example here:
https://github.com/mash/node_angel