Nodejs for analytics data dumping in flat file - node.js

We are planning to use nodejs instead of tomcat or jetty. But we ran some test around it and node is performing slower than jetty. Test case we ran is on 8 core machine with 32gb of ram. For test we used 1000000 requests with 1000 concurrency. We used cluster module, url module, http module and os module with stream. Does these module add any overhead…? Is there any optimization that we could do to better the performance. Following is the code for node.
var http = require("http");
var url = require("url");
var cluster = require('cluster');
var fs = require('fs');
var numCPUs = require('os').cpus().length;
var stream = [];
if (cluster.isMaster) {
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('death', function(worker) {
console.log('worker ' + worker.pid + ' died');
});
}
else{
var server = http.createServer(function(request, response) {
response.end();
});
stream = fs.createWriteStream("nodejsCluster"+cluster.worker.id+".txt");
stream.once('open', function(fd) {
server.listen(8084);
});
server.on('request', function(request, response){
var parsedUrl = url.parse(request.url, true, true);
var queryAsObject = parsedUrl.query;
stream.write( ( new Date() )+'This is the content to write into file '+queryAsObject.test+"\n");
});
console.log("Server is listening"+cluster.worker.id);
}
Thanks

Related

Socket.io not working with node cluster

I have a cluster.js with the following code:
var numOfCpus = 16;
var cluster = require('cluster');
if (cluster.isMaster) {
for (var i = 0; i < numOfCpus; i++) {
cluster.fork();
}
console.log("master is running");
} else {
console.log('Worker %d started', cluster.worker.id);
var server = app.listen(8887);
var io = require('socket.io').listen(server);
var live_data = io.of('/live_data');
live_data.on('connection',function(socket){
console.log('Connected: %s', socket.id);
});
}
My client application works fine when the numOfCpus = 1 in cluster.js. When I have anything more than 1, the socket.io starts giving the following error:
Do I have to do anything special to make socket.io work with multiple node workers? Any help will be highly appreciated. Thanks.
The issue was solved using sticky session, as pointed by robertklep, in the comment. However, the package I used for the purpose is socketio-sticky-session.
My Final code of cluster.js looks like the following:
var sticky = require('socketio-sticky-session')
var cluster = require('cluster');
var os = require('os');
var options = {
proxy: false,
num: require('os').cpus().length
}
var server = sticky(options, function() {
var server = app.listen();
var io = require('socket.io').listen(server);
var live_data = io.of('/live_data');
live_data.on('connection',function(socket){
console.log('Connected: %s', socket.id);
});
return server
}).listen(8887, function() {
console.log((cluster.worker ? 'WORKER ' + cluster.worker.id : 'MASTER') + ' | HOST ' + os.hostname() + ' | PORT ' + 8887)
})
Details about the working and implementation of sticky sessions can be read # https://github.com/elad/node-cluster-socket.io

How to verify Cluster working in node js?

I am new to nodejs and currently playing with its features, one of the important feature I came across is Cluster, I tried to implement that for my sample application using expressjs, angular and nodejs.
Cluster code:
var cluster = require('cluster');
if (cluster.isMaster) {
var cpuCount = require('os').cpus().length;
for (var i = 0; i < cpuCount; i += 1) {
cluster.fork();
}
} else {
var express = require('express');
var app = express();
var exportRouter=require('./routers/exportRouter');
var process = require('process');
fakeDB = [];
app.use(express.static(__dirname + '/public'));
app.use(require('./routers/exportRouter.js'));
console.log('process Id :',process.pid);
app.listen(3000, function(){
console.log('running on 30000');
});
}
I have added following code in my routers to block the event loop,so when I make first request It will block one nodejs worker. so if another user makes call while first node is blocked second worker should pick that up.
router code :
var express = require('express');
var exportRouter = express.Router();
var process = require('process');
exportRouter.get('/getMe',function(req,res){
console.log('I am using process ',process.pid);
console.log('get is called');
fakeDB.push(req.query.newName+' '+ process.pid);
res.send(req.query.newName + ' ' + process.pid);
console.log('New name received ',fakeDB);
console.log('New name received ',fakeDB);
var d = new Date().getTime();
console.log('old ',d)
var x = d+10000;
console.log('should stop post ',x);
while(true){
var a = new Date().getTime();
//console.log('new ',a)
if(x<a){
break;
}
}
console.log('I am releasing event loop for ',process.pid);
});
module.exports = exportRouter;
it does not serve other request using another worker and waits for blocked node worker.. BTW I am using node js version 0.12.7(64bit) and 4 cpus.
THanks in advance..
it does not serve other request using another worker and waits for blocked node worker
Your testing methodology is probably wrong. Here's a simplified version of your sample.
var cluster = require('cluster')
if (cluster.isMaster) {
var cpuCount = require('os').cpus().length
for (var i = 0; i < cpuCount; i += 1) {
cluster.fork()
}
} else {
var express = require('express')
var app = express()
console.log('process Id:', process.pid)
app.get('/', function (req, res) {
console.log('pid', process.pid, 'handler start, blocking CPU')
var i = 0;
while (i < 10e9) {
i++
}
console.log('pid', process.pid, 'unblocked, responding')
res.send('thanks')
})
app.listen(3003, function () {
console.log('running on 3003')
})
}
If I run this in one terminal, then open two other terminals and as quickly as possible fire off a curl localhost:3003 in each terminal, I can see the second request arrives and begins processing before the first request gets a response:
pid 53434 handler start, blocking CPU
pid 53437 handler start, blocking CPU
pid 53434 unblocked, responding
pid 53437 unblocked, responding

Socket.io events working only in the same process

I'm trying to use socket.io & sticky-session to pass messages to my clients.
The problem is that client which connect to one of the processes won't get messages from other processes, only from the process he is connected to.
How can I make web sockets to work across all processes?
Server.js:
var cluster = require('cluster');
var app = require('./config/express')(db);
// Init the server to run according to server CPU's
if (cluster.isMaster) {
for (var i = 0; i < 4; i++) {
cluster.fork();
}
} else {
app.listen(config.port, function () {
console.log('Process ' + process.pid + ' is listening to all incoming requests');
});
}
Process.js:
var http = require('http');
var express = require('express');
var app = express();
var server = http.createServer(app);
var io = require('socket.io').listen(server);
var ns = io.of('/ns');
var sticky = require('sticky-session');
if (!sticky.listen(server, 8080)) {
// Master code
server.once('listening', function() {
console.log('server started on 8080 port');
});
}
client.js:
var io = require('socket.io-client');
var serverUrl = 'http://localhost:8080/ns';
var conn = io.connect(serverUrl);
conn.on('malware', function(infectedProcess){
console.log('infectedProcess: ' + infectedProcess);
});

Maximum performance for node HTTP server?

I'm running a test trying to draw maximum delivery speed from a Node HTTP server. It's a simple server.
In my test I have 50K virtual clients establishing a permanent connection with the server (I run ulimit -n 99999 before). Then, upon another event, an HTTP connection to a different port, the server sends one message to each virtual client. At the end all clients receive the corresponding message.
Sending all messages takes minutes in my tests. Are there any recommendations that would help me improve these measurements so that I can send 50K messages in seconds instead of minutes?
The server is running in a m1.medium AWS instance. The idea is to improve performance with the same platform.
Copying the server code:
var http = require("http");
var connectionResponse = [];
var connectionIndex = 0;
http.createServer(function(request, response) {
console.log("Received connection " + connectionIndex);
response.setTimeout(1200000, function() {
console.log("Socket timeout");
});
connectionResponse[connectionIndex] = response;
connectionIndex++;
}).listen(8888);
http.createServer(function(request, response) {
console.log("8887 connected - Will respond");
response.writeHead(200, {"Content-Type": "text/plain"});
response.write("Triggered all responses");
response.end();
console.log("Begin notifications:" + new Date().toISOString());
for(var i = 0; i < connectionIndex; i++) {
connectionResponse[i].writeHead(200, {"Content-Type": "text/plain", "Content-Length": 4, "transfer-encoding" : ""});
connectionResponse[i].write("CAFE");
connectionResponse[i].end();
}
console.log("End notifications" + new Date().toISOString());
}).listen(8887);
Setting this http://nodejs.org/api/http.html#http_agent_maxsockets to a sufficient number
var http = require('http');
http.globalAgent.maxSockets = xxx;
var https = require('https');
https.globalAgent.maxSockets = xxx;
Using nodejs clustering module, http://nodejs.org/api/cluster.html
Now, regarding the clustering, it really depends on what you want to do. The default example can go long way before you have to tweak it. An example would be
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
} else {
// Workers can share any TCP connection
// In this case its a HTTP server
http.createServer(function(req, res) {
res.writeHead(200);
res.end("hello world\n");
}).listen(8000);
}

Node.js Cluster + Express always invoke the same worker

I'm trying to use the cluster module to handle multiple http requests concurrently with Express.
With the code below I'm able to spawn multiple workers and have all of them listen on the same port. The large for loop is there to simulate heavy load on the web server.
What I'd like to see is that if a worker is busy processing one http request when a second request comes in, a different worker will get invoked and handle that second request. Instead, when I try to issue multiple requests using curl, all requests are processed sequentially by one single worker; no other workers are ever invoked even though they've been forked.
Could it be that I'm using Express incorrectly? Thanks in advance!
var cluster = require('cluster');
if (cluster.isMaster) {
var cpuCount = require('os').cpus().length;
for (var i = 0; i < cpuCount; i += 1) {
cluster.fork();
}
}
else {
var http = require('http'),
app = require('express')();
http.createServer(app).listen(31415, function () {
console.log(process.pid + " listening on 31415");
});
app.get('/', function (req, res) {
var t= 0;
for(var i=0; i < 100000000; i++){
t++;
}
res.send('done');
});
}
Try not to use built-in module ?
master.js
var cp = require('child_process');
var net = require('net');
// create tcp server listen to a port
var tcp = net.createServer();
tcp.listen(8000, function(){
// detect cpu number, and fork child process
for (var i=0;i< require('os').cpus().length; i++) {
var worker = cp.fork('child.js');
worker.send(i, tcp._handle);
}
tcp.close();
});
child.js
var express = require('express');
var app = express();
process.on('message', function(id, handle){
app.get('/',function(){
console.log(process.pid+' is listening ...');
});
app.listen(handle, function(){
console.log(process.pid + 'started');
});
});
this works fine with express 3.x

Resources