Tearing my hair out with this one... has anyone managed to scale Socket.IO to multiple "worker" processes spawned by Node.js's cluster module?
Lets say I have the following on four worker processes (pseudo):
// on the server
var express = require('express');
var server = express();
var socket = require('socket.io');
var io = socket.listen(server);
// socket.io
io.set('store', new socket.RedisStore);
// set-up connections...
io.sockets.on('connection', function(socket) {
socket.on('join', function(rooms) {
rooms.forEach(function(room) {
socket.join(room);
});
});
socket.on('leave', function(rooms) {
rooms.forEach(function(room) {
socket.leave(room);
});
});
});
// Emit a message every second
function send() {
io.sockets.in('room').emit('data', 'howdy');
}
setInterval(send, 1000);
And on the browser...
// on the client
socket = io.connect();
socket.emit('join', ['room']);
socket.on('data', function(data){
console.log(data);
});
The problem: Every second, I'm receiving four messages, due to four separate worker processes sending the messages.
How do I ensure the message is only sent once?
Edit: In Socket.IO 1.0+, rather than setting a store with multiple Redis clients, a simpler Redis adapter module can now be used.
var io = require('socket.io')(3000);
var redis = require('socket.io-redis');
io.adapter(redis({ host: 'localhost', port: 6379 }));
The example shown below would look more like this:
var cluster = require('cluster');
var os = require('os');
if (cluster.isMaster) {
// we create a HTTP server, but we do not use listen
// that way, we have a socket.io server that doesn't accept connections
var server = require('http').createServer();
var io = require('socket.io').listen(server);
var redis = require('socket.io-redis');
io.adapter(redis({ host: 'localhost', port: 6379 }));
setInterval(function() {
// all workers will receive this in Redis, and emit
io.emit('data', 'payload');
}, 1000);
for (var i = 0; i < os.cpus().length; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
}
if (cluster.isWorker) {
var express = require('express');
var app = express();
var http = require('http');
var server = http.createServer(app);
var io = require('socket.io').listen(server);
var redis = require('socket.io-redis');
io.adapter(redis({ host: 'localhost', port: 6379 }));
io.on('connection', function(socket) {
socket.emit('data', 'connected to worker: ' + cluster.worker.id);
});
app.listen(80);
}
If you have a master node that needs to publish to other Socket.IO processes, but doesn't accept socket connections itself, use socket.io-emitter instead of socket.io-redis.
If you are having trouble scaling, run your Node applications with DEBUG=*. Socket.IO now implements debug which will also print out Redis adapter debug messages. Example output:
socket.io:server initializing namespace / +0ms
socket.io:server creating engine.io instance with opts {"path":"/socket.io"} +2ms
socket.io:server attaching client serving req handler +2ms
socket.io-parser encoding packet {"type":2,"data":["event","payload"],"nsp":"/"} +0ms
socket.io-parser encoded {"type":2,"data":["event","payload"],"nsp":"/"} as 2["event","payload"] +1ms
socket.io-redis ignore same uid +0ms
If both your master and child processes both display the same parser messages, then your application is properly scaling.
There shouldn't be a problem with your setup if you are emitting from a single worker. What you're doing is emitting from all four workers, and due to Redis publish/subscribe, the messages aren't duplicated, but written four times, as you asked the application to do. Here's a simple diagram of what Redis does:
Client <-- Worker 1 emit --> Redis
Client <-- Worker 2 <----------|
Client <-- Worker 3 <----------|
Client <-- Worker 4 <----------|
As you can see, when you emit from a worker, it will publish the emit to Redis, and it will be mirrored from other workers, which have subscribed to the Redis database. This also means you can use multiple socket servers connected the the same instance, and an emit on one server will be fired on all connected servers.
With cluster, when a client connects, it will connect to one of your four workers, not all four. That also means anything you emit from that worker will only be shown once to the client. So yes, the application is scaling, but the way you're doing it, you're emitting from all four workers, and the Redis database is making it as if you were calling it four times on a single worker. If a client actually connected to all four of your socket instances, they'd be receiving sixteen messages a second, not four.
The type of socket handling depends on the type of application you're going to have. If you're going to handle clients individually, then you should have no problem, because the connection event will only fire for one worker per one client. If you need a global "heartbeat", then you could have a socket handler in your master process. Since workers die when the master process dies, you should offset the connection load off of the master process, and let the children handle connections. Here's an example:
var cluster = require('cluster');
var os = require('os');
if (cluster.isMaster) {
// we create a HTTP server, but we do not use listen
// that way, we have a socket.io server that doesn't accept connections
var server = require('http').createServer();
var io = require('socket.io').listen(server);
var RedisStore = require('socket.io/lib/stores/redis');
var redis = require('socket.io/node_modules/redis');
io.set('store', new RedisStore({
redisPub: redis.createClient(),
redisSub: redis.createClient(),
redisClient: redis.createClient()
}));
setInterval(function() {
// all workers will receive this in Redis, and emit
io.sockets.emit('data', 'payload');
}, 1000);
for (var i = 0; i < os.cpus().length; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
}
if (cluster.isWorker) {
var express = require('express');
var app = express();
var http = require('http');
var server = http.createServer(app);
var io = require('socket.io').listen(server);
var RedisStore = require('socket.io/lib/stores/redis');
var redis = require('socket.io/node_modules/redis');
io.set('store', new RedisStore({
redisPub: redis.createClient(),
redisSub: redis.createClient(),
redisClient: redis.createClient()
}));
io.sockets.on('connection', function(socket) {
socket.emit('data', 'connected to worker: ' + cluster.worker.id);
});
app.listen(80);
}
In the example, there are five Socket.IO instances, one being the master, and four being the children. The master server never calls listen() so there is no connection overhead on that process. However, if you call an emit on the master process, it will be published to Redis, and the four worker processes will perform the emit on their clients. This offsets connection load to workers, and if a worker were to die, your main application logic would be untouched in the master.
Note that with Redis, all emits, even in a namespace or room will be processed by other worker processes as if you triggered the emit from that process. In other words, if you have two Socket.IO instances with one Redis instance, calling emit() on a socket in the first worker will send the data to its clients, while worker two will do the same as if you called the emit from that worker.
Let the master handle your heartbeat (example below) or start multiple processes on different ports internally and load balance them with nginx (which supports also websockets from V1.3 upwards).
Cluster with Master
// on the server
var express = require('express');
var server = express();
var socket = require('socket.io');
var io = socket.listen(server);
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;
// socket.io
io.set('store', new socket.RedisStore);
// set-up connections...
io.sockets.on('connection', function(socket) {
socket.on('join', function(rooms) {
rooms.forEach(function(room) {
socket.join(room);
});
});
socket.on('leave', function(rooms) {
rooms.forEach(function(room) {
socket.leave(room);
});
});
});
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
// Emit a message every second
function send() {
console.log('howdy');
io.sockets.in('room').emit('data', 'howdy');
}
setInterval(send, 1000);
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
}
This actually looks like Socket.IO succeeding at scaling. You would expect a message from one server to go to all sockets in that room, regardless of which server they happen to be connected to.
Your best bet is to have one master process that sends a message each second. You can do this by only running it if cluster.isMaster, for example.
Inter-process communication is not enough to make socket.io 1.4.5 working with cluster. Forcing websocket mode is also a must. See WebSocket handshake in Node.JS, Socket.IO and Clusters not working
Related
Nodejs is very weird for me dealing with asynchronous. I heavily used the structure from https://github.com/elad/node-cluster-socket.io. My biggest issue is the lack examples and documentation to handle clustering.
Code:
var config = require(__dirname+'/Config/config.json');
var sql = require(__dirname+'/Server/mysql.js');
var express = require('express'),
cluster = require('cluster'),
net = require('net'),
sio = require('socket.io'),
sio_redis = require('socket.io-redis'),
farmhash = require('farmhash');
var fs = require('fs');
var https = require('https');
var options = {
key: fs.readFileSync(__dirname+'/Config/key.pem'),
cert: fs.readFileSync(__dirname+'/Config/server.crt')
};
var app = express();
var num_processes = require('os').cpus().length;
if (cluster.isMaster) {
console.log('master online');
// This stores our workers. We need to keep them to be able to reference
// them based on source IP address. It's also useful for auto-restart,
// for example.
var workers = [];
// Helper function for spawning worker at index 'i'.
var spawn = function(i) {
workers[i] = cluster.fork();
// Optional: Restart worker on exit
workers[i].on('exit', function(code, signal) {
console.log('respawning worker', i);
spawn(i);
});
};
// Spawn workers.
for (var i = 0; i < num_processes; i++) {
console.log('Worker Spawned');
spawn(i);
}
// Helper function for getting a worker index based on IP address.
// This is a hot path so it should be really fast. The way it works
// is by converting the IP address to a number by removing non numeric
// characters, then compressing it to the number of slots we have.
//
// Compared against "real" hashing (from the sticky-session code) and
// "real" IP number conversion, this function is on par in terms of
// worker index distribution only much faster.
var worker_index = function(ip, len) {
return farmhash.fingerprint32(ip) % len; // Farmhash is the fastest and works with IPv6, too
};
app.get('/',function(req, res) {
res.sendFile(__dirname + '/client/index.html');
});
app.use('/client',express.static(__dirname + '/client'));
// Create the outside facing server listening on our port.
//var serv = https.createServer(options, app);
var server = https.createServer(options, app);
//var server = net.createServer({ pauseOnConnect: true }, function(connection) {
// We received a connection and need to pass it to the appropriate
// worker. Get the worker for this connection's source IP and pass
// it the connection.
//var worker = workers[worker_index(connection.remoteAddress, num_processes)];
//worker.send('sticky-session:connection', connection);
//console.log('Socket Connection!'+worker_index(connection.remoteAddress, num_processes));
server.listen(config.ServerPort);
var io = require('socket.io')(server,{});
io.sockets.on('connection', function(socket){
console.log('Socket Connection');
socket.on('signIn',function(data){
console.log('signin Attempt');
sql.LoginCheck(data,function(res){
if(res){
console.log('signin Success');
socket.emit('signInresponse',{success:true});
var worker =workers[1];
worker.send('sticky-session:connection', server);
console.log('Socket Connection passed');
}else{
console.log('signin Fail');
socket.emit('signInresponse',{success:false});
}
});
});
// socket.on('disconnect',function(){
// delete SOCKET_LIST[socket.id];
// console.log('socket disconnected');
// });
});
} else {
// Note we don't use a port here because the master listens on it for us.
var app = new express();
// Here you might use middleware, attach routes, etc.
// Don't expose our internal server to the outside.
var server = app.listen(0, 'localhost');
io = sio(server);
// Tell Socket.IO to use the redis adapter. By default, the redis
// server is assumed to be on localhost:6379. You don't have to
// specify them explicitly unless you want to change them.
io.adapter(sio_redis({ host: 'localhost', port: 6379 }));
// Here you might use Socket.IO middleware for authorization etc.
// Listen to messages sent from the master. Ignore everything else.
process.on('message', function(message, connection) {
if (message !== 'sticky-session:connection') {
return;
}
// Emulate a connection event on the server by emitting the
// event with the connection the master sent us.
console.log('Socket Connection received');
server.emit('connection', connection);
connection.resume();
});
}
Console
master online
Worker Spawned
Worker Spawned
Worker Spawned
Worker Spawned
Socket Connection
signin Attempt
Admin
true
signin Success
Socket Connection passed
Socket Connection received
_http_server.js:304
socket.setTimeout(self.timeout);
^
TypeError: socket.setTimeout is not a function
at Server.connectionListener (_http_server.js:304:12)
at emitOne (events.js:77:13)
at Server.emit (events.js:169:7)
at process.<anonymous> (/home/name/app/test.js:128:10)
at emitTwo (events.js:92:20)
at process.emit (events.js:172:7)
at handleMessage (internal/child_process.js:686:10)
at internal/child_process.js:497:7
at Server.<anonymous> (internal/child_process.js:57:9)
at Server.g (events.js:260:16)
respawning worker 1
No idea what to do with the error. But just doing random committing out it appears that the worker's listener is the culprit. I hope someone can help me with this.
I'm running into a strange issue where - in my production environment only (everything works fine in local testing) - socket.emit() works fine but io.emit() does not. In other words, each individual socket client connection can send and receive messages to the server, but when the server emits to all clients, none of them receive the message. This is strange, because it can see all the clients - if I check Object.keys(io.engine.clients) I see the ids of all connected clients. But io.emit() doesn't broadcast to any of them.
I'm using the latest version of node (7.7.4 and socket.io (1.7.3). I'm hosting on Heroku, and have enabled sticky sessions. I use cluster so that all CPUs are utilized, and I have a redis service set up to synchronize all the workers. All of that infrastructure appears to be working just fine.
Why would socket.emit() work for any given socket, but none of the other methods?
socket.on('sendChat', function(messageBundle) {
console.log('sockets: ' + Object.keys(io.engine.clients)) //shows clients
io.sockets.emit('incomingChat', messageBundle); //nothing
io.emit('incomingChat', messageBundle); //also nothing
var clients = Object.keys(io.engine.clients);
for (var i = 0; i < clients.length; i++) {
console.log('broadcasting to: ' + clients[i]);
socket.broadcast.to(clients[i]).emit('incomingChat', messageBundle); //still nothing
}
socket.emit('incomingChat', messageBundle); //this works though
});
UPDATE:
Here is where I define the socket stuff earlier.
var redis = require('redis');
var pub = redis.createClient(redisPort, redisHost, {
auth_pass: redisPwd
});
var sub = redis.createClient(redisPort, redisHost, {
detect_buffers: true,
auth_pass: redisPwd
});
var adapter = require('socket.io-redis');
io.adapter(adapter({
pubClient: pub,
subClient: sub
}));
var cluster = require('cluster');
var WORKERS = process.env.WEB_CONCURRENCY || 3;
if (cluster.isMaster) {
for (var i = 0; i < WORKERS; ++i)
console.log('forking process ' + i);
cluster.fork();
} else {
var express = require('express');
var session = require('express-session');
var app = express();
var server = require('http').createServer(app);
var io = require('socket.io').listen(server);
var port = process.env.PORT || 5000;
server.listen(port, function() {
console.log("Listening on " + port);
console.log("testing");
});
var mySession = session({...}) //this enables me to authenticate the socket using the same session middleware as the express routes
io.use(function(socket, next) {
mySession(socket.handshake, {}, next);
});
io.on('connection', function(socket) {
socket.emit('welcome');
console.log('socket connection');
socket.on(... etc.)
...})
...})
On the client side, the socket connection is initiated using a simple config:
var socket = io({
reconnection: true,
reconnectionDelay: 100,
});
io is undeclared on the beginning, and then You try to redeclare it in the else statement.
For me the issue was mismatching version of socket.io and reddis-adapter (upgraded socket io to 3.0.1 and redis adapter remained at 5.x.x)
Upgrading adapted to 6.0.1 solved it
I am trying to create a simple chat App with Node.JS and Websocket using Socket.IO. I inspired myself by this German tutorial and for the single backend instance it works well.
I would like to have 2 instances of the server in the backend, which should sync the chat messages between each other and store the chat history in redis. The client should display the last 10 messages from the current channel, when new chat-room is joined.
I tried to apply the solution from this stackoverflow page but I get some issues. First I get
WebSocket connection to 'ws://localhost:8080/socket.io/?EIO=3&transport=websocket&sid=LEqGvY0CVk9YaSzBAAAA' failed: Connection closed before receiving a handshake response
error in the console. And second, both clients are receiving the 'payload' messages in the 1-sec interval. I probably do not understand how the redis-sync mechanism works and I also do not yet know how to display the chat history.
Here is my code so far:
var conf = require('./config.json');
var cluster = require('cluster');
var os = require('os');
if (cluster.isMaster) {
// we create a HTTP server, but we do not use listen
// that way, we have a socket.io server that doesn't accept connections
var server = require('http').createServer();
var io = require('socket.io').listen(server);
var redis = require('socket.io-redis');
io.adapter(redis({ host: conf.redisUri, port: conf.redisPort }));
setInterval(function() {
// all workers will receive this in Redis, and emit
io.emit('chat', {time: new Date(), text:'payload'});
}, conf.syncIntervall);
// set number of workers
for (var i = 0; i < conf.numberOfWorkers; i++) {
cluster.fork();
}
cluster.on('exit', function(worker, code, signal) {
console.log('worker ' + worker.process.pid + ' died');
});
}
if (cluster.isWorker) {
var express = require('express');
var app = express();
var http = require('http');
var server = http.createServer(app);
var io = require('socket.io').listen(server);
var redis = require('socket.io-redis');
// Webserver
//app.listen(conf.defaultPort);
server.listen(conf.defaultPort);
// deliver static files
app.use(express.static(__dirname + '/public'));
// route for the / path
app.get('/', function (req, res) {
// send the index.html in the reponse
res.sendfile(__dirname + '/public/index.html');
});
// Websocket
io.adapter(redis({ host: conf.redisUri, port: conf.redisPort }));
// Callback when a client connects
io.sockets.on('connection', function (socket) {
// store the room name in the socket session for this client
socket.room = 'defaultChannel';
// send client to room 1
socket.join('defaultChannel');
// echo to client they've connected
socket.emit('chat', { time: new Date(), text: 'You have connected to room defaultChannel on the server!' });
socket.emit('chat', { time: new Date(), text: 'Connected to chat worker-node: ' + cluster.worker.id});
// if a message is received
socket.on('chat', function (data) {
// the it will be send to all other clients
console.log('the current channel', socket.room);
socket.broadcast.in(socket.room).emit('chat', { time: new Date(), name: data.name || 'anonymous', text: data.text });
});
// if a client joins a channel
socket.on('join', function (room) {
// store the room name in the socket session for this client
socket.room = room;
// send client to room 1
socket.join(room);
console.log('Client joined room ' + room);
});
});
// Log port number in the console
console.log('Server running under http://127.0.0.1:' + conf.defaultPort + '/');
}
I would like to broadcast a single message to every client every second (think about it as custom heartbeat mechanism).
So the NodeJS app is started, sockets are created and when I connect from the client app the heartbeat messages are broadcasted. I'm still developing the client application and that means hitting F5 all the time and reloading the application. The new client SocketIO connection is created on load and this results in heartbeat messages coming to client app with rate much higher than 1 message/sec.
There is nothing special about the code - server side:
var server = http.createServer(app);
var io = require('socket.io').listen(server);
server.listen(8080);
io.sockets.on('connection', function(socket) {
...
setInterval(function() {
console.info('broadcasting heartbeat');
socket.broadcast.emit('heartbeat', /* custom heartbeat*/);
}, 1000);
...
});
Client side:
var socket = io.connect('localhost', { 'reconnect': false, port: 8080 });
socket.on('heartbeat', function(data) { console.log('heartbeat'); });
Can anybody give me some advice what's wrong? Thanks
No need to startup up an interval each time. You can store the intervalID, and even clear it out with clearInterval(INTERVAL); when it's not needed.
var server = http.createServer(app);
var io = require('socket.io').listen(server);
server.listen(8080);
var INTERVAL;
io.sockets.on('connection', function(socket) {
...
if (!INTERVAL) {
INTERVAL = setInterval(function() {
console.info('broadcasting heartbeat');
socket.broadcast.emit('heartbeat', /* custom heartbeat*/);
}, 1000);
}
...
});
The idea is to create server in master process, and handle requests in workers. I want to utilize all CPU cores and to have kinda load balance as well.
At first I tried to send server handler from master to worker:
var cluster = require('cluster');
if (cluster.isMaster) {
var app = require('express').createServer();
app.listen(1234);
var worker = cluster.fork();
worker.stdin.write('fd', 'utf8', app._handle);
} else {
process.stdin.resume();
process.stdin.on('fd', function(fd){
var stream = require('net').Stream(fd);
var io = require('socket.io').listen(stream);
io.sockets.on('connection', function(socket){
...
}
}
}
but write did not fired on('fd'...) event handler in worker. Then I put everything to master in order to check if this is possible at all:
var app = require('express').createServer();
app.listen(1234);
var stream = require('net').Stream(app._handler);
var io = require('socket.io').listen(stream);
the server starts without any errors but does not work. I cant event request the socket.io.js script from the client side with tag:
<script src="http://localhost:8080/socket.io/socket.io.js"></script>
Response: Cannot GET /socket.io/socket.io.js
So I have two troubles:
How to send opened socket descriptor to the worker?
How to set up handler for the server by this descriptor?
You don't need to take care of file descriptors etc on your own, look at a similar question I've just answered here:
Node.js, multi-threading and Socket.io
Code sample:
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
} else {
var sio = require('socket.io')
, RedisStore = sio.RedisStore
, io = sio.listen(8080, options);
// Somehow pass this information to the workers
io.set('store', new RedisStore);
// Do the work here
io.sockets.on('connection', function (socket) {
socket.on('chat', function (data) {
socket.broadcast.emit('chat', data);
})
});
}