Pass request to specific forked node instance - node.js

Correct me if I am wrong, but it isn't possible to start multiple http-servers on the same port.
Based on this it is interesting the NodeJS cluster may fork. Of cause I know there is the master what is passing the request to one of the forked workers. What worker is managed by operating system or cluster.schedulingPolicy= "rr" for "round robin".
The point is: Every worker needs its own memory, so you need x-times much memory where x is the number of workers.
But if I like to run different (sub)domains out of my node app, I also like to hold different parts of an in_memory database (e.g. a simple JSON file) bound to a (sub)domain. OR based on resources like subdomain.example.tdl/resource1/whatever.
It doesn't seams to be possible. Either resource based nor domain based.
In my opinion it should be possible, because I can route based on request-objects (res.url) and resources (params) by different existing middleware.
So that way it should be possible to tell the master to pass the request to a specific forked instance.

It's possible: you need create net server at master, and pass connection by you rules to workers http server:
var cluster = require('cluster');
if (cluster.isMaster) {
var workers = [];
// Create workers
for (var i=0; i<require('os').cpus().length; i++) {
workers[i] = cluster.fork({WORKER_INDEX:i, JSON_INDEX:i});
}
// Create net server at master
var server = require('net').createServer({pauseOnConnect:true}, function(c) {
var b = Math.floor( Math.random()*workers.length );
workers[b].send("doit",c);
}).listen(3000);
} else {
// Load specific data for worker (pass parametr JSON_INDEX)
var json = "{default:default}";
try {
json = require("fs").readFileSync('./data_'+process.env.JSON_INDEX+'.json');
} catch (e) {}
// Create http server and pass specific json to client
var server = require('http').createServer( function(req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(json);
}).listen(0,'127.0.0.1');
// Get message from master and check if need pass to http server
process.on('message', function(m,c) {
if ( "doit" === m ) {
server.emit('connection', c);
c.resume();
}
});
}

Related

Nodejs Clustering with Sticky-Session

const cluster = require('cluster');
const http = require('http');
const numCPUs = require('os').cpus().length;
if (cluster.isMaster) {
console.log(`Master ${process.pid} is running`);
// Fork workers.
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', (worker, code, signal) => {
console.log(`worker ${worker.process.pid} died`);
});
} else {
// Workers can share any TCP connection
// In this case it is an HTTP server
var sticky = require('sticky-session');
var express = require('express');
var app = express();
app.get('/', function (req, res) {
console.log('worker: ' + cluster.worker.id);
res.send('Hello World!');
});
var server = http.createServer(app);
sticky.listen(server,3000);
console.log(`Worker ${process.pid} started`);
}
I looked up the documentation for nodejs clustering and sticky-session
and another stack overflow answer regarding this
var cluster = require('cluster');
var http = require('http');
var sticky = require('sticky-session');
var express = require('express');
var app = express();
app.get('/', function (req, res) {
console.log('worker: ' + cluster.worker.id);
res.send('Hello World!');
});
var server = http.createServer(app);
sticky.listen(server,3000);
If the above snippet is run without forking it works fine but else never works as shown in the clustered example above in which the threads are started but server is never initialised .
I read there is alternative of sticky-cluster can somebody give a proper authoritative answer on this topic which will be useful for people looking for the same and the another main issue comes with this is the app.locals object which is used to store variables for an app instance and the occurrence multiple server instances causes this to break as values will be different across different instances so this approach causes a big issue and app breaks so .When answering please don't copy paste some code please give a detailed answer detailing the approach its benefit and short comings.
I am not looking for a answer that is limited to using sticky-sessions nodejs module, I welcome all other approaches in which all cores of the processor are used and but ensuring session continuity .
If it involves RedisStore or MongoDb store its ok,What I want to know is about a standard approach in case of nodejs application with clustering with session continuity
https://github.com/indutny/sticky-session
https://nodejs.org/api/cluster.html
https://stackoverflow.com/a/37769107/3127499
There is a small problem in your code.
"sticky-session" module already uses node.js "cluster" module within.You dont need to "fork()" because sticky-session will already do it for you. Lets find out how:
var cluster = require('cluster'); // Only required if you want the worker id
var sticky = require('sticky-session');
var server = require('http').createServer(function(req, res) {
res.end('worker: ' + cluster.worker.id);
});
sticky.listen(server, 3000);
calling sticky.listen() will already spawn workers for you.See the listen() implementation below
function listen(server, port, options) {
if (!options)
options = {};
if (cluster.isMaster) {
var workerCount = options.workers || os.cpus().length;
var master = new Master(workerCount, options.env);
master.listen(port);
master.once('listening', function() {
server.emit('listening');
});
return false;
}
return true;
}
This line var master = new Master(workerCount, options.env) is responsible for spawning workers.
see the Master() implementation below:
function Master(workerCount, env) {
net.Server.call(this, {
pauseOnConnect: true
}, this.balance);
this.env = env || {};
this.seed = (Math.random() * 0xffffffff) | 0;
this.workers = [];
debug('master seed=%d', this.seed);
this.once('listening', function() {
debug('master listening on %j', this.address());
for (var i = 0; i < workerCount; i++)
// spawning workers
this.spawnWorker();
});
}
So indeed when you call sticky.listen(server,port) you are actually calling cluster.fork().hence you should not explicitly again call fork().
Now your code should look like:
var cluster = require('cluster'); // Only required if you want the worker id
var sticky = require('sticky-session');
var server = require('http').createServer(function(req, res) {
res.end('worker: ' + cluster.worker.id);
});
//sticky.listen() will return false if Master
if (!sticky.listen(server, 3000)) {
// Master code
server.once('listening', function() {
console.log('server started on 3000 port');
});
} else {
// Worker code
}
One important thing to remember is that spawned workers will have its own EVENTLOOP and memory hence resources are not shared among each other.
You can use "REDIS" or other npm modules such as "memored" to share resources among different workers.
Hope this solves your both issues.
I think you are confusing sticky session with shared memory store.
Let me try to help:
Sticky-sessions module is balancing requests using their IP address. Thus client will always connect to same worker server, and socket.io will work as expected, but on multiple
processes!
Implementing sticky sessions means that you now have multiple nodes accepting connections. However, it DOES NOT guarantee that these nodes will SHARE the same memory, as each worker has their own eventloop and internal memory state.
In other words, data being processed by one node may not be available to other worker nodes, which explains the issue you pointed out.
...another main issue comes with this is the app.locals object which
is used to store variables for an app instance and the occurrence
multiple server instances causes this to break as values will be
different across different instances so this approach causes a big
issue and app breaks...
Thus, to resolve this, we would require using something like Redis so that data can be shared across multiple nodes.
Hope this helps!
If I understand your questions correctly, you are dealing with in-memory data storage or session storage. This is one of the known problems in the session based authentication in multi-node or in a cluster. Suppose you made a call to Node A and get the session called sessionA but for the next call you made it to Node B. Node B does not know anything about sessionA. People try to solve this issue by using sticky session but that is not enough. Good practice will be to use an alternative approach, such as JWT or oAuth2. I prefer JWT for service to service communication. JWT does not store anything and stateless. It works brilliantly with REST since REST is also stateless. Here https://www.rfc-editor.org/rfc/rfc7519 is the specification of the JWT implementation. If you need to have some sort of refresh token, that case you need to consider a storage. Storage can be anything like REDIS, MongoDB or any other SQL based DB. For further clarification about JWT in nodejs:
https://jwt.io/
https://jwt.io/introduction/
https://www.npmjs.com/package/jsonwebtoken
https://cloud.google.com/iot/docs/how-tos/credentials/jwts#iot-core-jwt-refresh-nodejs

socket.io+redis+expressjs cluster - get socket object in expressjs request

Question based on this answer: https://stackoverflow.com/a/18650183/4478897
I tried to find this solution but nothing seems to work in the way that I need.
Clustering expressjs and socket.io we can share sessions using redis and send io messages inside io world (io.sockets.on('connection',...). The problem is if we want to send the message (or use a simple socket.join/leave) inside the expressjs world (route.get/post).
If we are not using clusters we can atach the client socket object to the express request object (or simply export the io object) and then use it at any time on any GET/POST route.
At the other hand, if we are clustering and use the mentioned method to get the socket object inside the expressjs world, sometimes the socket object is undefined because the socket object for this client is initialized at other worker.
Some example flow:
Client connects to http://localhost and worker 1 handles this request.
After the page is loaded, the client connects to socket.io. Worker 2 handles this connection.
Client do a POST and again worker 1 or worker X handles this request.
In this case when the client do the POST, only the worker 2 knows the socket object for this client. So this will get an undefined socket object.
So, the question:
How can we get the client socket object from any worker to reuse it on expressjs request object.
Maybe my code is wrong but is almost like the link to the answer mentioned above.
NOTEs
Don't want to use some kind of proxy.
Don't want to migrate to other libraries (expressio, sockjs...)
Sorry for my English :)
Using last nodejs, socket.io, expressjs, socket.io-redis, redis... versions
Don't hesitate to ask something!
UPDATE 1
Possible solution but still need to test it. Dont know if this is a really good: solution.
UPDATE 3: Working code on my own answer
UPDATE 2
Like update 1 but using https://nodejs.org/dist/latest-v5.x/docs/api/cluster.html#cluster_event_message
remoteJoin and remoteLeave methods were added in socket.io-redis 3.0.0:
io.adapter.remoteJoin('<my-id>', 'room1', function (err) {
if (err) { /* unknown id */ }
// success
});
io.adapter.remoteLeave('<my-id>', 'room1', function (err) {
if (err) { /* unknown id */ }
// success
});
Note: The implementation looks a lot (hopefully?) like the answer above.
Well finally tried the code and it works (with some misspells modifications and other things) but i'm sure that needs to be a better code somewhere. So i'm open to more answers!
This code is part of my socket.io module when authorize the client socket and some other stuff...
var redis = require("redis");
var redisPub = redis.createClient();
var redisSub = redis.createClient();
var PubSubChannel = "clusterChannel";
// Function that checks if this worker knows the socket object of this socketId.
// If not, publish the message to all the other sockets (workers)
io.socketDo = function (type, socketId, roomName) {
if (typeof io.sockets.connected[socketId] != "undefined") {
if (type === "join") {
return io.sockets.connected[socketId].join(roomName);
}
if (type === "leave") {
return io.sockets.connected[socketId].leave(roomName);
}
} else {
redisPub.publish(
PubSubChannel,
JSON.stringify({
type: type,
socketId: '' + socketId,
roomName: roomName
})
);
}
};
// Subscribe to some channel
redisSub.subscribe(PubSubChannel);
// When this worker receive a message from channel "PubSubChannel" checks
// if it have the socket object for this socketId and do the operation
redisSub.on("message", function (channel, data) {
data = JSON.parse(data);
var type = data.type;
var socketId = data.socketId;
var roomName = data.roomName;
if ((type === "join" || type === "leave") && channel == PubSubChannel){
if (typeof io.sockets.connected[socketId] != "undefined") {
if (type === "join") {
return io.sockets.connected[socketId].join(roomName);
}
if (type === "leave") {
return io.sockets.connected[socketId].leave(roomName);
}
}
}
});
Then just simply export the module and attach it to your expressjs request => req.io = io
// req.session.socketId value is fetched on "io.sockets.on('connection', function(socket) {"
// by express to socket.io using redis shared sessions
app.get('/', function (req, res) {
req.io.socketDo('join', req.session.socketId, 'someRoomToJoin');
// IT WORKS!
req.io.sockets.in('someRoomToJoin').emit('text');
req.io.socketDo('leave', req.session.socketId, 'someRoomToLeave');
res.send('Hello World!');
});

Node JS TCP Proxy: Reuse socket in callback function

I'm trying to implement a TCP proxy in Node JS. I only have some experience with Javascript so I met a lot of problems along the way. I've done a lot of searching for this one but had no luck.
The problem occurs when browser sends a CONNECT request for HTTPS. My proxy will parse the host name and port, and then create a new socket that connects to the server. If all these steps went well, I will start forwarding message.
Part of my code looks like this:
var net = require('net');
var server = net.createServer(function(clientSock) {
clientSock.on('data', function(clientData) {
var host = // get from data
var port = // get from data
if (data is a CONNECT request) {
// Create a new socket to server
var serverSock = new net.Socket();
serverSock.connect(port, host, function() {
serverSock.write(clientData);
clientSock.write('HTTP/1.1 200 OK\r\n');
}
serverSock.on('data', function(serverData) {
clientSock.write(serverData);
}
}
}
}
Since the CONNECT request needs both client socket and server socket open until one side closes the connection, the code above doesn't have this behavior. Every time I receive some data from client, I will create a new socket to server and the old one is closed.
Is there a way to store the server socket as a global variable so that the data event handler can reuse it? Or is there any other way to solve this?
Thanks a lot!!!!
You can just move the variable up to a higher scope so it survives across multiple events and then you can test to see if its value is already there:
var net = require('net');
var server = net.createServer(function(clientSock) {
var serverSock;
clientSock.on('data', function(clientData) {
var host = // get from data
var port = // get from data
if (data is a CONNECT request) {
// Create a new socket to server
if (!serverSock) {
serverSock = new net.Socket();
serverSock.connect(port, host, function() {
serverSock.write(clientData);
clientSock.write('HTTP/1.1 200 OK\r\n');
}
serverSock.on('data', function(serverData) {
clientSock.write(serverData);
}
} else {
serverSock.write(clientData);
}
}
}
}

Socket.io 'Handshake' failing with cluster and sticky-session

I am having problems getting the sticky-sessions socket.io module to work properly with even a simple example. Following the very minimal example given in the readme (https://github.com/indutny/sticky-session), I am just trying to get this example to work:
var cluster = require('cluster');
var sticky = require('sticky-session');
var http = require('http');
if (cluster.isMaster) {
for (var i = 0; i < 4; i++) {
cluster.fork();
}
Object.keys(cluster.workers).forEach(function(id) {
console.log("Worker running with ID : " +
cluster.workers[id].process.pid);
});
}
if (cluster.isWorker) {
var anotherServer = http.createServer(function(req, res) {
res.end('hello world!');
});
anotherServer.listen(3000);
console.log('http server on 3000');
}
sticky(function() {
var io = require('socket.io')();
var server = http.createServer(function(req, res) {
res.end('socket.io');
});
io.listen(server);
io.on('connection', function onConnect(socket) {
console.log('someone connected.');
socket.on('sync', sync);
socket.on('send', send);
function sync(id) {
socket.join(id);
console.log('someone joined ' + id);
}
function send(id, msg) {
io.sockets.in(id).emit(msg);
console.log('someone sent ' + msg + ' to ' + id);
}
});
return server;
}).listen(3001, function() {
console.log('socket.io server on 3001')
});
and a simple client:
var socket = require('socket.io-client')('http://localhost:3001');
socket.on('connect', function() {
console.log('connected')
socket.emit('sync', 'secret')
});
The workers start up fine. The http servers work fine. But when the client connects, the console logs 'someone connected' and nothing more. The client never fires the on connect event, so I think the upgrade/handshake is failing or something. If anyone can spot what I am doing wrong that would help alot.
Thanks!
#jordyyy : I was facing same issue after googling I have fond answer.
Socket.Io handshaking task complete in more than one request and when you will run on sticky session it means you are using multiple process according to your core.
So handshaking request will distribute on different different process and they can't talk.(not IPC) (They are child process) and most of time connection will be failed/lost.(connection-disconnect event occurs frequently )
So what is solution ? Solution is socketio-sticky-session
Socketio-sticky-session, manage connection on IP based. So when you will request by any client then it will maintain ip address with respect process/worker. So further request will be forward to same process/worker and your connection properly stabilized.
And When you will use redies adapter then you can actually maintain socket
connection data b/w all processes/workers.
For more information
https://github.com/elad/node-cluster-socket.io
(you need some patch on worker_index method, if your server is supporting IPv6)
Just knowledge bytes. :) :)
One more thing, you don't need to fork process. It will be done by sticky session.
This was super old and wasn't really answered when i needed it, but my solution was to drop this bad module and any other super confusing module and just use pub/sub with redis adapter. The only other step was to force transports to websockets, and if that bothers anyone then use something else. For my purposes my solution was simple, readable, didn't mess with the 'typical' socket.io api, and best of all it worked extremely well.

Save Data on Socket in Socket.IO

I want to save some data on the socket, server side, so whenever the client emits any data to the server, I want that data to be available!
One use case can be storing a token on the socket. When the client is connecting for the first time, it will emit the token, if it has one, or it will show the login page and then the login data will be sent to the server. Whichever one it is, I want to store the token on the server, so that every request after that doesn't need to specify the token.
Later, I'll use RedisStore, so all the data will be accessible all the servers running the app.
My only question is, where do I store the data on the socket so it's associated with that client?
on http://socket.io/#how-to-use
scroll to: Storing data associated to a client
use socket.set and socket.get to set and get data asynchronously
I'm suffering from the same question and guessing what's going on with an example code from socket.io on version 4.x
In the example, They use middleware(use function to register a middleware)
namespace.use((socket, next) => {
// get data from client
const sessionID = socket.handshake.auth.sessionID;
const {userId, username} = yourFunction();
// set socket specific data
socket.sessionID = sessionID;
socket.userID = session.userID;
socket.username = session.username;
next();
});
Middlewares are executed when a socket is connected with a server.
and you can use the data afterward
note - Socket.IO reference tells use socket.data for this purpose
namespace.on('connection', socket => {
socket.emit("join", `${socket.username} has been joined`);
})
If you use multiple servers, then you have to keep in mind that the data is only valid for the server
On multiple server environment, You need a single source of data which will be used by socket servers.
namespace.use(async (socket: Socket & { sessionID?: string, userID?: string, username?: string }, next) => {
const sessionID = socket.handshake.auth.sessionID; // [socket.handshake][4]
// or other [socket related attributes][4]
if (sessionID) {
// you have to implement a function to save and retrive session info
const session = await someFunctionToRetrieveSession(sessionID);
if (session) {
socket.sessionID = sessionID;
socket.userID = session.userID;
socket.username = session.username;
return next();
}
}
const username = socket.handshake.auth.username;
if (!username) {
return next(new Error("invalid username"));
}
socket.sessionID = randomId();
socket.userID = randomId();
socket.username = username;
next();
});
and one more thing as I understood the namespace.use function is called only for the namespace if your client use other namespace then default then default('/') use function will not be called.
//client side
io("/chat");
...
//server side
io.use() // == io.of('/').use() will not be called
io.of('/chat').use() // only will be called
Thanksfully the author of the example implemented a sessionStorage using redis
refer to this example code
with this info, I guess socket.io server saves sockets' info in memory and set a property of a socket will be saved and when the socket comes later the server retrives the socket and it's related data. but because it happens on memory so you can't share the info among other servers that's why you have to find a way to share the data with other servers(eg. redis)
You can save the data on the global variables when you dont want to use any database
var globalVariable = {};
io.sockets.on("connection", function (socket) {
socket.on("save-client-data", function (clientData) {
var clientId = clientData.clientId;
globalVariable[clientId] = JSON.parse(clientHandshakeData);
});
socket.on("get-client-data", function (clientId) {
var clientData = globalVariable[clientId];
socket.emit("get-client-data", JSON.stringify(clientData));
});
});
This worked for my scenario, however I'm not aware of the performance implications.

Resources