how do I get a ROUTER -> DEALER to echo? - node.js

I'm trying to build the back half of a Paranoid Pirate Pattern, a ROUTER that sends work out to a set of DEALER nodes (it's possible that I'm misundertanding the diagram). For now I just want the DEALERs to echo the work back or just send back a message that says "done". The problem is that the worker node (DEALER) is never receiving any messages.
var buildSocket, connectionTemplate, delay, frontPort, log, q, qPort, worker, workerPort, zmq;
zmq = require("zmq");
frontPort = 5000;
qPort = 5100;
workerPort = 5200;
connectionTemplate = "tcp://127.0.0.1:";
log = console.log;
debugger;
delay = process.argv[2] || 1000;
buildSocket = function(desc, socketType, port) {
var socket;
log("creating " + socketType + " socket");
socket = zmq.socket(socketType);
socket.identity = "" + desc + "-" + socketType + "-" + process.pid + "-" + port;
return socket;
};
q = buildSocket('q_output', 'router', qPort);
worker = buildSocket('worker', 'dealer', workerPort);
worker.bind("" + connectionTemplate + workerPort);
q.connect("" + connectionTemplate + workerPort);
q.on('message', function() {
var args;
args = Array.apply(null, arguments);
log('queue received ' + JSON.stringify(arguments));
return worker.send('work done');
});
worker.on('message', function() {
var args;
log('back received ' + JSON.stringify(arguments));
args = Array.apply(null, arguments);
return q.send(args);
});
setInterval((function() {
var value;
value = Math.floor(Math.random() * 100);
console.log(q.identity + ": sending " + value);
q.send(value);
}), delay);
The queue and worker on 'message' events never fire. The way I understand this is you set up the ROUTER node, bind it to a port (for return messages), set up the DEALER nodes and bind them to a port then connect the ROUTER to the DEALER port and start sending messages. In practice, messages are sent but never received:
creating router socket
creating dealer socket
q_output-router-60326-5100: sending 30
q_output-router-60326-5100: sending 25
q_output-router-60326-5100: sending 65
q_output-router-60326-5100: sending 68
q_output-router-60326-5100: sending 50
q_output-router-60326-5100: sending 88

You've got things a little backwards, here. Think of a DEALER socket as a modified REQ socket... it should be initiating your messages to your router. A ROUTER socket is more like a modified REP socket... it should be responding to the initial request sent by your dealer.
You don't strictly need to follow that pattern with ROUTER/DEALER pairings... but it definitely makes things much easier, so you should stick with it while you're learning.
The second thing that sticks out to me from your code is that you message handlers, you've got the wrong socket sending messages.
Take for instance this code (directly copied without modification):
q.on('message', function() {
var args;
args = Array.apply(null, arguments);
log('queue received ' + JSON.stringify(arguments));
return worker.send('work done');
});
... that says (in psuedocode):
when `q` receives a message from `worker`
print out the message we received
now have `worker` send *another* message that says "work done"
What you want is something more like the following (simplified):
var zmq = require("zmq");
var q = zmq.socket('router');
var worker = zmq.socket('dealer');
// I've switched it so the router is binding and the worker is connecting
// this is somewhat arbitrary, but generally I'd consider workers to be less
// reliable, more transient, and also more numerous. I'd think of the queue
// as the "server"
// I've used bindSync, which is synchronous, but that's typically OK in the
// startup phase of a script, and it simplifies things. If you're spinning
// up new sockets in the middle of your script, using the async bind()
// is more appropriate
q.bindSync('tcp://127.0.0.1:5200');
worker.connect('tcp://127.0.0.1:5200');
q.on('message', function() {
var args;
args = Array.apply(null, arguments);
log('queue received ' + JSON.stringify(arguments));
// if we've received a message at our queue, we know the worker is ready for
// more work, so we ready some new data, regardless of whether we
// received work back
var value = Math.floor(Math.random() * 100);
// note that the socket that received the message is responding back
if (args[1].toString() == 'ready') console.log('worker now online');
else console.log('work received: '+args[1].toString());
// we need to specify the "ID" of our worker as the first frame of
// the message
q.send([args[0], value]);
// we don't need to return anything, the return value of a
// callback is ignored
});
worker.on('message', function() {
var args;
log('back received ' + JSON.stringify(arguments));
args = Array.apply(null, arguments);
// we're just echoing back the "work" we received from the queue
// for additional "workiness", we wait somewhere between 10-1000
// milliseconds to respond
setTimeout(function() {
worker.send(args[0].toString());
}, parseInt(args[0].toString())*10);
});
setTimeout((function() {
var value;
console.log('WORKER STARTING UP');
// the worker starts the communication, indicating it's ready
// rather than the queue just blindly sending work
worker.send('ready'); // sending the first message, which we catch above
}), 500); // In my experience, half a second is more than enough, YMMV
... as you can see, the pattern is:
Worker indicates readiness
Queue sends available work
Worker completes work and sends back
Queue receives completed work and sends back more work
GOTO 3

Related

socket.io: clientsCount and on connection and on disconnect events

I count the number of online websockets via onConnection and onDisconnect events:
const socketIo = require('socket.io');
var on_connect = 0;
var on_disconnect = 0;
var port = 6001;
var io = socketIo(port, {
pingTimeout: 5000,
pingInterval: 10000
});
//I have only one NameSpace and root NS is not used
var ns1 = io.of('ns1');
ns1
.on('connection', function (socket) {
on_connect += 1;
socket.on('disconnect', function (reason) {
on_disconnect += 1;
});
});
...
var online = on_connect - on_disconnect;
...
But online value not equal io.engine.clientsCount value.
And over time the difference between online value and io.engine.clientsCount value is growing up.
Why this is happens?
What is needed to make to fix this?
on_connect and on_disconnect variables are updates in the callback events, whereas the online variable is not recalculated. So you will need to recalculate the online variable every time the other variable change.
It might be easier to use only one variable to count connections. Increment it on connection, and decrement it on disconnect. That's how I keep track of the number of connections. Then there isn't a need to calculate it every time its value is needed.
Also, the line that states var online = on_connect - on_disconnect; Is occurring before either is modified... That's what #gvmani is trying to tell you.
Here's an example of some of what I'm doing. The code below sets up to listen for connections & disconnections and maintains a count of current connections. I should note that I'm not using a namespace like the OP, but the counting portion is what's of importance. I'll also note that I use connCount > 0 in the send() function. Which in my application is used to broadcast to all connected clients.
/* ******************************************************************** */
// initialize the server
const http = require('http');
const server = http.createServer();
// Socket.io listens to our server
const io = require('socket.io').listen(server);
// Count connections as they occur, decrement when a client disconnects.
// If the counter is zero then we won't send anything over the socket.
var connCount = 0;
// A client has connected,
io.on('connection', function(socket) {
// Increment the connection counter
connCount += 1;
// log the new connection for debugging purposes.
console.log(`on connect - ${socket.id} ${connCount}`);
// The client that initiated the connection has disconnected.
socket.on('disconnect', function () {
connCount -= 1;
console.log(`on disconnect - ${socket.id} ${connCount}`);
});
});
// Start listening...
server.listen(3000);
// Send something to all connected clients (a broadcast) the
// 'channel' will indicate the destination within the client
// and 'data' becomes the payload.
function send(channel, data) {
console.log(`send() - channel = ${channel} payload = ${JSON.stringify(data)}`);
if(connCount > 0) io.emit(channel, {payload: data});
else console.log('send() - no connections');
};

NodeJs: Never emits "end" when reading a TCP Socket

I am pretty new to Node.Js and I'm using tcp sockets to communicate with a client. Since the received data is fragmented I noticed that it prints "ondata" to the console more than once. I need to be able to read all the data and concatenate it in order to implement the other functions. I read the following http://blog.nodejs.org/2012/12/20/streams2/ and thought I can use socket.on('end',...) for this purpose. But it never prints "end" to the console.
Here is my code:
Client.prototype.send = function send(req, cb) {
var self = this;
var buffer = protocol.encodeRequest(req);
var header = new Buffer(16);
var packet = Buffer.concat([ header, buffer ], 16 + buffer.length);
function cleanup() {
self.socket.removeListener('data', ondata);
self.socket.removeListener('error', onerror);
}
var body = '';
function ondata() {
var chunk = this.read() || '';
body += chunk;
console.log('ondata');
}
self.socket.on('readable', ondata);
self.socket.on('end', function() {
console.log('end');
});
function onerror(err) {
cleanup();
cb(err);
}
self.socket.on('error', onerror);
self.socket.write(packet);
};
The end event will handle the FIN package of the TCP protocol (in other words: will handle the close package)
Event: 'end'#
Emitted when the other end of the socket sends a FIN packet.
By default (allowHalfOpen == false) the socket will destroy its file descriptor once it has written out its pending write queue. However, by setting allowHalfOpen == true the socket will not automatically end() its side allowing the user to write arbitrary amounts of data, with the caveat that the user is required to end() their side now.
About FIN package: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Connection_termination
The solution
I understand your problem, the network communication have some data transfer gaps and it split your message in some packages. You just want read your fully content.
For solve this problem i will recommend you create a protocol. Just send a number with the size of your message before and while the size of your concatenated message was less than total of your message size, keep concatenating :)
I have created a lib yesterday to simplify that issue: https://www.npmjs.com/package/node-easysocket
I hope it helps :)

NodeJS + RabbitMQ - How to limit the number of messages a receiver processes

I am using the amqplib node module and following the hello world send/receive tutorial.
https://github.com/squaremo/amqp.node/tree/master/examples/tutorials
My receivers/workers take that message and perform a CPU intensive task in the background, so I can only process about 5 messages at once.
What is the best way to control the number of messages that are being accepted by the receiver.
Code sample:
var amqp = require('amqplib');
amqp.connect('amqp://localhost').then(function(conn) {
process.once('SIGINT', function() { conn.close(); });
return conn.createChannel().then(function(ch) {
var ok = ch.assertQueue('hello', {durable: false});
ok = ok.then(function(_qok) {
return ch.consume('hello', function(msg) {
console.log(" [x] Received '%s'", msg.content.toString());
}, {noAck: true});
});
return ok.then(function(_consumeOk) {
console.log(' [*] Waiting for messages. To exit press CTRL+C');
});
});
}).then(null, console.warn);
You need to set the Quality Of Service on the model. Here is how you would do that in C#
var _model = rabbitConnection.CreateModel();
// Configure the Quality of service for the model. Below is how what each setting means.
// BasicQos(0="Dont send me a new message untill I’ve finshed", _fetchSize = "Send me N messages at a time", false ="Apply to this Model only")
_model.BasicQos(0, _fetchSize, false);
The QOS works with the Ack process. So until you Ack a message, it will only send you N (_fetchSize) at a time. I think you'll have to set noAck: false in your code to get this working.
Good luck!

Best way to execute parallel processing in Node.js

I'm trying to write a small node application that will search through and parse a large number of files on the file system.
In order to speed up the search, we are attempting to use some sort of map reduce. The plan would be the following simplified scenario:
Web request comes in with a search query
3 processes are started that each get assigned 1000 (different) files
once a process completes, it would 'return' it's results back to the main thread
once all processes complete, the main thread would continue by returning the combined result as a JSON result
The questions I have with this are:
Is this doable in Node?
What is the recommended way of doing it?
I've been fiddling, but come no further then following example using Process:
initiator:
function Worker() {
return child_process.fork("myProcess.js");
}
for(var i = 0; i < require('os').cpus().length; i++){
var process = new Worker();
process.send(workItems.slice(i * itemsPerProcess, (i+1) * itemsPerProcess));
}
myProcess.js
process.on('message', function(msg) {
var valuesToReturn = [];
// Do file reading here
//How would I return valuesToReturn?
process.exit(0);
}
Few sidenotes:
I'm aware the number of processes should be dependent of the number of CPU's on the server
I'm also aware of speed restrictions in a file system. Consider it a proof of concept before we move this to a database or Lucene instance :-)
Should be doable. As a simple example:
// parent.js
var child_process = require('child_process');
var numchild = require('os').cpus().length;
var done = 0;
for (var i = 0; i < numchild; i++){
var child = child_process.fork('./child');
child.send((i + 1) * 1000);
child.on('message', function(message) {
console.log('[parent] received message from child:', message);
done++;
if (done === numchild) {
console.log('[parent] received all results');
...
}
});
}
// child.js
process.on('message', function(message) {
console.log('[child] received message from server:', message);
setTimeout(function() {
process.send({
child : process.pid,
result : message + 1
});
process.disconnect();
}, (0.5 + Math.random()) * 5000);
});
So the parent process spawns an X number of child processes and passes them a message. It also installs an event handler to listen for any messages sent back from the child (with the result, for instance).
The child process waits for messages from the parent, and starts processing (in this case, it just starts a timer with a random timeout to simulate some work being done). Once it's done, it sends the result back to the parent process and uses process.disconnect() to disconnect itself from the parent (basically stopping the child process).
The parent process keeps track of the number of child processes started, and the number of them that have sent back a result. When those numbers are equal, the parent received all results from the child processes so it can combine all results and return the JSON result.
For a distributed problem like this, I've used zmq and it has worked really well. I'll give you a similar problem that I ran into, and attempted to solve via processes (but failed.) and then turned towards zmq.
Using bcrypt, or an expensive hashing algorith, is wise, but it blocks the node process for around 0.5 seconds. We had to offload this to a different server, and as a quick fix, I used essentially exactly what you did. Run a child process and send messages to it and get it to
respond. The only issue we found is for whatever reason our child process would pin an entire core when it was doing absolutely no work.(I still haven't figured out why this happened, we ran a trace and it appeared that epoll was failing on stdout/stdin streams. It would also only happen on our Linux boxes and would work fine on OSX.)
edit:
The pinning of the core was fixed in https://github.com/joyent/libuv/commit/12210fe and was related to https://github.com/joyent/node/issues/5504, so if you run into the issue and you're using centos + kernel v2.6.32: update node, or update your kernel!
Regardless of the issues I had with child_process.fork(), here's a nifty pattern I always use
client:
var child_process = require('child_process');
function FileParser() {
this.__callbackById = [];
this.__callbackIdIncrement = 0;
this.__process = child_process.fork('./child');
this.__process.on('message', this.handleMessage.bind(this));
}
FileParser.prototype.handleMessage = function handleMessage(message) {
var error = message.error;
var result = message.result;
var callbackId = message.callbackId;
var callback = this.__callbackById[callbackId];
if (! callback) {
return;
}
callback(error, result);
delete this.__callbackById[callbackId];
};
FileParser.prototype.parse = function parse(data, callback) {
this.__callbackIdIncrement = (this.__callbackIdIncrement + 1) % 10000000;
this.__callbackById[this.__callbackIdIncrement] = callback;
this.__process.send({
data: data, // optionally you could pass in the path of the file, and open it in the child process.
callbackId: this.__callbackIdIncrement
});
};
module.exports = FileParser;
child process:
process.on('message', function(message) {
var callbackId = message.callbackId;
var data = message.data;
function respond(error, response) {
process.send({
callbackId: callbackId,
error: error,
result: response
});
}
// parse data..
respond(undefined, "computed data");
});
We also need a pattern to synchronize the different processes, when each process finishes its task, it will respond to us, and we'll increment a count for each process that finishes, and then call the callback of the Semaphore when we've hit the count we want.
function Semaphore(wait, callback) {
this.callback = callback;
this.wait = wait;
this.counted = 0;
}
Semaphore.prototype.signal = function signal() {
this.counted++;
if (this.counted >= this.wait) {
this.callback();
}
}
module.exports = Semaphore;
here's a use case that ties all the above patterns together:
var FileParser = require('./FileParser');
var Semaphore = require('./Semaphore');
var arrFileParsers = [];
for(var i = 0; i < require('os').cpus().length; i++){
var fileParser = new FileParser();
arrFileParsers.push(fileParser);
}
function getFiles() {
return ["file", "file"];
}
var arrResults = [];
function onAllFilesParsed() {
console.log('all results completed', JSON.stringify(arrResults));
}
var lock = new Semaphore(arrFileParsers.length, onAllFilesParsed);
arrFileParsers.forEach(function(fileParser) {
var arrFiles = getFiles(); // you need to decide how to split the files into 1k chunks
fileParser.parse(arrFiles, function (error, result) {
arrResults.push(result);
lock.signal();
});
});
Eventually I used http://zguide.zeromq.org/page:all#The-Load-Balancing-Pattern, where the client was using the nodejs zmq client, and the workers/broker were written in C. This allowed us to scale this across multiple machines, instead of just a local machine with sub processes.

How to calculate node.js socket buffer to avoid allocating memory and never using it?

I'm using node.js as a server between pairs of clients, to handle my online game.
Clients send short messages between hem [one message should not exceed 200bytes].
Currently I expect single client to send [on average] 1 message per second [keeping in mind it can be 5 seconds of nothing and 5 messages one after another].
I've downloaded a sample server using 'net' module and rewritten it to handle the messages the way I need them to be handled.
Basically, for every connected socket, it creates a Buffer with size of 1024*8.
Currently I'm testing my game with some bots, which simply connect, wait 3 seconds and disconnect. They only send 1 message. Nothing else happening.
function sendMessage(socket, message) {
socket.write(message);
}
server.on('connection', function(socket) {
socket.setNoDelay(true);
socket.connection_id = require('crypto').createHash('sha1').update( 'krystian' + Date.now() + Math.random() ).digest('hex') ; // unique sha1 hash generation
socket.channel = '';
socket.matchInProgress = false
socket.resultAnnounced = false;
socket.buffer = new Buffer(cfg.buffer_size);
socket.buffer.len = 0; // due to Buffer's nature we have to keep track of buffer contents ourself
_log('New client: ' + socket.remoteAddress +':'+ socket.remotePort);
socket.on('data', function(data_raw) { // data_raw is an instance of Buffer as well
if (data_raw.length > (cfg.buffer_size - socket.buffer.len)) {
_log("Message doesn't fit the buffer. Adjust the buffer size in configuration");
socket.buffer.len = 0; // trimming buffer
return false;
}
socket.buffer.len += data_raw.copy(socket.buffer, socket.buffer.len); // keeping track of how much data we have in buffer
var str, start, end
, conn_id = socket.connection_id;
str = socket.buffer.slice(0,socket.buffer.len).toString();
if ( (start = str.indexOf("<somthing>")) != -1 && (end = str.indexOf("</something>")) != -1) {
try {
if (!<some check to see if the message format is right>) {
sendMessage(socket, "<error message to the client>");
return;
}
<storing info on the socket>
} catch(err) {
sendMessage(socket, "<error message to the client>");
return;
}
socket.channel = <channel>;
str = str.substr(end + 11);
socket.buffer.len = socket.buffer.write(str, 0);
sockets[socket.channel] = sockets[socket.channel] || {}; // hashmap of sockets subscribed to the same channel
sockets[socket.channel][conn_id] = socket;
waiting[socket.channel] = waiting[socket.channel] || {};
waiting[socket.channel][conn_id] = socket;
sendMessage(socket, "<info message to the client>");
for (var prop in waiting[socket.channel]) {
if (waiting[socket.channel].hasOwnProperty(prop) && waiting[socket.channel][prop].connection_id != socket.connection_id) {
<here I'll try to advertise this client among other clients>
sendMessage(waiting[socket.channel][prop], "<info to other clients about new client>");
}
}
}
var time_to_exit = true;
do{ // this is for a case when several messages arrived in buffer
if ( (start = str.indexOf("<some other format>")) != -1 && (end = str.indexOf("</some other format>")) != -1 ) {
var json = str.substr( start+19, end-(start+19) );
var jsono;
try {
jsono = JSON.parse(json);
} catch(err) {
sendMessage(socket, "<parse error>");
return;
}
if (<message indicates two clients are going to play together>) {
if (waiting[socket.channel][jsono.other_client_id] && waiting[socket.channel][socket.connection_id]) {
delete waiting[socket.channel][jsono.other_client_id];
delete waiting[socket.channel][socket.connection_id];
var opponentSocket = sockets[socket.channel][jsono.other_client_id];
sendMessage(opponentSocket, "<start game with the other socket>");
opponentSocket.opponentConnectionId = socket.connection_id;
sendMessage(socket, "<start game with the other socket>");
socket.opponentConnectionId = jsono.other_client_id;
}
} else if (<check if clients play together>) {
var opponentSocket = sockets[socket.channel][socket.opponentConnectionId];
if (<some generic action between clients, just pass the message>) {
sendMessage(sockets[socket.channel][socket.opponentConnectionId], json);
} else if (<match is over>) {
if (<match still in progress>) {
<send some messages indicating who won, who lost>
} else {
<log an error>
}
delete sockets[socket.channel][opponentSocket.connection_id];
delete sockets[socket.channel][socket.connection_id];
}
}
str = str.substr(end + 20); // cut the message and remove the precedant part of the buffer since it can't be processed
socket.buffer.len = socket.buffer.write(str, 0);
time_to_exit = false;
} else { time_to_exit = true; } // if no json data found in buffer - then it is time to exit this loop
} while ( !time_to_exit );
}); // end of socket.on 'data'
socket.on('close', function(){ // we need to cut out closed socket from array of client socket connections
if (!socket.channel || !sockets[socket.channel]) return;
if (waiting[socket.channel] && waiting[socket.channel][socket.connection_id]) {
delete waiting[socket.channel][socket.connection_id];
}
var opponentSocket = sockets[socket.channel][socket.opponentConnectionId];
if (opponentSocket) {
sendMessage(opponentSocket, "<the other client has disconnected>");
delete sockets[socket.channel][socket.opponentConnectionId];
}
delete sockets[socket.channel][socket.connection_id];
_log(socket.connection_id + " has been disconnected from channel " + socket.channel);
}); // end of socket.on 'close'
}); // end of server.on 'connection'
server.on('listening', function(){ console.log('Listening on ' + server.address().address +':'+ server.address().port); });
server.listen(cfg.port);
I've pasted the above code [very stripped version of the original] to give you and idea about how simple the server is.
I've got an array of sockets, who joined the game and array of sockets on the waiting list, waiting for another client to play with.
Nothing else is going on.
Still the script is memory hungry - 5 hours of connecting and disconnecting gave me this:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
31461 ec2-user 20 0 995m 91m 7188 S 0.7 15.4 1:29.07 node
I think this is way too much.
I'm using nodetime.com free service at the moment to monitor the script, but none of the metrics would suggest the script gained so much memory (it starts with just 10-12MB).
I believe this is due to the buffers, and because they allocate too much memory.
I'm only wondering, if my assumptions regarding buffer size are correct.
Should I adjust the buffer to reflect the amount of data I expect from the client?
If I expect the client to send 5 messages with a very short time between them, 200 bytes max each, should I assume that 1024*3 would be enough?
Or should I adjust buffer size according to the message size I expect, so if I'm sure the message will never go above 300 bytes, I should be fine with buffer size of 512?
Thanks,
Krystian
EDIT:
Node version:
$ node -v
v0.10.5
$ npm -v
1.2.19
EDIT2:
I've tested the script with 400 connections connecting and disconnecting and memory usage dropped significantly to around 60MB. After changing the test setup back to 4 connections it went up again.
The kernel has a socket receive buffer which is at least 8k., which takes care of multiple incoming messages on the socket. You don't need to buffer messages you've already read, so your application buffer doesn't need to be any bigger than the largest expected message.

Resources