"Re-chunking" Stream object in node.js - node.js

Pretty simple node.js question. I want to extend the stream object to re-chunk the data that is coming in from a remote connection. I'm doing multiple telnets and sending commands to other servers, and they send back responses. It looks something like this.
> Hello, this is a command
This is the response to the command.
Sometimes it pauses here (which triggers the 'data' event prematurely).
But the message isn't over until you see the semicolon
;
What I'd like to do is instead of triggering the 'data' event at the pause, is wait for the ; and trigger a custom 'message' event.
I've read and reread this question, but I don't quite get it yet (partially because it's about a writable stream, and partially because I don't yet grok CoffeeScript).
EDIT: I guess I'm asking two things here:
How do I extend/inherit the stream object that net.CreateConnection uses?
Can I just extend the prototype.write to do a 'split' and re-'emit' each part?
Here's a snip of what I'm doing so far, but the chunking should be part of the stream, not part of the 'data' listener:
var net = require('net');
var nodes = [
//list of ip addresses
];
function connectToServer(ip) {
var conn = net.createConnection(3083, ip);
conn.on('connect', function() {
conn.write ("login command;");
});
conn.on('data', function(data) {
var read = data.toString();
var message_list = read.split(/^;/m);
message_list.forEach (function(message) {
console.log("Atonomous message from " + ip + ':' + message);
//I need to extend the stream object to emit these instead of handling it here
//Also, sometimes the data chunking breaks the messages in two,
//but it should really wait for a line beginning with a ; before it emits.
});
});
conn.on('end', function() {
console.log("Lost conncection to " + ip + "!!");
});
conn.on('error', function(err) {
console.log("Connection error: " + err + " for ip " + ip);
});
}
nodes.forEach(function(node) {
connectToServer(node);
});
If I was using a raw stream, I guess it would be something like this (based on code I found elsewhere)?
var messageChunk = function () {
this.readable = true;
this.writable = true;
};
require("util").inherits(messageChunk, require("stream"));
messageChunk.prototype._transform = function (data) {
var regex = /^;/m;
var cold_storage = '';
if (regex.test(data))
{
var message_list = read.split(/^;/m);
message_list.forEach (function(message) {
this.emit("data", message);
});
}
else
{
//somehow store the data until data with a /^;/ comes in.
}
}
messageChunk.prototype.write = function () {
this._transform.apply(this, arguments);
};
But I'm not using a raw stream, I'm using the stream object in the net.createConnection object returns.

Don't use the _transform,_read,_write,or _flush functions you implement directly, those are for the internals of node to use.
Emit a custom event when you see the character ";" in your stream:
var msg = "";
conn.on("data",function(data) {
var chunk = data.toString();
msg += chunk;
if(chunk.search(";") != -1) {
conn.emit("customEvent",msg);
msg = "";
}
});
conn.on("customEvent",function(msg) {
//do something with your message
});

Related

How to disconnect a socket after streaming data?

I am making use of "socket.io-client" and "socket.io stream" to make a request and then stream some data. I have the following code that handles this logic
Client Server Logic
router.get('/writeData', function(req, res) {
var io = req.app.get('socketio');
var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
var nameNodeData = {};
async.waterfall([
checkForDataNodes,
readFileFromS3
], function(err, result) {
if (err !== null) {
res.json(err);
}else{
res.json("Finished Writing to DN's");
}
});
function checkForDataNodes(cb) {
nameNodeSocket.on('nameNodeData', function(data) {
nameNodeData = data;
console.log(nameNodeData);
cb(null, nameNodeData);
});
if (nameNodeData.numDataNodes === 0) {
cb("No datanodes found");
}
}
function readFileFromS3(nameNodeData, cb) {
for (var i in nameNodeData['blockToDataNodes']) {
var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
var dataNodeSocket = io.connect('http://'+ IP +":5000");
var ss = require("socket.io-stream");
var stream = ss.createStream();
var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
//var file = require('fs').createWriteStream('testFile' + i + '.txt');
var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
var fileData = {
'mainFile': paramsWithRange['Key'].split('/')[1],
'blockName': getFileName[1]
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
s3.getObject(paramsWithRange).createReadStream().pipe(stream);
//dataNodeSocket.disconnect();
}
cb(null);
}
});
Server Logic (that gets the data)
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");
dataNodeIO.on('connection', function(socket) {
console.log("Succesfully connected!");
ss(socket).on('sendData', function(stream, data) {
var IP = data['ipValue'];
var blockName = data['blockName'];
var mainFile = data['mainFile'];
dataNode.makeDir(mainFile);
dataNode.addToReport(mainFile, blockName);
stream.pipe(fs.createWriteStream(mainFile + '/' + blockName));
});
});
How can I properly disconnect the connections in function readFileFromS3. I have noticed using dataNodeSocket.disconnect() at the end does not work as I cannot verify the data was received on the 2nd server. But if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections in Client Server side
It appears that the main problem with closing the socket is that you weren't waiting for the stream to be done writing before trying to close the socket. So, because the writing is all asynchronous and finishes sometime later, you were trying to close the socket before the data had been written.
Also because you were putting asynchronous operations inside a for loop, you were also running all your operations in parallel which may not be exactly what you want as it makes error handling more difficult and server load more difficult.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one errors while the others are already running and you'd have to be more careful about concurrency issues.
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a Javascript identifier or if the property name is in a variable. Otherwise, the dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set returned status to 500 when there's an error processing the request
So, here's the code for that:
const ss = require("socket.io-stream");
router.get('/writeData', function(req, res) {
const io = req.app.get('socketio');
function streamFileFromS3(ip, data) {
return new Promise((resolve, reject) => {
const dataNodeSocket = io.connect(`http://${ip}:5000`);
dataNodeSocket.on('connect_error', reject);
dataNodeSocket.on('connect_timeout', () {
reject(new Error(`timeout connecting to http://${ip}:5000`));
});
dataNodeSocket.on('connection', () => {
// dataNodeSocket connected now
const stream = ss.createStream().on('error', reject);
paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
const filename = data.key.split('/')[1];
const fileData = {
'mainFile': paramsWithRange.Key.split('/')[1],
'blockName': filename
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
// get S3 data and pipe it to the socket.io stream
s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
stream.on('close', () => {
dataNodeSocket.disconnect();
resolve();
});
});
});
}
function connectError(msg) {
res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
}
const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);
nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
try {
for (let item of nameNodeData.blockToDataNodes) {
await streamFileFromS3(item.ipValue, item);
}
res.json("Finished Writing to DN's");
} catch(e) {
res.status(500).json(e);
}
});
});
Other notes:
I don't know what paramsWithRange is as it is not declared here and when you were doing everything in parallel, it was getting shared among all the connections which is asking for a concurrency issue. In my serialized implementation, it's probably safe to share it, but the way it is now bothers me as it's a concurrency issue waiting to happen.

Stream read returning empty in nodejs

I am using express to create a webservice that will read string data from a stream, and respond to the HTTP POST request with that value. Here is the code for the S3Store.js file that defines the readFileFromS3(.) function:
S3Store.js
S3Store.prototype.readFileFromS3 = function(filename, callback) {
var readConfig = {
'Bucket': 'shubham-test',
'Key': filename
};
var readStream = this.s3.getObject(readConfig).createReadStream();
var allData = '';
readStream.on('data', function(data) {
//data = Buffer.concat([allData, data]);
data = allData + data;
console.log("data: " + data);
});
readStream.on('error', function(err) {
callback(err, null);
});
Now, if I call this method from a terminal like this:
s3Instance.readFileFromS3('123.json', function(err, data) {
console.log(data);
});
I see the appropriate string for data logged to the console. However, when I call the same method from inside one of the routes in express for HTTP POST requests, the service responds with a value of data set to empty string. Code for the POST request:
router.post('/resolve', function(req, res) {
var commandJson = req.body;
var appId = commandJson['appId'];
var command = commandJson['text'];
if (appId == undefined || command == undefined) {
res.status(400).send("Malformed Request: appId: " + appId + ", command: " + command);
};
s3Store.readFileFromS3('123.json', function(err, data) {
res.send(data);
});
});
Why does it return an empty string when calling the readFileFromS3(.) from the HTTP POST method and not when I ran the same method directly from the node console?
You're logging the data but you're not passing anything to the completion callback (see below for some more explanation):
S3Store.prototype.readFileFromS3 = function(filename, callback) {
var readConfig = {
'Bucket': 'shubham-test',
'Key': filename
};
var readStream = this.s3.getObject(readConfig).createReadStream();
var allData = [];
// Keep collecting data.
readStream.on('data', function(data) {
allData.push(data);
});
// Done reading, concatenate and pass to completion callback.
readStream.on('end', function() {
callback(null, Buffer.concat(allData));
});
// Handle any stream errors.
readStream.on('error', function(err) {
callback(err, null);
});
};
I took the liberty to rewrite the data collection to use Buffer's instead of strings, but this obviously isn't a requirement.
The callback argument is a completion function, meant to be called when either reading the S3 stream is done, or when it has thrown an error. The error handling was already in place, but not the part where you would call back when all the data from the stream was read, which is why I added the end handler.
At that point, the readStream is exhausted (everything from it has been read into allData), and you call the completion callback when the collected data as second argument.
The common idiom throughout Node is that completion callbacks take (at least) two arguments: the first is either an error, or null when there aren't errors, and the second is the data you want to pass back to the caller (in your case, the anonymous function in your route handler that calls res.send()).

Simple publishing of many messages to Rabbitmq

I want to send the same messages many times in a row, but i need to use a loop. When I use a loop though, no messages are sent. I am using amqp in Nodejs.
Here is the working code for sending a single messages. What should I do to send many. I have already tried just wrapping a while loop around the connection.publish part and nothing was sent.
var amqp = require('amqp');
var connection = amqp.createConnection({url: "amqp://tester:tstpsswrd#10.4.52.115:5672"});
connection.on('ready', function () {
connection.queue('my-queue', function (q) {
connection.publish('my-queue', 'hi');
});
});
I'm positive that I am doing something stupid wrong here, or maybe missing something. First time with rabbitmq.
Update, Loop example
var amqp = require('amqp');
var connection = amqp.createConnection({url: "amqp://tester:tstpsswrd#10.4.52.115:5672"});
connection.on('ready', function () {
connection.queue('my-queue', function (q) {
while(true){
connection.publish('my-queue', 'hi');
}
});
});
In practical scenario you can not and should not be having a infinite loop as such for writing to a message broker. There have to be some event based thing or a proper defined number.
Try this code you can use the for loop according to your requirement:
var amqp = require('amqp');
var connection = amqp.createConnection({ host: 'localhost', port: 5672});
connection.on('ready', function () {
for(var i=0; i<1000; i++){
var status = writeOnQueue("testing the queue"+i);
}
});
function writeOnQueue(xml){
var msg = xml;
console.log(msg);
try{
connection.exchange('test-exchange', {confirm: true},function(exchange) {
publish = exchange.publish('my-queue',msg, { mandatory: false });
console.log('sent the message success test-exchange');
return true;
});
}
catch(e){
console.log('Some error occured.'+ e);
}
}

NodeJs net.Socket() events not firing

Trying to write a TCP client in Node v0.10.15 and I am having a little trouble getting data back from the server. I know that the server is working properly because I have 3-4 different clients written in different languages communicating with it.
Below is a snippet of a larger piece of code but this should get the point across.
The problem is: I'm expecting 2 packets coming back after writing to the socket (this part is not included in this example). I'm only seeing the "data" event being fired once. Is there something that I need to do to get node to resume reading from the Tcp stream? I can confirm that the server is sending 2 packets(The length and then the actual data) Any help would be appreciated.
var dc = require('./DataContracts.js');
var net = require('net');
require('buffertools').extend();
var client = net.Socket();
var isConnected = false;
var serverHost = '10.2.2.21';
var dataCallback;
var receivedBuffer = new Array();
function InitComm(buffer) {
if (!isConnected) {
client.connect(4987, serverHost, function() {
client.on('data', function(data) {
console.log('Received server packet...');
var buf = new Buffer(data);
receivedBuffer.push(buf);
client.resume();
});
client.on('end', function() {
if (receivedBuffer.length > 1) {
if (dataCallback !== undefined)
dataCallback(receivedBuffer);
}
});
client.on('close', function() {
//clean up
});
client.on('error', function(err) {
console.log('Error!: ' + err);
});
Communicate(buffer);
});
} else {
Communicate(buffer);
}
}
Turns out that node was combining both of the packets together. I must be missing a Carriage return on the first packet

Reading CSV file and sending data in intervals with websockets (Node, Socket.io)

I'm relatively new to Node and Express.js. I'm trying to create a websocket server to push CSV data in irregular intervals stored in the file itself, line after line.
The CSV structure is something like this:
[timeout [ms], data1, data2, data3 ...]
I've successfully created a websocket server which communicates with the client.
I'm looking for a best solution to effectively do something like this:
1. Read a line of the CSV file
2. Send a line with WebSockets
3. Pause the reading for a period of time stored in the first value of the row
4. Resume the reading after the interval has passed, and back to step 1.
So far, I got this far (please feel free to trash my code completely as it might be very wrong - as I said, I'm new to it. It seems like the pause() doesn't do anything.
var $ = require('jquery')
,csv = require('csv');
exports.index = function(server){
var io = require('socket.io').listen(server);
io.sockets.on('connection', function (socket) {
socket.on('startTransmission', function(msg) {
csv()
.from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' })
.on('record', function(row,index){
var rowArray = $.parseJSON(JSON.stringify(row));
var json = {},
that = this;
$.each(rowArray, function(i,value){
json[keys[i]] = value;
});
socket.emit('transmitDataData', json);
//this.pause(); //I guess around here is where I'd like to pause
// setTimeout(function(){
// that.resume(); //and resume here after the timeout, stored in the first value (rowArray[0])
// }, rowArray[0]);
});
});
});
};
The commented out code unfortunately does not work - All data is sent immediately, row after row, the function doesn't pause
I ran into the same sort of thing with another use case. The issue is that calling pause() on the stream pauses the underlying stream reading but not the csv record parsing, so the record event can get called with the remainder of the records that made up the last read stream chunk. I synchronized them, in my case, like this:
var rows=0, actions=0;
stream.on('record', function(row, index){
rows++;
// pause here, but expect more record events until the raw read stream is exhausted
stream.pause();
runner.do(row, function(err, result) {
// when actions have caught up to rows read, read more rows.
if (actions==rows) {
stream.resume();
}
});
});
In your case, I'd buffer the rows and release them with the timer. Here's an untested re-factoring just to give you an idea of what I mean:
var $ = require('jquery'),
csv = require('csv');
exports.index = function(server){
var io = require('socket.io').listen(server);
io.sockets.on('connection', function (socket) {
socket.on('startTransmission', function(msg) {
var timer=null, buffered=[], stream=csv().from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' });
function transmit(row) {
socket.emit('transmitDataData', row);
}
function drain(timeout) {
if (!timer) {
timer = setTimeout(function() {
timer = null;
if (buffered.length<=1) { // get more rows ahead of time so we don't run out. otherwise, we could skip a beat.
stream.resume(); // get more rows
} else {
var row = buffered.shift();
transmit(row);
drain(row[0]);
}
}, timeout);
}
}
stream.on('record', function(row,index){
stream.pause();
if (index == 0) {
transmit(row);
} else {
buffered.push(row);
}
drain(row[0]); // assuming row[0] contains a timeout value.
});
stream.on('end', function() {
// no more rows. wait for buffer to empty, then cleanup.
});
stream.on('error', function() {
// handle error.
});
});
};

Resources