Nodejs: Set highWaterMark of socket object - node.js

is it possible to set the highWaterMark of a socket object after it was created:
var http = require('http');
var server = http.createServer();
server.on('upgrade', function(req, socket, head) {
  socket.on('data', function(chunk) {
    var frame = new WebSocketFrame(chunk);
    // skip invalid frames
    if (!frame.isValid()) return;
    // if the length in the head is unequal to the chunk,
    // node has maybe split it
    if (chunk.length != WebSocketFrame.getLength()) {
      socket.once('data', listenOnMissingChunks);
    } // FIX: was `});`, which closed the 'data' handler too early (syntax error)
  });
});
// Continuation handler for a frame whose payload was split across chunks.
// NOTE(review): 'data' listeners receive only the chunk — `frame` will
// always be undefined here; the frame would have to be carried over from
// the previous handler some other way (e.g. via a closure).
function listenOnMissingChunks(chunk, frame) {
frame.addChunkToPayload(chunk);
// NOTE(review): getLength() is invoked on the WebSocketFrame constructor,
// not on the `frame` instance — presumably frame.getLength() was meant.
if (WebSocketFrame.getLength()) {
// if still corrupted listen once more
} else {
// else proceed
}
}
The above code example does not work. But how do I do it instead?
Further explanation:
When I receive big WebSocket frames they get split into multiple data events. This makes it hard to parse the frames because I do not know whether a given frame is split or corrupted.

I think you misunderstand the nature of a TCP socket. Despite the fact that TCP sends its data over IP packets, TCP is not a packet protocol. A TCP socket is simply a stream of data. Thus, it is incorrect to view the data event as a logical message. In other words, one socket.write on one end does not equate to a single data event on the other.
There are many reasons that a single write to a socket does not map 1:1 to a single data event:
The sender's network stack may combine multiple small writes into a single IP packet. (The Nagle algorithm)
An IP packet may be fragmented (split into multiple packets) along its journey if its size exceeds any one hop's MTU.
The receiver's network stack may combine multiple packets into a single data event (as seen by your application).
Because of this, a single data event might contain multiple messages, a single message, or only part of a message.
In order to correctly handle messages sent over a stream, you must buffer incoming data until you have a complete message.
var net = require('net');
var max = 1024 * 1024 // 1 MB, the maximum amount of data that we will buffer (prevent a bad server from crashing us by filling up RAM)
, allocate = 4096; // how much memory to allocate at once, 4 kB (there's no point in wasting 1 MB of RAM to buffer a few bytes)
, buffer=new Buffer(allocate) // create a new buffer that allocates 4 kB to start
, nread=0 // how many bytes we've buffered so far
, nproc=0 // how many bytes in the buffer we've processed (to avoid looping over the entire buffer every time data is received)
, client = net.connect({host:'example.com', port: 8124}); // connect to the server
client.on('data', function(chunk) {
if (nread + chunk.length > buffer.length) { // if the buffer is too small to hold the data
var need = Math.min(chunk.length, allocate); // allocate at least 4kB
if (nread + need > max) throw new Error('Buffer overflow'); // uh-oh, we're all full - TODO you'll want to handle this more gracefully
var newbuf = new Buffer(buffer.length + need); // because Buffers can't be resized, we must allocate a new one
buffer.copy(newbuf); // and copy the old one's data to the new one
buffer = newbuf; // the old, small buffer will be garbage collected
}
chunk.copy(buffer, nread); // copy the received chunk of data into the buffer
nread += chunk.length; // add this chunk's length to the total number of bytes buffered
pump(); // look at the buffer to see if we've received enough data to act
});
client.on('end', function() {
// handle disconnect
});
client.on('error', function(err) {
// handle errors
});
// Return the index of `byte` within the buffered data, or undefined if it
// is not present. Scanning resumes at `nproc` so already-inspected bytes
// are not re-scanned on every 'data' event.
function find(byte) {
for (var i = nproc; i < nread; i++) { // pick up where the last scan stopped
if (buffer.readUInt8(i, true) == byte) { // found it
return i;
}
}
// FIX: record that [0, nread) contains no match so the next call skips
// these bytes — this is the optimization the `nproc` counter was declared
// for but the original never performed (it only ever reset it in slice()).
nproc = nread;
}
// Drop `bytes` bytes from the front of the buffered data after they have
// been consumed.
function slice(bytes) {
buffer = buffer.slice(bytes); // advance the buffer's view past the consumed bytes
nread -= bytes; // that many fewer bytes are buffered now
nproc = 0; // none of the remaining data has been scanned yet
}
// Extract and hand off every complete NULL-terminated message currently
// sitting in the buffer.
function pump() {
for (;;) {
var pos = find(0x00); // position of the next NULL delimiter
if (!(pos >= 0)) break; // none left (find returned undefined)
if (pos === 0) {
slice(1); // buffer starts with a NULL (empty message) — discard it
} else {
process(buffer.slice(0, pos)); // hand off the message
slice(pos + 1); // drop the message and its delimiter
}
}
}
// Consume one complete message extracted from the stream.
function process(msg) {
if (msg.length === 0) return; // ignore empty messages
// here's where you have to decide what to do with the data you've received
// experiment with the protocol
}

You don't need to. Incoming data will almost certainly be split across two or more reads: this is the nature of TCP and there is nothing you can do about it. Fiddling with obscure socket parameters certainly won't change it. And the data will be split, but certainly not corrupted. Just treat the socket as what it is: a byte stream.

Related

native websocket api NodeJS for larger messages?

I was following an article about writing a socket server from scratch, and its mostly working with small frames / packages, but when I try to send about 2kb of data, I get this error:
.
internal/buffer.js:77
throw new ERR_OUT_OF_RANGE(type || 'offset',
^
RangeError [ERR_OUT_OF_RANGE]: The value of "offset" is out of range. It must be >= 0 and <= 7. Received 8
at boundsError (internal/buffer.js:77:9)
at Buffer.readUInt8 (internal/buffer.js:243:5)
at pm (/home/users/me/main.js:277:24)
at Socket.<anonymous> (/home/users/me/main.js:149:15)
at Socket.emit (events.js:315:20)
at addChunk (_stream_readable.js:297:12)
at readableAddChunk (_stream_readable.js:273:9)
at Socket.Readable.push (_stream_readable.js:214:10)
at TCP.onStreamRead (internal/stream_base_commons.js:186:23) {
code: 'ERR_OUT_OF_RANGE'
}
Here's my server code (some details were changed for security, but here it is in its entirety for the line numbers etc.) but the relevant part here is the function pm [=parseMessage](towards the bottom):
let http = require('http'),
	ch = require("child_process"),
	crypto = require("crypto"),
	fs = require("fs"),
	// FIX: the original used typographic quotes (“…”) here — a syntax error.
	// Also read the secret as a trimmed string: readFileSync returns a
	// Buffer, and `m.password === <Buffer>` is always false with strict
	// equality, so the password check below could never succeed.
	password = fs.readFileSync("./secretPasswordFile.txt", "utf8").trim(),
	callbacks = {
		// Runs a shell command sent by an authenticated client and writes the
		// result back as a WebSocket frame.
		// m  - parsed message object ({ password, command, exit })
		// cs - the client socket
		CHANGEDforSecUrITY(m, cs) {
			if (m.password === password) {
				if (m.command) {
					try {
						// remember the child process on the socket so a later
						// { exit: true } message can kill it
						cs.my = ch.exec(
							m.command,
							(err, stdout, stderr) => {
								cs.write(ans(s({
									err,
									stdout,
									stderr
								})));
							}
						);
					} catch (e) {
						cs.write(ans(
							s({
								error: e.toString()
							})
						))
					}
				}
				if (m.exit) {
					console.log("LOL", cs.my);
					if (cs.my && typeof cs.my.kill === "function") {
						cs.my.kill();
						console.log(cs.my, "DID?");
					}
				}
				cs.write(
					ans(
						s({
							hi: 2,
							youSaid: m
						}))
				)
			} else {
				cs.write(ans(s({
					hey: "wrong password!!"
				})))
			}
			console.log("hi!", m)
		}
	},
	banned = [
		"61.19.71.84"
	],
	server = http.createServer(
		(q, r) => {
			if (banned.includes(q.connection.remoteAddress)) {
				// FIX: res.end(data, encoding) — the original passed the second
				// string as the `encoding` argument, which throws
				// ERR_UNKNOWN_ENCODING on modern Node. Concatenate instead.
				r.end("Hey man, " + q.connection.remoteAddress +
					", I know you're there!!");
			} else {
				// NOTE(review): runs a shell command with sudo on every request —
				// looks like leftover debugging; confirm it is intentional.
				ch.exec(`sudo "$(which node)" -p "console.log(4)"`)
				console.log(q.url)
				console.log(q.connection.remoteAddress, q.connection.remotePort)
				// SECURITY: q.url is untrusted input; strip ".." so a request
				// like /../../etc/passwd cannot escape the static directory.
				let path = q.url.substring(1).split("..").join("")
				q.url == "/" &&
					(path = "index.html")
				q.url == "/secret" &&
					(path = "../main.js")
				fs.readFile(
					"./static/" + path,
					(er, f) => {
						if (er) {
							r.end("<h2>404!!</h2>");
						} else {
							r.end(f);
						}
					}
				)
			}
		}
	);
// Start the HTTP server and wire up the WebSocket handshake + data handling.
server.listen(
process.env.PORT || 80,
// NOTE: the 'listening' callback receives no arguments, so `c` is always
// undefined here.
c=> {
console.log(c,"helo!!!")
// WebSocket handshake: fired for HTTP Upgrade requests.
server.on("upgrade", (req, socket) => {
if(req.headers["upgrade"] !== "websocket") {
socket.end("HTTP/1.1 400 Bad Request");
return;
}
let key = req.headers["sec-websocket-key"];
if(key) {
// gav() computes the Sec-WebSocket-Accept value (RFC 6455 handshake)
let hash = gav(key)
let headers = [
"HTTP/1.1 101 Web Socket Protocol Handshake",
"Upgrade: WebSocket",
"Connection: Upgrade",
`Sec-WebSocket-Accept: ${hash}`
];
// the client may offer a comma-separated list of subprotocols
let protocol = req.headers[
"sec-websocket-protocol"
];
let protocols = (
protocol &&
protocol.split(",")
// NOTE: this arrow parameter `s` shadows the module-level s() helper
.map(s => s.trim())
|| []
);
// echo the "json" subprotocol back if the client offered it
protocols.includes("json") &&
headers
.push("Sec-WebSocket-Protocol: json");
let headersStr = (
headers.join("\r\n") +
"\r\n\r\n"
)
console.log(
"Stuff happening",
req.headers,
headersStr
);
fs.writeFileSync("static/logs.txt",headersStr);
// complete the handshake by writing the 101 response on the raw socket
socket.write(
headersStr
);
// first server->client message, wrapped in a WebSocket text frame by ans()
socket.write(ans(JSON.stringify(
{
hello: "world!!!"
}
)))
}
// NOTE(review): each 'data' chunk is parsed as if it were exactly one
// complete WebSocket frame. TCP gives no such guarantee — frames can be
// split across chunks or coalesced — which is what triggers the
// ERR_OUT_OF_RANGE crash discussed in this question.
socket.on("data", buf => {
let msg = pm(buf);
console.log("HEY MAN!",msg)
if(msg) {
console.log("GOT!",msg);
// dispatch each top-level key of the message to its registered callback
for(let k in msg) {
if(callbacks[k]) {
callbacks[k](
msg[k],
socket
)
}
}
} else {
console.log("nope");
}
});
});
}
)
// Parse a single WebSocket frame from `buf` and return its payload,
// JSON-decoded when possible (via p()). Returns null when the peer sent a
// close frame OR when `buf` does not yet contain the complete frame — the
// original read past the end of the buffer in that case, which is exactly
// what raised ERR_OUT_OF_RANGE on frames split across 'data' events.
function pm(buf) {
	/*
	 * structure of first byte:
	 *   bit 8 (FIN): set if it's the last frame of the message
	 *   bits 7-5:    reserved
	 *   bits 4-1:    opcode — what type of frame it is:
	 *     "0000"(0): continuation
	 *     "0001"(1): this frame contains text
	 *     "0010"(2): this is binary
	 *     "0011"(3) - "0111"(7): reserved values   [FIX: chart said (11)]
	 *     "1000"(8): connection closed
	 *     "1001"(9): ping (checking for response)
	 *     "1010"(10): pong (response verified)
	 *     "1011"(11) - "1111"(15): reserved for "control" frames
	 * structure of second byte:
	 *   bit 8: is it "masked"
	 *   bits 7-1: length of payload, if less than 126.
	 *     if 126, 2 additional length bytes follow
	 *     if 127, 8 additional length bytes follow (64-bit length)
	 */
	if (buf.length < 2) return null; // not even the 2-byte header yet
	const myFirstByte = buf.readUInt8(0);
	const isThisFinalFrame = isset(myFirstByte, 7); // FIN bit
	const [
		reserved1,
		reserved2,
		reserved3
	] = [
		isset(myFirstByte, 6),
		isset(myFirstByte, 5),
		isset(myFirstByte, 4) // reserved bits
	];
	const opcode = myFirstByte & parseInt("1111", 2); // low 4 bits
	// check if closed connection ("1000"(8))
	if (opcode == parseInt("1000", 2))
		return null; // shows that connection closed
	// look for text frame ("0001"(1))
	if (opcode == parseInt("0001", 2)) {
		const theSecondByte = buf.readUInt8(1);
		const isMasked = isset(theSecondByte, 7); // mask bit
		let currentByteOffset = 2; // we are at the second byte now
		let payloadLength = theSecondByte & 127; // low 7 bits
		if (payloadLength === 126) {
			// 16-bit extended length in the next two bytes
			if (currentByteOffset + 2 > buf.length) return null; // incomplete
			payloadLength = buf.readUInt16BE(currentByteOffset);
			currentByteOffset += 2;
		} else if (payloadLength === 127) {
			// 64-bit extended length: 4 high bytes, then 4 low bytes.
			// FIX: the original always threw here ("brutal"); now frames up
			// to 4 GiB are supported and larger ones are refused cleanly.
			if (currentByteOffset + 8 > buf.length) return null; // incomplete
			const high = buf.readUInt32BE(currentByteOffset);
			const low = buf.readUInt32BE(currentByteOffset + 4);
			if (high !== 0) return null; // > 4 GiB — refuse rather than crash
			payloadLength = low;
			currentByteOffset += 8;
		}
		// FIX: make sure the mask key and the whole payload actually arrived
		// before reading them — a partial frame returns null so the caller
		// can wait for more data instead of crashing.
		const maskBytes = isMasked ? 4 : 0;
		if (currentByteOffset + maskBytes + payloadLength > buf.length)
			return null;
		const data = Buffer.alloc(payloadLength);
		if (isMasked) {
			// client frames are XOR-masked with a 4-byte key (RFC 6455 §5.3)
			let maskingBytes = Buffer.allocUnsafe(4);
			buf.copy(
				maskingBytes,
				0,
				currentByteOffset,
				currentByteOffset + 4
			);
			currentByteOffset += 4;
			for (let i = 0; i < payloadLength; ++i) {
				const source = buf.readUInt8(currentByteOffset++);
				// unmask: XOR each byte with the key byte at i mod 4
				data.writeUInt8(source ^ maskingBytes[i & 3], i);
			}
		} else {
			// just copy the payload bytes directly into our buffer
			buf.copy(
				data,
				0,
				currentByteOffset,
				currentByteOffset + payloadLength
			);
		}
		// at this point we have the actual data, so try to decode JSON
		const json = data.toString("utf8");
		return p(json);
	} else {
		return "LOL IDK?!";
	}
}
// JSON.parse that falls back to returning the raw string when the input is
// not valid JSON.
function p(str) {
	let parsed;
	try {
		parsed = JSON.parse(str);
	} catch (e) {
		parsed = str;
	}
	return parsed;
}
// JSON.stringify that reports serialization failures (e.g. circular
// structures) as an error string instead of throwing.
function s(ob) {
	let out;
	try {
		out = JSON.stringify(ob);
	} catch (e) {
		out = e.toString();
	}
	return out;
}
// Wrap `str` in a single unmasked WebSocket text frame (FIN=1, opcode=1).
// FIX: the original only supported the 126/16-bit length form, so any
// payload of 64 KiB or more crashed writeUInt16BE with ERR_OUT_OF_RANGE.
// Payloads up to 4 GiB now use the 127/64-bit length form (RFC 6455 §5.2).
function ans(str) {
	const byteLength = Buffer.byteLength(str);
	// 0, 2 or 8 extended length bytes depending on payload size
	const lengthByteCount =
		byteLength < 126 ? 0 : (byteLength < 65536 ? 2 : 8);
	// the 7-bit length field: the real length, or the 126/127 marker
	const payloadLength =
		lengthByteCount === 0 ? byteLength :
		(lengthByteCount === 2 ? 126 : 127);
	const buffer = Buffer.alloc(
		2 +
		lengthByteCount +
		byteLength
	);
	buffer.writeUInt8(
		parseInt("10000001", 2), // FIN bit + text opcode, at first byte
		0
	);
	buffer.writeUInt8(payloadLength, 1); // at second byte
	let currentByteOffset = 2; // already wrote second byte by now
	if (lengthByteCount === 2) {
		buffer.writeUInt16BE(
			byteLength,
			2 // 16-bit length starts at the 3rd byte
		);
		currentByteOffset += 2;
	} else if (lengthByteCount === 8) {
		// 64-bit length: high 32 bits are always 0 here (payloads < 4 GiB)
		buffer.writeUInt32BE(0, 2);
		buffer.writeUInt32BE(byteLength, 6);
		currentByteOffset += 8;
	}
	buffer.write(str, currentByteOffset); // the rest of the bytes
	// are the actual data, see chart in function pm
	return buffer;
}
// Compute the Sec-WebSocket-Accept header value for a handshake: SHA-1 of
// the client key concatenated with the fixed RFC 6455 GUID, base64-encoded.
function gav(ak) {
	const sha1 = crypto.createHash("sha1");
	sha1.update(ak + '258EAFA5-E914-47DA-95CA-C5AB0DC85B11', "binary");
	return sha1.digest("base64");
}
// True when bit `k` (0-indexed from the least significant bit) of `b` is 1.
function isset(b, k) {
	return ((b >>> k) & 1) === 1;
}
Given that this error does not happen with smaller packets, I'm taking an educated guess that this is due to the limitations of the code here, as mentioned in the offical RFC documentation:
5.4. Fragmentation
The primary purpose of fragmentation is to allow sending a message
that is of unknown size when the message is started without having to
buffer that message. If messages couldn't be fragmented, then an
endpoint would have to buffer the entire message so its length could
be counted before the first byte is sent. With fragmentation, a
server or intermediary may choose a reasonable size buffer and, when
the buffer is full, write a fragment to the network.
A secondary use-case for fragmentation is for multiplexing, where
it is not desirable for a large message on one logical channel to
monopolize the output channel, so the multiplexing needs to be free to
split the message into smaller fragments to better share the output
channel. (Note that the multiplexing extension is not described in
this document.)
Unless specified otherwise by an extension, frames have no semantic
meaning. An intermediary might coalesce and/or split frames, if no
extensions were negotiated by the client and the server or if some
extensions were negotiated, but the intermediary understood all the
extensions negotiated and knows how to coalesce and/or split frames
in the presence of these extensions. One implication of this is that
in absence of extensions, senders and receivers must not depend on
the presence of specific frame boundaries.
The following rules apply to fragmentation:
o An unfragmented message consists of a single frame with the FIN
bit set (Section 5.2) and an opcode other than 0.
o A fragmented message consists of a single frame with the FIN bit
clear and an opcode other than 0, followed by zero or more frames
with the FIN bit clear and the opcode set to 0, and terminated by
a single frame with the FIN bit set and an opcode of 0. A
fragmented message is conceptually equivalent to a single larger
message whose payload is equal to the concatenation of the
payloads of the fragments in order; however, in the presence of
extensions, this may not hold true as the extension defines the
interpretation of the "Extension data" present. For instance,
"Extension data" may only be present at the beginning of the first
fragment and apply to subsequent fragments, or there may be
"Extension data" present in each of the fragments that applies
only to that particular fragment. In the absence of "Extension
data", the following example demonstrates how fragmentation works.
EXAMPLE: For a text message sent as three fragments, the first
fragment would have an opcode of 0x1 and a FIN bit clear, the
second fragment would have an opcode of 0x0 and a FIN bit clear,
and the third fragment would have an opcode of 0x0 and a FIN bit
that is set.
o Control frames (see Section 5.5) MAY be injected in the middle
of
a fragmented message. Control frames themselves MUST NOT be
fragmented.
o Message fragments MUST be delivered to the recipient in the
order
sent by the sender. o The fragments of one message MUST NOT be interleaved between the
fragments of another message unless an extension has been
negotiated that can interpret the interleaving.
o An endpoint MUST be capable of handling control frames in the
middle of a fragmented message.
o A sender MAY create fragments of any size for non-control
messages.
o Clients and servers MUST support receiving both fragmented and
unfragmented messages.
o As control frames cannot be fragmented, an intermediary MUST NOT
attempt to change the fragmentation of a control frame.
o An intermediary MUST NOT change the fragmentation of a message
if
any reserved bit values are used and the meaning of these values
is not known to the intermediary.
o An intermediary MUST NOT change the fragmentation of any message
in the context of a connection where extensions have been
negotiated and the intermediary is not aware of the semantics of
the negotiated extensions. Similarly, an intermediary that didn't
see the WebSocket handshake (and wasn't notified about its
content) that resulted in a WebSocket connection MUST NOT change
the fragmentation of any message of such connection.
o As a consequence of these rules, all fragments of a message are
of
the same type, as set by the first fragment's opcode. Since
control frames cannot be fragmented, the type for all fragments in
a message MUST be either text, binary, or one of the reserved
opcodes.
NOTE: If control frames could not be interjected, the latency of a
ping, for example, would be very long if behind a large message.
Hence, the requirement of handling control frames in the middle of a
fragmented message.
IMPLEMENTATION NOTE: In the absence of any extension, a receiver
doesn't have to buffer the whole frame in order to process it. For
example, if a streaming API is used, a part of a frame can be
delivered to the application. However, note that this assumption
might not hold true for all future WebSocket extensions.
In the words of the article above:
Alignment of Node.js socket buffers with WebSocket message frames
Node.js socket data (I’m talking about net.Socket in this case, not
WebSockets) is received in buffered chunks. These are split apart with
no regard for where your WebSocket frames begin or end!
What this means is that if your server is receiving large messages
fragmented into multiple WebSocket frames, or receiving large numbers
of messages in rapid succession, there’s no guarantee that each data
buffer received by the Node.js socket will align with the start and
end of the byte data that makes up a given frame.
So, as you’re parsing each buffer received by the socket, you’ll need
to keep track of where one frame ends and where the next begins.
You’ll need to be sure that you’ve received all of the bytes of data
for a frame — before you can safely consume that frame’s data.
It may be that one frame ends midway through the same buffer in which
the next frame begins. It also may be that a frame is split across
several buffers that will be received in succession.
The following diagram is an exaggerated illustration of the issue. In
most cases, frames tend to fit inside a buffer. Due to the way the
data arrives, you’ll often find that a frame will start and end in
line with the start and end of the socket buffer. But this can’t be
relied upon in all cases, and must be considered during
implementation. This can take
some work to get right.
For the basic implementation that follows below, I have skipped any
code for handling large messages or messages split across multiple
frames.
So my problem here is that the article skipped the fragmentation code, which is kind of what I need to know... but in that RFC documentation, some examples of fragmentated and unfragmented packets are given:
5.6. Data Frames
Data frames (e.g., non-control frames) are identified by opcodes
where the most significant bit of the opcode is 0. Currently defined
opcodes for data frames include 0x1 (Text), 0x2 (Binary). Opcodes
0x3-0x7 are reserved for further non-control frames yet to be
defined.
Data frames carry application-layer and/or extension-layer data.
The opcode determines the interpretation of the data:
Text
The "Payload data" is text data encoded as UTF-8. Note that a
particular text frame might include a partial UTF-8 sequence;
however, the whole message MUST contain valid UTF-8. Invalid
UTF-8 in reassembled messages is handled as described in
Section 8.1.
Binary
The "Payload data" is arbitrary binary data whose interpretation
is solely up to the application layer.
5.7. Examples
o A single-frame unmasked text message
* 0x81 0x05 0x48 0x65 0x6c 0x6c 0x6f (contains "Hello")
o A single-frame masked text message
* 0x81 0x85 0x37 0xfa 0x21 0x3d 0x7f 0x9f 0x4d 0x51 0x58
(contains "Hello")
o A fragmented unmasked text message
* 0x01 0x03 0x48 0x65 0x6c (contains "Hel")
* 0x80 0x02 0x6c 0x6f (contains "lo")
o Unmasked Ping request and masked Ping response
* 0x89 0x05 0x48 0x65 0x6c 0x6c 0x6f (contains a body of "Hello",
but the contents of the body are arbitrary)
* 0x8a 0x85 0x37 0xfa 0x21 0x3d 0x7f 0x9f 0x4d 0x51 0x58
(contains a body of "Hello", matching the body of the ping)
o 256 bytes binary message in a single unmasked frame
* 0x82 0x7E 0x0100 [256 bytes of binary data]
o 64KiB binary message in a single unmasked frame
* 0x82 0x7F 0x0000000000010000 [65536 bytes of binary data]
So it would appear that is an example of a fragment.
Also this seems relevant:
6.2. Receiving Data
To receive WebSocket data, an endpoint listens on the underlying
network connection. Incoming data MUST be parsed as WebSocket frames
as defined in Section 5.2. If a control frame (Section 5.5) is
received, the frame MUST be handled as defined by Section 5.5. Upon
receiving a data frame (Section 5.6), the endpoint MUST note the
/type/ of the data as defined by the opcode (frame-opcode) from
Section 5.2. The "Application data" from this frame is defined as
the /data/ of the message. If the frame comprises an unfragmented
message (Section 5.4), it is said that A WebSocket Message Has Been
Received with type /type/ and data /data/. If the frame is part of
a fragmented message, the "Application data" of the subsequent data
frames is concatenated to form the /data/. When the last fragment is
received as indicated by the FIN bit (frame-fin), it is said that A
WebSocket Message Has Been Received with data /data/ (comprised of
the concatenation of the "Application data" of the fragments) and type
/type/ (noted from the first frame of the fragmented message).
Subsequent data frames MUST be interpreted as belonging to a new
WebSocket message.
Extensions (Section 9) MAY change the semantics of how data is
read, specifically including what comprises a message boundary.
Extensions, in addition to adding "Extension data" before the
"Application data" in a payload, MAY also modify the "Application
data" (such as by compressing it).
The problem:
I don't know how to check for fragments and line them up with the node buffers, as mentioned in the article, I'm only able to read very small buffer amounts.
How can I parse larger data chunks using the fragmentation methods mentioned in the RFC documentation and the lining-up of nodeJS buffers alluded to (but not explained) in the article?
I came across your question when I was working on my own "pure NodeJs WebSocket server". All worked fine for payloads less than 1-2 KiB. When I was trying to send more, but still within [64 KiB - 1] limit (16 bit payload length), it randomly blow up the server with ERR_OUT_OF_RANGE error.
Side note: https://medium.com/hackernoon/implementing-a-websocket-server-with-node-js-d9b78ec5ffa8 "Implementing a WebSocket server with Node.js" by Srushtika Neelakantam is excellent article! Before I found it the WebSocket was alwas a black box to me. She described very simple and easy to understand implementation of WebSocket client/server from scratch. Unfortunately it lacks (on purpose to not make article hard) support for larger payloads and buffers alignment. I just wanted to give Srushtika Neelakantam credit because without her article I would never write my own pure NodeJs WebSocket server.
The solution described in the article fails only because the NodeJs buffer is simply over and there are no more bytes to read but the function's logic expects more bytes. You end with ERR_OUT_OF_RANGE error. Code simply wants to read bytes that are not yet available but will be available in next 'data' event.
The solution to this problem is simply to check whether the next byte that you want to read from the buffer is really available. As long as there are bytes, you are fine. The challenge starts when there are too few or too many bytes. In order to be more flexible, the function that parses the buffer should return not only the payload but a pair: payload and bufferRemainingBytes. This allows concatenating the buffers in the main data event handler.
We need to handle three cases:
When there is exactly the right amount of bytes in the buffer to build valid WebSocket frame we return
{ payload: payloadFromValidWebSocketFrame, bufferRemainingBytes: Buffer.alloc(0) }
When there are enough bytes to build valid WebSocket but still there are few left in the buffer we return
{ payload: payloadFromValidWebSocketFrame, bufferRemainingBytes: bufferBytesAfterValidWebSocketFrame }
This case also forces us to wrap all getParsedBuffer calls with a do-while loop. The bufferRemainingBytes could still contain second (or third, or more) valid WebSocket frame. We need to parse them all in currently processed socket data event.
When there are not enough bytes to build valid WebSocket frame we return empty payload and entire buffer as bufferRemainingBytes
{ payload: null, bufferRemainingBytes: buffer }
How to merge buffers together with bufferRemainingBytes in the subsequent socket data events? Here is the code:
// Accumulate socket bytes across 'data' events and repeatedly parse frames
// out of the combined buffer, so WebSocket frames that are split across or
// coalesced within TCP chunks are handled correctly.
server.on('upgrade', (req, socket) => {
let bufferToParse = Buffer.alloc(0); // at the beginning we just start with 0 bytes
// .........
socket.on('data', buffer => {
let parsedBuffer;
// concat 'past' bytes with the 'current' bytes
bufferToParse = Buffer.concat([bufferToParse, buffer]);
// loop: one combined buffer may contain several complete frames
do {
parsedBuffer = getParsedBuffer(bufferToParse);
// the output of the debugBuffer calls will be on the screenshot later
debugBuffer('buffer', buffer);
debugBuffer('bufferToParse', bufferToParse);
debugBuffer('parsedBuffer.payload', parsedBuffer.payload);
debugBuffer('parsedBuffer.bufferRemainingBytes', parsedBuffer.bufferRemainingBytes);
// whatever wasn't consumed is carried over to the next iteration/event
bufferToParse = parsedBuffer.bufferRemainingBytes;
if (parsedBuffer.payload) {
// .........
// handle the payload as you like, for example send to other sockets
}
} while (parsedBuffer.payload && parsedBuffer.bufferRemainingBytes.length);
console.log('----------------------------------------------------------------\n');
});
// .........
});
Here is how my getParsedBuffer function looks like(it was called parseMessage in the article):
// Parse at most one WebSocket frame from `buffer`. Returns
// { payload, bufferRemainingBytes }: payload is null when the buffer does
// not yet hold a complete frame. NOTE: currentOffset, isMasked, payload and
// bufferRemainingBytes are declared in code elided by the "........." lines.
const getParsedBuffer = buffer => {
// .........
// whenever I want to read X bytes I simply check if I really can read X bytes
if (currentOffset + 2 > buffer.length) {
return { payload: null, bufferRemainingBytes: buffer };
}
payloadLength = buffer.readUInt16BE(currentOffset);
currentOffset += 2;
// .........
// in 99% of cases this will prevent the ERR_OUT_OF_RANGE error to happen
if (currentOffset + payloadLength > buffer.length) {
console.log('[misalignment between WebSocket frame and NodeJs Buffer]\n');
return { payload: null, bufferRemainingBytes: buffer };
}
payload = Buffer.alloc(payloadLength);
if (isMasked) {
// ......... I skip masked code as it's too long and not masked shows the idea same way
} else {
// copy the payload bytes one by one into the freshly allocated buffer
for (let i = 0; i < payloadLength; i++) {
payload.writeUInt8(buffer.readUInt8(currentOffset++), i);
}
}
// it could also happen at this point that we already have a valid WebSocket payload
// but there are still some bytes remaining in the buffer
// we need to copy all unused bytes and return them as bufferRemainingBytes
bufferRemainingBytes = Buffer.alloc(buffer.length - currentOffset);
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ this value could be >= 0
for (let i = 0; i < bufferRemainingBytes.length; i++) {
bufferRemainingBytes.writeUInt8(buffer.readUInt8(currentOffset++), i);
}
return { payload, bufferRemainingBytes };
}
Real life test of the described solution (64 KiB - 1 bytes):
In short - the above solution should work fine with payloads up to [64 KiB - 1] bytes. It's written entirely in pure NodeJs without any external library. I guess that is what you were looking for in your project ;)
Please find below the links to full version of my Binary Broadcast App on GitHub gist:
server https://gist.github.com/robertrypula/b813ffe23a9489bae1b677f1608676c8
client https://gist.github.com/robertrypula/f8da8f89819068a97bef4f27d04ad5b7
For some time (before I deploy the updated app with more features) the live demo of above's gist can be found here:
http://sndu.pl - let's send you the file
This is not a perfect answer, but an approach: it is how I would do what you are trying to do. I am writing pseudo code just to save time ;)
first i will be creating a custom object to communicate :
// Common message envelope shared by client and server.
class Request {
id?: string; // unique id of the request; the same id is reused to continue a request or to reply to one
api?: string; // the request type, i.e. how the data should be used — a client can perform multiple operations on the server, like API_AUTH or API_CREATE_FILE etc.
complete?: boolean; // flag: is the request complete, or must it be queued to wait for more data
error?: boolean; // useful in replies, when the server has processed the request and wants to respond with error or success
message?: string; // a message that can be shown to the user, or a raw note to help the developer debug
data?: any; // the actual data being sent
}
now for communicating between both the sides(i m taking server client approach in this example) we ll use this object.
now here is some pseudo code about how to process on server
// Sketch of the server-side dispatcher for Request envelopes.
class Server {
// pending multi-part requests, keyed by request id
requestQueue: Map<string, Request> = new Map();
// entry point for every incoming Request object
onRequestReceived(request: Request) {
if(request !== undefined){
switch(request.api){
case "API_LONG_DATA":
if(this.requestQueue.get(request.id) !== undefined){
if(request.complete){
// add this data to the request in the queue, process the request and remove it from the queue
}else{
// add data to the request in the queue and re-save it to the map
}
}else{
if(request.complete){
// process your request here
}else{
// add this request to the queue
}
}
break;
case "API_AUTH":
// just a sample api
break;
}
}else{
// respond with error
}
}
}
this is easier than playing with buffers i believe and even i have used this approach a lot many times and sending large chunk of data is not a good practice because it can be used by someone to exploit your resources and it might fail in low networks.
so hope you get some hints from my approach ;)
UPDATE[full implementation]
first we need websoket package so
npm install websocket
now this is how we create websocket server in node.js using websocket package and process incoming requests
server.ts
import { WebSocketServer } from 'websocket';
import * as http from 'http';
// this is the request data object which ll serve as a common data entity that both server and client are aware of
// this is the request data object which will serve as a common data entity that both server and client are aware of
class Request {
id?: string; // unique id of the request; the same id is reused to continue a request or to reply to one
api?: string; // the request type, i.e. how the data should be used — a client can perform multiple operations on the server, like API_AUTH or API_CREATE_FILE etc.
complete?: boolean; // flag: is the request complete, or must it be queued to wait for more data
error?: boolean; // useful in replies, when the server has processed the request and wants to respond with error or success
message?: string; // a message that can be shown to the user, or a raw note to help the developer debug
data?: any; // the actual data being sent
}
// this is optional if you want to show 404 on the page
const server = http.createServer((request, response) => {
response.writeHead(404);
response.end();
});
server.listen(8080, function() {
console.log((new Date()) + ' Server is listening on port 8080');
});
// WebSocket endpoint piggy-backing on the HTTP server; connections are NOT
// auto-accepted so we get a chance to vet the origin first.
const wsServerOptions = {
  httpServer: server,
  autoAcceptConnections: false,
};
const wsServer = new WebSocketServer(wsServerOptions);
// Origin allow-list hook: return false to reject a connection from an unwanted origin.
function originIsAllowed(origin) {
  // Currently every origin is accepted — tighten this before production use.
  const accepted = true;
  return accepted;
}
wsServer.on('request', (request) => {
if (originIsAllowed(request.origin)) {
const connection = request.accept('echo-protocol', request.origin);
// per-connection queue of partially-received requests, keyed by request id,
// for payloads too large to fit into a single message
const requestQueue: Map<string, Request> = new Map();
connection.on('message', (message) => {
// this example assumes clients send utf8 text frames containing JSON
if (message.type === 'utf8') {
// reconstruct the Request envelope from the incoming JSON
const request: Request = JSON.parse(message.utf8Data);
// dispatch on the api field
// NOTE(review): the client example below sends api "API_LONG_CHUNCK" for chunked
// data — that string must match the case label here for the queueing to work.
switch(request.api){
case "API_LONG_DATA":
if(requestQueue.get(request.id) !== undefined){
if(request.complete){
// final part of a queued request: append this data, process the
// assembled request, then remove it from the queue
}else{
// middle part: append the data to the queued request and save it back to the map
}
}else{
if(request.complete){
// self-contained request: process it right here
}else{
// first part of a multi-part request: add it to the queue and wait for more
}
}
break;
case "API_AUTH":
// just a sample api
break;
}
}else{
// handle other frame types (e.g. binary) here
}
});
connection.on('close', (reasonCode, description) => {
// the connection closed: clean up its requestQueue entries here
});
}else{
// only accept requests from an allowed origin
request.reject();
}
});
Here is how you send data from the client:
client.ts
import { WebSocketClient } from 'websocket';
// this is the request data object which ll serve as a common data entity that both server and client are aware of
class Request {
id?: string; // unique request id; reuse the same id to continue a multi-part request or to reply to one
api?: string; // operation selector, e.g. "API_AUTH" or "API_CREATE_FILE" — tells the server how this data should be handled
complete?: boolean; // false = more parts of this request follow (queue it); true = the request is whole and can be processed
error?: boolean; // used on replies: true when the server processed the request but wants to report a failure
message?: string; // free-form status/debug text accompanying the request or reply
data?: any; // the actual payload being sent
}
const client = new WebSocketClient();
client.on('connectFailed', (error) => {
// called when the initial connection attempt fails (server down, handshake rejected, ...)
});
client.on('connect', (connection) => {
connection.on('error', (error)=> {
// handle errors that occur on an already-established connection
});
connection.on('close', () => {
// the connection has been closed (by either side)
});
connection.on('message', function(message) {
// this example assumes both ends exchange utf8 text frames
if (message.type === 'utf8') {
// parse the server's reply back into a Request envelope
const request: Request = JSON.parse(message.utf8Data);
// handle the reply here (check request.error / request.message / request.data)
}else{
// handle other frame types (e.g. binary) here
}
});
// from here on we can talk to the server
// example 1: a normal, self-contained request (complete: true, no queueing needed)
const authRequest: Request = {
id: "auth_request_id",
api: "API_AUTH",
complete: true,
data: {
user: "testUser",
pass: "testUserPass"
}
}
connection.sendUTF(JSON.stringify(authRequest));
// example 2: a long request split into two parts that share one request id
// NOTE(review): the server example above switches on "API_LONG_DATA", but these
// chunks use "API_LONG_CHUNCK" — the api strings must match for queueing to work.
const longRequestChunk1: Request = {
id: "long_chunck_request_id",
api: "API_LONG_CHUNCK",
complete: false, // not complete yet: the server should queue this part and wait for more
data: "..." // part one of the long data
}
const longRequestChunk2: Request = {
id: "long_chunck_request_id", // must be the same id so the server can correlate the parts
api: "API_LONG_CHUNCK", // same api as the first chunk
complete: true, // final part: the server can now assemble and process the whole request
data: "..." // part two of the long data
}
connection.sendUTF(JSON.stringify(longRequestChunk1));
connection.sendUTF(JSON.stringify(longRequestChunk2));
});
I can explain it further if you want ;)

NodeJs: Never emits "end" when reading a TCP Socket

I am pretty new to Node.Js and I'm using tcp sockets to communicate with a client. Since the received data is fragmented I noticed that it prints "ondata" to the console more than once. I need to be able to read all the data and concatenate it in order to implement the other functions. I read the following http://blog.nodejs.org/2012/12/20/streams2/ and thought I can use socket.on('end',...) for this purpose. But it never prints "end" to the console.
Here is my code:
/**
 * Send one framed request over the client's TCP socket and accumulate the reply.
 * @param {*} req - request object; serialized by protocol.encodeRequest()
 * @param {Function} cb - called with an Error if the socket errors
 */
Client.prototype.send = function send(req, cb) {
  var self = this;
  // Frame the request: a 16-byte header followed by the encoded payload.
  var buffer = protocol.encodeRequest(req);
  // Bug fix: `new Buffer(16)` returned UNINITIALIZED memory (and is deprecated),
  // so garbage bytes were sent as the header. Buffer.alloc zero-fills.
  var header = Buffer.alloc(16);
  var packet = Buffer.concat([ header, buffer ], 16 + buffer.length);

  // Detach every listener this call attached, so repeated send() calls don't
  // pile up listeners on the shared socket.
  function cleanup() {
    // Bug fix: ondata is attached on 'readable', but the original removed a
    // (nonexistent) 'data' listener, leaking ondata on every send.
    self.socket.removeListener('readable', ondata);
    self.socket.removeListener('end', onend);
    self.socket.removeListener('error', onerror);
  }

  var body = '';

  // A TCP socket is a byte stream: the reply may arrive across many 'readable'
  // events, so drain whatever is available and accumulate it.
  function ondata() {
    var chunk = this.read() || '';
    body += chunk;
    console.log('ondata');
  }

  // 'end' fires only when the peer sends FIN (closes its write side).
  function onend() {
    console.log('end');
    cleanup();
  }

  function onerror(err) {
    cleanup();
    cb(err);
  }

  self.socket.on('readable', ondata);
  self.socket.on('end', onend);
  self.socket.on('error', onerror);
  self.socket.write(packet);
};
The end event fires on receipt of a FIN packet of the TCP protocol (in other words: it handles the connection-close packet).
Event: 'end'#
Emitted when the other end of the socket sends a FIN packet.
By default (allowHalfOpen == false) the socket will destroy its file descriptor once it has written out its pending write queue. However, by setting allowHalfOpen == true the socket will not automatically end() its side allowing the user to write arbitrary amounts of data, with the caveat that the user is required to end() their side now.
About the FIN packet: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Connection_termination
The solution
I understand your problem: the network splits your message across several packets that may arrive with gaps in between, and you just want to read the full content.
To solve this problem I recommend you create a protocol: first send a number giving the size of your message, then keep concatenating incoming data while the size of your concatenated message is less than the total message size :)
I have created a lib yesterday to simplify that issue: https://www.npmjs.com/package/node-easysocket
I hope it helps :)

NodeJS: How to write a file parser using readStream?

I have a file in a binary format:
The format is as follows:
[4 - header bytes] [8 bytes - int64 - how many bytes to read following] [variable num of bytes (size of the int64) - read the actual information]
And then it repeats, so I must first read the first 12 bytes to determine how many more bytes I need to read.
I have tried:
var readStream = fs.createReadStream('/path/to/file.bin');
readStream.on('data', function(chunk) { ... })
The problem I have is that chunk always comes back in chunks of 65536 bytes at a time whereas I need to be more specific on the number of bytes that I am reading.
I have also tried readStream.on('readable', function() { readStream.read(4) })
But it is also not very flexible, because it seems to turn asynchronous code into synchronous code because, I have to put the 'reading' in a while loop
Or maybe readStream is not appropriate in this case and I should use this instead? fs.read(fd, buffer, offset, length, position, callback)
Here's what I'd recommend as an abstract handler of a readStream to process abstract data like you're describing:
// Accumulate stream chunks into `pending` until attemptToParse() can extract one
// or more complete blobs, then shift the unconsumed remainder back to the front.
// Buffer.alloc zero-fills (the deprecated `new Buffer(size)` left the memory
// uninitialized).
var pending = Buffer.alloc(9999999);
var cursor = 0; // number of valid bytes currently held in `pending`
stream.on('data', function(d) {
  d.copy(pending, cursor);
  cursor += d.length;
  var test = attemptToParse(pending.slice(0, cursor));
  while (test !== false) {
    // test is a valid blob of data
    processTheThing(test);
    var rawSize = test.raw.length; // How many bytes of data did the blob actually take up?
    // Bug fix: the first argument must be the target buffer itself; the original
    // passed `pending.copy` (the method), which is not a Buffer.
    pending.copy(pending, 0, rawSize, cursor); // move the tail after the blob down to index 0
    cursor -= rawSize;
    test = attemptToParse(pending.slice(0, cursor)); // Is there more than one valid blob of data in this chunk? Keep processing if so
  }
});
For your use-case, ensure the initialized size of the pending Buffer is large enough to hold the largest possible valid blob of data you'll be parsing (you mention an int64; that max size plus the header size) plus one extra 65536 bytes in case the blob boundary happens just on the edge of a stream chunk.
My method requires a attemptToParse() method that takes a buffer and tries to parse the data out of it. It should return false if the length of the buffer is too short (data hasn't come in enough yet). If it is a valid object, it should return some parsed object that has a way to show the raw bytes it took up (.raw property in my example). Then you do any processing you need to do with the blob (processTheThing()), trim out that valid blob of data, shift the pending Buffer to just be the remainder and keep going. That way, you don't have a constantly growing pending buffer, or some array of "finished" blobs. Maybe process on the receiving end of processTheThing() is keeping an array of the blobs in memory, maybe it's writing them to a database, but in this example, that's abstracted away so this code just deals with how to handle the stream data.
Add the chunk to a Buffer, and then parse the data from there. Being aware not to go beyond the end of the buffer (if your data is large). I'm using my tablet right now so can't add any example source code. Maybe somebody else can?
Ok, mini source, very skeletal.
// Minimal skeleton: collect chunks, coalesce them into one Buffer per event,
// and parse from the coalesced Buffer.
var chunks = [];
var bytesRead = 0; // total bytes received so far
stream.on('data', function(chunk) {
  chunks.push(chunk);
  bytesRead += chunk.length;
  // look at bytesRead...
  var buffer = Buffer.concat(chunks);
  chunks = [buffer]; // keep the single coalesced buffer for the next event
  // --> or, if memory is an issue, remove completed data from the beginning of chunks
  // work with the buffer here...
}); // bug fix: the original snippet never closed the stream.on(...) call (missing `);`)

How to calculate node.js socket buffer to avoid allocating memory and never using it?

I'm using node.js as a server between pairs of clients, to handle my online game.
Clients send short messages between them [one message should not exceed 200 bytes].
Currently I expect single client to send [on average] 1 message per second [keeping in mind it can be 5 seconds of nothing and 5 messages one after another].
I've downloaded a sample server using 'net' module and rewritten it to handle the messages the way I need them to be handled.
Basically, for every connected socket, it creates a Buffer with size of 1024*8.
Currently I'm testing my game with some bots, which simply connect, wait 3 seconds and disconnect. They only send 1 message. Nothing else happening.
// Single choke-point for writing an outbound message to a client socket.
function sendMessage(socket, message) {
  var payload = message;
  socket.write(payload);
}
server.on('connection', function(socket) {
// disable Nagle so small game messages are flushed immediately
socket.setNoDelay(true);
socket.connection_id = require('crypto').createHash('sha1').update( 'krystian' + Date.now() + Math.random() ).digest('hex') ; // unique per-connection id: sha1 of a salted timestamp + random
socket.channel = '';
socket.matchInProgress = false
socket.resultAnnounced = false;
// fixed-size per-connection receive buffer; incoming fragments accumulate here
socket.buffer = new Buffer(cfg.buffer_size);
socket.buffer.len = 0; // a Buffer has a fixed length, so we track the number of used bytes ourselves
_log('New client: ' + socket.remoteAddress +':'+ socket.remotePort);
socket.on('data', function(data_raw) { // data_raw is an instance of Buffer as well
// drop everything if this fragment would overflow the fixed buffer
if (data_raw.length > (cfg.buffer_size - socket.buffer.len)) {
_log("Message doesn't fit the buffer. Adjust the buffer size in configuration");
socket.buffer.len = 0; // discard the partial buffer contents
return false;
}
socket.buffer.len += data_raw.copy(socket.buffer, socket.buffer.len); // keeping track of how much data we have in buffer
var str, start, end
, conn_id = socket.connection_id;
// work on a string view of everything buffered so far
str = socket.buffer.slice(0,socket.buffer.len).toString();
// first message kind: channel subscription, delimited by a tag pair
// NOTE(review): the opening tag "<somthing>" and closing tag "</something>" don't match,
// and substr(end + 11) looks one short for the 12-char closing tag — verify both.
if ( (start = str.indexOf("<somthing>")) != -1 && (end = str.indexOf("</something>")) != -1) {
try {
if (!<some check to see if the message format is right>) {
sendMessage(socket, "<error message to the client>");
return;
}
<storing info on the socket>
} catch(err) {
sendMessage(socket, "<error message to the client>");
return;
}
socket.channel = <channel>;
// consume the processed message and rewrite the remainder back into the buffer
str = str.substr(end + 11);
socket.buffer.len = socket.buffer.write(str, 0);
sockets[socket.channel] = sockets[socket.channel] || {}; // hashmap of sockets subscribed to the same channel
sockets[socket.channel][conn_id] = socket;
waiting[socket.channel] = waiting[socket.channel] || {};
waiting[socket.channel][conn_id] = socket;
sendMessage(socket, "<info message to the client>");
// advertise this client to everyone else waiting on the same channel
for (var prop in waiting[socket.channel]) {
if (waiting[socket.channel].hasOwnProperty(prop) && waiting[socket.channel][prop].connection_id != socket.connection_id) {
<here I'll try to advertise this client among other clients>
sendMessage(waiting[socket.channel][prop], "<info to other clients about new client>");
}
}
}
var time_to_exit = true;
do{ // loop because several complete messages may have accumulated in the buffer
if ( (start = str.indexOf("<some other format>")) != -1 && (end = str.indexOf("</some other format>")) != -1 ) {
var json = str.substr( start+19, end-(start+19) );
var jsono;
try {
jsono = JSON.parse(json);
} catch(err) {
sendMessage(socket, "<parse error>");
return;
}
if (<message indicates two clients are going to play together>) {
// pair the two clients up: both must still be on the waiting list
if (waiting[socket.channel][jsono.other_client_id] && waiting[socket.channel][socket.connection_id]) {
delete waiting[socket.channel][jsono.other_client_id];
delete waiting[socket.channel][socket.connection_id];
var opponentSocket = sockets[socket.channel][jsono.other_client_id];
sendMessage(opponentSocket, "<start game with the other socket>");
opponentSocket.opponentConnectionId = socket.connection_id;
sendMessage(socket, "<start game with the other socket>");
socket.opponentConnectionId = jsono.other_client_id;
}
} else if (<check if clients play together>) {
var opponentSocket = sockets[socket.channel][socket.opponentConnectionId];
if (<some generic action between clients, just pass the message>) {
// in-game traffic: just relay the raw json to the opponent
sendMessage(sockets[socket.channel][socket.opponentConnectionId], json);
} else if (<match is over>) {
if (<match still in progress>) {
<send some messages indicating who won, who lost>
} else {
<log an error>
}
delete sockets[socket.channel][opponentSocket.connection_id];
delete sockets[socket.channel][socket.connection_id];
}
}
str = str.substr(end + 20); // drop the processed message (and anything before it) from the working string
socket.buffer.len = socket.buffer.write(str, 0);
time_to_exit = false; // a message was consumed, so look for another one
} else { time_to_exit = true; } // no complete json message left in the buffer - time to exit this loop
} while ( !time_to_exit );
}); // end of socket.on 'data'
socket.on('close', function(){ // remove the closed socket from the bookkeeping maps
if (!socket.channel || !sockets[socket.channel]) return;
if (waiting[socket.channel] && waiting[socket.channel][socket.connection_id]) {
delete waiting[socket.channel][socket.connection_id];
}
var opponentSocket = sockets[socket.channel][socket.opponentConnectionId];
if (opponentSocket) {
// tell the opponent their peer is gone and drop the opponent's entry too
sendMessage(opponentSocket, "<the other client has disconnected>");
delete sockets[socket.channel][socket.opponentConnectionId];
}
delete sockets[socket.channel][socket.connection_id];
_log(socket.connection_id + " has been disconnected from channel " + socket.channel);
}); // end of socket.on 'close'
}); // end of server.on 'connection'
// Log the bound address once the kernel has actually started listening.
server.on('listening', () => {
  const addr = server.address();
  console.log('Listening on ' + addr.address + ':' + addr.port);
});
server.listen(cfg.port);
I've pasted the above code [a very stripped-down version of the original] to give you an idea of how simple the server is.
I've got an array of sockets, who joined the game and array of sockets on the waiting list, waiting for another client to play with.
Nothing else is going on.
Still the script is memory hungry - 5 hours of connecting and disconnecting gave me this:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
31461 ec2-user 20 0 995m 91m 7188 S 0.7 15.4 1:29.07 node
I think this is way too much.
I'm using nodetime.com free service at the moment to monitor the script, but none of the metrics would suggest the script gained so much memory (it starts with just 10-12MB).
I believe this is due to the buffers, and because they allocate too much memory.
I'm only wondering, if my assumptions regarding buffer size are correct.
Should I adjust the buffer to reflect the amount of data I expect from the client?
If I expect the client to send 5 messages with a very short time between them, 200 bytes max each, should I assume that 1024*3 would be enough?
Or should I adjust buffer size according to the message size I expect, so if I'm sure the message will never go above 300 bytes, I should be fine with buffer size of 512?
Thanks,
Krystian
EDIT:
Node version:
$ node -v
v0.10.5
$ npm -v
1.2.19
EDIT2:
I've tested the script with 400 connections connecting and disconnecting and memory usage dropped significantly to around 60MB. After changing the test setup back to 4 connections it went up again.
The kernel has a socket receive buffer which is at least 8k., which takes care of multiple incoming messages on the socket. You don't need to buffer messages you've already read, so your application buffer doesn't need to be any bigger than the largest expected message.

Is http.ServerResponse.write() blocking?

Is it possible to write non-blocking response.write? I've written a simple test to see if other clients can connect while one downloads a file:
// NOTE: 'connect' is a third-party middleware framework (npm install connect).
var connect = require('connect');
// Build a big in-memory payload by repeated doubling: 29 doublings of one
// character yield 2^29 = 536,870,912 chars.
var longString = 'a';
for (var i = 0; i < 29; i++) { // 512 MiB
longString += longString;
}
console.log(longString.length)
// Serve the whole pre-built string as a single forced download in one write().
function download(request, response) {
  const headers = [
    ["Content-Length", longString.length],
    ["Content-Type", "application/force-download"],
    ["Content-Disposition", 'attachment; filename="file"'],
  ];
  for (const [name, value] of headers) {
    response.setHeader(name, value);
  }
  response.write(longString); // one giant write: the string->Buffer conversion happens here
  response.end();
}
// wire the download handler into a connect app and serve it on port 80
var app = connect().use(download);
connect.createServer(app).listen(80);
And it seems like write is blocking!
Am I doing something wrong?
Update So, it doesn't block and it blocks in the same time. It doesn't block in the sense that two files can be downloaded simultaneously. And it blocks in the sense that creating a buffer is a long operation.
Any processing done strictly in JavaScript will block. response.write(), at least as of v0.8, is no exception to this:
The first time response.write() is called, it will send the buffered header information and the first body to the client. The second time response.write() is called, Node assumes you're going to be streaming data, and sends that separately. That is, the response is buffered up to the first chunk of body.
Returns true if the entire data was flushed successfully to the kernel buffer. Returns false if all or part of the data was queued in user memory. 'drain' will be emitted when the buffer is again free.
What may save some time is to convert longString to Buffer before attempting to write() it, since the conversion will occur anyways:
var longString = 'a';
for (...) { ... }
longString = new Buffer(longString);
But, it would probably be better to stream the various chunks of longString rather than all-at-once (Note: Streams are changing in v0.10):
// Stream the payload as many small chunks instead of allocating one enormous
// string: pre-build a 1-byte Buffer and write it chunkCount times.
var longString = 'a';
var chunkCount = Math.pow(2, 29);               // number of chunks to send (total ~512 MiB)
var bufferSize = Buffer.byteLength(longString); // bytes per chunk
// Buffer.from replaces the deprecated `new Buffer(string)` constructor (DEP0005).
var longBuffer = Buffer.from(longString);

/**
 * Respond with `chunkCount` copies of longBuffer, honoring backpressure.
 * @param request - incoming http request (unused)
 * @param response - http response to stream into
 */
function download(request, response) {
  var current = 0;
  response.setHeader("Content-Length", bufferSize * chunkCount);
  response.setHeader("Content-Type", "application/force-download");
  response.setHeader("Content-Disposition", 'attachment; filename="file"');

  // Write one chunk at a time: if the kernel buffer is full (write() returns
  // false) wait for 'drain' instead of queueing everything in user memory.
  function writeChunk() {
    if (current < chunkCount) {
      current++;
      if (response.write(longBuffer)) {
        // yield to the event loop so other clients are served between chunks
        process.nextTick(writeChunk);
      } else {
        response.once('drain', writeChunk);
      }
    } else {
      response.end();
    }
  }
  writeChunk();
}
And, if the eventual goal is to stream a file from disk, this can be even easier with fs.createReadStream() and stream.pipe():
// Stream a file from disk straight into the response; pipe() handles
// backpressure (pausing on full buffers, resuming on 'drain') automatically.
function download(request, response) {
  // response.setHeader(...)
  // ...
  const fileStream = fs.createReadStream('./file-on-disk');
  fileStream.pipe(response);
}
Nope, it does not block, I tried one from IE and other from firefox. I did IE first but still could download file from firefox first.
I tried for 1 MB (i < 20) it works the same just faster.
You should know that whatever longString you create requires memory allocation. Try to do it for i < 30 (on windows 7) and it will throw FATAL ERROR: JS Allocation failed - process out of memory.
It takes time for memory allocation/copying nothing else. Since it is a huge file, the response is time taking and your download looks like blocking. Try it yourself for smaller values (i < 20 or something)

Resources