Should `drain` event be registered before the result of the write is available - node.js

I have this snippet of code:
const file = fs.createWriteStream('./test.txt');
let written = true;
// handler is added before even an attempt to write is made
file.on('drain', function () {
written = true;
console.log('drained');
});
const interval = setInterval(function () {
if (Date.now() - time > 10000) {
clearInterval(interval);
}
if (written) {
written = file.write(new Array(1000000).join('z'));
}
}, 100);
I'm wondering if that a standard practice to add handler even an attempt to write is made?

In case of using file.on('drain') listener you set up general listener to drain event of your stream.
Notice: This listener will be removed after closing of writable stream.
Generally that code will work proper, but most common practice in Node.js is to use stream.once('drain') handler for each case of internal buffer exceeding. That approach is covered in Node.js documentation for Event: 'drain':
function writeOneMillionTimes(writer, data, encoding, callback) {
var i = 1000000;
write();
function write() {
var ok = true;
do {
i -= 1;
if (i === 0) {
// last time!
writer.write(data, encoding, callback);
} else {
// see if we should continue, or wait
// don't pass the callback, because we're not done yet.
ok = writer.write(data, encoding);
}
} while (i > 0 && ok);
if (i > 0) {
// had to stop early!
// write some more once it drains
writer.once('drain', write);
}
}
}

Related

NodeJS sleep with promise takes too long

I'm trying to improve performance of processing my worker's incoming messages using a queue.
However, the sleep function takes anywhere between 16 to 30 milliseconds to complete instead of the 1 millisecond. Is there a better way to process the queue without this large delay, and without keeping my app at 100% cpu?
I am doing something like this:
var actions = new Queue();
parentPort.on('message', (msg) => actions.enqueue(msg));
loopy();
async function loopy() {
while (true) {
if (actions.size() > 0) {
let action = actions.dequeue();
//do work
continue;
}
await sleep(1);
}
}
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
Any help would be appreciated. Thanks!
while(true) is (usually) not a good idea.
You should call the dequeue function after you enqueue a message. The dequeue function should end when 1. there is already a dequeue function running 2. no more message in the queue.
var isProcessing = false;
var actions = new Queue();
parentPort.on('message', (msg) => {
actions.enqueue(msg)
tryDequeue();
});
async function tryDequeue() {
if(isProcessing || actions.size() == 0)
{
return;
}
isProcessing = true;
let action = actions.dequeue();
//do work
isProcessing = false;
tryDequeue();
}

node.js: create a connected writable and readable stream pair

I am trying to create a function that returns a connected writable and readable stream pair. eg:
const { writable, readable } = createStreamPair();
where each end has the right interface (writable instanceof stream.Readable === false and readable instanceof stream.Writable === false) unlike the PassThrough stream.
use case:
createWriteStream(filePath) {
const { writable, readable } = createStreamPair();
writeFile(filePath, readable);
return writable;
}
How to create my createStreamPair() function ?
Edit1
A naive approach that obviously does not work ...
function createStreamPair() {
var readable = new stream.Readable();
var writable = new stream.Writable();
readable.pipe(writable);
return { writable, readable }
}
The Node.js tests uses a function that creates two Duplex streams, writes to one can be read from the other, and vice-versa: https://github.com/nodejs/node/blob/master/test/common/duplexpair.js
It isn't part of the Node.js standard library, but you can write your own.
I'll present a slightly modified, annotated version here:
const Duplex = require('stream').Duplex;
const assert = require('assert');
// Define some unique property names.
// The actual value doesn't matter,
// so long as they're not used by Node.js for anything else.
const kCallback = Symbol('Callback');
const kOtherSide = Symbol('Other');
// Define a function `DuplexSocket` whose prototype inherits from `Duplex`
class DuplexSocket extends Duplex {
constructor() {
// Let Node.js initialize everything it needs to
super();
// Define two values we will be using
// kCallback saves a temporary reference to a function while
this[kCallback] = null;
// kOtherSide will be the reference to the other side of the stream
this[kOtherSide] = null;
}
_read() {
// This is called when this side receives a push() call
// If the other side set a callback for us to call,
// then first clear that reference
// (it might be immediately set to a new value again),
// then call the function.
const callback = this[kCallback];
if (callback) {
this[kCallback] = null;
callback();
}
}
_write(chunk, encoding, callback) {
// This is called when someone writes to the stream
// Ensure there's a reference to the other side before trying to call it
assert.notStrictEqual(this[kOtherSide], null);
// Ensure that the other-side callback is empty before setting it
// If push immediately calls _read, this should never be a problem
assert.strictEqual(this[kOtherSide][kCallback], null);
if (chunk.length === 0) {
// callback is not called for zero-length chunks
process.nextTick(callback);
} else {
// Let Node.js know when _read has been called
this[kOtherSide][kCallback] = callback;
// And finally, send the other side the data to be read
this[kOtherSide].push(chunk);
}
}
_final(callback) {
// Ask the other side to let us know it received our EOF request
this[kOtherSide].on('end', callback);
// And finally, pushing null signals the end of the stream
this[kOtherSide].push(null);
}
}
function makeDuplexPair() {
// Create two pairs of
const clientSide = new DuplexSocket();
const serverSide = new DuplexSocket();
// Set the other-side reference
clientSide[kOtherSide] = serverSide;
serverSide[kOtherSide] = clientSide;
// Both instances behave the same, so choice of name doesn't matter,
// So long as they're distinguishable.
return { clientSide, serverSide };
}
module.exports = makeDuplexPair;
Here's another way of creating two streams, one Readable and one Writable in this case:
function makeAsymmetricalStreamPair() {
var readableCallback;
const readableSide = new ReadableStream;
readableSide._read = function _read(){
if(!readableCallback) return;
var callback = readableCallback;
readableCallback = null;
callback();
}
const writableSide = new WritableStream;
writableSide._write = function _write(chunk, enc, callback){
if (readableCallback) throw new Error;
if (chunk.length === 0) {
process.nextTick(callback);
} else {
readableCallback = callback;
readableSide.push(chunk);
}
}
writableSide._final = function _final(callback){
readableSide.on('end', callback);
readableSide.push(null);
}
return { readableSide, writableSide };
}
As of today you can use stream.PassTrough

Why Readable.push() return false every time Readable._read() is called

I have the following readable stream in typescript:
import {Readable} from "stream";
enum InputState {
NOT_READABLE,
READABLE,
ENDED
}
export class Aggregator extends Readable {
private inputs: Array<NodeJS.ReadableStream>;
private states: Array<InputState>;
private records: Array<any>;
constructor(options, inputs: Array<NodeJS.ReadableStream>) {
// force object mode
options.objectMode = true;
super(options);
this.inputs = inputs;
// set initial state
this.states = this.inputs.map(() => InputState.NOT_READABLE);
this.records = this.inputs.map(() => null);
// register event handlers for input streams
this.inputs.forEach((input, i) => {
input.on("readable", () => {
console.log("input", i, "readable event fired");
this.states[i] = InputState.READABLE;
if (this._readable) { this.emit("_readable"); }
});
input.on("end", () => {
console.log("input", i, "end event fired");
this.states[i] = InputState.ENDED;
// if (this._end) { this.push(null); return; }
if (this._readable) { this.emit("_readable"); }
});
});
}
get _readable () {
return this.states.every(
state => state === InputState.READABLE ||
state === InputState.ENDED);
}
get _end () {
return this.states.every(state => state === InputState.ENDED);
}
_aggregate () {
console.log("calling _aggregate");
let timestamp = Infinity,
indexes = [];
console.log("initial record state", JSON.stringify(this.records));
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
return;
}
}
// update timestamp if a better one is found
if (this.records[i] && timestamp > this.records[i].t) {
timestamp = this.records[i].t;
// clean the indexes array
indexes.length = 0;
}
// include the record index if has the required timestamp
if (this.records[i] && this.records[i].t === timestamp) {
indexes.push(i);
}
});
console.log("final record state", JSON.stringify(this.records), indexes, timestamp);
// end prematurely if after trying to read inputs the aggregator is
// not ready
if (!this._readable) {
console.log("end prematurely trying to read inputs", this.states);
this.push(null);
return;
}
// end prematurely if all inputs are ended and there is no remaining
// record values
if (this._end && indexes.length === 0) {
console.log("end on empty indexes", this.states);
this.push(null);
return;
}
// create the aggregated record
let record = {
t: timestamp,
v: this.records.map(
(r, i) => indexes.indexOf(i) !== -1 ? r.v : null
)
};
console.log("aggregated record", JSON.stringify(record));
if (this.push(record)) {
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
} else {
console.log("record failed to push downstream");
}
}
_read () {
console.log("calling _read", this._readable);
if (this._readable) { this._aggregate(); }
else {
this.once("_readable", this._aggregate.bind(this));
}
}
}
It is designed to aggregate multiple input streams in object mode. In the end it aggregate multiple time series data streams into a single one. The problem i'm facing is that when i test the feature i'm seeing repeatedly the message record failed to push downstream and immediately the message calling _read true and in between just the 3 messages related to the aggregation algorithm. So the Readable stream machinery is calling _read and every time it's failing the push() call. Any idea why is this happening? Did you know of a library that implement this kind of algorithm or a better way to implement this feature?
I will answer myself the question.
The problem was that i was misunderstanding the meaning of the this.push() return value call. I think a false return value mean that the current push operation fail but the real meaning is that the next push operation will fail.
A simple fix to the code shown above is to replace this:
if (this.push(record)) {
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
} else {
console.log("record failed to push downstream");
}
By this:
this.push(record);
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
You can notice that the only difference is avoid conditioning operations on the return value of the this.push() call. Given that the current implementation call this.push() only once per _read() call this simple change solve the issue.
It means feeding is faster than consuming. The official approach is enlarge its highWaterMark, Default: 16384 (16KB), or 16 for objectMode. As long as its inner buffer is big enough, the push function will always return true. It does not have to be single push() in single _read(). You may push as much as the highWaterMark indicates in a single _read().

How do you implement a stream that properly handles backpressure in node.js?

I can't for the life of me figure out how to implement a stream that properly handles backpressure. Should you never use pause and resume?
I have this implementation I'm trying to get to work correctly:
var StreamPeeker = exports.StreamPeeker = function(myStream, callback) {
stream.Readable.call(this, {highWaterMark: highWaterMark})
this.stream = myStream
myStream.on('readable', function() {
var data = myStream.read(5000)
//process.stdout.write("Eff: "+data)
if(data !== null) {
if(!this.push(data)) {
process.stdout.write("Pause")
this.pause()
}
callback(data)
}
}.bind(this))
myStream.on('end', function() {
this.push(null)
}.bind(this))
}
util.inherits(StreamPeeker, stream.Readable)
StreamPeeker.prototype._read = function() {
process.stdout.write("resume")
//this.resume() // putting this in for some reason causes the stream to not output???
}
It correctly sends output, but doesn't correctly produce backpressure. How can I change it to properly support backpressure?
Ok I finally figured it out after lots of trial and error. A couple guidelines:
Never ever use pause or resume (otherwise it'll go into legacy "flowing" mode)
Never add a "data" event listener (otherwise it'll go into legacy "flowing" mode)
Its the implementor's responsibility to keep track of when the source is readable
Its the implementor's responsibility to keep track of when the destination wants more data
The implementation should not read any data until the _read method is called
The argument to read tells the source to give it that many bytes, it probably best to pass the argument passed to this._read into the source's read method. This way you should be able to configure how much to read at a time at the destination, and the rest of the stream chain should be automatic.
So this is what I changed it to:
Update: I created a Readable that is much easier to implement with proper back-pressure, and should have just as much flexibility as node's native streams.
var Readable = stream.Readable
var util = require('util')
// an easier Readable stream interface to implement
// requires that subclasses:
// implement a _readSource function that
// * gets the same parameter as Readable._read (size)
// * should return either data to write, or null if the source doesn't have more data yet
// call 'sourceHasData(hasData)' when the source starts or stops having data available
// calls 'end()' when the source is out of data (forever)
var Stream666 = {}
Stream666.Readable = function() {
stream.Readable.apply(this, arguments)
if(this._readSource === undefined) {
throw new Error("You must define a _readSource function for an object implementing Stream666")
}
this._sourceHasData = false
this._destinationWantsData = false
this._size = undefined // can be set by _read
}
util.inherits(Stream666.Readable, stream.Readable)
Stream666.Readable.prototype._read = function(size) {
this._destinationWantsData = true
if(this._sourceHasData) {
pushSourceData(this, size)
} else {
this._size = size
}
}
Stream666.Readable.prototype.sourceHasData = function(_sourceHasData) {
this._sourceHasData = _sourceHasData
if(_sourceHasData && this._destinationWantsData) {
pushSourceData(this, this._size)
}
}
Stream666.Readable.prototype.end = function() {
this.push(null)
}
function pushSourceData(stream666Readable, size) {
var data = stream666Readable._readSource(size)
if(data !== null) {
if(!stream666Readable.push(data)) {
stream666Readable._destinationWantsData = false
}
} else {
stream666Readable._sourceHasData = false
}
}
// creates a stream that can view all the data in a stream and passes the data through
// correctly supports backpressure
// parameters:
// stream - the stream to peek at
// callback - called when there's data sent from the passed stream
var StreamPeeker = function(myStream, callback) {
Stream666.Readable.call(this)
this.stream = myStream
this.callback = callback
myStream.on('readable', function() {
this.sourceHasData(true)
}.bind(this))
myStream.on('end', function() {
this.end()
}.bind(this))
}
util.inherits(StreamPeeker, Stream666.Readable)
StreamPeeker.prototype._readSource = function(size) {
var data = this.stream.read(size)
if(data !== null) {
this.callback(data)
return data
} else {
this.sourceHasData(false)
return null
}
}
Old Answer:
// creates a stream that can view all the data in a stream and passes the data through
// correctly supports backpressure
// parameters:
// stream - the stream to peek at
// callback - called when there's data sent from the passed stream
var StreamPeeker = exports.StreamPeeker = function(myStream, callback) {
stream.Readable.call(this)
this.stream = myStream
this.callback = callback
this.reading = false
this.sourceIsReadable = false
myStream.on('readable', function() {
this.sourceIsReadable = true
this._readMoreData()
}.bind(this))
myStream.on('end', function() {
this.push(null)
}.bind(this))
}
util.inherits(StreamPeeker, stream.Readable)
StreamPeeker.prototype._read = function() {
this.reading = true
if(this.sourceIsReadable) {
this._readMoreData()
}
}
StreamPeeker.prototype._readMoreData = function() {
if(!this.reading) return;
var data = this.stream.read()
if(data !== null) {
if(!this.push(data)) {
this.reading = false
}
this.callback(data)
}
}

stack exceed on recursive function for framing tcp stream

I have a function that is recursive in framing tcp stream. But when I reach around 1000+ packets received per second I suddenly get Max call stack exceeded or something along those lines.
My code is:
var expectedRemaining = (this.expectedSz - this.receivedSz);
if (buff.length > expectedRemaining) {
var tmp = buff.slice(0, expectedRemaining);
buff = buff.slice(expectedRemaining);
recurse = true;
this.inPacket.push(tmp);
this.receivedSz = this.expectedSz;
} else {
this.inPacket.push(buff);
this.receivedSz += buff.length;
}
if (this.receivedSz === this.expectedSz) {
this.emit('data', Buffer.concat( this.inPacket, this.expectedSz));
this.reset();
}
if (recurse) this.handleData(buff);
Any suggestions?
Right now, your function looks something like this (psuedocode):
this.handleData = function handleData(buf) {
if ( someCondition ) {
// do stuff
recurse = true;
}
else {
// do other stuff
}
if (recurse) {
this.handleData(buf);
}
};
What I'm suggesting is that you implement the recursive behavior with a setImmediate call. This will allow the stack frame to be cleared and the data event to be emitted before entering your function again.
this.handleData = function handleData(buf) {
if ( someCondition ) {
// do stuff
recurse = true;
}
else {
// do other stuff
}
if (recurse) {
setImmediate(function() {
this.handleData(buf);
});
}
};
Just in case people don't read the comments, for the application described in the original question, nextTick ended up being a better fit. The main difference is that nextTick guarantees that the given function will execute before any queued events.

Resources