publishing observable to make it *hot* - node.js

I am trying to create a hot observable that does not need to be subscribed too. This is for a library and I would like to give users the ability to call certain methods and avoid the need to call subscribe() to get the observables to fire. Initially I have this:
const q = new Queue();
q.add('foo bar baz').subscribe(); // <<< need to call subscribe
Queue.prototype.add = Queue.prototype.enqueue = function (lines) {
lines = _.flattenDeep([lines]);
var lockAcquired = false;
return this.init()
.flatMap(() => {
return acquireLock(this)
})
.flatMap(() => {
lockAcquired = true;
return appendFile(this, lines)
})
.flatMap(() => releaseLock(this))
.catch(err => {
if (lockAcquired) {
return releaseLock(this);
}
else {
return makeGenericObservable();
}
})
};
in order to make the observable hot, I thought I could do something like this:
const q = new Queue();
q.add('foo bar baz'); // <<< don't call subscribe
Queue.prototype.add = Queue.prototype.enqueue = function (lines) {
lines = _.flattenDeep([lines]);
var lockAcquired = false;
return this.init()
.flatMap(() => {
return acquireLock(this)
})
.flatMap(() => {
lockAcquired = true;
return appendFile(this, lines)
})
.flatMap(() => releaseLock(this))
.catch(err => {
if (lockAcquired) {
return releaseLock(this);
}
else {
return makeGenericObservable();
}
})
.publish()
.share() // this too?
};
however the problem is when I call publish(), nothing happens, and the add method never seems to get fully invoked (I assume the first observable in the sequence never fires at all, because effectively subscribe is not called). But I though publish() would automatically invoke the observable chain?
How can I make the observable returned from the add method, hot?

You misunderstood. Hot observable does require subscription like a cold one. Difference is that hot uses some external producer (like dome element) and start listen to it on subscription. On the other hand cold observable creates producer internally on subscription.
It results in that you can miss some events with hot observable because external producer knows nothing about subscription and emits independently. And with cold observable you can't miss anything because producer is created on subscription.
Long story short you can build any chain of observable on top of either hot or cold observable but until you subscribe to it nothing will happen.
PS. No need to use publish with share because latter is alias for .publish().refCount().

If you don't want to use subscribe, you could manually .connect() your stream or subscribe it inside your method:
const q = new Queue();
q.add('foo bar baz');
Queue.prototype.add = Queue.prototype.enqueue = function (lines) {
lines = _.flattenDeep([lines]);
var lockAcquired = false;
let add$ = this.init()
.flatMap(() => {
return acquireLock(this)
})
.flatMap(() => {
lockAcquired = true;
return appendFile(this, lines)
})
.flatMap(() => releaseLock(this))
.catch(err => {
if (lockAcquired) {
return releaseLock(this);
}
else {
return makeGenericObservable();
}
})
.publish();
add$.connect();
return add$; // optional, depends if you even need the stream outside of the add-method
};
or as an alternative to use an internal subscribe:
const q = new Queue();
q.add('foo bar baz'); // <<< don't call subscribe
Queue.prototype.add = Queue.prototype.enqueue = function (lines) {
lines = _.flattenDeep([lines]);
var lockAcquired = false;
let add$ = this.init()
...
.share();
add$.subscribe();
return add$;
};

Related

NodeJS sleep with promise takes too long

I'm trying to improve performance of processing my worker's incoming messages using a queue.
However, the sleep function takes anywhere between 16 to 30 milliseconds to complete instead of the 1 millisecond. Is there a better way to process the queue without this large delay, and without keeping my app at 100% cpu?
I am doing something like this:
var actions = new Queue();
parentPort.on('message', (msg) => actions.enqueue(msg));
loopy();
async function loopy() {
while (true) {
if (actions.size() > 0) {
let action = actions.dequeue();
//do work
continue;
}
await sleep(1);
}
}
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
Any help would be appreciated. Thanks!
while(true) is (usually) not a good idea.
You should call the dequeue function after you enqueue a message. The dequeue function should end when 1. there is already a dequeue function running 2. no more message in the queue.
var isProcessing = false;
var actions = new Queue();
parentPort.on('message', (msg) => {
actions.enqueue(msg)
tryDequeue();
});
async function tryDequeue() {
if(isProcessing || actions.size() == 0)
{
return;
}
isProcessing = true;
let action = actions.dequeue();
//do work
isProcessing = false;
tryDequeue();
}

NodeJS: call func from inside another func in same file

I have NodeJS program.
In one class, I have various utility methods. One function, safeGithubPush, calls safeString, another func in the same class
module.exports = {
safeString(stringToCheck) {
console.log(validator.isAscii(stringToCheck), validator.matches(stringToCheck, /^((\w)*[-.]?(\w)*)*$/))
return (
validator.isAscii(stringToCheck) &&
validator.matches(stringToCheck, /^((\w)*[-.]?(\w)*)*$/)
);
},
safeGithubPush(currentJob) {
if (
!currentJob ||
!currentJob.payload ||
!currentJob.payload.repoName ||
!currentJob.payload.repoOwner ||
!currentJob.payload.branchName
) {
this.logIn(
currentJob,
`${' (sanitize)'.padEnd(15)}failed due to insufficient job definition`
);
throw invalidJobDef;
}
if (
this.safeString(currentJob.payload.repoName) &&
this.safeString(currentJob.payload.repoOwner) &&
this.safeString(currentJob.payload.branchName)
) {
return true;
}
throw invalidJobDef;
},
}
While this.logIn(), another func in the utility class, works just fine, I get the error for safeString:
Error caught by first catch: TypeError: this.safeString is not a function
I followed a solution offer by another SO post:
safeString: function(stringToCheck){
...
}
safeGithubPush(currentJob) {
...
if (
this.safeString(currentJob.payload.repoName) &&
this.safeString(currentJob.payload.repoOwner) &&
this.safeString(currentJob.payload.branchName)
) {
return true;
}
}
But this also gets a, TypeError: this.safeString is not a function.
I'm not using arrow functions, which is the explanation for this error on a different SO post
I don't think the reason is determinable with the code you are currently presenting. It likely has something to do with how you are calling safeGithubPush. If you do something that would change the this binding the this.safeString is going to fail.
const foo = {
fizz() {
console.log("fizz");
},
buzz() {
this.fizz();
}
};
// "this" is correct
foo.buzz();
// "this" has no fizz to call
const myFizz = foo.buzz;
myFizz();
Considering you are attaching these to module.exports I am going to guess that you pull these functions off in a require call and then try to use them bare which makes the problem obvious after looking at my example above:
// Ignore these 2 lines, they let this look like node
const module = {};
const require = () => module.exports;
// Ignore above 2 lines, they let this look like node
// Your module "some.js"
module.exports = {
safeString(str) {
return true;
},
safeGithubPush(currentJob) {
if (!this.safeString("some")) {
throw new Error("Not safe");
}
return true;
}
};
try {
// Some consumer module that doesn't work
const {safeGithubPush} = require("./some.js");
const isItSafe = safeGithubPush();
console.log(`Safe? ${isItSafe}`);
} catch (err) {
console.error("Didn't bind right \"this\"");
}
try {
// Some consumer module that DOES work
const someModule = require("./some.js");
const isItSafe = someModule.safeGithubPush();
console.log(`Safe? ${isItSafe}`);
} catch (err) {
console.error(err);
}
I would restructure this code. You say these are utility functions which makes me think you don't really want to have to structure them with this in mind.
Instead of attaching them all to module.exports at their definition, define them outside and directly reference the functions you want to use, then attach them to exports so other modules can use the functions:
function safeString(stringToCheck) {
return true;
}
function safeGithubPush(currentJob) {
if (!safeString("some")) {
throw new Error("Not safe");
}
return true;
}
module.exports = {
safeString,
safeGithubPush
};

passing function to a class in nodejs

I have a function that I need to pass to a class I have defined in nodeJs.
The use case scenario is I want to give the implementer of the class the control of what to do with the data received from createCall function. I don't mind if the method becomes a member function of the class. Any help would be appreciated.
//Function to pass. Defined by the person using the class in their project.
var someFunction = function(data){
console.log(data)
}
//And I have a class i.e. the library.
class A {
constructor(user, handler) {
this.user = user;
this.notificationHandler = handler;
}
createCall(){
var result = new Promise (function(resolve,reject) {
resolve(callApi());
});
//doesn't work. Keeps saying notificationHandler is not a function
result.then(function(resp) {
this.notificationHandler(resp);
}) ;
//I want to pass this resp back to the function I had passed in the
// constructor.
//How do I achieve this.
}
callApi(){ ...somecode... }
}
// The user creates an object of the class like this
var obj = new A("abc#gmail.com", someFunction);
obj.createCall(); // This call should execute the logic inside someFunction after the resp is received.
Arrow functions (if your Node version supports them) are convenient here:
class A {
constructor(user, handler) {
this.user = user;
this.notificationHandler = handler;
}
createCall() {
var result = new Promise(resolve => {
// we're fine here, `this` is the current A instance
resolve(this.callApi());
});
result.then(resp => {
this.notificationHandler(resp);
});
}
callApi() {
// Some code here...
}
}
Inside arrow functions, this refers to the context that defined such functions, in our case the current instance of A. The old school way (ECMA 5) would be:
createCall() {
// save current instance in a variable for further use
// inside callback functions
var self = this;
var result = new Promise(function(resolve) {
// here `this` is completely irrelevant;
// we need to use `self`
resolve(self.callApi());
});
result.then(function(resp) {
self.notificationHandler(resp);
});
}
Check here for details: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions#No_separate_this

Why Readable.push() return false every time Readable._read() is called

I have the following readable stream in typescript:
import {Readable} from "stream";
enum InputState {
NOT_READABLE,
READABLE,
ENDED
}
export class Aggregator extends Readable {
private inputs: Array<NodeJS.ReadableStream>;
private states: Array<InputState>;
private records: Array<any>;
constructor(options, inputs: Array<NodeJS.ReadableStream>) {
// force object mode
options.objectMode = true;
super(options);
this.inputs = inputs;
// set initial state
this.states = this.inputs.map(() => InputState.NOT_READABLE);
this.records = this.inputs.map(() => null);
// register event handlers for input streams
this.inputs.forEach((input, i) => {
input.on("readable", () => {
console.log("input", i, "readable event fired");
this.states[i] = InputState.READABLE;
if (this._readable) { this.emit("_readable"); }
});
input.on("end", () => {
console.log("input", i, "end event fired");
this.states[i] = InputState.ENDED;
// if (this._end) { this.push(null); return; }
if (this._readable) { this.emit("_readable"); }
});
});
}
get _readable () {
return this.states.every(
state => state === InputState.READABLE ||
state === InputState.ENDED);
}
get _end () {
return this.states.every(state => state === InputState.ENDED);
}
_aggregate () {
console.log("calling _aggregate");
let timestamp = Infinity,
indexes = [];
console.log("initial record state", JSON.stringify(this.records));
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
return;
}
}
// update timestamp if a better one is found
if (this.records[i] && timestamp > this.records[i].t) {
timestamp = this.records[i].t;
// clean the indexes array
indexes.length = 0;
}
// include the record index if has the required timestamp
if (this.records[i] && this.records[i].t === timestamp) {
indexes.push(i);
}
});
console.log("final record state", JSON.stringify(this.records), indexes, timestamp);
// end prematurely if after trying to read inputs the aggregator is
// not ready
if (!this._readable) {
console.log("end prematurely trying to read inputs", this.states);
this.push(null);
return;
}
// end prematurely if all inputs are ended and there is no remaining
// record values
if (this._end && indexes.length === 0) {
console.log("end on empty indexes", this.states);
this.push(null);
return;
}
// create the aggregated record
let record = {
t: timestamp,
v: this.records.map(
(r, i) => indexes.indexOf(i) !== -1 ? r.v : null
)
};
console.log("aggregated record", JSON.stringify(record));
if (this.push(record)) {
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
} else {
console.log("record failed to push downstream");
}
}
_read () {
console.log("calling _read", this._readable);
if (this._readable) { this._aggregate(); }
else {
this.once("_readable", this._aggregate.bind(this));
}
}
}
It is designed to aggregate multiple input streams in object mode. In the end it aggregate multiple time series data streams into a single one. The problem i'm facing is that when i test the feature i'm seeing repeatedly the message record failed to push downstream and immediately the message calling _read true and in between just the 3 messages related to the aggregation algorithm. So the Readable stream machinery is calling _read and every time it's failing the push() call. Any idea why is this happening? Did you know of a library that implement this kind of algorithm or a better way to implement this feature?
I will answer myself the question.
The problem was that i was misunderstanding the meaning of the this.push() return value call. I think a false return value mean that the current push operation fail but the real meaning is that the next push operation will fail.
A simple fix to the code shown above is to replace this:
if (this.push(record)) {
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
} else {
console.log("record failed to push downstream");
}
By this:
this.push(record);
console.log("record pushed downstream");
// remove records already aggregated and pushed
indexes.forEach(i => { this.records[i] = null; });
this.records.forEach((record, i) => {
// try to read missing records
if (!this.records[i] && this.states[i] !== InputState.ENDED) {
this.records[i] = this.inputs[i].read();
if (!this.records[i]) {
this.states[i] = InputState.NOT_READABLE;
}
}
});
You can notice that the only difference is avoid conditioning operations on the return value of the this.push() call. Given that the current implementation call this.push() only once per _read() call this simple change solve the issue.
It means feeding is faster than consuming. The official approach is enlarge its highWaterMark, Default: 16384 (16KB), or 16 for objectMode. As long as its inner buffer is big enough, the push function will always return true. It does not have to be single push() in single _read(). You may push as much as the highWaterMark indicates in a single _read().

stack exceed on recursive function for framing tcp stream

I have a function that is recursive in framing tcp stream. But when I reach around 1000+ packets received per second I suddenly get Max call stack exceeded or something along those lines.
My code is:
var expectedRemaining = (this.expectedSz - this.receivedSz);
if (buff.length > expectedRemaining) {
var tmp = buff.slice(0, expectedRemaining);
buff = buff.slice(expectedRemaining);
recurse = true;
this.inPacket.push(tmp);
this.receivedSz = this.expectedSz;
} else {
this.inPacket.push(buff);
this.receivedSz += buff.length;
}
if (this.receivedSz === this.expectedSz) {
this.emit('data', Buffer.concat( this.inPacket, this.expectedSz));
this.reset();
}
if (recurse) this.handleData(buff);
Any suggestions?
Right now, your function looks something like this (psuedocode):
this.handleData = function handleData(buf) {
if ( someCondition ) {
// do stuff
recurse = true;
}
else {
// do other stuff
}
if (recurse) {
this.handleData(buf);
}
};
What I'm suggesting is that you implement the recursive behavior with a setImmediate call. This will allow the stack frame to be cleared and the data event to be emitted before entering your function again.
this.handleData = function handleData(buf) {
if ( someCondition ) {
// do stuff
recurse = true;
}
else {
// do other stuff
}
if (recurse) {
setImmediate(function() {
this.handleData(buf);
});
}
};
Just in case people don't read the comments, for the application described in the original question, nextTick ended up being a better fit. The main difference is that nextTick guarantees that the given function will execute before any queued events.

Resources