NodeJS Cluster: how to reduce data from workers in master?

I'm new to Node.js, and what I want is to read data from a database and compute on it. To make it faster, I use the Node.js cluster module.
There are two global variables, pairMap and nameSet. I allocate the jobs to workers in the master process, and they do some computation work (modifying the map and the set, much like map-reduce).
However, in the master process the pairMap and nameSet end up unmodified and empty (see the code in doMasterAction).
(Another strange thing: when I log the data inside a worker it has been modified, but back in the master process it is empty.)
The code is as follows (I've extracted the main idea):
const Promise = require('bluebird');
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;
const fs = Promise.promisifyAll(require('fs'));
const utils = {
    mergeMap: (source, dest) => {
        for (let [key, value] of Object.entries(source)) {
            if (!dest.has(key)) dest.set(key, value);
            for (let [type, arr] of Object.entries(value)) {
                const final = new Set([...dest.get(key)[type], ...arr]);
                dest.get(key)[type] = final;
            }
        }
    }
}
/**
 * key: name1#group.com||name2#group.com
 * value: {to: [id1,id2,id3],cc,bcc}
 * @param res
 * @param map
 * @param nameSet
 */
function countLinks(res, map, nameSet) {
    nameSet.add(res);
    map.set(res, { 'test': Math.floor(Math.random() * 10 + 1) });
}
class hackingTeamPrepare {
    constructor(bulk = 100000, total = 1150000) {
        this.bulk = bulk;
        this.count = Math.ceil(total / this.bulk);
        const parallelArr = new Array(this.count).fill(0).map((v, i) => i);
        this.jobs = parallelArr.map(v => 'key' + v);
        this.pairMap = new Map();
        this.nameSet = new Set();
        this.bindThis();
    }
    bindThis() {
        this.doWorkerAction = this.doWorkerAction.bind(this);
        this.doMasterAction = this.doMasterAction.bind(this);
    }
    doMasterAction() {
        const workers = [], result = {};
        const self = this;
        let count = 0, timeout;
        for (let i = 0; i < numCPUs; i++) {
            const worker = cluster.fork();
            workers[i] = worker;
        }
        cluster.on('online', (worker) => {
            worker.send(self.jobs.shift());
        });
        cluster.on('exit', function() {
            if (self.jobs.length === 0) return;
            console.log('A worker process died, restarting...');
        });
        cluster.on('message', function(senderWorker, info) {
            const { workerId, jobIndex } = info;
            result[jobIndex] = true;
            console.log(`----worker ${workerId} done job: ${jobIndex}----`);
            const finish = !self.jobs.length && Object.keys(result).length === self.count;
            if (finish) {
                // -----------------!!here!!--------------------------
                console.log('-------finished-------', self.pairMap, self.nameSet); // Map {}, Set {}
                for (let id in cluster.workers) {
                    const curWorker = cluster.workers[id];
                    curWorker.disconnect();
                }
            } else {
                if (!self.jobs.length) return;
                senderWorker.send(self.jobs.shift());
            }
        })
    }
    /**
     * {[person1,person2]: {to,cc,bcc}}
     */
    doWorkerAction() {
        // this process is a worker; receive jobs from the master
        const self = this;
        process.on('message', (sql) => {
            const jobPromise = Promise.resolve(sql).then(res => {
                countLinks(res, self.pairMap, self.nameSet);
                const data = {
                    workerId: process.pid,
                    jobIndex: sql,
                };
                // send to master
                process.send(data);
            }).catch(err => {
                console.log('-----query error----', err);
            });
        })
    }
    readFromPG() {
        if (cluster.isMaster) {
            this.doMasterAction();
        } else if (cluster.isWorker) {
            this.doWorkerAction();
        }
    }
    init() {
        this.readFromPG();
    }
}
const test = new hackingTeamPrepare(2, 10);
test.init();
Can anyone help me with this?
I have tried to merge the data manually in the master process; however, the message sent by worker.send seems to drop the Map and Set objects inside it.

In a Node.js cluster, objects in memory are not shared between the master and the workers.
pairMap and nameSet exist separately in the master and in every worker. When a worker modifies these objects, they change only in that worker (process), and remain unchanged in the master and in the other workers.
To make your idea work, you need to maintain a single pairMap and a single nameSet inside the master process, send messages containing whatever data you need from the workers to the master, and update these objects using the received data.
Note that you cannot pass just any object as a message from a worker to the master. If you need somewhat complex data, you'll have to send plain JavaScript objects (key-value pairs). For example, if you need to send a Map instance from a worker to the master, you can use the following functions (source linked in the comment below):
// source - http://2ality.com/2015/08/es6-map-json.html
function mapToJson(map) {
    return JSON.stringify([...map]);
}
function jsonToMap(jsonStr) {
    return new Map(JSON.parse(jsonStr));
}
// send message using this example:
process.send(mapToJson(pairMap));
// receive message:
worker.on('message', message => console.log(jsonToMap(message)))
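Applied to the question's setup, each worker could ship its partial results along with the job-completion message, and the master could merge them on receipt. A minimal sketch, assuming the helpers above (the field names pairMapJson and names are made up for illustration):
// worker side, inside doWorkerAction: serialize the partial results
process.send({
    workerId: process.pid,
    jobIndex: sql,
    pairMapJson: mapToJson(self.pairMap),
    names: [...self.nameSet], // a Set survives the trip as a plain array
});

// master side, inside the cluster 'message' handler: rebuild and merge
const partial = jsonToMap(info.pairMapJson);
for (const [key, value] of partial) {
    self.pairMap.set(key, value); // merge deeper here if keys can collide
}
info.names.forEach(name => self.nameSet.add(name));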

Related

lowDb - out of memory

I have an out-of-memory problem in Node.js: when I inspect heap snapshots I see a lot of big strings that can't be garbage collected.
I use lowdb, and those strings are mainly the content of the lowdb file.
Question in principle: when I use FileAsync (so writing to the file is asynchronous) and I do a lot of fire-and-forget writes, is it possible that my heap fills up with pending entries that are all waiting for the file system to finish writing (and that Node can only free the memory once each write finishes)?
I do a lot of writes because I use lowdb to save the log messages of an algorithm that I execute. Later on I want to find the log messages of a specific execution. So basically:
{
    executions: [
        {
            id: 1,
            logEvents: [...]
        },
        {
            id: 2,
            logEvents: [...]
        },
        ...
    ]
}
My simplified picture of how Node processes this is:
my script is the next script on the stack and runs
with each write, something is waiting for the file system to return an answer
each of these waiting 'somethings' is bloating my memory, and each one holds the whole content of the lowdb file (multiple times?!)
Example TypeScript code to try it out:
import * as lowDb from 'lowdb';
import * as FileAsync from 'lowdb/adapters/FileAsync';

/* first block just for generating random data... */
const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
const charactersLength = characters.length;
const alphanum = (length: number) => {
    const result = Buffer.alloc(length);
    for (let i = 0; i < length; i++) {
        result.write(characters.charAt(Math.floor(Math.random() * charactersLength)), i);
    }
    return result.toString('utf8');
};
class TestLowDb {
    private adapter = new FileAsync('test.json');
    private db;

    /* start the db up, loading with the async FileAsync adapter */
    async startDb(): Promise<void> {
        return lowDb(this.adapter).then(db => {
            this.db = db;
            return this.db.defaults({ executions: [], dbCreated: new Date() }).write().then(_ => {
                console.log('finished with initialization');
            });
        });
    }

    /* fill the database with data, fails quite quickly, finally produces a json like the following:
     * { "executions": [ { "id": "<ID>", "data": [ <data>, <data>, ... ] }, <nextItem>, ... ] } */
    async fill(): Promise<void> {
        for (let i = 0; i < 100; i++) {
            const id = alphanum(3);
            this.start(id); // add the root id for this "execution"
            for (let j = 0; j < 100; j++) {
                this.fireAndForget(id, alphanum(1000));
                // await this.wait(id, alphanum(1000));
            }
        }
    }

    /* for the first item in the list add the id with the empty array */
    start(id: string): void {
        this.db.get('executions')
            .push({ id, data: [] })
            .write();
    }

    /* ignores the promise and continues to work */
    fireAndForget(id: string, data: string): void {
        this.db.get('executions')
            .find({ id })
            .get('data')
            .push(data)
            .write();
    }

    /* returns the promise so the caller can handle it "properly" */
    async wait(id: string, data: string): Promise<void> {
        return this.db.get('executions')
            .find({ id })
            .get('data')
            .push(data)
            .write();
    }
}
const instance = new TestLowDb();
instance.startDb().then(_ => {
    instance.fill();
});
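Judging from the commented-out await this.wait(...) line in fill(), one way to test the theory is to serialize the writes so that at most one is pending at a time; if the heap then stays flat, the fire-and-forget queue was indeed holding copies of the file content. A minimal sketch reusing the question's own class:
class TestLowDbSequential extends TestLowDb {
    /* same data volume, but each data write is awaited before the next starts,
     * so at most one pending write exists at a time */
    async fill(): Promise<void> {
        for (let i = 0; i < 100; i++) {
            const id = alphanum(3);
            this.start(id); // note: start() does not return its write promise
            for (let j = 0; j < 100; j++) {
                await this.wait(id, alphanum(1000));
            }
        }
    }
}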

node.js: create a connected writable and readable stream pair

I am trying to create a function that returns a connected writable and readable stream pair, e.g.:
const { writable, readable } = createStreamPair();
where each end has only the appropriate interface (writable instanceof stream.Readable === false and readable instanceof stream.Writable === false), unlike a PassThrough stream.
use case:
createWriteStream(filePath) {
    const { writable, readable } = createStreamPair();
    writeFile(filePath, readable);
    return writable;
}
How can I create my createStreamPair() function?
Edit 1
A naive approach that obviously does not work:
function createStreamPair() {
    var readable = new stream.Readable();
    var writable = new stream.Writable();
    readable.pipe(writable);
    return { writable, readable }
}
The Node.js tests use a function that creates two Duplex streams, where writes to one can be read from the other, and vice versa: https://github.com/nodejs/node/blob/master/test/common/duplexpair.js
It isn't part of the Node.js standard library, but you can write your own.
I'll present a slightly modified, annotated version here:
const Duplex = require('stream').Duplex;
const assert = require('assert');

// Define some unique property names.
// The actual value doesn't matter,
// so long as they're not used by Node.js for anything else.
const kCallback = Symbol('Callback');
const kOtherSide = Symbol('Other');

// Define a class `DuplexSocket` that extends `Duplex`
class DuplexSocket extends Duplex {
    constructor() {
        // Let Node.js initialize everything it needs to
        super();
        // Define the two values we will be using:
        // kCallback holds a temporary reference to the pending write callback
        this[kCallback] = null;
        // kOtherSide will be the reference to the other side of the stream
        this[kOtherSide] = null;
    }

    _read() {
        // This is called when this side receives a push() call
        // If the other side set a callback for us to call,
        // then first clear that reference
        // (it might be immediately set to a new value again),
        // then call the function.
        const callback = this[kCallback];
        if (callback) {
            this[kCallback] = null;
            callback();
        }
    }

    _write(chunk, encoding, callback) {
        // This is called when someone writes to the stream
        // Ensure there's a reference to the other side before trying to call it
        assert.notStrictEqual(this[kOtherSide], null);
        // Ensure that the other-side callback is empty before setting it
        // If push immediately calls _read, this should never be a problem
        assert.strictEqual(this[kOtherSide][kCallback], null);
        if (chunk.length === 0) {
            // callback is not called for zero-length chunks
            process.nextTick(callback);
        } else {
            // Let Node.js know when _read has been called
            this[kOtherSide][kCallback] = callback;
            // And finally, send the other side the data to be read
            this[kOtherSide].push(chunk);
        }
    }

    _final(callback) {
        // Ask the other side to let us know it received our EOF request
        this[kOtherSide].on('end', callback);
        // And finally, pushing null signals the end of the stream
        this[kOtherSide].push(null);
    }
}

function makeDuplexPair() {
    // Create the two sides of the pair
    const clientSide = new DuplexSocket();
    const serverSide = new DuplexSocket();
    // Set the other-side references
    clientSide[kOtherSide] = serverSide;
    serverSide[kOtherSide] = clientSide;
    // Both instances behave the same, so the choice of name doesn't matter,
    // so long as they're distinguishable.
    return { clientSide, serverSide };
}
module.exports = makeDuplexPair;
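A quick usage sketch, assuming the module above is saved as duplexpair.js: whatever is written to one side can be read from the other.
const makeDuplexPair = require('./duplexpair');

const { clientSide, serverSide } = makeDuplexPair();
serverSide.on('data', chunk => console.log('server got: %s', chunk));
clientSide.write('hello'); // prints: server got: hello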
Here's another way of creating two streams, one Readable and one Writable in this case:
const { Readable, Writable } = require('stream');

function makeAsymmetricalStreamPair() {
    var readableCallback;

    const readableSide = new Readable();
    readableSide._read = function _read() {
        if (!readableCallback) return;
        var callback = readableCallback;
        readableCallback = null;
        callback();
    };

    const writableSide = new Writable();
    writableSide._write = function _write(chunk, enc, callback) {
        if (readableCallback) throw new Error('previous write still pending');
        if (chunk.length === 0) {
            process.nextTick(callback);
        } else {
            readableCallback = callback;
            readableSide.push(chunk);
        }
    };
    writableSide._final = function _final(callback) {
        readableSide.on('end', callback);
        readableSide.push(null);
    };
    return { readableSide, writableSide };
}
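This maps directly onto the question's use case; a sketch of createStreamPair on top of it (just renaming the two sides):
function createStreamPair() {
    const { readableSide, writableSide } = makeAsymmetricalStreamPair();
    return { writable: writableSide, readable: readableSide };
}

const { writable, readable } = createStreamPair();
console.log(writable instanceof Readable); // false
console.log(readable instanceof Writable); // false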
As of today you can use stream.PassThrough
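If the instanceof requirement from the question can be relaxed, a PassThrough already behaves as a connected writable/readable pair in a single object; a minimal sketch of the question's use case (writeFile being the question's own consumer):
const { PassThrough } = require('stream');

function createWriteStream(filePath) {
    const pass = new PassThrough();
    writeFile(filePath, pass); // consume the readable end
    return pass;               // callers write into the writable end
}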

'IObservable<SyncProgress>' does not contain a definition for 'CombineLatest'

I just took this code snippet from Realm, as it fits my requirement:
var session = realm.GetSession();
var uploadProgress = session.GetProgressObservable(ProgressDirection.Upload, ProgressMode.ReportIndefinitely);
var downloadProgress = session.GetProgressObservable(ProgressDirection.Download, ProgressMode.ReportIndefinitely);

var token = uploadProgress.CombineLatest(downloadProgress, (upload, download) =>
    {
        return new
        {
            TotalTransferred = upload.TransferredBytes + download.TransferredBytes,
            TotalTransferable = upload.TransferableBytes + download.TransferableBytes
        };
    })
    .Throttle(TimeSpan.FromSeconds(0.1))
    .ObserveOn(SynchronizationContext.Current)
    .Subscribe(progress =>
    {
        if (progress.TotalTransferred < progress.TotalTransferable)
        {
            // Show spinner
        }
        else
        {
            // Hide spinner
        }
    });
But the CombineLatest method is not available on the IObservable interface in the .NET subset that Xamarin provides.
Given that I took this directly from the Realm website, it is supposed to work.

I just started learning node.js and I implemented demo code exhibiting event listeners. I got an error

I am getting this error in my code:
TypeError: account.on is not a function
Where did I go wrong?
Code
var events = require('events');

function Account() {
    this.balance = 0;
    events.EventEmitter.call(this);
    this.deposit = function(amount) {
        this.balance += amount;
        this.emit('balanceChanged');
    };
    this.withdraw = function(amount) {
        this.balance -= amount;
        this.emit('balanceChanged');
    };
}
Account.prototype._proto_ = events.EventEmitter.prototype;

function displayBalance() {
    console.log('Account balance : $%d', this.balance);
}
function checkOverdraw() {
    if (this.balance < 0) {
        console.log('Account overdrawn!!!');
    }
}
function checkgoal(acc, goal) {
    if (acc.balance > goal) {
        console.log('Goal Achieved!!!');
    }
}

var account = new Account();
account.on('balanceChanged', displayBalance);
account.on('balanceChanged', checkOverdraw);
account.on('balanceChanged', function() {
    checkgoal(this, 1000);
});
account.deposit(220);
account.deposit(320);
account.deposit(600);
account.withdraw(1200);
Your example code is not idiomatic Node.js.
I'd strongly recommend you follow best practices when creating new inheritable objects, as in:
var util = require('util');
var EventEmitter = require('events').EventEmitter;

var Account = function() {
    EventEmitter.call(this); // should be first
    this.balance = 0;        // instance var
};
util.inherits(Account, EventEmitter);

Account.prototype.deposit = function(amount) {
    this.balance += amount;
    this.emit('balanceChanged');
};
Account.prototype.withdraw = function(amount) {
    this.balance -= amount;
    this.emit('balanceChanged');
};

var account = new Account();
var displayBalance = function() {
    console.log("Account balance : $%d", this.balance);
};
account.on('balanceChanged', displayBalance);
account.deposit(200);
account.withdraw(40);
// ... etc. ....
Which, when run displays:
Account balance : $200
Account balance : $160
Best practices are there so that:
your code can be expressed in a way that is easy for others to understand
you don't run into unexpected problems when you try to replicate functionality that is already defined, possibly complex, and difficult to understand.
The reason that util.inherits exists is so you don't have to worry about how the prototype chain is constructed. By constructing it yourself, you will often run into the problem you experienced; here the culprit is the typo _proto_, since the actual accessor property is __proto__ (two underscores on each side).
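For intuition, on modern Node util.inherits essentially just rewires the prototype chain; roughly, it is a sketch of:
// approximately what util.inherits(Account, EventEmitter) does
Object.setPrototypeOf(Account.prototype, EventEmitter.prototype);
// instances now resolve .on/.emit via Account.prototype -> EventEmitter.prototype
Account.super_ = EventEmitter; // bookkeeping reference util.inherits also adds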
Also, since the current Node runtime (>6.0) includes most of the ES6 spec, you can also (and really should) write your code as:
const util = require('util');
const EventEmitter = require('events').EventEmitter;

// note: constructor functions and prototype methods need their own 'this',
// so they are declared as regular functions, not arrow functions
const Account = function() {
    EventEmitter.call(this);
    this.balance = 0;
};
util.inherits(Account, EventEmitter);

Account.prototype.deposit = function(val) {
    this.balance += val;
    this.emit('balanceChanged');
};
Account.prototype.withdraw = function(val) {
    this.balance -= val;
    this.emit('balanceChanged');
};
The use of the const keyword assures the variables you create cannot be changed inadvertently or unexpectedly.
And the use of the "fat arrow" function definition idiom (() => {}) is more succinct and thus quicker to type, but it also carries the added benefit that it preserves the value of this from the surrounding context, which is exactly what you want in callbacks, so you never have to write something like:
Account.prototype.doSomething = function() {
    var self = this;
    doSomething(val, function(err, res) {
        if (err) {
            throw err;
        }
        self.result = res;
    });
};
which, using the 'fat arrow' construct for the callback, becomes:
Account.prototype.doSomething = function() {
    doSomething(val, (err, res) => {
        if (err) {
            throw err;
        }
        this.result = res; // where 'this' is the instance of Account
    });
};
The "fat arrow" idiom also allows you to do some things more succinctly like:
// return the result of a single operation
const add = (a,b) => a + b;
// return a single result object
const getSum = (a,b) => {{a:a,b:b,sum:a+b}};
Another way to create inheritable "classes" in ES6 is to use its class construction notation:
const EventEmitter = require('events');

class Account extends EventEmitter {
    constructor() {
        super();
        this._balance = 0; // start instance vars with an underscore
    }
    get balance() { // and add a getter
        return this._balance;
    }
    deposit(amount) {
        this._balance += amount;
        this.emit('balanceChanged');
    }
    withdraw(amount) {
        this._balance -= amount;
        this.emit('balanceChanged');
    }
}
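Usage is the same as for the util.inherits version; a quick sketch:
const account = new Account();
account.on('balanceChanged', function() {
    console.log('Account balance : $%d', this.balance); // 'this' is the emitter
});
account.deposit(220);  // Account balance : $220
account.withdraw(120); // Account balance : $100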
It should be noted that both ways of constructing inheritable prototypal objects are really the same, except that the new class construction idiom adds syntactic "sugar" to bring the declaration more in line with other languages that support a more classical object orientation.
The ES6 extensions to Node offer many other benefits worthy of study.

How do you implement a stream that properly handles backpressure in node.js?

I can't for the life of me figure out how to implement a stream that properly handles backpressure. Should you never use pause and resume?
I have this implementation I'm trying to get to work correctly:
var StreamPeeker = exports.StreamPeeker = function(myStream, callback) {
    stream.Readable.call(this, { highWaterMark: highWaterMark })
    this.stream = myStream

    myStream.on('readable', function() {
        var data = myStream.read(5000)
        //process.stdout.write("Eff: "+data)
        if (data !== null) {
            if (!this.push(data)) {
                process.stdout.write("Pause")
                this.pause()
            }
            callback(data)
        }
    }.bind(this))
    myStream.on('end', function() {
        this.push(null)
    }.bind(this))
}
util.inherits(StreamPeeker, stream.Readable)

StreamPeeker.prototype._read = function() {
    process.stdout.write("resume")
    //this.resume() // putting this in for some reason causes the stream to not output???
}
It correctly sends output, but doesn't correctly produce backpressure. How can I change it to properly support backpressure?
Ok, I finally figured it out after lots of trial and error. A couple of guidelines:
Never ever use pause or resume (otherwise the stream will go into legacy "flowing" mode)
Never add a "data" event listener (otherwise the stream will go into legacy "flowing" mode)
It's the implementor's responsibility to keep track of when the source is readable
It's the implementor's responsibility to keep track of when the destination wants more data
The implementation should not read any data until the _read method is called
The argument to read tells the source to give it that many bytes; it's probably best to pass the argument given to this._read into the source's read method. This way you should be able to configure how much to read at a time at the destination, and the rest of the stream chain should be automatic.
So this is what I changed it to:
Update: I created a Readable class that is much easier to implement with proper backpressure, and should have just as much flexibility as Node's native streams.
var Readable = stream.Readable
var util = require('util')

// an easier Readable stream interface to implement
// requires that subclasses:
// * implement a _readSource function that
//   - gets the same parameter as Readable._read (size)
//   - returns either data to write, or null if the source doesn't have more data yet
// * call 'sourceHasData(hasData)' when the source starts or stops having data available
// * call 'end()' when the source is out of data (forever)
var Stream666 = {}
Stream666.Readable = function() {
    stream.Readable.apply(this, arguments)
    if (this._readSource === undefined) {
        throw new Error("You must define a _readSource function for an object implementing Stream666")
    }
    this._sourceHasData = false
    this._destinationWantsData = false
    this._size = undefined // can be set by _read
}
util.inherits(Stream666.Readable, stream.Readable)

Stream666.Readable.prototype._read = function(size) {
    this._destinationWantsData = true
    if (this._sourceHasData) {
        pushSourceData(this, size)
    } else {
        this._size = size
    }
}
Stream666.Readable.prototype.sourceHasData = function(_sourceHasData) {
    this._sourceHasData = _sourceHasData
    if (_sourceHasData && this._destinationWantsData) {
        pushSourceData(this, this._size)
    }
}
Stream666.Readable.prototype.end = function() {
    this.push(null)
}

function pushSourceData(stream666Readable, size) {
    var data = stream666Readable._readSource(size)
    if (data !== null) {
        if (!stream666Readable.push(data)) {
            stream666Readable._destinationWantsData = false
        }
    } else {
        stream666Readable._sourceHasData = false
    }
}
// creates a stream that can view all the data in a stream and passes the data through
// correctly supports backpressure
// parameters:
//   stream - the stream to peek at
//   callback - called when there's data sent from the passed stream
var StreamPeeker = function(myStream, callback) {
    Stream666.Readable.call(this)
    this.stream = myStream
    this.callback = callback

    myStream.on('readable', function() {
        this.sourceHasData(true)
    }.bind(this))
    myStream.on('end', function() {
        this.end()
    }.bind(this))
}
util.inherits(StreamPeeker, Stream666.Readable)

StreamPeeker.prototype._readSource = function(size) {
    var data = this.stream.read(size)
    if (data !== null) {
        this.callback(data)
        return data
    } else {
        this.sourceHasData(false)
        return null
    }
}
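A quick usage sketch under the same assumptions, peeking at everything that flows from stdin to stdout:
// log chunk sizes to stderr while passing stdin through to stdout
var peeker = new StreamPeeker(process.stdin, function(data) {
    process.stderr.write("peeked " + data.length + " bytes\n")
})
peeker.pipe(process.stdout)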
Old Answer:
// creates a stream that can view all the data in a stream and passes the data through
// correctly supports backpressure
// parameters:
//   stream - the stream to peek at
//   callback - called when there's data sent from the passed stream
var StreamPeeker = exports.StreamPeeker = function(myStream, callback) {
    stream.Readable.call(this)
    this.stream = myStream
    this.callback = callback
    this.reading = false
    this.sourceIsReadable = false

    myStream.on('readable', function() {
        this.sourceIsReadable = true
        this._readMoreData()
    }.bind(this))
    myStream.on('end', function() {
        this.push(null)
    }.bind(this))
}
util.inherits(StreamPeeker, stream.Readable)

StreamPeeker.prototype._read = function() {
    this.reading = true
    if (this.sourceIsReadable) {
        this._readMoreData()
    }
}
StreamPeeker.prototype._readMoreData = function() {
    if (!this.reading) return;
    var data = this.stream.read()
    if (data !== null) {
        if (!this.push(data)) {
            this.reading = false
        }
        this.callback(data)
    }
}
