Node.js cluster + collective

I have submitted this issue to the GitHub repo as well, so it can be tracked there!
I'm running a clustered app that could be on a machine with N cores. Let's say I am running 2 instances of the app locally for testing, really emulating 2 different boxes. So: N cores on N machines, using the cluster module (in reality, the number of machines is static, e.g. just 2 behind an AWS Load Balancer).
How do I properly configure the collective.js "all_hosts" option for this? Would I use process.id somehow along with IP?
Running the code snippets would be something along the lines of 2 bash terminals:
terminal 1:
coffee cluster1
terminal 2:
coffee cluster2
Note: the code below runs, but doesn't really work as intended, since I can't quite figure out the configuration; every time I log data it is specific to the individual process.
cluster1.coffee:
cluster = require 'cluster'
numCPUs = require('os').cpus().length

if cluster.isMaster
  i = 0
  cluster.setupMaster
    exec: './server1'
  console.log "App 1 clustering with: #{numCPUs} clusters"
  while i < numCPUs
    cluster.fork()
    i++
  cluster.on 'fork', (worker) ->
    console.log 'Forked App 1 server worker ' + worker.process.pid
server1.coffee:
Collective = require 'collective'

all_hosts = [
  host: 'localhost', port: 8124 # Wrong
]

collective = new Collective(
  host: 'localhost'
  port: 8124
, all_hosts, (collective) ->
)

collectiveUpsert = () ->
  num = Math.floor((Math.random() * 10000) + 1)
  data =
    num: num
  console.log process.pid + ' sees current num as: ' + JSON.stringify(collective.get('foo.bar'))
  console.log process.pid + ' setting num to: ' + JSON.stringify(data)
  collective.set 'foo.bar', data

setInterval (->
  collectiveUpsert()
), 5 * 1000
cluster2.coffee:
cluster = require 'cluster'
numCPUs = require('os').cpus().length

if cluster.isMaster
  i = 0
  cluster.setupMaster
    exec: './server2'
  console.log "App 2 clustering with: #{numCPUs} clusters"
  while i < numCPUs
    cluster.fork()
    i++
  cluster.on 'fork', (worker) ->
    console.log 'Forked App 2 server worker ' + worker.process.pid
server2.coffee:
Collective = require 'collective'

all_hosts = [
  host: 'localhost', port: 8124 # Wrong
]

collective = new Collective(
  host: 'localhost'
  port: 8124
, all_hosts, (collective) ->
)

collectiveUpsert = () ->
  num = Math.floor((Math.random() * 10000) + 1)
  data =
    num: num
  console.log process.pid + ' sees current num as: ' + JSON.stringify(collective.get('foo.bar'))
  console.log process.pid + ' setting num to: ' + JSON.stringify(data)
  collective.set 'foo.bar', data

setInterval (->
  collectiveUpsert()
), 5 * 1000

In order to use collective.js with cluster and/or multiple servers you need to start it on every Node.js child process. Think of it like the http module, where you have to create the listener on every child/worker, not on the master (http://nodejs.org/api/cluster.html#cluster_cluster). Following similar logic, for collective.js you should do something like this (single server):
if (cluster.isMaster) {
  // fork n children
} else {
  var current_host = {host: "localhost", port: 10000};
  current_host.port += cluster.worker.id; // this is incremented for every new process.
  var all_hosts = [
    {"host": "localhost", "port": 10001},
    {"host": "localhost", "port": 10002},
    {"host": "localhost", "port": 10003},
    {"host": "localhost", "port": 10004},
    {"host": "localhost", "port": 10005},
    {"host": "localhost", "port": 10006}
    // must be the same amount as the child process count.
  ];
  var collective = new modules.collective(current_host, all_hosts, function (collective) {
    // Do your usual stuff. Start http listener, etc...
  });
}
You should modify localhost to your ip addresses and make sure ports increment properly, if you want to use this on different servers.
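For the asker's two-machine setup (N workers per box behind a load balancer), the same idea applies: every worker on every machine needs its own entry, and all_hosts must be identical everywhere. A rough sketch, assuming example IPs 10.0.0.1 and 10.0.0.2 and a fixed worker count (none of these values come from collective.js itself):

var cluster = require('cluster');
var Collective = require('collective');

var base_port = 10000;
var machines = ['10.0.0.1', '10.0.0.2']; // example IPs of the two boxes
var workers_per_machine = 2;             // must match how many workers you fork per box

// Build the full membership list; it has to be the same on both machines.
var all_hosts = [];
machines.forEach(function (ip) {
  for (var i = 1; i <= workers_per_machine; i++) {
    all_hosts.push({host: ip, port: base_port + i});
  }
});

// Inside each worker (cluster.isWorker), current_host is this machine's own IP
// (on the second box this would be '10.0.0.2') plus the worker id as a port offset,
// so every process listens on a unique port.
var current_host = {host: '10.0.0.1', port: base_port + cluster.worker.id};
var collective = new Collective(current_host, all_hosts, function (collective) {
  // Start the http listener, do the usual work, etc...
});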
For any additional information you can check the (crude) tests at test/index.js.
Hope that helps! If you need any further assistance - please ask.
P.S. Admittedly, this way is too cumbersome and needs a clearer explanation. I hope to figure out a cleaner and easier initialization process in the near future, and in addition to that, to clarify the readme and provide some full examples.

Related

Slowness of node/deno postgres client on mac

I have encountered peculiar slowness on macOS when using node-postgres or deno-postgres. I have a very simple table with two columns, and when I execute the query select * from table it runs very, very slowly. I have also tried selecting directly with an SQL client and it is very fast.
So, to be precise: the table has 60 entries and two columns, on a remote Postgres server (12.2).
I have the following three scripts.
#node v13.12.0
const { Client } = require('pg')

const client = new Client({
  user: 'u',
  host: 'address',
  database: 'db',
  password: 'pw',
  port: 5432,
})

client.connect()

const start = Date.now();
client.query('SELECT * from unit', (err, res) => {
  const ms = Date.now() - start;
  console.log(`db call ${ms}`);
  console.log(res.rows.length);
  client.end()
})
#deno 1.1.2
#v8 8.5.216
#typescript 3.9.2
import { Client } from "https://deno.land/x/postgres@v0.4.2/mod.ts";

const client = new Client({
  user: "u",
  database: "db",
  hostname: "addr",
  password: "pw",
  port: 5432,
});

await client.connect();

const start = Date.now();
const dataset = await client.query("SELECT * FROM unit");
const ms = Date.now() - start;
console.log(`db call ${ms}`);
console.log(dataset.rowsOfObjects().length)
#python 3.7.7
import psycopg2
from datetime import datetime

# try:
connection = psycopg2.connect(user = "u",
                              password = "p",
                              host = "addr",
                              port = "5432",
                              database = "db")
cursor = connection.cursor()

a = datetime.now()
cursor.execute("select * from unit")
records = cursor.fetchall()
b = datetime.now()
c = b - a
print(len(records))
print(c.total_seconds() * 1000)
And when I execute all three scripts on my macOS (10.15.5) machine, I get the following results:
"select * from unit" (60 records)
node ~16'000ms
deno ~16'000ms
python ~240ms
when I execute "select * from unit limit 5"
node ~480ms
deno ~110ms
python ~220ms
when I execute "select * from unit" on the same ubuntu server where postgres is installed then all 3 scripts execute in around 10ms.
I have enabled timing and full logging in the postgres server, and I see that I can see that queries in all the above situations have executed in below one milisecond, around ~0.600ms
At this point, I have feeling that fault lies into intersection of node/deno and my macos, which could probably be v8. or something else that deno and node share.
So, what could it be?
p.s I also tried node profiler and I see this:
[Summary]:
   ticks  total  nonlib   name
       0    0.0%    0.0%  JavaScript
     116   84.7%   99.1%  C++
      22   16.1%   18.8%  GC
      20   14.6%          Shared libraries
       1    0.7%          Unaccounted

[C++ entry points]:
   ticks    cpp   total   name
      45   54.9%   32.8%  T __ZN2v88internal32Builtin_DatePrototypeSetUTCHoursEiPmPNS0_7IsolateE
      36   43.9%   26.3%  T __ZN2v88internal21Builtin_HandleApiCallEiPmPNS0_7IsolateE
       1    1.2%    0.7%  T __ZN2v88internal23Builtin_DateConstructorEiPmPNS0_7IsolateE
but I have no idea what that might mean.
OK, I finally figured it out.
As nothing was working I decided to move my API to the remote server instead of running it locally, started it up, and was pleased to see instant communication between API and database... only to see exactly the same slowness on the frontend running on my machine.
And this is when it dawned on me - this is some sort of traffic shaping from my internet provider. I turned on VPN and everything started working as expected immediately.
No wonder I couldn't understand why it was getting stuck. The issue was way down the stack; this will be a lesson for me - always think outside the box that is the computer itself.
This also explains why it was sometimes working normally. However, it doesn't explain why the issue never affected the Python script - maybe it communicates with the Postgres server in a slightly different manner that didn't trigger the provider's filter. Who knows.

How to test a clustered Express app with Mocha?

Here is a simplified version of my clustered Express app:
/index.js
module.exports = process.env.CODE_COV
  ? require('./lib-cov/app')
  : require('./lib/app');
/lib/app.js
var cluster = require('cluster'),
    express = require('express'),
    app = module.exports = express.createServer();

if (cluster.isMaster) {
  // Considering I have 4 cores.
  for (var i = 0; i < 4; ++i) {
    cluster.fork();
  }
} else {
  // do app configurations, then...

  // Don't listen to this port if the app is required from a test script.
  if (!module.parent.parent) {
    app.listen(8080);
  }
}
/test/test1.js
var app = require('../');
app.listen(7777);
// send requests to app, then assert the response.
Questions:
var app = require('../'); will not work in this cluster environment. Which of the worker apps should it return? Should it return the cluster object instead of an Express app?
Now, obviously setting the port in the test script will not work. How would you set a port within a test script to a cluster of apps?
How would you send requests to this cluster of apps?
The only solution I can think of is to conditionally turn off the clustering feature and run only one app if the app is requested from a test script (if (module.parent.parent) ...).
Any other way to test a clustered Express app with Mocha?
It's been quite a long time since I posted this question. Since no one has answered, I will answer it myself.
I kept the /index.js as it is:
module.exports = process.env.CODE_COV
  ? require('./lib-cov/app')
  : require('./lib/app');
In /lib/app.js, which starts the cluster, I have the following code. In brief, I start the cluster only in a non-test environment. In a test environment the cluster is not started; only a single app/worker is started, as controlled by the cluster.isMaster && !module.parent.parent condition.
var cluster = require('cluster'),
    express = require('express'),
    app = module.exports = express.createServer();

if (cluster.isMaster && !module.parent.parent) {
  // Considering I have 4 cores.
  for (var i = 0; i < 4; ++i) {
    cluster.fork();
  }
} else {
  // do app configurations, then...

  // Don't listen to this port if the app is required from a test script.
  if (!module.parent.parent) {
    app.listen(8080);
  }
}
In the above case !module.parent.parent evaluates to true only if the application was not started by a test script.
module is the current /lib/app.js script.
module.parent is its parent /index.js script.
module.parent.parent is undefined if the application was started directly via node index.js.
module.parent.parent is the test script if the application was started via one of the test scripts.
Thus, I can safely start the app from a test script, where I can set a custom port.
/test/test1.js
var app = require('../');
app.listen(7777);
// send requests to app, then assert the response.
At the same time if I need to run the application in real, i.e. not for testing, then I run node index.js and it will start up the cluster of applications.
I have a much simpler way of doing this
if (process.env.NODE_ENV !== 'test') {
  if (cluster.isMaster) {
    var numCPUs = require('os').cpus().length;
    console.log('total cpu cores on this host: ', numCPUs);
    for (var i = 0; i < numCPUs; i++) {
      console.log('forking worker...');
      cluster.fork();
    }
    cluster.on('online', function(worker) {
      console.log('Worker ' + worker.process.pid + ' is online.');
    });
    cluster.on('exit', function(worker, code, signal) {
      console.log('worker ' + worker.process.pid + ' died.');
    });
  } else {
    console.log('Im a worker');
    // application code
    setupServer();
  }
} else {
  // when running tests
  setupServer();
}
Just make sure to set the env to test when running the tests
ex: NODE_ENV=test grunt test
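For reference, a minimal Mocha spec for this setup might look like the sketch below. It assumes the supertest package and that requiring the app returns the Express instance (as in the question's test1.js); adapt the route to whatever your app actually serves:

// test/app.test.js -- run with: NODE_ENV=test mocha
var request = require('supertest');
var app = require('../'); // the cluster branch is skipped because NODE_ENV === 'test'

describe('GET /', function () {
  it('responds with 200', function (done) {
    request(app).get('/').expect(200, done);
  });
});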
I kind of liked your solution because of its simplicity; however, in an environment like an MVC framework for node, you may end up chaining module.parent up to 11 times (seriously).
I think a better approach is to simply check which script node started with. Node's command-line arguments are available in process.argv.
The first item in this array is 'node' (the executable), and the second is the path to the file that node started executing. That would be index.js in your case.
So instead of checking

module.parent.parent
   ^      ^
   |      |
(app.js)  (index.js)
You could do something like this:

var path = require('path');
var starter = process.argv[1].split(path.sep).pop();
Where starter would be index or index.js depending on what you started your server with.
node index.js vs node index
The check would then look like:
// compare against both forms, since starter may or may not include the extension
if (cluster.isMaster && (starter === 'index' || starter === 'index.js')) {
  cluster.fork();
}
Worked in my environments—I hope this helps!

NodeJS - How to handle "listen EADDRINUSE" when accessing external process

I'm using PhantomJS for printing PDFs, with the phantomjs-node module. It works well, but when I try to create several files at once it throws an unhandled "listen EADDRINUSE" error.
I assume this is because the module uses PhantomJS, which is an external process, and it can't bind to the same port several times?
Anyway, I can't catch this error, and I'd like to resolve this problem, at least by avoiding a server crash when it happens.
I thought of using a "global" variable, like a lock, in order to block concurrent calls until the current one is finished.
Any idea how to implement that, or any other solution?
The code from @AndyD is not correct IMHO. See lines 45-54 in
https://github.com/sgentle/phantomjs-node/blob/master/phantom.coffee
So the example should be
var portscanner = require('portscanner');
var phantom = require('phantom');
portscanner.findAPortNotInUse(40000, 60000, 'localhost', function(err, freeport) {
  phantom.create({'port': freeport}, function(ph){
    ...
  });
});
You should be able to pass in a port number every time you call create:
var phantom = require('phantom');
phantom.create(null, null, function(ph){
}, null, 11111);
You can then use a counter to ensure it's different every time you start phantomjs-node.
If you are starting a new process every time and you can't share a counter then you can use portscanner to find a free port:
var portscanner = require('portscanner');
var phantom = require('phantom');
portscanner.findAPortNotInUse(40000, 60000, 'localhost', function(err, freeport) {
  phantom.create(null, null, function(ph){
    ...
  }, null, freeport);
});

How to debug Node.JS child forked process?

I'm trying to debug the child Node.js process created using:
var child = require('child_process');
child.fork(__dirname + '/task.js');
The problem is that when running in IntelliJ/WebStorm both parent and child process start on the same port.
debugger listening on port 40893
debugger listening on port 40893
So it only debugs the parent process.
Is there any way to set IntelliJ to debug the child process or force it to start on a different port so I can connect it in Remote debug?
Yes. You have to spawn your process on a new port. There is a workaround to debug with clusters, and you can do the same here:
Start your app with the --debug flag and then:
var child = require('child_process');

var debug = typeof v8debug === 'object';
if (debug) {
  // Set an unused port number.
  process.execArgv.push('--debug=' + (40894));
}
child.fork(__dirname + '/task.js');
debugger listening on port 40894
It is a known bug in node.js that has been recently fixed (although not backported to v0.10).
See this issue for more details: https://github.com/joyent/node/issues/5318
There is a workaround where you alter the command-line for each worker process, although the API was not meant to be used this way (the workaround might stop working in the future). Here is the source code from the github issue:
var cluster = require('cluster');
var http = require('http');

if (cluster.isMaster) {
  var debug = process.execArgv.indexOf('--debug') !== -1;
  cluster.setupMaster({
    execArgv: process.execArgv.filter(function(s) { return s !== '--debug' })
  });
  for (var i = 0; i < 2; ++i) {
    if (debug) cluster.settings.execArgv.push('--debug=' + (5859 + i));
    cluster.fork();
    if (debug) cluster.settings.execArgv.pop();
  }
}
else {
  var server = http.createServer(function(req, res) {
    res.end('OK');
  });
  server.listen(8000);
}
Quick simple fix (when using chrome://inspect/#devices):
var child = require('child_process');
child.fork(__dirname + '/task.js',[],{execArgv:['--inspect-brk']});
Then run your app without any --inspect-brk flag: the main process won't be debugged, but the forked process will, with no conflicts.
To stop a fork conflicting when debugging the main process:
child.fork(__dirname + '/task.js',[],{execArgv:['--inspect=xxxx']});
where xxxx is some port not being used for debugging the main process. Though I haven't managed to easily connect to both at the same time in the debugger even though it reports as listening.
I find that setting the 'execArgv' attribute in the fork function works:

const { fork } = require('child_process');

const child = fork('start.js', [], {
  cwd: startPath,
  silent: true,
  execArgv: ['--inspect=10245']
});
if "process.execArgv" doenst work you have to try:
if (debug) {
process.argv.push('--debug=' + (40894));
}
this worked for me..
There is one more modern way to debug a child (or any) process with Chrome DevTools.
Start your app with the --inspect argument, like below:
node --debug=9200 --inspect app/main.js
You will see the message with URL for each child process:
Debugger listening on port 9200.
Warning: This is an experimental feature and could change at any time.
To start debugging, open the following URL in Chrome:
chrome-devtools://devtools/bundled/inspector.html?experiments=true&v8only=true&ws=127.0.0.1:9200/207f2ab6-5700-4fc5-b6d3-c49a4b34a311
Debugger listening on port 9201.
Warning: This is an experimental feature and could change at any time.
To start debugging, open the following URL in Chrome:
chrome-devtools://devtools/bundled/inspector.html?experiments=true&v8only=true&ws=127.0.0.1:9201/97be3351-2ea1-4541-b744-e720188bacfa
Debugger listening on port 9202.
Warning: This is an experimental feature and could change at any time.
To start debugging, open the following URL in Chrome:
chrome-devtools://devtools/bundled/inspector.html?experiments=true&v8only=true&ws=127.0.0.1:9202/8eb8384a-7167-40e9-911a-5a8b902bb8c9
If you want to debug the remote processes, just change the address 127.0.0.1 to your own.
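Putting those pieces together for plain child_process.fork, a small sketch (the port range here is an arbitrary assumption; pick ports that are free on your machine) that gives every forked child its own inspector port so they never collide:

var child_process = require('child_process');

var basePort = 9230; // assumed free range, one port per child

for (var i = 0; i < 4; i++) {
  // Each child gets --inspect=<unique port>, so chrome://inspect lists them all separately.
  child_process.fork(__dirname + '/task.js', [], {
    execArgv: ['--inspect=' + (basePort + i)]
  });
}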

Dynamic arguments for ZINTERSTORE with node_redis

I'm trying to use the ZINTERSTORE command of redis from node.js using node_redis:
//node.js server code
var redis = require("redis");
var client = redis.createClient();

// ... omitted code ...

exports.searchImages = function(tags, page, callback){
  //tags = ["red", "round"]
  client.ZINTERSTORE("tmp", tags.length, tags.join(' '), function(err, replies){
    //do something
  });
}
But the call client.ZINTERSTORE throws the error: [Error: ERR syntax error]. Passing in tags as an array (instead of using tags.join(' ')) throws the same error.
Where can I find the correct syntax for this command? The source code for node_redis has it buried in the javascript parser, but it's tricky to see what's going on without 'stepping through' the code. Is there a good way to do step through debugging with node.js?
There are multiple ways to debug a Redis client with node.js.
First, you can rely on the Redis monitor feature to log every command received by the Redis server:
> src/redis-cli monitor
OK
1371134499.182304 [0 172.16.222.72:51510] "info"
1371134499.185190 [0 172.16.222.72:51510] "zinterstore" "tmp" "2" "red,round"
You can see the zinterstore command received by Redis is ill-formed.
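For comparison, a well-formed call sends each tag as a separate argument, so (with the timestamp and client address elided) the monitor line should look roughly like:
"zinterstore" "tmp" "2" "red" "round"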
Then, you can activate the debugging mode of node_redis by adding the following line in your script:
redis.debug_mode = true;
It will output the Redis protocol at runtime:
Sending offline command: zinterstore
send ncegcolnx243:6379 id 1: *4
$11
zinterstore
$3
tmp
$1
2
$9
red,round
send_command buffered_writes: 0 should_buffer: false
net read ncegcolnx243:6379 id 1: -ERR syntax error
Then, you can use the node.js debugger. Put a debugger breakpoint in the code in the following way:
function search(tags, page, callback) {
  debugger; // breakpoint is here
  client.ZINTERSTORE("tmp", tags.length, tags, function(err, replies){
    console.log(err);
    console.log(replies);
    callback('ok')
  });
}
You can then launch the script with node in debug mode:
$ node debug test.js
< debugger listening on port 5858
connecting... ok
break in D:\Data\NodeTest\test.js:1
1 var redis = require("redis");
2 var client = redis.createClient( 6379, "ncegcolnx243" );
3
debug> help
Commands: run (r), cont (c), next (n), step (s), out (o), backtrace (bt), setBreakpoint (sb), clearBreakpoint (cb),
watch, unwatch, watchers, repl, restart, kill, list, scripts, breakOnException, breakpoints, version
debug> cont
break in D:\Data\NodeTest\test.js:8
6 function search(tags, page, callback) {
7
8 debugger;
9 client.ZINTERSTORE("tmp", tags.length, tags, function(err, replies){
10 console.log(err);
... use n(ext) and s(tep) commands ...
By stepping through the code, you will realize that the command array is not correct, because the tags are serialized and processed as a single parameter.
Changing the code as follows will fix the problem:
var cmd = [ "tmp", tags.length ];
client.zinterstore( cmd.concat(tags), function(err, replies) {
...
});
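Putting it back into the question's searchImages function, a sketch of the fixed call could look like this (the zrange read-back at the end is only an illustration of how to consume the result, not part of the original code):

var redis = require('redis');
var client = redis.createClient();

exports.searchImages = function (tags, page, callback) {
  // Build ["tmp", 2, "red", "round"] so every tag travels as its own argument.
  var args = ['tmp', tags.length].concat(tags);
  client.zinterstore(args, function (err, count) {
    if (err) return callback(err);
    // Read the intersection back; paginate with the page argument as needed.
    client.zrange('tmp', 0, -1, function (err, members) {
      callback(err, members);
    });
  });
};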
