Node clusters with pm2 queues every other request - node.js

I'm using pm2 to manage concurrent requests to my API. So far so good; I've managed to make it work.
My api has only one route. Each request takes between 1-2 min to completely resolve and send back the response. As soon as I make my first request, I can see in the pm2 logs that the request has been accepted, but if I make a second request to the same route it gets queued and only gets processed after the first is completed.
Only if I make a third request to the same route while the second is queued does another worker accept it; the second stays queued until the first is resolved.
I hope I've made myself clear: the first request is accepted promptly by a worker, the second gets queued, the third is promptly accepted by another worker, the fourth gets queued, the fifth gets accepted, the sixth gets queued, and so on.
I have 24 available workers.
Here is my very simple server:
const express = require('express');
const runner = require('./Rrunner2');
const moment = require('moment');
const bodyParser = require('body-parser');

const app = express();
app.use(express.json({limit: '50mb', extended: true}));
app.use(bodyParser.json());

app.post('/optimize', (req, res) => {
    try {
        const req_start = moment().format('DD/MM/YYYY h:mm a');
        console.log('Request received at ' + req_start);
        console.log(`Worker process ID - ${process.pid} has accepted the request.`);

        const data = req.body;
        const opt_start = moment().format('DD/MM/YYYY h:mm a');
        console.log('Optimization started at ' + opt_start);
        let result = runner.optimizer(data);
        const opt_end = moment().format('DD/MM/YYYY h:mm a');
        console.log('Optimization ended at ' + opt_end);

        const res_send = moment().format('DD/MM/YYYY h:mm a');
        console.log('Response sent at ' + res_send);
        return res.send(result);
    } catch (err) {
        console.error(err);
        return res.status(500).send('Server error.');
    }
});

const PORT = 3000;
app.listen(PORT, () => console.log(`Server listening on port ${PORT}.`));
My PM2 ecosystem file is:
module.exports = {
    apps: [{
        name: "Asset Optimizer",
        script: "server.js",
        watch: true,
        ignore_watch: ["optimizer", "outputData", "Rplots.pdf", "nodeSender.js", "errorLog"],
        instances: "max",
        autorestart: true,
        max_memory_restart: "1G",
        exec_mode: "cluster",
        watch_options: {
            followSymlinks: false
        }
    }]
};
I start the server using pm2 start ecosystem.config.js
Everything works just fine, but this queue issue is driving me crazy. I've tried many dirty approaches, including splitting routes and splitting servers, with no success whatsoever.
Even if you don't know the answer to this, please give me some ideas on how to overcome this problem. Thank you very much.
UPDATE
Okay, I've managed to make this work with the native cluster module by setting:
const cluster = require('cluster');
const http = require('http');
const numCPUs = require('os').cpus().length;
cluster.schedulingPolicy = cluster.SCHED_RR;
But once I try to make pm2 start the server, it no longer works.
Is it possible to make pm2 accept the round-robin approach?
P.S.: I'm using Windows, and I found in the Node docs that it is the only platform where this is not set up by default.
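For reference, a minimal sketch of the full native-cluster setup, assuming the express app above lives in server.js and calls app.listen(3000) itself:

const cluster = require('cluster');
const numCPUs = require('os').cpus().length;

// Round-robin must be set before any worker is forked
// (on Windows it is not the default).
cluster.schedulingPolicy = cluster.SCHED_RR;

if (cluster.isMaster) {
    // The master only forks; it distributes incoming connections
    // round-robin among the workers.
    for (let i = 0; i < numCPUs; i++) {
        cluster.fork();
    }
    cluster.on('exit', (worker) => {
        console.log(`Worker ${worker.process.pid} died, forking a replacement.`);
        cluster.fork();
    });
} else {
    // Each worker runs the express server; they all share port 3000.
    require('./server');
}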

The only viable solution to this issue was implementing nginx as a reverse proxy and a load balancer.
I used nginx 1.18.0 and this is the configuration file that made it work.
If anyone comes across this issue, nginx + pm2 is the way to go. Happy to clarify further if anyone faces this; it took a lot of work.
worker_processes 5;

events {
    worker_connections 1024;
}

http {
    include mime.types;
    default_type application/octet-stream;

    upstream expressapi {
        least_conn;
        server localhost:3000;
        server localhost:3001;
        server localhost:3002;
        server localhost:3003;
        server localhost:3004;
    }

    sendfile on;
    keepalive_timeout 800;
    fastcgi_read_timeout 800;
    proxy_read_timeout 800;

    server {
        listen 8000;
        server_name optimizer;

        location / {
            proxy_pass http://expressapi/;
        }

        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root html;
        }
    }
}
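For the upstream block above to work, five copies of the server have to listen on ports 3000-3004. A sketch of one way to do that with pm2's increment_var option, assuming server.js reads its port from process.env.PORT:

// ecosystem.config.js - a sketch; assumes server.js does
// const PORT = process.env.PORT || 3000;
module.exports = {
    apps: [{
        name: "Asset Optimizer",
        script: "server.js",
        exec_mode: "fork",     // independent processes, one port each
        instances: 5,          // one per upstream entry
        increment_var: "PORT", // pm2 bumps PORT by 1 for each instance
        env: {
            PORT: 3000         // instances get 3000, 3001, ... 3004
        }
    }]
};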

As said above, the problem isn't with PM2; pm2 just processes whichever requests get sent to it. The problem is the layer above that is sending the requests, i.e. nginx.

Related

HAProxy locks up simple express server in 5 minutes?

I have a really strange one I just cannot work out.
I have been building node/express apps for years now and usually run a dev server just at home for quick debugging/testing. I frontend it with a haproxy instance to make it "production like" and to perform the ssl part.
In any case, just recently ALL servers (different projects) started misbehaving and stopped responding to requests around exactly 5 minutes after being started. That is ALL of the 3 or 4 I sometimes run on this machine. Yet the exact same instance of haproxy is front-ending the production version of the code, and that has no issues; it's still rock solid. And, infuriatingly, I wrote a really basic express server example, and if it's front-ended by the same haproxy it also locks up, but if I switch ports, it runs fine forever as expected!
So in summary:
1x haproxy instance front-ending a bunch of prod/dev instances with the same rule sets, all with ssl.
2x production instances working fine.
4x dev instances (and a simple test program) ALL locking up after around 5 min when behind haproxy.
And if I run the simple test program on a different port, so it's local network only, it works perfectly.
I do also have uptime robot liveness checks hitting the haproxy as well to monitor the instances.
So this example:
const express = require('express')
const request = require('request');

const app = express()
const port = 1234

var counter = 0;
var received = 0;

process.on('warning', e => console.warn(e.stack));

const started = new Date();

if (process.pid) {
    console.log('Starting as pid ' + process.pid);
}

app.get('/', (req, res) => {
    res.send('Hello World!').end();
})

app.get('/livenessCheck', (req, res) => {
    res.send('ok').end();
})

app.use((req, res, next) => {
    console.log('unknown', { host: req.headers.host, url: req.url });
    res.send('ok').end();
})

const server = app.listen(port, () => {
    console.log(`Example app listening on port ${port}`)
})

// Note: keepAliveTimeout/headersTimeout are http.Server properties;
// as written, these set plain properties on the express app object.
app.keepAliveTimeout = (5 * 1000) + 1000;
app.headersTimeout = (6 * 1000) + 2000;

// Log the open connection count every 500 ms
setInterval(() => {
    server.getConnections(function (error, count) {
        console.log('connections', count);
    });
    //console.log('tick', new Date())
}, 500);

// Fire a request at ourselves every second and count the responses
setInterval(() => {
    console.log('request', new Date())
    request('http://localhost:' + port, function (error, response, body) {
        if (error) {
            const ended = new Date();
            console.error('request error:', ended, error); // Print the error if one occurred
            counter = counter - 1;
            if (counter < 0) {
                console.error('started ', started);
                const diff = Math.floor((ended - started) / 1000)
                const min = Math.floor(diff / 60);
                console.error('elapsed ', min, 'min ', diff - min * 60, 'sec');
                exit; // deliberate crash for testing (see below)
            }
            return;
        }
        received = received + 1;
        console.log('request ', received, 'statusCode:', new Date(), response && response.statusCode); // Print the response status code if a response was received
        //console.log('body:', body);
    });
}, 1000);
works perfectly and runs forever on a non-haproxy port, but only runs for approx 5 min on a port behind haproxy, it usually gets to 277 request responses each time before hanging up and timing out.
The "exit()" function is just a forced crash for testing.
I've tried adjusting some timeouts on haproxy, but to no avail. And each one has no impact on the production instances that just keep working fine.
I'm running these dev versions on a Mac Pro 2013 with the latest OS, and I've tried various versions of node.
Any thoughts on what it could be or how to debug further?
Oh, and they all serve web sockets as well as http requests.
Here is one example of a haproxy config that I am trying (relevant sections):
global
    log 127.0.0.1 local2
    ...
    nbproc 1
    daemon

defaults
    mode http
    log global
    option httplog
    option dontlognull
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 4s
    timeout server 5s
    timeout http-keep-alive 4s
    timeout check 4s
    timeout tunnel 1h
    maxconn 3000

frontend wwws
    bind *:443 ssl crt /etc/haproxy/certs/ no-sslv3
    option http-server-close
    option forwardfor
    reqadd X-Forwarded-Proto:\ https
    reqadd X-Forwarded-Port:\ 443
    http-request set-header X-Client-IP %[src]
    # set HTTP Strict Transport Security (HSTS) header
    rspadd Strict-Transport-Security:\ max-age=15768000
    acl host_working hdr_beg(host) -i working.
    use_backend Working if host_working
    default_backend BrokenOnMac

backend Working
    balance roundrobin
    server working_1 1.2.3.4:8456 check

backend BrokenOnMac
    balance roundrobin
    server broken_1 2.3.4.5:8456 check
So if you go to https://working.blahblah.blah it works forever, but the backend for https://broken.blahblah.blah locks up and stops responding after 5 minutes (including direct curl requests bypassing haproxy).
BUT if I run the EXACT same code on a different port, it responds forever to any direct curl request.
The "production" servers that are working are on various OSes like Centos. On my Mac Pro, I run the tests. The test code works on the Mac on a port NOT front-ended by haproxy. The same test code hangs up after 5 minutes on the Mac when it has haproxy in front.
So the precise configuration that fails is:
Mac Pro + any node express app + frontended by haproxy.
If I change anything, like run the code on Centos or make sure there is no haproxy, then the code works perfectly.
So given it only stopped working recently, is the latest patch for OSX Monterey (12.6) maybe somehow interfering with the app socket when it gets a certain condition from haproxy? That seems highly unlikely, but it's the most logical explanation I can come up with.

Socket Io limiting only 6 connection in Node js

So I came across a problem. I am trying to send an {id} to my REST API (node.js) and, in response, I get data back on the socket.
Problem:
For the first 5-6 times it works perfectly fine: it displays the id and sends data back on the socket. But after the 6th time it no longer receives the id.
I tried the suggestions in https://github.com/socketio/socket.io/issues/1145 but they didn't solve the problem.
On recompiling the server, it shows the previous {ids} that I entered after the 6th time. It's as if, after 5-6 requests, the id is being stored in some kind of cache.
Here is my API route.
// This route only gets the {id} 5-6 times; after that it stops receiving the {id}.
const express = require("express");

var closeFlag = false;
const PORT = process.env.SERVER_PORT; //|| 3000;
const app = express();
var count = 1;

http = require('http');
http.globalAgent.maxSockets = 100;
http.Agent.maxSockets = 100;

const serverTCP = http.createServer(app)
// const tcpsock = require("socket.io")(serverTCP)
const tcpsock = require('socket.io')(serverTCP, {
    cors: {
        origin: '*',
    },
    perMessageDeflate: false
});

app.post("/getchanneldata", (req, res) => {
    console.log("count : " + count)
    count++; // for debugging purposes
    closeFlag = false;

    var message = (req.body.val).toString()
    console.log("message : " + message);
    chanId = message;

    client = dgram.createSocket({ type: 'udp4', reuseAddr: true });
    client.on('listening', () => {
        const address = client.address();
    });
    client.on('message', function (message1, remote) {
        var arr = message1.toString().split(',');
    });
    client.send(message, 0, message.length, UDP_PORT, UDP_HOST, function (err, bytes) {
        if (err) throw err;
        console.log(message);
        console.log('UDP client message sent to ' + UDP_HOST + ':' + UDP_PORT);
        // message = "";
    });
    client.on('disconnect', (msg) => {
        client.Diconnected()
        client.log(client.client)
    })
});
There are multiple issues here.
In your app.post() handler, you don't send any response to the incoming http request. That means that when the browser (or any client) sends a POST to your server, the client sits there waiting for a response, but that response never comes.
Meanwhile, the browser has a limit on how many requests it will send simultaneously to the same host (I think Chrome's limit is coincidentally 6). Once you hit that limit, the browser queues the request and waits for one of the previous connections to return its response before sending another one. Eventually those connections will time out, but that takes a while.
So, the first thing to fix is to send a response in your app.post() handler, even if you just do res.send("ok");. That will allow the 7th, 8th, and subsequent requests to be sent to your server immediately. Every incoming http request should have a response sent back to it; even if you have nothing to send, just do a res.end(). Otherwise, the http connection is left hanging, consuming resources and waiting to eventually time out.
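A minimal sketch of that fix in the handler above:

app.post("/getchanneldata", (req, res) => {
    // ... kick off the UDP work here ...

    // Always answer the HTTP request, even if there is nothing to say;
    // otherwise the browser keeps the connection pending and, once ~6
    // are pending, queues every further request to this host.
    res.send("ok");
});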
On a separate note, your app.post() handler contains this:
client = dgram.createSocket({ type: 'udp4', reuseAddr: true });
This has a couple issues. First, you never declare the variable client so it becomes an implicit global (which is really bad in a server). That means successive calls to the app.post() handler will overwrite that variable.
Second, it is not clear from the included code when, if ever, you close that udp4 socket. It does not appear that the server itself ever closes it.
Third, you're recreating the same UDP socket on every single POST to /getchanneldata. Is that really the right design? If your server receives 20 of these requests, it will open up 20 separate UDP connections.
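If a fresh socket per request really is the intent, here's a sketch of scoping and cleaning it up; UDP_HOST and UDP_PORT are assumed to be defined elsewhere in your config:

const dgram = require('dgram');

app.post("/getchanneldata", (req, res) => {
    const message = String(req.body.val);

    // Local declaration: concurrent requests no longer clobber each other.
    const client = dgram.createSocket({ type: 'udp4', reuseAddr: true });

    client.on('message', (reply) => {
        // Answer the HTTP request with the UDP reply, then free the socket.
        res.send(reply.toString());
        client.close();
    });

    client.send(message, 0, message.length, UDP_PORT, UDP_HOST, (err) => {
        if (err) {
            client.close();
            res.status(500).send('UDP send failed.');
        }
    });
});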

Connecting to Websocket in OpenShift Online Next Gen Starter

I'm in the process of trying to get an application which I'd built on the old OpenShift Online 2 free service up and running on the new OpenShift Online 3 Starter, and I'm having a bit of trouble.
The application uses websocket, and in the old system all that was required was for the client to connect to my server on port 8443 (which was automatically routed to my server). That doesn't seem to work in the new setup however - the connection just times out - and I haven't been able to find any documentation about using websocket in the new system.
My first thought was that I needed an additional route, but 8080 is the only port option available for routing as far as I can see.
The app lives here, and the connection is made on line 21 of this script with the line:
this.socket = new WebSocket( 'wss://' + this.server + ':' + this.port, 'tabletop-protocol' );
Which becomes, in practice:
this.socket = new WebSocket( 'wss://production-instanttabletop.7e14.starter-us-west-2.openshiftapps.com:8443/', 'tabletop-protocol' );
On the back end, the server setup is unchanged from what I had on OpenShift 2, aside from updating the IP and port lookup from env as needed, and adding logging to help diagnose the issues I've been having.
For reference, here's the node.js server code (with the logic trimmed out):
var http = require( "http" );
var ws = require( "websocket" ).server;
// Trimmed some others used by the logic...

var ip = process.env.IP || process.env.OPENSHIFT_NODEJS_IP || '0.0.0.0';
var port = process.env.PORT || process.env.OPENSHIFT_NODEJS_PORT || 8080;

/* FILE SERVER */
// Create a static file server for the client page
var pageHost = http.createServer( function( request, response ){
    // Simple file server that seems to be working, if a bit slowly
    // ...
} ).listen( port, ip );

/* WEBSOCKET */
// Create a websocket server for ongoing communications
var wsConnections = [];
wsServer = new ws( { httpServer: pageHost } );

// Start listening for events on the server
wsServer.on( 'request', function( request ){
    // Server logic for the app, but nothing in here ever gets hit
    // ...
} );
In another question it was suggested that nearly anything, including this, could be related to the ongoing general issues with US West 2, but other related problems I was experiencing seem to have cleared, and that issue has been open for a week with no update, so I figured I'd dig deeper into this on the assumption that it's something I'm doing wrong instead of them.
Anyone know more about this and what I need to do to make it work?

NodeJs application behind Amazon ELB throws 502

We have a node application running behind Amazon Elastic Load Balancer (ELB), which randomly throws 502 errors when there are multiple concurrent requests and when each request takes time to process. Initially, we tried to increase the idle timeout of ELB to 5 minutes, but still we were getting 502 responses.
When we contacted amazon team, they said this was happening because the back-end is closing the connection with ELB after 5s.
ELB will send HTTP-502 back to your clients for the following reasons:
The load balancer received a TCP RST from the target when attempting to establish a connection.
The target closed the connection with a TCP RST or a TCP FIN while the load balancer had an outstanding request to the target.
The target response is malformed or contains HTTP headers that are not valid.
A new target group was used but no targets have passed an initial health check yet. A target must pass one health check to be considered healthy.
We tried to set our application's keep-alive/timeouts greater than ELB idle timeout (5 min), so the ELB can be responsible for opening and closing the connections. But still, we are facing 502 errors.
The node.js code:
var http = require( 'http' );
var express = require( 'express' );
var url = require('url');
var timeout = require('connect-timeout')

const app = express();

app.get( '/health', (req, res, next) => {
    res.send( "healthy" );
});

app.get( '/api/test', (req, res, next) => {
    var query = url.parse( req.url, true ).query;
    var wait = query.wait ? parseInt(query.wait) : 1;
    setTimeout(function() {
        res.send( "Hello!" );
    }, wait );
});

var server = http.createServer(app);
server.setTimeout(10*60*1000); // 10 * 60 seconds * 1000 msecs
server.listen(80, function () {
    console.log('**** STARTING SERVER ****');
});
Try setting server.keepAliveTimeout to something other than the default 5s. See: https://nodejs.org/api/http.html#http_server_keepalivetimeout. Per AWS docs, you'd want this to be greater than the load balancer's idle timeout.
Note: this was added in Node v8.0.0
Also, if you're still on the Classic ELB, consider moving to the new Application Load Balancer as based on current experience this seems to have improved things for us a lot. You'll also save a few bucks if you have a lot of separate ELBs for each service. The downside could be that there's 1 point of failure for all your services. But in AWS we trust :)
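A sketch of the first suggestion; the 5-minute figure matches the ELB idle timeout mentioned in the question, so adjust it to yours:

const http = require('http');

const server = http.createServer((req, res) => res.end('ok'));

// Keep idle keep-alive sockets open longer than the ELB's idle timeout
// (5 minutes here), so it's the ELB that closes connections, not Node.
server.keepAliveTimeout = 5 * 60 * 1000 + 5000; // requires Node >= 8.0.0

// Commonly recommended alongside it: headersTimeout should exceed
// keepAliveTimeout, or sockets can still be torn down early on some
// Node versions.
server.headersTimeout = 5 * 60 * 1000 + 10000;

server.listen(80);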
In my case, I needed to upgrade the nodejs version:
https://github.com/nodejs/node/issues/27363
After that, the problem was fixed.
Change your server.listen() to this:
const port = process.env.PORT || 3000;
const server = app.listen(port, function() {
console.log("Server running at http://127.0.0.1:" + port + "/");
});
You can read more about this here.

Change port without losing data

I'm building a settings manager for my http server. I want to be able to change settings without having to kill the whole process. One of the settings I would like to be able to change is change the port number, and I've come up with a variety of solutions:
Kill the process and restart it
Call server.close() and then do the first approach
Call server.close() and initialize a new server in the same process
The problem is, I'm not sure what the repercussions of each approach is. I know that the first will work, but I'd really like to accomplish these things:
Respond to existing requests without accepting new ones
Maintain data in memory on the new server
Lose as little uptime as possible
Is there any way to get everything I want? The API for server.close() gives me hope:
server.close(): Stops the server from accepting new connections.
My server will only be accessible by clients I create and by a very limited number of clients connecting through a browser, so I will be able to notify them of a port change. I understand that changing ports is generally a bad idea, but I want to allow for the edge-case where it is convenient or possibly necessary.
P.S. I'm using connect if that changes anything.
P.P.S. Relatively unrelated, but what would change if I were to use UNIX server sockets or change the host name? This might be a more relevant use-case.
P.P.P.S. This code illustrates the problem of using server.close(). None of the previous servers are killed, but more are created with access to the same resources...
var http = require("http");
var server = false,
curPort = 8888;
function OnRequest(req,res){
res.end("You are on port " + curPort);
CreateServer(curPort + 1);
}
function CreateServer(port){
if(server){
server.close();
server = false;
}
curPort = port;
server = http.createServer(OnRequest);
server.listen(curPort);
}
CreateServer(curPort);
Resources:
http://nodejs.org/docs/v0.4.4/api/http.html#server.close
I tested the close() function. It seems to do absolutely nothing; the server still accepts connections on its port. Restarting the process was the only way for me.
I used the following code:
var http = require("http");
var server = false;
function OnRequest(req,res){
res.end("server now listens on port "+8889);
CreateServer(8889);
}
function CreateServer(port){
if(server){
server.close();
server = false;
}
server = http.createServer(OnRequest);
server.listen(port);
}
CreateServer(8888);
I was about to file an issue on the node github page when I decided to test my code thoroughly to see if it really is a bug (I hate filing bug reports when it's user error). I realized that the problem only manifests itself in the browser, because apparently browsers do some weird kind of HTTP request keep alive thing where it can still access dead ports because there's still a connection with the server.
What I've learned is this:
Browser caches keep ports alive unless the process on the server is killed
Utilities that do not keep caches by default (curl, wget, etc) work as expected
HTTP requests in node also don't keep the same type of cache that browsers do
For example, I used this code to prove that node http clients don't have access to old ports:
Client-side code:
var http = require('http'),
    client,
    request;

function createClient (port) {
    client = http.createClient(port, 'localhost');
    request = client.request('GET', '/create');
    request.end();
    request.on('response', function (response) {
        response.on('end', function () {
            console.log("Request ended on port " + port);
            setTimeout(function () {
                createClient(port);
            }, 5000);
        });
    });
}

createClient(8888);
And server-side code:
var http = require("http");
var server,
curPort = 8888;
function CreateServer(port){
if(server){
server.close();
server = undefined;
}
curPort = port;
server = http.createServer(function (req, res) {
res.end("You are on port " + curPort);
if (req.url === "/create") {
CreateServer(curPort);
}
});
server.listen(curPort);
console.log("Server listening on port " + curPort);
}
CreateServer(curPort);
Thanks everyone for the responses.
What about using cluster?
http://learnboost.github.com/cluster/docs/reload.html
It looks interesting!
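For the original goals (respond to existing requests, keep in-memory data, minimal downtime), a sketch along these lines may also work: bind the new port first, then drain the old listener. server.close() only stops new connections; its callback fires once the existing ones (including keep-alive connections, per the findings above) have ended.

var http = require("http");

function onRequest(req, res) {
    res.end("hello");
}

var server = http.createServer(onRequest);
server.listen(8888);

function changePort(newPort) {
    var old = server;

    // Bind the new port first so there is no window with no listener.
    server = http.createServer(onRequest);
    server.listen(newPort, function () {
        // Stop accepting new connections on the old port. Requests
        // already in flight are allowed to finish; the callback fires
        // once every old connection has closed.
        old.close(function () {
            console.log("Old listener fully drained.");
        });
    });
}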
