HAProxy locks up simple express server in 5 minutes? - node.js

I have a really strange one I just cannot work out.
I have been building node/express apps for years now and usually run a dev server just at home for quick debugging/testing. I frontend it with a haproxy instance to make it "production like" and to perform the ssl part.
In any case, just recently ALL the servers (different projects) started misbehaving and stopped responding to requests almost exactly 5 minutes after being started. That is ALL 3 or 4 of the ones I sometimes run on this machine, yet the exact same haproxy instance is front-ending the production version of the code, and that has no issues; it's still rock solid. And, infuriatingly, I wrote a really basic express server example, and if it's front-ended by the same haproxy it also locks up, but if I switch ports it runs fine forever, as expected!
So in summary:
1x haproxy instance frontending a bunch of prod/dev instances with the same rule sets, all with ssl.
2x production instances working fine
4x dev instances (and a simple test program) ALL locking up after around 5 min when behind haproxy
and if I run the simple test program on a different port so it's local network only, it works perfectly.
I do also have uptime robot liveness checks hitting the haproxy as well to monitor the instances.
So this example:
const express = require('express')
const request = require('request');
const app = express()
const port = 1234

var counter = 0;
var received = 0;

process.on('warning', e => console.warn(e.stack));

const started = new Date();

if (process.pid) {
  console.log('Starting as pid ' + process.pid);
}

app.get('/', (req, res) => {
  res.send('Hello World!').end();
})

app.get('/livenessCheck', (req, res) => {
  res.send('ok').end();
})

app.use((req, res, next) => {
  console.log('unknown', { host: req.headers.host, url: req.url });
  res.send('ok').end();
})

const server = app.listen(port, () => {
  console.log(`Example app listening on port ${port}`)
})

app.keepAliveTimeout = (5 * 1000) + 1000;
app.headersTimeout = (6 * 1000) + 2000;

setInterval(() => {
  server.getConnections(function (error, count) {
    console.log('connections', count);
  });
  //console.log('tick', new Date())
}, 500);

setInterval(() => {
  console.log('request', new Date())
  request('http://localhost:' + port, function (error, response, body) {
    if (error) {
      const ended = new Date();
      console.error('request error:', ended, error); // Print the error if one occurred
      counter = counter - 1;
      if (counter < 0) {
        console.error('started ', started); // Print the error if one occurred
        const diff = Math.floor((ended - started) / 1000)
        const min = Math.floor(diff / 60);
        console.error('elapsed ', min, 'min ', diff - min * 60, 'sec');
        exit;
      }
      return;
    }
    received = received + 1;
    console.log('request ', received, 'statusCode:', new Date(), response && response.statusCode); // Print the response status code if a response was received
    //console.log('body:', body); // Print the HTML for the Google homepage.
  });
}, 1000);
This works perfectly and runs forever on a non-haproxy port, but behind haproxy it only runs for roughly 5 minutes; it usually gets to 277 request responses each time before hanging up and timing out.
The "exit()" function is just a forced crash for testing.
I've tried adjusting some timeouts on haproxy, but to no avail. And each one has no impact on the production instances that just keep working fine.
I'm running these dev versions on a Mac Pro 2013 with the latest OS, and I've tried various versions of node.
Any thoughts what it could be or how to debug further?
Oh, and they all serve web sockets as well as http requests.
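Note that keepAliveTimeout and headersTimeout are properties of the http.Server returned by app.listen(), not of the Express app object, so the two assignments in the snippet above don't actually take effect. Set on the server object they would look roughly like this (just a sketch of the intended settings, sitting above HAProxy's 4s keep-alive, and not necessarily the fix for the lockup):

const express = require('express');
const app = express();
const port = 1234;

const server = app.listen(port, () => {
  console.log(`Example app listening on port ${port}`);
});

// http.Server properties, not Express app properties:
server.keepAliveTimeout = (5 * 1000) + 1000; // 6s, above HAProxy's "timeout http-keep-alive 4s"
server.headersTimeout = (6 * 1000) + 2000;   // 8s, kept above keepAliveTimeout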
Here is one example of a haproxy config that I am trying (relevant sections):
global
    log 127.0.0.1 local2
    ...
    nbproc 1
    daemon

defaults
    mode http
    log global
    option httplog
    option dontlognull
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 4s
    timeout server 5s
    timeout http-keep-alive 4s
    timeout check 4s
    timeout tunnel 1h
    maxconn 3000

frontend wwws
    bind *:443 ssl crt /etc/haproxy/certs/ no-sslv3
    option http-server-close
    option forwardfor
    reqadd X-Forwarded-Proto:\ https
    reqadd X-Forwarded-Port:\ 443
    http-request set-header X-Client-IP %[src]
    # set HTTP Strict Transport Security (HTST) header
    rspadd Strict-Transport-Security:\ max-age=15768000
    acl host_working hdr_beg(host) -i working.
    use_backend Working if host_working
    default_backend BrokenOnMac

backend Working
    balance roundrobin
    server working_1 1.2.3.4:8456 check

backend BrokenOnMac
    balance roundrobin
    server broken_1 2.3.4.5:8456 check
So if you go to https://working.blahblah.blah it works forever, but the backend for https://broken.blahblah.blah locks up and stops responding after 5 minutes (including direct curl requests bypassing haproxy).
BUT if I run the EXACT same code on a different port, it responds forever to any direct curl request.
The "production" servers that are working are on various OSes like Centos. On my Mac Pro, I run the tests. The test code works on the Mac on a port NOT front-ended by haproxy. The same test code hangs up after 5 minutes on the Mac when it has haproxy in front.
So the precise configuration that fails is:
Mac Pro + any node express app + frontended by haproxy.
If I change anything, like run the code on Centos or make sure there is no haproxy, then the code works perfectly.
So given that it only stopped working recently, could the latest patch for macOS Monterey (12.6) somehow be interfering with the app's socket when it gets a certain condition from haproxy? That seems highly unlikely, but it's the most logical explanation I can come up with.

Related

(PERL-> NODEJS) 500: Server closed connection without sending any data back

My Perl script talks to a Node.js server and sends the commands that the Node.js server needs to execute. While some commands take less time, others take a lot longer. While the command is executing on the server, there is silence on the connection. After a while, I receive the error: 500: Server closed connection without sending any data back
During this error, the command is still executing on the server and the desired results are obtained (if you check the server logs). My problem is that I don't want the connection to reset, as there are other follow-on commands that need to run after these long-running commands. Some commands might take 20 minutes.
Perl Side Code:
my $ua = LWP::UserAgent->new;
$ua->timeout(12000);
my $uri = URI->new('http://server');
my $json = JSON->new;
my $data_to_json = {DATA};
my $json_content = $json->encode($data_to_json);
# set custom HTTP request header fields
my $req = HTTP::Request->new(POST => $uri);
my $resp = $ua->request($req);
my $message = $resp->decoded_content;
NodeJS Code
var express = require('express');
var http = require('http');
var app = express();
app.use(express.json());

var port = process.env.PORT || 8080;

app.get('<API URL>', function (req, res) {
  <get all the passed arguments>
  <send output to the console>
});

app.post('<API URL>', function (req, res) {
  .
  .
  .
  req.connection.setTimeout(0);
  var exec = require('child_process').exec;
  var child = exec(command);
});

// start the server
const server = app.listen(port);
server.timeout = 12000;
console.log('Server started! Listening on port' + port);
I have tried to add the timeout for the server using server.timeout and req.connection.setTimeout(0);.
How do I make sure that the connection is not broken?
It's generally a bad idea to make a web worker carry out long running tasks. It ties up the worker and you end up having problems like this.
Instead, have the web worker add a job to some sort of job queue (Perl's Minion is really nice). The queue operates independently of the web server. Clients can poll the server to check on the status of a job and get the output or artifacts when it's complete.
Another advantage of a proper job queue is that you can restart jobs if they fail. The queue knows the job was there. As you've seen, a broken web connection means that it fails and you've probably lost track of those inputs.
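A minimal in-memory sketch of that enqueue-and-poll shape in Node/Express (the endpoint names and the Map-based store are invented for illustration; a real setup would use a persistent queue such as Minion on the Perl side or an equivalent on the Node side):

const express = require('express');
const { exec } = require('child_process');

const app = express();
app.use(express.json());

// In-memory job store; a real deployment would persist this in a proper queue.
const jobs = new Map();
let nextId = 1;

// Enqueue: answer immediately with a job id instead of holding the connection open.
app.post('/jobs', (req, res) => {
  const id = String(nextId++);
  jobs.set(id, { status: 'running', output: null });
  exec(req.body.command, (err, stdout, stderr) => {
    jobs.set(id, { status: err ? 'failed' : 'done', output: err ? stderr : stdout });
  });
  res.status(202).json({ id });
});

// Poll: the Perl client checks back until the job is finished, then reads the output.
app.get('/jobs/:id', (req, res) => {
  const job = jobs.get(req.params.id);
  if (!job) return res.status(404).json({ error: 'unknown job' });
  res.json(job);
});

app.listen(process.env.PORT || 8080);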

Websocket closing after 60 seconds of being idle while connection node server using AWS ELB

I have one node server running on an EC2 instance, and the client is also running on the same EC2 instance. The client opens a websocket connection to communicate with the node server. This works in the QA and Dev AWS environments, but in the prod environment the same websocket connection gets closed after 60 seconds of being idle. I am running the client and node server behind an ELB in the AWS environment.
Client Code:
ws = new WebSocket('ws://localhost:8443');

ws.onclose = function () {
  console.log("Websocket connection has been closed.");
  clientObj.emit('LogoffSuccess', 'LogoffSuccessfully');
};

ws.onerror = function (event) {
  console.log(event.data);
};

ws.addEventListener('open', function (event) {
  console.log('Websocket connection has been opened');
  ws.send(JSON.stringify(loginCreds));
});
Node server Code below:
const wss = new WebSocket.Server({ server: app });
const clients = {};
const idMap = {};

wss.on(`connection`, ws => {
  const headers = ws.upgradeReq.headers;
  const host = headers.host;
  const key = ws.upgradeReq.headers[`sec-websocket-key`];

  ctiServer.on(`responseMessage`, message => {
    clients[message.AgentId].send(JSON.stringify(message));
  });

  ws.on(`message`, message => {
    log.info(`Message received. Host: ${host}, Msg: ${message}`);
    if (JSON.parse(message).EventName === `Login`) {
      clients[JSON.parse(message).AgentId] = ws;
      idMap[key] = JSON.parse(message).AgentId;
    }
    ctiServer.processIncomingRequest(message);
  });

  ws.on(`close`, () => {
    log.info(`Connection closed. Host: ${host}`);
    const message = {
      EventName: `Logoff`,
      AgentId: idMap[key],
      EventData: {}
    };
  });
});
By default, Elastic Load Balancing sets the idle timeout value to 60 seconds. Therefore, if the target doesn't send some data at least every 60 seconds while the request is in flight, the load balancer can close the front-end connection. To ensure that lengthy operations such as file uploads have time to complete, send at least 1 byte of data before each idle timeout period elapses, and increase the length of the idle timeout period as needed.
https://docs.aws.amazon.com/elasticloadbalancing/latest/application/application-load-balancers.html#connection-idle-timeout
Note that your interests are best served by periodically sending traffic to keep the connection alive. You can set the idle timeout to up to 4000 seconds in an Application Load Balancer, but you will find that stateful intermediate network infrastructure (firewalls, NAT devices) tends to reset connections before they are actually idle for so long.
PING!
Write a ping implementation (or a nil message implementation)...
...otherwise the AWS proxy (probably nginx) will shut down the connection after a period of inactivity (60 seconds in your case, but it's a bit different on different systems).
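A minimal server-side heartbeat with the ws library might look like this (assuming a plain HTTP server on port 8443; the 30-second interval just needs to stay under the ELB's 60-second idle timeout):

const http = require('http');
const WebSocket = require('ws');

const httpServer = http.createServer();
const wss = new WebSocket.Server({ server: httpServer });

wss.on('connection', ws => {
  ws.isAlive = true;
  // Browsers answer protocol-level pings with pongs automatically,
  // so no client-side changes are needed.
  ws.on('pong', () => { ws.isAlive = true; });
});

// Ping every 30 seconds so the load balancer never sees a full idle period.
const heartbeat = setInterval(() => {
  wss.clients.forEach(ws => {
    if (ws.isAlive === false) return ws.terminate(); // no pong since the last round
    ws.isAlive = false;
    ws.ping();
  });
}, 30 * 1000);

wss.on('close', () => clearInterval(heartbeat));

httpServer.listen(8443);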
Do you use NGINX? Its requests time out after 60 seconds.
You can extend the timeout in the NGINX configuration file for your websocket-specific location.
In your case it could look something like this when extending the timeout to an hour:
...
location / {
    ...
    proxy_pass http://127.0.0.1:8443;
    ...
    proxy_read_timeout 3600;
    proxy_send_timeout 3600;
    ...
}
Also see these pages for more information:
https://ubiq.co/tech-blog/increase-request-timeout-nginx/
https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_read_timeout
https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_send_timeout

NodeJs application behind Amazon ELB throws 502

We have a node application running behind Amazon Elastic Load Balancer (ELB), which randomly throws 502 errors when there are multiple concurrent requests and when each request takes time to process. Initially, we tried to increase the idle timeout of ELB to 5 minutes, but still we were getting 502 responses.
When we contacted amazon team, they said this was happening because the back-end is closing the connection with ELB after 5s.
ELB will send HTTP-502 back to your clients for the following reasons:
The load balancer received a TCP RST from the target when attempting to establish a connection.
The target closed the connection with a TCP RST or a TCP FIN while the load balancer had an outstanding request to the target.
The target response is malformed or contains HTTP headers that are not valid.
A new target group was used but no targets have passed an initial health check yet. A target must pass one health check to be considered healthy.
We tried to set our application's keep-alive/timeouts greater than ELB idle timeout (5 min), so the ELB can be responsible for opening and closing the connections. But still, we are facing 502 errors.
js:
var http = require('http');
var express = require('express');
var url = require('url');
var timeout = require('connect-timeout')

const app = express();

app.get('/health', (req, res, next) => {
  res.send("healthy");
});

app.get('/api/test', (req, res, next) => {
  var query = url.parse(req.url, true).query;
  var wait = query.wait ? parseInt(query.wait) : 1;
  setTimeout(function () {
    res.send("Hello!");
  }, wait);
});

var server = http.createServer(app);
server.setTimeout(10 * 60 * 1000); // 10 * 60 seconds * 1000 msecs
server.listen(80, function () {
  console.log('**** STARTING SERVER ****');
});
Try setting server.keepAliveTimeout to something other than the default 5s. See: https://nodejs.org/api/http.html#http_server_keepalivetimeout. Per AWS docs, you'd want this to be greater than the load balancer's idle timeout.
Note: this was added in Node v8.0.0
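For example, with the 5-minute ELB idle timeout mentioned in the question, a sketch could look like this (the values are illustrative; the point is keepAliveTimeout above the ELB idle timeout, and headersTimeout above keepAliveTimeout):

const http = require('http');
const express = require('express');

const app = express();
app.get('/health', (req, res) => res.send('healthy'));

const server = http.createServer(app);

// ELB idle timeout in the question is 5 minutes (300s); Node's default keepAliveTimeout is 5s.
server.keepAliveTimeout = 310 * 1000; // > 300s, so the ELB closes idle connections first
server.headersTimeout = 320 * 1000;   // keep this above keepAliveTimeout

server.listen(80, function () {
  console.log('**** STARTING SERVER ****');
});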
Also, if you're still on the Classic ELB, consider moving to the new Application Load Balancer as based on current experience this seems to have improved things for us a lot. You'll also save a few bucks if you have a lot of separate ELBs for each service. The downside could be that there's 1 point of failure for all your services. But in AWS we trust :)
In my case, I needed to upgrade the Node.js version:
https://github.com/nodejs/node/issues/27363
After that the problem was fixed.
Change your server.listen() to this:
const port = process.env.PORT || 3000;
const server = app.listen(port, function () {
  console.log("Server running at http://127.0.0.1:" + port + "/");
});
You can read more about this here.

Non-http TCP connection on Cloudfoundry

I'm a nooby mobile developer trying to take advantage of cloudfoundry's service to run my server to handle some chats and character movements.
I'm using Noobhub to achieve this (TCP connection between server and client using Node.js and Corona SDK's TCP connection API)
So basically I'm trying a non-http TCP connection between Cloudfoundry(Node.js) and my machine(lua).
Link to Noobhub (there is a GitHub repo with server AND client-side implementation).
I am doing
Client
...
socket.connect("myappname.cloudfoundry.com", 45234)
...
(45234 is the server's process.env.VCAP_APP_PORT value, which I retrieved from the console output of "vmc logs myappname" after running the application.)
Server
...
server.listen(process.env.VCAP_APP_PORT)
When I try to connect, it just times out.
On my local machine, doing
Client
...
socket.connect("localhost",8989)
Server
...
server.listen(8989)
works as expected. It's just on cloudfoundry that it doesn't work.
I tried a bunch of other ways of doing this, such as setting the client's connection port to 80. I saw a few resources but none of them solved it.
I usually blow at asking questions so if you need more information, please ask me!
P.S.
Before you throw this link at me with an angry face D:< , here's a question that shows a similar problem that another person posted.
cannot connect to TCP server on CloudFoundry (localhost node.js works fine)
From here, I can see that this guy was trying to do a similar thing I was doing.
Does the selected answer mean that I MUST use host header (i.e. use http protocol) to connect? Does that also mean cloudfoundry will not support a "TRUE" TCP socket much like heroku or app fog?
Actually, the process.env.VCAP_APP_PORT environment variable provides the port to which your HTTP traffic is redirected by the Cloud Foundry L7 router (nginx) based on your app's route (e.g. nodejsapp.vcap.me:80 is redirected to the process.env.VCAP_APP_PORT port on the virtual machine), so you definitely should not use it for a raw TCP connection. This port should be used to listen for HTTP traffic. That is why your example works locally but does not work on Cloud Foundry.
The approach that worked for me is to listen on the port provided by CF with an HTTP server and then attach a Websocket server (websocket.io in my example below) to it. I've created a sample echo server that works both locally and in CF. The content of my Node.js file named example.js is:
var host = process.env.VCAP_APP_HOST || "localhost";
var port = process.env.VCAP_APP_PORT || 1245;

var webServerApp = require("http").createServer(webServerHandler);
var websocket = require("websocket.io");
var http = webServerApp.listen(port, host);
var webSocketServer = websocket.attach(http);

function webServerHandler(req, res) {
  res.writeHead(200);
  res.end("Node.js websockets.");
}

console.log("Web server running at " + host + ":" + port);

// Web Socket part
webSocketServer.on("connection", function (socket) {
  console.log("Connection established.");
  socket.send("Hi from webSocketServer on connect");

  socket.on("message", function (message) {
    console.log("Message to echo: " + message);
    // Echo back
    socket.send(message);
  });

  socket.on("error", function (error) {
    console.log("Error: " + error);
  });

  socket.on("close", function () { console.log("Connection closed."); });
});
The dependency lib websocket.io can be installed by running the npm install websocket.io command in the same directory. There is also a manifest.yml file which describes the CF deploy arguments:
---
applications:
- name: websocket
  command: node example.js
  memory: 128M
  instances: 1
  host: websocket
  domain: vcap.me
  path: .
So, running cf push from this directory deployed the app to my local CFv2 instance (set up with the help of cf_nise_installer).
To test this echo websocket server, I used a simple index.html file, which connects to the server and sends messages (everything is logged to the console):
<!DOCTYPE html>
<head>
  <script>
    var socket = null;
    var pingData = 1;
    var prefix = "ws://";

    function connect() {
      socket = new WebSocket(prefix + document.getElementById("websocket_url").value);

      socket.onopen = function () {
        console.log("Connection established");
      };

      socket.onclose = function (event) {
        if (event.wasClean) {
          console.log("Connection closed clean");
        } else {
          console.log("Connection aborted (e.g. server process killed)");
        }
        console.log("Code: " + event.code + " reason: " + event.reason);
      };

      socket.onmessage = function (event) {
        console.log("Data received: " + event.data);
      };

      socket.onerror = function (error) {
        console.log("Error: " + error.message);
      };
    }

    function ping() {
      if (!socket || (socket.readyState != WebSocket.OPEN)) {
        console.log("Websocket connection not established");
        return;
      }
      socket.send(pingData++);
    }
  </script>
</head>
<body>
  ws://<input id="websocket_url">
  <button onclick="connect()">connect</button>
  <button onclick="ping()">ping</button>
</body>
</html>
The only thing left to do is to enter the server address into the textbox of the index page (websocket.vcap.me in my case) and press the Connect button, and we have a working Websocket connection over TCP, which can be tested by sending a Ping and receiving the echo. That worked well in Chrome; however, there were some issues with IE 10 and Firefox.
As for a "TRUE" TCP socket, there is no exact info: according to the last paragraph here, you cannot use any port except 80 and 443 (HTTP and HTTPS) to communicate with your app from outside of Cloud Foundry, which makes me think a TCP socket cannot be implemented. However, according to this answer, you can actually use any other port... It seems that some deeper investigation into this question is required...
"Cloud Foundry uses an L7 router (ngnix) between clients and apps. The router needs to parse HTTP before it can route requests to apps. This approach does not work for non-HTTP protocols like WebSockets. Folks running node.js are going to run into this issue but there are no easy fixes in the current architecture of Cloud Foundry."
- http://www.subbu.org/blog/2012/03/my-gripes-with-cloud-foundry
I decided to go with pubnub for all my messaging needs.

Change port without losing data

I'm building a settings manager for my http server. I want to be able to change settings without having to kill the whole process. One of the settings I would like to be able to change is change the port number, and I've come up with a variety of solutions:
Kill the process and restart it
Call server.close() and then do the first approach
Call server.close() and initialize a new server in the same process
The problem is, I'm not sure what the repercussions of each approach is. I know that the first will work, but I'd really like to accomplish these things:
Respond to existing requests without accepting new ones
Maintain data in memory on the new server
Lose as little uptime as possible
Is there any way to get everything I want? The API for server.close() gives me hope:
server.close(): Stops the server from accepting new connections.
My server will only be accessible by clients I create and by a very limited number of clients connecting through a browser, so I will be able to notify them of a port change. I understand that changing ports is generally a bad idea, but I want to allow for the edge-case where it is convenient or possibly necessary.
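For what it's worth, the third approach can be sketched like this (a minimal, hypothetical example: close() stops new connections, its callback fires once existing connections drain, and because it's the same process any in-memory data survives the switch):

var http = require('http');

var server = null;
var appState = { hits: 0 }; // in-memory data that should survive the port change

function handler(req, res) {
  appState.hits += 1;
  res.end('hits so far: ' + appState.hits);
}

function listenOn(port) {
  server = http.createServer(handler);
  server.listen(port, function () {
    console.log('listening on ' + port);
  });
}

function switchPort(newPort) {
  var old = server;
  // Stop accepting new connections on the old port; requests already
  // in flight are still served until their connections close.
  old.close(function () {
    console.log('old listener fully closed');
  });
  listenOn(newPort); // same process, so appState is untouched
}

listenOn(8888);
// later, e.g. from a settings manager:
// switchPort(8889);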
P.S. I'm using connect if that changes anything.
P.P.S. Relatively unrelated, but what would change if I were to use UNIX server sockets or change the host name? This might be a more relevant use-case.
P.P.P.S. This code illustrates the problem of using server.close(). None of the previous servers are killed, but more are created with access to the same resources...
var http = require("http");
var server = false,
    curPort = 8888;

function OnRequest(req, res) {
  res.end("You are on port " + curPort);
  CreateServer(curPort + 1);
}

function CreateServer(port) {
  if (server) {
    server.close();
    server = false;
  }
  curPort = port;
  server = http.createServer(OnRequest);
  server.listen(curPort);
}

CreateServer(curPort);
Resources:
http://nodejs.org/docs/v0.4.4/api/http.html#server.close
I tested the close() function. It seems to do absolutely nothing; the server still accepts connections on its port. Restarting the process was the only way for me.
I used the following code:
var http = require("http");
var server = false;

function OnRequest(req, res) {
  res.end("server now listens on port " + 8889);
  CreateServer(8889);
}

function CreateServer(port) {
  if (server) {
    server.close();
    server = false;
  }
  server = http.createServer(OnRequest);
  server.listen(port);
}

CreateServer(8888);
I was about to file an issue on the node github page when I decided to test my code thoroughly to see if it really is a bug (I hate filing bug reports when it's user error). I realized that the problem only manifests itself in the browser, because apparently browsers do some weird kind of HTTP request keep alive thing where it can still access dead ports because there's still a connection with the server.
What I've learned is this:
Browser caches keep ports alive unless the process on the server is killed
Utilities that do not keep caches by default (curl, wget, etc) work as expected
HTTP requests in node also don't keep the same type of cache that browsers do
For example, I used this code to prove that node http clients don't have access to old ports:
Client-side code:
var http = require('http'),
    client,
    request;

function createClient(port) {
  client = http.createClient(port, 'localhost');
  request = client.request('GET', '/create');
  request.end();
  request.on('response', function (response) {
    response.on('end', function () {
      console.log("Request ended on port " + port);
      setTimeout(function () {
        createClient(port);
      }, 5000);
    });
  });
}

createClient(8888);
And server-side code:
var http = require("http");
var server,
    curPort = 8888;

function CreateServer(port) {
  if (server) {
    server.close();
    server = undefined;
  }
  curPort = port;
  server = http.createServer(function (req, res) {
    res.end("You are on port " + curPort);
    if (req.url === "/create") {
      CreateServer(curPort);
    }
  });
  server.listen(curPort);
  console.log("Server listening on port " + curPort);
}

CreateServer(curPort);
Thanks everyone for the responses.
What about using cluster?
http://learnboost.github.com/cluster/docs/reload.html
It looks interesting!
