Does keep alive only matter with outbound requests? - node.js

If I have a client that may make requests to an HTTP Google Cloud Function multiple times in a relatively short amount of time, how can I use keep-alive? Is having the client send the Connection: keep-alive header enough?
I saw this on the Google docs:
https://cloud.google.com/functions/docs/bestpractices/networking
const http = require('http');
const agent = new http.Agent({keepAlive: true});

/**
 * HTTP Cloud Function that caches an HTTP agent to pool HTTP connections.
 *
 * @param {Object} req Cloud Function request context.
 * @param {Object} res Cloud Function response context.
 */
exports.connectionPooling = (req, res) => {
  req = http.request(
    {
      host: '',
      port: 80,
      path: '',
      method: 'GET',
      agent: agent,
    },
    resInner => {
      let rawData = '';
      resInner.setEncoding('utf8');
      resInner.on('data', chunk => {
        rawData += chunk;
      });
      resInner.on('end', () => {
        res.status(200).send(`Data: ${rawData}`);
      });
    }
  );

  req.on('error', e => {
    res.status(500).send(`Error: ${e.message}`);
  });

  req.end();
};
But that would only apply to making outbound requests from the Cloud Function, right?
There was also something about the global (instance-wide) scope here:
https://cloud.google.com/functions/docs/bestpractices/tips
Is there anything I need to do to reuse connections on requests sent from the end user?

When you define agent at the global scope in your function, it is only retained for as long as the particular server instance it is running on stays alive. So, while your connections may be kept alive within that one instance, it will not share any connections with other instances that are allocated when the load on your function increases. You don't have much direct control over when Cloud Functions will spin up a new instance to handle new load, or when it will deallocate that instance. You just have to accept that they will come and go over time, along with any HTTP connections that are kept alive by global-scope objects.
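A small illustrative sketch (not from the original answer): anything defined at module scope, like the agent or the hypothetical counter below, persists across invocations served by the same instance, but every new instance starts with a fresh copy.

const http = require('http');
// Anything at module scope, like this agent, lives for the lifetime of the instance.
const agent = new http.Agent({keepAlive: true});
// Hypothetical counter to make the per-instance scope visible; it resets
// whenever Cloud Functions allocates a fresh instance.
let invocations = 0;

exports.instanceDemo = (req, res) => {
  invocations++;
  // Outbound calls made with `agent` from this instance can reuse sockets;
  // other instances have their own agent and their own socket pool.
  res.status(200).send(`This instance has handled ${invocations} request(s).`);
};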

Related

Saving req to global variable in nodejs server

What are the consequences of making request global (or singleton), so that it is accessible all over the server and does not have to be passed in each function call? For example:
index.js:
const http = require('http');
const { saveReq } = require('./shared');
const {
  doSomethingWithReqPassingItAsParameter,
  doSomethingWithReqPassingItAsGlobal
} = require('./lib');

const requestListener = function (req, res) {
  // approach 1
  doSomethingWithReqPassingItAsParameter(req);

  // approach 2
  saveReq(req);
  doSomethingWithReqPassingItAsGlobal();

  res.writeHead(200);
  res.end('Hello, World!');
};

const server = http.createServer(requestListener);
server.listen(8080);
lib.js:
const { loadReq } = require('./shared');

const doSomethingWithReqPassingItAsParameter = (req) => {
  console.log('req as parameter', req.url);
};

const doSomethingWithReqPassingItAsGlobal = () => {
  console.log('req as global', loadReq().url);
};

module.exports = {
  doSomethingWithReqPassingItAsParameter,
  doSomethingWithReqPassingItAsGlobal,
};
shared.js:
var request;

const saveReq = (r) => request = r;
const loadReq = () => request;

module.exports = {
  saveReq,
  loadReq,
};
This is very convenient for large projects with many levels of function calls, but how will parallel requests be handled? I know that Node.js is single-threaded; does that mean each HTTP request will run from start to finish separately, or can they overlap, so that using a global request object would make a mess?
The consequences are that your server will only work for one request at a time, and as soon as you have more than one request in flight at the same time, data will be mixed up between requests, leading to bugs, crashes, security issues and incorrect results.
Simply put, you cannot program a server that way. Pass the req object or data from it to any functions that need it. That keeps the appropriate req object associated with the right execution to avoid all the problems of trying to store a req object in some sort of global location where multiple requests in flight at the same time will/can conflict.
There is a relatively new feature in Node.js called "async local storage" that could perhaps be used for this. You can read a little about it here, though it's my personal opinion that it's still better to pass your request data to the functions that want to use it rather than rely on async local storage for this.
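For completeness, a minimal sketch of what the async local storage approach might look like, assuming Node.js 13.10+ where AsyncLocalStorage is available; the store shape and handler names are illustrative, not from the question:

const http = require('http');
const { AsyncLocalStorage } = require('async_hooks');

const als = new AsyncLocalStorage();

// Anything called (directly or across async hops) inside als.run() can read
// the per-request store without it being passed as a parameter.
const doSomethingWithReq = () => {
  const { req } = als.getStore();
  console.log('req via AsyncLocalStorage', req.url);
};

const server = http.createServer((req, res) => {
  als.run({ req }, () => {
    doSomethingWithReq();
    res.writeHead(200);
    res.end('Hello, World!');
  });
});

server.listen(8080);

Unlike a single module-level variable, each als.run() scope gets its own store, so concurrent requests do not overwrite each other.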

Google Cloud Storage NodeJS getFilesStream Async

I have a Google Cloud Function that never returns; it just hits the Cloud Function timeout limit. It works fine locally in under 60 seconds. Not sure what the issue might be. Code is below:
/**
 * Responds to any HTTP request.
 *
 * @param {!express:Request} req HTTP request context.
 * @param {!express:Response} res HTTP response context.
 */
const {Storage} = require('@google-cloud/storage');

exports.main = async (req, res) => {
  const storage = new Storage({projectId: 'our-project'});
  const store = storage.bucket('our-bucket');

  const incomplete = {
    LT04: [],
    LT05: [],
    LE07: [],
    LC08: []
  };

  store.getFilesStream({prefix: 'prefixToMatch', autoPaginate: true})
    .on('error', (err) => {
      return console.error(err.toString());
    })
    .on('data', (file) => {
      // Find small/bad files
      if (file.metadata.size === 162) {
        const split = file.name.split('/');
        const prefix = split[2].substr(0, 4);
        incomplete[prefix].push(file.name);
      }
    })
    .on('end', () => {
      return JSON.stringify(incomplete, false, ' ');
    });
};
Your code seems OK, but you need to take into account some additional details.
Is your Cloud Function's memory enough for this? You could try increasing the memory allocated to your CF.
Are you sure this is due to a timeout issue? If you haven't looked at the logs yet, you can check them in the Error Reporting section.
If you have already confirmed this, another option could be to change the timeout duration.
I think the issue was that I needed to call res.send instead of returning a Promise.resolve. I also needed to remove the async before the function.
Thanks for the quick response with guidelines; the error was simpler than that, apparently.
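A minimal sketch of the fix the asker describes, applied to the function from the question (this just illustrates their own summary, not a verified solution):

const {Storage} = require('@google-cloud/storage');

exports.main = (req, res) => {           // async removed, per the fix
  const storage = new Storage({projectId: 'our-project'});
  const store = storage.bucket('our-bucket');
  const incomplete = {LT04: [], LT05: [], LE07: [], LC08: []};

  store.getFilesStream({prefix: 'prefixToMatch', autoPaginate: true})
    .on('error', (err) => res.status(500).send(err.toString()))
    .on('data', (file) => {
      // Find small/bad files
      if (file.metadata.size === 162) {
        const prefix = file.name.split('/')[2].substr(0, 4);
        incomplete[prefix].push(file.name);
      }
    })
    .on('end', () => {
      // Sending the response here ends the invocation instead of
      // letting it run until the function timeout.
      res.status(200).send(JSON.stringify(incomplete, null, ' '));
    });
};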

Making an HTTPs request on AWS ApiGatewayV2 websocket connection to Respond, or Delete it

UPDATE
This issue is partially resolved; the problem now lies in authenticating the ApiGateway request. I am unsure how to acquire the necessary tokens to send with the request so that it is valid, because this is a [serverless-framework] service, so I can't use the AWS Console to copy-paste the tokens into the request's JSON data. Moreover, I wouldn't know what JSON key they'd have to be under anyway. So I guess this question has changed considerably in scope.
I need to respond/delete an active websocket connection established through AWS ApiGatewayV2, in a Lambda. How do I use node js to send a POST request that ApiGateway can understand?
I saw on the websocket support announcement video that you could issue an HTTP POST request to respond to a websocket, and DELETE request to disconnect a websocket. Full table from the video transcribed here:
Connection URL:
https://abcdef.execute-api.us-west-1.amazonaws.com/env/@connections/connectionId

Operation | Action
POST      | Sends a message from the Server to the connected WS Client
GET       | Gets the latest connection status of the connected WS Client
DELETE    | Disconnects the connected client from the WS connection

(this is not documented anywhere else, AFAIK)
Seeing as the AWS SDK does not provide a deleteConnection method on ApiGatewayManagementApi, I need to be able to issue requests directly to the ApiGateway anyways.
const connect = async (event, context) => {
  const connection_id = event.requestContext.connectionId;
  const host = event.requestContext.domainName;
  const path = '/' + event.requestContext.stage + '/@connections/';
  const json = JSON.stringify({data: "hello world!"});

  console.log("send to " + host + path + connection_id + ":\n" + json);

  await new Promise((resolve, reject) => {
    const options = {
      host: host,
      port: '443',
      path: path + connection_id,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(json)
      }
    };
    const req = https.request(
      options,
      (res) => {
        res.on('data', (data) => {
          console.error(data.toString());
        });
        res.on('end', () => {
          console.error("request finished");
          resolve();
        });
        res.on('error', (error) => {
          console.error(error, error.stack);
          reject();
        });
      }
    );
    req.write(json);
    req.end();
  });

  return success;
};
When I use wscat to test it out, this code results in the console.log showing up in CloudWatch:
send to ********.execute-api.us-east-2.amazonaws.com/dev/@connections/*************:
{
"data": "hello world!"
}
...
{
"message": "Missing Authentication Token"
}
...
request finished
And wscat says:
connected (press CTRL+C to quit)
>
But does not print hello world! or similar.
Edit
I was missing
res.on('data', (data) => {
  console.error(data.toString());
});
in the response handler, which was breaking things. This still doesn't work, though.
You're likely missing two things here.
1. You need to make an IAM-signed request to the API Gateway, per the documentation located here: Use @connections Commands in Your Backend Service
2. You'll need to give this Lambda permission to invoke the API Gateway, per the documentation here: Use IAM Authorization
I hope this helps!
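To make the first point concrete, here is a rough sketch of one way to produce such an IAM-signed request from inside the Lambda, using the third-party aws4 package as the SigV4 signer (the package choice and the helper name are assumptions, not part of the original answer); the credentials come from the Lambda execution role's environment:

const https = require('https');
const aws4 = require('aws4');   // npm install aws4

// Hypothetical helper: POST a payload to one websocket connection.
const sendToConnection = (domainName, stage, connectionId, payload) =>
  new Promise((resolve, reject) => {
    const body = JSON.stringify(payload);

    // aws4.sign() adds the SigV4 Authorization / X-Amz-* headers for us.
    const options = aws4.sign({
      host: domainName,
      path: `/${stage}/@connections/${connectionId}`,
      method: 'POST',
      service: 'execute-api',
      region: process.env.AWS_REGION,            // set by the Lambda runtime
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(body)
      },
      body,
    }, {
      // Temporary credentials of the Lambda's execution role.
      accessKeyId: process.env.AWS_ACCESS_KEY_ID,
      secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
      sessionToken: process.env.AWS_SESSION_TOKEN,
    });

    const req = https.request(options, (res) => {
      res.on('data', () => {});                  // drain the response
      res.on('end', resolve);
    });
    req.on('error', reject);
    req.write(body);
    req.end();
  });

Note that connection IDs can contain characters such as '=', so you may need to encodeURIComponent() the last path segment before signing.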

NodeJS retrying POST/PUT/DELETE API, if the response time takes more than nodeJS default time

Node.js is retrying the backend API calls if it doesn't get any response within its default timeout (2 mins). This is causing an issue for POST/PUT/DELETE API calls whose response time is longer than 2 minutes: the request goes to the backend multiple times from Node.js for a single request from the user.
I don't want to increase the default timeout, as my response time can vary every time.
Please let me know if there is any configuration in Node.js/Express so that I can stop the retries entirely, or customise the configuration only for the POST/PUT/DELETE APIs.
Example of how I send my request:
let express = require('express');
let router = express.Router();
let async = require('async');
let expressListRoutes = require('express-list-routes');

async.parallel([
  function () {
    //url: resource/add/
    router.post('/add', function (req, res) {
      let uiModel = req.body;
      let outputJson = Parser.parse(uiModel, 'CREATE');
      let requestPromiseModel = {
        uri: `${RESOURCES}/${uiModel.resourcePluginId}`,
        method: HttpMethod.POST,
        json: true,
        body: outputJson,
        headers: {
          'Accept': MIMETypes.APPLICATION_JSON,
          'Authorization': MIMETypes.TOKEN_TYPE + req.token
        },
        resolveWithFullResponse: true
      };

      rp(requestPromiseModel).then(function (results) {
        res.send(results);
      }).catch(function (err) {
        log.error(`Error: Create Resource Action: ${err}`);
        res.send(err.response);
      });
    });
  }
]);

expressListRoutes({prefix: '/resource'}, "API:", router);

module.exports = router;
Thanks for the help In Advance :)
The usage of async.parallel in your code is useless. You are running only one function with it so it doesn't make much sense to run one thing in parallel. Also that function doesn't call the callback provided by async.parallel as its first argument so async.parallel thinks it never finishes. And to make things worse all you do in that function is call router.post() which could be called directly as:
router.post(
  // ...
);
instead of:
async.parallel([
  function () {
    router.post(
      // ...
    );
  }
]);
as you do now.
The only effect it has is that expressListRoutes({prefix: '/resource'}, "API:", router); can potentially run before router.post() gets called.
Also you are using req.body without using body-parser.
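A one-line sketch of what that means in practice (where you mount the parser depends on your app; express.json() assumes Express 4.16+):

// Without a body-parsing middleware, req.body is undefined in the handler above.
router.use(express.json());
// or, with the separate package:
// router.use(require('body-parser').json());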
And finally, here is a way to define the timeout for request and request-promise - see the docs:
https://github.com/request/request#requestoptions-callback
timeout - Integer containing the number of milliseconds to wait for a server to send response headers (and start the response body) before aborting the request. Note that if the underlying TCP connection cannot be established, the OS-wide TCP connection timeout will overrule the timeout option (the default in Linux can be anywhere from 20-120 seconds).
Make sure that your OS timeouts are not taking precedence here.
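For example, a minimal sketch of adding that documented option to the options object from the question (the 5-minute value is purely illustrative):

let requestPromiseModel = {
  uri: `${RESOURCES}/${uiModel.resourcePluginId}`,
  method: HttpMethod.POST,
  json: true,
  body: outputJson,
  // Abort if the backend hasn't started responding within 5 minutes,
  // instead of relying on the default socket behaviour.
  timeout: 5 * 60 * 1000,
  resolveWithFullResponse: true
};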

Long running Node.js service to continuously make http requests

I'm beginning work on my first Node.js application and running into memory leak issues that I cannot pin down. I want the app to act as a service that continually runs and polls an endpoint. I think I may be missing something here. The idea of the project is to have a Node application continuously make HTTP requests to an Arduino board I have connected to my network with a web server on it. The networked board responds to the requests with some JSON or XML representing the state of the sensors attached to it. The idea of the Node app is to log and then emit sensor changes that will eventually be consumed by another Electron project.
The node app is currently broken into a couple modules:
proxy: makes the HTTP calls to the different endpoints on the Arduino web server:
var http = require('http'),
    KeepAliveAgent = require('keep-alive-agent');

var controllerRequestOpts = {};

function send(path, cb){
  var response = '';

  //build the request
  var request = controllerRequestOpts;
  request.path = path;

  //make the call to the controller
  http.get(request, function(res){
    res.on('data', function(chunk){
      response += chunk;
    });
    res.on('end', function(){
      cb(null, response);
    });
  })
  .on('error', function(e){
    cb(e, null);
  });
}

module.exports = function(controllerOptions){
  controllerOptions.port = controllerOptions.port || 2222;
  controllerRequestOpts = controllerOptions;
  controllerRequestOpts.agent = new KeepAliveAgent();

  return {
    //JSON
    queryJson: function(cb){
      send('/json', cb);
    },
    //XML
    queryXml: function(cb){
      send('/xml', cb);
    }
    //Additional endpoints
  };
};
runner: loops forever with the interval provided, making the proxy calls to the Arduino:
var proxy = require('proxy');
var Watcher = require('./watcher');

var timer;
var toUpdate;

function initProxy(controllerParams){
  proxy = proxy(controllerParams);
  Watcher = new Watcher();
}

function startListening(startOptions){
  var query;

  //determine the query and callback functions based off configuration
  query = startOptions.queryType === 'json'
    ? proxy.queryJson
    : proxy.queryXml;

  toUpdate = startOptions.queryType === 'json'
    ? Watcher.updateLatestJson
    : Watcher.updateLatestXml;

  //Start running and making requests every 15 seconds
  timer = setInterval(function(){
    query(handleResponse);
  }, startOptions.queryInterval);
}

function handleResponse(err, resp){
  if(err){
    console.log('ERROR: ' + err);
  }
  else{
    toUpdate.call(Watcher, resp);
  }
}

function stopListening(){
  clearInterval(timer);
  process.exit();
}

var runner = {
  connect: function(controllerParams){
    initProxy(controllerParams);
  },
  start: function(startOptions){
    startListening(startOptions);
    return Watcher;
  },
  stop: function(){
    stopListening();
  }
};

module.exports = runner;
I have a "Watcher" module which is just a constructor function that emits the changes back to the calling app which looks like:
var runner = require('./index');

var controllerSettings = {
  hostname: '192.168.1.150',
  port: 2222
};

var startOptions = {
  queryType: 'json',
  queryInterval: 15000
};

runner.connect(controllerSettings);
var watcher = runner.start(startOptions);

watcher.on('P1Changed', printParams);
Everything is working as expected, but as the app runs, the memory usage of the Node process constantly increases over time. I'm wondering whether I'm using the http module incorrectly, or whether the runner shouldn't be using setInterval. Is there a standard way to run this kind of app as a 'service' and not so much as a 'server'?
Continuously sending multiple HTTP requests will cause Node to create huge TLSWrap objects that the GC will not be able to clear for several minutes.
If you wish to send data to the same host(s) over and over again, you need to open a TCP connection (stream) rather than use HTTP requests, which carry a lot of overhead.
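A rough sketch of that suggestion, assuming the Arduino could expose a simple line-based protocol on a raw TCP port; the host, port and the 'poll' command are placeholders, not something the original answer specified:

var net = require('net');

// One long-lived TCP connection instead of a new HTTP request per poll.
var socket = net.connect({host: '192.168.1.150', port: 2222}, function(){
  console.log('connected to controller');
});
socket.setEncoding('utf8');

socket.on('data', function(chunk){
  // Each poll's response arrives on the same socket.
  console.log('sensor state:', chunk);
});
socket.on('error', function(err){
  console.error(err);
});

// Reuse the socket every 15 seconds rather than opening new connections.
setInterval(function(){
  socket.write('poll\n');
}, 15000);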
