AWS Lambda Container destroy event - node.js

When should we release connections and clean up resources in a Lambda function? In a normal Node.js application we would use the hook:
process.on('exit', (code) => {
  console.log(`About to exit with code: ${code}`);
});
However, this doesn't work on AWS Lambda, which leaves the MySQL connections in sleep mode, and we don't have enough resources for that many active connections. None of the AWS documentation specifies a way to achieve this.
How can we receive the stop event of an AWS Lambda container?

The short answer is that there is no such event to know when the container is stopped.
UPDATE: I've not used this library, but https://www.npmjs.com/package/serverless-mysql appears to try to solve just this problem.
PREVIOUS LONG ANSWER:
The medium answer: after speaking with someone at AWS about this I now believe you should scope database connections at the module level so they are reused as long as the container exists. When your container is destroyed the connection will be destroyed at that point.
Original answer:
This question touches on some rather complicated issues to consider with AWS Lambda functions, mainly because of the possibility of using connection pooling or long-lived connections to your database. To start with, Lambda functions in Node.js execute as a single, exported Node.js function with this signature (as you probably know):
exports.handler = (event, context, callback) => {
  // TODO implement
  callback(null, 'Hello from Lambda');
};
The cleanest and simplest way to handle database connections is to create and destroy them with every single function call. In this scenario, a database connection would be created at the beginning of your function and destroyed before your final callback is called. Something like this:
const mysql = require('mysql');

exports.handler = (event, context, callback) => {
  let connection = mysql.createConnection({
    host     : 'localhost',
    user     : 'me',
    password : 'secret',
    database : 'my_db'
  });
  connection.connect();
  connection.query('SELECT 1 + 1 AS solution', (error, results, fields) => {
    if (error) {
      connection.end();
      callback(error);
    }
    else {
      let retval = results[0].solution;
      connection.end();
      console.log('The solution is: ', retval);
      callback(null, retval);
    }
  });
};
NOTE: I haven't tested that code. I'm just providing an example for discussion.
I've also seen conversations like this one discussing the possibility of placing your connection outside the main function body:
const mysql = require('mysql');

let connection = mysql.createConnection({
  host     : 'localhost',
  user     : 'me',
  password : 'secret',
  database : 'my_db'
});
connection.connect();

exports.handler = (event, context, callback) => {
  // NOTE: should check if the connection is open first here
  connection.query('SELECT 1 + 1 AS solution', (error, results, fields) => {
    if (error) {
      callback(error);
    }
    else {
      let retval = results[0].solution;
      console.log('The solution is: ', retval);
      callback(null, retval);
    }
  });
};
The theory here is this: because AWS Lambda will try to reuse an existing container after the first call to your function, the next function call will already have a database connection open. The example above should probably check the existence of an open connection before using it, but you get the idea.
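To illustrate that check, here is a hedged, untested sketch (same placeholder credentials as above) that creates the connection lazily at module scope and discards it if the server drops it, so a warm container doesn't reuse a dead socket:
const mysql = require('mysql');

// Reused across invocations while the container stays warm.
let connection;

function getConnection() {
  if (!connection) {
    connection = mysql.createConnection({
      host     : 'localhost',
      user     : 'me',
      password : 'secret',
      database : 'my_db'
    });
    // If the server closes the idle connection, forget it so the next
    // invocation creates a fresh one instead of reusing a dead socket.
    connection.on('error', () => { connection = undefined; });
    connection.connect();
  }
  return connection;
}

exports.handler = (event, context, callback) => {
  getConnection().query('SELECT 1 + 1 AS solution', (error, results) => {
    if (error) return callback(error);
    callback(null, results[0].solution);
  });
};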
The problem, of course, is that this leaves your connection open indefinitely. I'm not a fan of this approach, but depending on your specific circumstances it might work. You could also introduce a connection pool into that scenario (see the sketch below). Regardless, you have no event in this case to cleanly destroy a connection or pool; the container process hosting your function would itself be killed, so you'd have to rely on your database killing the connection from its end at some point.
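For the pooled variant, a minimal, untested sketch (placeholder credentials again) might look like this; the pool hands out and reclaims a connection per query, so there is no manual state check to write:
const mysql = require('mysql');

// Module-scoped pool, reused across warm invocations of this container.
const pool = mysql.createPool({
  connectionLimit : 2, // keep it small: every container gets its own pool
  host            : 'localhost',
  user            : 'me',
  password        : 'secret',
  database        : 'my_db'
});

exports.handler = (event, context, callback) => {
  // pool.query() checks a connection out of the pool and releases it
  // automatically when the callback fires.
  pool.query('SELECT 1 + 1 AS solution', (error, results) => {
    if (error) return callback(error);
    callback(null, results[0].solution);
  });
};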
I could be wrong about some of those details, but I believe at a high level that is what you're looking at. Hope that helps!

Related

Why AWS Lambda execution time is long using pg-promise

I started using AWS Lambda to perform a very simple task: executing an SQL query to retrieve records from an RDS PostgreSQL database and creating an SQS message based on the result.
Because Amazon only provides the aws-sdk module by default (on the Node 4.3 runtime) and we need to execute this SQL query, we have to create a custom deployment package which includes pg-promise. Here is the code I'm using:
console.info('Loading the modules...');
var aws = require('aws-sdk');
var sqs = new aws.SQS();
var config = {
  db: {
    username: '[DB_USERNAME]',
    password: '[DB_PASSWORD]',
    host: '[DB_HOST]',
    port: '[DB_PORT]',
    database: '[DB_NAME]'
  }
};

var pgp = require('pg-promise')({});
var cn = `postgres://${config.db.username}:${config.db.password}@${config.db.host}:${config.db.port}/${config.db.database}`;

if (!db) {
  console.info('Connecting to the database...');
  var db = pgp(cn);
} else {
  console.info('Re-use database connection...');
}

console.log('loading the lambda function...');

exports.handler = function(event, context, callback) {
  var now = new Date();
  console.log('Current time: ' + now.toISOString());
  // Select the users that need to be updated
  var query = [
    'SELECT *',
    'FROM "users"',
    'WHERE "users"."registrationDate"<=${now}',
    'AND "users"."status"=1',
  ].join(' ');
  console.info('Executing SQL query: ' + query);
  db.many(query, { status: 2, now: now.toISOString() }).then(function(data) {
    var ids = [];
    data.forEach(function(auction) {
      ids.push(auction.id);
    });
    if (ids.length == 0) {
      callback(null, 'No user to update');
    } else {
      var sqsMessage = {
        MessageBody: JSON.stringify({ action: 'USERS_UPDATE', data: ids }), /* required */
        QueueUrl: '[SQS_USER_QUEUE]', /* required */
      };
      console.log('Sending SQS Message...', sqsMessage);
      sqs.sendMessage(sqsMessage, function(err, sqsResponse) {
        console.info('SQS message sent!');
        if (err) {
          callback(err);
        } else {
          callback(null, ids.length + ' users were affected. SQS Message created:' + sqsResponse.MessageId);
        }
      });
    }
  }).catch(function(error) {
    callback(error);
  });
};
When testing my Lambda function, if you look at the CloudWatch Logs, the function itself took around 500 ms to run, but it reports that it actually took 30502.48 ms (cf. screenshots).
So I'm guessing it's taking 30 seconds to unzip my 318 KB package and start executing it? That seems absurd to me, or am I missing something? I tried uploading the zip directly and also uploading my package to S3 to see if it was faster, but I still get the same latency.
I noticed that the Python version can natively perform SQL requests without any custom packaging...
All our applications are written in Node, so I don't really want to move away from it; however, I have a hard time understanding why Amazon doesn't provide basic npm modules for database interactions.
Any comments or help are welcome. At this point I'm not sure Lambda would be beneficial for us if it takes 30 seconds to run a script that is triggered every minute...
Anyone facing the same problem?
UPDATE: This is how you need to close the connection as soon as you don't need it anymore (thanks again to Vitaly for his help):
exports.handler = function(event, context, callback) {
  [...]
  db.many(query, { status: 2, now: now.toISOString() }).then(function(data) {
    pgp.end(); // <-- This is important to close the connection directly after the request
    [...]
The execution time should be measured based on the length of operations being executed, as opposed to how long it takes for the application to exit.
There are many libraries out there that make use of a connection pool in one form or another. Those typically terminate after a configurable period of inactivity.
In case of pg-promise, which in turn uses node-postgres, such period of inactivity is determined by parameter poolIdleTimeout, which defaults to 30 seconds. With pg-promise you can access it via pgp.pg.defaults.poolIdleTimeout.
If you want your process to exit after the last query has been executed, you need to shut down the connection pool by calling pgp.end(). See the chapter Library de-initialization for details.
It is also shown in most of the code examples, as those need to exit right after finishing.
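Putting those two pieces together for the Lambda case above, a rough, untested sketch (connection string and query are placeholders) could shorten the idle timeout and shut the pool down once the handler's work is done, mirroring the update in the question:
const pgp = require('pg-promise')({});

// Optional: shorten the 30-second default so an idle warm container
// releases its client sooner (exposed as pgp.pg.defaults.poolIdleTimeout).
pgp.pg.defaults.poolIdleTimeout = 5000;

var db = pgp('postgres://user:password@host:5432/dbname'); // placeholder

exports.handler = function(event, context, callback) {
  db.many('SELECT * FROM "users" WHERE "status" = 1')
    .then(function(data) {
      pgp.end(); // shut down the pool so the process can exit right away
      callback(null, data.length + ' users found');
    })
    .catch(function(error) {
      pgp.end();
      callback(error);
    });
};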

Best Practices in Serverless Framework

I am a beginner with the Serverless Framework. While studying the Serverless best practices (here), I have a question about "Initialize external services outside of your Lambda code". How do I implement it?
For example, take the code below in handler.js:
const getOneUser = (event, callback) => {
  let response = null;
  // validate parameters
  if (event.accountid && process.env.SERVERLESS_SURVEYTABLE) {
    let docClient = new aws.DynamoDB.DocumentClient();
    let params = {
      TableName: process.env.SERVERLESS_USERTABLE,
      Key: {
        accountid: event.accountid,
      }
    };
    docClient.get(params, function(err, data) {
      if (err) {
        // console.error("Unable to get an item with the request: ", JSON.stringify(params), " along with error: ", JSON.stringify(err));
        return callback(getDynamoDBError(err), null);
      } else {
        if (data.Item) { // got response
          // compose response
          response = {
            accountid: data.Item.accountid,
            username: data.Item.username,
            email: data.Item.email,
            role: data.Item.role,
          };
          return callback(null, response);
        } else {
          // console.error("Unable to get an item with the request: ", JSON.stringify(params));
          return callback(new Error("404 Not Found: Unable to get an item with the request: " + JSON.stringify(params)), null);
        }
      }
    });
  }
  // incomplete parameters
  else {
    return callback(new Error("400 Bad Request: Missing parameters: " + JSON.stringify(event)), null);
  }
};
The question is: how do I initialize DynamoDB outside of my Lambda code?
Update 2:
Is the code below optimized?
Handler.js
let survey = require('./survey');

module.exports.handler = (event, context, callback) => {
  return survey.getOneSurvey({
    accountid: event.accountid,
    surveyid: event.surveyid
  }, callback);
};
survey.js
let docClient = new aws.DynamoDB.DocumentClient();

module.exports = (() => {
  const getOneSurvey = (event, callback) => {
    ....
    docClient.get(params, function(err, data)...
    ....
  };
  return {
    getOneSurvey: getOneSurvey,
  };
})();
Here's the quote in question:
Initialize external services outside of your Lambda code
When using services (like DynamoDB) make sure to initialize outside of your lambda code. Ex: module initializer (for Node), or to a static constructor (for Java). If you initiate a connection to DDB inside the Lambda function, that code will run on every invoke.
In other words, in the same file, but outside of -- before -- the actual handler code.
let docClient = new aws.DynamoDB...
...
const getOneUser = (event, callback) => {
....
docClient.get(params, ...
When the container starts, the code outside the handler runs. When subsequent function invocations reuse the same container, you save resources and time by not instantiating the external services again. Containers are often reused, but each container only handles one concurrent request at a time, and how often they are reused and for how long is outside your control... Unless you update the function, in which case any existing containers will no longer be reused, because they'd have the old version of the function.
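To make that concrete, here is a minimal sketch of the pattern (the table environment variable and key come from the question; aws-sdk is assumed to be available in the Lambda runtime):
const aws = require('aws-sdk');

// Initialized once per container, outside the handler, and reused by
// every invocation that lands on the same warm container.
const docClient = new aws.DynamoDB.DocumentClient();

module.exports.getOneUser = (event, callback) => {
  const params = {
    TableName: process.env.SERVERLESS_USERTABLE,
    Key: { accountid: event.accountid }
  };
  docClient.get(params, (err, data) => {
    if (err) return callback(err);
    callback(null, data.Item || null);
  });
};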
Your code will work as written, but isn't optimized.
The caveat with this approach in current-generation Node.js Lambda functions (Node 4.x/6.x) is that some objects -- notably those that hold genuinely persistent connections to external services -- will prevent the event loop from becoming empty (a common example is a MySQL database connection, which holds a live TCP connection to the server; by contrast, a DynamoDB "connection" is actually connectionless, since its transport protocol is HTTPS). In this case you need to either take a different approach or allow Lambda to not wait for an empty event loop before freezing the container, by setting context.callbackWaitsForEmptyEventLoop to false before calling the callback... but only do this if needed and only if you fully understand what it means. Setting it by default because some guy on the Internet said it was a good idea will potentially bring you mysterious bugs later.
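If you do decide you need it, a hedged sketch of that flag in use (with a module-scoped MySQL pool as the example; connection details omitted) looks like this:
const mysql = require('mysql');

const pool = mysql.createPool({ /* host, user, password, database */ });

exports.handler = (event, context, callback) => {
  // Tell Lambda to freeze the container as soon as callback() is invoked,
  // even though the pool still holds open TCP sockets in the event loop.
  // Only do this if you understand the trade-off described above.
  context.callbackWaitsForEmptyEventLoop = false;

  pool.query('SELECT 1 + 1 AS solution', (error, results) => {
    if (error) return callback(error);
    callback(null, results[0].solution);
  });
};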

Node Postgres Module not responding

I have an Amazon Elastic Beanstalk Node app that uses a PostgreSQL Amazon RDS instance. To interface Node with Postgres I use node-postgres. The code looks like this:
var pg = require('pg'),
    done, client;

function DataObject(config, success, error) {
  var PG_CONNECT = "postgres://" + config.username + ":" + config.password + "@" +
                   config.server + ":" + config.port + "/" + config.database;
  self = this;
  pg.connect(PG_CONNECT, function(_error, client, done) {
    if (_error) { error(); }
    else {
      self.client = client;
      self.done = done;
      success();
    }
  });
}

DataObject.prototype.add_data = function(data, success, error) {
  self = this;
  this.client.query('INSERT INTO sample (data) VALUES ($1,$2)',
    [data], function(_error, result) {
      self.done();
      success();
    });
};
To use it, I create my DataObject and then call add_data every time new data comes along. Within add_data I call this/self.done() to release the connection back to the pool. Now, when I repeatedly make those requests, the client.query callback never comes back. Under what circumstances could this lead to a blocking/unresponsive database interface?
The way you are using the pool is incorrect.
You are asking for a connection from the pool in the DataObject function. This function acts as a constructor and is executed once per data object, so only one connection is ever checked out from the pool.
When we call add_data the first time, the query is executed and the connection is returned to the pool. Subsequent calls therefore fail, because the connection has already been returned.
You can verify this by logging _error:
DataObject.prototype.add_data = function(data, success, error) {
  self = this;
  this.client.query('INSERT INTO sample (data) VALUES ($1,$2)',
    [data], function(_error, result) {
      if (_error) console.log(_error); // log the error to the console
      self.done();
      success();
    });
};
There are a couple of ways you can do it differently:
Ask for a connection for every query made. To do that, move the code that asks the pool for a client into the add_data function (see the sketch below).
Release the client only after performing all queries. This is the trickier way: since calls are made asynchronously, you need to be careful that the client is not shared, i.e. that no new request is made until the client.query callback has finished.
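A rough sketch of the first option, using the same legacy pg.connect pool API as the question (connection details and table are placeholders; only one value is inserted, so only $1 is used):
var pg = require('pg');

function DataObject(config) {
  // Only remember how to connect; don't hold a client here.
  this.connectString = 'postgres://' + config.username + ':' + config.password + '@' +
                       config.server + ':' + config.port + '/' + config.database;
}

DataObject.prototype.add_data = function(data, success, error) {
  // Check a client out of the pool for this one query...
  pg.connect(this.connectString, function(_error, client, done) {
    if (_error) { return error(_error); }
    client.query('INSERT INTO sample (data) VALUES ($1)', [data], function(queryError, result) {
      done(); // ...and hand it back as soon as the query finishes
      if (queryError) { return error(queryError); }
      success(result);
    });
  });
};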

Keeping open a MongoDB database connection

In so many introductory examples of using MongoDB, you see code like this:
var MongoClient = require('mongodb').MongoClient;

MongoClient.connect("mongodb://localhost:port/adatabase", function(err, db) {
  /* Some operation... CRUD, etc. */
  db.close();
});
If MongoDB is like any other database system, open and close operations are typically expensive time-wise.
So, my question is this: Is it OK to simply do the MongoClient.connect("... once, assign the returned db value to some module global, have various functions in the module do various database-related work (insert documents into collections, update documents, etc. etc.) when they're called by other parts of the application (and thereby re-use that db value), and then, when the application is done, only then do the close.
In other words, open and close are done once each, not every time you need to do some database-related operation. And you keep re-using the db object that was returned during the initial open/connect, only disposing of it at the end, with the close, when you're actually done with all your database-related work.
Obviously, since all the I/O is async, you'd make sure the last database operation completed before issuing the close. Seems like this should be OK, but I wanted to double-check just in case I'm missing something, as I'm new to MongoDB. Thanks!
Yes, that is fine and typical behavior: start your app, connect to the db, perform operations against the db for a long time, maybe re-connect if the connection ever dies unexpectedly, and then just never close the connection (rely on the automatic close that happens when your process dies).
mongodb version ^3.1.8
Initialize the connection as a promise:
const MongoClient = require('mongodb').MongoClient
const uri = 'mongodb://...'
const client = new MongoClient(uri)
const connection = client.connect() // initialized connection
And then call the connection whenever you wish to perform an action on the database:
// if I want to insert into the database...
const connect = connection
connect.then(() => {
  const doc = { id: 3 }
  const db = client.db('database_name')
  const coll = db.collection('collection_name')
  coll.insertOne(doc, (err, result) => {
    if (err) throw err
  })
})
The currently accepted answer is correct in that you may keep the same database connection open to perform operations; however, it is missing details on how to reconnect if the connection closes. Below are two ways to reconnect automatically. The examples are in TypeScript, but they can easily be translated into plain Node.js if you need to.
Method 1: MongoClient Options
The simplest way to allow MongoDB to reconnect is to define reconnectTries in the options passed to MongoClient. Any time a CRUD operation times out, the driver will use the parameters passed to MongoClient to decide how to retry (reconnect). Setting the option to Number.MAX_VALUE essentially makes it retry forever until it's able to complete the operation. You can check out the driver source code if you want to see which errors are retried.
import { Db, MongoClient, MongoClientOptions } from 'mongodb';

class MongoDB {
  private db: Db;

  constructor() {
    this.connectToMongoDB();
  }

  async connectToMongoDB() {
    const options: MongoClientOptions = {
      reconnectInterval: 1000,
      reconnectTries: Number.MAX_VALUE
    };
    try {
      const client = new MongoClient('uri-goes-here', options);
      await client.connect();
      this.db = client.db('dbname');
    } catch (err) {
      console.error(err, 'MongoDB connection failed.');
    }
  }

  async insert(doc: any) {
    if (this.db) {
      try {
        await this.db.collection('collection').insertOne(doc);
      } catch (err) {
        console.error(err, 'Something went wrong.');
      }
    }
  }
}
Method 2: Try-catch Retry
If you want more granular support on trying to reconnect, you can use a try-catch with a while loop. For example, you may want to log an error when it has to reconnect or you want to do different things based on the type of error. This will also allow you to retry depending on more conditions than just the standard ones included with the driver. The insert method can be changed to the following:
async insert(doc: any) {
  if (this.db) {
    let isInserted = false;
    while (isInserted === false) {
      try {
        await this.db.collection('collection').insertOne(doc);
        isInserted = true;
      } catch (err) {
        // Add custom error handling if desired
        console.error(err, 'Attempting to retry insert.');
        try {
          await this.connectToMongoDB();
        } catch {
          // Do something if this fails as well
        }
      }
    }
  }
}

How to handle Node/MongoDB connection management?

I'm using node-mongodb-native to connect to a local MongoDB instance. I'm having a little trouble wrapping my head around how to handle the connections. I've attempted to abstract the MongoDB stuff into a custom Database module:
Database.js
var mongo = require('mongodb');

var Database = function() { return this; };

Database.prototype.doStuff = function doStuff(callback) {
  mongo.connect('mongodb://127.0.0.1:27017/testdb', function(err, conn) {
    conn.collection('test', function(err, coll) {
      coll.find({}, function(err, cursor) {
        cursor.toArray(function(err, items) {
          conn.close();
          return callback(err, items);
        });
      });
    });
  });
};

// Testing
new Database().doStuff(function(err, items) {
  console.log(err, items);
});
Is a new connection required for each method? That seems like it would get expensive awfully quick. I imagined that perhaps the connection would be established in the constructor and subsequent calls would leverage the existing connection.
This next question may be more of a design question, but considering how connection setup and tear-down may be expensive operations, I'm considering adding a Database object that is global to my application that can be leveraged to make calls to the database. Does this seem reasonable?
Please note that the code above was roughly taken from here. Thanks for your help.
You don't need a new connection for each method -- you can open one connection and use it for all subsequent calls. The same applies to the individual collection variables: you can cache the result of a single call to collection(), so you only set up those callbacks once and can leave them out everywhere else.
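As a rough sketch of that idea, using the same callback-style driver API as the question (URL and collection name unchanged), you could connect once, cache the collection handle, and reuse it on every call:
var mongo = require('mongodb');

var Database = function() {
  this.coll = null;
  return this;
};

// Connect once and cache the collection; every later call reuses it.
Database.prototype.init = function init(callback) {
  var self = this;
  mongo.connect('mongodb://127.0.0.1:27017/testdb', function(err, conn) {
    if (err) { return callback(err); }
    conn.collection('test', function(collErr, coll) {
      if (collErr) { return callback(collErr); }
      self.coll = coll;
      callback(null);
    });
  });
};

Database.prototype.doStuff = function doStuff(callback) {
  this.coll.find({}, function(err, cursor) {
    if (err) { return callback(err); }
    cursor.toArray(callback);
  });
};

// Testing
var db = new Database();
db.init(function(err) {
  if (err) { return console.log(err); }
  db.doStuff(function(err, items) {
    console.log(err, items);
  });
});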
