Deleting a list of azure blobs blocks express/nodejs - node.js

Given a list of blobs I want to delete them one by one (since there's no batch/bulk delete of blobs in azure storage sdk) without blocking node's event loop or the express server. The deletion is triggered by (and runs on) a request. But although the entire flow is made as async as possible, the express server becomes unresponsive once the deletion starts, sometimes some endpoints returning 500.
const async = require('async');
const azure = require('azure-storage');
const Promise = require('bluebird');
const blobService = azure.createBlobService(CONFIG.CONNECTION_STRING);
// Endpoint
app.delete(`${BASE_URL}/files/:date`, deleteFiles);
const deleteFiles = async (req, res) => {
const date = req.params.date;
if (!date) {
return res.status(400).send();
}
await deleteBlobs(date).catch((err) => {
return res.status(500).send(err);
});
res.status(200).send();
};
const deleteBlobs = (date) => {
return new Promise(async (resolve, reject) => {
// findFiles calls blobService.listBlobsSegmentedWithPrefix and performs some async.map on the returned entrieds
const blobNames = await findFiles(date)
.catch(err => log.error('findFiles failed in deleteBlobs.', err));
async.each(
blobNames,
(blobName, callback) => {
blobService.deleteBlob(CONTAINER, blobName, (deleteError) => {
if (deleteError) {
return callback(deleteError);
}
callback();
});
},
err => {
if (err) {
return reject(err);
}
resolve();
});
});
};
So what am I missing? Why the server becomes unresponsive till the deletion finishes?
Mentions (maybe it helps): The express server/app is hosted in Azure on a Basic (B1) service plan.

I think there are 1 possible reason which could lead your express unresponsive.
Too many delete async operations, which creates lots of callback I/O events at a short time. Node.js may serve these event at first, and stuck the Node.js to deal with other incoming HTTP requests.
Can you use async.eachLimit instead of async.each to set a lower concurrency limitation?
http://caolan.github.io/async/docs.html#eachLimit

Related

Koa API server - Wait until previous request is processed before processing a new request

I'm building an API in Node with Koa which uses another API to process some information. A request comes in to my API from the client and my API does some different requests to another API. Problem is, the other API is fragile and slow so to guarantee data integrity, I have to check if there is no previous incoming request being processed, before starting a new process. My first idea was to use promises and a global boolean to check if theres an ongoing processing and await until the process has finished. Somehow this prevents concurrent requests but even if 3-4 requests come in during the process, only the first one is done and that is it. Why are the rest of the incoming requests forgotten ?
Edit: As a side note, I do not need to respond to the incoming request with processed information. I could send response right after the request is recieved. I need to do operations with the 3rd party API.
My solution so far:
The entry point:
router.get('/update', (ctx, next) => {
ctx.body = 'Updating...';
update();
next();
});
And the update function:
let updateInProgress = false;
const update = async () => {
const updateProcess = () => {
return new Promise((resolve, reject) => {
if (!updateInProgress) {
return resolve();
} else {
setTimeout(updateProcess, 5000);
}
});
};
await updateProcess();
updateInProgress = true;
// Process the request
updateInProgress = false
}
Ok, I found a working solution, not sure how elegant it is tough...
I'm guessing the problem was, that new Promise was created with the Timeout function, and another one, and another one until one of them was resolved. That did not resolve the first Promise tough and the code got stuck. The solution was to create an interval which checked if the condition is met and then resolve the Promise. If someone smarter could comment, I'd appreciate it.
let updateInProgress = false;
const update = async () => {
const updateProcess = () => {
return new Promise((resolve, reject) => {
if (!updateInProgress) {
return resolve();
} else {
const processCheck = setInterval(() => {
if (!updateInProgress) {
clearInterval(processCheck);
return resolve();
}
}, 5000);
}
});
};
await updateProcess();
updateInProgress = true;
// Process the request
updateInProgress = false
}

Why does Async firebase fetching is not working? (NODE JS)

Building a NodeJS REST API.
Trying to send load data from FireBase collection, then sending it to the user (as API response).
Looks like the problem is that it's not waits for the firebase fetch to resolve, but send back a response without the collection data. (tried to use ASYNC-AWAIT but its not working)
exports.getChatMessages = async (req, res, next) => {
const chatId = req.params.chatId
const getChatData = () => {
db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot((snapshot) => {
snapshot.docs.forEach(msg => {
console.log(msg.data().messageContent)
return {
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
}
})
})
}
try {
const chatData = await getChatData()
console.log(chatData)
res.status(200).json({
message: 'Chat Has Found',
chatData: chatData
})
} catch (err) {
if (!err.statusCode) {
err.statusCode(500)
}
next(err)
}
}
As you can see, I've used 2 console.logs to realize what the problem, Terminal logs looks like:
[] (from console.logs(chatData))
All messages (from console.log(msg.data().messageContent))
Is there any way to block the code unti the firebase data realy fetched?
If I correctly understand, you want to send back an array of all the documents present in the messages subcollection. The following should do the trick.
exports.getChatMessages = async (req, res, next) => {
const chatId = req.params.chatId;
const collectionRef = db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc');
try {
const chatsQuerySnapshot = await collectionRef.get();
const chatData = [];
chatsQuerySnapshot.forEach((msg) => {
console.log(msg.data().messageContent);
chatData.push({
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
});
});
console.log(chatData);
res.status(200).json({
message: 'Chat Has Found',
chatData: chatData,
});
} catch (err) {
if (!err.statusCode) {
err.statusCode(500);
}
next(err);
}
};
The asynchronous get() method returns a QuerySnapshot on which you can call forEach() for enumerating all of the documents in the QuerySnapshot.
You can only await a Promise. Currently, getChatData() does not return a Promise, so awaiting it is pointless. You are trying to await a fixed value, so it resolves immediately and jumps to the next line. console.log(chatData) happens. Then, later, your (snapshot) => callback happens, but too late.
const getChatData = () => new Promise(resolve => { // Return a Promise, so it can be awaited
db.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot(resolve) // Equivalent to .onSnapshot((snapshot) => resolve(snapshot))
})
const snapshot = await getChatData();
console.log(snapshot)
// Put your transform logic out of the function that calls the DB. A function should only do one thing if possible : call or transform, not both.
const chatData = snapshot.map(msg => ({
authorID: msg.data().authorID,
messageContent: msg.data().messageContent,
timeStamp: msg.data().timeStamp,
}));
res.status(200).json({
message: 'Chat Has Found',
chatData
})
Right now, getChatData is this (short version):
const getChatData = () => {
db
.collection('chats')
.doc(chatId)
.collection('messages')
.orderBy('timeStamp', 'asc')
.onSnapshot((snapshot) => {}) // some things inside
}
What that means is that the getChatData function calls some db query, and then returns void (nothing). I bet you'd want to return the db call (hopefully it's a Promise), so that your await does some work for you. Something along the lines of:
const getChatData = async () =>
db
.collection('chats')
// ...
Which is the same as const getChatData = async() => { return db... }
Update: Now that I've reviewed the docs once again, I see that you use onSnapshot, which is meant for updates and can fire multiple times. The first call actually makes a request, but then continues to listen on those updates. Since that seems like a regular request-response, and you want it to happen only once - use .get() docs instead of .onSnapshot(). Otherwise those listeners would stay there and cause troubles. .get() returns a Promise, so the sample fix that I've mentioned above would work perfectly and you don't need to change other pieces of the code.

limiting number of parallel request to cassandra db in nodejs

I currently parsing a file and getting its data in order tu push them in my db. To do that I made an array of query and I execute them through a loop.
The problem is that I'm limited to 2048 parallel requests.
This is the code I made:
index.js=>
const ImportClient = require("./scripts/import_client_leasing")
const InsertDb = require("./scripts/insertDb")
const cassandra = require('cassandra-driver');
const databaseConfig = require('./config/database.json');
const authProvider = new cassandra.auth.PlainTextAuthProvider(databaseConfig.cassandra.username, databaseConfig.cassandra.password);
const db = new cassandra.Client({
contactPoints: databaseConfig.cassandra.contactPoints,
authProvider: authProvider
});
ImportClient.clientLeasingImport().then(queries => { // this function parse the data and return an array of query
return InsertDb.Clients(db, queries); //inserting in the database returns something when all the promises are done
}).then(result => {
return db.shutdown(function (err, result) {});
}).then(result => {
console.log(result);
}).catch(error => {
console.log(error)
});
insertDb.js =>
module.exports = {
Clients: function (db, queries) {
DB = db;
return insertClients(queries);
}
}
function insertClients(queries) {
return new Promise((resolve, reject) => {
let promisesArray = [];
for (let i = 0; i < queries.length; i++) {
promisesArray.push(new Promise(function (resolve, reject) {
DB.execute(queries[i], function (err, result) {
if (err) {
reject(err)
} else {
resolve("success");
}
});
}));
}
Promise.all(promisesArray).then((result) => {
resolve("success");
}).catch((error) => {
resolve("error");
});
});
}
I tried multiple things, like adding an await function thats set a timout in my for loop every x seconds (but it doesn't work because i'm already in a promise), i also tried with p-queue and p-limit but it doesn't seems to work either.
I'm kinda stuck here, I'm think I'm missing something trivial but I don't really get what.
Thanks
When submitting several requests in parallel (execute() function uses asynchronous execution), you end up queueing at one of the different levels: on the driver side, on the network stack or on the server side. Excessive queueing affects the total time it takes each operation to complete. You should limit the amount of simultaneous requests at any time, also known as concurrency level, to get high throughput and low latency.
When thinking about implementing it in your code, you should consider launching a fixed amount of asynchronous executions, using your concurrency level as a cap and only adding new operations once executions within that cap completed.
Here is an example on how to limit the amount of concurrent executions when processing items in a loop: https://github.com/datastax/nodejs-driver/blob/master/examples/concurrent-executions/execute-in-loop.js
In a nutshell:
// Launch in parallel n async operations (n being the concurrency level)
for (let i = 0; i < concurrencyLevel; i++) {
promises[i] = executeOneAtATime();
}
// ...
async function executeOneAtATime() {
// ...
// Execute queries asynchronously in sequence
while (counter++ < totalLength) {;
await client.execute(query, params, options);
}
}
Ok, so I found a workaround to reach my goal.
I wrote in a file all my queries
const fs = require('fs')
fs.appendFileSync('my_file.cql', queries[i] + "\n");
and i then used
child_process.exec("cqls --file my_file", function(err, stdout, stderr){})"
to insert in cassandra all my queries

NodeJs delay each promise within Promise.all()

I'm trying to update a tool that was created a while ago which uses nodejs (I am not a JS developer, so I'm trying to piece the code together) and am getting stuck at the last hurdle.
The new functionality will take in a swagger .json definition, compare the endpoints against the matching API Gateway on the AWS Service, using the 'aws-sdk' SDK for JS and then updates the Gateway accordingly.
The code runs fine on a small definition file (about 15 endpoints) but as soon as I give it a bigger one, I start getting tons of TooManyRequestsException errors.
I understand that this is due to my calls to the API Gateway service being too quick and a delay / pause is needed. This is where I am stuck
I have tried adding;
a delay() to each promise being returned
running a setTimeout() in each promise
adding a delay to the Promise.all and Promise.mapSeries
Currently my code loops through each endpoint within the definition and then adds the response of each promise to a promise array:
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath));
Once the loop is finished I run this:
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
I have tried the same with a mapSeries (no luck).
It looks like the functions within the (getMethodResponse promise) are run immediately and hence, no matter what type of delay I add they all still just execute. My suspicious is that the I need to make (getMethodResponse) return a function and then use mapSeries but I cant get this to work either.
Code I tried:
Wrapped the getMethodResponse in this:
return function(value){}
Then added this after the loop (and within the loop - no difference):
Promise.mapSeries(function (promises) {
return 'a'();
}).then(function (results) {
console.log('result', results);
});
Also tried many other suggestions:
Here
Here
Any suggestions please?
EDIT
As request, some additional code to try pin-point the issue.
The code currently working with a small set of endpoints (within the Swagger file):
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
//Working with small set
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
I have since tried adding this in place of the return Promise.all():
Promise.map(promises, function() {
// Promise.map awaits for returned promises as well.
console.log('X');
},{concurrency: 5})
.then(function() {
return console.log("y");
});
Results of this spits out something like this (it's the same for each endpoint, there are many):
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
X
Error: TooManyRequestsException: Too Many Requests
The AWS SDK is being called 3 times within each promise, the functions of which are (get initiated from the getMethodResponse() function):
apigateway.getRestApisAsync()
return apigateway.getResourcesAsync(resourceParams)
apigateway.getMethodAsync(params, function (err, data) {}
The typical AWS SDK documentation state that this is typical behaviour for when too many consecutive calls are made (too fast). I've had a similar issue in the past which was resolved by simply adding a .delay(500) into the code being called;
Something like:
return apigateway.updateModelAsync(updateModelParams)
.tap(() => logger.verbose(`Updated model ${updatedModel.name}`))
.tap(() => bar.tick())
.delay(500)
EDIT #2
I thought in the name of thorough-ness, to include my entire .js file.
'use strict';
const AWS = require('aws-sdk');
let apigateway, lambda;
const Promise = require('bluebird');
const R = require('ramda');
const logger = require('../logger');
const config = require('../config/default');
const helpers = require('../library/helpers');
const winston = require('winston');
const request = require('request');
const _ = require('lodash');
const region = 'ap-southeast-2';
const methodLib = require('../aws/methods');
const emitter = require('../library/emitter');
emitter.on('updateRegion', (region) => {
region = region;
AWS.config.update({ region: region });
apigateway = new AWS.APIGateway({ apiVersion: '2015-07-09' });
Promise.promisifyAll(apigateway);
});
function getSwaggerFromHttp(externalUrl) {
return new Promise((resolve, reject) => {
request.get({
url: externalUrl,
header: {
"content-type": "application/json"
}
}, (err, res, body) => {
if (err) {
winston.error(err);
reject(err);
}
let result = JSON.parse(body);
resolve(result);
})
});
}
/*
Deletes a method response
*/
function deleteMethodResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteMethodResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Method response ${statusCode} deleted for path: ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Method Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Deletes an integration response
*/
function deleteIntegrationResponse(httpMethod, resourceId, restApiId, statusCode, resourcePath) {
let methodResponseParams = {
httpMethod: httpMethod,
resourceId: resourceId,
restApiId: restApiId,
statusCode: statusCode
};
return apigateway.deleteIntegrationResponseAsync(methodResponseParams)
.delay(1200)
.tap(() => logger.verbose(`Integration response ${statusCode} deleted for path ${resourcePath}`))
.error((e) => {
return console.log(`Error deleting Integration Response ${httpMethod} not found on resource path: ${resourcePath} (resourceId: ${resourceId})`); // an error occurred
logger.error('Error: ' + e.stack)
});
}
/*
Get Resource
*/
function getMethodResponse(httpMethod, statusCode, apiName, resourcePath) {
let params = {
httpMethod: httpMethod.toUpperCase(),
resourceId: '',
restApiId: ''
}
return getResourceDetails(apiName, resourcePath)
.error((e) => {
logger.unimportant('Error: ' + e.stack)
})
.then((result) => {
//Only run the comparrison of models if the resourceId (from the url passed in) is found within the AWS Gateway
if (result) {
params.resourceId = result.resourceId
params.restApiId = result.apiId
var awsMethodResponses = [];
try {
apigateway.getMethodAsync(params, function (err, data) {
if (err) {
if (err.statusCode == 404) {
return console.log(`Method ${params.httpMethod} not found on resource path: ${resourcePath} (resourceId: ${params.resourceId})`); // an error occurred
}
console.log(err, err.stack); // an error occurred
}
else {
if (data) {
_.each(data.methodResponses, function (value, key) {
if (key >= 200 && key <= 204) {
awsMethodResponses.push(key)
}
});
awsMethodResponses = _.pull(awsMethodResponses, statusCode); //List of items not found within the Gateway - to be removed.
_.each(awsMethodResponses, function (value, key) {
if (data.methodResponses[value].responseModels) {
var existingModel = data.methodResponses[value].responseModels['application/json']; //Check if there is currently a model attached to the resource / method about to be deleted
methodLib.updateResponseAssociation(params.httpMethod, params.resourceId, params.restApiId, statusCode, existingModel); //Associate this model to the same resource / method, under the new response status
}
deleteMethodResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
deleteIntegrationResponse(params.httpMethod, params.resourceId, params.restApiId, value, resourcePath)
.delay(1200)
.done();
})
}
}
})
.catch(err => {
console.log(`Error: ${err}`);
});
}
catch (e) {
console.log(`getMethodAsync failed, Error: ${e}`);
}
}
})
};
function getResourceDetails(apiName, resourcePath) {
let resourceExpr = new RegExp(resourcePath + '$', 'i');
let result = {
apiId: '',
resourceId: '',
path: ''
}
return helpers.apiByName(apiName, AWS.config.region)
.delay(1200)
.then(apiId => {
result.apiId = apiId;
let resourceParams = {
restApiId: apiId,
limit: config.awsGetResourceLimit,
};
return apigateway.getResourcesAsync(resourceParams)
})
.then(R.prop('items'))
.filter(R.pipe(R.prop('path'), R.test(resourceExpr)))
.tap(helpers.handleNotFound('resource'))
.then(R.head)
.then([R.prop('path'), R.prop('id')])
.then(returnedObj => {
if (returnedObj.id) {
result.path = returnedObj.path;
result.resourceId = returnedObj.id;
logger.unimportant(`ApiId: ${result.apiId} | ResourceId: ${result.resourceId} | Path: ${result.path}`);
return result;
}
})
.catch(err => {
console.log(`Error: ${err} on API: ${apiName} Resource: ${resourcePath}`);
});
};
function delay(t) {
return new Promise(function(resolve) {
setTimeout(resolve, t)
});
}
module.exports = (apiName, externalUrl) => {
return getSwaggerFromHttp(externalUrl)
.then((swagger) => {
let paths = swagger.paths;
let resourcePath = '';
let resourceMethod = '';
let promises = [];
_.each(paths, function (value, key) {
resourcePath = key;
_.each(value, function (value, key) {
resourceMethod = key;
let statusList = [];
_.each(value.responses, function (value, key) {
if (key >= 200 && key <= 204) {
statusList.push(key)
}
});
_.each(statusList, function (value, key) { //Only for 200-201 range
promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath))
});
});
});
//Working with small set
return Promise.all(promises)
.catch((err) => {
winston.error(err);
})
})
.catch((err) => {
winston.error(err);
});
};
You apparently have a misunderstanding about what Promise.all() and Promise.map() do.
All Promise.all() does is keep track of a whole array of promises to tell you when the async operations they represent are all done (or one returns an error). When you pass it an array of promises (as you are doing), ALL those async operations have already been started in parallel. So, if you're trying to limit how many async operations are in flight at the same time, it's already too late at that point. So, Promise.all() by itself won't help you control how many are running at once in any way.
I've also noticed since, that it seems this line promises.push(getMethodResponse(resourceMethod, value, apiName, resourcePath)) is actually executing promises and not simply adding them to the array. Seems like the last Promise.all() doesn't actually do much.
Yep, when you execute promises.push(getMethodResponse()), you are calling getMethodResponse() immediately right then. That starts the async operation immediately. That function then returns a promise and Promise.all() will monitor that promise (along with all the other ones you put in the array) to tell you when they are all done. That's all Promise.all() does. It monitors operations you've already started. To keep the max number of requests in flight at the same time below some threshold, you have to NOT START the async operations all at once like you are doing. Promise.all() does not do that for you.
For Bluebird's Promise.map() to help you at all, you have to pass it an array of DATA, not promises. When you pass it an array of promises that represent async operations that you've already started, it can do no more than Promise.all() can do. But, if you pass it an array of data and a callback function that can then initiate an async operation for each element of data in the array, THEN it can help you when you use the concurrency option.
Your code is pretty complex so I will illustrate with a simple web scraper that wants to read a large list of URLs, but for memory considerations, only process 20 at a time.
const rp = require('request-promise');
let urls = [...]; // large array of URLs to process
Promise.map(urls, function(url) {
return rp(url).then(function(data) {
// process scraped data here
return someValue;
});
}, {concurrency: 20}).then(function(results) {
// process array of results here
}).catch(function(err) {
// error here
});
In this example, hopefully you can see that an array of data items are being passed into Promise.map() (not an array of promises). This, then allows Promise.map() to manage how/when the array is processed and, in this case, it will use the concurrency: 20 setting to make sure that no more than 20 requests are in flight at the same time.
Your effort to use Promise.map() was passing an array of promises, which does not help you since the promises represent async operations that have already been started:
Promise.map(promises, function() {
...
});
Then, in addition, you really need to figure out what exactly causes the TooManyRequestsException error by either reading documentation on the target API that exhibits this or by doing a whole bunch of testing because there can be a variety of things that might cause this and without knowing exactly what you need to control, it just takes a lot of wild guesses to try to figure out what might work. The most common things that an API might detect are:
Simultaneous requests from the same account or source.
Requests per unit of time from the same account or source (such as request per second).
The concurrency operation in Promise.map() will easily help you with the first option, but will not necessarily help you with the second option as you can limit to a low number of simultaneous requests and still exceed a requests per second limit. The second needs some actual time control. Inserting delay() statements will sometimes work, but even that is not a very direct method of managing it and will either lead to inconsistent control (something that works sometimes, but not other times) or sub-optimal control (limiting yourself to something far below what you can actually use).
To manage to a request per second limit, you need some actual time control with a rate limiting library or actual rate limiting logic in your own code.
Here's an example of a scheme for limiting the number of requests per second you are making: How to Manage Requests to Stay Below Rate Limiting.

How should I connect to a Redis instance from an AWS Lambda function?

I'm trying to build an API for a single-page web app using AWS Lambda and the Serverless Framework. I want to use Redis Cloud for storage, mostly for its combination of speed and data persistence. I may use more Redis Cloud features in the future, so I'd prefer to avoid using ElastiCache for this. My Redis Cloud instance is running in the same AWS region as my function.
I have a function called related that takes a hashtag from a GET request to an API endpoint, and checks to see if there's an entry for it in the database. If it's there, it should return the results immediately. If not, it should query RiteTag, write the results to Redis, and then return the results to the user.
I'm pretty new to this, so I'm probably doing something adorably naive. Here's the event handler:
'use strict'
const lib = require('../lib/related')
module.exports.handler = function (event, context) {
lib.respond(event, (err, res) => {
if (err) {
return context.fail(err)
} else {
return context.succeed(res)
}
})
}
Here's the ../lib/related.js file:
var redis = require('redis')
var jsonify = require('redis-jsonify')
var rt = require('./ritetag')
var redisOptions = {
host: process.env.REDIS_URL,
port: process.env.REDIS_PORT,
password: process.env.REDIS_PASS
}
var client = jsonify(redis.createClient(redisOptions))
module.exports.respond = function (event, callback) {
var tag = event.hashtag.replace(/^#/, '')
var key = 'related:' + tag
client.on('connect', () => {
console.log('Connected:', client.connected)
})
client.on('end', () => {
console.log('Connection closed.')
})
client.on('ready', function () {
client.get(key, (err, res) => {
if (err) {
client.quit()
callback(err)
} else {
if (res) {
// Tag is found in Redis, so send results directly.
client.quit()
callback(null, res)
} else {
// Tag is not yet in Redis, so query Ritetag.
rt.hashtagDirectory(tag, (err, res) => {
if (err) {
client.quit()
callback(err)
} else {
client.set(key, res, (err) => {
if (err) {
callback(err)
} else {
client.quit()
callback(null, res)
}
})
}
})
}
}
})
})
}
All of this works as expected, to a point. If I run the function locally (using sls function run related), I have no problems whatsoever—tags are read from and written to the Redis database as they should be. However, when I deploy it (using sls dash deploy), it works the first time it's run after deployment, and then stops working. All subsequent attempts to run it simply return null to the browser (or Postman, or curl, or the web app). This is true regardless of whether the tag I use for testing is already in the database or not. If I then re-deploy, making no changes to the function itself, it works again—once.
On my local machine, the function first logs Connected: true to the console, then the results of the query, then Connection closed. On AWS, it logs Connected: true, then the results of the query, and that's it. On the second run, it logs Connection closed. and nothing else. On the third and all subsequent runs, it logs nothing at all. Neither environment ever reports any errors.
It seems pretty clear that the problem is with the connection to Redis. If I don't close it in the callbacks, then subsequent attempts to call the function just time out. I've also tried using redis.unref instead of redis.quit, but that didn't seem to make any difference.
Any help would be greatly appreciated.
I've now solved my own problem, and I hope I can be of help to someone experiencing this problem in the future.
There are two major considerations when connecting to a database like I did in the code above from a Lambda function:
Once context.succeed(), context.fail(), or context.done() is called, AWS may freeze any processes that haven't finished yet. This is what was causing AWS to log Connection closed on the second call to my API endpoint—the process was frozen just before Redis finished closing, then thawed on the next call, at which point it continued right where it left off, reporting that the connection was closed. Takeaway: if you want to close your database connection, make sure it's fully closed before you call one of those methods. You can do this by putting a callback in an event handler that's triggered by a connection close (.on('end'), in my case).
If you split your code into separate files and require them at the top of each file, like I did, Amazon will cache as many of those modules as possible in memory. If that's causing problems, try moving the require() calls inside a function instead of at the top of the file, then exporting that function. Those modules will then be re-imported whenever the function is run.
Here's my updated code. Note that I've also put my Redis configuration into a separate file, so I can import it into other Lambda functions without duplicating code.
The Event Handler
'use strict'
const lib = require('../lib/related')
module.exports.handler = function (event, context) {
lib.respond(event, (err, res) => {
if (err) {
return context.fail(err)
} else {
return context.succeed(res)
}
})
}
Redis Configuration
module.exports = () => {
const redis = require('redis')
const jsonify = require('redis-jsonify')
const redisOptions = {
host: process.env.REDIS_URL,
port: process.env.REDIS_PORT,
password: process.env.REDIS_PASS
}
return jsonify(redis.createClient(redisOptions))
}
The Function
'use strict'
const rt = require('./ritetag')
module.exports.respond = function (event, callback) {
const redis = require('./redis')()
const tag = event.hashtag.replace(/^#/, '')
const key = 'related:' + tag
let error, response
redis.on('end', () => {
callback(error, response)
})
redis.on('ready', function () {
redis.get(key, (err, res) => {
if (err) {
redis.quit(() => {
error = err
})
} else {
if (res) {
// Tag is found in Redis, so send results directly.
redis.quit(() => {
response = res
})
} else {
// Tag is not yet in Redis, so query Ritetag.
rt.hashtagDirectory(tag, (err, res) => {
if (err) {
redis.quit(() => {
error = err
})
} else {
redis.set(key, res, (err) => {
if (err) {
redis.quit(() => {
error = err
})
} else {
redis.quit(() => {
response = res
})
}
})
}
})
}
}
})
})
}
This works exactly as it should—and it's blazing fast, too.

Resources