Node.js queries on DynamoDB hang forever using IAM roles - node.js

I'm trying to connect my Node.js app to DynamoDB from an EKS instance using IAM roles.
As you probably know, credentials should be passed automatically to the container in this case. As proof of that, I installed the aws-cli within the same container and it works perfectly.
However, my Node.js script hangs forever when I try to execute it, and no errors are returned.
I checked a few things: proxy, permissions (OS user), etc. Nothing seemed related to this problem.
const { DynamoDBClient, QueryCommand } = require("@aws-sdk/client-dynamodb");

const ddbClient = new DynamoDBClient({
  region: "eu-west-1",
});

// Set the parameters
const params = {
  KeyConditionExpression: "itemRef = :itemRef",
  ExpressionAttributeValues: {
    ":itemRef": { S: "ABC" },
  },
  TableName: "my-sandbox",
};

const run = async () => {
  try {
    const data = await ddbClient.send(new QueryCommand(params));
    data.Items.forEach(function (element) {
      console.log(element);
    });
    return data;
  } catch (err) {
    console.error(err);
  }
};

run();
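A minimal credential check along these lines (a sketch; it assumes the @aws-sdk/client-sts package is also installed) could show whether the SDK resolves the IAM role credentials at all, mirroring what aws sts get-caller-identity does from the CLI:

// Sketch: verify that the SDK resolves the same role credentials the CLI sees.
// Assumes @aws-sdk/client-sts is installed alongside the DynamoDB client.
const { STSClient, GetCallerIdentityCommand } = require("@aws-sdk/client-sts");

const sts = new STSClient({ region: "eu-west-1" });

sts.send(new GetCallerIdentityCommand({}))
  .then((identity) => console.log("Resolved identity:", identity.Arn))
  .catch((err) => console.error("Credential resolution failed:", err));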
Any ideas?

Related

Firestore document not found when using Firebase-Admin running on Firestore Emulator

When I use firebase.firestore() on the client side, the following function works well and manages to find the document:
With firebase.firestore()
function getUserProfile(email, returnDoc) {
  var db = firebase.firestore();
  var docRef = db.collection("Users").doc(email);
  docRef.get().then(function (doc) {
    if (doc.exists) {
      returnDoc(undefined, doc);
    } else {
      returnDoc("User not found.", doc);
    }
  }).catch(function (error) {
    returnDoc("Error getting user.", doc);
  });
}
In index.js
getUserProfile(user.email, function (err, userProfile) {
  if (!err) {
    $scope.firstName = userProfile.get("FirstName");
    $scope.lastName = userProfile.get("LastName");
    $scope.email = userProfile.get("Email");
    $scope.$apply();
  } else {
    alert(err);
  }
});
But when I try to create a similar function using firebase-admin, the following function can't find the document with the same email argument:
In db.js, with admin.firestore()
const admin = require('firebase-admin');
admin.initializeApp();
let db = admin.firestore();

function getUserData(email, returnDoc) {
  console.log(`imtp-db.getUserData: ${email}`); // email data is correct and exists in database.
  let docRef = db.collection("Users").doc(email);
  return docRef.get().then(function (doc) {
    console.log(`doc.exists: ${doc.exists}`); // doc.exists: false here.
    if (doc.exists) {
      console.log("Document data:", doc.data());
      return returnDoc(undefined, doc);
    } else {
      return returnDoc("User not found.", doc);
    }
  }).catch(function (error) {
    return returnDoc("Error getting user.", doc);
  });
}

exports.getUserData = getUserData;
In Cloud Function:
const functions = require('firebase-functions');
const db = require("./middleware/db.js");

exports.getUserProfile = functions.https.onCall((data, context) => {
  var userProfile = undefined;
  console.log(`data.email = ${data.email}`);
  return db.getUserData(data.email, function (err, userDoc) {
    console.log(`exports.getUserProfile.err = ${err}`);
    if (!err) {
      userProfile = userDoc.data();
      return {
        error: err,
        returnData: userProfile
      };
    } else {
      return {
        error: err,
        returnData: userProfile
      };
    }
  });
});
Everything works well in the above function without errors, except that doc.exists always evaluates to false and it always returns "User not found." Why? Is there anything I've done wrong in the second version of my code? Thanks!
NOTE: I'm running all emulators: firebase emulators:start
In reference to this post, apparently the answer is simple: the local Firestore Emulator has NO DATA. This can easily be proven with the following approach, based on the exact same code in the question:
Start all emulators except Firestore Emulator:
firebase serve --only hosting
firebase emulators:start --only functions
Now the document is accessible and doc.exists == true.
Without starting the Firestore Emulator, the code is forced to access the production server, which has the data. Of course, this is not recommended, especially when you're on the Blaze plan. Hence, as described in the link above, you should set up the test data locally before using the Firestore Emulator.
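For reference, a minimal seed script along these lines (a sketch; the project ID and document values are placeholders) can load a test user into the running Firestore Emulator, since firebase-admin talks to the emulator when FIRESTORE_EMULATOR_HOST is set:

// Sketch of a local seed script, run while the Firestore Emulator is up.
// firebase-admin targets the emulator when FIRESTORE_EMULATOR_HOST is set.
process.env.FIRESTORE_EMULATOR_HOST = "localhost:8080";
const admin = require("firebase-admin");
admin.initializeApp({ projectId: "demo-project" }); // placeholder project ID

admin.firestore().collection("Users").doc("test@example.com").set({
  FirstName: "Test",
  LastName: "User",
  Email: "test@example.com",
}).then(() => console.log("Seeded test user into the emulator"));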
As for why the client-side firebase.firestore() was able to access the data: the Firestore Emulator runs on a different port than the hosting emulator. So the firebase.firestore() call executed in index.js on the web client side was served from localhost:5000, bypassed the Firestore Emulator, and went straight to the production server. To resolve this issue, this Firebase Documentation could be the solution (but not yet tested):
// Initialize your Web app as described in the Get started for Web
// firebaseApp previously initialized using firebase.initializeApp().
var db = firebaseApp.firestore();
if (location.hostname === "localhost") {
  db.settings({
    host: "localhost:8080",
    ssl: false
  });
}
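As a side note, newer releases of the Firebase JS SDK expose a dedicated emulator hook that replaces the settings() override shown above (the exact version it appears in is an assumption here):

// Sketch, newer JS SDK alternative: point the client at the Firestore Emulator.
if (location.hostname === "localhost") {
  firebase.firestore().useEmulator("localhost", 8080);
}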
When you use an emulator, Firebase does not fetch from the actual Firestore database. It fetches from the Firestore Emulator, whose data you can manage through the Emulator UI. The Firestore Emulator is separate from your actual Firestore database.
If you run firebase emulators:start, the console output shows the URL for the Emulator UI.

Elasticsearch update doesn't return any result with Node.js

I have an Elasticsearch connection in my code, as below.
const config = require('../../config/index');
const logger = require('winston');
var elasticsearch = require('elasticsearch');

var elasticClient;
var state = {
  connection: null,
};

exports.connect = function (done) {
  try {
    logger.info("elasticsearch 000");
    if (state.connection) return done();
    elasticClient = new elasticsearch.Client({
      host: config.elasticSearch.url,
      log: 'info'
    });
    state.connection = elasticClient;
    logger.info("elasticsearch connected on url : ", config.elasticSearch.url);
    done();
  } catch (e) {
    logger.info("elasticsearch connect exception ", e);
  }
};

exports.get = function () {
  return state.connection;
};
I'm using this connection in this way...
function Update(_id, data, callback) {
  elasticClient.get().update({
    index: indexName,
    type: tablename,
    id: _id,
    retry_on_conflict: 5,
    body: {
      doc: data,
      doc_as_upsert: true
    }
  }, (err, results) => {
    if (err) {
      console.log("= = = = [elasticClient Update err]= = = = =", err);
    }
    return callback(err, results);
  });
}
Issue: When I call this update function, it doesn't return any data, and I get this error:
error : StatusCodeError: Request Timeout after 30000ms
/node_modules/elasticsearch/src/lib/transport.js:397:9
Note: For the Elasticsearch connection, I'm using the Amazon Elasticsearch Service, and I'm passing its VPC endpoint.
Node version: 12.14.1
Elasticsearch version 6.3
Package.json : "elasticsearch": "16.6.0"
When you say "local terminal", do you mean a terminal in your laptop?
If so, notice ElasticSearch should not be open to the public Internet, as exposing your database like that is prone to data leaks (a single misconfiguration and you're done).
Anyways, if from some computer A you expect to be able to connect to ElasticSearch and you are not being able, then the first things I'd check are the security groups and in which VPC/subnet the database is located. If the DB is in a private subnet (as it should be) then it's ok not to be able to access it from outside.
Think of how your security model should work. Usually, your ElasticSearch would be in a private subnet, and you would only connect to it from instances that are connected to that subnet and that are allowed in the rules of your security groups.
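If you want to confirm it's a network problem rather than something in the update call itself, a quick reachability check with the same client library should behave the same way (a sketch; the endpoint value is a placeholder for your VPC endpoint):

// Sketch: a ping with a short timeout. If this also times out from the same
// host, the issue is network-level (security group / subnet), not the query.
var elasticsearch = require('elasticsearch');

var client = new elasticsearch.Client({
  host: 'https://vpc-your-domain.eu-west-1.es.amazonaws.com', // placeholder VPC endpoint
  log: 'info'
});

client.ping({ requestTimeout: 3000 }, function (err) {
  if (err) {
    console.error('Elasticsearch cluster is unreachable:', err.message);
  } else {
    console.log('Elasticsearch cluster is reachable');
  }
});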

How to manage Postgres connections in concurrent AWS Lambda functions?

Does anybody have experience building concurrent AWS Lambda functions with Postgres?
I have to build a Lambda cron that will ingest thousands of invoices into a Postgres database, and I have to call the ingestion Lambda function concurrently for each invoice. The problem is that, because it is concurrent, each instance of the ingestion function creates its own connection to the database. This means that if I have 1000 invoices to ingest, each invoice invokes a Lambda function, which creates 1000 database connections. This exhausts the maximum number of connections that Postgres can handle, and some instances of the invoked Lambda function return an error saying that no more connections are available.
Any tips you can give on how to handle this problem?
Here are some snippets of my code:
ingestInvoiceList.js
var AWS = require('aws-sdk');
var sftp = require('ssh2-sftp-client');
var lambda = new AWS.Lambda();

exports.handler = async (event) => {
  ...
  let folder_contents;
  try {
    // fetch list of Zip format invoices
    folder_contents = await sftp.list(client_folder);
  } catch (err) {
    console.log(`[${client}]: ${err.toString()}`);
    throw new Error(`[${client}]: ${err.toString()}`);
  }

  let invoiceCount = 0;
  let funcName = 'ingestInvoice';
  for (let item of folder_contents) {
    if (item.type === '-') {
      let payload = JSON.stringify({
        invoice: item.name
      });
      let params = {
        FunctionName: funcName,
        Payload: payload,
        InvocationType: 'Event'
      };
      // invoke ingest invoice concurrently
      let result = await new Promise((resolve) => {
        lambda.invoke(params, (err, data) => {
          if (err) resolve(err);
          else resolve(data);
        });
      });
      console.log('result: ', result);
      invoiceCount++;
    }
  }
  ...
}
ingestInvoice.js
var AWS = require('aws-sdk');
var sftp = require('ssh2-sftp-client');
var fs = require('fs');
var JSZip = require('jszip');
var DBClient = require('./db.js');
var lambda = new AWS.Lambda();

exports.handler = async (event) => {
  ...
  let invoice = event.invoice;
  let client = 'client name';
  let db = new DBClient();
  try {
    console.log(`[${client}]: Extracting documents from ${invoice}`);
    try {
      // get zip file from sftp server
      await sftp.fastGet(invoice, '/tmp/tmp.zip', {});
    } catch (err) {
      throw err;
    }

    let zip;
    try {
      // extract the zip file...
      zip = await new Promise((resolve, reject) => {
        fs.readFile("/tmp/tmp.zip", async function (err, data) {
          if (err) return reject(err);
          let unzippedData;
          try {
            unzippedData = await JSZip.loadAsync(data);
          } catch (err) {
            return reject(err);
          }
          return resolve(unzippedData);
        });
      });
    } catch (err) {
      throw err;
    }

    let unibillRegEx = /unibill.+\.txt/g;
    let files = [];
    zip.forEach(async (path, entry) => {
      if (unibillRegEx.exec(entry.name)) {
        files['unibillObj'] = entry;
      } else {
        files['pdfObj'] = entry;
      }
    });

    // await db.getClient().connect();
    await db.setSchema(client);
    console.log('Schema has been set.');
    let unibillStr = await files.unibillObj.async('string');
    console.log('ingesting ', files.unibillObj.name);
    // Do ingestion queries here...
    ...
    await uploadInvoiceDocsToS3(client, files);
  } catch (err) {
    console.error(err.stack);
    throw err;
  } finally {
    try {
      // console.log('Disconnecting from database...');
      // await db.endClient();
      console.log('Disconnecting from SFTP...');
      await sftp.end();
    } catch (err) {
      console.log('ERROR: ' + err.toString());
      throw err;
    }
  }
  ...
}
db.js
var { Pool } = require('pg');

module.exports = class DBClient {
  constructor() {
    this.pool = new Pool();
  }

  async setSchema(schema) {
    await this.execQuery(`SET search_path TO ${schema}`);
  }

  async execQuery(sql) {
    return await this.pool.query(sql);
  }
}
Any answer would be appreciated, thank you!
I see two ways to handle this. Ultimately it depends on how fast you want to process this data.
Change the concurrency setting for your Lambda to a "Reserved Concurrency".
This will allow you to limit the number of concurrent Lambdas running (see this link for more details).
Change your code to queue the work in an SQS queue. From there you would create another Lambda that is triggered by the queue and processes it as needed (a sketch of the producer side follows below). That Lambda could decide how much to pull off the queue at a time, and it too would likely need to be limited on concurrency. But you could tune it to, for example, run for the maximum of 15 minutes, which may be enough to empty the queue without killing the DB. Or if you had, say, a max concurrency of 100, then you would process quickly without killing the DB.
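If you go the queue route, the producer side might look roughly like this (a sketch; the queue and its URL environment variable are assumptions, and the SQS-triggered consumer Lambda with a small reserved concurrency is configured separately):

// Sketch: enqueue one SQS message per invoice instead of invoking the ingest
// Lambda directly; an SQS-triggered Lambda with limited concurrency drains it.
var AWS = require('aws-sdk');
var sqs = new AWS.SQS();

async function enqueueInvoice(invoiceName) {
  return sqs.sendMessage({
    QueueUrl: process.env.INVOICE_QUEUE_URL, // hypothetical queue URL
    MessageBody: JSON.stringify({ invoice: invoiceName })
  }).promise();
}

// In ingestInvoiceList.js, the loop would call enqueueInvoice(item.name)
// instead of lambda.invoke(...).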
First, you have to initialize your connection outside the handler, so that each time your warm Lambda is executed it won't open a new one:
const db = new DBClient();

exports.handler = async (event) => {
  ...
  await db.query(...)
  ...
}
If it is node-pg, there is a package that keeps track of all the idle connections, kills them if necessary, and retries in case of the error "sorry, too many clients already":
https://github.com/MatteoGioioso/serverless-pg
Any other custom-implemented retry mechanism with backoff will work as well.
There is also one for MySQL: https://github.com/jeremydaly/serverless-mysql
These days a good solution to consider for this problem, on AWS, is RDS Proxy, which acts as a transparent proxy between your lambda(s) and database:
Amazon RDS Proxy allows applications to pool and share connections established with the database, improving database efficiency, application scalability, and security.
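In practice, adopting RDS Proxy needs no code change beyond pointing the pool at the proxy endpoint, for example (a sketch; the endpoint name, environment variables, and pool limits are placeholders):

// Sketch: same pg Pool as in db.js, but aimed at an RDS Proxy endpoint.
var { Pool } = require('pg');

var pool = new Pool({
  host: 'my-proxy.proxy-xxxxxxxxxxxx.eu-west-1.rds.amazonaws.com', // placeholder proxy endpoint
  database: process.env.PGDATABASE,
  user: process.env.PGUSER,
  password: process.env.PGPASSWORD,
  max: 1, // one connection per warm Lambda container; the proxy does the pooling
  idleTimeoutMillis: 120000
});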

How to connect to MongoDB reliably in a serverless setup?

Eight out of ten times everything connects well. That said, I sometimes get a MongoClient must be connected before calling MongoClient.prototype.db error. How should I change my code so it works reliably (100%)?
I tried a code snippet from one of the creators of the Zeit Now platform.
My handler
const { send } = require('micro');
const { handleErrors } = require('../../../lib/errors');
const cors = require('../../../lib/cors')();
const qs = require('micro-query');
const mongo = require('../../../lib/mongo');
const { ObjectId } = require('mongodb');

const handler = async (req, res) => {
  let { limit = 5 } = qs(req);
  limit = parseInt(limit);
  limit = limit > 10 ? 10 : limit;
  const db = await mongo();
  const games = await db
    .collection('games_v3')
    .aggregate([
      {
        $match: {
          removed: { $ne: true }
        }
      },
      { $sample: { size: limit } }
    ])
    .toArray();
  send(res, 200, games);
};

module.exports = handleErrors(cors(handler));
My mongo script that reuses the connection in case the lambda is still warm:
// Based on: https://spectrum.chat/zeit/now/now-2-0-connect-to-database-on-every-function-invocation~e25b9e64-6271-4e15-822a-ddde047fa43d?m=MTU0NDkxODA3NDExMg==
const MongoClient = require('mongodb').MongoClient;

if (!process.env.MONGODB_URI) {
  throw new Error('Missing env MONGODB_URI');
}

let client = null;

module.exports = function getDb(fn) {
  if (client && !client.isConnected) {
    client = null;
    console.log('[mongo] client discard');
  }

  if (client === null) {
    client = new MongoClient(process.env.MONGODB_URI, {
      useNewUrlParser: true
    });
    console.log('[mongo] client init');
  } else if (client.isConnected) {
    console.log('[mongo] client connected, quick return');
    return client.db(process.env.MONGO_DB_NAME);
  }

  return new Promise((resolve, reject) => {
    client.connect(err => {
      if (err) {
        client = null;
        console.error('[mongo] client err', err);
        return reject(err);
      }
      console.log('[mongo] connected');
      resolve(client.db(process.env.MONGO_DB_NAME));
    });
  });
};
I need my handler to be 100% reliable.
if (client && !client.isConnected) {
  client = null;
  console.log('[mongo] client discard');
}
This code can cause problems! Even though you're setting client to null, that client still exists and will keep connecting to Mongo; it will not be garbage collected, and its connection callback will still run. Inside that callback, however, client will refer to the next client that was created, which is not necessarily connected.
A common pattern for this kind of code is to only ever return a single promise from the getDB call:
let clientP = null;

function getDb(fn) {
  if (clientP) return clientP;
  clientP = new Promise((resolve, reject) => {
    const client = new MongoClient(process.env.MONGODB_URI, {
      useNewUrlParser: true
    });
    client.connect(err => {
      if (err) {
        console.error('[mongo] client err', err);
        return reject(err);
      }
      console.log('[mongo] connected');
      resolve(client.db(process.env.MONGO_DB_NAME));
    });
  });
  return clientP;
}
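With that pattern, the handler usage stays the same, and concurrent invocations during a cold start all await the same connection attempt instead of racing each other. A sketch, reusing the micro send helper from the handler above:

// Sketch: every caller awaits the same promise, so there is only ever one
// connection attempt in flight per warm container.
const handler = async (req, res) => {
  const db = await getDb();
  const games = await db.collection('games_v3').find({}).limit(5).toArray();
  send(res, 200, games);
};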
I had the same issue. In my case it was caused by calling getDb() before a previous getDb() call had returned. In that case, I believe client.isConnected returns true even though it is still connecting.
This was caused by forgetting to put an await before the getDb() call in one location. I tracked down which call it was by logging a call stack from getDb using:
console.log(new Error().stack);
I don't see the same issue in the sample code in the question, though it could be triggered by another bit of code that isn't shown.
I have written this article about serverless, Lambda, and DB connections. It covers some concepts that could help you find the root cause of your problem, along with examples and use cases of how to mitigate connection pool issues.
Just by looking at your code, I can tell it is missing this:
context.callbackWaitsForEmptyEventLoop = false;
Serverless: Dynamodb x Mongodb x Aurora serverless
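For completeness, a sketch of where that flag would go if the handler runs as a plain AWS Lambda function (the micro-based handler in the question doesn't receive a Lambda context, so this only applies in that setup):

// Sketch: set the flag at the top of the Lambda handler so the runtime does
// not wait for the open MongoDB socket before freezing the container.
exports.handler = async (event, context) => {
  context.callbackWaitsForEmptyEventLoop = false;
  const db = await getDb();
  return db.collection('games_v3').find({}).limit(5).toArray();
};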

OpenWhisk failing to call Compose PostgreSQL

I have tried for more hours than I care to admit to get an OpenWhisk function to call a PostgreSQL database on Compose.io. Here is my code:
My latest incarnation is this:
function myAction(params) {
  return new Promise(function(resolve, reject) {
    console.log('Connecting to Compose database');
    // console.log('Params ---> ', params);
    var mysql = require('promise-mysql');
    var fs = require('fs');
    var pg = require('pg');
    var request = require('request');
    var Promise = require('promise/lib/es6-extensions');
    var connString = "postgres:xxxx";
    pg.connect(connString, function(err, client, done) {
      console.log("connectiong..", err, client, done);
      if (err) {
        console.log('[connectToCompose] failed to fetch client from pool', err);
        reject(err);
      } else {
        params.client = client;
        params.done = done;
        console.log('[connectToCompose] obtained a Compose client');
        return(params);
      }
    });
    // params.client.done();
    // console.log("closing connectiong");
  });
}
exports.main = myAction;
I have a similar example where I connect to a different SQL database (not Compose) using a promise-based SQL library rather than Postgres, and it works. What am I doing wrong?
To work with OpenWhisk and any database offering, you need to use promisified JavaScript code; the event loop as it's used in ordinary Node.js isn't available. I have an example that uses pg-promise (taken more or less exactly from the project docs) and it works fine for me. Try something like this:
const promise = require('bluebird');

const initOptions = {
  promiseLib: promise // overriding the default (ES6 Promise)
};
const pgp = require('pg-promise')(initOptions);
const conn_info = {...connection info...};
const db = pgp(conn_info);

module.exports.main = function main(args) {
  return db.any('SELECT * FROM items')
    .then(data => {
      console.log('DATA:', data);
      // return whatever data you wanted
      return { message: 'success' };
    })
    .catch(error => {
      console.log('ERROR:', error);
    });
}
Not all of the dependencies here are available on OpenWhisk by default, so when you deploy the action, include both your *.js file and the whole of node_modules/ in a zip file, and upload that. It's definitely possible to use Compose PostgreSQL with OpenWhisk, if that helps to encourage you :)
