I am working on a Node app that is essentially a simple AWS SQS poller, which should sit and listen for new items in different queues.
Here is my module.exports:
module.exports = {
    readMessage: function(qParams, qType, tableName) {
        logger.debug(qType);
        SQS.receiveMessage(qParams, handleSqsResponse);

        function handleSqsResponse(err, data) {
            if (err) logger.error("handleSqsResponse error:" + err);
            if (data && data.Messages) {
                data.Messages.forEach(processMessage);
                readMessage(); // continue reading until draining the queue (or UPTIME reached)
            } else {
                logger.debug("no data in sqs.");
                // process.exit();
            }
        }

        // 'processing' is mainly writing to logs using winston. Could add here any
        // transformations and transmission to remote systems.
        function processMessage(sqsMessage) {
            // Parse the SQS message
            var msgObj = JSON.parse(sqsMessage.Body);
            // Process
            logger.info(msgObj.Message);
            _.extend(qParams, { "ReceiptHandle": sqsMessage.ReceiptHandle });
            dbMap[qType](msgObj, qParams, tableName);
        }
    }
}
The issue I am running into is that when I attempt to call readMessage() again, I get ReferenceError: readMessage is not defined.
module.exports is a plain object, exposed to other modules, that has a method readMessage. The bare call readMessage() should be module.exports.readMessage().
Also, I would suggest creating a variable and then exporting that:
var obj = {
    readMessage: function(qParams, qType, tableName) {
        logger.debug(qType);
        SQS.receiveMessage(qParams, handleSqsResponse);

        function handleSqsResponse(err, data) {
            if (err) logger.error("handleSqsResponse error:" + err);
            if (data && data.Messages) {
                data.Messages.forEach(processMessage);
                // continue reading until draining the queue (or UPTIME reached)
                obj.readMessage(qParams, qType, tableName);
            } else {
                logger.debug("no data in sqs.");
                // process.exit();
            }
        }

        // 'processing' is mainly writing to logs using winston. Could add here any
        // transformations and transmission to remote systems.
        function processMessage(sqsMessage) {
            // Parse the SQS message
            var msgObj = JSON.parse(sqsMessage.Body);
            // Process
            logger.info(msgObj.Message);
            _.extend(qParams, { "ReceiptHandle": sqsMessage.ReceiptHandle });
            dbMap[qType](msgObj, qParams, tableName);
        }
    }
};

module.exports = obj;
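For completeness, calling it from another module would then look roughly like this (the file name reader.js and all parameter values here are assumptions for illustration, not from the question):

var reader = require('./reader'); // file name assumed

// Illustrative parameters; shape them to match your queues and tables.
var qParams = {
    QueueUrl: 'https://sqs.us-east-1.amazonaws.com/123456789012/my-queue',
    MaxNumberOfMessages: 10
};
reader.readMessage(qParams, 'eventQueue', 'events_table');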
Please note that I only responded to the question you specifically asked; I didn't take into account any architectural issues associated with the code.
Alternatively, you can declare a named function, which can refer to itself directly, and attach it to module.exports afterwards:
function functionName(has = false) {
    var total = 0;
    if (has) {
        functionName(true);
    } else {
        // Todo
    }
}
module.exports.functionName = functionName;
Does anybody have experience building concurrent AWS Lambda functions with Postgres?
I have to build a lambda cron that will ingest thousands of invoices into a Postgres database, and I have to call the ingestion lambda function concurrently for each invoice. The problem is that, because it is concurrent, each instance of the ingestion function will create a connection to the database. This means that if I have 1,000 invoices to ingest, each invoice will invoke a lambda function, creating 1,000 database connections. This will exhaust the maximum number of connections that Postgres can handle, and some instances of the invoked lambda function will return an error saying that there are no more connections available.
Any tips you can give on how to handle this problem?
Here are some snippets of my code:
ingestInvoiceList.js
var AWS = require('aws-sdk');
var sftp = require('ssh2-sftp-client');

var lambda = new AWS.Lambda();

exports.handler = async (event) => {
    ...
    let folder_contents;
    try {
        // fetch list of Zip format invoices
        folder_contents = await sftp.list(client_folder);
    } catch (err) {
        console.log(`[${client}]: ${err.toString()}`);
        throw new Error(`[${client}]: ${err.toString()}`);
    }

    let invoiceCount = 0;
    let funcName = 'ingestInvoice';

    for (let item of folder_contents) {
        if (item.type === '-') {
            let payload = JSON.stringify({
                invoice: item.name
            });
            let params = {
                FunctionName: funcName,
                Payload: payload,
                InvocationType: 'Event'
            };

            // invoke ingestInvoice concurrently
            let result = await new Promise((resolve) => {
                lambda.invoke(params, (err, data) => {
                    if (err) resolve(err);
                    else resolve(data);
                });
            });
            console.log('result: ', result);
            invoiceCount++;
        }
    }
    ...
}
ingestInvoice.js
var AWS = require('aws-sdk');
var fs = require('fs');
var sftp = require('ssh2-sftp-client');
var JSZip = require('jszip');
var DBClient = require('./db.js');

var lambda = new AWS.Lambda();

exports.handler = async (event) => {
    ...
    let invoice = event.invoice;
    let client = 'client name';
    let db = new DBClient();

    try {
        console.log(`[${client}]: Extracting documents from ${invoice}`);
        try {
            // get zip file from sftp server
            await sftp.fastGet(invoice, '/tmp/tmp.zip', {});
        } catch (err) {
            throw err;
        }

        let zip;
        try {
            // extract the zip file...
            zip = await new Promise((resolve, reject) => {
                fs.readFile("/tmp/tmp.zip", async function (err, data) {
                    if (err) return reject(err);

                    let unzippedData;
                    try {
                        unzippedData = await JSZip.loadAsync(data);
                    } catch (err) {
                        return reject(err);
                    }
                    return resolve(unzippedData);
                });
            });
        } catch (err) {
            throw err;
        }

        let unibillRegEx = /unibill.+\.txt/g;
        let files = {};
        zip.forEach(async (path, entry) => {
            if (unibillRegEx.exec(entry.name)) {
                files['unibillObj'] = entry;
            } else {
                files['pdfObj'] = entry;
            }
        });

        // await db.getClient().connect();
        await db.setSchema(client);
        console.log('Schema has been set.');

        let unibillStr = await files.unibillObj.async('string');
        console.log('ingesting ', files.unibillObj.name);
        // Do ingestion queries here...
        ...
        await uploadInvoiceDocsToS3(client, files);
    } catch (err) {
        console.error(err.stack);
        throw err;
    } finally {
        try {
            // console.log('Disconnecting from database...');
            // await db.endClient();
            console.log('Disconnecting from SFTP...');
            await sftp.end();
        } catch (err) {
            console.log('ERROR: ' + err.toString());
            throw err;
        }
    }
    ...
}
db.js
var { Pool } = require('pg');

module.exports = class DBClient {
    constructor() {
        this.pool = new Pool();
    }

    async setSchema(schema) {
        await this.execQuery(`SET search_path TO ${schema}`);
    }

    async execQuery(sql) {
        return await this.pool.query(sql);
    }
}
Any answer would be appreciated, thank you!
I see two ways to handle this. Ultimately it depends on how fast you want to process this data.
Change the concurrency setting for your Lambda to a "Reserved Concurrency":
This will allow you to limit the number of concurrent Lambdas running (see this link for more details).
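If you prefer to set it from code rather than the console, here is a rough sketch using the aws-sdk; the function name and the limit of 100 are assumptions for illustration, not values from the question:

var AWS = require('aws-sdk');
var lambda = new AWS.Lambda();

// Cap the ingestion Lambda at 100 concurrent executions (both values assumed).
lambda.putFunctionConcurrency({
    FunctionName: 'ingestInvoice',
    ReservedConcurrentExecutions: 100
}, function (err, data) {
    if (err) console.error(err);
    else console.log('Reserved concurrency set:', data);
});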
Change your code to queue the work to be done in an SQS queue. From there you would have to create another Lambda, triggered by the queue, to process it as needed. That Lambda could decide how much to pull off the queue at a time, and it too would likely need to be limited on concurrency. But you could tune it to, for example, run for the maximum of 15 minutes, which may be enough to empty the queue and would not kill the DB. Or, if you had a max concurrency of 100, you would process quickly without killing the DB.
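A minimal sketch of the producer side under that design, assuming an SQS queue already exists (the queue URL below is a placeholder):

var AWS = require('aws-sdk');
var sqs = new AWS.SQS();

// Placeholder URL; substitute your real queue's URL.
var queueUrl = 'https://sqs.us-east-1.amazonaws.com/123456789012/invoice-queue';

// Instead of lambda.invoke(), push each invoice name onto the queue;
// the queue-triggered Lambda then consumes it at a controlled rate.
function enqueueInvoice(invoiceName) {
    return sqs.sendMessage({
        QueueUrl: queueUrl,
        MessageBody: JSON.stringify({ invoice: invoiceName })
    }).promise();
}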
First, you have to initialize your connection outside the handler, so that each time your warm lambda is executed it won't open a new one:
const db = new DBClient();

exports.handler = async (event) => {
    ...
    await db.query(...)
    ...
}
If it is node-pg, there is a package that keeps track of all the idle connections, kills them if necessary, and retries in case of the error "sorry, too many clients already":
https://github.com/MatteoGioioso/serverless-pg
Any other custom-implemented retry mechanism with backoff will work as well.
There is also one for MySQL: https://github.com/jeremydaly/serverless-mysql
These days a good solution to consider for this problem, on AWS, is RDS Proxy, which acts as a transparent proxy between your lambda(s) and database:
Amazon RDS Proxy allows applications to pool and share connections established with the database, improving database efficiency, application scalability, and security.
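In practice the application code barely changes with RDS Proxy; you point the pg pool at the proxy endpoint instead of the database host. A sketch, where every connection value below is a made-up placeholder:

var { Pool } = require('pg');

// host is the RDS Proxy endpoint (placeholder), not the DB instance itself.
var pool = new Pool({
    host: 'my-proxy.proxy-abcdefghij.us-east-1.rds.amazonaws.com',
    database: 'invoices',
    user: 'app_user',
    password: process.env.PGPASSWORD,
    port: 5432
});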
In winston, when I tried logging by passing a mongoose query result as a metadata argument, winston just spat out something like a thousand lines of log before the task quit.
So for a log statement like this:
tSchool.findById(bus.schoolid, function (err, school) {
    winston.info('loaded school', school);
});
Here's a small piece of what gets output:
return _next.apply(this, arguments);
}, remove=function wrappedPointCut() {
var args = [].slice.call(arguments);
var lastArg = args.pop();
var fn;
var originalStack = new Error().stack;
var $results;
if (lastArg && typeof lastArg !== 'function') {
args.push(lastArg);
} else {
fn = lastArg;
}
var promise = new Promise.ES6(function(resolve, reject) {
args.push(function(error) {
if (error) {
// gh-2633: since VersionError is very generic, take the
// stack trace of the original save() function call rather
// than the async trace
if (error instanceof VersionError) {
error.stack = originalStack;
}
_this.$__handleReject(error);
reject(error);
return;
}
// There may be multiple results and promise libs other than
// mpromise don't support passing multiple values to `resolve()`
$results = Array.prototype.slice.call(arguments, 1);
resolve.apply(promise, $results);
});
_this[newName].apply(_this, args);
});
if (fn) {
if (_this.constructor.$wrapCallback) {
fn = _this.constructor.$wrapCallback(fn);
}
return promise.then(
function() {
process.nextTick(function() {
fn.apply(null, [null].concat($results));
});
},
function(error) {
process.nextTick(function() {
fn(error);
});
});
}
return promise;
}
So I wanted to know a few things:
Why is passing a mongoose query result, which is supposed to be just a small JSON object, printing such gibberish?
Will this happen for other objects too, like the err objects in callbacks, etc.?
How do I prevent this? Checking each and every log statement to ensure no query results are passed is not very practical.
Thanks in advance.
Update:
Issues #862, #474 and #914 are tracking/related to this problem, but there hasn't been much progress.
This issue has been fixed in pull request #977 of Winston. You can check out the details on the PR page.
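Until you can upgrade to a version containing that fix, a common workaround (my suggestion, not something from the PR) is to convert the mongoose document to a plain object before logging, so winston doesn't walk the document's internal wrappers:

tSchool.findById(bus.schoolid, function (err, school) {
    // toObject() strips mongoose's internal state and prototype methods,
    // leaving only the raw document fields.
    winston.info('loaded school', school ? school.toObject() : school);
});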
How do I get the number of messages currently enqueued?
My code is basically the following:
function readQueue() {
    var open = require('amqplib').connect(config.rabbitServer);
    open.then(function (conn) {
        var ok = conn.createChannel();
        ok = ok.then(function (ch) {
            ch.prefetch(config.bulkSize);
            setInterval(function () {
                handleMessages();
            }, config.bulkInterval);
            ch.assertQueue(config.inputQueue);
            ch.consume(config.inputQueue, function (msg) {
                if (msg !== null) {
                    pendingMessages.push(msg);
                }
            });
        });
        return ok;
    }).then(null, console.warn);
}
I found nothing in the documentation or while debugging, but I did see a different library that allows this, so I am wondering if amqplib supports it as well.
You can get the queue length with amqplib.
In my case the queue was created with durable: true, so you have to pass that as an option.
var amqp = require('amqplib/callback_api');

amqp.connect(amqp_url, function (err, conn) {
    conn.createChannel(function (err, ch) {
        var q = 'task2_queue';
        ch.assertQueue(q, { durable: true }, function (err, ok) {
            console.log(ok);
        });
    });
});
It will return an object like this:
{ queue: 'task2_queue', messageCount: 34, consumerCount: 2 }
For more information: https://www.squaremobius.net/amqp.node/channel_api.html#channel_assertQueue
I think the assertQueue method call will return an object that contains the current message count. I don't remember the exact property name off-hand, but it should be in there.
The real trick, though, is that this number will never be updated once you call assertQueue. The only way to get an updated message count is to call assertQueue again. This can have some performance implications if you're checking it too frequently.
You should call channel.checkQueue(queueName); you will get back an object like { queue: 'queueName', messageCount: 1, consumerCount: 0 }, where the property messageCount is the exact current number of messages in the queue.
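A short sketch of that call using the callback API, reusing the connection setup and queue name from the answer above:

var amqp = require('amqplib/callback_api');

amqp.connect(amqp_url, function (err, conn) {
    conn.createChannel(function (err, ch) {
        // Unlike assertQueue, checkQueue only inspects an existing queue
        // (and errors if it does not exist); it never creates one.
        ch.checkQueue('task2_queue', function (err, ok) {
            if (err) return console.error(err);
            console.log(ok.messageCount + ' messages currently in the queue');
        });
    });
});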
I couldn't find a direct solution using node, but by using the RabbitMQ management API I was able to get the message count.
After enabling the management plugin of RabbitMQ, the API can be accessed at http://127.0.0.1:15672/api/queues/vhost/name, logging in as user guest with password guest.
var request = require('request');
var count_url = "http://guest:guest@127.0.0.1:15672/api/queues/%2f/" + q;
var mincount = 0;
..........
..........
request({
    url: count_url
}, function (error, response, body) {
    console.log("Called RabbitMQ API");
    if (error) {
        console.error("Unable to fetch Queued Msgs Count" + error);
        return;
    } else {
        var message = JSON.parse(body);
        if (message.hasOwnProperty("messages_ready")) {
            // this DOES NOT count unAck msgs
            var msg_ready = JSON.stringify(message.messages_ready);
            console.log("message.messages_ready=" + msg_ready);
            if (msg_ready == mincount) {
                console.log("mincount reached ..requesting producer");
                // Code to produce msgs ..
            }
        }
        if (message.hasOwnProperty("messages")) {
            // messages = total messages, i.e. including unAck
            var msg = JSON.stringify(message.messages);
            console.log("message.messages=" + msg);
        }
    }
});
I am using Node.js for the backend. I tried this npm package to create a simple workflow (AMAZON-SWF). The package has an example folder containing files which I put in my node project so that I could understand how it works.
The problem is that the Decider is not receiving any tasks from the SWF server, because of which my workflow never runs. Is there some configuration problem? Please point out what errors I have made.
Below is the code for quick reference. The only changes are the version number and the domain name; otherwise it is the same code as the code which you can find here.
Following is the decider code:
var swf = require('./index');

var myDecider = new swf.Decider({
    "domain": "test-domain",
    "taskList": {"name": "my-workflow-tasklist"},
    "identity": "Decider-01",
    "maximumPageSize": 100,
    "reverseOrder": false // IMPORTANT: must replay events in the right order, i.e. from the start
});

myDecider.on('decisionTask', function (decisionTask) {
    console.log("Got a new decision task !");

    if (!decisionTask.eventList.scheduled('step1')) {
        decisionTask.response.schedule({
            name: 'step1',
            activity: 'simple-activity'
        });
    } else {
        decisionTask.response.stop({
            result: "some workflow output data"
        });
    }

    decisionTask.response.respondCompleted(decisionTask.response.decisions, function (err, result) {
        if (err) {
            console.log(err);
            return;
        }
        console.log("responded with some data !");
    });
});

myDecider.on('poll', function (d) {
    //console.log(_this.config.identity + ": polling for decision tasks...");
    console.log("polling for tasks...", d);
});

// Start polling
myDecider.start();

/**
 * It is not recommended to stop the poller in the middle of a long-polling request,
 * because SWF might schedule a DecisionTask to this poller anyway, which will obviously timeout.
 *
 * The .stop() method will wait for the end of the current polling request,
 * eventually wait for a last decision execution, then stop properly:
 */
process.on('SIGINT', function () {
    console.log('Got SIGINT ! Stopping decider poller after this request...please wait...');
    myDecider.stop();
});
Following is the activity code:
/**
 * This simple worker example will respond to any incoming task
 * on the 'my-workflow-tasklist', by setting the input parameters as the results of the task
 */
var swf = require('./index');

var activityPoller = new swf.ActivityPoller({
    domain: 'test-domain-newspecies',
    taskList: { name: 'my-workflow-tasklist' },
    identity: 'simple-activity'
});

activityPoller.on('error', function () {
    console.log('error');
});

activityPoller.on('activityTask', function (task) {
    console.log("Received new activity task !");
    var output = task.input;

    task.respondCompleted(output, function (err) {
        if (err) {
            console.log(err);
            return;
        }
        console.log("responded with some data !");
    });
});

activityPoller.on('poll', function (d) {
    console.log("polling for activity tasks...", d);
});

activityPoller.on('error', function (error) {
    console.log(error);
});

// Start polling
activityPoller.start();

/**
 * It is not recommended to stop the poller in the middle of a long-polling request,
 * because SWF might schedule an ActivityTask to this poller anyway, which will obviously timeout.
 *
 * The .stop() method will wait for the end of the current polling request,
 * eventually wait for a last activity execution, then stop properly:
 */
process.on('SIGINT', function () {
    console.log('Got SIGINT ! Stopping activity poller after this request...please wait...');
    activityPoller.stop();
});
Following is the code which registers the domain, workflow type, and activity type:
var awsswf = require('./index');
var swf = awsswf.createClient();

/**
 * Register the domain "test-domain-newspecies"
 */
swf.registerDomain({
    name: "test-domain-newspecies",
    description: "this is a just a test domain",
    workflowExecutionRetentionPeriodInDays: "3"
}, function (err, results) {
    if (err && err.code != 'DomainAlreadyExistsFault') {
        console.log("Unable to register domain: ", err);
        return;
    }
    console.log("'test-domain-newspecies' registered !");

    /**
     * Register the WorkflowType "simple-workflow"
     */
    swf.registerWorkflowType({
        domain: "test-domain-newspecies",
        name: "simple-workflow",
        version: "2.0"
    }, function (err, results) {
        if (err && err.code != 'TypeAlreadyExistsFault') {
            console.log("Unable to register workflow: ", err);
            return;
        }
        console.log("'simple-workflow' registered !");

        /**
         * Register the ActivityType "simple-activity"
         */
        swf.registerActivityType({
            domain: "test-domain-newspecies",
            name: "simple-activity",
            version: "2.0"
        }, function (err, results) {
            if (err && err.code != 'TypeAlreadyExistsFault') {
                console.log("Unable to register activity type: ", err);
                return;
            }
            console.log("'simple-activity' registered !");
        });
    });
});
Following is the code which starts the workflow execution:
var swf = require('./index');

var workflow = new swf.Workflow({
    "domain": "test-domain-newspecies",
    "workflowType": {
        "name": "simple-workflow",
        "version": "2.0"
    },
    "taskList": { "name": "my-workflow-tasklist" },
    "executionStartToCloseTimeout": "1800",
    "taskStartToCloseTimeout": "1800",
    "tagList": ["example"],
    "childPolicy": "TERMINATE"
});

var workflowExecution = workflow.start({ input: "any data ..." }, function (err, runId) {
    if (err) { console.log("Cannot start workflow : ", err); return; }
    console.log("Workflow started, runId: " + runId);
});
Following is the index.js file:
var basePath = "../node_modules/aws-swf/lib/";
exports.AWS = require('aws-swf').AWS;
exports.AWS.config.loadFromPath(__dirname + '/../config/awsConfig.json');
exports.createClient = require(basePath+"swf").createClient;
exports.Workflow = require(basePath+"workflow").Workflow;
exports.WorkflowExecution = require(basePath+"workflow-execution").WorkflowExecution;
exports.ActivityPoller = require(basePath+"activity-poller").ActivityPoller;
exports.ActivityTask = require(basePath+"activity-task").ActivityTask;
exports.Decider = require(basePath+"decider").Decider;
exports.DecisionTask = require(basePath+"decision-task").DecisionTask;
exports.EventList = require(basePath+"event-list").EventList;
exports.DecisionResponse = require(basePath+"decision-response").DecisionResponse;
exports.Poller = require(basePath+"poller").Poller;
The way I run this code is by opening three terminals simultaneously, then executing the following commands in the respective terminals.
activity:
node <activity-file-name>
decider:
node <decider-file-name>
register and start I run in the same terminal:
node <register-file-name>
node <start-file-name>
It stands out that in the decider you are using "test-domain", but in the rest of the code you are using "test-domain-newspecies".
If the domain "test-domain" is not registered you should get an UnknownResourceFault error when polling for a decision task.
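So the likely fix is simply to point the decider at the domain that was actually registered:

// Same decider configuration as in the question, with the domain aligned
// to the one the registration script creates.
var myDecider = new swf.Decider({
    "domain": "test-domain-newspecies",
    "taskList": {"name": "my-workflow-tasklist"},
    "identity": "Decider-01",
    "maximumPageSize": 100,
    "reverseOrder": false
});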
I have a small data gathering web app running with NodeJS and Couchbase. The requirement is that when a 3rd party pushes some data to us and we are able to process it, we return a 200 header, but if there are any problems with storing that data, we return a 500. This means they can retry with the failed data batch.
I'm having an issue where the 200 is always returned (because the DB calls are completed asynchronously). Here's an example:
...
var app = express();

function create(req, res) {
    var error = false;

    // Parse all the entries in the request
    for (var i = 0; i < req.body.length; i++) {
        var event = req.body[i];
        if (!event.email) {
            // log error to file
            error = true;
            res.send("Event object does not have an email address!", 500);
        }

        // Create the id index value
        var event_id = 'blah';

        // See if the record already exists
        db.get(event_id, function (err, result) {
            var doc = result.value;
            if (doc === undefined) {
                // Add a new record
                db.add(event_id, event, function (err, result) {
                    if (err) {
                        error = true;
                        res.send('There were processing errors', 500);
                    }
                });
            }
        });
    }

    if (error)
        res.send("Try again", 500);
    else
        res.send("OK", 200);
}

app.post('/create', create);
Is there a way of making the app wait for those DB calls to complete, i.e. for this function to be synchronous? Or am I using the wrong tech for this? :(
I decided to go with NodeJS+Couchbase because we are likely to have a very high volume of calls, where the data (small JSON objects) must be written, read and deleted. EDIT: Ah, the data structure is likely to change for various events, so being able to store non-uniformly shaped documents is of a great advantage!
This is a typical use case for the async library, which is a utility-belt library with lots of patterns to work with asynchronous functions.
Since you need to call an asynchronous function for each record, you can use async.each, which executes an asynchronous function for all elements of an array. A last callback is called when all asynchronous tasks are finished.
var async = require('async');
var app = express();

function handleEvent(event, callback) {
    if (!event.email) {
        return callback(new Error('Event object does not have an email address!'));
    }

    var event_id = 'blah';

    db.get(event_id, function (err, result) {
        var doc = result.value;
        if (doc === undefined) {
            // Add a new record
            db.add(event_id, event, function (err, result) {
                if (err) {
                    callback(new Error('There were processing errors'));
                } else {
                    callback(null);
                }
            });
        } else {
            // record already exists, nothing to do
            callback(null);
        }
    });
}

function create(req, res) {
    // https://github.com/caolan/async#each
    async.each(req.body, handleEvent, function (err) {
        if (err)
            res.send(err.message, 500);
        else
            res.send('OK', 200);
    });
}