Inside a Lambda function I am trying to run a for loop that parses and sends SQS messages to a certain queue. The loop runs and builds the params correctly (I checked via logging), and a log message just after the loop confirms the Lambda finishes.
The issue is that the SQS messages are never sent and never arrive in the SQS queue.
I haven't included the rest of the Lambda function because it is just noise and doesn't relate to the issue; it already runs correctly, and the only problem is with the SQS messages.
for (var i = 0; i < dogs.length; i++) {
    let MessageBody = JSON.stringify(dogs[i]);
    let params = {
        MessageBody,
        QueueUrl: process.env.serviceQueue,
        DelaySeconds: 0
    };
    sqs.sendMessage(params, function(err, data) {
        if (err) {
            logger.error(`sqs.sendMessage: Error message: ${err}`);
        } else {
            let stringData = JSON.stringify(data);
            logger.info(`sqs.sendMessage: Data: ${stringData}`);
        }
    });
}
Iterating over multiple async requests with callbacks is a recipe for disaster, as well as messy code: the handler returns before the sendMessage callbacks ever run, so nothing actually gets sent. I'd recommend the below (using async/await):
await Promise.all(dogs.map(async (dog) => {
    let params = {
        MessageBody: JSON.stringify(dog),
        QueueUrl: process.env.serviceQueue,
        DelaySeconds: 0
    };
    let data = await sqs.sendMessage(params).promise().catch(err => {
        logger.error(`sqs.sendMessage: Error message: ${err}`);
    });
    logger.info(`sqs.sendMessage: Data: ${JSON.stringify(data)}`);
}));
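If the list can get large, you can also cut the number of API calls by batching up to 10 messages per request (the SQS batch limit) with sendMessageBatch. A rough sketch; the chunk() helper and the index-based Id scheme are just illustrative assumptions, not part of the original code:
// Split dogs into groups of at most 10 (the SQS batch limit).
const chunk = (arr, size) =>
    arr.reduce((acc, _, i) => (i % size ? acc : [...acc, arr.slice(i, i + size)]), []);

await Promise.all(chunk(dogs, 10).map(async (batch, b) => {
    const params = {
        QueueUrl: process.env.serviceQueue,
        Entries: batch.map((dog, i) => ({
            Id: `${b}-${i}`, // must be unique within the batch
            MessageBody: JSON.stringify(dog),
            DelaySeconds: 0
        }))
    };
    const data = await sqs.sendMessageBatch(params).promise();
    logger.info(`sqs.sendMessageBatch: Data: ${JSON.stringify(data)}`);
}));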
I have created a NodeJS application to insert data into a MongoDB collection. The database insertion is done through Kafka; kafka-node is the package I have used to call Kafka.
At the producer level I can create the topic and send a message to the consumer. The message and topic are taken from the POST request.
This is how I call Kafka; the parameters are the topic and the message.
Every time I call this API, the producer creates a new message and sends it to the consumer, but on each call all of the previous messages are returned to the consumer as well.
I have tried both the fromOffset: 'earliest' and fromOffset: 'latest' options to keep the previous messages from being re-consumed, but it is not working.
Can anyone give me a suggestion?
Version of Kafka-node
"kafka-node": "^5.0.0",
Code I have used
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);
var Producer = kafka.Producer,
    client = new kafka.KafkaClient(),
    offset = new kafka.Offset(client),
    Consumer = kafka.Consumer,
    producer = new Producer(client);

producer.on('ready', function () {
    console.log('Producer is ready');
});

producer.on('error', function (err) {
    console.log('Producer is in error state');
    console.log(err);
});
const createProducer = async (req, res, next) => {
    var topic = req.body.topic;
    var sentMessage = JSON.stringify(req.body.messages);

    producer.send(payloads, async function (err, data) {
    });

    client = new kafka.KafkaClient(),
    consumer = new Consumer(client,
        [
            { topic: topic, partition: 0 }
        ],
        {
            autoCommit: false,
            fromOffset: 'earliest'
        }
    );

    consumer.on('message', async function (message) {
        console.log("Message : " + JSON.stringify(message))
        try {
            var currentdate = new Date();
            var datetime = "Last Sync: " + currentdate.getDate() + "/"
                + (currentdate.getMonth() + 1) + "/"
                + currentdate.getFullYear() + " # "
                + currentdate.getHours() + ":"
                + currentdate.getMinutes() + ":"
                + currentdate.getSeconds();

            var abb = await createListing(mongoClient,
                {
                    topic: topic,
                    message: sentMessage,
                    time: datetime
                }
            );
        } catch (e) {
            console.error(":" + e);
        } finally {
        }
    });

    await mongoClient.close();

    res.send({
        message: 'Successfully send data from producer',
        payloads: payloads
    })

    async function createListing(client, newListing) {
        await mongoClient.connect();
        const result = await client.db("sample_airbnb").collection("listingsAndReviews").insertOne(newListing);
        console.log(`New listing created with the following id: ${result.insertedId}`);
        return result.insertedId;
    }
}
The messages consumed at the consumer include all of the previously sent messages.
Thanks,
Your consumer will always consume all offsets that have not been marked as consumed by its consumer group before.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster, to effectively mark those messages as consumed. Only then will your consumer group not re-consume those messages on startup.
To commit your offsets, you can either use kafka.js’s autoCommit feature (which you explicitly disabled in your implementation), or manually commit your offsets using the API provided by kafka.js.
You can find the documentation to both here: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
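Since the question uses kafka-node rather than kafka.js, here is a rough equivalent sketch with kafka-node: either let the consumer auto-commit offsets, or keep autoCommit: false and commit explicitly after processing. The groupId and the handleMessage step are placeholder assumptions; the option defaults are worth double-checking against the kafka-node README.
// Option 1: let kafka-node auto-commit offsets for this consumer group.
var consumer = new Consumer(client,
    [{ topic: topic, partition: 0 }],
    { groupId: 'my-consumer-group', autoCommit: true, autoCommitIntervalMs: 5000 }
);

// Option 2: keep autoCommit: false and commit explicitly after processing.
consumer.on('message', async function (message) {
    await handleMessage(message); // placeholder processing step
    consumer.commit(function (err, data) {
        if (err) console.error('offset commit failed: ' + err);
    });
});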
I made some changes in the code; now I can retrieve the latest message from my topic.
I created the consumer inside offset.fetchLatestOffsets([topic], cb) and made some changes to the consumer options.
var payloads = [
    { topic: topicName, messages: messageTotopic, partition: 0 }
];

producer.send(payloads, async function (err, data) {
});

var client = new kafka.KafkaClient();

offset.fetchLatestOffsets([topic], async function (error, offsets) {
    if (error)
        console.log(error);

    offsetA = JSON.stringify(offsets[topic][0])
    console.log('offset Value:: ' + offsetA);

    var consumer = new Consumer(
        client,
        [
            {
                topic: topic,
                partition: 0,
                offset: offsetA - 1, // Offset value starts from 0
            }
        ],
        {
            autoCommit: false,
            fromOffset: true,
        }
    );

    consumer.on('message', async function (message) {
        console.log("Message from last offset:: " + JSON.stringify(message)); // will return the latest message.
        consumer.close();
    });
});
This way I am able to overcome the memory-leak issue related to the event emitters in the KafkaClient.
I have a ScheduledEvent on my Lambda function that fires every 24 hours, and inside the function I call SQS to get my messages.
export class EmailNotificationProcessor {
    public static async run(): Promise<void> {
        console.log('event');
        await this.getNotificationFromSqs();
    }

    private static async getNotificationFromSqs(): Promise<void> {
        const messagesToDelete: DeleteMessageBatchRequestEntryList = [];
        const messageRequest: ReceiveMessageRequest = {
            QueueUrl: process.env.DID_NOTIFICATION_SQS_QUEUE,
            MaxNumberOfMessages: 10,
            WaitTimeSeconds: 20
        };
        const { Messages }: ReceiveMessageResult = await receiveMessage(messageRequest);
        console.log('Messages', Messages);
        console.log('Total Messages ', Messages.length);
        if (Messages && Messages.length > 0) {
            for (const message of Messages) {
                console.log('body is ', message.Body);
                messagesToDelete.push({
                    Id: message.MessageId,
                    ReceiptHandle: message.ReceiptHandle,
                } as DeleteMessageBatchRequestEntry);
            }
        }
        await deleteMessages(messagesToDelete);
    }
}
I am expecting 1 to 30 messages in my queue, and I want to process all of them before sending an email built from the content I parse out of the SQS message bodies.
My function for receiving messages
export const receiveMessage = async (request: SQS.ReceiveMessageRequest): Promise<PromiseResult<SQS.ReceiveMessageResult, AWSError>> => {
    console.log('inside receive');
    return sqs.receiveMessage(request).promise();
};
Now I am not able to receive all the messages at once; I am only getting 3 messages, or sometimes just 1, at a time.
I know the limit for a single API call is 10 messages, but is there any way to wait and get all of my messages?
First of all, there is no configuration that will return more than 10 messages from the queue in a single call:
ReceiveMessage: Retrieves one or more messages (up to 10), from the specified queue
For your other problem: I think you are using a short-polling ReceiveMessage call. With short polling, if the number of messages in the queue is extremely small, you might not receive any messages in a particular ReceiveMessage response.
Try Long Polling:
Long polling helps reduce the cost of using Amazon SQS by eliminating the number of empty responses (when there are no messages available for a ReceiveMessage request) and false empty responses (when messages are available but aren't included in a response).
Note: to get more messages you need to wrap the call to SQS in a loop and keep requesting until the queue is empty. This can also hand you duplicate messages, so use VisibilityTimeout to deal with that (a params sketch follows the sample code below).
Try VisibilityTimeout: The duration (in seconds) that the received messages are hidden from subsequent retrieve requests after being retrieved by a ReceiveMessage request.
Sample code that wraps the SQS call in a loop:
function getMessages(params, count = 0, callback, allMessages = []) {
    // allMessages accumulates the results across the recursive retries.
    sqs.receiveMessage(params, function (err, data) {
        if (err || (data && !data.Messages || data.Messages.length <= 0)) {
            if (++count >= config.SQSRetries) {
                return callback(null, allMessages);
            }
            return setTimeout(() => {
                return getMessages(params, count, callback, allMessages);
            }, 500);
        } else if (++count !== config.SQSRetries) {
            allMessages.push(data);
            return setTimeout(() => {
                return getMessages(params, count, callback, allMessages);
            }, 500);
        } else {
            allMessages.push(data);
            callback(null, allMessages);
        }
    });
}
We set config.SQSRetries according to our own requirements; since your SQS queue holds 1 to 30 messages, a value of 7 should work well for you.
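For reference, here is roughly what the params passed into such a wrapper would look like with long polling and a visibility timeout enabled. The queue URL reuses the environment variable from the question; the exact values are assumptions to tune:
const params = {
    QueueUrl: process.env.DID_NOTIFICATION_SQS_QUEUE,
    MaxNumberOfMessages: 10,  // hard per-request maximum
    WaitTimeSeconds: 20,      // long polling: wait up to 20 s for messages to arrive
    VisibilityTimeout: 60     // hide received messages from other consumers for 60 s
};

getMessages(params, 0, (err, batches) => {
    const messages = batches.flatMap(b => b.Messages || []);
    console.log(`Received ${messages.length} messages in total`);
});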
Links: ReceiveMessage, User Guide
Does anybody have experience building concurrent AWS Lambda functions with Postgres?
I have to build a Lambda cron that ingests thousands of invoices into a Postgres database, and I have to invoke the ingestion Lambda concurrently, once per invoice. The problem is that, because it is concurrent, each instance of the ingestion function creates its own connection to the database. With 1000 invoices to ingest, each invoice invokes a Lambda, which opens 1000 database connections. This exhausts the maximum number of connections Postgres can handle, and some of the invoked instances return an error saying there are no more connections available.
Any tips on how to handle this problem?
Here are some snippets of my code:
ingestInvoiceList.js
var AWS = require('aws-sdk');
var sftp = require('ssh2-sftp-client');

var lambda = new AWS.Lambda();

exports.handler = async (event) => {
    ...
    let folder_contents;
    try {
        // fetch list of Zip format invoices
        folder_contents = await sftp.list(client_folder);
    } catch (err) {
        console.log(`[${client}]: ${err.toString()}`);
        throw new Error(`[${client}]: ${err.toString()}`);
    }

    let invoiceCount = 0;
    let funcName = 'ingestInvoice';

    for (let item of folder_contents) {
        if (item.type === '-') {
            let payload = JSON.stringify({
                invoice: item.name
            });
            let params = {
                FunctionName: funcName,
                Payload: payload,
                InvocationType: 'Event'
            };

            // invoke ingest invoice concurrently
            let result = await new Promise((resolve) => {
                lambda.invoke(params, (err, data) => {
                    if (err) resolve(err);
                    else resolve(data);
                });
            });

            console.log('result: ', result);
            invoiceCount++;
        }
    }
    ...
}
ingestInvoice.js
var AWS = require('aws-sdk');
var sftp = require('ssh2-sftp-client');
var fs = require('fs');        // needed for fs.readFile below
var JSZip = require('jszip');  // needed for JSZip.loadAsync below
var DBClient = require('./db.js');

var lambda = new AWS.Lambda();

exports.handler = async (event) => {
    ...
    let invoice = event.invoice;
    let client = 'client name';

    let db = new DBClient();

    try {
        console.log(`[${client}]: Extracting documents from ${invoice}`);

        try {
            // get zip file from sftp server
            await sftp.fastGet(invoice, '/tmp/tmp.zip', {});
        } catch (err) {
            throw err;
        }

        let zip;
        try {
            // extract the zip file...
            zip = await new Promise((resolve, reject) => {
                fs.readFile("/tmp/tmp.zip", async function (err, data) {
                    if (err) return reject(err);

                    let unzippedData;
                    try {
                        unzippedData = await JSZip.loadAsync(data);
                    } catch (err) {
                        return reject(err);
                    }

                    return resolve(unzippedData);
                });
            });
        } catch (err) {
            throw err;
        }

        let unibillRegEx = /unibill.+\.txt/g;
        let files = [];

        zip.forEach(async (path, entry) => {
            if (unibillRegEx.exec(entry.name)) {
                files['unibillObj'] = entry;
            } else {
                files['pdfObj'] = entry;
            }
        });

        // await db.getClient().connect();
        await db.setSchema(client);
        console.log('Schema has been set.');

        let unibillStr = await files.unibillObj.async('string');
        console.log('ingesting ', files.unibillObj.name);

        // Do ingestion queries here...
        ...

        await uploadInvoiceDocsToS3(client, files);
    } catch (err) {
        console.error(err.stack);
        throw err;
    } finally {
        try {
            // console.log('Disconnecting from database...');
            // await db.endClient();
            console.log('Disconnecting from SFTP...');
            await sftp.end();
        } catch (err) {
            console.log('ERROR: ' + err.toString());
            throw err;
        }
    }
    ...
}
db.js
var { Pool } = require('pg');

module.exports = class DBClient {
    constructor() {
        this.pool = new Pool();
    }

    async setSchema(schema) {
        await this.execQuery(`SET search_path TO ${schema}`);
    }

    async execQuery(sql) {
        return await this.pool.query(sql);
    }
}
Any answer would be appreciated, thank you!
I see two ways to handle this. Ultimately it depends on how fast you want to process this data.
Change the concurrency setting for your Lambda to a "reserved concurrency" (an SDK sketch is shown after these two options).
This will allow you to limit the number of concurrent Lambdas running (see this link for more details).
Change your code to queue the work to be done in an SQS queue. From there you would create another Lambda, triggered by the queue, to process it as needed. That Lambda could decide how much to pull off the queue at a time, and it too would likely need a concurrency limit. But you could tune it to, for example, run for the maximum of 15 minutes, which may be enough to empty the queue without killing the DB. Or, if you had, say, a max concurrency of 100, you would process quickly without killing the DB.
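For the first option, reserved concurrency can be set in the console or programmatically; a minimal sketch with the AWS SDK, where the function name and the limit of 50 are placeholder assumptions:
const AWS = require('aws-sdk');
const lambda = new AWS.Lambda();

// Cap the ingestion Lambda at 50 concurrent executions so it cannot
// open more database connections than Postgres can handle.
lambda.putFunctionConcurrency({
    FunctionName: 'ingestInvoice',            // placeholder name
    ReservedConcurrentExecutions: 50          // placeholder limit
}).promise().then(() => console.log('Reserved concurrency applied'));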
First, you have to initialize your connection outside the handler, so that each time your warm Lambda executes it won't open a new one:
const db = new DBClient();

exports.handler = async (event) => {
    ...
    await db.query(...)
    ...
}
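A slightly fuller sketch of the same pattern with node-pg, assuming the connection settings come from the standard PG* environment variables; the small max and the query itself are only illustrative:
const { Pool } = require('pg');

// Created once per Lambda container and reused by every warm invocation.
const pool = new Pool({ max: 2 }); // keep the per-container connection count small

exports.handler = async (event) => {
    const { rows } = await pool.query('SELECT 1 AS ok'); // illustrative query
    return rows[0];
};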
If it is node-pg, there is a package that keeps track of all the idle connections, kills them if necessary, and retries in case of the error "sorry, too many clients already":
https://github.com/MatteoGioioso/serverless-pg
Any other custom retry mechanism with backoff will work as well.
There is one for MySQL as well: https://github.com/jeremydaly/serverless-mysql
These days a good solution to consider for this problem, on AWS, is RDS Proxy, which acts as a transparent proxy between your lambda(s) and database:
Amazon RDS Proxy allows applications to pool and share connections established with the database, improving database efficiency, application scalability, and security.
How does my function continuously check for an incoming message? The following function exits after receiving a message. Considering that long polling has been enabled for the queue, how do I continuously check for new messages?
function checkMessage() {
    var params = {
        QueueUrl: Constant.QUEUE_URL,
        VisibilityTimeout: 0,
        WaitTimeSeconds: 0
    }
    sqs.receiveMessage(params, (err, data) => {
        if (data) {
            console.log("%o", data);
        }
    });
}
Your function would need to continually poll Amazon SQS.
Long Polling will delay a response by up to 20 seconds if there are no messages available. If a message becomes available during that period, it will be immediately returned. If there is no message after 20 seconds, it returns without providing a message.
Therefore, your function would need to poll SQS again (perhaps doing something else in the meantime).
var processMessages = (function (err, data) {
    if (data.Messages) {
        for (i = 0; i < data.Messages.length; i++) {
            var message = data.Messages[i];
            var body = JSON.parse(message.Body);
            // process message
            // delete if successful
        }
    }
});

while (true) {
    sqs.receiveMessage({
        QueueUrl: sqsQueueUrl,
        MaxNumberOfMessages: 5, // how many messages to retrieve in a batch
        VisibilityTimeout: 60,  // how long until these messages are available to another consumer
        WaitTimeSeconds: 15     // how many seconds to wait for messages before continuing
    }, processMessages);
}
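Note that a plain while (true) around the callback-style receiveMessage fires requests without waiting for the previous response. With the SDK's .promise() you can await each poll instead; a rough sketch, where the queue URL and the processing step are assumptions:
async function pollForever() {
    while (true) {
        // Long polling: this call waits up to WaitTimeSeconds before returning.
        const data = await sqs.receiveMessage({
            QueueUrl: sqsQueueUrl,
            MaxNumberOfMessages: 5,
            VisibilityTimeout: 60,
            WaitTimeSeconds: 20
        }).promise();

        for (const message of data.Messages || []) {
            const body = JSON.parse(message.Body);
            // ... process the message ...
            await sqs.deleteMessage({
                QueueUrl: sqsQueueUrl,
                ReceiptHandle: message.ReceiptHandle
            }).promise();
        }
    }
}

pollForever().catch(console.error);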
(function checkMessage() {
    var params = {
        QueueUrl: Constant.QUEUE_URL,
        VisibilityTimeout: 0,
        WaitTimeSeconds: 0
    }
    sqs.receiveMessage(params, (err, data) => {
        if (data) {
            console.log("%o", data);
        }
        checkMessage()
    });
})()
To continuously check for incoming messages in your AWS SQS queue, you will want to recursively call SQS whenever a response is returned, as in the snippet above.
How do I get the number of messages currently enqueued?
My code is basically the following:
function readQueue() {
    var open = require('amqplib').connect(config.rabbitServer);

    open.then(function (conn) {
        var ok = conn.createChannel();

        ok = ok.then(function (ch) {
            ch.prefetch(config.bulkSize);

            setInterval(function () {
                handleMessages();
            }, config.bulkInterval);

            ch.assertQueue(config.inputQueue);
            ch.consume(config.inputQueue, function (msg) {
                if (msg !== null) {
                    pendingMessages.push(msg);
                }
            });
        });

        return ok;
    }).then(null, console.warn);
}
I found nothing in the documentation or while debugging, and I did see a different library that allows this, so I am wondering if amqplib supports it as well.
You can get the queue-length with amqplib.
In my case the queue was declared with 'durable: true', so you have to pass that as an option.
var amqp = require('amqplib/callback_api');

amqp.connect(amqp_url, function (err, conn) {
    conn.createChannel(function (err, ch) {
        var q = 'task2_queue';

        ch.assertQueue(q, { durable: true }, function (err, ok) {
            console.log(ok);
        });
    });
});
It will return an object like this:
{ queue: 'task2_queue', messageCount: 34, consumerCount: 2 }
For more information: https://www.squaremobius.net/amqp.node/channel_api.html#channel_assertQueue
I think the assertQueue method call will return an object that contains the current message count. I don't remember the exact property name off-hand, but it should be in there.
The real trick, though, is that this number will never be updated once you call assertQueue. The only way to get an updated message count is to call assertQueue again. This can have some performance implications if you're checking it too frequently.
You should call channel.checkQueue(queueName); you will then get back an object like { queue: 'queueName', messageCount: 1, consumerCount: 0 }, where the property messageCount is the exact current number of messages in the queue.
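A minimal sketch of that with amqplib's promise API (the broker URL and queue name are assumptions); calling checkQueue again is how you refresh the count:
const amqp = require('amqplib');

(async () => {
    const conn = await amqp.connect('amqp://localhost'); // assumed broker URL
    const ch = await conn.createChannel();

    // checkQueue does not modify the queue; it just returns its current stats.
    const { messageCount, consumerCount } = await ch.checkQueue('task2_queue');
    console.log(`messages: ${messageCount}, consumers: ${consumerCount}`);

    await conn.close();
})();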
I couldn't find a direct solution using Node, but by using the RabbitMQ management API I was able to get the message count.
After enabling the RabbitMQ management plugin, the API can be accessed at http://127.0.0.1:15672/api/queues/vhost/name, logging in as user guest with password guest.
var request = require('request');

var count_url = "http://guest:guest@127.0.0.1:15672/api/queues/%2f/" + q;
var mincount = 0;
..........
..........
request({
    url: count_url
}, function (error, response, body) {
    console.log("Called RabbitMQ API");

    if (error) {
        console.error("Unable to fetch Queued Msgs Count" + error);
        return;
    } else {
        var message = JSON.parse(body);

        if (message.hasOwnProperty("messages_ready")) {
            // this DOES NOT COUNT UnAck msgs
            var msg_ready = JSON.stringify(message.messages_ready);
            console.log("message.messages_ready=" + msg_ready);

            if (msg_ready == mincount) {
                console.log("mincount Reached ..Requesting Producer");
                /// Code to Produce msgs ..
            }
        }

        if (message.hasOwnProperty("messages")) {
            // _messages_ total messages i.e. including unAck
            var msg = JSON.stringify(message.messages);
            console.log("message.messages=" + msg);
        }
    }
});