Wrong commit order when using autoCommit=false in HighlevelConsumer - node.js

I'm using a HighlevelProducer and HighlevelConsumer to send and receive Messages. The HighlevelConsumer is configured with autoCommit=false as I want to commit Messages only when it was produced successfully. The problem is, that the first message never really gets commited.
Example:
Send Messages 1-10.
Receive Message 1
Receive Message 2
Commit Message 2
...
Receive Message 10
Commit Message 10
Commit Message 1
If I restart my Consumer, all messages from 1 to 10 are processed again. Only if I send new messages to the consumer, the old messages get committed. This happens for any number of messages.
My Code reads as follows:
var kafka = require('kafka-node'),
HighLevelConsumer = kafka.HighLevelConsumer,
client = new kafka.Client("localhost:2181/");
consumer = new HighLevelConsumer(
client,
[
{ topic: 'mytopic' }
],
{
groupId: 'my-group',
id: "my-consumer-1",
autoCommit: false
}
);
consumer.on('message', function (message) {
console.log("consume: " + message.offset);
consumer.commit(function (err, data) {
console.log("commited:" + message.offset);
});
console.log("consumed:" + message.offset);
});
process.on('SIGINT', function () {
consumer.close(true, function () {
process.exit();
});
});
process.on('exit', function () {
consumer.close(true, function () {
process.exit();
});
});
var messages = 10;
var kafka = require('kafka-node'),
HighLevelProducer = kafka.HighLevelProducer,
client = new kafka.Client("localhost:2181/");
var producer = new HighLevelProducer(client, { partitionerType: 2, requireAcks: 1 });
producer.on('error', function (err) { console.log(err) });
producer.on('ready', function () {
for (i = 0; i < messages; i++) {
payloads = [{ topic: 'mytopic', messages: "" }];
producer.send(payloads, function (err, data) {
err ? console.log(i + "err", err) : console.log(i + "data", data);
});
}
});
Am I doing something wrong or is this a bug in kafka-node?

A commit of message 2 is an implicit commit of message 1.
As you commits are done asynchronously, and commit of message 1 and message 2 are done quick after each other (ie, committing 2 happens before the consumer did send the commit of 1), the first commit will not happen explicitly and only a single commit of message 2 will be sent.

Related

How to consume the latest message from Kafka-consumer using NodeJs?

I have created a NodeJS application to insert data into a MongoDB collection. This database insertion is done by using a Kafka. Kafka-node is the plugin I have used to call Kafka.
I can create the topic and send a message to the consumer at the producer level. The message and topic are taken from the POST request.
This is how I call the Kafka. Parameters are topic and message.
Every time I call this API, the producer is creating a new message and sent it to the consumer. In each call, all previous messages will be returned to the consumer.
I have used the fromOffset: 'earliest' and fromOffset: 'latest' options to restrict the previous messages, not working.
Can anyone give me a suggestion?
Version of Kafka-node
"kafka-node": "^5.0.0",
Code I have used
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);
var Producer = kafka.Producer,
client = new kafka.KafkaClient(),
offset = new kafka.Offset(client),
Consumer = kafka.Consumer,
producer = new Producer(client);
producer.on('ready', function () {
console.log('Producer is ready');
});
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
})
const createProducer = async(req,res,next) => {
var topic = req.body.topic;
var sentMessage = JSON.stringify(req.body.messages);
producer.send(payloads, async function( err, data) {
})
client = new kafka.KafkaClient(),
consumer = new Consumer(client,
[
{ topic: topic, partition: 0 }
],
{
autoCommit: false,
fromOffset: 'earliest'
}
);
consumer.on('message', async function (message) {
console.log("Message : "+JSON.stringify(message))
try {
var currentdate = new Date();
var datetime = "Last Sync: " + currentdate.getDate() + "/"
+ (currentdate.getMonth()+1) + "/"
+ currentdate.getFullYear() + " # "
+ currentdate.getHours() + ":"
+ currentdate.getMinutes() + ":"
+ currentdate.getSeconds();
var abb = await createListing(mongoClient,
{
topic: topic,
message: sentMessage,
time: datetime
}
);
} catch (e) {
console.error(":"+e);
}
finally {
}
});
await mongoClient.close();
res.send({
message: 'Successfully send data from producer',
payloads: payloads
})
async function createListing(client, newListing){
await mongoClient.connect();
const result = await
client.db("sample_airbnb").collection("listingsAndReviews").insertOne(newListing);
console.log(`New listing created with the following id: ${result.insertedId}`);
return result.insertedId;
}
}
The Messages consumed at the consumer are
Thanks,
You consumer will always consume all offsets that have not been marked consumed by its consumer group before.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster, to effectively mark those messages as consumed. Only then will your consumer group not re-consume those messages on startup.
To commit your offsets, you can either use kafka.js’s autoCommit feature (which you explicitly disabled in your implementation), or manually commit your offsets using the API provided by kafka.js.
You can find the documentation to both here: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
I made some changes in the code, Now I can retrieve the latest message from my topic.
I have created consumer inside the offset.fetchLatestOffsets([topics],cb), and made some changes in the consumer options.
var payloads = [
{ topic: topicName, messages: messageTotopic, partition: 0}
];
producer.send(payloads, async function(err, data) {
});
var client = new kafka.KafkaClient();
offset.fetchLatestOffsets([topic], async function (error, offsets) {
if (error)
console.log(error);
offsetA = JSON.stringify(offsets[topic][0])
console.log('offset Value:: '+offsetA);
var consumer = new Consumer(
client,
[
{
topic: topic,
partition: 0,
offset: offsetA-1, // Offset value starts from 0
}
], {
autoCommit: false,
fromOffset: true,
}
);
consumer.on('message', async function (message) {
console.log("Message from last offset:: " + JSON.stringify(message)); // will return the latest message.
consumer.close();
});
});
Using this way I am able to overcome the memory leakage issue related to the event emitters in the KafkaClient.

async for loop not putting sqs message in queue

I am trying to inside a Lambda function run a for loop which will parse and send SQS messages to a certai queue. Currently it is running the for loop and creating the params properly (I checked via logging) and is running a log message just outside/after the for loop saying the lambda is done.
Issue is that the SQS message isn't being sent and/or arriving in the SQS queue.
I haven't inclued the rest of the lambda function as it is just noise and doesn't relate to the issue since it is running correctly already, the only issue is with the sqs message.
for (var i = 0; i < dogs.length; i++) {
let MessageBody = JSON.stringify(dogs[i]);
let params = {
MessageBody,
QueueUrl: process.env.serviceQueue,
DelaySeconds: 0
};
sqs.sendMessage(params, function(err, data) {
if (err) {
logger.error(`sqs.sendMessage: Error message: ${err}`);
} else {
let stringData = JSON.stringify(data);
logger.info(`sqs.sendMessage: Data: ${stringData}`);
}
});
}
iterating over multiple async requests, and using callback is a recipe for disaster as well as messy code. Id recommend the below (using async/await)
await Promise.all(dogs.map(async (dog) => {
let params = {
MessageBody: JSON.stringify(dog),
QueueUrl: process.env.serviceQueue,
DelaySeconds: 0
}
let data = await sqs.sendMessage(params).promise().catch(err => {
logger.error(`sqs.sendMessage: Error message: ${err}`);
});
logger.info(`sqs.sendMessage: Data: ${JSON.stringify(data)}`);
}));

Node Js / Typescript - AMQP Consumer

I am trying my hand at node.js/typescript for the first time and having a bit of trouble making a consumer for a rabbit queue.
Code:
let amqp = require('amqp');
let connection = amqp.createConnection({url: "amqp://" + RABBITMQ_USER + ":" + RABBITMQ_PASSWORD + "#" + RABBITMQ_HOST + ":" + RABBITMQ_PORT + RABBITMQ_VHOST});
connection.on('ready', function() {
connection.exchange(RABBITMQ_WORKER_EXCHANGE, function (exchange) {
connection.queue(RABBITMQ_QUEUE, function (queue) {
queue.bind(exchange, function() {
queue.publish(function (message) {
console.log('subscribed to queue');
let encoded_payload = unescape(message.data);
let payload = JSON.parse(encoded_payload);
console.log('Received a message:');
console.log(payload);
})
})
})
})
})
It seems to connect to the amqp server and throws no errors but it just sits there and doesn't consume anything. Is there a step I am missing?
Any help would be greatly appreciated,
Thank you.
Here is my solution that is working based off of amqp's JS tutorial.
https://www.rabbitmq.com/tutorials/tutorial-three-javascript.html
Probably not up to TypeScript standards, feel free to correct me if there's a better way.
#!/usr/bin/env node
require('dotenv').config();
import amqp = require('amqplib/callback_api');
import db = require('./database');
amqp.connect({
protocol: process.env.RABBITMQ_PROTOCOL,
hostname: process.env.RABBITMQ_HOST,
port: process.env.RABBITMQ_PORT,
username: process.env.RABBITMQ_USER,
password: process.env.RABBITMQ_PASSWORD,
vhost: process.env.RABBITMQ_VHOST
}, function(err, conn) {
conn.createChannel(function (err, ch) {
// set exchange that is being used
ch.assertExchange(process.env.RABBITMQ_WORKER_EXCHANGE, 'direct', {durable: true});
// set queue that is being used
ch.assertQueue(process.env.RABBITMQ_QUEUE, {durable: true}, function (err, q) {
console.log(" [*] Waiting for messages in %s. To exit press CTRL+C", q.queue);
// bind the queue to the exchange
ch.bindQueue(q.queue, process.env.RABBITMQ_WORKER_EXCHANGE, '');
// consume from the queue, one message at a time.
ch.consume(q.queue, function (msg) {
console.log("Message received: %s", msg.content.toString());
//save message to db
db.store(msg.content.toString()).then(function() {
//acknowledge receipt of message to amqp
console.log("Acknowledging message");
ch.ack(msg, true);
});
}, {noAck: false});
});
});
});
import * as Amqp from "amqp-ts";
var connection = new Amqp.Connection("amqp://localhost");
var exchange = connection.declareExchange("ExchangeName");
var queue = connection.declareQueue("QueueName");
queue.bind(exchange);
queue.activateConsumer((message) => {
console.log("Message received: " + message.getContent());
});
// it is possible that the following message is not received because
// it can be sent before the queue, binding or consumer exist
var msg = new Amqp.Message("Test");
exchange.send(msg);
connection.completeConfiguration().then(() => {
// the following message will be received because
// everything you defined earlier for this connection now exists
var msg2 = new Amqp.Message("Test2");
exchange.send(msg2);
});

Unable to dequeue messages from Rabbitmq using amqp package in node

I enqueue my messages to Rabiitmq using the following node js code:
//enqueue.js
var amqp = require('amqplib/callback_api');
amqp.connect('amqp://localhost', function(err, conn) {
conn.createChannel(function(err, ch) {
var q = 'hello';
ch.assertQueue(q, {durable: true});
ch.sendToQueue(q, new Buffer('Msg 1'));
ch.sendToQueue(q, new Buffer('Msg 2'));
ch.sendToQueue(q, new Buffer('Msg 3'));
ch.sendToQueue(q, new Buffer('Msg 4'));
ch.sendToQueue(q, new Buffer('Msg 5'));
});
setTimeout(function() { conn.close(); process.exit(0) }, 500);
});
The enqueue process happens as intended.
Now I wish to dequeue just 1 message and for that I use this code:
//dequeue.js
var amqp = require('amqplib/callback_api');
amqp.connect('amqp://localhost', function(err, conn) {
conn.createChannel(function(err, ch) {
var q = 'hello';
ch.assertQueue(q, {durable: true});
ch.prefetch(1);
ch.consume(q, function(msg) {
console.log(" [x] Received %s", msg.content.toString());
}, {noAck: false});
});
});
As expected the output is:
[x] Received Msg 1
But Msg 1 is still in the queue and it is not dequeued. So I tried adding ch.ack(msg) after the console.log() statement in dequeue.js but instead of just 1 message getting dequeued all the messages are getting dequeued!
Please help me to dequeue just 1 message.
Set noack to true.
noAck (boolean): if true, the message will be assumed by the server to
be acknowledged (i.e., dequeued) as soon as it's been sent over the
wire. Default is false, that is, you will be expected to acknowledge
the message.
http://www.squaremobius.net/amqp.node/channel_api.html#channel_get
Or remove the noack and make sure you check that msg !== null (if you call ch.ack() on a null object, it's just going to throw a channel exception).

RabbitMQ with NodeJS - using amqplib to get message count

How do I get the number of messages currently en-queued?
My code is basically the following:
function readQueue() {
var open = require('amqplib').connect(config.rabbitServer);
open.then(function (conn) {
var ok = conn.createChannel();
ok = ok.then(function (ch) {
ch.prefetch(config.bulkSize);
setInterval(function () {
handleMessages();
}, config.bulkInterval);
ch.assertQueue(config.inputQueue);
ch.consume(config.inputQueue, function (msg) {
if (msg !== null) {
pendingMessages.push(msg);
}
});
});
return ok;
}).then(null, console.warn);
}
I found nothing in the documentation or while debugging, and I did see a different library that allows this, so wondering if amqplib supports this as well.
You can get the queue-length with amqplib.
In my case the queue has the feature 'durable:true'. You have to pass it as an option.
var amqp = require('amqplib/callback_api');
amqp.connect(amqp_url, function(err, conn) {
conn.createChannel(function(err, ch) {
var q = 'task2_queue';
ch.assertQueue(q, {durable: true}, function(err, ok) {
console.log(ok);
});
});
});
It will return an object like this:
{ queue: 'task2_queue', messageCount: 34, consumerCount: 2 }
For more information: https://www.squaremobius.net/amqp.node/channel_api.html#channel_assertQueue
I think the assertQueue method call will return an object that contains the current message count. I don't remember the exact property name off-hand, but it should be in there.
The real trick, though, is that this number will never be updated once you call assertQueue. The only way to get an updated message count is to call assertQueue again. This can have some performance implications if you're checking it too frequently.
You should call channel.checkQueue(queueName) and then you will get an object { queue: 'queueName', messageCount: 1, consumerCount: 0 } where the property messageCount which is the exactly current number of messages in the queue
I couldn't find a direct solution using node, but by using api from RabbitMQ I was able to get message count.
After enabling management plugin of RabbitMQ the apis can be accessed using http://127.0.0.1:15672/api/queues/vhost/name and user login as guest with password guest.
var request = require('request');
var count_url = "http://guest:guest#127.0.0.1:15672/api/queues/%2f/" + q;
var mincount = 0;
..........
..........
request({
url : count_url
}, function(error, response, body) {
console.log("Called RabbitMQ API");
if (error) {
console.error("Unable to fetch Queued Msgs Count" + error);
return;
}
else
{
var message = JSON.parse(body);
if (message.hasOwnProperty("messages_ready")) {
// this DOES NOT COUNT UnAck msgs
var msg_ready = JSON.stringify(message.messages_ready);
console.log("message.messages_ready=" + msg_ready);
if (msg_ready == mincount) {
console.log("mincount Reached ..Requesting Producer");
///Code to Produce msgs ..
}
}
if (message.hasOwnProperty("messages")) {
// _messages_ total messages i.e including unAck
var msg = JSON.stringify(message.messages);
console.log("message.messages=" + msg);
}
}
});

Resources