I have created a Node.js application that inserts data into a MongoDB collection. The insertion is driven through Kafka; kafka-node is the package I use to talk to Kafka.
At the producer level I can create the topic and send a message to the consumer. The topic and message are taken from the POST request.
This is how I call Kafka; the parameters are the topic and the message.
Every time I call this API, the producer creates a new message and sends it to the consumer, but on each call all previous messages are delivered to the consumer again.
I have tried both fromOffset: 'earliest' and fromOffset: 'latest' to suppress the previous messages, but neither works.
Can anyone give me a suggestion?
Version of kafka-node:
"kafka-node": "^5.0.0",
Code I have used:
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);
var Producer = kafka.Producer,
client = new kafka.KafkaClient(),
offset = new kafka.Offset(client),
Consumer = kafka.Consumer,
producer = new Producer(client);
producer.on('ready', function () {
console.log('Producer is ready');
});
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
})
const createProducer = async (req, res, next) => {
var topic = req.body.topic;
var sentMessage = JSON.stringify(req.body.messages);
// Build the payload from the POST body
var payloads = [
{ topic: topic, messages: sentMessage, partition: 0 }
];
producer.send(payloads, async function (err, data) {
})
client = new kafka.KafkaClient();
var consumer = new Consumer(client,
[
{ topic: topic, partition: 0 }
],
{
autoCommit: false,
fromOffset: 'earliest'
}
);
consumer.on('message', async function (message) {
console.log("Message : "+JSON.stringify(message))
try {
var currentdate = new Date();
var datetime = "Last Sync: " + currentdate.getDate() + "/"
+ (currentdate.getMonth()+1) + "/"
+ currentdate.getFullYear() + " # "
+ currentdate.getHours() + ":"
+ currentdate.getMinutes() + ":"
+ currentdate.getSeconds();
var abb = await createListing(mongoClient,
{
topic: topic,
message: sentMessage,
time: datetime
}
);
} catch (e) {
console.error(":"+e);
}
finally {
}
});
await mongoClient.close();
res.send({
message: 'Successfully send data from producer',
payloads: payloads
})
async function createListing(client, newListing){
await mongoClient.connect();
const result = await
client.db("sample_airbnb").collection("listingsAndReviews").insertOne(newListing);
console.log(`New listing created with the following id: ${result.insertedId}`);
return result.insertedId;
}
}
The consumer output shows every previously sent message being delivered again, not just the latest one.
Thanks,
Your consumer will always consume all offsets that have not been marked as consumed by its consumer group.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster to mark those messages as consumed. Only then will your consumer group stop re-consuming them on startup.
To commit your offsets, you can either use the client's autoCommit feature (which you explicitly disabled in your configuration) or commit the offsets manually through the client's API.
The kafkajs documentation describes both approaches: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
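The question uses kafka-node rather than kafkajs, so here is a rough equivalent sketch with kafka-node's Consumer (the topic and group names are illustrative, and this is an adaptation rather than the asker's exact setup):
var kafka = require('kafka-node');
var client = new kafka.KafkaClient();

var consumer = new kafka.Consumer(client,
  [{ topic: 'mytopic', partition: 0 }],   // illustrative topic
  {
    groupId: 'my-group',
    autoCommit: false   // or set autoCommit: true and drop the manual commit below
  }
);

consumer.on('message', function (message) {
  // ... process the message here (e.g. insert it into MongoDB) ...
  // Explicitly commit the offsets read so far, so the group does not re-read them.
  consumer.commit(function (err, data) {
    if (err) console.error(err);
  });
});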
I made some changes to the code and can now retrieve only the latest message from my topic.
I create the consumer inside the offset.fetchLatestOffsets([topic], cb) callback and adjusted the consumer options.
var payloads = [
{ topic: topicName, messages: messageTotopic, partition: 0}
];
producer.send(payloads, async function(err, data) {
});
var client = new kafka.KafkaClient();
offset.fetchLatestOffsets([topic], async function (error, offsets) {
if (error)
console.log(error);
offsetA = JSON.stringify(offsets[topic][0])
console.log('offset Value:: '+offsetA);
var consumer = new Consumer(
client,
[
{
topic: topic,
partition: 0,
offset: offsetA-1, // Offset value starts from 0
}
], {
autoCommit: false,
fromOffset: true,
}
);
consumer.on('message', async function (message) {
console.log("Message from last offset:: " + JSON.stringify(message)); // will return the latest message.
consumer.close();
});
});
This approach also let me overcome the memory-leak issue related to the event emitters in the KafkaClient.
Related
I am working on a large project where multiple technologies are used, and we process a large amount of data every day.
We use MSSQL and MongoDB to store data from the client. SQL Server stores the data as the RDBMS of record, while MongoDB stores view-like documents to speed up reads. ColdFusion is used as the backend, and cfthreads are used to update MongoDB. The cfthreads cause a severe memory leak, so we decided to introduce Kafka on a server so that MongoDB can be updated through Kafka instead.
The producer and consumer are created with kafka-node in Node.js. Kafka and ColdFusion run on different servers. After every SQL update, the same change should be added or updated in MongoDB. Can we do this synchronization using Kafka?
How can I call the Kafka producer from a ColdFusion server?
Can prioritized topics be created by a producer?
Do I need to create a topic for every database action?
Code I have used to create a producer and consumer.
producer.js
var kafka = require('kafka-node');
var Producer = kafka.Producer,
client = new kafka.KafkaClient(),
producer = new Producer(client);
producer.on('ready', function () {
console.log('Producer is ready');
});
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
})
const createProducer = async(req,res,next) => {
var sentMessage = JSON.stringify(req.body.messages);
var payloads = [
{ topic: req.body.topic, messages: sentMessage, partition: 0 }
];
producer.send(payloads, function( err, data) {
// res.json(data);
})
res.send({
message: 'Successfully send data from producer',
payloads: payloads
})
}
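For illustration, assuming this handler is mounted on an Express route such as POST /producer (the route name is hypothetical), any HTTP client, including ColdFusion's cfhttp, can trigger the producer with a JSON body like the one below. A sketch using Node's built-in fetch:
// Sketch: calling the producer endpoint over HTTP (route and topic names are hypothetical).
// Node 18+ ships a global fetch; ColdFusion's cfhttp would send the same request.
fetch('http://localhost:3000/producer', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    topic: 'mongo_sync',
    messages: { action: 'update', table: 'orders', id: 42 }
  })
})
  .then(res => res.json())
  .then(body => console.log(body.message))
  .catch(err => console.error(err));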
consumer.js
var kafka = require('kafka-node');
const createConsumer = async(req,res) =>{
var topic = req.params.topic;
Consumer = kafka.Consumer,
client = new kafka.KafkaClient(),
consumer = new Consumer(client,
[{ topic: topic, offset: 0}],
{
autoCommit: false
}
);
res.send({
message: "consumer created",
topic: topic
})
consumer.on('message', function (message) {
console.log('message : ',message);
});
consumer.on('error', function (err) {
console.log('Error:',err);
})
consumer.on('offsetOutOfRange', function (err) {
console.log('offsetOutOfRange:',err);
})
}
I would also appreciate an example.
I have written a node module to connect to Kafka.
kafka-connect.js
var kafka = require('kafka-node');
var Producer = kafka.Producer,
client = new kafka.Client(),
producer = new Producer(client);
module.exports = {
producer
};
KafkaService.js
const {producer} = require('./kafka-connect');
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
});
producer.on('ready', function () {
console.log('Producer is ready');
});
const KafkaService = {
sendRecord: (kafkaTopic, data, callback = (err, data) => console.log(err)) => {
var sendingData = {};
sendingData.event_data = JSON.stringify(data);
sendingData.event_type = 6;
const record = [
{
topic: kafkaTopic,
messages: sendingData,
partition : 0
}
];
producer.send(record, callback);
}
};
module.exports = {
KafkaService
};
Now I am using these two to publish data to Kafka. Following is the code to do so:
const {KafkaService} = require('../kafka/KafkaService');
const {newOrder} = require('../objs/newOrderEvent');
KafkaService.sendRecord("incentive_order_data", newOrder);
But running this file gives this error:
{ BrokerNotAvailableError: Broker not available
at new BrokerNotAvailableError (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/errors/BrokerNotAvailableError.js:11:9)
at Client.loadMetadataForTopics (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:389:15)
at Client.send (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:562:10)
at /Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:241:10
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:473:16
at iteratorCallback (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1064:13)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:969:16
at buildRequest (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:257:24)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:3110:16
at eachOfArrayLike (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1069:9) message: 'Broker not available' }
Producer is ready
Apparently, the publish method is called before the producer is ready. I have not been able to come up with a solution to this. One way might be to bring Promises into the picture, but that is just my hypothesis; the exact method might be different.
You're not waiting for the producer to be ready.
You'll need to do this
producer.on('ready', function () {
console.log('Producer is ready');
// send data here
});
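Since the question already hints at Promises, one option is to wrap the ready event in a promise and wait for it before every send. The following is only a sketch of that idea, reusing the module layout above (it is not a pattern required by the library):
// kafka-connect.js (sketch): resolve a promise once the producer is ready
var kafka = require('kafka-node');

var client = new kafka.KafkaClient();   // KafkaClient supersedes the older Client
var producer = new kafka.Producer(client);

var producerReady = new Promise(function (resolve, reject) {
  producer.on('ready', function () { resolve(producer); });
  producer.on('error', reject);
});

module.exports = { producerReady };

// KafkaService.js (sketch): wait for readiness before sending
const { producerReady } = require('./kafka-connect');

function sendRecord(kafkaTopic, data, callback) {
  producerReady
    .then(function (readyProducer) {
      readyProducer.send(
        [{ topic: kafkaTopic, messages: JSON.stringify(data), partition: 0 }],
        callback
      );
    })
    .catch(callback);
}

module.exports = { sendRecord };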
I am trying my hand at Node.js/TypeScript for the first time and am having a bit of trouble making a consumer for a RabbitMQ queue.
Code:
let amqp = require('amqp');
let connection = amqp.createConnection({url: "amqp://" + RABBITMQ_USER + ":" + RABBITMQ_PASSWORD + "@" + RABBITMQ_HOST + ":" + RABBITMQ_PORT + RABBITMQ_VHOST});
connection.on('ready', function() {
connection.exchange(RABBITMQ_WORKER_EXCHANGE, function (exchange) {
connection.queue(RABBITMQ_QUEUE, function (queue) {
queue.bind(exchange, function() {
queue.publish(function (message) {
console.log('subscribed to queue');
let encoded_payload = unescape(message.data);
let payload = JSON.parse(encoded_payload);
console.log('Received a message:');
console.log(payload);
})
})
})
})
})
It seems to connect to the AMQP server and throws no errors, but it just sits there and doesn't consume anything. Is there a step I am missing?
Any help would be greatly appreciated,
Thank you.
Here is my working solution, based on the RabbitMQ JavaScript tutorial:
https://www.rabbitmq.com/tutorials/tutorial-three-javascript.html
It is probably not up to TypeScript standards, so feel free to correct me if there is a better way.
#!/usr/bin/env node
require('dotenv').config();
import amqp = require('amqplib/callback_api');
import db = require('./database');
amqp.connect({
protocol: process.env.RABBITMQ_PROTOCOL,
hostname: process.env.RABBITMQ_HOST,
port: process.env.RABBITMQ_PORT,
username: process.env.RABBITMQ_USER,
password: process.env.RABBITMQ_PASSWORD,
vhost: process.env.RABBITMQ_VHOST
}, function(err, conn) {
conn.createChannel(function (err, ch) {
// set exchange that is being used
ch.assertExchange(process.env.RABBITMQ_WORKER_EXCHANGE, 'direct', {durable: true});
// set queue that is being used
ch.assertQueue(process.env.RABBITMQ_QUEUE, {durable: true}, function (err, q) {
console.log(" [*] Waiting for messages in %s. To exit press CTRL+C", q.queue);
// bind the queue to the exchange
ch.bindQueue(q.queue, process.env.RABBITMQ_WORKER_EXCHANGE, '');
// consume from the queue, one message at a time.
ch.consume(q.queue, function (msg) {
console.log("Message received: %s", msg.content.toString());
//save message to db
db.store(msg.content.toString()).then(function() {
//acknowledge receipt of message to amqp
console.log("Acknowledging message");
ch.ack(msg, true);
});
}, {noAck: false});
});
});
});
Another option is the amqp-ts library, which wraps the setup in promises:
import * as Amqp from "amqp-ts";
var connection = new Amqp.Connection("amqp://localhost");
var exchange = connection.declareExchange("ExchangeName");
var queue = connection.declareQueue("QueueName");
queue.bind(exchange);
queue.activateConsumer((message) => {
console.log("Message received: " + message.getContent());
});
// it is possible that the following message is not received because
// it can be sent before the queue, binding or consumer exist
var msg = new Amqp.Message("Test");
exchange.send(msg);
connection.completeConfiguration().then(() => {
// the following message will be received because
// everything you defined earlier for this connection now exists
var msg2 = new Amqp.Message("Test2");
exchange.send(msg2);
});
I will explain what I want to achieve and then what I have done (without results).
I have two Node services connected to each other through RabbitMQ using a topic exchange.
What I want is to shut down C1 while messages are still being sent to something.orange.something, then restart C1 and receive all the messages I missed in the meantime.
What happens now is that each time I restart, my consumer creates a new queue and a new binding on the exchange with the same routing key, so I end up with two queues receiving the same information.
If I configure my queue with the {exclusive: true} option I solve part of the problem (I no longer have queues without receivers), but I still have the same issue: all messages sent while there is no active receiver are lost.
Is this possible?
Here is my code:
sender:
'use strict';
const amqp = require('amqplib/callback_api');
const logatim = require('logatim');
logatim.setLevel('info')
amqp.connect('amqp://localhost', (err, conn) => {
conn.createChannel((err, ch) => {
let ex = 'direct_colors';
let args = process.argv.slice(2);
let colors = ['colors.en.green', 'colors.en.yellow', 'colors.es.red']
ch.assertExchange(ex, 'topic', {durable: true});
setInterval(() => {
let color = colors[Math.floor(Math.random() * 3)];
let msg = `This is a ${color} message`;
ch.publish(ex, color, new Buffer(msg));
logatim[color.split('.').pop()].info(msg);
}, 1000);
});
});
receiver:
'use strict';
const amqp = require('amqplib/callback_api');
const logatim = require('logatim');
logatim.setLevel('info');
const args = process.argv.slice(2);
amqp.connect('amqp://localhost', (err, conn) => {
conn.createChannel((err, ch) => {
var ex = 'direct_colors';
ch.assertExchange(ex, 'topic', {durable: true});
ch.assertQueue('', {exclusive: true, durable: true}, (err, q) => {
logatim.green.info(' [*] Waiting for logs. To exit press CTRL+C');
args.forEach((arg) => {
ch.bindQueue(q.queue, ex, arg);
});
ch.consume(q.queue, (msg) => {
logatim[msg.fields.routingKey.split('.').pop()].info(` [x] ${msg.content.toString()}`);
});
});
});
});
You need named queues. When you declare the queue in your receiver, give it a well-known (constant) name, something like
ch.assertQueue('my_service_1_queue', {durable: true}, ...
The basic examples are in the RabbitMQ tutorials.
When your consumer goes down and restarts, it will consume from the same named queue.
NOTE: you don't need the exclusive queue there, as it is deleted when your consumer goes down.
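A minimal sketch of the receiver with a named, durable queue (same exchange and amqplib/callback_api setup as above; the queue name is only an example):
'use strict';
const amqp = require('amqplib/callback_api');

amqp.connect('amqp://localhost', (err, conn) => {
  conn.createChannel((err, ch) => {
    const ex = 'direct_colors';
    ch.assertExchange(ex, 'topic', {durable: true});
    // Named, durable, non-exclusive queue: it survives consumer restarts,
    // so messages published while the consumer is down wait in the queue.
    ch.assertQueue('my_service_1_queue', {durable: true}, (err, q) => {
      process.argv.slice(2).forEach((arg) => {
        ch.bindQueue(q.queue, ex, arg);
      });
      ch.consume(q.queue, (msg) => {
        console.log(` [x] ${msg.content.toString()}`);
        ch.ack(msg);   // acknowledge so RabbitMQ can drop the message
      }, {noAck: false});
    });
  });
});
Messages that arrive while the consumer is offline stay in my_service_1_queue and are delivered on the next start; to also survive a broker restart, the sender would additionally need to publish with {persistent: true}.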
I'm using a HighLevelProducer and HighLevelConsumer to send and receive messages. The HighLevelConsumer is configured with autoCommit=false because I want to commit a message only once it has been processed successfully. The problem is that the first message never really gets committed.
Example:
Send Messages 1-10.
Receive Message 1
Receive Message 2
Commit Message 2
...
Receive Message 10
Commit Message 10
Commit Message 1
If I restart my consumer, all messages from 1 to 10 are processed again. Only when I send new messages to the consumer do the old messages get committed. This happens for any number of messages.
My code reads as follows:
var kafka = require('kafka-node'),
HighLevelConsumer = kafka.HighLevelConsumer,
client = new kafka.Client("localhost:2181/");
consumer = new HighLevelConsumer(
client,
[
{ topic: 'mytopic' }
],
{
groupId: 'my-group',
id: "my-consumer-1",
autoCommit: false
}
);
consumer.on('message', function (message) {
console.log("consume: " + message.offset);
consumer.commit(function (err, data) {
console.log("commited:" + message.offset);
});
console.log("consumed:" + message.offset);
});
process.on('SIGINT', function () {
consumer.close(true, function () {
process.exit();
});
});
process.on('exit', function () {
consumer.close(true, function () {
process.exit();
});
});
var messages = 10;
var kafka = require('kafka-node'),
HighLevelProducer = kafka.HighLevelProducer,
client = new kafka.Client("localhost:2181/");
var producer = new HighLevelProducer(client, { partitionerType: 2, requireAcks: 1 });
producer.on('error', function (err) { console.log(err) });
producer.on('ready', function () {
for (i = 0; i < messages; i++) {
payloads = [{ topic: 'mytopic', messages: "" }];
producer.send(payloads, function (err, data) {
err ? console.log(i + "err", err) : console.log(i + "data", data);
});
}
});
Am I doing something wrong or is this a bug in kafka-node?
A commit of message 2 is an implicit commit of message 1.
Because your commits are done asynchronously, and the commits of message 1 and message 2 happen in quick succession (i.e., the commit of 2 is issued before the consumer has sent the commit of 1), the first commit never happens explicitly and only a single commit of message 2 is sent.
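One possible workaround (not part of the answer above, just a sketch assuming kafka-node's pause(), resume() and commit(force, cb) methods on the HighLevelConsumer) is to serialize the commits so each one is acknowledged before further messages are processed:
consumer.on('message', function (message) {
  console.log("consume: " + message.offset);
  consumer.pause();   // stop fetching while the commit is in flight
  consumer.commit(true, function (err, data) {
    if (err) console.error(err);
    console.log("committed: " + message.offset);
    consumer.resume();   // continue with the next messages
  });
});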