I am working on a large project that uses multiple technologies, and we process a large amount of data every day.
We use MSSQL and MongoDB to store data from the client. SQL Server holds the relational data, while MongoDB holds denormalized, view-like copies to speed up reads. ColdFusion is the backend, and cfthreads are used to update MongoDB. The cfthreads cause a serious memory leak, so we decided to introduce Kafka on a separate server and update MongoDB through it.
The producer and consumer are created with kafka-node in Node.js. Kafka and ColdFusion run on different servers. After every SQL update, the same change should be inserted or updated in MongoDB. Can this synchronization be done with Kafka?
How can I call the Kafka producer from a ColdFusion server?
Can prioritized topics be created by a producer?
Do I need to create a topic for every database action?
Here is the code I have used to create the producer and consumer.
producer.js
var kafka = require('kafka-node');
var Producer = kafka.Producer,
    client = new kafka.KafkaClient(),
    producer = new Producer(client);

producer.on('ready', function () {
    console.log('Producer is ready');
});

producer.on('error', function (err) {
    console.log('Producer is in error state');
    console.log(err);
});
const createProducer = async (req, res, next) => {
    // topic and message body come from the POST request
    var sentMessage = JSON.stringify(req.body.messages);
    var payloads = [
        { topic: req.body.topic, messages: sentMessage, partition: 0 }
    ];
    producer.send(payloads, function (err, data) {
        // res.json(data);
    });
    res.send({
        message: 'Successfully sent data from producer',
        payloads: payloads
    });
};
consumer.js
var kafka = require('kafka-node');
const createConsumer = async (req, res) => {
    var topic = req.params.topic;
    var Consumer = kafka.Consumer,
        client = new kafka.KafkaClient(),
        consumer = new Consumer(client,
            [{ topic: topic, offset: 0 }],
            {
                autoCommit: false
            }
        );
    res.send({
        message: 'consumer created',
        topic: topic
    });
    consumer.on('message', function (message) {
        console.log('message : ', message);
    });
    consumer.on('error', function (err) {
        console.log('Error:', err);
    });
    consumer.on('offsetOutOfRange', function (err) {
        console.log('offsetOutOfRange:', err);
    });
};
Could someone provide an example?
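One way to call the producer from ColdFusion is to expose the createProducer handler above as an HTTP endpoint and have ColdFusion POST to it with cfhttp. A minimal sketch, assuming Express is used and that producer.js exports createProducer; the route path and port are placeholders:

// server.js -- sketch only; the Express app, route path and port are assumptions
var express = require('express');
var createProducer = require('./producer'); // assumes producer.js exports the createProducer handler

var app = express();
app.use(express.json()); // parse JSON bodies so req.body.topic and req.body.messages are available

// ColdFusion can call this with a cfhttp POST whose JSON body contains "topic" and "messages"
app.post('/produce', createProducer);

app.listen(3000, function () {
    console.log('Kafka producer API listening on port 3000');
});

The consumer endpoint could be wired up the same way; since Kafka and ColdFusion are on different servers, the ColdFusion side only needs HTTP access to this Node service.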
Related
I have created a Node.js application that inserts data into a MongoDB collection. The insertion is driven by Kafka, and kafka-node is the package I use to talk to Kafka.
At the producer level I can create the topic and send a message to the consumer. The message and topic are taken from the POST request.
This is how I call Kafka; the parameters are the topic and the message.
Every time I call this API, the producer creates a new message and sends it to the consumer. But on each call, all the previous messages are also returned to the consumer.
I have tried the fromOffset: 'earliest' and fromOffset: 'latest' options to suppress the previous messages, but it is not working.
Can anyone give me a suggestion?
Version of Kafka-node
"kafka-node": "^5.0.0",
Code I have used
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);
var Producer = kafka.Producer,
    client = new kafka.KafkaClient(),
    offset = new kafka.Offset(client),
    Consumer = kafka.Consumer,
    producer = new Producer(client);

producer.on('ready', function () {
    console.log('Producer is ready');
});

producer.on('error', function (err) {
    console.log('Producer is in error state');
    console.log(err);
});
const createProducer = async (req, res, next) => {
    var topic = req.body.topic;
    var sentMessage = JSON.stringify(req.body.messages);
    // payloads must be defined before it is used in producer.send and res.send below
    var payloads = [
        { topic: topic, messages: sentMessage, partition: 0 }
    ];
    producer.send(payloads, async function (err, data) {
    });
    // give the consumer its own client instead of overwriting the producer's client
    var consumerClient = new kafka.KafkaClient();
    var consumer = new Consumer(consumerClient,
        [
            { topic: topic, partition: 0 }
        ],
        {
            autoCommit: false,
            fromOffset: 'earliest'
        }
    );
    consumer.on('message', async function (message) {
        console.log("Message : " + JSON.stringify(message));
        try {
            // build a human-readable sync timestamp
            var currentdate = new Date();
            var datetime = "Last Sync: " + currentdate.getDate() + "/"
                + (currentdate.getMonth() + 1) + "/"
                + currentdate.getFullYear() + " # "
                + currentdate.getHours() + ":"
                + currentdate.getMinutes() + ":"
                + currentdate.getSeconds();
            // insert the consumed message into MongoDB
            var abb = await createListing(mongoClient,
                {
                    topic: topic,
                    message: sentMessage,
                    time: datetime
                }
            );
        } catch (e) {
            console.error(":" + e);
        }
    });
    await mongoClient.close();
    res.send({
        message: 'Successfully sent data from producer',
        payloads: payloads
    });
    async function createListing(client, newListing) {
        await mongoClient.connect();
        const result = await client.db("sample_airbnb")
            .collection("listingsAndReviews")
            .insertOne(newListing);
        console.log(`New listing created with the following id: ${result.insertedId}`);
        return result.insertedId;
    }
};
The messages consumed at the consumer are:
Thanks,
Your consumer will always consume all offsets that have not previously been marked as consumed by its consumer group.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster, to effectively mark those messages as consumed. Only then will your consumer group not re-consume those messages on startup.
To commit your offsets, you can either use kafka.js’s autoCommit feature (which you explicitly disabled in your implementation), or manually commit your offsets using the API provided by kafka.js.
You can find the documentation to both here: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
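With kafka-node, which the question uses, the equivalent is to consume through a consumer group and commit offsets; a minimal sketch, where the kafkaHost, groupId, and topic name are placeholders:

var kafka = require('kafka-node');

// ConsumerGroup tracks committed offsets per groupId, so already-consumed
// messages are not redelivered on the next run
var consumerGroup = new kafka.ConsumerGroup(
    {
        kafkaHost: 'localhost:9092',   // placeholder broker address
        groupId: 'mongo-sync-group',   // placeholder group id
        autoCommit: true,              // commit offsets automatically after consumption
        fromOffset: 'latest'           // where to start when the group has no committed offset yet
    },
    'my-topic'                         // placeholder topic name
);

consumerGroup.on('message', function (message) {
    console.log('message:', message);
});

consumerGroup.on('error', function (err) {
    console.log('Error:', err);
});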
I made some changes to the code, and now I can retrieve the latest message from my topic.
I create the consumer inside offset.fetchLatestOffsets([topic], cb) and changed some of the consumer options.
var payloads = [
{ topic: topicName, messages: messageTotopic, partition: 0}
];
producer.send(payloads, async function(err, data) {
});
var client = new kafka.KafkaClient();
var offset = new kafka.Offset(client);

offset.fetchLatestOffsets([topic], async function (error, offsets) {
    if (error)
        console.log(error);
    var offsetA = offsets[topic][0]; // latest offset of partition 0
    console.log('offset Value:: ' + offsetA);
    var consumer = new Consumer(
        client,
        [
            {
                topic: topic,
                partition: 0,
                offset: offsetA - 1, // offsets are zero-based, so the latest message is at offsetA - 1
            }
        ], {
            autoCommit: false,
            fromOffset: true,
        }
    );
    consumer.on('message', async function (message) {
        console.log("Message from last offset:: " + JSON.stringify(message)); // returns only the latest message
        consumer.close();
    });
});
This way I am also able to overcome the memory-leak issue related to the event emitters in the KafkaClient.
I have a Kafka consumer for a topic and I am able to read data from it, but I want to add New Relic instrumentation for this consumer.
How can I manually send events to New Relic?
I have a newrelic.js file and have configured it, but I have read that for Kafka I need to record the events manually.
Sample Code Snippet
const newrelic = require('newrelic');
const config = require('./config');
const topicConfig = require('./topicConfig');
const scheduler = require("../scheduler");
const logger = require("../logger"); // assumed path: the logger used below is not defined in the original snippet
const Kafka = require("node-rdkafka");
const defaultConfig = { ...config };
const topicConf = { ...topicConfig };
try {
var topic = new Kafka.KafkaConsumer(
defaultConfig,
topicConf,
{}
);
topic.connect();
logger.info(
"topic object",
topic
);
topic
.on("ready", function () {
topic.subscribe([config.topicConf]);
topic.consume();
})
.on("data", function (data) {
scheduler.handleMessage(data.value);
})
.on("error", function (err) {
logger.error("Error in topic consumer ", err);
});
} catch (error) {
logger.error(
"Exception while connecting to kafka",
error
);
}
How can I add manual events to New Relic, and which methods should I use? The ones below are mentioned in the docs, but I am confused about how to integrate them and where to add them inside this try/catch block.
Example methods as in the docs:
newrelic.setTransactionName(name)
newrelic.setControllerName(name, [action])
Any help appreciated.
You have to define a custom event, something like this, and pass the event name to addPageAction:
window.addEventListener('DOMContentLoaded', (event) => {
document.getElementById('btnFndCntnt').addEventListener('click',function (e) {
newrelic.addPageAction('Find');
})
});
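For a server-side consumer like the node-rdkafka one in the question, another option is the Node agent's background-transaction and custom-event APIs; this is only a sketch, and the transaction name and event attributes are made up:

const newrelic = require('newrelic');

// `topic` and `scheduler` are the objects from the snippet in the question;
// the existing 'data' handler could be rewritten along these lines
topic.on('data', function (data) {
    // wrap each message in a background transaction so it is visible in APM
    newrelic.startBackgroundTransaction('kafka/consume', function () {
        const transaction = newrelic.getTransaction();
        try {
            scheduler.handleMessage(data.value);
            // record a custom event; the event type and attributes are examples
            newrelic.recordCustomEvent('KafkaMessageConsumed', {
                topic: data.topic,
                partition: data.partition,
                offset: data.offset
            });
        } finally {
            transaction.end();
        }
    });
});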
I created a producer in JavaScript and started pushing messages into a Kafka topic. When I consume the messages with the console consumer I get null values, and I don't know why. Here is my Kafka producer code.
Here are the Node.js server logs, which indicate the messages have been sent to the Kafka topic:
data arrived
get into postdata
kafka producer is connected and ready.
Sent payload to Kafka: [ { topic: 'test1',
message:
{ values:
'{"AlternateUniqueKey":"","DownloadedDeviceUniqueKey”:”1235”,”NetworkInfo":{"SIM1":false,"IMEI1":"","NetworkType1":"","OperatorName1":"","PhoneNumber1":"","PhoneType1":"","SignalStrength1":0,"SimCountryCode1":"","SimSerialNumber1":"","StateOfService1":"","SIM2":false,"IMEI2":"","NetworkType2":"","OperatorName2":"","PhoneNumber2":"","PhoneType2":"","SignalStrength2":0,"SimCountryCode2":"","SimSerialNumber2":"","StateOfService2":"","Error":"android.permission.READ_PHONE_STATEpermission not granted","Status":-1},"FirstBootDate":"2018-12-06T16:21:35.744+0530","DeviceID":"39a2afecbe00dae1","PhoneInfo":{"SDKVersion":27,"AndroidVersion":"8.1.0","Brand":"LAVA","Device":"Z50","Hardware":"mt6735","IMEI1":"","IMEI2":"","IsRooted":false,"IsRootedString":"No","Manufacturer":"LAVA","Model":"Z50","Product":"Z50","Serial":"","SoftwareVersion":"1528860449","Status":1},"Battery":{"BatteryState":"Discharging","Capacity":0,"Health":"Good","Level":83,"Status":1,"Temperature":24,"Voltage":0},"CreatedDate":"2018-12-06T16:21:35.770+0530","ConsumerID":0,"WiFi":{"BSSID":"02:00:00:00:00:00","Frequency":2462,"Is5GHz":false,"MACAddress":"02:00:00:00:00:00","MaxWifiSpeed":65,"SSID":"<unknown ssid>","SignalStrength":-68,"Status":1},"device":{"Brand":"LAVA","AlternateUniqueKey":"","ProductName":"Z50","DownloadedDeviceUniqueKey":"","Device":"Z50","Manufacturer":"LAVA","ProductUniqueID":""},"ProductUniqueID":"","status":1,"app":"Servify","timezone":"+0530","version":"53","languagecode":1,"LanguageID":1,"LanguageCode":"en","CountryCode":"IN","CountryID":105,"PhoneCode":91,"sourcedevice":"Android","skipMapping":true}' },
partition: 0,
attributes: 0 } ]
result: { test1: { '0': 41 } }
The data inside the Kafka topic log file is in an unreadable format:
8��Z������������������������������
8��Z������������������������������
Here is the consumer output:
kafka-console-consumer --bootstrap-server localhost:9092 --topic test1 --from-beginning
null
null
null
null
null
Any help highly appreciated!
You are using the wrong key in payloads: it should be messages: messages, not message: messages.
Full example
var kafka = require('kafka-node');
var topicName = 'test1';
var client = new kafka.Client('localhost:2181');
var producer = new kafka.HighLevelProducer(client);
var messages = 'hello world';
console.log('get into postdata');
var payloads = [{ topic: topicName, messages: messages, partition: 0 }];
producer.on('ready', function() {
producer.send(payloads, function(error, result) {
console.info('Sent payload to Kafka: ', payloads);
if (error) {
console.error(error);
} else {
console.log('result: ', result);
}
});
});
I have written a node module to connect to Kafka.
kafka-connect.js
var kafka = require('kafka-node');
var Producer = kafka.Producer,
client = new kafka.Client(),
producer = new Producer(client);
module.exports = {
producer
};
KafkaService.js
const {producer} = require('./kafka-connect');
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
});
producer.on('ready', function () {
console.log('Producer is ready');
});
const KafkaService = {
sendRecord: (kafkaTopic, data, callback = (err, data) => console.log(err)) => {
var sendingData = {};
sendingData.event_data = JSON.stringify(data);
sendingData.event_type = 6;
const record = [
{
topic: kafkaTopic,
messages: sendingData,
partition : 0
}
];
producer.send(record, callback);
}
};
module.exports = {
KafkaService
};
Now I am using these two to publish data to Kafka. Following is the code to do so:
const {KafkaService} = require('../kafka/KafkaService');
const {newOrder} = require('../objs/newOrderEvent');
KafkaService.sendRecord("incentive_order_data", newOrder);
But running this file gives an error:
{ BrokerNotAvailableError: Broker not available
at new BrokerNotAvailableError (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/errors/BrokerNotAvailableError.js:11:9)
at Client.loadMetadataForTopics (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:389:15)
at Client.send (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:562:10)
at /Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:241:10
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:473:16
at iteratorCallback (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1064:13)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:969:16
at buildRequest (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:257:24)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:3110:16
at eachOfArrayLike (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1069:9) message: 'Broker not available' }
Producer is ready
Apparently the publish method is being called before the producer is ready. I am not able to come up with a solution for this. One way might be to bring Promises into the picture, but that is just my hypothesis; the exact method might be different.
You're not waiting for the producer to be ready.
You'll need to do this
producer.on('ready', function () {
console.log('Producer is ready');
// send data here
});
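If you prefer the Promise route mentioned in the question, one sketch is to export a promise that resolves on the ready event and await it before every send; the waitForReady name is made up here:

// kafka-connect.js -- sketch: resolve a promise once the producer is ready
var kafka = require('kafka-node');

var client = new kafka.Client();
var producer = new kafka.Producer(client);

var waitForReady = new Promise(function (resolve, reject) {
    producer.on('ready', resolve);
    producer.on('error', reject);
});

module.exports = { producer, waitForReady };

// KafkaService.js -- only send after the producer has signalled ready
// const { producer, waitForReady } = require('./kafka-connect');
async function sendRecord(kafkaTopic, data) {
    await waitForReady;
    const record = [{ topic: kafkaTopic, messages: JSON.stringify(data), partition: 0 }];
    return new Promise(function (resolve, reject) {
        producer.send(record, function (err, result) {
            return err ? reject(err) : resolve(result);
        });
    });
}

With this, sendRecord only runs once the producer has connected, which avoids calling send before the broker metadata is loaded.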
I'm using highland to handle back pressure. My code is:
const pipeline = [{
$match: { 'published': true, status: 'Approved' }
}];
const cursor = UserModel.aggregate(pipeline)
.cursor().exec();
// iterating over each hospitality one by one
highland(cursor)
.map((doc) => {
// some code
return doc;
})
.map((doc) => {
// some code
return doc;
})
.map((doc) => {
// some code
return doc;
})
.errors(function (err) {
winston.error('error', err);
})
.done(() => {
winston.info('JOB: done');
});
I want to fetch one document at a time and process it through the map steps one by one.
I'm not sure whether this handles back-pressure, because the Highland docs say (see the Back-pressure section of the Highland documentation):
Some streams (such as those based on events) cannot be paused. In
these cases data is buffered until the consumer is ready to handle it.
Please suggest any other way if this does not handle backpressure!
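For what it's worth, when each per-document step is asynchronous, one pattern is to wrap that work in flatMap so Highland only pulls the next document from the cursor once the previous one has settled; this is a sketch, and processDoc is a made-up stand-in for the real work:

const highland = require('highland');

// made-up async step standing in for the real per-document processing
function processDoc(doc) {
    return Promise.resolve(doc);
}

// cursor and winston come from the snippet above
highland(cursor)
    .flatMap(function (doc) {
        // highland() wraps the promise, so the next doc is requested only after this one resolves
        return highland(processDoc(doc));
    })
    .errors(function (err) {
        winston.error('error', err);
    })
    .done(function () {
        winston.info('JOB: done');
    });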