I have written a Node.js module to connect to Kafka.
kafka-connect.js
var kafka = require('kafka-node');
var Producer = kafka.Producer,
client = new kafka.Client(),
producer = new Producer(client);
module.exports = {
producer
};
KafkaService.js
const {producer} = require('./kafka-connect');
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
});
producer.on('ready', function () {
console.log('Producer is ready');
});
const KafkaService = {
sendRecord: (kafkaTopic, data, callback = (err, data) => console.log(err)) => {
var sendingData = {};
sendingData.event_data = JSON.stringify(data);
sendingData.event_type = 6;
const record = [
{
topic: kafkaTopic,
messages: sendingData,
partition : 0
}
];
producer.send(record, callback);
}
};
module.exports = {
KafkaService
};
Now I am using these two to publish data to Kafka. The following is the code that does so:
const {KafkaService} = require('../kafka/KafkaService');
const {newOrder} = require('../objs/newOrderEvent');
KafkaService.sendRecord("incentive_order_data", newOrder);
But running this file gives this error:
{ BrokerNotAvailableError: Broker not available
at new BrokerNotAvailableError (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/errors/BrokerNotAvailableError.js:11:9)
at Client.loadMetadataForTopics (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:389:15)
at Client.send (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:562:10)
at /Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:241:10
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:473:16
at iteratorCallback (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1064:13)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:969:16
at buildRequest (/Users/rajat.mishra/self/nodekafka/node_modules/kafka-node/lib/client.js:257:24)
at /Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:3110:16
at eachOfArrayLike (/Users/rajat.mishra/self/nodekafka/node_modules/async/dist/async.js:1069:9) message: 'Broker not available' }
Producer is ready
Apparently, the publish method is being called before the producer is ready. I have not been able to come up with a solution to this. One way might be to bring Promises into the picture, but that is just my hypothesis; the exact method might be different.
You're not waiting for the producer to be ready.
You'll need to do something like this:
producer.on('ready', function () {
console.log('Producer is ready');
// send data here
});
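One way to do that without moving all the sending logic into the 'ready' handler is to expose a promise that resolves on 'ready' and wait for it inside sendRecord. Here is a rough sketch reworking the two modules from the question (the producerReady export is my own addition, not something kafka-node provides):
// kafka-connect.js (sketch)
var kafka = require('kafka-node');

var client = new kafka.KafkaClient(); // connects to localhost:9092 by default; kafka.Client() is the older ZooKeeper-based client
var producer = new kafka.Producer(client);

// Resolves once the producer has connected to a broker, rejects on a connection error.
var producerReady = new Promise(function (resolve, reject) {
    producer.on('ready', resolve);
    producer.on('error', reject);
});

module.exports = { producer: producer, producerReady: producerReady };

// KafkaService.js (sketch)
const { producer, producerReady } = require('./kafka-connect');

const KafkaService = {
    sendRecord: (kafkaTopic, data, callback = (err) => err && console.log(err)) => {
        const sendingData = {
            event_data: JSON.stringify(data),
            event_type: 6
        };
        const record = [{
            topic: kafkaTopic,
            messages: JSON.stringify(sendingData), // kafka-node expects strings (or KeyedMessage), not plain objects
            partition: 0
        }];
        // Do not send until the 'ready' event has fired.
        producerReady
            .then(() => producer.send(record, callback))
            .catch(callback);
    }
};

module.exports = { KafkaService };
With this, KafkaService.sendRecord("incentive_order_data", newOrder) can be called immediately; the actual send is simply deferred until the broker connection is ready.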
Related
I have created a Node.js application to insert data into a MongoDB collection. The database insertion is done through Kafka; kafka-node is the plugin I have used to call Kafka.
I can create the topic and send a message to the consumer at the producer level. The message and topic are taken from the POST request.
This is how I call Kafka; the parameters are the topic and the message.
Every time I call this API, the producer creates a new message and sends it to the consumer, but on each call all the previous messages are also returned to the consumer.
I have used the fromOffset: 'earliest' and fromOffset: 'latest' options to keep the previous messages out, but it is not working.
Can anyone give me a suggestion?
Version of Kafka-node
"kafka-node": "^5.0.0",
Code I have used
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);
var Producer = kafka.Producer,
client = new kafka.KafkaClient(),
offset = new kafka.Offset(client),
Consumer = kafka.Consumer,
producer = new Producer(client);
producer.on('ready', function () {
console.log('Producer is ready');
});
producer.on('error', function (err) {
console.log('Producer is in error state');
console.log(err);
})
const createProducer = async(req,res,next) => {
var topic = req.body.topic;
var sentMessage = JSON.stringify(req.body.messages);
var payloads = [
{ topic: topic, messages: sentMessage, partition: 0 }
];
producer.send(payloads, async function (err, data) {
})
client = new kafka.KafkaClient(),
consumer = new Consumer(client,
[
{ topic: topic, partition: 0 }
],
{
autoCommit: false,
fromOffset: 'earliest'
}
);
consumer.on('message', async function (message) {
console.log("Message : "+JSON.stringify(message))
try {
var currentdate = new Date();
var datetime = "Last Sync: " + currentdate.getDate() + "/"
+ (currentdate.getMonth()+1) + "/"
+ currentdate.getFullYear() + " # "
+ currentdate.getHours() + ":"
+ currentdate.getMinutes() + ":"
+ currentdate.getSeconds();
var abb = await createListing(mongoClient,
{
topic: topic,
message: sentMessage,
time: datetime
}
);
} catch (e) {
console.error(":"+e);
}
finally {
}
});
await mongoClient.close();
res.send({
message: 'Successfully send data from producer',
payloads: payloads
})
async function createListing(client, newListing){
await mongoClient.connect();
const result = await
client.db("sample_airbnb").collection("listingsAndReviews").insertOne(newListing);
console.log(`New listing created with the following id: ${result.insertedId}`);
return result.insertedId;
}
}
The messages consumed at the consumer are shown in the attached screenshot (not reproduced here).
Thanks.
Your consumer will always consume all offsets that have not yet been marked as consumed by its consumer group.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster to effectively mark those messages as consumed. Only then will your consumer group avoid re-consuming those messages on startup.
To commit your offsets, you can either use KafkaJS's autoCommit feature (which you explicitly disabled in your implementation), or manually commit your offsets using the API provided by KafkaJS.
You can find the documentation for both here: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
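Since the question uses kafka-node rather than KafkaJS, the equivalent there is the consumer's autoCommit option, or its commit() method when autoCommit is disabled. A minimal sketch, with placeholder topic and group names:
var kafka = require('kafka-node');
var client = new kafka.KafkaClient();

// With autoCommit enabled, consumed offsets are committed for the consumer group
// periodically, so a restarted consumer continues after the last committed offset.
var consumer = new kafka.Consumer(
    client,
    [{ topic: 'my-topic', partition: 0 }],
    { groupId: 'my-consumer-group', autoCommit: true, autoCommitIntervalMs: 5000 }
);

consumer.on('message', function (message) {
    console.log(message.value);
    // With autoCommit: false you would instead call consumer.commit() yourself
    // once the message has been processed:
    // consumer.commit(function (err, data) { if (err) console.error(err); });
});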
I made some changes in the code, and now I can retrieve the latest message from my topic.
I created the consumer inside offset.fetchLatestOffsets([topics], cb) and made some changes in the consumer options.
var payloads = [
{ topic: topicName, messages: messageTotopic, partition: 0}
];
producer.send(payloads, async function(err, data) {
});
var client = new kafka.KafkaClient();
offset.fetchLatestOffsets([topic], async function (error, offsets) {
if (error)
console.log(error);
offsetA = JSON.stringify(offsets[topic][0])
console.log('offset Value:: '+offsetA);
var consumer = new Consumer(
client,
[
{
topic: topic,
partition: 0,
offset: offsetA-1, // Offset value starts from 0
}
], {
autoCommit: false,
fromOffset: true,
}
);
consumer.on('message', async function (message) {
console.log("Message from last offset:: " + JSON.stringify(message)); // will return the latest message.
consumer.close();
});
});
This way I was able to overcome the memory-leak issue related to the event emitters in the KafkaClient.
I am making use of "socket.io-client" and "socket.io-stream" to make a request and then stream some data. I have the following code that handles this logic.
Client Server Logic
router.get('/writeData', function(req, res) {
var io = req.app.get('socketio');
var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
var nameNodeData = {};
async.waterfall([
checkForDataNodes,
readFileFromS3
], function(err, result) {
if (err !== null) {
res.json(err);
}else{
res.json("Finished Writing to DN's");
}
});
function checkForDataNodes(cb) {
nameNodeSocket.on('nameNodeData', function(data) {
nameNodeData = data;
console.log(nameNodeData);
cb(null, nameNodeData);
});
if (nameNodeData.numDataNodes === 0) {
cb("No datanodes found");
}
}
function readFileFromS3(nameNodeData, cb) {
for (var i in nameNodeData['blockToDataNodes']) {
var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
var dataNodeSocket = io.connect('http://'+ IP +":5000");
var ss = require("socket.io-stream");
var stream = ss.createStream();
var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
//var file = require('fs').createWriteStream('testFile' + i + '.txt');
var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
var fileData = {
'mainFile': paramsWithRange['Key'].split('/')[1],
'blockName': getFileName[1]
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
s3.getObject(paramsWithRange).createReadStream().pipe(stream);
//dataNodeSocket.disconnect();
}
cb(null);
}
});
Server Logic (that gets the data)
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");
dataNodeIO.on('connection', function(socket) {
console.log("Succesfully connected!");
ss(socket).on('sendData', function(stream, data) {
var IP = data['ipValue'];
var blockName = data['blockName'];
var mainFile = data['mainFile'];
dataNode.makeDir(mainFile);
dataNode.addToReport(mainFile, blockName);
stream.pipe(fs.createWriteStream(mainFile + '/' + blockName));
});
});
How can I properly disconnect the connections in the function readFileFromS3? I have noticed that calling dataNodeSocket.disconnect() at the end does not work, as I then cannot verify that the data was received on the second server; but if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections on the client-server side.
It appears that the main problem with closing the socket is that you weren't waiting for the stream to finish writing before trying to close the socket. Because the writing is all asynchronous and finishes some time later, you were trying to close the socket before the data had been written.
Also, because you were putting asynchronous operations inside a for loop, you were running all your operations in parallel, which may not be exactly what you want: it makes error handling harder and server load harder to control.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one errors while the others are already running, and you'd have to be more careful about concurrency issues.
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a JavaScript identifier, or if the property name is in a variable. Otherwise, dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set the returned status to 500 when there's an error processing the request.
So, here's the code for that:
const ss = require("socket.io-stream");
router.get('/writeData', function(req, res) {
const io = req.app.get('socketio');
function streamFileFromS3(ip, data) {
return new Promise((resolve, reject) => {
const dataNodeSocket = io.connect(`http://${ip}:5000`);
dataNodeSocket.on('connect_error', reject);
dataNodeSocket.on('connect_timeout', () => {
reject(new Error(`timeout connecting to http://${ip}:5000`));
});
dataNodeSocket.on('connect', () => {
// dataNodeSocket connected now
const stream = ss.createStream().on('error', reject);
paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
const filename = data.key.split('/')[1];
const fileData = {
'mainFile': paramsWithRange.Key.split('/')[1],
'blockName': filename
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
// get S3 data and pipe it to the socket.io stream
s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
stream.on('close', () => {
dataNodeSocket.disconnect();
resolve();
});
});
});
}
function connectError(msg) {
res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
}
const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);
nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
try {
for (let item of nameNodeData.blockToDataNodes) {
await streamFileFromS3(item.ipValue, item);
}
res.json("Finished Writing to DN's");
} catch(e) {
res.status(500).json(e);
}
});
});
Other notes:
I don't know what paramsWithRange is, as it is not declared here, and when you were doing everything in parallel it was being shared among all the connections, which is asking for a concurrency issue. In my serialized implementation it's probably safe to share, but the way it is now bothers me because it's a concurrency issue waiting to happen.
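For illustration only, one way to remove that shared state would be to build the S3 params per file inside streamFileFromS3(); the bucket name and field names below are assumptions, since paramsWithRange is never shown in the question:
function buildParamsWithRange(data) {
    // Hypothetical helper: a fresh params object per call instead of one shared, mutated object
    return {
        Bucket: 'my-bucket',                               // assumed bucket name
        Key: data.key,                                     // assumed to be the S3 object key
        Range: `bytes=${data.byteStart}-${data.byteStop}`  // same Range format used above
    };
}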
I am quite new to Node.js and RabbitMQ. I wanted to create 100k queues, but I am getting the following error after creating around 6,000 queues.
error: "Cannot read property 'createChannel' of undefined"
I want to call the conn.createConnection() method again if I get an error. How can I do this?
Here's the code
var amqp = require("amqplib/callback_api");
var time = 0;
var limit = 100000
var timer = setInterval(() => {
time+=1;
if(time>=limit){
clearInterval(timer);
}
var now = Date.now()
//RabbitMQ
amqp.connect("amqp://localhost",function(err,conn){
conn.createChannel(function(err,ch){
if(err){
//try to do conn.createChannel again
}
var q = "queue_name"+time.toString();
// console.log(q);
var msg = "this is the message string!!!";
ch.assertQueue(q,{durable: false});
ch.sendToQueue(q,new Buffer(msg),{persistent: false});
// console.log("time = "+time);
});
});
//RabbitMQ
},10);
What you are doing is not only creating 100k queues, but also 100k connections and channels. You are likely running out of file handles; you can adjust the ulimit to allow more, but you probably don't want to create all those connections and channels in the first place.
Try using one connection with one publisher channel, along these lines:
var amqp = require("amqplib/callback_api");
var time = 0;
var limit = 100000
amqp.connect("amqp://localhost", function(err, conn) {
if (err) {
console.log('Something\'s wrong with connection:', err);
return;
}
conn.createChannel(function(err, ch){
if (err) {
console.log('Something\'s wrong with channel:', err);
return;
}
var timer = setInterval(() => {
time+=1;
if(time>=limit) {
clearInterval(timer);
}
var now = Date.now()
var q = "queue_name"+time.toString();
var msg = "this is the message string!!!";
ch.assertQueue(q, {durable: false} );
ch.sendToQueue(q, new Buffer(msg),{persistent: false});
console.log('sent', time);
},10);
});
});
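As a follow-up sketch (not part of the original answer), once the loop reaches the limit you may also want to close the channel and the connection so the process can exit cleanly, for example by extending the if block above:
if (time >= limit) {
    clearInterval(timer);
    // Close the channel first, then the connection.
    ch.close(function () {
        conn.close();
    });
    return;
}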
The 'end' event is firing twice. I'm trying to figure out why.
On one client, I have:
import Net from 'net';
import Chalk from 'chalk';
const fallback = [2,5,10,25,50,100,250,500,1000,2000];
class LocalNetworkInterface {
constructor({path}) {
this._sock = new Net.Socket();
this._pending = {};
this._count = 0;
this._retry = 0;
const connect = () => {
this._sock.connect({path});
};
this._sock.on('connect',() => {
this._retry = 0;
console.log(`Connected to ${Chalk.underline(path)}`);
});
this._sock.on('data',buffer => {
let data = JSON.parse(buffer);
this._pending[data.queryId].resolve(data);
delete this._pending[data.queryId];
});
this._sock.on('end', () => {
console.log(`Lost connection to ${Chalk.underline(path)}. Attempting to reconnect...`);
connect();
});
this._sock.on('error', err => {
if(err.code === 'ENOENT') {
let ms = fallback[this._retry];
if(this._retry < fallback.length - 1) ++this._retry;
console.log(`Socket server unavailable. Trying again in ${ms}ms`);
setTimeout(connect, ms);
}
});
connect();
}
// ...
}
And the server:
const sockServer = Net.createServer(c => {
c.on('data', buffer => {
let data = JSON.parse(buffer);
// log('Received',data);
let ql = queryLogger();
runQuery(Object.assign({}, data, {schema})).then(result => {
ql(`${Chalk.magenta('socket')} ${print(data.query).trim()}`);
let response = Object.assign({}, result, {queryId: data.queryId});
c.write(JSON.stringify(response));
});
})
});
sockServer.on('error', serverError => {
if(serverError.code === 'EADDRINUSE') {
let clientSocket = new Net.Socket();
clientSocket.on('error', clientError => {
if(clientError.code === 'ECONNREFUSED') {
FileSystem.unlink(SOCK_FILE, unlinkErr => {
if(unlinkErr) throw unlinkErr;
sockServer.listen(SOCK_FILE, () => {
log(`Sock server improperly shut down. Listening on '${sockServer.address()}'`)
});
});
}
});
clientSocket.connect({path: SOCK_FILE}, () => {
throw new Error(`Server already running`);
});
}
});
['SIGTERM','SIGINT'].forEach(signal => process.on(signal, () => {
console.log(`\rReceived ${Chalk.yellow(signal)}, shutting down ${Chalk.red('❤')}`);
sockServer.close();
process.exit();
}));
sockServer.listen(SOCK_FILE, () => {
log(`Listening on ${Chalk.underline(sockServer.address())}`)
});
When I restart the server, I see "Lost connection" twice on the client. Why?
The documentation says:
Emitted when the other end of the socket sends a FIN packet.
The server isn't sending two FIN packets, is it? Is there any way I can verify that?
Seeing this in docs in regard to connect...
"...This function is asynchronous. When the 'connect' event is emitted the socket is established. If there is a problem connecting, the 'connect' event will not be emitted, the 'error' event will be emitted with the exception."
Could the 'connect' event simply not be firing, making it look to you like the 'end' event fired twice? Like @robertklep said, maybe expand that error check to cover more than that one specific code.
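For example, a sketch of the client's 'error' handler that retries on any connection error instead of only ENOENT, using the same fallback array and connect() from the question:
this._sock.on('error', err => {
    // Back off and retry on any connection error, not just ENOENT
    const ms = fallback[this._retry];
    if (this._retry < fallback.length - 1) ++this._retry;
    console.log(`Socket error (${err.code || err.message}). Trying again in ${ms}ms`);
    setTimeout(connect, ms);
});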
I think it's because, on 'end', I immediately try to reconnect and the same event is caught again. It seems kind of strange that it would do that, but delaying the reconnect to the next tick works:
this._sock.on('end', () => {
console.log(`${Chalk.yellow('Lost connection')} to ${Chalk.underline(path)}. Attempting to reconnect...`);
process.nextTick(connect);
});
I will explain what I want to achieve and then what I have done (without results).
I have two Node services connected to each other with RabbitMQ using exchanges (topic):
What I want is to shut down C1 while still sending messages to something.orange.something, then restart C1 and receive all the messages that I lost while it was down.
What happens now is that each time I restart my consumer, it creates a new queue and a new binding on my exchange with the same routing key, so I end up with two queues receiving the same information.
If I configure my queue with the {exclusive: true} param, I solve part of the problem: I no longer have queues without receivers, but I still have the same issue... all messages sent while there is no active receiver are lost.
Is this possible?
Here is my code:
sender:
'use strict';
const amqp = require('amqplib/callback_api');
const logatim = require('logatim');
logatim.setLevel('info')
amqp.connect('amqp://localhost', (err, conn) => {
conn.createChannel((err, ch) => {
let ex = 'direct_colors';
let args = process.argv.slice(2);
let colors = ['colors.en.green', 'colors.en.yellow', 'colors.es.red']
ch.assertExchange(ex, 'topic', {durable: true});
setInterval(() => {
let color = colors[Math.floor(Math.random() * 3)];
let msg = `This is a ${color} message`;
ch.publish(ex, color, new Buffer(msg));
logatim[color.split('.').pop()].info(msg);
}, 1000);
});
});
receiver:
'use strict';
const amqp = require('amqplib/callback_api');
const logatim = require('logatim');
logatim.setLevel('info');
const args = process.argv.slice(2);
amqp.connect('amqp://localhost', (err, conn) => {
conn.createChannel((err, ch) => {
var ex = 'direct_colors';
ch.assertExchange(ex, 'topic', {durable: true});
ch.assertQueue('', {exclusive: true, durable: true}, (err, q) => {
logatim.green.info(' [*] Waiting for logs. To exit press CTRL+C');
args.forEach((arg) => {
ch.bindQueue(q.queue, ex, arg);
});
ch.consume(q.queue, (msg) => {
logatim[msg.fields.routingKey.split('.').pop()].info(` [x] ${msg.content.toString()}`);
});
});
});
});
You need named queues. When you declare the queue in your receiver class, give it a well-known name (constant), something like
ch.assertQueue('my_service_1_queue', {durable: true}, ...
The basic examples of this are in the RabbitMQ Tutorial.
When your consumer goes down and restarts, it will consume from the same named queue.
NOTE: you don't need the exclusive queue there, as it will be deleted when your consumer goes down.
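A minimal sketch of the receiver from the question with a named, durable queue (the queue name is just an example), including an explicit ack so consumed messages are marked as done:
'use strict';
const amqp = require('amqplib/callback_api');

const args = process.argv.slice(2);

amqp.connect('amqp://localhost', (err, conn) => {
    conn.createChannel((err, ch) => {
        const ex = 'direct_colors';
        ch.assertExchange(ex, 'topic', { durable: true });
        // Named, durable, non-exclusive queue: it survives consumer restarts,
        // so messages routed to it while the consumer is down are kept.
        ch.assertQueue('my_service_1_queue', { durable: true }, (err, q) => {
            args.forEach((arg) => ch.bindQueue(q.queue, ex, arg));
            ch.consume(q.queue, (msg) => {
                console.log(` [x] ${msg.fields.routingKey}: ${msg.content.toString()}`);
                ch.ack(msg);
            }, { noAck: false });
        });
    });
});
If the messages must also survive a broker restart, the sender would additionally need to publish them with {persistent: true}.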