I have a case where I enqueue some messages using 'queue'. Now I want to extract these messages serially. I am using 'worker', but it fires all messages asynchronously, calling my callback function for each message at the same time. Is there a way I can control the worker to extract only a single message at a time, or dequeue messages serially?
// Code enqueuing 3 messages...
var queue = new NR.queue({connection: connectionDetails});
queue.enqueue(config.Redis.resquequeuename, config.Redis.resquequeuejob, {
    message_id: 1,
    message_payload: "this is a test body of my message"
});
queue.enqueue(config.Redis.resquequeuename, config.Redis.resquequeuejob, {
    message_id: 2,
    message_payload: "this is a test body of my message"
});
queue.enqueue(config.Redis.resquequeuename, config.Redis.resquequeuejob, {
    message_id: 3,
    message_payload: "this is a test body of my message"
});
// Code using worker to dequeue messages
var GetQueueMessage = function(appcallback){
    var job = {};
    job[config.Redis.resquequeuejob] = {
        plugins: [],
        pluginOptions: {},
        perform: function(compositemessage, callback){
            callback(null, appcallback(null, compositemessage));
        }
    };
    var workername = os.hostname() + ":" + process.pid;
    var worker = new NR.worker({connection: connectionDetails, queues: [config.Redis.resquequeuename], 'name': workername}, job);
    worker.connect(function(){
        worker.workerCleanup();
        console.log("Worker starting, Resque worker. Worker name: " + workername);
        worker.start();
    });
    // different event handlers for worker here...
};
GetQueueMessage(function(err, pCompositeMessage){
    console.log("Composite Message returned from Worker: " + JSON.stringify(pCompositeMessage));
    console.log(new Date());
});
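For what it's worth, node-resque only polls for the next job after perform's callback fires; since the perform above invokes callback immediately with appcallback's return value, any asynchronous work appcallback starts is still in flight when the worker grabs the next message. A minimal sketch of deferring the callback, assuming appcallback can be changed to take a completion continuation (hypothetical):

// Sketch: defer node-resque's perform callback until the application work
// has finished, so the worker only dequeues the next message afterwards.
job[config.Redis.resquequeuejob] = {
    plugins: [],
    pluginOptions: {},
    perform: function(compositemessage, callback){
        appcallback(null, compositemessage, function(err, result){
            callback(err, result); // the worker polls for the next job only after this fires
        });
    }
};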
I have created a NodeJS application to insert data into a MongoDB collection. The database insertion is done through Kafka; kafka-node is the plugin I have used to call Kafka.
I can create the topic and send a message from the producer to the consumer. The message and topic are taken from the POST request.
This is how I call Kafka. The parameters are topic and message.
Every time I call this API, the producer creates a new message and sends it to the consumer, but on each call all previous messages are also returned to the consumer.
I have used the fromOffset: 'earliest' and fromOffset: 'latest' options to suppress the previous messages, but it is not working.
Can anyone give me a suggestion?
Version of Kafka-node
"kafka-node": "^5.0.0",
Code I have used
var kafka = require('kafka-node');
const {MongoClient} = require('mongodb');
var url = 'mongodb://127.0.0.1:27017/';
const mongoClient = new MongoClient(url);

var Producer = kafka.Producer,
    client = new kafka.KafkaClient(),
    offset = new kafka.Offset(client),
    Consumer = kafka.Consumer,
    producer = new Producer(client);

producer.on('ready', function () {
    console.log('Producer is ready');
});

producer.on('error', function (err) {
    console.log('Producer is in error state');
    console.log(err);
});

const createProducer = async (req, res, next) => {
    var topic = req.body.topic;
    var sentMessage = JSON.stringify(req.body.messages);
    var payloads = [
        { topic: topic, messages: sentMessage, partition: 0 }
    ];
    producer.send(payloads, async function (err, data) {
    });

    var client = new kafka.KafkaClient();
    var consumer = new Consumer(
        client,
        [
            { topic: topic, partition: 0 }
        ],
        {
            autoCommit: false,
            fromOffset: 'earliest'
        }
    );
    consumer.on('message', async function (message) {
        console.log("Message : " + JSON.stringify(message));
        try {
            var currentdate = new Date();
            var datetime = "Last Sync: " + currentdate.getDate() + "/"
                + (currentdate.getMonth() + 1) + "/"
                + currentdate.getFullYear() + " # "
                + currentdate.getHours() + ":"
                + currentdate.getMinutes() + ":"
                + currentdate.getSeconds();
            var abb = await createListing(mongoClient,
                {
                    topic: topic,
                    message: sentMessage,
                    time: datetime
                }
            );
        } catch (e) {
            console.error(":" + e);
        }
    });
    await mongoClient.close();
    res.send({
        message: 'Successfully sent data from producer',
        payloads: payloads
    });

    async function createListing(client, newListing) {
        await mongoClient.connect();
        const result = await client.db("sample_airbnb").collection("listingsAndReviews").insertOne(newListing);
        console.log(`New listing created with the following id: ${result.insertedId}`);
        return result.insertedId;
    }
}
The messages consumed at the consumer (output screenshot omitted) include every previously produced message.
Thanks,
Your consumer will always consume all offsets that have not been marked as consumed by its consumer group before.
This means that after consuming a given message (or a batch of messages), you need to commit the highest consumed offset to your Kafka cluster to effectively mark those messages as consumed. Only then will your consumer group not re-consume those messages on startup.
To commit your offsets, you can either use kafka.js's autoCommit feature (which you explicitly disabled in your implementation), or manually commit your offsets using the API provided by kafka.js.
You can find the documentation for both here: https://kafka.js.org/docs/consuming#a-name-auto-commit-a-autocommit
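The question actually uses kafka-node rather than kafka.js, so here is a rough equivalent there, reusing the client, Consumer, and topic from the question's code; treat it as a sketch of the two options, not a drop-in fix:

// Option 1: let kafka-node commit consumed offsets periodically.
var consumer = new Consumer(client, [{ topic: topic, partition: 0 }], {
    groupId: 'my-group',          // offsets are tracked per consumer group
    autoCommit: true,
    autoCommitIntervalMs: 5000
});

// Option 2: keep autoCommit: false and commit explicitly after processing.
consumer.on('message', function (message) {
    // ...process the message, then mark everything up to it as consumed...
    consumer.commit(function (err, data) {
        if (err) console.error('commit failed', err);
    });
});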
I made some changes in the code, and now I can retrieve the latest message from my topic.
I created the consumer inside offset.fetchLatestOffsets([topic], cb) and made some changes to the consumer options.
var payloads = [
    { topic: topicName, messages: messageTotopic, partition: 0 }
];
producer.send(payloads, async function (err, data) {
});

var client = new kafka.KafkaClient();
offset.fetchLatestOffsets([topic], async function (error, offsets) {
    if (error)
        console.log(error);
    var offsetA = JSON.stringify(offsets[topic][0]);
    console.log('offset Value:: ' + offsetA);
    var consumer = new Consumer(
        client,
        [
            {
                topic: topic,
                partition: 0,
                offset: offsetA - 1, // Offset value starts from 0
            }
        ], {
            autoCommit: false,
            fromOffset: true,
        }
    );
    consumer.on('message', async function (message) {
        console.log("Message from last offset:: " + JSON.stringify(message)); // will return the latest message
        consumer.close();
    });
});
This way I was also able to overcome the memory-leak issue related to the event emitters in the KafkaClient.
I have a process that uses RabbitMQ and NodeJS to do image processing. Because the task is CPU-intensive, I think I have the same issue as described here: https://github.com/squaremo/amqp.node/issues/261
I am trying to figure out how to implement the last comment on that issue.
"Yep. NodeJS is single-threaded, and if you use that thread to do something for a long time, nothing else will happen. As #michaelklishin suggests, the known solution for this general problem is using a child process, or the cluster module."
EDIT:
I updated the code below with a sample of how I think I can do this with the amqp-connection-manager module. Right now I use a global variable to hold the actual message so that I can ack it later. I am guessing there is a better way to do this.
//Used to be an example for how to keep the connection thread and the working thread separate
//in order to fix the issue of missing heartbeat intervals due to processing on the same thread
const cluster = require('cluster');
var amqp = require('amqp-connection-manager');
var config = require('./config.json');

var working_queue = "Test_Queue";

//DONT REALLY DO THIS
var rabbit_msg_data;

//******* CLUSTER SETUP *******
// This will spawn off the number of workers for this process.
if (cluster.isMaster) {
    console.log("Master " + process.pid + " is running");
    var worker = cluster.fork();

    cluster.on('exit', (worker, code, signal) => {
        if (signal) {
            console.log("worker was killed by signal: " + signal);
            console.log(worker);
        } else if (code !== 0) {
            console.log("worker exited with error code: " + code);
            console.log(worker);
        } else {
            console.log("Worker " + worker.process.pid + " exited successfully");
            console.log(worker);
            //Not sure if this works this way or if I need to put this worker into variables
        }
    });

    //testing sending a message back and forth
    // setTimeout(function() {
    //     worker.send("I got a request!");
    // }, 1000);

    //******** RABBIT MQ CONNECTION **********
    // Create a connection manager to rabbitmq
    var connection = amqp.connect(config.rabbit_connections_arr, {json: true, heartbeatIntervalInSeconds: 2});
    connection.on('connect', function() {
        console.log('Connected to rabbitmq');
    });
    connection.on('disconnect', function(params) {
        console.log('Disconnected from rabbitmq:', params.err.stack);
    });

    // Set up a channel listening for messages in the queue.
    var channelWrapper_listening = connection.createChannel({
        setup: function(channel) {
            // `channel` here is a regular amqplib `ConfirmChannel`.
            return Promise.all([
                channel.assertQueue(working_queue, {durable: true}),
                channel.prefetch(1),
                channel.consume(working_queue, function(data) {
                    rabbit_msg_data = data;
                    worker.send(data.content.toString());
                }, { noAck: false }) // consume options: require an explicit ack (done below)
            ]);
        }
    });

    worker.on('message', function(msg) {
        console.log("Worker to Master (ack): ", msg);
        channelWrapper_listening.ack(rabbit_msg_data);
    });
}
else //All worker processes (MAIN LOGIC)
{
    console.log("Worker " + process.pid + " started");
    process.on('message', function(msg) {
        console.log("Master to Worker (working): ", msg);
        //send msg back when done working on it.
        setTimeout(function() {
            process.send(msg);
        }, 5000);
    });
}
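On the "better way than a global" point: one option (just a sketch; it assumes amqplib delivery objects, which expose fields.deliveryTag) is to keep in-flight messages in a Map keyed by delivery tag and have the worker echo the tag back with its reply:

// Sketch: track in-flight messages by delivery tag instead of a single global.
var pending = new Map();

// inside channel.consume(working_queue, ...):
//     pending.set(data.fields.deliveryTag, data);
//     worker.send({ tag: data.fields.deliveryTag, body: data.content.toString() });

worker.on('message', function(msg) {
    var original = pending.get(msg.tag); // worker echoes back { tag, ... } when done
    if (original) {
        channelWrapper_listening.ack(original);
        pending.delete(msg.tag);
    }
});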
How does my function continuously check for incoming messages? The following function exits after receiving a single response. Given that long polling has been enabled for the queue, how do I continuously check for new messages?
function checkMessage(){
    var params = {
        QueueUrl: Constant.QUEUE_URL,
        VisibilityTimeout: 0,
        WaitTimeSeconds: 0
    };
    sqs.receiveMessage(params, (err, data) => {
        if (data) {
            console.log("%o", data);
        }
    });
}
Your function would need to continually poll Amazon SQS.
Long Polling will delay a response by up to 20 seconds if there are no messages available. If a message becomes available during that period, it will be immediately returned. If there is no message after 20 seconds, it returns without providing a message.
Therefore, your function would need to poll SQS again (perhaps doing something else in the meantime).
var processMessages = function (err, data) {
    if (data && data.Messages) {
        for (var i = 0; i < data.Messages.length; i++) {
            var message = data.Messages[i];
            var body = JSON.parse(message.Body);
            // process message
            // delete if successful
        }
    }
};

(async function () {
    while (true) {
        // awaiting each receive keeps the loop from flooding SQS with parallel requests
        var data = await sqs.receiveMessage({
            QueueUrl: sqsQueueUrl,
            MaxNumberOfMessages: 5, // how many messages to retrieve in a batch
            VisibilityTimeout: 60,  // how long until these messages are available to another consumer
            WaitTimeSeconds: 15     // how many seconds to wait for messages before continuing
        }).promise();
        processMessages(null, data);
    }
})();
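The "// delete if successful" step above corresponds to sqs.deleteMessage, passing the ReceiptHandle from the received message:

// Delete a processed message so it is not redelivered after the
// visibility timeout expires.
sqs.deleteMessage({
    QueueUrl: sqsQueueUrl,
    ReceiptHandle: message.ReceiptHandle
}, function (err) {
    if (err) console.error("delete failed", err);
});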
(function checkMessage(){
    var params = {
        QueueUrl: Constant.QUEUE_URL,
        VisibilityTimeout: 0,
        WaitTimeSeconds: 0
    };
    sqs.receiveMessage(params, (err, data) => {
        if (data) {
            console.log("%o", data);
        }
        checkMessage();
    });
})();
To continuously check for incoming messages on your AWS SQS queue, recursively call sqs.receiveMessage from within its own callback, as shown above. Since the callback fires asynchronously, the recursion does not grow the call stack, and setting WaitTimeSeconds above 0 makes each call long-poll instead of busy-spinning.
I'm using a HighLevelProducer and HighLevelConsumer to send and receive messages. The HighLevelConsumer is configured with autoCommit=false, as I want to commit a message only once it has been produced successfully. The problem is that the first message never really gets committed.
Example:
Send Messages 1-10.
Receive Message 1
Receive Message 2
Commit Message 2
...
Receive Message 10
Commit Message 10
Commit Message 1
If I restart my Consumer, all messages from 1 to 10 are processed again. Only if I send new messages to the consumer, the old messages get committed. This happens for any number of messages.
My Code reads as follows:
var kafka = require('kafka-node'),
    HighLevelConsumer = kafka.HighLevelConsumer,
    client = new kafka.Client("localhost:2181/"),
    consumer = new HighLevelConsumer(
        client,
        [
            { topic: 'mytopic' }
        ],
        {
            groupId: 'my-group',
            id: "my-consumer-1",
            autoCommit: false
        }
    );

consumer.on('message', function (message) {
    console.log("consume: " + message.offset);
    consumer.commit(function (err, data) {
        console.log("committed: " + message.offset);
    });
    console.log("consumed: " + message.offset);
});

process.on('SIGINT', function () {
    consumer.close(true, function () {
        process.exit();
    });
});

process.on('exit', function () {
    consumer.close(true, function () {
        process.exit();
    });
});
var messages = 10;
var kafka = require('kafka-node'),
    HighLevelProducer = kafka.HighLevelProducer,
    client = new kafka.Client("localhost:2181/");
var producer = new HighLevelProducer(client, { partitionerType: 2, requireAcks: 1 });

producer.on('error', function (err) { console.log(err) });
producer.on('ready', function () {
    for (let i = 0; i < messages; i++) { // let: each send callback closes over its own i
        var payloads = [{ topic: 'mytopic', messages: "" }];
        producer.send(payloads, function (err, data) {
            err ? console.log(i + "err", err) : console.log(i + "data", data);
        });
    }
});
Am I doing something wrong or is this a bug in kafka-node?
A commit of message 2 is an implicit commit of message 1.
As your commits are done asynchronously, and the commits of message 1 and message 2 happen in quick succession (i.e., committing 2 happens before the consumer has sent the commit of 1), the first commit will never happen explicitly and only a single commit, of message 2, will be sent.
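If the goal is for each message's commit to actually be sent before the next message is handled, one way with kafka-node is to pause the consumer until the commit callback fires. A minimal sketch, where processMessage is a hypothetical application handler:

consumer.on('message', function (message) {
    consumer.pause(); // stop the flow of messages while this one is handled
    processMessage(message, function (err) {   // hypothetical app handler
        consumer.commit(function (commitErr) { // commit offsets consumed so far
            if (commitErr) console.error(commitErr);
            consumer.resume();                 // let the next message through
        });
    });
});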
Thanks in advance to anyone who reads this.
I need to be able to send gcm messages (notifications) to a list of client IDs at a certain time.
I am trying to use Agenda.js since it has a persistence layer.
The following code seems to work just fine initially, executing exactly when it is supposed to. But after the server sits idle for a while, the job starts executing in a loop, and the log also includes:
"WARNING: Date in past. Will never be fired."
Here is the relevant code.
var gcm = require('node-gcm');  // assumption: gcm.Message/gcm.Sender come from the node-gcm package
var Agenda = require('agenda');

var agenda = new Agenda({db: {address: configParams.db}});

schedule_notifications = function(req) {
    // define an agenda task named notify
    agenda.define('notify', function(job, done) {
        // create a gcm message
        var message = new gcm.Message({
            notification: { "body": 'test' }
        });
        var sender = new gcm.Sender('server id');
        var regTokens = ['phone id'];

        // send the message
        sender.send(message, { registrationTokens: regTokens }, function(err, response) {
            if (err) console.error(err);
            else console.log(response);
            done();
        });
    });

    // get the object from the request
    var req_json = JSON.parse(req.body.data),
        keys = Object.keys(req_json),
        key_string = keys[0],
        start_obj = new Date(req_json[key_string][0].start);

    // schedule the job with the date object found in the request
    // start_obj, for example, could be made using
    // start_obj = new Date();
    // notify is the name of the job to run
    agenda.schedule(start_obj, 'notify');
    agenda.start();

    // can comment agenda.schedule and uncomment the following line to delete the unfinished jobs in db
    // agenda.purge(function(err, numRemoved) {});
}
Does anyone have any idea of why this could be happening? Any tips on how to debug this issue?
Thanks!
I fixed the problem. I added in a job.remove call and the job no longer fires in a loop.
var gcm = require('node-gcm');  // assumption: gcm.Message/gcm.Sender come from the node-gcm package
var Agenda = require('agenda');

var agenda = new Agenda({db: {address: configParams.db}});

schedule_notifications = function(req) {
    // define an agenda task named notify
    agenda.define('notify', function(job, done) {
        // create a gcm message
        var message = new gcm.Message({
            notification: { "body": 'test' }
        });
        var sender = new gcm.Sender('server id');
        var regTokens = ['phone id'];

        // send the message
        sender.send(message, { registrationTokens: regTokens }, function(err, response) {
            if (err) console.error(err);
            else console.log(response);
            done();
        });

        // remove the job from the collection so it is not picked up again
        job.remove(function(err) {
            if (!err) console.log("Successfully removed job from collection");
        });
    });

    // get the object from the request
    var req_json = JSON.parse(req.body.data),
        keys = Object.keys(req_json),
        key_string = keys[0],
        start_obj = new Date(req_json[key_string][0].start);

    // schedule the job with the date object found in the request
    // start_obj, for example, could be made using
    // start_obj = new Date();
    // notify is the name of the job to run
    agenda.schedule(start_obj, 'notify');
    agenda.start();

    // can comment agenda.schedule and uncomment the following line to delete the unfinished jobs in db
    // agenda.purge(function(err, numRemoved) {});
}
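One more observation (an assumption about a contributing cause, not something confirmed in the thread): agenda.define and agenda.start() run inside the request handler, so every request redefines the 'notify' job and restarts the processor. The usual pattern is to define jobs and start agenda once at startup and only schedule per request; a sketch with a hypothetical sendGcmNotification helper holding the send logic from above:

// Define jobs and start agenda once, at application startup.
agenda.define('notify', function(job, done) {
    sendGcmNotification(job, done); // hypothetical: wraps the gcm send logic above
});
agenda.start();

// Per request, only schedule.
schedule_notifications = function(req) {
    var req_json = JSON.parse(req.body.data),
        key_string = Object.keys(req_json)[0],
        start_obj = new Date(req_json[key_string][0].start);
    agenda.schedule(start_obj, 'notify');
};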