Can I filter an Azure ServiceBusService using the Node.js SDK?

I have millions of messages in a queue and the first ten million or so are irrelevant. Each message has a sequential ActionId, so ideally I can just ignore anything < 10000000, or better yet delete it from the queue. What I have so far:
let azure = require("azure");

function processMessage(sb, message) {
  // Deserialize the JSON body into an object representing the ActionRecorded event
  var actionRecorded = JSON.parse(message.body);
  console.log(`processing id: ${actionRecorded.ActionId} from ${actionRecorded.ActionTaken.ActionTakenDate}`);
  if (actionRecorded.ActionId < 10000000) {
    // When done, delete the message from the queue
    console.log(`Deleting message: ${message.brokerProperties.MessageId} with ActionId: ${actionRecorded.ActionId}`);
    sb.deleteMessage(message, function(deleteError, response) {
      if (deleteError) {
        console.log("Error deleting message: " + message.brokerProperties.MessageId);
      }
    });
  }
  // Immediately check for another message
  checkForMessages(sb);
}

function checkForMessages(sb) {
  // Checking for messages
  sb.receiveQueueMessage("my-queue-name", { isPeekLock: true }, function(receiveError, message) {
    if (receiveError && receiveError === "No messages to receive") {
      console.log("No messages left in queue");
      return;
    } else if (receiveError) {
      console.log("Receive error: " + receiveError);
    } else {
      processMessage(sb, message);
    }
  });
}

let connectionString = "Endpoint=sb://<myhub>.servicebus.windows.net/;SharedAccessKeyName=KEYNAME;SharedAccessKey=[mykey]";
let serviceBusService = azure.createServiceBusService(connectionString);
checkForMessages(serviceBusService);
I've tried looking at the docs for withFilter but it doesn't seem like that applies to queues.
I don't have access to create or modify the underlying queue aside from the operations mentioned above since the queue is provided by a client.
Can I either:
filter the results I get from the queue, or
speed up the queue processing somehow?

Filter my results that I get from the queue
As you found, filters as a feature are only applicable to Topics & Subscriptions.
speed up the queue processing somehow
If you were to use the @azure/service-bus package, which is the newer, faster library for working with Service Bus, you could receive messages in ReceiveAndDelete mode until you reach the message with ActionId 9999999, close that receiver, and then create a new receiver in PeekLock mode. For more on these receive modes, see https://learn.microsoft.com/en-us/azure/service-bus-messaging/message-transfers-locks-settlement#settling-receive-operations
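A minimal sketch of that approach, assuming the v7 @azure/service-bus API and that the message body is a JSON string as in your snippet (queue name and threshold taken from the question):

const { ServiceBusClient } = require("@azure/service-bus");

async function skipIrrelevantThenProcess(connectionString) {
  const sbClient = new ServiceBusClient(connectionString);

  // ReceiveAndDelete: messages are removed from the queue as soon as they are
  // handed to us, so this drains the irrelevant prefix quickly.
  let receiver = sbClient.createReceiver("my-queue-name", { receiveMode: "receiveAndDelete" });
  let reachedThreshold = false;
  while (!reachedThreshold) {
    const messages = await receiver.receiveMessages(100, { maxWaitTimeInMs: 5000 });
    if (messages.length === 0) break; // queue drained
    for (const message of messages) {
      const actionRecorded = JSON.parse(message.body);
      if (actionRecorded.ActionId >= 10000000) {
        reachedThreshold = true;
        break;
      }
    }
  }
  await receiver.close();

  // Switch to PeekLock for the messages you actually care about, so a crash
  // while processing does not lose them.
  receiver = sbClient.createReceiver("my-queue-name", { receiveMode: "peekLock" });
  return receiver;
}

Bear in mind that ReceiveAndDelete deletes everything it hands out, so receive in smaller batches as you approach the boundary, or accept losing the few relevant messages that arrive in the final batch.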

Related

RabbitMQ data lost on crash

I'm using RabbitMQ to store and retrieve data. I referred to this article. I have set the durable flag to true and the noAck flag to false (I need to store the messages on the queue even after consuming).
I created these scenarios:
I updated stock data 3 times with the consumer inactive, then activated the consumer. It consumed all three messages from the queue. [Works well.]
I then produced three more messages (consumer inactive again) and turned off the RabbitMQ server. When I restarted the server and activated the consumer, it did not consume any data (have the messages that were on the queue been lost?)
Consumer:
connection.createChannel(function (error1, channel) {
  if (error1) {
    throw error1;
  }
  var queue = "updateStock2";
  channel.assertQueue(queue, {
    durable: true,
  });
  console.log(
    " [*] Waiting for stockData messages in %s. To exit press CTRL+C",
    queue
  );
  channel.consume(
    queue,
    function (data) {
      stock = JSON.parse(data.content.toString());
      console.log(" [x] Received Stock:", stock.name + " : " + stock.value);
    },
    {
      noAck: false,
    }
  );
});
Producer:
connection.createChannel(function (error1, channel) {
  if (error1) {
    throw error1;
  }
  var queue = "updateStock2";
  channel.assertQueue(queue, {
    durable: true,
  });
  channel.sendToQueue(queue, Buffer.from(data));
  console.log(" [x] Sent %s", data);
});

setTimeout(function () {
  connection.close();
  //process.exit(0);
}, 500);
Aren't they persistent? If the server crashes, are all the messages in the queue gone forever?
How can I retrieve the data that was in the queue when the server crashed?
Thanks in advance.
Why were your messages lost?
Regrettably, you did not declare {persistent: true} when sending the messages. Check https://www.rabbitmq.com/tutorials/tutorial-two-javascript.html; you should use channel.sendToQueue(queue, Buffer.from(msg), {persistent: true});
Aren't they persistent?
Durable queues will be recovered on node boot, including messages in them published as persistent. Messages published as transient will be discarded during recovery, even if they were stored in durable queues.
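To make that concrete, here is a corrected producer sketch based on the code in the question (assuming the amqplib callback API; the sample payload is made up):

const amqp = require("amqplib/callback_api");

amqp.connect("amqp://localhost", function (error0, connection) {
  if (error0) throw error0;
  connection.createChannel(function (error1, channel) {
    if (error1) throw error1;
    var queue = "updateStock2";
    var data = JSON.stringify({ name: "ACME", value: 42 }); // example payload

    // A durable queue survives a broker restart...
    channel.assertQueue(queue, { durable: true });

    // ...but each message must also be marked persistent to be written to disk
    channel.sendToQueue(queue, Buffer.from(data), { persistent: true });
    console.log(" [x] Sent %s", data);
  });
  setTimeout(function () {
    connection.close();
  }, 500);
});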
Which middleware might be better for you?
If you want middleware that persists messages even after consumers have consumed them, you may need Kafka.

Async base-local with MQTT

I need to synchronize a base and a local client with MQTT. If one client publishes, the other one should get the message.
If my MQTT broker is down, I need to stop sending messages, save the messages somewhere, wait for a connection, then continue sending.
If my local or base client is down for a second, I need to save the message which I didn't send, then send it when I turn on my base/local.
I'm working with Node.js and can't figure out how to implement this.
This is my handler when I connect or disconnect with my MQTT server.
client.on('connect', () => {
  store.state = true;
  run(store).then((value) => console.log('stop run'));
});

client.on('offline', () => {
  store.state = false;
  console.log('offline');
});
This is my run function. I use store.state to decide if I should stop this interval. But this code does not seem to be a good way to implement my concept.
function run(store) {
  return new Promise((resolve, reject) => {
    let interval = setInterval(() => {
      if (!store.state) {
        clearInterval(interval);
        resolve(true);
      }
      else if (store.queue.length > 0) {
        let data = store.queue.pop();
        let res = client.publish('push', JSON.stringify(data), {qos: 2});
      }
    }, 300)
  });
}
What should I do to implement a function that always sends, stops upon 'disconnect', then continues sending when connected?
I don't think a setInterval of 300 ms is a good approach.
If you want something that "always runs", at set intervals and in spite of any errors inside the loop, setInterval() makes sense. You are right that queued messages can be cleared faster than "once every 300 ms".
Since MQTT.js has a built-in queue, you could simplify a lot by using it. However, your messages are published to a target called "push", so I guess you want them delivered in the order of the queue. This answer keeps the queue and focuses on sending the next message as soon as the last one is confirmed.
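For reference, a sketch of what leaning on that built-in queue can look like (the broker URL and options here are illustrative, not taken from your setup). With QoS > 0, publish() calls made while the client is offline are queued in memory and flushed on reconnect:

const mqtt = require('mqtt');

const client = mqtt.connect('mqtt://broker.example.com', {
  clean: false,       // keep the session so the broker also retains undelivered QoS>0 messages
  queueQoSZero: false // drop QoS 0 messages while offline instead of queueing them
});

// Safe to call even while disconnected; MQTT.js queues the packet internally
// and sends it once the connection comes back.
client.publish('push', JSON.stringify({ hello: 'world' }), { qos: 2 });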
What if res = client.publish(...) is false?
Good point! If you want to make sure it arrives, better to remove it once the publish has succeeded. For this, you need to retrieve the value without removing it, and use the callback argument to find out what happened (publish() is asynchronous). If that was the only change, it might look like:
let data = store.queue[store.queue.length - 1];
client.publish('push', JSON.stringify(data), {qos: 2}, (err) => {
  if (!err) {
    store.queue.pop();
  }
  // Ready for next publish; call this function again
});
Extending that into a promise-based run:
function publishFromQueue(data) {
  return new Promise((resolve, reject) => {
    let res = client.publish('push', JSON.stringify(data), {qos: 2}, (err) => {
      resolve(!err);
    });
  });
}

async function run(store) {
  while (store.queue.length > 0 && store.state) {
    let data = store.queue[store.queue.length - 1];
    let res = await publishFromQueue(data);
    if (res) {
      store.queue.pop();
    }
  }
}
This should deliver all the queued messages in order as soon as possible, without blocking. The only drawback is that it does not run constantly. You have two options:
Recur at set intervals, as you have already done. Slower, though you could set a shorter interval.
Only run() when needed, like:
let isRunning = false; // Global for tracking whether run() is active
function queueMessage(data) {
  store.queue.push(data);
  if (!isRunning) {
    isRunning = true;
    // Clear the flag only once run() has drained the queue
    run(store).then(() => { isRunning = false; });
  }
}
As long as you can call this instead of pushing to the queue directly, it should come out a similar length, but more immediate and efficient.

How to control commit of a consumed kafka message using kafka-node

I'm using Node with Kafka for the first time, using kafka-node. Consuming a message requires calling an external API, which might take as long as a second to respond. I wish to overcome sudden failures of my consumer, in such a way that if a consumer fails, another consumer that replaces it will receive the same message whose work was not completed.
I'm using kafka 0.10 and trying to use ConsumerGroup.
I thought of setting autoCommit: false in options, and committing the message only once its work has been completed (as I previously done with some Java code some time ago).
However, I can't seem to figure out how to correctly commit the message only once it is done. How should I commit it?
Another worry I have is that, because of the callbacks, it seems the next message is read before the previous one has finished. I'm afraid that if message x+2 finishes before message x+1, then the offset will be set at x+2, and in case of failure x+1 will never be re-executed.
Here is basically what I did so far:
var options = {
  host: connectionString,
  groupId: consumerGroupName,
  id: clientId,
  autoCommit: false
};

var kafka = require("kafka-node");
var ConsumerGroup = kafka.ConsumerGroup;
var consumerGroup = new ConsumerGroup(options, topic);

consumerGroup.on('connect', function() {
  console.log("Consuming Kafka %s, topic=%s", JSON.stringify(options), topic);
});

consumerGroup.on('message', function(message) {
  console.log('%s read msg Topic="%s" Partition=%s Offset=%d', this.client.clientId, message.topic, message.partition, message.offset);
  console.log(message.value);
  doSomeStuff(function() {
    // HOW TO COMMIT????
    consumerGroup.commit(function(err, data) {
      console.log("------ Message done and committed ------");
    });
  });
});

consumerGroup.on('error', function(err) {
  console.log("Error in consumer: " + err);
  close();
});

process.once('SIGINT', function () {
  close();
});

var close = function() {
  // SHOULD SEND 'TRUE' TO CLOSE ???
  consumerGroup.close(true, function(error) {
    if (error) {
      console.log("Consuming closed with error", error);
    } else {
      console.log("Consuming closed");
    }
  });
};
One thing you can do here is to have a retry mechanism for every message you process.
You can consult my answer on this thread:
https://stackoverflow.com/a/44328233/2439404
I consume messages from Kafka using kafka-consumer, batch them together using async/cargo and put them in async/queue (an in-memory queue). The queue takes a worker function as an argument, to which I am passing an async/retryable.
For your problem, you can just use retryable to do processing on your messages.
https://caolan.github.io/async/docs.html#retryable
This may solve your problem.
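A rough sketch of that idea applied to your ConsumerGroup (here callExternalApi is a stand-in for your own API call, and the retry counts are arbitrary):

const async = require('async');

// Worker for one Kafka message; calls the slow external API
function processMessage(message, callback) {
  callExternalApi(message.value, callback); // your own function
}

// Wrap the worker so each message is retried up to 3 times before failing
const retryingWorker = async.retryable({ times: 3, interval: 200 }, processMessage);

// Concurrency 1 preserves ordering, so offsets are committed in order
const q = async.queue(retryingWorker, 1);

consumerGroup.on('message', function (message) {
  q.push(message, function (err) {
    if (!err) {
      // Commit only after this message has been fully processed
      consumerGroup.commit(function (commitErr) {
        if (commitErr) console.log('Commit error: ' + commitErr);
      });
    }
  });
});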

can I limit consumption of kafka-node consumer?

It seems like my kafka node consumer:
var kafka = require('kafka-node');
var Consumer = kafka.Consumer;
var consumer = new Consumer(client, [], {
  ...
});
is fetching far more messages than I can handle in certain cases.
Is there a way to limit it (for example, accept no more than 1000 messages per second, possibly using the pause api)?
I'm using kafka-node, which seems to have a limited api compared to the Java version.
In Kafka, poll and process should happen in a coordinated/synchronized way. Ie, after each poll, you should process all received data first, before you do the next poll. This pattern will automatically throttle the number of messages to the max throughput your client can handle.
Something like this (pseudo-code):
while (isRunning) {
  messages = poll(...)
  for (m : messages) {
    process(m);
  }
}
(That is the reason why there is no parameter "fetch.max.messages" -- you just do not need it.)
I had a similar situation where I was consuming messages from Kafka and had to throttle the consumption because my consumer service was dependent on a third party API which had its own constraints.
I used async/queue along with a wrapper of async/cargo called asyncTimedCargo for batching purposes.
The cargo gets all the messages from the kafka-consumer and sends them to the queue upon reaching the size limit batch_config.batch_size or the timeout batch_config.batch_timeout.
async/queue provides saturated and unsaturated callbacks which you can use to stop the consumption if your queue task workers are busy. This stops the cargo from filling up, so your app does not run out of memory. Consumption resumes upon unsaturation.
//cargo-service.js
module.exports = function(key) {
  return new asyncTimedCargo(function(tasks, callback) {
    var length = tasks.length;
    var postBody = [];
    for (var i = 0; i < length; i++) {
      var message = {};
      var task = JSON.parse(tasks[i].value);
      message = task;
      postBody.push(message);
    }
    var postJson = {
      "json": {"request": postBody}
    };
    sms_queue.push(postJson);
    callback();
  }, batch_config.batch_size, batch_config.batch_timeout)
};

//kafka-consumer.js
var cargo = require('./cargo-service')(key);
consumer.on('message', function (message) {
  if (message && message.value && utils.isValidJsonString(message.value)) {
    var msgObject = JSON.parse(message.value);
    cargo.push(message);
  }
  else {
    logger.error('Invalid JSON Message');
  }
});

// sms-queue.js
var sms_queue = queue(
  retryable({
    times: queue_config.num_retries,
    errorFilter: function (err) {
      logger.info("inside retry");
      console.log(err);
      if (err) {
        return true;
      }
      else {
        return false;
      }
    }
  }, function (task, callback) {
    // your worker task for queue
    callback()
  }), queue_config.queue_worker_threads);

sms_queue.saturated = function() {
  consumer.pause();
  logger.warn('Queue saturated Consumption paused: ' + sms_queue.running());
};

sms_queue.unsaturated = function() {
  consumer.resume();
  logger.info('Queue unsaturated Consumption resumed: ' + sms_queue.running());
};
From the FAQ in the README:
Create an async.queue with a message processor and concurrency of one (the message processor itself is wrapped with setImmediate so it will not freeze up the event loop).
Set the queue.drain to resume() the consumer.
Have the handler for the consumer's message event pause() the consumer and push the message to the queue.
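A sketch of that FAQ recipe (processMessage stands in for your own handler):

const async = require('async');

// Queue with a single worker; setImmediate keeps the event loop responsive
const q = async.queue(function (message, done) {
  setImmediate(function () {
    processMessage(message, done); // your own processing function
  });
}, 1);

// Resume fetching once every queued message has been handled
// (async v2 callback-property style; in async v3 this would be q.drain(fn))
q.drain = function () {
  consumer.resume();
};

consumer.on('message', function (message) {
  consumer.pause(); // stop fetching while the backlog is worked through
  q.push(message);
});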
As far as I know the API does not have any kind of throttling. But both consumers (Consumer and HighLevelConsumer) have a pause() function, so you could stop consuming if you get too many messages. Maybe that already offers what you need.
Please keep in mind what's happening: you send a fetch request to the broker and get a batch of messages back. You can configure the min and max size of the messages (according to the documentation, not the number of messages) you want to fetch:
{
  ....
  // This is the minimum number of bytes of messages that must be available to give a response, default 1 byte
  fetchMinBytes: 1,
  // The maximum bytes to include in the message set for this partition. This helps bound the size of the response.
  fetchMaxBytes: 1024 * 1024,
}
I was facing the same issue; initially the fetchMaxBytes value was
fetchMaxBytes: 1024 * 1024 * 10 // 10MB
I just changed it to
fetchMaxBytes: 1024
It worked very smoothly after the change.

RabbitMQ / AMQP: single queue, multiple consumers for same message?

I am just starting to use RabbitMQ and AMQP in general.
I have a queue of messages
I have multiple consumers, and I would like each of them to do different things with the same message.
Most of the RabbitMQ documentation seems to be focused on round-robin, ie where a single message is consumed by a single consumer, with the load being spread between each consumer. This is indeed the behavior I witness.
An example: the producer has a single queue, and send messages every 2 sec:
var amqp = require('amqp');
var connection = amqp.createConnection({ host: "localhost", port: 5672 });
var count = 1;

connection.on('ready', function () {
  var sendMessage = function(connection, queue_name, payload) {
    var encoded_payload = JSON.stringify(payload);
    connection.publish(queue_name, encoded_payload);
  }
  setInterval(function() {
    var test_message = 'TEST ' + count;
    sendMessage(connection, "my_queue_name", test_message);
    count += 1;
  }, 2000)
})
And here's a consumer:
var amqp = require('amqp');
var connection = amqp.createConnection({ host: "localhost", port: 5672 });

connection.on('ready', function () {
  connection.queue("my_queue_name", function(queue) {
    queue.bind('#');
    queue.subscribe(function (message) {
      var encoded_payload = unescape(message.data);
      var payload = JSON.parse(encoded_payload);
      console.log('Received a message:');
      console.log(payload);
    })
  })
})
If I start the consumer twice, I can see that each consumer is consuming alternate messages in round-robin behavior. Eg, I'll see messages 1, 3, 5 in one terminal, 2, 4, 6 in the other.
My question is:
Can I have each consumer receive the same messages? Ie, both consumers get message 1, 2, 3, 4, 5, 6? What is this called in AMQP/RabbitMQ speak? How is it normally configured?
Is this commonly done? Should I just have the exchange route the message into two separate queues, with a single consumer, instead?
Can I have each consumer receive the same messages? Ie, both consumers get message 1, 2, 3, 4, 5, 6? What is this called in AMQP/RabbitMQ speak? How is it normally configured?
No, not if the consumers are on the same queue. From RabbitMQ's AMQP Concepts guide:
it is important to understand that, in AMQP 0-9-1, messages are load balanced between consumers.
This seems to imply that round-robin behavior within a queue is a given, and not configurable. Ie, separate queues are required in order to have the same message ID be handled by multiple consumers.
Is this commonly done? Should I just have the exchange route the message into two separate queues, with a single consumer, instead?
No it's not; single queue/multiple consumers with each consumer handling the same message ID isn't possible. Having the exchange route the message into two separate queues is indeed better.
As I don't require too complex routing, a fanout exchange will handle this nicely. I didn't focus too much on exchanges earlier, as node-amqp has the concept of a 'default exchange' allowing you to publish messages to a connection directly; however, most AMQP messages are published to a specific exchange.
Here's my fanout exchange, both sending and receiving:
var amqp = require('amqp');
var connection = amqp.createConnection({ host: "localhost", port: 5672 });
var count = 1;

connection.on('ready', function () {
  connection.exchange("my_exchange", options={type:'fanout'}, function(exchange) {
    var sendMessage = function(exchange, payload) {
      console.log('about to publish');
      var encoded_payload = JSON.stringify(payload);
      exchange.publish('', encoded_payload, {});
    }

    // Receive messages
    connection.queue("my_queue_name", function(queue) {
      console.log('Created queue');
      queue.bind(exchange, '');
      queue.subscribe(function (message) {
        console.log('subscribed to queue');
        var encoded_payload = unescape(message.data);
        var payload = JSON.parse(encoded_payload);
        console.log('Received a message:');
        console.log(payload);
      })
    })

    setInterval(function() {
      var test_message = 'TEST ' + count;
      sendMessage(exchange, test_message);
      count += 1;
    }, 2000)
  })
})
The last couple of answers are almost correct. I have tons of apps that generate messages that need to end up with different consumers, and the process is very simple.
If you want multiple consumers to receive the same message, do the following.
Create multiple queues, one for each app that is to receive the message. In each queue's properties, bind a routing tag to the amq.direct exchange. Change your publishing app to send to amq.direct using the routing tag (not a queue name). AMQP will then copy the message into each queue with the same binding. Works like a charm :)
Example: let's say I have a JSON string I generate. I publish it to the amq.direct exchange using the routing tag "new-sales-order". I have a queue for my order_printer app that prints orders, a queue for my billing system that sends a copy of the order and invoices the client, a web archive system where I archive orders for historic/compliance reasons, and a client web interface where orders are tracked as other info comes in about an order.
So my queues are: order_printer, order_billing, order_archive and order_tracking
All have the binding tag "new-sales-order" bound to them, all 4 will get the JSON data.
This is an ideal way to send data without the publishing app knowing or caring about the receiving apps.
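A sketch of those bindings using amqplib's promise API (an assumption, since the answer doesn't name a library; the queue and tag names come from the example above):

const amqp = require('amqplib');

async function setup() {
  const connection = await amqp.connect('amqp://localhost');
  const channel = await connection.createChannel();

  // One queue per consuming app, each bound to amq.direct with the same routing tag
  const queues = ['order_printer', 'order_billing', 'order_archive', 'order_tracking'];
  for (const name of queues) {
    await channel.assertQueue(name, { durable: true });
    await channel.bindQueue(name, 'amq.direct', 'new-sales-order');
  }

  // The publisher targets the exchange + routing tag, never a queue;
  // the broker copies the message into all four bound queues.
  const order = JSON.stringify({ orderId: 1, item: 'widget' });
  channel.publish('amq.direct', 'new-sales-order', Buffer.from(order));
}

setup().catch(console.error);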
Just read the RabbitMQ tutorial. You publish messages to an exchange, not to a queue; the exchange then routes them to the appropriate queues. In your case, you should bind a separate queue for each consumer. That way, they can consume messages completely independently.
Yes, each consumer can receive the same messages. Have a look at
http://www.rabbitmq.com/tutorials/tutorial-three-python.html
http://www.rabbitmq.com/tutorials/tutorial-four-python.html
http://www.rabbitmq.com/tutorials/tutorial-five-python.html
for different ways to route messages. I know they are for Python and Java, but it's good to understand the principles; decide what you are doing, and then find out how to do it in JS. It sounds like you want to do a simple fanout (tutorial 3), which sends the messages to all queues connected to the exchange.
The difference between what you are doing and what you want to do is that you are going to set up an exchange of type fanout. Fanout exchanges send all messages to all connected queues. Each queue will have a consumer that has access to all the messages separately.
Yes, this is commonly done; it is one of the features of AMQP.
The send pattern is a one-to-one relationship. If you want to "send" to more than one receiver you should be using the pub/sub pattern. See http://www.rabbitmq.com/tutorials/tutorial-three-python.html for more details.
RabbitMQ / AMQP: single queue, multiple consumers for same message and page refresh.
rabbit.on('ready', function () { });

sockjs_chat.on('connection', function (conn) {
  conn.on('data', function (message) {
    try {
      var obj = JSON.parse(message.replace(/\r/g, '').replace(/\n/g, ''));

      if (obj.header == "register") {
        // Connect to RabbitMQ
        try {
          conn.exchange = rabbit.exchange(exchange, {
            type: 'topic',
            autoDelete: false,
            durable: false,
            exclusive: false,
            confirm: true
          });

          conn.q = rabbit.queue('my-queue-' + obj.agentID, {
            durable: false,
            autoDelete: false,
            exclusive: false
          }, function () {
            conn.channel = 'my-queue-' + obj.agentID;
            conn.q.bind(conn.exchange, conn.channel);
            conn.q.subscribe(function (message) {
              console.log("[MSG] ---> " + JSON.stringify(message));
              conn.write(JSON.stringify(message) + "\n");
            }).addCallback(function(ok) {
              ctag[conn.channel] = ok.consumerTag;
            });
          });
        } catch (err) {
          console.log("Could not create connection to RabbitMQ. \nStack trace -->" + err.stack);
        }
      } else if (obj.header == "typing") {
        var reply = {
          type: 'chatMsg',
          msg: utils.escp(obj.msga),
          visitorNick: obj.channel,
          customField1: '',
          time: utils.getDateTime(),
          channel: obj.channel
        };
        conn.exchange.publish('my-queue-' + obj.agentID, reply);
      }
    } catch (err) {
      console.log("ERROR ----> " + err.stack);
    }
  });

  // When the visitor closes or reloads a page we need to unbind from RabbitMQ?
  conn.on('close', function () {
    try {
      // Close the socket
      conn.close();
      // Close RabbitMQ
      conn.q.unsubscribe(ctag[conn.channel]);
    } catch (er) {
      console.log(":::::::: EXCEPTION SOCKJS (ON-CLOSE) ::::::::>>>>>>> " + er.stack);
    }
  });
});
As I assess it, your case is:
I have a queue of messages (your source for receiving messages; let's name it q111)
I have multiple consumers, which I would like to do different things with the same message.
Your problem here is that when three messages are received by this queue, message 1 is consumed by consumer A, while consumers B and C consume messages 2 and 3. What you need instead is a setup where rabbitmq passes the same copies of all three messages (1, 2, 3) to all three connected consumers (A, B, C) simultaneously.
While many configurations can achieve this, a simple way is the following two-step concept:
Use a dynamic rabbitmq-shovel to pick up messages from the desired queue (q111) and publish them to a fanout exchange (created and dedicated exclusively for this purpose).
Now re-configure your consumers A, B & C (who were listening to queue q111) to listen to this fanout exchange directly, using an exclusive & anonymous queue for each consumer.
Note: while using this concept, don't consume directly from the source queue (q111), as messages that have already been consumed won't be shovelled to your fanout exchange.
If you think this does not satisfy your exact requirement... feel free to post your suggestions :-)
I think you should look at sending your messages using the fanout exchange. That way you will receive the same message across different consumers; under the table, RabbitMQ creates a different queue for each of these new consumers/subscribers.
This is the link to the tutorial examples in JavaScript:
https://www.rabbitmq.com/tutorials/tutorial-one-javascript.html
To get the behavior you want, simply have each consumer consume from its own queue. You'll have to use a non-direct exchange type (topic, header, fanout) in order to get the message to all of the queues at once.
If you happen to be using the amqplib library as I am, they have a handy example of an implementation of the Publish/Subscribe RabbitMQ tutorial which you might find handy.
There is one interesting option in this scenario I haven't found in the answers here.
You can nack messages with the "requeue" feature in one consumer to process them in another.
Generally speaking it is not the right way, but maybe it will be good enough for someone.
https://www.rabbitmq.com/nack.html
And beware of loops (when all consumers nack+requeue the message)!
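For illustration, a sketch of nack-with-requeue using amqplib (an assumption, since the answer links the concept rather than a library; shouldHandleHere is a hypothetical predicate deciding whether this consumer processes the message):

const amqp = require('amqplib');

async function consumeSelectively() {
  const connection = await amqp.connect('amqp://localhost');
  const channel = await connection.createChannel();
  await channel.assertQueue('my_queue_name', { durable: false });

  channel.consume('my_queue_name', function (msg) {
    if (shouldHandleHere(msg)) { // hypothetical: is this message for us?
      // ... process it ...
      channel.ack(msg);
    } else {
      // Reject and requeue so another consumer can pick it up. If every
      // consumer does this, the message loops forever -- hence the warning above.
      channel.nack(msg, false, true);
    }
  });
}

consumeSelectively().catch(console.error);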
Fanout was clearly what you wanted.
Read the RabbitMQ tutorial:
https://www.rabbitmq.com/tutorials/tutorial-three-javascript.html
Here's my example:
Publisher.js:
amqp.connect('amqp://<user>:<pass>@<host>:<port>', async (error0, connection) => {
  if (error0) {
    throw error0;
  }
  console.log('RabbitMQ connected')
  try {
    // Create exchange for queues
    channel = await connection.createChannel()
    await channel.assertExchange(process.env.EXCHANGE_NAME, 'fanout', { durable: false });
    await channel.publish(process.env.EXCHANGE_NAME, '', Buffer.from('msg'))
  } catch(error) {
    console.error(error)
  }
})
Subscriber.js:
amqp.connect('amqp://<user>:<pass>@<host>:<port>', async (error0, connection) => {
  if (error0) {
    throw error0;
  }
  console.log('RabbitMQ connected')
  try {
    // Create/Bind a consumer queue for an exchange broker
    channel = await connection.createChannel()
    await channel.assertExchange(process.env.EXCHANGE_NAME, 'fanout', { durable: false });
    const queue = await channel.assertQueue('', { exclusive: true })
    channel.bindQueue(queue.queue, process.env.EXCHANGE_NAME, '')
    console.log(" [*] Waiting for messages in %s. To exit press CTRL+C", queue.queue);
    channel.consume(queue.queue, consumeMessage, { noAck: true });
  } catch(error) {
    console.error(error)
  }
});
Here is an example I found on the internet; maybe it can also help.
https://www.codota.com/code/javascript/functions/amqplib/Channel/assertExchange
You just need to assign different groups to the consumers.
