Message sequence acting unexpectedly in RabbitMQ - python-3.x

I'd like to implement a RabbitMQ topology similar to Option 3 here, with some differences:
The new topology should handle a few thousand messages per day and have two exchanges: one to deal with the main queues (about 30), the other to deal with the retry and error queues (about 60). I've been following this tutorial and the usual RMQ tutorials, plus many SO posts. The RMQ server is fired up in a Docker container.
The problem I am facing is that not all messages are being picked up by the consumer, and the order in which messages are received is unexpected. I'm also seeing the same message being rejected twice. Here's my code:
exchanges.py
def callback(self, channel, method, properties, body):
    print("delivery_tag: {0}".format(method.delivery_tag))
    data = json.loads(body)
    routingKey = data.get('routing-key')
    routingKey_dl_error = queues_dict[routingKey]['error']
    print(" [X] Got {0}".format(body))
    print(" [X] Received {0} (try: {1})".format(data.get('keyword'), int(properties.priority) + 1))

    # redirect faulty messages to *.error queues
    if data.get('keyword') == 'FAIL':
        channel.basic_publish(exchange='exchange.retry',
                              routing_key=routingKey_dl_error,
                              body=json.dumps(data),
                              properties=pika.BasicProperties(
                                  delivery_mode=2,
                                  priority=int(properties.priority),
                                  timestamp=int(time.time()),
                                  headers=properties.headers))
        print(" [*] Sent to error queue: {0}".format(routingKey_dl_error))
        time.sleep(5)
        channel.basic_ack(delivery_tag=method.delivery_tag)  # leaving this in creates 1000s of iterations(?!)

    # check number of sent counts
    else:
        # redirect messages that exceed MAX_RETRIES to *.error queues
        if properties.priority >= MAX_RETRIES - 1:
            print(" [!] {0} Rejected after {1} retries".format(data.get('keyword'), int(properties.priority) + 1))
            channel.basic_publish(exchange='exchange.retry',
                                  routing_key=routingKey_dl_error,
                                  body=json.dumps(data),
                                  properties=pika.BasicProperties(
                                      delivery_mode=2,
                                      priority=int(properties.priority),
                                      timestamp=int(time.time()),
                                      headers=properties.headers))
            print(" [*] Sent to error queue: {0}".format(routingKey_dl_error))
            #channel.basic_ack(delivery_tag=method.delivery_tag)
        else:
            timestamp = time.time()
            now = datetime.datetime.now()
            expire = 1000 * int((now.replace(hour=23, minute=59, second=59, microsecond=999999) - now).total_seconds())
            # to reject the job we create a new one with another priority and expiration
            channel.basic_publish(exchange='exchange_main',
                                  routing_key=routingKey,
                                  body=json.dumps(data),
                                  properties=pika.BasicProperties(
                                      delivery_mode=2,
                                      priority=int(properties.priority) + 1,
                                      timestamp=int(timestamp),
                                      expiration=str(expire),
                                      headers=properties.headers))
            # send back acknowledgement about the job
            channel.basic_ack(delivery_tag=method.delivery_tag)  # nack or reject???
            print("[!] Rejected. Going to sleep for a while...")
            time.sleep(5)
def exchange(self):
    # 1 - connect and channel setup
    parameters = "..."
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()

    # 2 - declare exchanges
    # declares the main exchange to be used by all producers to send messages. External facing
    channel.exchange_declare(exchange='exchange_main',
                             exchange_type='direct',
                             durable=True,
                             auto_delete=False)
    # declares the dead letter exchange. Routes messages to *error and *retry queues. Internal use only
    channel.exchange_declare(exchange='exchange.retry',
                             exchange_type='direct',
                             durable=True,
                             auto_delete=False)

    # 3 - bind the external facing exchange to the internal exchange
    #channel.exchange_bind(destination='exchange.retry', source='exchange_main')

    # 4 - declare queues
    # Create durable queues bound to the exchange_main exchange
    for queue_name in self.queueName_list:
        queueArgs = {
            "x-message-ttl": 5000,
            "x-dead-letter-exchange": 'exchange.retry',
            #"x-dead-letter-routing-key": queue_name + '.retry'
        }
        channel.queue_declare(queue=queue_name, durable=True, arguments=queueArgs)

    # Create durable queues bound to the exchange.retry exchange
    '''
    for queue_dl_name in self.queueName_dl_list:
        if queue_dl_name[-5:] == 'retry':
            queueArgs_retry = {
                "x-message-ttl": 5000,
                "x-dead-letter-exchange": 'exchange_main',
                "x-dead-letter-routing-key": queue_dl_name[:-6]
            }
            channel.queue_declare(queue=queue_dl_name, durable=True, arguments=queueArgs_retry)
        else:
            channel.queue_declare(queue=queue_dl_name, durable=True)
    '''
    for queue_dl_name in self.queueName_dl_list:
        channel.queue_declare(queue=queue_dl_name, durable=True)

    # 5 - bind retry and main queues to exchanges
    # bind queues to exchanges. Allows for messages to be saved when no consumer is present
    for queue_name in self.queueName_list:
        channel.queue_bind(queue=queue_name, exchange='exchange_main')
    for queue_dl_name in self.queueName_dl_list:
        channel.queue_bind(queue=queue_dl_name, exchange='exchange.retry')

    # 6 - don't dispatch a new message to a worker until it has processed and acknowledged the previous one; dispatch to the next worker instead
    channel.basic_qos(prefetch_count=1)

    # 7 - consume the messages
    all_queues = self.queueName_list + self.queueName_dl_list
    for queue in all_queues:
        channel.basic_consume(queue=queue,
                              on_message_callback=self.callback,
                              auto_ack=False)

    print('[*] Waiting for data for:')
    for queue in all_queues:
        print('    ' + queue)
    print('[*] To exit press CTRL+C')

    try:
        channel.start_consuming()
    except KeyboardInterrupt:
        channel.stop_consuming()
        channel.close()
        connection.close()
producer.py
# 1 - connect and channel setup
parameters = "..."
try:
    connection = pika.BlockingConnection(parameters)
except pika.exceptions.AMQPConnectionError as err:
    print("AMQP connection failure. Ensure RMQ server is running.")
    raise err
channel = connection.channel()  # create a channel in the TCP connection

# 2 - turn on delivery confirmations (either a basic.ack or basic.nack)
channel.confirm_delivery()

# 3 - send message to RMQ
print(" [*] Sending message to create a queue")
# set header parameters
count = 3
for i in range(1, count + 1):
    if self.keyword is None:
        message = "data {0}".format(i)
    else:
        message = self.keyword

    timestamp = time.time()
    now = datetime.datetime.now()
    expire = 1000 * int((now.replace(hour=23, minute=59, second=59, microsecond=999999) - now).total_seconds())

    headers = dict()
    headers['RMQ_Header_Key'] = self.queueName
    headers['x-retry-count'] = 0
    headers['x-death'] = None

    data = {
        'routing-key': self.queueName,
        'keyword': message,
        'domain': message,
        'created': int(timestamp),
        'expire': expire
    }

    # properties are often used for bits of data that your code needs to have, but that aren't part of the actual message body
    channel.basic_publish(
        exchange='exchange_main',
        routing_key=self.queueName,
        body=json.dumps(data),
        properties=pika.BasicProperties(
            delivery_mode=2,            # makes the job persistent
            priority=0,                 # default priority
            timestamp=int(timestamp),   # timestamp of job creation
            expiration=str(expire),     # job expiration
            headers=headers
        ))
    print(" [*] Sent message: {0} via routing key: {1}".format(message, self.queueName))

# 4 - close channel and connection
channel.close()
connection.close()
After firing up exchange.py, I then send from the command line in another terminal window: python3 producer.py queue1
And then get:
delivery_tag: 1
[X] Got b'{"routing-key": "queue1", "keyword": "data 1", "domain": "data 1", "created": 1567068725, "expire": 47274000}'
[X] Received data 1 (try: 1)
[!] Rejected. Going to sleep for a while...
delivery_tag: 2
[X] Got b'{"routing-key": "queue1", "keyword": "data 1", "domain": "data 1", "created": 1567068725, "expire": 47274000}'
[X] Received data 1 (try: 2)
[!] Rejected. Going to sleep for a while...
delivery_tag: 3
[X] Got b'{"routing-key": "queue1", "keyword": "data 3", "domain": "data 3", "created": 1567068725, "expire": 47274000}'
[X] Received data 3 (try: 1)
[!] Rejected. Going to sleep for a while...
delivery_tag: 4
[X] Got b'{"routing-key": "queue1", "keyword": "data 3", "domain": "data 3", "created": 1567068725, "expire": 47274000}'
[X] Received data 3 (try: 2)
[!] Rejected. Going to sleep for a while...
delivery_tag: 5
[X] Got b'{"routing-key": "queue1", "keyword": "data 3", "domain": "data 3", "created": 1567068725, "expire": 47274000}'
[X] Received data 3 (try: 3)
[!] data 3 Rejected after 3 retries
[*] Sent to error queue: queue1.error
delivery_tag: 6
[X] Got b'{"routing-key": "queue1", "keyword": "data 3", "domain": "data 3", "created": 1567068725, "expire": 47274000}'
[X] Received data 3 (try: 3)
[!] data 3 Rejected after 3 retries
[*] Sent to error queue: queue1.error
Questions:
Does my current implementation correspond to my desired topology?
Direct vs. topic: is a direct exchange the most efficient solution in this case?
One exchange vs. two: is it advisable to stick with two exchanges, or can this be simplified to just one exchange for everything?
How do I test for messages which are not "normal", i.e. ones sent to the retry-loop section of my callback function? The callback currently doesn't handle "normal" messages (i.e. messages where no retry was triggered, or simply failed messages).
Is binding the two exchanges necessary? Commenting out this code made no difference.
Do I need to pass arguments (in channel.queue_declare) for both the dead-letter and non-dead-letter queues? I know the non-dead-letter queues should have arguments declared, with x-dead-letter-exchange set, but I'm not sure whether x-dead-letter-routing-key should also be set.
Do I need to ack/nack every time a message is published? I notice differing behaviour depending on whether this is implemented (without the ack the FAIL message is sent only twice instead of three times; with the ack it is sent more than three times). See the sketch after these questions.
In the output above, "data 1" is only consumed twice, "data 2" doesn't appear at all, and "data 3" reaches the MAX_RETRIES limit of 3 but then gets sent to the *.error queue twice (not once), which I find strange. What is RMQ doing here?
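For reference on the ack/nack question, here is a minimal sketch of the three acknowledgement calls pika's BlockingChannel offers; it assumes the same channel and method objects as in the callback above, and which call fits depends on the topology (the dead-lettering noted in the comments only happens if the queue has an x-dead-letter-exchange configured).
# Inside the consumer callback: exactly one of these should be issued per delivery.

# positive ack: the message is done and is removed from the queue
channel.basic_ack(delivery_tag=method.delivery_tag)

# negative ack: with requeue=False the broker dead-letters the message
# (routes it to the queue's x-dead-letter-exchange, if one is configured)
channel.basic_nack(delivery_tag=method.delivery_tag, requeue=False)

# reject: like nack, but always applies to a single message
channel.basic_reject(delivery_tag=method.delivery_tag, requeue=True)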

Related

Input blocks the subscriber/client from receiving messages in a Pub/Sub system

I am trying to build a publisher/subscriber system. In simple words, a publisher can publish messages to all subscribers via a broker. Subscribers support two functions: 1) receive a message from the broker, 2) send a message to the broker via the input method. But there is a problem: subscribers block while waiting for input from stdin, and they only receive the publisher's messages after the input, even though no input is needed.
I would like to solve the problem in this direction: while waiting for input from stdin, a subscriber can still receive a message from the publisher. I tried "curses" but I failed.
I post a part of my code below:
subscriber.py
while True:
    try:
        command = input("Enter a command: ")
        #command = sys.stdin.readline()
        if command == "quit":
            break
        command.strip()
        command_to_broker = process_command(command, sub_id)
        if command_to_broker == error_message:
            print("Command with wrong format!")
            continue
        sock.sendall(bytes(command_to_broker, ENCODING))
        received = str(sock.recv(BUFFER_SIZE), ENCODING)
        print("Received from BROKER: " + received)
    except:
        print("Error/Disconnect")
broker.py (starts a publisher thread)
def publisher_thread(connection, topics_and_subscribers, subscribers_and_ports, subscribers):
    while True:
        data = connection.recv(BUFFER_SIZE)
        print("Command from PUBLISHER {}".format(data.decode(ENCODING)))
        response = 'OK'
        command = data.decode(ENCODING).split(" ", 3)
        pub_id = command[0]
        topic = command[2]
        message = command[3]
        if topic in topics_and_subscribers:
            for s in subscribers:
                print(s.getpeername())
                sum = s.send(bytes(message, ENCODING))
                print(sum)
        if not data:
            break
        connection.sendall(bytes(response, ENCODING))
    connection.close()
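One possible direction (a minimal sketch only, for POSIX systems where select() can watch stdin): multiplex stdin and the broker socket so that incoming broker messages are printed even while no command has been typed. It assumes the same sock, BUFFER_SIZE and ENCODING as in the question; process_command and sub_id are omitted for brevity.
import select
import sys

print("Enter a command: ", end="", flush=True)
while True:
    # wait until either the user typed a line or the broker sent data
    readable, _, _ = select.select([sys.stdin, sock], [], [])
    for source in readable:
        if source is sock:
            received = str(sock.recv(BUFFER_SIZE), ENCODING)
            print("\nReceived from BROKER: " + received)
            print("Enter a command: ", end="", flush=True)
        else:
            command = sys.stdin.readline().strip()
            if command == "quit":
                sys.exit(0)
            sock.sendall(bytes(command, ENCODING))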

How to count messages from a subscription utilizing streaming pull with GCP Pub/Sub?

I am confused about the GCP Pub/Sub REST API.
Background: I am trying to count the number of messages in a Pub/Sub subscription, but I cannot iterate through the message object from the streaming pull.
Therefore, I need to rely on the REST API provided here: https://cloud.google.com/pubsub/docs/pull#asynchronous-pull
Based on my understanding of the REST API:
It currently pulls messages as expected, but when I try to iterate in a loop, the stack trace indicates that the Message object is not iterable.
What I have tried:
With my current implementation, it only repeats one message being sent for each company involved:
company_name = {}
if len(message) == 0:
    logging.warning('Nothing pulled from pubsub')
else:
    logging.info('Pulled %s messages from pubsub' % str(len(message.data)))
    for msg in message:
        if msg.attributes in message:
            agency_name[message.attributes['company_name']] = 1
        else:
            agency_name[message.attributes['company_name']] += 1
    message.ack()
What is the best way of achieving this?
In addition to what @guillaume said, you can check this GCP documentation for reading time-series data using Python: https://cloud.google.com/monitoring/docs/samples/monitoring-read-timeseries-simple#code-sample
from google.cloud import monitoring_v3
import time

client = monitoring_v3.MetricServiceClient()
project_name = f"projects/anjela"
now = time.time()
seconds = int(now)
nanos = int((now - seconds) * 10 ** 9)
interval = monitoring_v3.TimeInterval(
    {
        "end_time": {"seconds": seconds, "nanos": nanos},
        "start_time": {"seconds": (seconds - 1200), "nanos": nanos},
    }
)
results = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type = "pubsub.googleapis.com/subscription/num_undelivered_messages"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)
for result in results:
    print(result)
I put this filter pubsub.googleapis.com/subscription/num_undelivered_messages as it tracks the number of unacknowledged or backlog messages. You can use this GCP documentation to alter the filter according to your purpose: https://cloud.google.com/monitoring/api/metrics_gcp#gcp-pubsub
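If the goal is a single backlog count per subscription, one way (a sketch only, building on the results iterable above and assuming points come back newest-first, which is the API's default ordering) is to read the most recent point of each returned series:
# Illustrative: report the latest num_undelivered_messages value per subscription.
for result in results:
    if result.points:
        latest = result.points[0]  # points are returned newest-first
        subscription = result.resource.labels["subscription_id"]
        print(subscription, latest.value.int64_value)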
Result in the Cloud Monitoring interface and raw response: (screenshots omitted)

Pause/Delay sending of new batch of users from swarm

I have a test case where I need to spawn 1000 websocket connections and sustain a conversation over them through a Locust task (it has a predefined send/receive process for the websocket connections). I can successfully do it with the following setup in Locust:
Max Number of Users: 1000
Hatch rate: 1000
However, this setup opens 1000 connections every second. Even if I lower the hatch rate, at some point it will still keep spawning 1000 websocket connections per second. Is there a way to spawn 1000 users instantly and then halt/delay the swarm from sending a new batch of 1000 connections for a while?
I am trying to test whether my server can handle 1000 users sending and receiving messages over a websocket connection. I have tried a multiprocessing approach in Python, but I'm having a hard time spawning connections as fast as I can with Locust.
class UserBehavior(TaskSet):
    statements = [
        "Do you like coffee?",
        "What's your favorite book?",
        "Do you invest in crypto?",
        "Who will host the Superbowl next year?",
        "Have you listened to the new Adele?",
        "Coldplay released a new album",
        "I watched the premiere of Succession season 3 last night",
        "Who is your favorite team in the NBA?",
        "I want to buy the new Travis Scott x Jordan shoes",
        "I want a Lamborghini Urus",
        "Have you been to the Philippines?",
        "Did you sign up for a Netflix account?"
    ]

    def on_start(self):
        pass

    def on_quit(self):
        pass

    @task
    def send_convo(self):
        end = False
        ws_url = "ws://xx.xx.xx.xx:8080/websocket"
        self.ws = create_connection(ws_url)
        body = json.dumps({"text": "start blender"})
        self.ws.send(body)
        while True:
            #print("Waiting for response..")
            response = self.ws.recv()
            if response != None:
                if "Sorry, this world closed" in response:
                    end = True
                break
        if not end:
            body = json.dumps({"text": "begin"})
            self.ws.send(body)
            while True:
                #print("Waiting for response..")
                response = self.ws.recv()
                if response != None:
                    # print("[BOT]: ", response)
                    if "Sorry, this world closed" in response:
                        end = True
                        self.ws.close()
                    break
        if not end:
            body = json.dumps({"text": random.choice(self.statements)})
            start_at = time.time()
            self.ws.send(body)
            while True:
                response = self.ws.recv()
                if response != None:
                    if "Sorry, this world closed" not in response:
                        response_time = int((time.time() - start_at) * 1000)
                        print(f"[BOT]Response: {response}")
                        response_length = len(response)
                        events.request_success.fire(
                            request_type='Websocker Recv',
                            name='test/ws/echo',
                            response_time=response_time,
                            response_length=response_length,
                        )
                    else:
                        end = True
                        self.ws.close()
                    break
        if not end:
            body = json.dumps({"text": "[DONE]"})
            self.ws.send(body)
            while True:
                response = self.ws.recv()
                if response != None:
                    if "Sorry, this world closed" in response:
                        end = True
                        self.ws.close()
                    break
        if not end:
            time.sleep(1)
            body = json.dumps({"text": "EXIT"})
            self.ws.send(body)
            time.sleep(1)
            self.ws.close()

class WebsiteUser(HttpUser):
    tasks = [UserBehavior]
    wait_time = constant(2)
    host = "ws://xx.xx.xx.xx:8080/websocket"
For this particular test, I set the maximum users to 1 and the hatch rate to 1, and clearly Locust keeps on sending 1 request per second, as seen in the following responses:
[BOT]Response: {"text": "No, I don't have a netflix account. I do have a Hulu account, though.", "quick_replies": null}
[BOT]Response: {"text": "I have not, but I would love to go. I have always wanted to visit the Philippines.", "quick_replies": null
[BOT]Response: {"text": "No, I don't have a netflix account. I do have a Hulu account, though.", "quick_replies": null}
[BOT]Response: {"text": "I think it's going to be New Orleans. _POTENTIALLY_UNSAFE__", "quick_replies": null}
My expectation is that after I set the maximum users to 1 and the hatch rate to 1, there would instantly be one websocket connection sending a random message and receiving one main response from the websocket server. But what's happening is that it keeps repeating the task every second until I explicitly hit the stop button on the Locust dashboard.
I would debug your logic. Put more print statements in each if block at various places and between each block. When dealing with a long list of decisions, it's easy to get things tripped up.
In this case, you only want to sleep in a very specific situation, but it's not happening. Most likely you're setting end = True when you're not expecting it, so you're not sleeping and are immediately getting a new user.
EDIT:
Reviewing your question and issue description again, it sounds like you expect Locust to send a single request and then never send another one. That's not how Locust works. Locust will run your task code for a user. When it's done, that user goes away, Locust waits for a certain amount of time (it looks like you have it set to 2 seconds), and then it spawns another user and starts the task over again. The idea is that it tries to keep the number of running users roughly constant at the number you tell it to. It will not, by default, just run 1000 users once and then end the test.
If you want to keep all 1000 users running, you need to make them continue to execute code. For example, you could put everything in your task in another while loop with another way to break out and end. That way even after making your socket connection and sending the single message you expect, the user will stay alive in the loop and won't end because it ran out of things to do. Doing it this way requires a lot more work and coordination but is possible. There may be other questions on SO about different approaches if this isn't exactly what you're looking for.
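A minimal sketch of that outer-loop idea, assuming the same websocket-client create_connection and server behaviour as in the question (the host, the message text and the stop condition here are illustrative, not the asker's exact flow):
import json
import time
from locust import HttpUser, TaskSet, task, constant
from websocket import create_connection

class UserBehavior(TaskSet):
    @task
    def send_convo(self):
        ws = create_connection("ws://xx.xx.xx.xx:8080/websocket")  # placeholder host
        ws.send(json.dumps({"text": "start blender"}))
        keep_running = True
        while keep_running:
            # one round of the conversation; the same user stays alive between rounds
            ws.send(json.dumps({"text": "Do you like coffee?"}))
            response = ws.recv()
            if response and "Sorry, this world closed" in response:
                keep_running = False  # server ended the world, so let this task finish
            else:
                time.sleep(1)  # idle instead of ending the task and respawning a user
        ws.close()

class WebsiteUser(HttpUser):
    tasks = [UserBehavior]
    wait_time = constant(2)
    host = "http://xx.xx.xx.xx:8080"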

Should one or multiple SQS clients be used to receive and delete messages?

I'd like to create two functions, receive_message and delete_message, for the same SQS queue. Should I use the same SQS client for receiving and deleting, or can I use different ones? Is there any overhead to creating a new SQS client that matters in terms of speed and performance?
I did two tests.
Test 1 uses different SQS clients in the receive and delete message functions; I recorded the time elapsed.
Test 2 uses the same SQS client for receiving and deleting; I also recorded the time elapsed.
test 1:
def receive_delete_message():
    """Receive message from SQS"""
    sqs = boto3.client("sqs")
    queue_url = "my sqs url"
    response = sqs.receive_message(
        QueueUrl=queue_url,
        AttributeNames=[
            'SentTimestamp'
        ],
        MaxNumberOfMessages=10,  # [1, 10] default is 1, 10 is desired
        MessageAttributeNames=[
            'All'
        ],
        VisibilityTimeout=1,  # default is 30 sec
        WaitTimeSeconds=20  # [0, 20]. short poll if 0, otherwise long poll
    )
    if "Messages" not in response:  # empty sqs queue
        print("empty queue")
        return
    messages = response["Messages"]
    for i, message in enumerate(messages):
        receipt_handle = message['ReceiptHandle']
        print("this is message {}, {}".format(i + 1, message["Body"]))
        delete_message(receipt_handle)

def delete_message(receipt_handle):
    sqs2 = boto3.client("sqs")
    queue_url = "my sqs url"
    sqs2.delete_message(
        QueueUrl=queue_url,
        ReceiptHandle=receipt_handle
    )
    print("message deleted")

start = time.time()
receive_delete_message()
print(time.time() - start)
test 2:
sqs = boto3.client("sqs")
queue_url = "my sqs url"

def receive_delete_message():
    """Receive message from SQS"""
    response = sqs.receive_message(
        QueueUrl=queue_url,
        AttributeNames=[
            'SentTimestamp'
        ],
        MaxNumberOfMessages=10,  # [1, 10] default is 1, 10 is desired
        MessageAttributeNames=[
            'All'
        ],
        VisibilityTimeout=1,  # default is 30 sec
        WaitTimeSeconds=20  # [0, 20]. short poll if 0, otherwise long poll
    )
    if "Messages" not in response:  # empty sqs queue
        print("empty queue")
        return
    messages = response["Messages"]
    for i, message in enumerate(messages):
        receipt_handle = message['ReceiptHandle']
        print("this is message {}, {}".format(i + 1, message["Body"]))
        delete_message(receipt_handle)

def delete_message(receipt_handle):
    sqs.delete_message(
        QueueUrl=queue_url,
        ReceiptHandle=receipt_handle
    )
    print("message deleted")

start = time.time()
receive_delete_message()
print(time.time() - start)
result from test 1:
this is message 1, message1000
message deleted
this is message 2, message1002
message deleted
this is message 3, message1003
message deleted
this is message 4, message1004
message deleted
this is message 5, message1007
message deleted
this is message 6, message1008
message deleted
this is message 7, message1018
message deleted
this is message 8, message1025
message deleted
this is message 9, message1034
message deleted
this is message 10, message1036
message deleted
1.526839017868042
result from test 2:
this is message 1, message1002
message deleted
this is message 2, message1013
message deleted
this is message 3, message1017
message deleted
this is message 4, message1028
message deleted
this is message 5, message1029
message deleted
this is message 6, message1044
message deleted
this is message 7, message1047
message deleted
this is message 8, message1005
message deleted
this is message 9, message1006
message deleted
this is message 10, message1015
message deleted
0.5138881206512451
You'd have to run this test 100s of times to get meaningful data, in my opinion. The primary reasons to employ multiple clients are that you want to target multiple AWS regions or use multiple sets of AWS credentials. In this case, I would just use a single client.
You are basically making a second object that has access to the exact same permissions as the first. The credentials are the same.
If you are considering deleting the message from a client other than the receiving consumer (two Lambdas, maybe), then you are TRUSTING that the receiving consumer will ALWAYS succeed and delete the message. What happens if the consumer fails and you delete the message from the queue anyway?
I also second what jarmod says. Seeing a one-off ~1 second difference is not a meaningful performance benchmark. Overall, a single Lambda consumer that reads from the queue is good for a small use case. If you are really looking for something production ready, I would look at something like: https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-using-sqs-queue.html
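To illustrate both points (one shared client, and deleting only after the consumer has actually succeeded), a minimal sketch; the queue URL and process() are placeholders, not part of the original question:
import boto3

sqs = boto3.client("sqs")   # one client shared by receive and delete
QUEUE_URL = "my sqs url"    # placeholder

def process(body):
    # placeholder for the real work; raise an exception on failure
    return True

def receive_and_delete():
    response = sqs.receive_message(
        QueueUrl=QUEUE_URL,
        MaxNumberOfMessages=10,
        WaitTimeSeconds=20,
    )
    for message in response.get("Messages", []):
        try:
            process(message["Body"])
        except Exception:
            # don't delete: the message becomes visible again after the
            # visibility timeout and can be retried or dead-lettered
            continue
        sqs.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=message["ReceiptHandle"])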

Why is this queue not working properly?

The following queue is not working properly somehow. Is there any obvious mistake I have made? Basically, every incoming SMS message is put onto the queue; the pump tries to send it, and if successful removes it from the queue. If unsuccessful, it sleeps for 2 seconds and tries sending it again.
# initialize queue
queue = queue.Queue()

def messagePump():
    while True:
        item = queue.get()
        if item is not None:
            status = sendText(item)
            if status == 'SUCCEEDED':
                queue.task_done()
            else:
                time.sleep(2)

def sendText(item):
    response = getClient().send_message(item)
    response = response['messages'][0]
    if response['status'] == '0':
        return 'SUCCEEDED'
    else:
        return 'FAILED'

@app.route('/webhooks/inbound-sms', methods=['POST'])
def delivery_receipt():
    data = dict(request.form) or dict(request.args)
    senderNumber = data['msisdn'][0]
    incomingMessage = data['text'][0]
    # came from customer service operator
    if (senderNumber == customerServiceNumber):
        try:
            split = incomingMessage.split(';')
            # get recipient phone number
            recipient = split[0]
            # get message content
            message = split[1]
            # check if target number is 10 digit long and there is a message
            if (len(message) > 0):
                # for confirmation send beginning string only
                successText = 'Message successfully sent to: ' + recipient + ' with text: ' + message[:7]
                queue.put({'from': virtualNumber, 'to': recipient, 'text': message})
The above is running on a Flask server. So invoking messagePump:
thread = threading.Thread(target=messagePump)
thread.start()
What commonly happens in such cases is that the thread has completed execution before items start to appear in the queue; please call thread.daemon = True before running thread.start().
Another thing that may happen here is that the thread was terminated due to an exception. Make sure messagePump handles all possible exceptions.
This topic on tracing exceptions in threads may be useful for you:
Catch a thread's exception in the caller thread in Python
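A minimal sketch of both suggestions applied to the code in the question (same queue and sendText as above; the retry behaviour itself is unchanged):
import threading
import time

def messagePump():
    while True:
        try:
            item = queue.get()
            if item is not None:
                if sendText(item) == 'SUCCEEDED':
                    queue.task_done()
                else:
                    time.sleep(2)
        except Exception as exc:
            # surface errors instead of letting the pump thread die silently
            print("messagePump error: {0}".format(exc))

thread = threading.Thread(target=messagePump, daemon=True)  # daemon set before start()
thread.start()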
