Asyncio server stops to respond after the first request - python-3.x

I'm trying to write an asyncio-based server. The problem is, that it stops to respond after the first request.
My code is built upon this template for echo-server and this method to pass parameters to coroutines.
class MsgHandler:
def __init__(self, mem):
# here (mem:dict) I store received metrics
self.mem = mem
async def handle(self, reader, writer):
#this coroutine handles requests
data = await reader.read(1024)
print('request:', data.decode('utf-8'))
# read_msg returns an answer based on the request received
# My server closes connection on every second request
# For the first one, everything works as intended,
# so I don't thik the problem is in read_msg()
response = read_msg(data.decode('utf-8'), self.mem)
print('response:', response)
writer.write(response.encode('utf-8'))
await writer.drain()
writer.close()
def run_server(host, port):
mem = {}
msg_handler = MsgHandler(mem)
loop = asyncio.get_event_loop()
coro = asyncio.start_server(msg_handler.handle, host, port, loop=loop)
server = loop.run_until_complete(coro)
try:
loop.run_forever()
except KeyboardInterrupt:
pass
server.close()
loop.run_until_complete(server.wait_closed())
loop.close()
On the client-side I either get an empty response or ConnectionResetError (104, 'Connection reset by peer').

You are closing the writer with writer.close() in the handler, which closes the socket.
From the 3.9 docs on StreamWriter:
Also, if you don't close the stream writer, then you would still have store it somewhere in order to keep receiving messages over that same connection.

Related

Flask server using asynchronous Rpc client only answer 1 request out of two

I'm trying to implement an async RPC client within a Flask server.
The idea is that each request spawn a thread with an uuid, and each request is going to wait until there is a response in the RpcClient queue attribute object with the correct uuid.
The problem is that one request out of two fails. I think that might be a problem with multi-threading, but I don't see where it comes from.
Bug can be seen here.
Using debug print, it seems that the message with the correct uuid is received in the _on_response callback and update the queue attribute in this instance correctly, but the queue attribute within the /rpc_call/<payload> endpoint doesn't synchronize (so queue[uuid] has a value of response in the RpcClient callback but still None in the scope of the endpoint).
My code:
from flask import Flask, jsonif
from gevent.pywsgi import WSGIServer
import sys
import os
import pika
import uuid
import time
import threading
class RpcClient(object):
"""Asynchronous Rpc client."""
internal_lock = threading.Lock()
queue = {}
def __init__(self):
self.connection = pika.BlockingConnection(
pika.ConnectionParameters(host='rabbitmq'))
self.channel = self.connection.channel()
self.channel.basic_qos(prefetch_count=1)
self.channel.exchange_declare(exchange='kaldi_expe', exchange_type='topic')
# Create all the queue and bind them to the corresponding routing key
self.channel.queue_declare('request', durable=True)
result = self.channel.queue_declare('answer', durable=True)
self.channel.queue_bind(exchange='kaldi_expe', queue='request', routing_key='kaldi_expe.web.request')
self.channel.queue_bind(exchange='kaldi_expe', queue='answer', routing_key='kaldi_expe.kaldi.answer')
self.callback_queue = result.method.queue
.
thread = threading.Thread(target=self._process_data_events)
thread.setDaemon(True)
thread.start()
def _process_data_events(self):
self.channel.basic_consume(self.callback_queue, self._on_response, auto_ack=True)
while True:
with self.internal_lock:
self.connection.process_data_events()
time.sleep(0.1)
def _on_response(self, ch, method, props, body):
"""On response we simply store the result in a local dictionary."""
self.queue[props.correlation_id] = body
def send_request(self, payload):
corr_id = str(uuid.uuid4())
self.queue[corr_id] = None
with self.internal_lock:
self.channel.basic_publish(exchange='kaldi_expe',
routing_key="kaldi_expe.web.request",
properties=pika.BasicProperties(
reply_to=self.callback_queue,
correlation_id=corr_id,
),
body=payload)
return corr_id
def flask_app():
app = Flask("kaldi")
#app.route('/', methods=['GET'])
def server_is_up():
return 'server is up', 200
#app.route('/rpc_call/<payload>')
def rpc_call(payload):
"""Simple Flask implementation for making asynchronous Rpc calls. """
corr_id = app.config['RPCclient'].send_request(payload)
while app.config['RPCclient'].queue[corr_id] is None:
#print("queue server: " + str(app.config['RPCclient'].queue))
time.sleep(0.1)
return app.config['RPCclient'].queue[corr_id]
if __name__ == '__main__':
while True:
try:
rpcClient = RpcClient()
app = flask_app()
app.config['RPCclient'] = rpcClient
print("Rabbit MQ is connected, starting server", file=sys.stderr)
app.run(debug=True, threaded=True, host='0.0.0.0')
except pika.exceptions.AMQPConnectionError as e:
print("Waiting for RabbitMq startup" + str(e), file=sys.stderr)
time.sleep(1)
except Exception as e:
worker.log.error(e)
exit(e)
I found where the bug came from:
Thedebug=True of the line app.run(debug=True, threaded=True, host='0.0.0.0') restart the server at the beginning.
The whole script is then restarted from the beginning. Because of it, another rpcClient is initialized and consume from the same queue. Problem is that the previous thread is also running. This cause two rpcClient to consume from the same thread, with one that is virtually useless.

Python async: Waiting for stdin input while doing other stuff

I'm trying to create a WebSocket command line client that waits for messages from a WebSocket server but waits for user input at the same time.
Regularly polling multiple online sources every second works fine on the server, (the one running at localhost:6789 in this example), but instead of using Python's normal sleep() method, it uses asyncio.sleep(), which makes sense because sleeping and asynchronously sleeping aren't the same thing, at least not under the hood.
Similarly, waiting for user input and asynchronously waiting for user input aren't the same thing, but I can't figure out how to asynchronously wait for user input in the same way that I can asynchronously wait for an arbitrary amount of seconds, so that the client can deal with incoming messages from the WebSocket server while simultaneously waiting for user input.
The comment below in the else-clause of monitor_cmd() hopefully explains what I'm getting at:
import asyncio
import json
import websockets
async def monitor_ws():
uri = 'ws://localhost:6789'
async with websockets.connect(uri) as websocket:
async for message in websocket:
print(json.dumps(json.loads(message), indent=2, sort_keys=True))
async def monitor_cmd():
while True:
sleep_instead = False
if sleep_instead:
await asyncio.sleep(1)
print('Sleeping works fine.')
else:
# Seems like I need the equivalent of:
# line = await asyncio.input('Is this your line? ')
line = input('Is this your line? ')
print(line)
try:
asyncio.get_event_loop().run_until_complete(asyncio.wait([
monitor_ws(),
monitor_cmd()
]))
except KeyboardInterrupt:
quit()
This code just waits for input indefinitely and does nothing else in the meantime, and I understand why. What I don't understand, is how to fix it. :)
Of course, if I'm thinking about this problem in the wrong way, I'd be very happy to learn how to remedy that as well.
You can use the aioconsole third-party package to interact with stdin in an asyncio-friendly manner:
line = await aioconsole.ainput('Is this your line? ')
Borrowing heavily from aioconsole, if you would rather avoid using an external library you could define your own async input function:
async def ainput(string: str) -> str:
await asyncio.get_event_loop().run_in_executor(
None, lambda s=string: sys.stdout.write(s+' '))
return await asyncio.get_event_loop().run_in_executor(
None, sys.stdin.readline)
Borrowing heavily from aioconsole, there are 2 ways to handle.
start a new daemon thread:
import sys
import asyncio
import threading
from concurrent.futures import Future
async def run_as_daemon(func, *args):
future = Future()
future.set_running_or_notify_cancel()
def daemon():
try:
result = func(*args)
except Exception as e:
future.set_exception(e)
else:
future.set_result(result)
threading.Thread(target=daemon, daemon=True).start()
return await asyncio.wrap_future(future)
async def main():
data = await run_as_daemon(sys.stdin.readline)
print(data)
if __name__ == "__main__":
asyncio.run(main())
use stream reader:
import sys
import asyncio
async def get_steam_reader(pipe) -> asyncio.StreamReader:
loop = asyncio.get_event_loop()
reader = asyncio.StreamReader(loop=loop)
protocol = asyncio.StreamReaderProtocol(reader)
await loop.connect_read_pipe(lambda: protocol, pipe)
return reader
async def main():
reader = await get_steam_reader(sys.stdin)
data = await reader.readline()
print(data)
if __name__ == "__main__":
asyncio.run(main())

Handling ensure_future and its missing tasks

I have a streaming application that almost continuously takes the data given as input and sends an HTTP request using that value and does something with the returned value.
Obviously to speed things up I've used asyncio and aiohttp libraries in Python 3.7 to get the best performance, but it becomes hard to debug given how fast the data moves.
This is what my code looks like
'''
Gets the final requests
'''
async def apiRequest(info, url, session, reqType, post_data=''):
if reqType:
async with session.post(url, data = post_data) as response:
info['response'] = await response.text()
else:
async with session.get(url+post_data) as response:
info['response'] = await response.text()
logger.debug(info)
return info
'''
Loops through the batches and sends it for request
'''
async def main(data, listOfData):
tasks = []
async with ClientSession() as session:
for reqData in listOfData:
try:
task = asyncio.ensure_future(apiRequest(**reqData))
tasks.append(task)
except Exception as e:
print(e)
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno)
responses = await asyncio.gather(*tasks)
return responses #list of APIResponses
'''
Streams data in and prepares batches to send for requests
'''
async def Kconsumer(data, loop, batchsize=100):
consumer = AIOKafkaConsumer(**KafkaConfigs)
await consumer.start()
dataPoints = []
async for msg in consumer:
try:
sys.stdout.flush()
consumedMsg = loads(msg.value.decode('utf-8'))
if consumedMsg['tid']:
dataPoints.append(loads(msg.value.decode('utf-8')))
if len(dataPoints)==batchsize or time.time() - startTime>5:
'''
#1: The task below goes and sends HTTP GET requests in bulk using aiohttp
'''
task = asyncio.ensure_future(getRequests(data, dataPoints))
res = await asyncio.gather(*[task])
if task.done():
outputs = []
'''
#2: Does some ETL on the returned values
'''
ids = await asyncio.gather(*[doSomething(**{'tid':x['tid'],
'cid':x['cid'], 'tn':x['tn'],
'id':x['id'], 'ix':x['ix'],
'ac':x['ac'], 'output':to_dict(xmltodict.parse(x['response'],encoding='utf-8')),
'loop':loop, 'option':1}) for x in res[0]])
simplySaveDataIntoDataBase(id) # This is where I see some missing data in the database
dataPoints = []
except Exception as e:
logger.error(e)
logger.error(traceback.format_exc())
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
logger.error(str(exc_type) +' '+ str(fname) +' '+ str(exc_tb.tb_lineno))
if __name__ == '__main__':
loop = asyncio.get_event_loop()
asyncio.ensure_future(Kconsumer(data, loop, batchsize=100))
loop.run_forever()
Does the ensure_future need to be awaited ?
How does aiohttp handle requests that come a little later than the others? Shouldn't it hold the whole batch back instead of forgetting about it altoghter?
Does the ensure_future need to be awaited ?
Yes, and your code is doing that already. await asyncio.gather(*tasks) awaits the provided tasks and returns their results in the same order.
Note that await asyncio.gather(*[task]) doesn't make sense, because it is equivalent to await asyncio.gather(task), which is again equivalent to await task. In other words, when you need the result of getRequests(data, dataPoints), you can write res = await getRequests(data, dataPoints) without the ceremony of first calling ensure_future() and then calling gather().
In fact, you almost never need to call ensure_future yourself:
if you need to await multiple tasks, you can pass coroutine objects directly to gather, e.g. gather(coroutine1(), coroutine2()).
if you need to spawn a background task, you can call asyncio.create_task(coroutine(...))
How does aiohttp handle requests that come a little later than the others? Shouldn't it hold the whole batch back instead of forgetting about it altoghter?
If you use gather, all requests must finish before any of them return. (That is not aiohttp policy, it's how gather works.) If you need to implement a timeout, you can use asyncio.wait_for or similar.

Stream producer and consumer with asyncio gather python

I wrote a script for a socket server that simply listens for incoming connections and processes the incoming data. The chosen architecture is the asyncio.start_server for the socket management and the asyncio.Queues for passing the data between the producer and consumer coroutines. The problem is that the consume(q1) function is executed only once (at the first script startup). Then it is not more executed. Is the line run_until_complete(asyncio.gather()) wrong?
import asyncio
import functools
async def handle_readnwrite(reader, writer, q1): #Producer coroutine
data = await reader.read(1024)
message = data.decode()
await writer.drain()
await q1.put(message[3:20])
await q1.put(None)
writer.close() #Close the client socket
async def consume(q1): #Consumer coroutine
while True:
# wait for an item from the producer
item = await q1.get()
if item is None:
logging.debug('None items') # the producer emits None to indicate that it is done
break
do_something(item)
loop = asyncio.get_event_loop()
q1 = asyncio.Queue(loop=loop)
producer_coro = asyncio.start_server(functools.partial(handle_readnwrite, q1=q1), '0.0.0.0', 3000, loop=loop)
consumer_coro = consume(q1)
loop.run_until_complete(asyncio.gather(consumer_coro,producer_coro))
try:
loop.run_forever()
except KeyboardInterrupt:
pass
loop.close()
handle_readnwrite always enqueues the None terminator, which causes consume to break (and therefore finish the coroutine). If consume should continue running and process other messages, the None terminator must not be sent after each message.

asyncio.sleep from within a UDP server not working [duplicate]

This question already has an answer here:
Calling a coroutine from asyncio.Protocol.data_received
(1 answer)
Closed 6 years ago.
I'm playing with asyncio UDP server example and would like to have a sleep from within the datagram_received method.
import asyncio
class EchoServerProtocol:
def connection_made(self, transport):
self.transport = transport
def datagram_received(self, data, addr):
message = data.decode()
print('Received %r from %s' % (message, addr))
# Sleep
await asyncio.sleep(1)
print('Send %r to %s' % (message, addr))
self.transport.sendto(data, addr)
loop = asyncio.get_event_loop()
print("Starting UDP server")
# One protocol instance will be created to serve all client requests
listen = loop.create_datagram_endpoint(EchoServerProtocol,
local_addr=('127.0.0.1', 9999))
transport, protocol = loop.run_until_complete(listen)
try:
loop.run_forever()
except KeyboardInterrupt:
pass
transport.close()
loop.close()
This fails with a SyntaxError on the sleep line (Python 3.5.1). Using time.sleep is obviously not working as it prevents any other datagram to be received. Any hints on how to solve this?
The goal is to replace this sleep with a real non-blocking I/O call.
It seems that the await has to live in an async def (coroutine). To do so, you must fire a call via asyncio.ensure_future.
def datagram_received(self, data, addr):
asyncio.ensure_future(self.reply(data, addr))
async def reply(self, data, addr):
await asyncio.sleep(1)
self.transport.sendto(data, addr)

Resources