I'm creating a Python 3 Tornado web server that listens to an MQTT broker and, whenever it receives a new message from it, broadcasts it to the connected browsers through websockets. However, Tornado doesn't seem to like calls to its API from a thread other than the one running IOLoop.current(), and I can't figure out another solution...
I've already tried to write some code. I've put the whole MQTT client (in this case called the PMCU client) on a separate thread which loops and listens for MQTT notifications.
import threading

import tornado.web
import tornado.websocket
from tornado.ioloop import IOLoop

websocket_clients = []

def on_pmcu_data(data):
    for websocket_client in websocket_clients:
        print("Sending websocket message")
        websocket_client.write_message(data)  # It gets stuck here!
        print("Sent")

class WebSocketHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        websocket_clients.append(self)

    def on_close(self):
        websocket_clients.remove(self)

def make_app():
    return tornado.web.Application([
        (r'/ws', WebSocketHandler)
    ])

if __name__ == "__main__":
    main_loop = IOLoop.current()
    pmcu_client = PMCUClient(on_pmcu_data)
    threading.Thread(target=lambda: pmcu_client.listen("5.4.3.2")).start()
    app = make_app()
    app.listen(8080)
    main_loop.start()
However, as I said, calls to the Tornado API from outside the IOLoop.current() thread seem to block: the code above only prints Sending websocket message.
My intent is to run websocket_client.write_message(data) on the IOLoop.current() event loop. But the call IOLoop.current().spawn_callback(lambda: websocket_client.write_message(data)) does not seem to work after IOLoop.current() has started. How could I achieve that?
I know that I have a huge misunderstanding of IOLoop, asyncio (on which it depends), and Python 3 async.
on_pmcu_data is being called in a separate thread, but the websocket is controlled by Tornado's event loop. You can't write to a websocket from another thread unless you go through the event loop.
You'll need to ask the IOLoop to write the data to the websockets.
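(IOLoop.add_callback is the one IOLoop method that Tornado documents as safe to call from other threads; all other interaction with the IOLoop has to happen on the IOLoop's own thread.)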
Solution 1:
For simple cases, if you don't want to change much in the code, you can do this:
if __name__ == "__main__":
    main_loop = IOLoop.current()
    on_pmcu_data_callback = lambda data: main_loop.add_callback(on_pmcu_data, data)
    pmcu_client = PMCUClient(on_pmcu_data_callback)
    ...
This should solve your problem.
Solution 2:
For more elaborate cases, you can pass the main_loop to the PMCUClient class and then use add_callback (or spawn_callback) to run on_pmcu_data.
Example:
if __name__ == "__main__":
    main_loop = IOLoop.current()
    pmcu_client = PMCUClient(on_pmcu_data, main_loop)  # also pass the main loop
    ...
Then in the PMCUClient class:
class PMCUClient:
    def __init__(self, on_pmcu_data, main_loop):
        ...
        self.main_loop = main_loop

    def listen(self, ...):
        ...
        self.main_loop.add_callback(self.on_pmcu_data, data)
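For completeness, here is a minimal self-contained sketch of the whole pattern. Since PMCUClient isn't shown in the question, a hypothetical fake_pmcu_listener thread stands in for it:

import threading
import time

import tornado.web
import tornado.websocket
from tornado.ioloop import IOLoop

websocket_clients = []

class WebSocketHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        websocket_clients.append(self)

    def on_close(self):
        websocket_clients.remove(self)

def on_pmcu_data(data):
    # Runs on the IOLoop thread, so write_message is safe here.
    for client in websocket_clients:
        client.write_message(data)

def fake_pmcu_listener(main_loop):
    # Stand-in for PMCUClient.listen: produces a message every second
    # on a worker thread and hands it over to the IOLoop.
    while True:
        main_loop.add_callback(on_pmcu_data, "pmcu tick")
        time.sleep(1)

if __name__ == "__main__":
    main_loop = IOLoop.current()
    threading.Thread(target=fake_pmcu_listener, args=(main_loop,), daemon=True).start()
    app = tornado.web.Application([(r'/ws', WebSocketHandler)])
    app.listen(8080)
    main_loop.start()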
I'm trying to implement an async RPC client within a Flask server.
The idea is that each request spawns a thread with a UUID, and each request then waits until there is a response with the correct UUID in the queue attribute of the RpcClient object.
The problem is that one request out of two fails. I think it might be a problem with multi-threading, but I don't see where it comes from.
Using debug prints, it seems that the message with the correct UUID is received in the _on_response callback, which updates the queue attribute of its instance correctly, but the queue attribute seen by the /rpc_call/<payload> endpoint is not synchronized (so queue[uuid] holds the response in the RpcClient callback but is still None in the scope of the endpoint).
My code:
from flask import Flask, jsonify
from gevent.pywsgi import WSGIServer
import sys
import os
import pika
import uuid
import time
import threading

class RpcClient(object):
    """Asynchronous RPC client."""

    internal_lock = threading.Lock()
    queue = {}

    def __init__(self):
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='rabbitmq'))
        self.channel = self.connection.channel()
        self.channel.basic_qos(prefetch_count=1)
        self.channel.exchange_declare(exchange='kaldi_expe', exchange_type='topic')

        # Create all the queues and bind them to the corresponding routing keys
        self.channel.queue_declare('request', durable=True)
        result = self.channel.queue_declare('answer', durable=True)
        self.channel.queue_bind(exchange='kaldi_expe', queue='request', routing_key='kaldi_expe.web.request')
        self.channel.queue_bind(exchange='kaldi_expe', queue='answer', routing_key='kaldi_expe.kaldi.answer')
        self.callback_queue = result.method.queue

        thread = threading.Thread(target=self._process_data_events)
        thread.daemon = True
        thread.start()

    def _process_data_events(self):
        self.channel.basic_consume(self.callback_queue, self._on_response, auto_ack=True)
        while True:
            with self.internal_lock:
                self.connection.process_data_events()
            time.sleep(0.1)

    def _on_response(self, ch, method, props, body):
        """On response we simply store the result in a local dictionary."""
        self.queue[props.correlation_id] = body

    def send_request(self, payload):
        corr_id = str(uuid.uuid4())
        self.queue[corr_id] = None
        with self.internal_lock:
            self.channel.basic_publish(exchange='kaldi_expe',
                                       routing_key="kaldi_expe.web.request",
                                       properties=pika.BasicProperties(
                                           reply_to=self.callback_queue,
                                           correlation_id=corr_id,
                                       ),
                                       body=payload)
        return corr_id
def flask_app():
    app = Flask("kaldi")

    @app.route('/', methods=['GET'])
    def server_is_up():
        return 'server is up', 200

    @app.route('/rpc_call/<payload>')
    def rpc_call(payload):
        """Simple Flask implementation for making asynchronous RPC calls."""
        corr_id = app.config['RPCclient'].send_request(payload)
        while app.config['RPCclient'].queue[corr_id] is None:
            # print("queue server: " + str(app.config['RPCclient'].queue))
            time.sleep(0.1)
        return app.config['RPCclient'].queue[corr_id]

    return app  # the app must be returned so the caller can configure it
if __name__ == '__main__':
    while True:
        try:
            rpcClient = RpcClient()
            app = flask_app()
            app.config['RPCclient'] = rpcClient
            print("Rabbit MQ is connected, starting server", file=sys.stderr)
            app.run(debug=True, threaded=True, host='0.0.0.0')
        except pika.exceptions.AMQPConnectionError as e:
            print("Waiting for RabbitMq startup" + str(e), file=sys.stderr)
            time.sleep(1)
        except Exception as e:
            worker.log.error(e)
            exit(e)
I found where the bug came from:
The debug=True in the line app.run(debug=True, threaded=True, host='0.0.0.0') restarts the server right after startup.
The whole script is then executed again from the beginning. Because of this, another RpcClient is initialized and consumes from the same queue, while the previous thread is still running. This causes two RpcClients to consume from the same queue, with one of them being effectively useless.
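If debug mode is still wanted, one way to avoid the double start (my own suggestion based on Flask's documented reloader behaviour, not part of the original answer) is to keep the debugger but disable the reloader:

# Keep the interactive debugger but skip Werkzeug's reloader,
# so the module-level RpcClient is only created once.
app.run(debug=True, use_reloader=False, threaded=True, host='0.0.0.0')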
I built two different Tornado/Python 3 API services.
Service1 opens several different threads and semi-parallel sends GET requests to Service2.
However, Service2 (Tornado/Python 3) then proceeds to process the GET requests sequentially.
Code example of Service2:
class MainHandler(tornado.web.RequestHandler):
    def get(self):
        self.write("Some Microservice v1")

def make_app():
    return tornado.web.Application([
        (r"/v1", MainHandler),
        (r"/v1/addfile", AddHandler, dict(filepaths=filepaths)),
        (r"/v1/getfiles", GetHandler, dict(filepaths=filepaths)),
        (r"/v1/getfile", GetFileHandler, dict(filepaths=filepaths)),
    ])

if __name__ == "__main__":
    app = make_app()
    app.listen(8887, address='127.0.0.1')
    tornado.ioloop.IOLoop.current().start()
I have tried to manually fork processes to achieve better performance, but this did not work.
if __name__ == "__main__":
    sockets = tornado.netutil.bind_sockets(8887)
    tornado.process.fork_processes(8)
    app = make_app()  # the app must be created in each child after forking
    server = tornado.httpserver.HTTPServer(app)
    server.add_sockets(sockets)
    tornado.ioloop.IOLoop.current().start()
What am I missing here? Do I actually need to go into each handler class and define coroutines and callbacks?
Grateful for help,
thank you.
Working as intended. My Service1 was using up all the cores on the test environment, which is why there were no cores left for Service2.
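For reference, a sketch of my own (not part of the original answer): if a handler really did block the process, Tornado handlers can be written as native coroutines so the IOLoop keeps serving other requests while one handler waits:

import asyncio

import tornado.web

class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        # The await yields control to the IOLoop, so other requests
        # can be processed while this handler is waiting.
        await asyncio.sleep(0.1)
        self.write("Some Microservice v1")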
I have this piece of code which does a psubscribe to Redis. I want to run it in a thread so it works in the background while the rest of the code checks for notifications coming from it.
def psubscribe(context, param1, param2, param3):
    context.test_config = load_config()
    RedisConnector(context.test_config["redis_host"],
                   context.test_config["redis_db_index"])
    redis_notification_connector = RedisConnector(
        context.test_config["notification__redis_host"],
        int(param3),
        int(context.test_config["notification_redis_port"]))
    context.redis_connectors = redis_notification_connector.psubscribe_to_redis_event(
        param1,
        timeout_seconds=int(param2))
What I have done so far (but it's not running :( ):
context.t = threading.Thread(target=psubscribe, args=['param1', 'param2', 'param3'])
context.t.start()
It is actually working. I think you just didn't need to pass the context variable to your psubscribe function.
Here is an example:
Start an HTTP server that listens on port 8000 as a background thread.
Send HTTP requests to it and validate the responses.
Feature scenario:

Scenario: Run background process and validate responses
  Given Start background process
  Then Validate outputs
background_steps.py file:
import threading
import logging
from behave import *
from features.steps.utils import run_server
import requests

@given("Start background process")
def step_impl(context):
    context.t = threading.Thread(target=run_server, args=[8000])
    context.t.daemon = True
    context.t.start()

@then("Validate outputs")
def step_impl(context):
    response = requests.get('http://127.0.0.1:8000')
    assert response.status_code == 501
utils.py file:
from http.server import HTTPServer, BaseHTTPRequestHandler

def run_server(port, server_class=HTTPServer, handler_class=BaseHTTPRequestHandler):
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    httpd.serve_forever()
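Assuming the usual behave layout (a features/ directory with steps/ inside it), running behave from the project root should execute the scenario. The bare BaseHTTPRequestHandler implements no do_GET, so it answers GET requests with 501, which is exactly what the validation step asserts.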
In my project I try to start a REST API (built with FastAPI and run with Hypercorn); additionally, I want to start a RabbitMQ consumer (with aio_pika) on startup:
aio_pika offers a robust connection which automatically reconnects on failure. If I run the code below with hypercorn app:app, the consumer and the REST interface start correctly, but the reconnect from aio_pika does not work anymore. How can I achieve a production-stable RabbitMQ consumer and REST API in two different processes (or threads)? My Python version is 3.7; please note I am actually a Java and Go developer, in case my approach is not the Python way :-)
import asyncio

import aio_pika
from fastapi import FastAPI

app = FastAPI()

@app.on_event("startup")
def startup():
    loop = asyncio.new_event_loop()
    asyncio.ensure_future(main(loop))

@app.get("/")
def read_root():
    return {"Hello": "World"}

async def main(loop):
    connection = await aio_pika.connect_robust(
        "amqp://guest:guest@127.0.0.1/", loop=loop
    )

    async with connection:
        queue_name = "test_queue"

        # Creating channel
        channel = await connection.channel()  # type: aio_pika.Channel

        # Declaring queue
        queue = await channel.declare_queue(
            queue_name,
            auto_delete=True
        )  # type: aio_pika.Queue

        async with queue.iterator() as queue_iter:
            # Cancel consuming after __aexit__
            async for message in queue_iter:
                async with message.process():
                    print(message.body)
                    if queue.name in message.body.decode():
                        break
With the help of @pgjones I managed to change the consumer startup to:

@app.on_event("startup")
def startup():
    loop = asyncio.get_event_loop()
    asyncio.ensure_future(main(loop))

Starting the job with asyncio.ensure_future and passing the current event loop as an argument solved the issue: the previous version handed aio_pika a brand-new loop that was never started, which is presumably why the automatic reconnect never fired.
It would be interesting if somebody has a different/better approach.
Thanks!
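One possible alternative (a sketch of mine, not from this thread): declare the startup hook itself as async, so it already runs on the serving event loop and the consumer can be scheduled with create_task. This assumes main is changed to take no loop argument and lets aio_pika pick up the running loop:

@app.on_event("startup")
async def startup():
    # An async startup hook runs on the already-running event loop,
    # so the consumer task can be created directly on it.
    asyncio.create_task(main())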
I am writing a websocket server in Python. I have tried the approach below with txws, autobahn, and tornado, all with similar results.
I see massive memory consumption with secure websockets and I cannot figure out where or why this might be happening. Below is an example in Tornado, but I can provide examples in autobahn or txws.
import tornado.httpserver
import tornado.websocket
import tornado.ioloop
import tornado.web
import json

class AuthHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        print('new connection for auth')

    def on_message(self, message):
        message = json.loads(message)
        client_id = message['client_id']
        if client_id not in app.clients:
            app.clients[client_id] = self
        self.write_message('Agent Recorded')

    def on_close(self):
        print('auth connection closed')

class MsgHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        print('new connection for msg')

    def on_message(self, message):
        message = json.loads(message)
        to_client = message['client_id']
        if to_client in app.clients:
            app.clients[to_client].write_message('You got a message')

    def on_close(self):
        print('msg connection closed')

app = tornado.web.Application([
    (r'/auth', AuthHandler),
    (r'/msg', MsgHandler)
])
app.clients = {}

if __name__ == "__main__":
    http_server = tornado.httpserver.HTTPServer(app, ssl_options={
        'certfile': 'tests/keys/server.crt',
        'keyfile': 'tests/keys/server.key'
    })
    http_server.listen(8000)
    tornado.ioloop.IOLoop.instance().start()
After making around 10,000 connections I find I am using around 700 MB of memory with SSL, compared to 43 MB without, and I never get it back unless I kill the process. The problem seems closely tied to the number of connections made rather than the number of messages sent.
The consumption happens independently of the client (I wrote my own client and tried other clients).
Are secure websockets really that much more memory intensive than plain websockets? Or is my server code not implementing them correctly?
I think the best solution is to use a real web server (nginx, Apache) as a proxy in front and let it manage the SSL layer.
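As a rough illustration of that setup, a minimal nginx sketch (the certificate paths and upstream port are assumptions; it presumes the Tornado server now listens on plain HTTP at 127.0.0.1:8000):

# nginx terminates TLS and proxies websocket traffic to Tornado.
server {
    listen 443 ssl;
    ssl_certificate     /etc/nginx/ssl/server.crt;
    ssl_certificate_key /etc/nginx/ssl/server.key;

    location / {
        proxy_pass http://127.0.0.1:8000;
        # Headers required for the websocket upgrade handshake.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
    }
}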