Python secure websocket memory consumption - memory-leaks

I am writing a WebSocket server in Python. I have tried the approach below with txws, autobahn, and tornado, all with similar results.
I seem to have massive memory consumption with secure websockets and I cannot figure out where or why this might be happening. Below is an example in tornado, but I can provide examples in autobahn or txws.
import tornado.httpserver
import tornado.websocket
import tornado.ioloop
import tornado.web
import json


class AuthHandler(tornado.websocket.WebSocketHandler):

    def open(self):
        print 'new connection for auth'

    def on_message(self, message):
        message = json.loads(message)
        client_id = message['client_id']
        if client_id not in app.clients:
            app.clients[client_id] = self
        self.write_message('Agent Recorded')

    def on_close(self):
        print 'auth connection closed'


class MsgHandler(tornado.websocket.WebSocketHandler):

    def open(self):
        print 'new connection for msg'

    def on_message(self, message):
        message = json.loads(message)
        to_client = message['client_id']
        if to_client in app.clients:
            app.clients[to_client].write_message('You got a message')

    def on_close(self):
        print 'msg connection closed'


app = tornado.web.Application([
    (r'/auth', AuthHandler),
    (r'/msg', MsgHandler)
])
app.clients = {}

if __name__ == "__main__":
    http_server = tornado.httpserver.HTTPServer(app, ssl_options={
        'certfile': 'tests/keys/server.crt',
        'keyfile': 'tests/keys/server.key'
    })
    http_server.listen(8000)
    tornado.ioloop.IOLoop.instance().start()
After making around 10,000 connections I find I am using around 700MB of memory with SSL, compared to 43MB without, and I never get it back unless I kill the process. The problem seems to be closely tied to the number of connections made rather than to the messages sent.
The consumption happens independently of the client (I wrote my own client and also tried other clients).
Are secure websockets really that much more memory intensive than plain websockets? Or is my server code not implementing them correctly?

I think the best solution is to use a real web server (nginx, Apache) as a reverse proxy and let it handle the SSL layer.
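For reference, a minimal sketch of the Tornado side of that setup, assuming the proxy (e.g. nginx) terminates TLS and forwards plain WebSocket traffic to localhost:8000; only the startup block changes, the handlers above stay the same:

if __name__ == "__main__":
    # TLS is terminated by the reverse proxy, so no ssl_options here.
    # xheaders=True makes Tornado honour X-Forwarded-For / X-Forwarded-Proto
    # set by the proxy, so handlers still see the real client address.
    http_server = tornado.httpserver.HTTPServer(app, xheaders=True)
    http_server.listen(8000, address='127.0.0.1')  # reachable only through the proxy
    tornado.ioloop.IOLoop.instance().start()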

Related

How to send ros2 messages from a websocket server to connected clients in tornado

I have a ROS2 publisher script that sends custom messages from ROS2 nodes. What I need to do is to have a subscriber (which is also my websocket server) listen to the message that the publisher sends, convert it to a dictionary and send it as JSON from the websocket server to a connected websocket client. I have already checked the rosbridge repo but I could not make it work. It doesn't have enough documentation and I am new to ROS.
I need something like this:
import rclpy
import sys
from rclpy.node import Node

import tornado.ioloop
import tornado.httpserver
import tornado.web
import tornado.websocket
import threading

from custom.msg import CustomMsg
from .convert import message_to_ordereddict

wss = []


class wsHandler(tornado.websocket.WebSocketHandler):

    def open(self):
        print('Online')
        if self not in wss:
            wss.append(self)

    def on_close(self):
        print('Offline')
        if self in wss:
            wss.remove(self)


def wsSend(message):
    for ws in wss:
        ws.write_message(message)


class MinimalSubscriber(Node):

    def __init__(self):
        super().__init__('minimal_subscriber')
        self.subscription = self.create_subscription(CustomMsg, 'topic', self.CustomMsg_callback, 10)
        self.subscription  # prevent unused variable warning

    def CustomMsg_callback(self, msg):
        ws_message = message_to_ordereddict(msg)
        wsSend(ws_message)


if __name__ == "__main__":
    http_server = tornado.httpserver.HTTPServer(tornado.web.Application([(r'/', wsHandler)]))
    http_server.listen(8888)
    main_loop = tornado.ioloop.IOLoop.instance()
    # Start main loop
    main_loop.start()
so the callback function in the MinimalSubscriber class receives the ROS message, converts it to a dictionary and sends it to the websocket client. I am a bit confused about how to make these two threads (ROS and websocket) communicate with each other.
So I think I got a bit confused myself going through the threading. I changed my approach and made it work using the Tornado periodic callback, with the spin_once function of rclpy as the callback function. I will post my solution as it might help some people who have the same issue.
import queue

import rclpy
from rclpy.node import Node

import tornado.ioloop
import tornado.httpserver
import tornado.web
import tornado.websocket

from custom.msg import CustomMsg
from .convert import message_to_ordereddict

wss = []


class wsHandler(tornado.websocket.WebSocketHandler):

    @classmethod
    def route_urls(cls):
        return [(r'/', cls, {}), ]

    def open(self):
        print('Online')
        if self not in wss:
            wss.append(self)

    def on_close(self):
        print('Offline')
        if self in wss:
            wss.remove(self)


def make_app():
    myWebHandler = wsHandler.route_urls()
    return tornado.web.Application(myWebHandler)


msg_queue = queue.Queue()


class MinimalSubscriber(Node):

    def __init__(self):
        super().__init__('minimal_subscriber')
        self.subscription = self.create_subscription(CustomMsg, 'topic', self.CustomMsg_callback, 10)
        self.subscription  # prevent unused variable warning

    def CustomMsg_callback(self, msg):
        msg_dict = message_to_ordereddict(msg)
        msg_queue.put(msg_dict)


if __name__ == "__main__":
    rclpy.init()
    minimal_subscriber = MinimalSubscriber()

    def send_ros_to_clients():
        rclpy.spin_once(minimal_subscriber)
        my_msg = msg_queue.get()
        for client in wss:
            client.write_message(my_msg)

    app = make_app()
    server = tornado.httpserver.HTTPServer(app)
    server.listen(8888)
    tornado.ioloop.PeriodicCallback(send_ros_to_clients, 1).start()
    tornado.ioloop.IOLoop.current().start()

    minimal_subscriber.destroy_node()
    rclpy.shutdown()
I also folded the wsSend function into the send_ros_to_clients function. Some might say that using a global queue is not the best practice, but I could not come up with another solution. I would appreciate any suggestions or corrections on my solution.
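One possible alternative to the global queue is a rough sketch like the one below (not a drop-in replacement for the code above; broadcast and start_ros_spin_thread are names I made up, not part of rclpy or Tornado): spin the ROS node on a background thread and hand each converted message to Tornado with IOLoop.add_callback, which is documented as safe to call from other threads.

import threading

import rclpy
import tornado.ioloop


def broadcast(message):
    # Runs on the Tornado IOLoop thread, so touching the websockets is safe.
    for client in wss:
        client.write_message(message)


def start_ros_spin_thread(node):
    # Spin the ROS node on its own daemon thread instead of a PeriodicCallback.
    def _spin():
        while rclpy.ok():
            rclpy.spin_once(node, timeout_sec=0.1)
    threading.Thread(target=_spin, daemon=True).start()


# Wiring sketch (in __main__, before starting the IOLoop):
#     main_loop = tornado.ioloop.IOLoop.current()
#     start_ros_spin_thread(minimal_subscriber)
# and in CustomMsg_callback, replace msg_queue.put(msg_dict) with:
#     main_loop.add_callback(broadcast, msg_dict)   # thread-safe hand-off to the IOLoop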

Flask server using an asynchronous RPC client only answers one request out of two

I'm trying to implement an async RPC client within a Flask server.
The idea is that each request spawns a thread with a uuid, and each request waits until there is a response with the correct uuid in the queue attribute of the RpcClient object.
The problem is that one request out of two fails. I think it might be a multi-threading problem, but I don't see where it comes from.
The bug can be seen here.
Using debug prints, it seems that the message with the correct uuid is received in the _on_response callback and updates the queue attribute of this instance correctly, but the queue attribute within the /rpc_call/<payload> endpoint never sees the change (so queue[uuid] holds the response inside the RpcClient callback but is still None in the scope of the endpoint).
My code:
from flask import Flask, jsonify
from gevent.pywsgi import WSGIServer
import sys
import os
import pika
import uuid
import time
import threading


class RpcClient(object):
    """Asynchronous Rpc client."""
    internal_lock = threading.Lock()
    queue = {}

    def __init__(self):
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='rabbitmq'))
        self.channel = self.connection.channel()
        self.channel.basic_qos(prefetch_count=1)
        self.channel.exchange_declare(exchange='kaldi_expe', exchange_type='topic')

        # Create all the queues and bind them to the corresponding routing keys
        self.channel.queue_declare('request', durable=True)
        result = self.channel.queue_declare('answer', durable=True)
        self.channel.queue_bind(exchange='kaldi_expe', queue='request', routing_key='kaldi_expe.web.request')
        self.channel.queue_bind(exchange='kaldi_expe', queue='answer', routing_key='kaldi_expe.kaldi.answer')

        self.callback_queue = result.method.queue

        thread = threading.Thread(target=self._process_data_events)
        thread.setDaemon(True)
        thread.start()

    def _process_data_events(self):
        self.channel.basic_consume(self.callback_queue, self._on_response, auto_ack=True)
        while True:
            with self.internal_lock:
                self.connection.process_data_events()
            time.sleep(0.1)

    def _on_response(self, ch, method, props, body):
        """On response we simply store the result in a local dictionary."""
        self.queue[props.correlation_id] = body

    def send_request(self, payload):
        corr_id = str(uuid.uuid4())
        self.queue[corr_id] = None

        with self.internal_lock:
            self.channel.basic_publish(exchange='kaldi_expe',
                                       routing_key="kaldi_expe.web.request",
                                       properties=pika.BasicProperties(
                                           reply_to=self.callback_queue,
                                           correlation_id=corr_id,
                                       ),
                                       body=payload)
        return corr_id


def flask_app():
    app = Flask("kaldi")

    @app.route('/', methods=['GET'])
    def server_is_up():
        return 'server is up', 200

    @app.route('/rpc_call/<payload>')
    def rpc_call(payload):
        """Simple Flask implementation for making asynchronous Rpc calls. """
        corr_id = app.config['RPCclient'].send_request(payload)
        while app.config['RPCclient'].queue[corr_id] is None:
            # print("queue server: " + str(app.config['RPCclient'].queue))
            time.sleep(0.1)
        return app.config['RPCclient'].queue[corr_id]

    return app


if __name__ == '__main__':
    while True:
        try:
            rpcClient = RpcClient()
            app = flask_app()
            app.config['RPCclient'] = rpcClient
            print("Rabbit MQ is connected, starting server", file=sys.stderr)
            app.run(debug=True, threaded=True, host='0.0.0.0')
        except pika.exceptions.AMQPConnectionError as e:
            print("Waiting for RabbitMq startup" + str(e), file=sys.stderr)
            time.sleep(1)
        except Exception as e:
            worker.log.error(e)
            exit(e)
I found where the bug came from:
The debug=True in the line app.run(debug=True, threaded=True, host='0.0.0.0') restarts the server right after startup: the reloader runs the whole script again from the beginning. Because of that, a second rpcClient is initialized and consumes from the same queue while the thread of the first one is still running. This causes two RpcClient instances to consume from the same queue, one of which is effectively useless, which is why only every other request gets its answer.
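A minimal way to keep debug mode without the duplicate consumer is to disable the reloader (use_reloader is a standard Flask app.run flag), so the script, and therefore the RpcClient, is only created once:

# Keep the debugger but skip the Werkzeug reloader, so the module is imported
# (and the RpcClient constructed) only once.
app.run(debug=True, use_reloader=False, threaded=True, host='0.0.0.0')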

Run actions on Tornado main loop, after it starts

I'm creating a Python 3 Tornado web server that should listen to an MQTT broker and, whenever it receives a new message from it, broadcast it to the connected browsers through WebSockets. However, it seems that Tornado doesn't like calls to its API from a thread other than the one running IOLoop.current(), and I can't figure out another solution...
I've already tried to write some code. I've put the whole MQTT client (in this case called PMCU client) on a separate thread which loops and listens for MQTT notifications.
import threading

import tornado.web
import tornado.websocket
from tornado.ioloop import IOLoop

# PMCUClient (the MQTT client) is defined elsewhere in the project.

websocket_clients = []


def on_pmcu_data(data):
    for websocket_client in websocket_clients:
        print("Sending websocket message")
        websocket_client.write_message(data)  # Here it gets stuck!
        print("Sent")


class WebSocketHandler(tornado.websocket.WebSocketHandler):

    def open(self):
        websocket_clients.append(self)

    def on_close(self):
        websocket_clients.remove(self)


def make_app():
    return tornado.web.Application([
        (r'/ws', WebSocketHandler)
    ])


if __name__ == "__main__":
    main_loop = IOLoop().current()
    pmcu_client = PMCUClient(on_pmcu_data)
    threading.Thread(target=lambda: pmcu_client.listen("5.4.3.2")).start()
    app = make_app()
    app.listen(8080)
    main_loop.start()
However, as I said, it seems that calls to the Tornado API from outside IOLoop.current() block: the code above only prints Sending websocket message.
My intent is to run websocket_client.write_message(data) on the IOLoop.current() event loop. But it seems that IOLoop.current().spawn_callback(lambda: websocket_client.write_message(data)) doesn't work after IOLoop.current() has started. How could I achieve that?
I know that I have a huge misunderstanding of IOLoop, of asyncio (on which it depends), and of Python 3 async.
on_pmcu_data is being called in a separate thread but the websocket is controlled by Tornado's event loop. You can't write to a websocket from a thread unless you have access to the event loop.
You'll need to ask the IOLoop to write the data to websockets.
Solution 1:
For simple cases, if you don't want to change much in the code, you can do this:
if __name__ == "__main__":
    main_loop = IOLoop().current()
    on_pmcu_data_callback = lambda data: main_loop.add_callback(on_pmcu_data, data)
    pmcu_client = PMCUClient(on_pmcu_data_callback)
    ...
This should solve your problem.
Solution 2:
For more elaborate cases, you can pass the main_loop to PMCUClient class and then use add_callback (or spawn_callback) to run on_pmcu_data.
Example:
if __name__ == "__main__":
main_loop = IOLoop().current()
pmcu_client = PMCUClient(on_pmcu_data, main_loop) # also pass the main loop
...
Then in the PMCUClient class:
class PMCUClient:

    def __init__(self, on_pmcu_data, main_loop):
        ...
        self.main_loop = main_loop

    def listen(...):
        ...
        self.main_loop.add_callback(self.on_pmcu_data, data)

Tornado Request with WebSocket Callback

I'm trying to start a long blocking function after receiving an HTTP request. The request must be answered immediately (200 OK or 500 Internal Error), but the process should run in the background and send a notification to a WebSocket once it has finished.
Also, the application should keep receiving other requests for processing, and these must also be answered immediately, without blocking on the previous ones.
I'm using add_callback, but I'm not sure if it's the correct way to use Tornado, since it blocks the incoming HTTP requests. I've tried using different threads, but I got exceptions when trying to call the send_message method of the WebSocket handler.
import time

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.web import Application, RequestHandler, asynchronous
from tornado.websocket import WebSocketHandler


def long_process(id):
    time.sleep(5)


class RequestWeb(RequestHandler):

    @gen.coroutine
    def process(self, id):
        # Trying to call long_process, just like
        # yield gen.Task(IOLoop.current().add_timeout, time.time() + 10)
        # The response must be sent immediately, but the process should run in the background
        IOLoop.current().add_callback(callback=lambda: long_process(id))

    @asynchronous
    @gen.coroutine
    def get(self, id):
        IOLoop.current().add_future(self.process(id), self.process_complete)
        self.write("OK")

    def process_complete(self, future):
        SocketHandler.send_message('Processing complete')


class SocketHandler(WebSocketHandler):
    connections = set()

    def open(self):
        SocketHandler.connections.add(self)

    @classmethod
    def send_message(cls, message):
        for ws in cls.connections:
            ws.write_message(message)


def make_app():
    return Application([
        (r'/api/(?P<id>[a-zA-Z0-9]+)$', RequestWeb),
        (r'/ws', SocketHandler)
    ])


if __name__ == "__main__":
    app = make_app()
    app.listen(8000)
    IOLoop.current().start()
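No answer is included here, but one possible direction, sketched below under the assumption of Tornado 5+ (where IOLoop.run_in_executor is available), is to push the blocking call onto a thread pool so the IOLoop, and therefore the other HTTP requests, is never blocked, and to notify the WebSocket clients once the work completes. The executor and the rewritten get handler are my additions; they reuse long_process and SocketHandler from the snippet above.

from concurrent.futures import ThreadPoolExecutor
from tornado.ioloop import IOLoop

executor = ThreadPoolExecutor(max_workers=4)


class RequestWeb(RequestHandler):

    async def get(self, id):
        # Answer the HTTP request immediately.
        self.write("OK")
        self.finish()
        # Run the blocking function on the thread pool; the IOLoop stays free
        # to serve other requests in the meantime.
        await IOLoop.current().run_in_executor(executor, long_process, id)
        # Back on the IOLoop thread, so touching the websockets is safe.
        SocketHandler.send_message('Processing complete')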

Jupyter for WebSocket communication

I'm working on an app which needs to have a WebSocket API, and will also integrate Jupyter (formerly IPython) notebooks as a relatively minor feature. Since Jupyter already uses WebSockets for communication, how difficult would it be to use it as a general library for serving other WebSocket APIs apart from its own? Or am I better off using another library such as aiohttp? I'm looking for any advice and hints about the best practices for this. Thanks!
You can proxy WebSockets from your main application to Jupyter.
It really doesn't matter what technology you use to serve WebSockets; the proxy loop will be very similar (wait for a message, push the message forward). However, it will be web server dependent, as Python does not have a standard for WebSockets akin to WSGI.
I did one in the pyramid_notebook project. Running Jupyter in its own process is a must as, at least at the time the code was written, embedding Jupyter directly in your application was not feasible. I am not sure whether the latest versions have changed this. Jupyter itself was using Tornado.
"""UWSGI websocket proxy."""
from urllib.parse import urlparse, urlunparse
import logging
import time
import uwsgi
from pyramid import httpexceptions
from ws4py import WS_VERSION
from ws4py.client import WebSocketBaseClient
#: HTTP headers we need to proxy to upstream websocket server when the Connect: upgrade is performed
CAPTURE_CONNECT_HEADERS = ["sec-websocket-extensions", "sec-websocket-key", "origin"]
logger = logging.getLogger(__name__)
class ProxyClient(WebSocketBaseClient):
"""Proxy between upstream WebSocket server and downstream UWSGI."""
#property
def handshake_headers(self):
"""
List of headers appropriate for the upgrade
handshake.
"""
headers = [
('Host', self.host),
('Connection', 'Upgrade'),
('Upgrade', 'WebSocket'),
('Sec-WebSocket-Key', self.key.decode('utf-8')),
# Origin is proxyed from the downstream server, don't set it twice
# ('Origin', self.url),
('Sec-WebSocket-Version', str(max(WS_VERSION)))
]
if self.protocols:
headers.append(('Sec-WebSocket-Protocol', ','.join(self.protocols)))
if self.extra_headers:
headers.extend(self.extra_headers)
logger.info("Handshake headers: %s", headers)
return headers
def received_message(self, m):
"""Push upstream messages to downstream."""
# TODO: No support for binary messages
m = str(m)
logger.debug("Incoming upstream WS: %s", m)
uwsgi.websocket_send(m)
logger.debug("Send ok")
def handshake_ok(self):
"""
Called when the upgrade handshake has completed
successfully.
Starts the client's thread.
"""
self.run()
def terminate(self):
super(ProxyClient, self).terminate()
def run(self):
"""Combine async uwsgi message loop with ws4py message loop.
TODO: This could do some serious optimizations and behave asynchronously correct instead of just sleep().
"""
self.sock.setblocking(False)
try:
while not self.terminated:
logger.debug("Doing nothing")
time.sleep(0.050)
logger.debug("Asking for downstream msg")
msg = uwsgi.websocket_recv_nb()
if msg:
logger.debug("Incoming downstream WS: %s", msg)
self.send(msg)
s = self.stream
self.opened()
logger.debug("Asking for upstream msg")
try:
bytes = self.sock.recv(self.reading_buffer_size)
if bytes:
self.process(bytes)
except BlockingIOError:
pass
except Exception as e:
logger.exception(e)
finally:
logger.info("Terminating WS proxy loop")
self.terminate()
def serve_websocket(request, port):
"""Start UWSGI websocket loop and proxy."""
env = request.environ
# Send HTTP response 101 Switch Protocol downstream
uwsgi.websocket_handshake(env['HTTP_SEC_WEBSOCKET_KEY'], env.get('HTTP_ORIGIN', ''))
# Map the websocket URL to the upstream localhost:4000x Notebook instance
parts = urlparse(request.url)
parts = parts._replace(scheme="ws", netloc="localhost:{}".format(port))
url = urlunparse(parts)
# Proxy initial connection headers
headers = [(header, value) for header, value in request.headers.items() if header.lower() in CAPTURE_CONNECT_HEADERS]
logger.info("Connecting to upstream websockets: %s, headers: %s", url, headers)
ws = ProxyClient(url, headers=headers)
ws.connect()
# TODO: Will complain loudly about already send headers - how to abort?
return httpexceptions.HTTPOk()
