tornado server is incompatible with threading module - multithreading

I'm using tornado with threads.
In short, each time the websocket handler receives a requests, it start to execute a task, which might take a few minutes.
However, once a client is connected, no other client can be connected, until the first one disconnects.
Any ideas?
I've attached a minimal example that uses time.sleep to simulate long running tasks.
import tornado.web
import tornado.websocket
import tornado.httpserver
import tornado.ioloop
import time
import json
import threading
class TaskHandler(tornado.websocket.WebSocketHandler):
def open(self):
pass
def check_origin(self, origin):
return True
def on_message(self, message):
try:
print 'received: ', message
self.write_message(json.dumps({'status': 'running'}))
def worker_A(kwargs):
time.sleep(100)
pass
def worker_B(kwargs):
time.sleep(100)
pass
threads = []
for target in [worker_A, worker_B]:
t = threading.Thread(target = target, args = ({'xxx': 'yyy'}, ))
t.daemon = True
t.start()
threads.append(t)
for t in threads:
t.join()
except Exception, e:
print 'TaskHandler: exception: ', e
pass
self.write_message(json.dumps({'status': 'done'}))
def on_close(self):
pass
class Server(tornado.web.Application):
def __init__(self):
handlers = [
('/task', TaskHandler),
]
tornado.web.Application.__init__(self, handlers)
if __name__ == '__main__':
server = tornado.httpserver.HTTPServer(Server())
server.listen(8765, address = '127.0.0.1')
tornado.ioloop.IOLoop.instance().start()

You block the whole Tornado event loop for 100 seconds in t.join. Unless you have a yield statement or schedule a callback and exit a function, then your function is not asynchronous. Notice how your function "on_message" begins two threads and then calls t.join on each -- how can Tornado's event loop accomplish any other work while your function is waiting for t.join?
Instead, use a ThreadPoolExecutor something like this:
thread_pool = ThreadPoolExecutor(4)
class TaskHandler(tornado.websocket.WebSocketHandler):
# Make this an asynchronous coroutine
#gen.coroutine
def on_message_coroutine(self, message):
print 'received: ', message
self.write_message(json.dumps({'status': 'running'}))
def worker_A(kwargs):
time.sleep(100)
pass
def worker_B(kwargs):
time.sleep(100)
pass
futures = []
for target in [worker_A, worker_B]:
f = thread_pool.submit(target, {'xxx': 'yyy'})
futures.append(future)
# Now the event loop can do other things
yield futures
def on_message(self, message):
IOLoop.current().spawn_callback(self.on_message_coroutine,
message)

Related

Multiprocessing pool executing synchronous

I need an asynchronous parent process to handover function calls to a process pool.
The imports are to time consuming to spawn a new worker/process every time. So I thought to put tasks in an asyncio.queue and have a consumer listen to it and hand them off to the workers. (Sort of like how Gunicorn works but I don't want to run a webserver in order to make the calls).
However the function call seems to only be executed if I call res.get() on the response of pool.apply_async() but then it just runs as if I would call a normal synchronous for-loop.
This is my code:
#!/usr/bin/env python
import os
import time
import multiprocessing as mp
import asyncio
def f(x: list) -> int:
print(f'the pid of this process is: {os.getpid()}')
time.sleep(1)
return len(x)
def callback_func(x):
print(f'this is the callback function')
print(x)
async def consumer(queue):
with mp.Pool(processes=4) as pool:
while True:
x = await queue.get()
if x == 'stop':
break
# this makes it seem to run synchronous:
res = pool.apply_async(f, (x,))
print(res.get(), x, os.getpid())
# if I run this instead, both f() and callback_func
# are not executed.
#res = pool.apply_async(f, (x,), callback_func)
#print(x, os.getpid())
queue.task_done()
print(f'consumed')
async def producer(queue):
for i in range(20):
await queue.put([i,i+1,i+2])
# await asyncio.sleep(0.5)
await queue.put('stop')
async def main():
queue = asyncio.Queue()
input_coroutines = [consumer(queue), producer(queue)]
for f in asyncio.as_completed(input_coroutines):
try:
result = await f
print(result)
except Exception as e:
print('caught exception')
print(e)
if __name__ == "__main__":
asyncio.run(main())
What am I doing wrong?

Wait for message using python's async protocol

Into:
I am working in a TCP server that receives events over TCP. For this task, I decided to use asyncio Protocol libraries (yeah, maybe I should have used Streams), the reception of events works fine.
Problem:
I need to be able to connect to the clients, so I create another "server" used to look up all my connected clients, and after finding the correct one, I use the Protocol class transport object to send a message and try to grab the response by reading a buffer variable that always has the last received message.
My problem is, after sending the message, I don't know how to wait for the response, so I always get the previous message from the buffer.
I will try to simplify the code to illustrate (please, keep in mind that this is an example, not my real code):
import asyncio
import time
CONN = set()
class ServerProtocol(asyncio.Protocol):
def connection_made(self, transport):
self.transport = transport
CONN.add(self)
def data_received(self, data):
self.buffer = data
# DO OTHER STUFF
print(data)
def connection_lost(self, exc=None):
CONN.remove(self)
class ConsoleProtocol(asyncio.Protocol):
def connection_made(self, transport):
self.transport = transport
# Get first value just to ilustrate
self.client = next(iter(CONN))
def data_received(self, data):
# Forward the message to the client
self.client.transport.write(data)
# wait a fraction of a second
time.sleep(0.2)
# foward the response of the client
self.transport.write(self.client.buffer)
def main():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(
loop.create_server(protocol_factory=ServerProtocol,
host='0.0.0.0',
port=6789))
loop.run_until_complete(
loop.create_server(protocol_factory=ConsoleProtocol,
host='0.0.0.0',
port=9876))
try:
loop.run_forever()
except Exception as e:
print(e)
finally:
loop.close()
if __name__ == '__main__':
main()
This is not only my first experience writing a TCP server, but is also my first experience working with parallelism. So it took me days to realize that my sleep not only would not work, but I was locking the server while it "sleeps".
Any help is welcome.
time.sleep(0.2) is blocking, should not used in async programming, which will block the whole execution, if your program runing with 100 clients, the last client will be delayed for 0.2*99 seconds, which is not what you want.
the right way is trying to let program wait 0.2s but not blocking, then other concurrent clients would not be delayed,we can use thread.
import asyncio
import time
import threading
CONN = set()
class ServerProtocol(asyncio.Protocol):
def dealy_thread(self):
time.sleep(0.2)
def connection_made(self, transport):
self.transport = transport
CONN.add(self)
def data_received(self, data):
self.buffer = data
# DO OTHER STUFF
print(data)
def connection_lost(self, exc=None):
CONN.remove(self)
class ConsoleProtocol(asyncio.Protocol):
def connection_made(self, transport):
self.transport = transport
# Get first value just to ilustrate
self.client = next(iter(CONN))
def data_received(self, data):
# Forward the message to the client
self.client.transport.write(data)
# wait a fraction of a second
thread = threading.Thread(target=self.delay_thread, args=())
thread.daemon = True
thread.start()
# foward the response of the client
self.transport.write(self.client.buffer)

Mocking REST APIs with Flask_restful using threading

I'm looking to mock a set of REST APIs for some tests. The following main() function works fine (i.e. it returns {"some-data": 1234} as json to the browser when I GET localhost:8099). The issue is it blocks the main thread:
from gevent import monkey, sleep, pywsgi
monkey.patch_all()
import flask
from flask_restful import reqparse, abort, Api, Resource
import queue
import sys
import threading
STUFFS = {"some-data": 1234}
class Stuff(Resource):
def get(self):
return flask.jsonify(STUFFS)
class ControlThread(threading.Thread):
def __init__(self, http_server, stop_event):
threading.Thread.__init__(self)
self.stop_event = stop_event
self.http_server = http_server
self.running = False
def run(self):
try:
while not self.stop_event.is_set():
if not self.running:
self.http_server.start()
self.running = True
sleep(0.001)
except (KeyboardInterrupt, SystemExit):
pass
self.http_server.stop()
class StuffMock:
def __init__(self, port, name=None):
if name is None:
name = __name__
self.app = flask.Flask(name)
self.api = Api(self.app)
self.api.add_resource(Stuff, "/stuff/")
self.stop_event = threading.Event()
self.http_server = pywsgi.WSGIServer(('', port), self.app)
self.serving_thread = ControlThread(self.http_server,
self.stop_event)
self.serving_thread.daemon = True
def start(self):
self.serving_thread.start()
def stop(self):
self.stop_event.set()
self.serving_thread.join()
def main():
mocker = StuffMock(8099)
mocker.start()
try:
while True:
sleep(0.01)
except (KeyboardInterrupt, SystemExit):
mocker.stop()
sys.exit()
if __name__ == "__main__":
main()
Without the sleep() call in the while loop above, nothing resolves. Here is a more succinct usage to demonstrate:
import time
from stuff_mock import StuffMock
mocker = StuffMock(8099)
mocker.start()
while True:
user_text = input("let's do some work on the main thread: ")
# will only resolve the GET request after user input
# (i.e. when the main thread executes this sleep call)
time.sleep(0.1)
if user_text == "q":
break
mocker.stop()
The gevent threading module seems to work differently from the core one. Does anyone have any tips or ideas about what's going on under the hood?
Found that if I switch out threading for multiprocessing (and threading.Thread for multiprocessing.Process), everything works as expected, and I can spin up arbitrary numbers of mockers without blocking.

Python asyncio - Loop exits with Task was destroyed but it is pending

This is the relevant code of my python program:
import discord
import asyncio
class Bot(discord.Client):
def __init__(self):
super().__init__()
#asyncio.coroutine
def my_background_task(self):
yield from self.wait_until_ready()
while not self.is_closed:
yield from asyncio.sleep(3600*24) # <- This is line 76 where it fails
doSomething()
bot = Bot()
loop = asyncio.get_event_loop()
try:
loop.create_task(bot.my_background_task())
loop.run_until_complete(bot.login('username', 'password'))
loop.run_until_complete(bot.connect())
except Exception:
loop.run_until_complete(bot.close())
finally:
loop.close()
The program occasionally quits (on its own, while it should not) with no other errors or warning other than
Task was destroyed but it is pending!
task: <Task pending coro=<my_background_task() running at bin/discordBot.py:76> wait_for=<Future pending cb=[Task._wakeup()]>>
How to ensure the program won't randomly quit? I have Python 3.4.3+ on Xubuntu 15.10.
This is because the discord client module needs control once every minute or so.
This means that any function that steals control for more than a certain time causes discord's client to enter an invalid state (which will manifest itself as an exception some point later, perhaps upon next method call of client).
To ensure that the discord module client can ping the discord server, you should use a true multi-threading solution.
One solution is to offload all heavy processing onto a separate process (a separate thread will not do, because Python has a global interpreter lock) and use the discord bot as a thin layer whose responsibility is to populate work queues.
Related reading:
https://discordpy.readthedocs.io/en/latest/faq.html#what-does-blocking-mean
Example solution... this is WAY beyond the scope of the problem, but I already had the code mostly written. If I had more time, I would write a shorter solution :)
2 parts, discord interaction and processing server:
This is the discord listener.
import discord
import re
import asyncio
import traceback
import websockets
import json
# Call a function on other server
async def call(methodName, *args, **kwargs):
async with websockets.connect('ws://localhost:9001/meow') as websocket:
payload = json.dumps( {"method":methodName, "args":args, "kwargs": kwargs})
await websocket.send(payload)
#...
resp = await websocket.recv()
#...
return resp
client = discord.Client()
tok = open("token.dat").read()
#client.event
async def on_ready():
print('Logged in as')
print(client.user.name)
print(client.user.id)
print('------')
#client.event
async def on_error(event, *args, **kwargs):
print("Error?")
#client.event
async def on_message(message):
try:
if message.author.id == client.user.id:
return
m = re.match("(\w+) for (\d+).*?", message.content)
if m:
g = m.groups(1)
methodName = g[0]
someNumber = int(g[1])
response = await call(methodName, someNumber)
if response:
await client.send_message(message.channel, response[0:2000])
except Exception as e:
print (e)
print (traceback.format_exc())
client.run(tok)
This is the worker server for processing heavy requests. You can make this part sync or async.
I chose to use some magic called a websocket to send data from one python process to another one. But you can use anything you want. You could make one script write files into a dir, and the other script could read the files out and process them, for example.
import tornado
import tornado.websocket
import tornado.httpserver
import json
import asyncio
import inspect
import time
class Handler:
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def consume(self, text):
return "You said {0} and I say hiya".format(text)
async def sweeps(self, len):
await asyncio.sleep(len)
return "Slept for {0} seconds asynchronously!".format(len)
def sleeps(self, len):
time.sleep(len)
return "Slept for {0} seconds synchronously!".format(len)
class MyService(Handler, tornado.websocket.WebSocketHandler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def stop(self):
Handler.server.stop()
def open(self):
print("WebSocket opened")
def on_message(self, message):
print (message)
j = json.loads(message)
methodName = j["method"]
args = j.get("args", ())
method = getattr(self, methodName)
if inspect.iscoroutinefunction(method):
loop = asyncio.get_event_loop()
task = loop.create_task(method(*args))
task.add_done_callback( lambda res: self.write_message(res.result()))
future = asyncio.ensure_future(task)
elif method:
resp = method(*args)
self.write_message(resp)
def on_close(self):
print("WebSocket closed")
application = tornado.web.Application([
(r'/meow', MyService),
])
if __name__ == "__main__":
from tornado.platform.asyncio import AsyncIOMainLoop
AsyncIOMainLoop().install()
http_server = tornado.httpserver.HTTPServer(application)
Handler.server = http_server
http_server.listen(9001)
asyncio.get_event_loop().run_forever()
Now, if you run both processes in separate python scripts, and tell your bot "sleep for 100", it will sleep for 100 seconds happily!
The asyncio stuff functions as a make-shift work queue, and you can properly separate the listener from the backend processing by running them as separate python scripts.
Now, no matter how long your functions run in the 'server' part, the client part will never be prevented from pinging the discord server.
Image failed to upload, but... anyway, this is how to tell the bot to sleep and reply... note that the sleep is synchronous.
http://i.imgur.com/N4ZPPbB.png
I don't think problem happens while asyncio.sleep. Anyway you shouldn't suppress exception you got:
bot = Bot()
loop = asyncio.get_event_loop()
try:
# ...
except Exception as e:
loop.run_until_complete(bot.close())
raise e # <--- reraise exception you got while execution to see it (or log it here)
finally:
# ...
You have to manually stop your task on exit:
import discord
import asyncio
class Bot(discord.Client):
def __init__(self):
super().__init__()
#asyncio.coroutine
def my_background_task(self):
yield from self.wait_until_ready()
while not self.is_closed:
yield from asyncio.sleep(3600*24) # <- This is line 76 where it fails
doSomething()
bot = Bot()
loop = asyncio.get_event_loop()
try:
task = loop.create_task(bot.my_background_task())
loop.run_until_complete(bot.login('username', 'password'))
loop.run_until_complete(bot.connect())
except Exception:
loop.run_until_complete(bot.close())
finally:
task.cancel()
try:
loop.run_until_complete(task)
except Exception:
pass
loop.close()

How to use aiopg pool in multi-threaded application?

I have a python 3.4.3, postgreSQL 9.4, aiopg-0.7.0. An example of multi-threaded applications, was taken from this site. How to use the pool? The thread hangs when the operation of the select.
import time
import asyncio
import aiopg
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future
class B(Thread):
def __init__(self, start_event):
Thread.__init__(self)
self.loop = None
self.tid = None
self.event = start_event
def run(self):
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
self.tid = current_thread()
self.loop.call_soon(self.event.set)
self.loop.run_forever()
def stop(self):
self.loop.call_soon_threadsafe(self.loop.stop)
def add_task(self, coro):
"""this method should return a task object, that I
can cancel, not a handle"""
def _async_add(func, fut):
try:
ret = func()
fut.set_result(ret)
except Exception as e:
fut.set_exception(e)
f = functools.partial(asyncio.async, coro, loop=self.loop)
if current_thread() == self.tid:
return f() # We can call directly if we're not going between threads.
else:
# We're in a non-event loop thread so we use a Future
# to get the task from the event loop thread once
# it's ready.
fut = Future()
self.loop.call_soon_threadsafe(_async_add, f, fut)
return fut.result()
def cancel_task(self, task):
self.loop.call_soon_threadsafe(task.cancel)
#asyncio.coroutine
def test(pool, name_task):
while True:
print(name_task, 'running')
with (yield from pool.cursor()) as cur:
print(name_task, " select. ")
yield from cur.execute("SELECT count(*) FROM test")
count = yield from cur.fetchone()
print(name_task, ' Result: ', count)
yield from asyncio.sleep(3)
#asyncio.coroutine
def connect_db():
dsn = 'dbname=%s user=%s password=%s host=%s' % ('testdb', 'user', 'passw', '127.0.0.1')
pool = yield from aiopg.create_pool(dsn)
print('create pool type =', type(pool))
# future.set_result(pool)
return (pool)
event = Event()
b = B(event)
b.start()
event.wait() # Let the loop's thread signal us, rather than sleeping
loop_db = asyncio.get_event_loop()
pool = loop_db.run_until_complete(connect_db())
time.sleep(2)
t = b.add_task(test(pool, 'Task1')) # This is a real task
t = b.add_task(test(pool, 'Task2'))
while True:
time.sleep(10)
b.stop()
Not return result in 'yield from cur.execute("SELECT count(*) FROM test")'
Long story short: you cannot share aiopg pool object from different event loops.
Every aiopg.Pool is coupled to event loop. If you don't specify loop parameter explicitly it is taken from asyncio.get_event_loop() call.
So it your example you have a pool coupled to event loop from main thread.
When you execute db query from separate thread you trying to accomplish it by executing thread's loop, not the main one. It doesn't work.

Resources