Multiprocessing pool executing synchronously - python-3.x

I need an asynchronous parent process to hand over function calls to a process pool.
The imports are too time-consuming to spawn a new worker/process every time. So I thought I would put tasks in an asyncio.Queue and have a consumer listen to it and hand them off to the workers. (Sort of like how Gunicorn works, but I don't want to run a webserver in order to make the calls.)
However, the function call only seems to be executed if I call res.get() on the result of pool.apply_async(), and then it just runs as if it were a normal synchronous for loop.
This is my code:
#!/usr/bin/env python
import os
import time
import multiprocessing as mp
import asyncio

def f(x: list) -> int:
    print(f'the pid of this process is: {os.getpid()}')
    time.sleep(1)
    return len(x)

def callback_func(x):
    print(f'this is the callback function')
    print(x)

async def consumer(queue):
    with mp.Pool(processes=4) as pool:
        while True:
            x = await queue.get()
            if x == 'stop':
                break
            # this makes it seem to run synchronous:
            res = pool.apply_async(f, (x,))
            print(res.get(), x, os.getpid())
            # if I run this instead, both f() and callback_func
            # are not executed.
            #res = pool.apply_async(f, (x,), callback_func)
            #print(x, os.getpid())
            queue.task_done()
    print(f'consumed')

async def producer(queue):
    for i in range(20):
        await queue.put([i, i+1, i+2])
        # await asyncio.sleep(0.5)
    await queue.put('stop')

async def main():
    queue = asyncio.Queue()
    input_coroutines = [consumer(queue), producer(queue)]
    for f in asyncio.as_completed(input_coroutines):
        try:
            result = await f
            print(result)
        except Exception as e:
            print('caught exception')
            print(e)

if __name__ == "__main__":
    asyncio.run(main())
What am I doing wrong?
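One likely way around the blocking res.get() call - sketched here as an idea rather than a verified fix - is to let asyncio drive a concurrent.futures.ProcessPoolExecutor through loop.run_in_executor, which returns an awaitable future instead of an AsyncResult. A minimal sketch, assuming Python 3.7+ and the same worker f as above:

import asyncio
import os
import time
from concurrent.futures import ProcessPoolExecutor

def f(x: list) -> int:
    # must stay at module level so it can be pickled for the worker process
    print(f'the pid of this process is: {os.getpid()}')
    time.sleep(1)
    return len(x)

async def consumer(queue, executor):
    loop = asyncio.get_running_loop()
    pending = []
    while True:
        x = await queue.get()
        if x == 'stop':
            break
        # run_in_executor hands f(x) to a worker process and returns an
        # awaitable future, so the event loop is never blocked here
        pending.append(loop.run_in_executor(executor, f, x))
        queue.task_done()
    print(await asyncio.gather(*pending))

async def producer(queue):
    for i in range(20):
        await queue.put([i, i+1, i+2])
    await queue.put('stop')

async def main():
    queue = asyncio.Queue()
    with ProcessPoolExecutor(max_workers=4) as executor:
        await asyncio.gather(consumer(queue, executor), producer(queue))

if __name__ == "__main__":
    asyncio.run(main())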

Related

Trying to understand asyncio with Python

I am trying to run some concurrent tasks with asyncio. Currently, I have the following example:
import asyncio
from time import sleep
from signal import SIGINT, SIGTERM, signal

async def f():
    print("Got in F")
    await asyncio.sleep(10)
    print("Finished Sleep in F")
    return "f"

async def g():
    print("Got in G")
    await asyncio.sleep(20)
    print("Finished Sleep in G")
    return "g"

async def count_timer():
    for i in range(20):
        print(i)
        sleep(1)

async def main():
    task_g = asyncio.create_task(g())
    task_f = asyncio.create_task(f())
    await task_g
    await task_f
    task_counter = asyncio.create_task(count_timer())
    await task_counter
    return

if __name__ == "__main__":
    import time
    s = time.perf_counter()
    asyncio.run(main())
    elapsed = time.perf_counter() - s
    print(f"{__file__} executed in {elapsed:0.2f} seconds.")
What I'm trying to do is call the count_timer function after the f and g functions were called, but still run all three concurrently.
Thank you in advance,
Lucas Delfino Nogueira.
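For what it's worth, one way to get all three coroutines running at the same time - a sketch, assuming count_timer may use the non-blocking asyncio.sleep instead of time.sleep - is to create the three tasks up front and await them together with asyncio.gather:

import asyncio
import time

async def f():
    print("Got in F")
    await asyncio.sleep(10)
    print("Finished Sleep in F")
    return "f"

async def g():
    print("Got in G")
    await asyncio.sleep(20)
    print("Finished Sleep in G")
    return "g"

async def count_timer():
    for i in range(20):
        print(i)
        await asyncio.sleep(1)  # non-blocking, so f and g keep running

async def main():
    # create all three tasks before awaiting any of them
    task_g = asyncio.create_task(g())
    task_f = asyncio.create_task(f())
    task_counter = asyncio.create_task(count_timer())
    await asyncio.gather(task_g, task_f, task_counter)

if __name__ == "__main__":
    s = time.perf_counter()
    asyncio.run(main())
    print(f"executed in {time.perf_counter() - s:0.2f} seconds.")  # roughly 20s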

How to correctly use async-await with thread pool in Python 3

I want to achieve the same effect as
# Code 1
from multiprocessing.pool import ThreadPool as Pool
from time import sleep, time

def square(a):
    print('start', a)
    sleep(a)
    print('end', a)
    return a * a

def main():
    p = Pool(2)
    queue = list(range(4))
    start = time()
    results = p.map(square, queue)
    print(results)
    print(time() - start)

if __name__ == "__main__":
    main()
with async functions like
# Code 2
from multiprocessing.pool import ThreadPool as Pool
from time import sleep, time
import asyncio

async def square(a):
    print('start', a)
    sleep(a)  # await asyncio.sleep same effect
    print('end', a)
    return a * a

async def main():
    p = Pool(2)
    queue = list(range(4))
    start = time()
    results = p.map_async(square, queue)
    results = results.get()
    results = [await result for result in results]
    print(results)
    print(time() - start)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
Currently, Code 1 takes 4 seconds and Code 2 takes 6 seconds, which means it is not running in parallel. What is the correct and cleanest way to run multiple async functions in parallel?
Ideally it should be Python 3.6 compatible. Thank you!
map_async() is not the same "async" as in async def - if you feed it an async def method, it won't actually run it but will return a coroutine instance immediately (try calling such a method without await). You then awaited the 4 coroutines one by one, which amounts to sequential execution and ends up taking 6 seconds.
Please see the following example:
from time import time
import asyncio
from asyncio.locks import Semaphore

semaphore = Semaphore(2)

async def square(a):
    async with semaphore:
        print('start', a)
        await asyncio.sleep(a)
        print('end', a)
        return a * a

async def main():
    start = time()
    tasks = []
    for a in range(4):
        tasks.append(asyncio.ensure_future(square(a)))
    await asyncio.wait(tasks)
    print([t.result() for t in tasks])
    print(time() - start)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
The Semaphore acts much like the ThreadPool - it allows only 2 coroutines to enter the async with semaphore: block concurrently.
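If square has to stay a blocking, synchronous function (for example, it wraps a library call that cannot await), a rough alternative sketch - not part of the answer above - is to keep an actual ThreadPoolExecutor and bridge it into asyncio with loop.run_in_executor, which is also Python 3.6 compatible:

import asyncio
from concurrent.futures import ThreadPoolExecutor
from time import sleep, time

def square(a):
    print('start', a)
    sleep(a)  # the blocking call now runs inside a worker thread
    print('end', a)
    return a * a

async def main(loop):
    pool = ThreadPoolExecutor(2)  # at most 2 squares run at once
    start = time()
    results = await asyncio.gather(
        *(loop.run_in_executor(pool, square, a) for a in range(4)))
    print(results)
    print(time() - start)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop))
    loop.close()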

Appending to merged async generators in Python

I'm trying to merge a bunch of asynchronous generators in Python 3.7 while still adding new async generators on iteration. I'm currently using aiostream to merge my generators:
from asyncio import sleep, run
from aiostream.stream import merge

async def go():
    yield 0
    await sleep(1)
    yield 50
    await sleep(1)
    yield 100

async def main():
    tasks = merge(go(), go(), go())
    async for v in tasks:
        print(v)

if __name__ == '__main__':
    run(main())
However, I need to be able to continue to add to the running tasks once the loop has begun. Something like:
from asyncio import sleep, run
from aiostream.stream import merge

async def go():
    yield 0
    await sleep(1)
    yield 50
    await sleep(1)
    yield 100

async def main():
    tasks = merge(go(), go(), go())
    async for v in tasks:
        if v == 50:
            tasks.merge(go())
        print(v)

if __name__ == '__main__':
    run(main())
The closest I've gotten to this is with the aiostream library, but maybe this can also be written fairly neatly with just the native asyncio standard library.
Here is an implementation that should work efficiently even with a large number of async iterators:
import asyncio

class merge:
    def __init__(self, *iterables):
        self._iterables = list(iterables)
        self._wakeup = asyncio.Event()

    def _add_iters(self, next_futs, on_done):
        for it in self._iterables:
            it = it.__aiter__()
            nfut = asyncio.ensure_future(it.__anext__())
            nfut.add_done_callback(on_done)
            next_futs[nfut] = it
        del self._iterables[:]
        return next_futs

    async def __aiter__(self):
        done = {}
        next_futs = {}

        def on_done(nfut):
            done[nfut] = next_futs.pop(nfut)
            self._wakeup.set()

        self._add_iters(next_futs, on_done)
        try:
            while next_futs:
                await self._wakeup.wait()
                self._wakeup.clear()
                for nfut, it in done.items():
                    try:
                        ret = nfut.result()
                    except StopAsyncIteration:
                        continue
                    self._iterables.append(it)
                    yield ret
                done.clear()
                if self._iterables:
                    self._add_iters(next_futs, on_done)
        finally:
            # if the generator exits with an exception, or if the caller stops
            # iterating, make sure our callbacks are removed
            for nfut in next_futs:
                nfut.remove_done_callback(on_done)

    def append_iter(self, new_iter):
        self._iterables.append(new_iter)
        self._wakeup.set()
The only change required for your sample code is that the method is named append_iter, not merge.
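A small driver - an untested sketch reusing the go() generator from the question - might exercise the class like this:

import asyncio

async def go():
    yield 0
    await asyncio.sleep(1)
    yield 50
    await asyncio.sleep(1)
    yield 100

async def main():
    tasks = merge(go(), go(), go())   # the merge class defined above
    async for v in tasks:
        if v == 50:
            tasks.append_iter(go())   # add a new generator mid-iteration
        print(v)

if __name__ == '__main__':
    asyncio.run(main())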
This can be done using stream.flatten with an asyncio queue to store the new generators.
import asyncio
from aiostream import stream, pipe

async def main():
    queue = asyncio.Queue()
    await queue.put(go())
    await queue.put(go())
    await queue.put(go())

    xs = stream.call(queue.get)
    ys = stream.cycle(xs)
    zs = stream.flatten(ys, task_limit=5)
    async with zs.stream() as streamer:
        async for item in streamer:
            if item == 50:
                await queue.put(go())
            print(item)
Notice that you may tune the number of tasks that can run at the same time using the task_limit argument. Also note that zs can be elegantly defined using the pipe syntax:
zs = stream.call(queue.get) | pipe.cycle() | pipe.flatten(task_limit=5)
Disclaimer: I am the project maintainer.

tornado server is incompatible with threading module

I'm using tornado with threads.
In short, each time the websocket handler receives a request, it starts to execute a task, which might take a few minutes.
However, once a client is connected, no other client can connect until the first one disconnects.
Any ideas?
I've attached a minimal example that uses time.sleep to simulate long running tasks.
import tornado.web
import tornado.websocket
import tornado.httpserver
import tornado.ioloop
import time
import json
import threading

class TaskHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        pass

    def check_origin(self, origin):
        return True

    def on_message(self, message):
        try:
            print 'received: ', message
            self.write_message(json.dumps({'status': 'running'}))

            def worker_A(kwargs):
                time.sleep(100)
                pass

            def worker_B(kwargs):
                time.sleep(100)
                pass

            threads = []
            for target in [worker_A, worker_B]:
                t = threading.Thread(target=target, args=({'xxx': 'yyy'},))
                t.daemon = True
                t.start()
                threads.append(t)

            for t in threads:
                t.join()
        except Exception, e:
            print 'TaskHandler: exception: ', e
            pass

        self.write_message(json.dumps({'status': 'done'}))

    def on_close(self):
        pass

class Server(tornado.web.Application):
    def __init__(self):
        handlers = [
            ('/task', TaskHandler),
        ]
        tornado.web.Application.__init__(self, handlers)

if __name__ == '__main__':
    server = tornado.httpserver.HTTPServer(Server())
    server.listen(8765, address='127.0.0.1')
    tornado.ioloop.IOLoop.instance().start()
You block the whole Tornado event loop for 100 seconds in t.join. Unless you yield, or schedule a callback and exit the function, your function is not asynchronous. Notice how your function on_message begins two threads and then calls t.join on each -- how can Tornado's event loop accomplish any other work while your function is waiting for t.join?
Instead, use a ThreadPoolExecutor, something like this:
from concurrent.futures import ThreadPoolExecutor
from tornado import gen
from tornado.ioloop import IOLoop

thread_pool = ThreadPoolExecutor(4)

class TaskHandler(tornado.websocket.WebSocketHandler):
    # Make this an asynchronous coroutine
    @gen.coroutine
    def on_message_coroutine(self, message):
        print 'received: ', message
        self.write_message(json.dumps({'status': 'running'}))

        def worker_A(kwargs):
            time.sleep(100)
            pass

        def worker_B(kwargs):
            time.sleep(100)
            pass

        futures = []
        for target in [worker_A, worker_B]:
            f = thread_pool.submit(target, {'xxx': 'yyy'})
            futures.append(f)

        # Now the event loop can do other things
        yield futures

    def on_message(self, message):
        IOLoop.current().spawn_callback(self.on_message_coroutine, message)

How to use aiopg pool in multi-threaded application?

I have Python 3.4.3, PostgreSQL 9.4, and aiopg 0.7.0. The multi-threaded example below was taken from this site. How do I use the pool? The thread hangs on the SELECT operation.
import time
import asyncio
import aiopg
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future

class B(Thread):
    def __init__(self, start_event):
        Thread.__init__(self)
        self.loop = None
        self.tid = None
        self.event = start_event

    def run(self):
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self.tid = current_thread()
        self.loop.call_soon(self.event.set)
        self.loop.run_forever()

    def stop(self):
        self.loop.call_soon_threadsafe(self.loop.stop)

    def add_task(self, coro):
        """this method should return a task object, that I
        can cancel, not a handle"""
        def _async_add(func, fut):
            try:
                ret = func()
                fut.set_result(ret)
            except Exception as e:
                fut.set_exception(e)

        f = functools.partial(asyncio.async, coro, loop=self.loop)
        if current_thread() == self.tid:
            return f()  # We can call directly if we're not going between threads.
        else:
            # We're in a non-event loop thread so we use a Future
            # to get the task from the event loop thread once
            # it's ready.
            fut = Future()
            self.loop.call_soon_threadsafe(_async_add, f, fut)
            return fut.result()

    def cancel_task(self, task):
        self.loop.call_soon_threadsafe(task.cancel)

@asyncio.coroutine
def test(pool, name_task):
    while True:
        print(name_task, 'running')
        with (yield from pool.cursor()) as cur:
            print(name_task, " select. ")
            yield from cur.execute("SELECT count(*) FROM test")
            count = yield from cur.fetchone()
            print(name_task, ' Result: ', count)
        yield from asyncio.sleep(3)

@asyncio.coroutine
def connect_db():
    dsn = 'dbname=%s user=%s password=%s host=%s' % ('testdb', 'user', 'passw', '127.0.0.1')
    pool = yield from aiopg.create_pool(dsn)
    print('create pool type =', type(pool))
    # future.set_result(pool)
    return (pool)

event = Event()
b = B(event)
b.start()
event.wait()  # Let the loop's thread signal us, rather than sleeping

loop_db = asyncio.get_event_loop()
pool = loop_db.run_until_complete(connect_db())

time.sleep(2)
t = b.add_task(test(pool, 'Task1'))  # This is a real task
t = b.add_task(test(pool, 'Task2'))

while True:
    time.sleep(10)

b.stop()
It never returns a result from 'yield from cur.execute("SELECT count(*) FROM test")'.
Long story short: you cannot share an aiopg pool object across different event loops.
Every aiopg.Pool is coupled to an event loop. If you don't specify the loop parameter explicitly, it is taken from the asyncio.get_event_loop() call.
So in your example you have a pool coupled to the event loop of the main thread.
When you execute a DB query from a separate thread, you are trying to do it through the thread's loop, not the main one. That doesn't work.
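One way to make the example from the question work - sketched loosely here, not tested - is to create the pool inside a coroutine that already runs on the worker thread's loop, so the pool is coupled to that loop from the start (the DSN values are the placeholders from the question):

import asyncio
import aiopg

@asyncio.coroutine
def test(name_task):
    # the pool is created on whatever loop runs this coroutine,
    # i.e. the loop started inside B.run(), not the main thread's loop
    dsn = 'dbname=testdb user=user password=passw host=127.0.0.1'
    pool = yield from aiopg.create_pool(dsn)
    while True:
        with (yield from pool.cursor()) as cur:
            yield from cur.execute("SELECT count(*) FROM test")
            count = yield from cur.fetchone()
            print(name_task, ' Result: ', count)
        yield from asyncio.sleep(3)

# scheduled on the worker thread exactly as before, but without passing in
# a pool that was created on the main thread's loop:
# t = b.add_task(test('Task1'))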
