How to pass async function to run_in_executor - python-3.x

I am trying to pass some_async_func function to run_in_executor.
In the following example I program executes with the following warning:
coroutine 'some_async_func' was never awaited
Would anyone of you know if it's possible to pass async function to run_in_executor?
I am trying to write a program which will run X async coroutines in Y number of threads.
import asyncio
import random
from threading import Thread, get_ident
async def some_async_func(num):
#
# Many other async functions is called here.
# I simplified this function just to execute 'sleep'.
#
ident = get_ident()
print(f"produce: {num} in thread {ident}", flush=True)
await asyncio.sleep(random.uniform(0, 0.5))
return {ident: num}
async def large_func():
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(None, some_async_func, 1)
def side_thread():
loop = asyncio.new_event_loop()
loop.run_until_complete(large_func())
loop.close()
t1 = Thread(target=side_thread, args=(), daemon=True)
t1.start()
t1.join()

Finally, I implemented a sync wrapper for async function and passed it as follows:
def async_func_wrapper():
loop = asyncio.new_event_loop()
results = loop.run_until_complete(some_async_func(10))
loop.close()
return results
[...]
with concurrent.futures.ProcessPoolExecutor() as pool:
result = await loop.run_in_executor(pool, async_func_wrapper)

Related

coroutine was never awaited, async multithreading

async def existance(s, name):
async with s.head(f"https://example.com/{name}") as r1:
if r1.status == 404:
print('wow i worked')
async def process(names):
with ThreadPoolExecutor(max_workers=3) as executor:
async with aiohttp.ClientSession() as s:
loop = asyncio.get_event_loop()
tasks = []
for name in names:
if len(name) >= 5 and len(name) < 16 and name.isalnum():
task = loop.run_in_executor(
executor,
existance,
*(s, name)
)
tasks.append(task)
return await asyncio.gather(*tasks)
while True:
start_time = time.time()
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(process(names))
loop.run_until_complete(future)
I'm using the code above to try and split my tasks created across multiple threads while checking them all asynchronously.
I'm getting this error:
RuntimeWarning: coroutine 'existance' was never awaited
future = asyncio.ensure_future(process(names))
I'm still somewhat of a python beginner and I can't really figure out what I should change here to get the result I want.
Any help is appreciated and I'm sorry if this is a duplicate question.
Since existance is async, you don't need threads at all, so you can implement process like this:
async def process(names):
async with aiohttp.ClientSession() as s:
tasks = []
for name in names:
if len(name) >= 5 and len(name) < 16 and name.isalnum():
tasks.append(existance(s, name))
return await asyncio.gather(*tasks)
If existance is calling some synchronous slow function which might block the event loop (such as BeautifulSoup parsing), you can fix that by running just that function through a thread pool. Introduce run_in_executor locally like this:
# instead of:
# result = slow_calculation(arg1, arg2) # sync/slow code
# use this:
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, slow_calculation, arg1, arg2)
But don't attempt to call async functions through run_in_executor, that won't work.

Process tasks in batchs in asyncio

I have got a funcion that generates tasks (io bound tasks):
def get_task():
while True:
new_task = _get_task()
if new_task is not None:
yield new_task
else:
sleep(1)
And I am trying to write a consumer in asyncio that will be processing max 10 tasks at the time and one task is finished then will take new one.
I am not sure if I should use semaphores or is there any kind of asycio pool executor? I started to write a pseudocode with threads:
def run(self)
while True:
self.semaphore.acquire() # first acquire, then get task
t = get_task()
self.process_task(t)
def process_task(self, task):
try:
self.execute_task(task)
self.mark_as_done(task)
except:
self.mark_as_failed(task)
self.semaphore.release()
Could anyone help me? I have no clue where to put async/await keywords
Simple task cap using asyncio.Sepmaphore
async def max10(task_generator):
semaphore = asyncio.Semaphore(10)
async def bounded(task):
async with semaphore:
return await task
async for task in task_generator:
asyncio.ensure_future(bounded(task))
The problem with this solution is that tasks are being drawn from the generator greedily. For example, if generator reads from a large database, the program could run out of memory.
Other than that it's idiomatic and well-behaved.
A solution, that uses async generator protocol to pull new tasks on demand:
async def max10(task_generator):
tasks = set()
gen = task_generator.__aiter__()
try:
while True:
while len(tasks) < 10:
tasks.add(await gen.__anext__())
_done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
except StopAsyncIteration:
await asyncio.gather(*tasks)
It may be considered sub-optimal, because it doesn't start executing tasks until 10 are available.
And here's concise and magic solution using worker pattern:
async def max10(task_generator):
async def worker():
async for task in task_generator:
await task
await asyncio.gather(*[worker() for i in range(10)])
It relies on a somewhat counter-intuitive property of being able to have multiple async iterators over the same async generator, in which case each generated item is seen by only one iterator.
My gut tells me that none of these solutions behaves properly on cancellation.
Async isn't threads. If for example you have tasks that are file IO bound then write them async using aiofiles
async with aiofiles.open('filename', mode='r') as f:
contents = await f.read()
Then replace task with your tasks. If you want to only run 10 at a time await asyncio.gather every 10 tasks.
import asyncio
async def task(x):
await asyncio.sleep(0.5)
print( x, "is done" )
async def run(loop):
futs = []
for x in range(50):
futs.append( task(x) )
await asyncio.gather( *futs )
loop = asyncio.get_event_loop()
loop.run_until_complete( run(loop) )
loop.close()
If you can't write the tasks async and need threads this is a basic example using asyncio's ThreadPoolExecutor. Note that with max_workers=5 only 5 tasks are run at a time.
import time
from concurrent.futures import ThreadPoolExecutor
import asyncio
def blocking(x):
time.sleep(1)
print( x, "is done" )
async def run(loop):
futs = []
executor = ThreadPoolExecutor(max_workers=5)
for x in range(15):
future = loop.run_in_executor(executor, blocking, x)
futs.append( future )
await asyncio.sleep(4)
res = await asyncio.gather( *futs )
loop = asyncio.get_event_loop()
loop.run_until_complete( run(loop) )
loop.close()
As pointed out by Dima Tismek, using semaphores to limit concurrency is vulnerable to exhausting task_generator too eagerly, since there is no backpressure between obtaining the tasks and submitting them to the event loop. A better option, also explored by the other answer, is not to spawn a task as soon as the generator has produced an item, but to create a fixed number of workers that exhaust the generator concurrently.
There are two areas where the code could be improved:
there is no need for a semaphore - it is superfluous when the number of tasks is fixed to begin with;
handling cancellation of generated tasks and of the throttling task.
Here is an implementation that tackles both issues:
async def throttle(task_generator, max_tasks):
it = task_generator.__aiter__()
cancelled = False
async def worker():
async for task in it:
try:
await task
except asyncio.CancelledError:
# If a generated task is canceled, let its worker
# proceed with other tasks - except if it's the
# outer coroutine that is cancelling us.
if cancelled:
raise
# other exceptions are propagated to the caller
worker_tasks = [asyncio.create_task(worker())
for i in range(max_tasks)]
try:
await asyncio.gather(*worker_tasks)
except:
# In case of exception in one worker, or in case we're
# being cancelled, cancel all workers and propagate the
# exception.
cancelled = True
for t in worker_tasks:
t.cancel()
raise
A simple test case:
async def mock_task(num):
print('running', num)
await asyncio.sleep(random.uniform(1, 5))
print('done', num)
async def mock_gen():
tnum = 0
while True:
await asyncio.sleep(.1 * random.random())
print('generating', tnum)
yield asyncio.create_task(mock_task(tnum))
tnum += 1
if __name__ == '__main__':
asyncio.run(throttle(mock_gen(), 3))

How to iterate over an asynchronous iterator with a timeout?

I think it's easier to understand in terms of code:
try:
async for item in timeout(something(), timeout=60):
await do_something_useful(item)
except asyncio.futures.TimeoutError:
await refresh()
I want the async for to run at most 60 seconds.
I needed to do something like this to create a websocket(also an async iterator) which times out if it doesn't get a message after a certain duration. I settled on the following:
socket_iter = socket.__aiter__()
try:
while True:
message = await asyncio.wait_for(
socket_iter.__anext__(),
timeout=10
)
except asyncio.futures.TimeoutError:
# streaming is completed
pass
AsyncTimedIterable could be the implementation of timeout() in your code:
class _AsyncTimedIterator:
__slots__ = ('_iterator', '_timeout', '_sentinel')
def __init__(self, iterable, timeout, sentinel):
self._iterator = iterable.__aiter__()
self._timeout = timeout
self._sentinel = sentinel
async def __anext__(self):
try:
return await asyncio.wait_for(self._iterator.__anext__(), self._timeout)
except asyncio.TimeoutError:
return self._sentinel
class AsyncTimedIterable:
__slots__ = ('_factory', )
def __init__(self, iterable, timeout=None, sentinel=None):
self._factory = lambda: _AsyncTimedIterator(iterable, timeout, sentinel)
def __aiter__(self):
return self._factory()
(original answer)
Or use this class to replace your timeout() function:
class AsyncTimedIterable:
def __init__(self, iterable, timeout=None, sentinel=None):
class AsyncTimedIterator:
def __init__(self):
self._iterator = iterable.__aiter__()
async def __anext__(self):
try:
return await asyncio.wait_for(self._iterator.__anext__(),
timeout)
except asyncio.TimeoutError:
return sentinel
self._factory = AsyncTimedIterator
def __aiter__(self):
return self._factory()
A simple approach is to use an asyncio.Queue, and separate the code into two coroutines:
queue = asyncio.Queue()
async for item in something():
await queue.put(item)
In another coroutine:
while True:
try:
item = await asyncio.wait_for(queue.get(), 60)
except asyncio.TimeoutError:
pass
else:
if item is None:
break # use None or whatever suits you to gracefully exit
await do_something_useful(item)
refresh()
Please note, it will make the queue grow if the handler do_something_useful() is slower than something() generates items. You may set a maxsize on the queue to limit the buffer size.
Answer to your question can be different based on nature of refresh function. If it's very short-running function it can be freely called inside coroutine. But if it's blocking function (due to network or CPU) it should be ran in executor to avoid freezing asyncio event loop.
Code below shows example for the first case, changing it to run refresh in executor is not hard.
Second thing to be clarified is a nature of asynchronous iterator. As far as I understand, you're using it to either get result from something or None if timeout occurred.
If I understand logic correctly, your code can be written clearer (similar to non-async style as asyncio is created to allow) using async_timeout context manager and without using asynchronous iterator at all:
import asyncio
from async_timeout import timeout
async def main():
while True:
try:
async with timeout(60):
res = await something()
await do_something_useful(item)
except asyncio.TimeoutError:
pass
finally:
refresh()
Your question is missing a couple of details, but assuming something() is an async iterator or generator and you want item to be sentinel everytime something has not yielded a value within the timeout, here is an implementation of timeout():
import asyncio
from typing import *
T = TypeVar('T')
# async generator, needs python 3.6
async def timeout(it: AsyncIterator[T], timeo: float, sentinel: T) -> AsyncGenerator[T, None]:
try:
nxt = asyncio.ensure_future(it.__anext__())
while True:
try:
yield await asyncio.wait_for(asyncio.shield(nxt), timeo)
nxt = asyncio.ensure_future(it.__anext__())
except asyncio.TimeoutError:
yield sentinel
except StopAsyncIteration:
pass
finally:
nxt.cancel() # in case we're getting cancelled our self
test:
async def something():
yield 1
await asyncio.sleep(1.1)
yield 2
await asyncio.sleep(2.1)
yield 3
async def test():
expect = [1, None, 2, None, None, 3]
async for item in timeout(something(), 1, None):
print("Check", item)
assert item == expect.pop(0)
asyncio.get_event_loop().run_until_complete(test())
When wait_for() times out it will cancel the task. Therefore, we need to wrap it.__anext__() in a task and then shield it, to be able to resume the iterator.
I want the coroutine to execute refresh at least every 60 seconds.
If you need to execute refresh every 60 seconds regardless of what happens with do_something_useful, you can arrange that with a separate coroutine:
import time
async def my_loop():
# ensure refresh() is invoked at least once in 60 seconds
done = False
async def repeat_refresh():
last_run = time.time()
while not done:
await refresh()
now = time.time()
await asyncio.sleep(max(60 - (now - last_run), 0))
last_run = now
# start repeat_refresh "in the background"
refresh_task = asyncio.get_event_loop().create_task(repeat_refresh())
try:
async for item in something():
if item is not None:
await do_something_useful(item)
await refresh()
finally:
done = True

Test calling a python coroutine (async def) from a regular function

Let's say I have some asyncio coroutine which fetches some data and returns it. Like this:
async def fetch_data(*args):
result = await some_io()
return result
Basically this coroutine is called from the chain of coroutines, and initial coroutine is runned by creating a task.
But what if for test purposes I want to run only one coroutine this way just when running some file:
if __name__ == '__main__':
result = await fetch_data(*args)
print(result)
And obviously I can't do this since I'm trying to run and await coroutine from not coroutine function.
So the question is: is there some correct way to get data from coroutine without calling it function?
I can make some Future object for result and await it, but maybe there are some other more simple and clearer ways?
You will need to create an event loop to run your coroutine:
import asyncio
async def async_func():
return "hello"
loop = asyncio.get_event_loop()
result = loop.run_until_complete(async_func())
loop.close()
print(result)
Or as a function:
def run_coroutine(f, *args, **kwargs):
loop = asyncio.get_event_loop()
result = loop.run_until_complete(f(*args, **kwargs))
loop.close()
return result
Use like this:
print(run_coroutine(async_func))
Or:
assert "expected" == run_coroutine(fetch_data, "param1", param2="foo")
For Python 3.7+, you may use asyncio.run():
import asyncio
async def fetch_data():
return "sample_data"
asyncio.run(fetch_data())

Task top-up with asyncio

In my project, I have a list to tasks that I execute with.
loop.run_until_complete(tasks)
However, there is an infinite number of tasks, so at the moment, I execute them in batches. Essentially, I have this:
def get_results(tasks):
return [result for result in loop.run_until_complete(handle_tasks(tasks))]
while True:
tasks = get_tasks()
results = get_results(tasks)
I get a number of tasks, I lunch a regular function that uses a loop to perform these tasks asynchronously and returns the results.
This approach works, but I believe it can be improved.
Instead of doing batches of tasks, I would like to do some sort of task top-up.
Something like this:
while True:
if current_tasks < max_tasks:
new_tasks = get_tasks(max_tasks - current_tasks)
add_tasks(new_tasks)
current_tasks, results = stats_and_results()
I appreciate any ideas on how to approach this problem.
Thanks!
We had a similar problem and ended up writing a small "Pool" wrapper that takes jobs and run them with a predefined concurrency.
import asyncio
import sys
class Pool:
def __init__(self, concurrency):
self._sem = asyncio.BoundedSemaphore(concurrency)
self.jobs = []
async def __aenter__(self):
return self
async def __aexit__(self, *_):
if len(self.jobs) > 0:
await asyncio.wait(self.jobs)
def put(self, coro):
assert asyncio.iscoroutine(coro)
async def wrapped():
async with self._sem:
await coro
fut = asyncio.ensure_future(wrapped())
self.jobs.append(fut)
async def __aiter__(self):
return self
async def __anext__(self):
try:
coro = self.jobs.pop(0)
except IndexError:
raise StopAsyncIteration()
else:
return await coro
You can then use it this way:
async def main():
pool = Pool(10)
for task in get_tasks():
pool.put(task)
async for result in pool:
print('got', result)
This will schedule all the task, run at most 10 of them concurrently and return the results as they come to the main() coroutine

Resources