how to cache asyncio coroutines

how to cache asyncio coroutines - python-3.x

I am using aiohttp to make a simple HTTP request in python 3.4 like this:
response = yield from aiohttp.get(url)
The application requests the same URL over and over again so naturally I wanted to cache it. My first attempt was something like this:
# First attempt from the question. lru_cache memoises whatever this function
# returns -- the awaitable produced by aiohttp.get(), not the response it
# eventually yields -- so every later call receives the same, already
# consumed, object.
@functools.lru_cache(maxsize=128)
def cached_request(url):
    return aiohttp.get(url)
The first call to cached_request works fine, but in later calls I end up with None instead of the response object.
I am rather new to asyncio so I tried a lot of combinations of the asyncio.coroutine decorator, yield from and some other things, but none seemed to work.
So how does caching coroutines work?

Maybe a bit late, but I've started a new package that may help: https://github.com/argaen/aiocache. Contributions/comments are always welcome.
An example:
import asyncio
from collections import namedtuple
from aiocache import cached
from aiocache.serializers import PickleSerializer
# Simple record returned by the cached coroutine: a body and a status code.
Result = namedtuple("Result", ("content", "status"))
@cached(ttl=10, serializer=PickleSerializer())
async def async_main():
    """Pretend to do one second of work and return a ``Result``.

    The ``cached`` decorator is configured with ``ttl=10`` and a pickle
    serializer, so the body (and its print) should only run on a cache miss.
    """
    print("First ASYNC non cached call...")
    await asyncio.sleep(1)
    return Result("content", 200)
if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    # Call the coroutine four times in a row and print each result.
    for _ in range(4):
        print(loop.run_until_complete(async_main()))
Note that as an extra, it can cache any python object into redis using Pickle serialization. In case you just want to work with memory, you can use the SimpleMemoryCache backend :).

A popular async version of lru_cache exists here: async_lru

To use functools.lru_cache with coroutines, the following code works.
class Cacheable:
    """Awaitable wrapper that runs a coroutine once and replays its result.

    A bare coroutine object can only be awaited a single time. Wrapping it
    lets ``functools.lru_cache`` hand the same object to many callers: the
    first awaiter drives the coroutine, later awaiters get the stored value.
    """

    def __init__(self, co):
        self.co = co
        self.done = False
        self.result = None
        self.lock = asyncio.Lock()

    async def _resolve(self):
        # The lock makes concurrent awaiters queue behind the first one
        # instead of trying to drive the same coroutine twice.
        # ``async with`` replaces ``with (yield from lock)``, which is no
        # longer supported by asyncio locks.
        async with self.lock:
            if not self.done:
                self.result = await self.co
                self.done = True
            return self.result

    def __await__(self):
        return self._resolve().__await__()
def cacheable(f):
    """Decorate coroutine function *f* so each call returns a ``Cacheable``.

    The returned object may be awaited any number of times, which makes the
    decorated function safe to stack under ``functools.lru_cache``.
    """
    # Preserve f's name and docstring on the wrapper.
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        return Cacheable(f(*args, **kwargs))

    return wrapped
# lru_cache must be outermost so that it stores the re-awaitable Cacheable
# object rather than a one-shot coroutine.
@functools.lru_cache()
@cacheable
async def foo():
    """Fetch ``url`` and return the response body as text."""
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()
The following variant is thread-safe:
class ThreadSafeCacheable:
    """Awaitable wrapper that runs a coroutine once, guarded by a thread lock.

    The first awaiter to grab the lock drives the coroutine; everyone else
    polls until the result has been stored.
    """

    def __init__(self, co):
        self.co = co
        self.done = False
        self.result = None
        self.lock = threading.Lock()

    def __await__(self):
        while True:
            if self.done:
                return self.result
            if self.lock.acquire(blocking=False):
                try:
                    # Re-check: another awaiter may have finished between the
                    # test above and the moment the lock was obtained.
                    if not self.done:
                        self.result = yield from self.co.__await__()
                        self.done = True
                    return self.result
                finally:
                    # Always release, so a failing coroutine cannot leave the
                    # other awaiters polling forever.
                    self.lock.release()
            # A plain generator cannot ``yield from`` a coroutine object
            # directly; it has to go through __await__().
            yield from asyncio.sleep(0.005).__await__()

I wrote a simple cache decorator myself:
def async_cache(maxsize=128):
    """Return a decorator that memoises a coroutine function's results.

    Results are keyed on the positional-argument tuple, so all arguments must
    be hashable. Once ``maxsize`` entries are stored, the oldest inserted
    entry is dropped to make room (FIFO, not LRU).

    Concurrent calls with the same arguments are not de-duplicated: each one
    that misses the cache runs the wrapped function.
    """
    def decorator(fn):
        # One cache per decorated function, so two functions sharing a
        # decorator instance cannot read each other's entries.
        cache = {}

        async def wrapper(*args):
            key = args
            if key not in cache:
                while cache and len(cache) >= maxsize:
                    # Dicts keep insertion order; the first key is the oldest.
                    del cache[next(iter(cache))]
                result = await fn(*args)
                cache[key] = result
                return result
            return cache[key]

        return wrapper

    return decorator
# NOTE: asyncio.coroutine is the pre-async/await spelling and has been removed
# from recent Python versions; there, declare the function with ``async def``.
@async_cache()
@asyncio.coroutine
def expensive_io():
    ...
This kind-of-works. But many aspects can probably be improved. For example: If the cached function is called a second time before the first call returns, it will execute a second time.

I'm not that familiar with aiohttp so I'm not sure of exactly what is happening that would cause Nones to be returned, but the lru_cache decorator will not work with async functions.
I use a decorator which does essentially the same thing; note that it is different to tobib's decorator above in that it will always return a future or a task, rather than the value:
import asyncio
from collections import OrderedDict
from functools import _make_key, wraps
def future_lru_cache(maxsize=128):
    """LRU cache decorator for coroutine functions that returns futures.

    Usable both as ``@future_lru_cache`` and ``@future_lru_cache(maxsize=N)``.
    The decorated function is a plain function that always returns a future
    or task: the in-flight task while a call is running (so concurrent calls
    with equal arguments share one execution), or an already-resolved future
    on a cache hit. Failed or cancelled calls are not kept in the cache.
    """
    # Used bare, the decorated function itself arrives as ``maxsize``.
    if callable(maxsize):
        real_max_size = 128
    else:
        try:
            real_max_size = int(maxsize)
        except (TypeError, ValueError):
            real_max_size = 128

    def wrapper(func):
        # One cache per decorated function; keys do not include the function.
        cache = OrderedDict()

        async def run_and_cache(key, args, kwargs):
            """Run func and replace the cached task with its result."""
            try:
                result = await func(*args, **kwargs)
            except BaseException:
                # Do not let a failure be replayed to every later caller.
                cache.pop(key, None)
                raise
            cache[key] = result
            while len(cache) > real_max_size:
                cache.popitem(last=False)
            return result

        @wraps(func)
        def decorator(*args, **kwargs):
            key = _make_key(args, kwargs, False)
            if key in cache:
                cache.move_to_end(key)  # mark as most recently used
                cached = cache[key]
                # Call still in progress: share the same task.
                if isinstance(cached, asyncio.Future):
                    return cached
                f = asyncio.Future()
                f.set_result(cached)
                return f
            task = asyncio.ensure_future(run_and_cache(key, args, kwargs))
            cache[key] = task
            return task

        return decorator

    if callable(maxsize):
        return wrapper(maxsize)
    return wrapper
I used _make_key from functools as lru_cache does, I guess it's supposed to be private so probably better to copy it over.

This is how I think it's most easily done, using the built-in lru_cache and futures:
import asyncio
import functools
# parameterless decorator
def async_lru_cache_decorator(async_function):
    """Cache the results of *async_function* with ``functools.lru_cache``.

    Each distinct call is turned into a future, which (unlike a coroutine)
    can be awaited repeatedly. Because the future itself is what gets cached,
    a call that raised stays cached and re-raises for later callers.
    """
    @functools.lru_cache
    @functools.wraps(async_function)
    def cached_async_function(*args, **kwargs):
        coroutine = async_function(*args, **kwargs)
        return asyncio.ensure_future(coroutine)

    return cached_async_function
# decorator with options
def async_lru_cache(*lru_cache_args, **lru_cache_kwargs):
    """Parameterised variant: arguments are forwarded to ``functools.lru_cache``.

    The decorated coroutine function returns a cached future per distinct
    call. A call that raised stays cached, since the future is what is stored.
    """
    def async_lru_cache_decorator(async_function):
        @functools.lru_cache(*lru_cache_args, **lru_cache_kwargs)
        @functools.wraps(async_function)
        def cached_async_function(*args, **kwargs):
            coroutine = async_function(*args, **kwargs)
            return asyncio.ensure_future(coroutine)

        return cached_async_function

    return async_lru_cache_decorator
# Usage example; the signature and body are placeholders.
@async_lru_cache(maxsize=128)
async def your_async_function(*args, **kwargs):
    ...
This is basically taking your original function and wrapping it so I can store the Coroutine it returns and convert it into a Future. This way, this can be treated as a regular function and you can lru_cache-it as you would usually do it.
Why is wrapping it in a Future necessary? Python coroutines are low level constructs and you can't await one more than once (You would get RuntimeError: cannot reuse already awaited coroutine). Futures, on the other hand, are handy and can be awaited consecutively and will return the same result.
One caveat is that caching a Future will also cache when the original functions raised an Error. The original lru_cache does not cache interrupted executions, so watch out for this edge case using the solution above.
Further tweaking can be done to merge both the parameter-less and the parameterized decorators, like the original lru_cache which supports both usages.

Another variant of lru decorator, which caches not yet finished coroutines, very useful with parallel requests to the same key:
import asyncio
from collections import OrderedDict
from functools import _make_key, wraps
def async_cache(maxsize=128, event_loop=None):
    """LRU cache decorator for coroutine functions that shares in-flight calls.

    Concurrent calls with equal arguments wait on a single task instead of
    each running the wrapped function.

    :param maxsize: maximum number of finished results to keep
    :param event_loop: loop used to schedule the shared task; ``None`` means
        the loop that is running when the decorated function is called
    """
    def decorator(func):
        # Per-function state: finished results, and tasks still running.
        cache = OrderedDict()
        awaiting = {}

        async def run_and_cache(key, args, kwargs):
            """Await func with the given arguments and store the result."""
            result = await func(*args, **kwargs)
            cache[key] = result
            cache.move_to_end(key)
            while len(cache) > maxsize:
                cache.popitem(last=False)
            return result

        @wraps(func)
        async def wrapper(*args, **kwargs):
            key = _make_key(args, kwargs, False)
            if key in cache:
                cache.move_to_end(key)  # mark as most recently used
                return cache[key]
            task = awaiting.get(key)
            if task is None:
                task = asyncio.ensure_future(
                    run_and_cache(key, args, kwargs), loop=event_loop
                )
                awaiting[key] = task
                # Forget the task however it ends (result, error, cancel).
                task.add_done_callback(
                    lambda _done, key=key: awaiting.pop(key, None)
                )
            # shield: cancelling one waiter must not cancel the task that the
            # other waiters are sharing.
            return await asyncio.shield(task)

        return wrapper

    return decorator
async def test_async_cache(event_loop):
    """Check that n distinct keys requested m times each run only n times."""
    counter = 0
    n, m = 10, 3

    @async_cache(maxsize=n, event_loop=event_loop)
    async def cached_function(x):
        nonlocal counter
        await asyncio.sleep(0)  # making event loop switch to other coroutine
        counter += 1
        return x

    tasks = [asyncio.ensure_future(cached_function(x), loop=event_loop)
             for x in list(range(n)) * m]
    # asyncio.wait no longer accepts a loop argument.
    done, pending = await asyncio.wait(tasks, timeout=1)
    assert len(done) == n * m
    assert counter == n
# Run the self-test to completion on the event loop.
event_loop = asyncio.get_event_loop()
task = asyncio.ensure_future(test_async_cache(event_loop))
event_loop.run_until_complete(task)

I think that the simplest way is to use aiohttp_cache (documentation)
pip install aiohttp-cache
And use it in code:
from aiohttp_cache import cache, setup_cache
@cache()  # <-- DECORATED FUNCTION
async def example_1(request):
    """Handler whose response is cached by aiohttp-cache."""
    return web.Response(text="Example")
# Build the application, register the cached handler and enable the cache
# before starting the server.
app = web.Application()
app.router.add_route('GET', "/", example_1)
setup_cache(app) # <-- INITIALIZED aiohttp-cache
web.run_app(app, host="127.0.0.1")

Try async-cache (available on PyPI and GitHub) for caching async functions in Python.
It also supports functions whose parameters are user-defined objects or unhashable types, which neither functools.lru_cache nor async_lru supports.
Usage:
pip install async-cache
from cache import AsyncLRU
@AsyncLRU(maxsize=128)
async def func(*args, **kwargs):
    """Placeholder coroutine showing where the decorator goes."""
    pass

I wrote a simple package named asyncio-cache - https://github.com/matan1008/asyncio-cache.
I tried to keep the code as close as possible to the original python implementation and as simple as possible.
For example:
from asyncio_cache import lru_cache
import aiohttp
@lru_cache(maxsize=128)
async def cached_get(url):
    """Fetch *url* and return the response body as text."""
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()

Related

How to use asynchronous iterator using aiter() and anext() builtins

I have gone through the documentation of aiter and anext (New in version 3.10). But not understanding how to use them.
I have the following program:
import asyncio
async def get_range():
    """Asynchronously yield 0 through 9, pausing one second before each."""
    i = 0
    while i < 10:
        print(f"start {i}")
        await asyncio.sleep(1)
        print(f"end {i}")
        yield i
        i += 1
class AIter:
    """Asynchronous iterator producing 0 .. N-1, one second per step."""

    def __init__(self, N):
        self.N = N
        self.i = 0

    def __aiter__(self):
        # The object is its own iterator.
        return self

    async def __anext__(self):
        i = self.i
        print(f"start {i}")
        await asyncio.sleep(1)
        print(f"end {i}")
        if i < self.N:
            self.i += 1
            return i
        raise StopAsyncIteration
async def main():
    """Consume AIter(10) with ``async for`` and print every value."""
    source = AIter(10)
    async for p in source:
        print(f"finally {p}")


if __name__ == "__main__":
    asyncio.run(main())
How can I use aiter and anext builtin here?

Like with the regular synchronous iter and next builtins, you rarely need to use the new builtins directly. The async for loop in main calls the __aiter__ and __anext__ methods of your class already. If that does all you want, you're home free.
You only need to explicitly use aiter and anext if you are writing code that interacts with an asynchronous iterator in some way not directly supported by an async for loop. For instance, here's an asynchronous generator that yields pairs of values from the iterable it's given:
async def pairwise(aiterable, default=None):
ait = aiter(aiterable) # get a reference to the iterator
async for x in ait:
yield x, await anext(ait, default) # get an extra value, yield a 2-tuple
If you loop on pairwise(AIter(10)) in your main function, you'll find that it now prints tuples of numbers, like finally (0, 1). Before each tuple, you'll see two sets of the start and end lines printed by the iterator class, one for each value that ends up in the paired result.

Running blocking code in separate executor is failing with coroutines cannot be used with run_in_executor()

Hi, the following is a minimal viable example that fails to run blocking code in a separate
event loop. Can someone please let me know how to fix the issue? The code fails with:
TypeError: coroutines cannot be used with run_in_executor()
import asyncio
class XYZ:
    """Thin client around a connection pool (question's minimal example)."""

    def __init__(self, pool):
        self.pool = pool

    def _connect(self, router):
        """Ask the pool for a connection to *router*."""
        return self.pool.connect(router)

    async def _call(self, router, procedure, *args, **kwargs):
        """Open a session on *router* and invoke *procedure* through it."""
        connection = self._connect(router)
        async with connection as session:
            return await session.call(procedure, *args, **kwargs)

    # ---- this is blocking code ----
    def get_system_info(self,router ,system_name):
        # NOTE: _call is a coroutine function, which run_in_executor rejects;
        # this is the line behind the TypeError the question asks about.
        return asyncio.get_event_loop().run_in_executor(None, self._call, f'url')
if __name__ == "__main__":
    d = XYZ('pool')
    # Run the (failing) lookup to completion on the event loop.
    asyncio.get_event_loop().run_until_complete(
        d.get_system_info('route', 'sysname')
    )

The code is failing because loop.run_in_executor(executor, func, *args) takes a function as its argument, not a coroutine. Change async def _call to a synchronous function, i.e. def _call, and make the appropriate changes inside it. One option would be to wrap the session call in a task and use run_until_complete in the _call function.

Inheritance in iterable implementation of python's multiprocessing.Queue

I found the default implementation of python's multiprocessing.Queue lacking, in that it's not iterable like any other collection. So I went about the effort of creating a 'subclass' of it, adding the feature in. As you can see from the code below, it's not a proper subclass, as multiprocessing.Queue isn't a direct class itself, but a factory function, and the real underlying class is multiprocessing.queues.Queue. I don't have the understanding nor effort to expend necessary to go about mimicking the factory function just so I can inherit from the class properly, so I simply had the new class create its own instance from the factory and treat it as the superclass. Here is the code:
from multiprocessing import Queue, Value, Lock
import queue
class QueueClosed(Exception):
    """Raised when putting onto an IterableQueue that has been closed."""
class IterableQueue:
    """Wrapper around ``multiprocessing.Queue`` that can be iterated.

    Iteration yields items until the queue has been closed and drained.
    A shared ``closed`` flag, guarded by ``close_lock``, records closure.
    """

    def __init__(self, maxsize=0):
        self.closed = Value('b', False)
        self.close_lock = Lock()
        self.queue = Queue(maxsize)

    def close(self):
        """Mark the queue closed and close the underlying queue."""
        with self.close_lock:
            self.closed.value = True
            self.queue.close()

    def put(self, elem, block=True, timeout=None):
        """Enqueue *elem*; raise ``QueueClosed`` once the queue is closed."""
        with self.close_lock:
            if self.closed.value:
                raise QueueClosed()
            self.queue.put(elem, block, timeout)

    def put_nowait(self, elem):
        self.put(elem, False)

    def get(self, block=True):
        """Return the next item, or ``None`` when closed and empty.

        With ``block=False`` this is a plain ``get_nowait``. Otherwise an
        open queue is polled in a loop until an item arrives or the queue is
        closed.
        """
        if not block:
            return self.queue.get_nowait()
        if self.closed.value:
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                return None
        # Open queue: spin on get_nowait until something shows up.
        while not self.closed.value:
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                continue
        return None

    def get_nowait(self):
        return self.queue.get_nowait()

    def join_thread(self):
        return self.queue.join_thread()

    def __iter__(self):
        return self

    def __next__(self):
        item = self.get()
        # Equality (not identity) comparison kept as in the original.
        if item == None:
            raise StopIteration()
        return item

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
This allows me to instantiate an IterableQueue object just like a normal multiprocessing.Queue, put elements into it like normal, and then inside child consumers, simply loop over it like so;
from iterable_queue import IterableQueue
from multiprocessing import Process, cpu_count
import os
def fib(n):
    """Return the n-th Fibonacci number using naive recursion."""
    return n if n < 2 else fib(n - 1) + fib(n - 2)
def consumer(queue):
    """Pull numbers from *queue* until it is exhausted, printing fib of each."""
    print(f"[{os.getpid()}] Consuming")
    for i in queue:
        print(f"[{os.getpid()}] < {i}")
        n = fib(i)
        print(f"[{os.getpid()}] {i} > {n}")
    # The loop only ends once the queue reports that it is finished.
    print(f"[{os.getpid()}] Closing")
# Start one consumer process per CPU, then enqueue the numbers 0..35.
# Leaving the `with` block closes the queue.
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# "Finished" is printed inside or after the `with` block -- confirm.
def producer():
print("Enqueueing")
with IterableQueue() as queue:
procs = [Process(target=consumer, args=(queue,)) for _ in range(cpu_count())]
[p.start() for p in procs]
[queue.put(i) for i in range(36)]
print("Finished")
if __name__ == "__main__":
producer()
and it works almost seamlessly; the consumers exit the loop once the queue has been closed, but only after exhausting all remaining elements. However, I was unsatisfied with the lack of inherited methods. In an attempt to mimic actual inheritance behavior, I tried adding the following meta function call to the class;
def __getattr__(self, name):
    """Delegate attributes missing on the wrapper to the wrapped queue.

    Python only calls ``__getattr__`` after normal lookup has failed, so the
    instance ``__dict__`` does not need to be consulted here.
    """
    # If ``queue`` itself is missing (e.g. on a half-initialised copy),
    # looking it up below would re-enter this method forever.
    if name == "queue":
        raise AttributeError(name)
    return getattr(self.queue, name)
However, this fails when instances of the IterableQueue class are manipulated inside child multiprocessing.Process threads, as the class's __dict__ property is not preserved within them. I attempted to remedy this in a hacky manner by replacing the class's default __dict__ with a multiprocessing.Manager().dict(), like so;
# Asker's attempted workaround: after creating the shared flag, lock and
# queue, replace the instance __dict__ with a Manager-backed dict. The
# question reports that this raises "RuntimeError: Synchronized objects
# should only be shared between processes through inheritance".
def __init__(self, maxsize=0):
self.closed = Value('b', False)
self.close_lock = Lock()
self.queue = Queue(maxsize)
self.__dict__ = Manager().dict(self.__dict__)
However, on doing so, I received an error stating RuntimeError: Synchronized objects should only be shared between processes through inheritance. So my question is, how should I go about inheriting from the Queue class properly such that the subclass has inherited access to all of its properties? In addition, while the queue is empty but not closed, the consumers all sit in a busy loop instead of a true IO block, taking up valuable cpu resources. If you have any suggestions on concurrency and race condition issues I might run into with this code, or how I might solve the busy loop issue, I'd be willing to take suggestions therein as well.
Based on code provided by MisterMiyagi, I created this general purpose IterableQueue class which can accept arbitrary input, blocks properly, and does not hang on queue close;
from multiprocessing.queues import Queue
from multiprocessing import get_context
class QueueClosed(Exception):
    """Raised by ``get`` when the end-of-queue marker is received."""
class IterableQueue(Queue):
    """``multiprocessing`` queue that can be iterated until it is closed.

    Every item travels as a ``(value, is_open)`` pair; ``close`` enqueues
    ``(None, False)`` as the end marker, so any value -- including ``None``
    -- can be sent.
    """

    def __init__(self, maxsize=0, *, ctx=None):
        context = get_context() if ctx is None else ctx
        super().__init__(maxsize=maxsize, ctx=context)

    def close(self):
        # Send the end marker before shutting down the sending side.
        super().put((None, False))
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return self.get()
        except QueueClosed:
            raise StopIteration

    def get(self, *args, **kwargs):
        payload, still_open = super().get(*args, **kwargs)
        if still_open:
            return payload
        # Put the marker back so other consumers also see the end.
        super().put((None, False))
        raise QueueClosed

    def put(self, val, *args, **kwargs):
        super().put((val, True), *args, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

The multiprocessing.Queue wrapper only serves to use the default context.
# Method quoted by the answer: it imports the real queue class and
# instantiates it with this object's context.
# NOTE(review): presumably taken from CPython's multiprocessing context
# module -- confirm against the standard-library source.
def Queue(self, maxsize=0):
'''Returns a queue object'''
from .queues import Queue
return Queue(maxsize, ctx=self.get_context())
When inheriting, you can replicate this in the __init__ method. This allows you to inherit the entire Queue behaviour. You only need to add the iterator methods:
from multiprocessing.queues import Queue
from multiprocessing import get_context
class IterableQueue(Queue):
    """
    ``multiprocessing.Queue`` that can be iterated to ``get`` values

    :param sentinel: signal that no more items will be received
    """
    def __init__(self, maxsize=0, *, ctx=None, sentinel=None):
        self.sentinel = sentinel
        super().__init__(
            maxsize=maxsize,
            ctx=ctx if ctx is not None else get_context()
        )

    def __getstate__(self):
        # The base class pickles only its own fields; carry the sentinel
        # along so a spawned child process still has it.
        return (self.sentinel, super().__getstate__())

    def __setstate__(self, state):
        self.sentinel, queue_state = state
        super().__setstate__(queue_state)

    def close(self):
        # Imported locally: the snippet's import block does not include it.
        import time

        self.put(self.sentinel)
        # wait until buffer is flushed...
        while self._buffer:
            time.sleep(0.01)
        # before shutting down the sender
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        result = self.get()
        # Compared by equality: identity does not survive the trip between
        # processes.
        if result == self.sentinel:
            # re-queue sentinel for other listeners
            self.put(result)
            raise StopIteration
        return result
Note that the sentinel to indicate end-of-queue is compared by equality, because identity is not preserved across processes. The often-used queue.Queue sentinel object() does not work properly with this.

How to iterate over an asynchronous iterator with a timeout?

I think it's easier to understand in terms of code:
try:
async for item in timeout(something(), timeout=60):
await do_something_useful(item)
except asyncio.futures.TimeoutError:
await refresh()
I want the async for to run at most 60 seconds.

I needed to do something like this to create a websocket(also an async iterator) which times out if it doesn't get a message after a certain duration. I settled on the following:
socket_iter = socket.__aiter__()
try:
while True:
message = await asyncio.wait_for(
socket_iter.__anext__(),
timeout=10
)
except asyncio.futures.TimeoutError:
# streaming is completed
pass

AsyncTimedIterable could be the implementation of timeout() in your code:
class _AsyncTimedIterator:
    """Iterator that substitutes a sentinel when the next item is too slow.

    Note that ``asyncio.wait_for`` cancels the pending ``__anext__`` call on
    timeout, so the item that was being waited for is abandoned.
    """

    __slots__ = ('_iterator', '_timeout', '_sentinel')

    def __init__(self, iterable, timeout, sentinel):
        self._iterator = iterable.__aiter__()
        self._timeout = timeout
        self._sentinel = sentinel

    def __aiter__(self):
        # An async iterator must be usable in ``async for`` on its own.
        return self

    async def __anext__(self):
        try:
            return await asyncio.wait_for(self._iterator.__anext__(), self._timeout)
        except asyncio.TimeoutError:
            return self._sentinel


class AsyncTimedIterable:
    """Async iterable applying a per-item timeout to another async iterable.

    :param iterable: the async iterable to wrap
    :param timeout: seconds to wait for each item (``None`` waits forever)
    :param sentinel: value produced in place of an item that timed out
    """

    __slots__ = ('_factory', )

    def __init__(self, iterable, timeout=None, sentinel=None):
        self._factory = lambda: _AsyncTimedIterator(iterable, timeout, sentinel)

    def __aiter__(self):
        return self._factory()
(original answer)
Or use this class to replace your timeout() function:
class AsyncTimedIterable:
    """Async iterable applying a per-item timeout to another async iterable.

    An item that does not arrive within *timeout* seconds is replaced by
    *sentinel*.
    """

    def __init__(self, iterable, timeout=None, sentinel=None):
        # The iterator class closes over the constructor arguments, so it
        # needs no parameters of its own.
        class AsyncTimedIterator:
            def __init__(self):
                self._iterator = iterable.__aiter__()

            async def __anext__(self):
                try:
                    step = self._iterator.__anext__()
                    return await asyncio.wait_for(step, timeout)
                except asyncio.TimeoutError:
                    return sentinel

        self._factory = AsyncTimedIterator

    def __aiter__(self):
        return self._factory()

A simple approach is to use an asyncio.Queue, and separate the code into two coroutines:
# Producer side: forward every item from something() into the queue.
queue = asyncio.Queue()
async for item in something():
await queue.put(item)
In another coroutine:
# Consumer side: wait up to 60 seconds for each queued item; a timeout is
# ignored, and a None item ends the loop.
# NOTE(review): indentation was lost in this listing; refresh() is presumably
# called once per loop iteration -- confirm.
while True:
try:
item = await asyncio.wait_for(queue.get(), 60)
except asyncio.TimeoutError:
pass
else:
if item is None:
break # use None or whatever suits you to gracefully exit
await do_something_useful(item)
refresh()
Please note, it will make the queue grow if the handler do_something_useful() is slower than something() generates items. You may set a maxsize on the queue to limit the buffer size.

The answer to your question depends on the nature of the refresh function. If it's a very short-running function, it can be freely called inside a coroutine. But if it's a blocking function (due to network or CPU), it should be run in an executor to avoid freezing the asyncio event loop.
Code below shows example for the first case, changing it to run refresh in executor is not hard.
Second thing to be clarified is a nature of asynchronous iterator. As far as I understand, you're using it to either get result from something or None if timeout occurred.
If I understand logic correctly, your code can be written clearer (similar to non-async style as asyncio is created to allow) using async_timeout context manager and without using asynchronous iterator at all:
import asyncio
from async_timeout import timeout
async def main():
    """Repeatedly fetch and process a result, refreshing after every attempt."""
    while True:
        try:
            # NOTE(review): indentation was lost in this listing; processing
            # is assumed to run under the same 60-second timeout -- confirm.
            async with timeout(60):
                res = await something()
                # Process the value just fetched.
                await do_something_useful(res)
        except asyncio.TimeoutError:
            pass
        finally:
            refresh()

Your question is missing a couple of details, but assuming something() is an async iterator or generator and you want item to be sentinel every time something has not yielded a value within the timeout, here is an implementation of timeout():
import asyncio
from typing import *
T = TypeVar('T')


# async generator, needs python 3.6
async def timeout(it: AsyncIterator[T], timeo: float, sentinel: T) -> AsyncGenerator[T, None]:
    """Yield items from *it*, or *sentinel* whenever *timeo* seconds pass.

    The pending ``__anext__`` call runs as a task and is shielded, so a
    timeout does not cancel it; the same call is waited on again next round.
    """
    try:
        pending = asyncio.ensure_future(it.__anext__())
        while True:
            try:
                guarded = asyncio.shield(pending)
                value = await asyncio.wait_for(guarded, timeo)
                yield value
                # Only request the next item once this one was delivered.
                pending = asyncio.ensure_future(it.__anext__())
            except asyncio.TimeoutError:
                yield sentinel
    except StopAsyncIteration:
        pass
    finally:
        pending.cancel()  # in case we're getting cancelled ourselves
test:
async def something():
    """Yield 1, 2, 3 with pauses of 1.1 and 2.1 seconds in between."""
    yield 1
    for pause, value in ((1.1, 2), (2.1, 3)):
        await asyncio.sleep(pause)
        yield value
async def test():
    """Check the item/sentinel sequence produced with a 1-second timeout."""
    remaining = [1, None, 2, None, None, 3]
    async for item in timeout(something(), 1, None):
        print("Check", item)
        # Take the next expected value off the front, then compare.
        head, remaining = remaining[0], remaining[1:]
        assert item == head


asyncio.get_event_loop().run_until_complete(test())
When wait_for() times out it will cancel the task. Therefore, we need to wrap it.__anext__() in a task and then shield it, to be able to resume the iterator.

I want the coroutine to execute refresh at least every 60 seconds.
If you need to execute refresh every 60 seconds regardless of what happens with do_something_useful, you can arrange that with a separate coroutine:
import time
# Processes items from something() while a background task keeps calling
# refresh(). The background task is created but never awaited or cancelled
# here; it stops on its own the next time it checks `done`.
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# the trailing `await refresh()` runs for every item or only for non-None
# items -- confirm.
async def my_loop():
# ensure refresh() is invoked at least once in 60 seconds
done = False
async def repeat_refresh():
last_run = time.time()
while not done:
await refresh()
now = time.time()
# sleep for what is left of 60 seconds since the previous timestamp
await asyncio.sleep(max(60 - (now - last_run), 0))
last_run = now
# start repeat_refresh "in the background"
refresh_task = asyncio.get_event_loop().create_task(repeat_refresh())
try:
async for item in something():
if item is not None:
await do_something_useful(item)
await refresh()
finally:
# tells repeat_refresh to leave its loop
done = True

Task top-up with asyncio

In my project, I have a list of tasks that I execute with:
loop.run_until_complete(tasks)
However, there is an infinite number of tasks, so at the moment, I execute them in batches. Essentially, I have this:
def get_results(tasks):
    """Run *tasks* to completion on the loop and return the results as a list."""
    outcome = loop.run_until_complete(handle_tasks(tasks))
    return list(outcome)
while True:
tasks = get_tasks()
results = get_results(tasks)
I get a number of tasks, I launch a regular function that uses a loop to perform these tasks asynchronously and returns the results.
This approach works, but I believe it can be improved.
Instead of doing batches of tasks, I would like to do some sort of task top-up.
Something like this:
while True:
if current_tasks < max_tasks:
new_tasks = get_tasks(max_tasks - current_tasks)
add_tasks(new_tasks)
current_tasks, results = stats_and_results()
I appreciate any ideas on how to approach this problem.
Thanks!

We had a similar problem and ended up writing a small "Pool" wrapper that takes jobs and run them with a predefined concurrency.
import asyncio
import sys
class Pool:
    """Run submitted coroutines with a cap on how many execute at once.

    Iterating the pool with ``async for`` yields each job's result in
    submission order. Used as an async context manager, leaving the block
    waits for all jobs that are still queued.

    :param concurrency: maximum number of jobs running at the same time
    """

    def __init__(self, concurrency):
        self._sem = asyncio.BoundedSemaphore(concurrency)
        self.jobs = []

    async def __aenter__(self):
        return self

    async def __aexit__(self, *_):
        if self.jobs:
            await asyncio.wait(self.jobs)

    def put(self, coro):
        """Schedule *coro*; it starts once a concurrency slot is free."""
        assert asyncio.iscoroutine(coro)

        async def wrapped():
            async with self._sem:
                # Hand the job's result back to whoever awaits the future.
                return await coro

        fut = asyncio.ensure_future(wrapped())
        self.jobs.append(fut)

    def __aiter__(self):
        # Must be a plain method: ``async for`` needs the iterator itself,
        # not a coroutine that would return it.
        return self

    async def __anext__(self):
        try:
            job = self.jobs.pop(0)
        except IndexError:
            raise StopAsyncIteration() from None
        return await job
You can then use it this way:
async def main():
    """Queue every task on a 10-slot pool, then print each result."""
    pool = Pool(10)
    for job in get_tasks():
        pool.put(job)
    async for outcome in pool:
        print('got', outcome)
This will schedule all the tasks, run at most 10 of them concurrently, and hand the results to the main() coroutine in the order the tasks were submitted.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

how to cache asyncio coroutines - python-3.x

A popular async version of lru_cache exists here: async_lru

Related

How to use asynchronous iterator using aiter() and anext() builtins

Running blocking code in separate executor is failing with coroutines cannot be used with run_in_executor()

Inheritance in iterable implementation of python's multiprocessing.Queue

How to iterate over an asynchronous iterator with a timeout?

Task top-up with asyncio

Categories

Resources