Inheritance in an iterable implementation of Python's multiprocessing.Queue

I found the default implementation of Python's multiprocessing.Queue lacking, in that it's not iterable like any other collection, so I went about creating a 'subclass' of it that adds the feature. As you can see from the code below, it's not a proper subclass: multiprocessing.Queue isn't a direct class at all but a factory function, and the real underlying class is multiprocessing.queues.Queue. I don't have the understanding, nor the effort to spare, to mimic the factory function just so I can inherit from the class properly, so I simply had the new class create its own instance from the factory and treat it as the superclass. Here is the code:
from multiprocessing import Queue, Value, Lock
import queue

class QueueClosed(Exception):
    pass

class IterableQueue:
    def __init__(self, maxsize=0):
        self.closed = Value('b', False)
        self.close_lock = Lock()
        self.queue = Queue(maxsize)

    def close(self):
        with self.close_lock:
            self.closed.value = True
            self.queue.close()

    def put(self, elem, block=True, timeout=None):
        with self.close_lock:
            if self.closed.value:
                raise QueueClosed()
            else:
                self.queue.put(elem, block, timeout)

    def put_nowait(self, elem):
        self.put(elem, False)

    def get(self, block=True):
        if not block:
            return self.queue.get_nowait()
        elif self.closed.value:
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                return None
        else:
            val = None
            while not self.closed.value:
                try:
                    val = self.queue.get_nowait()
                    break
                except queue.Empty:
                    pass
            return val

    def get_nowait(self):
        return self.queue.get_nowait()

    def join_thread(self):
        return self.queue.join_thread()

    def __iter__(self):
        return self

    def __next__(self):
        val = self.get()
        if val is None:
            raise StopIteration()
        else:
            return val

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
This allows me to instantiate an IterableQueue object just like a normal multiprocessing.Queue, put elements into it as usual, and then, inside child consumers, simply loop over it like so:
from iterable_queue import IterableQueue
from multiprocessing import Process, cpu_count
import os

def fib(n):
    if n < 2:
        return n
    return fib(n-1) + fib(n-2)

def consumer(queue):
    print(f"[{os.getpid()}] Consuming")
    for i in queue:
        print(f"[{os.getpid()}] < {i}")
        n = fib(i)
        print(f"[{os.getpid()}] {i} > {n}")
    print(f"[{os.getpid()}] Closing")

def producer():
    print("Enqueueing")
    with IterableQueue() as queue:
        procs = [Process(target=consumer, args=(queue,)) for _ in range(cpu_count())]
        [p.start() for p in procs]
        [queue.put(i) for i in range(36)]
    print("Finished")

if __name__ == "__main__":
    producer()
and it works almost seamlessly: the consumers exit the loop once the queue has been closed, but only after exhausting all remaining elements. However, I was unsatisfied with the lack of inherited methods. In an attempt to mimic actual inheritance behavior, I tried adding the following __getattr__ fallback to the class:
def __getattr__(self, name):
    if name in self.__dict__:
        return self.__dict__[name]
    else:
        # delegate unknown attributes to the wrapped queue
        return getattr(self.queue, name)
However, this fails when instances of the IterableQueue class are manipulated inside child multiprocessing.Process processes, as the instance's __dict__ is not preserved within them. I attempted to remedy this in a hacky manner by replacing the class's default __dict__ with a multiprocessing.Manager().dict(), like so:
def __init__(self, maxsize=0):
    self.closed = Value('b', False)
    self.close_lock = Lock()
    self.queue = Queue(maxsize)
    self.__dict__ = Manager().dict(self.__dict__)
However, on doing so I received an error stating RuntimeError: Synchronized objects should only be shared between processes through inheritance. So my question is: how should I go about inheriting from the Queue class properly, such that the subclass has inherited access to all of its properties? In addition, while the queue is empty but not closed, the consumers all sit in a busy loop instead of a true IO block, taking up valuable CPU resources. If you have suggestions on concurrency and race-condition issues I might run into with this code, or on how to solve the busy-loop issue, I'd welcome those as well.
Based on code provided by MisterMiyagi, I created this general-purpose IterableQueue class, which can accept arbitrary input, blocks properly, and does not hang on queue close:
from multiprocessing.queues import Queue
from multiprocessing import get_context

class QueueClosed(Exception):
    pass

class IterableQueue(Queue):
    def __init__(self, maxsize=0, *, ctx=None):
        super().__init__(
            maxsize=maxsize,
            ctx=ctx if ctx is not None else get_context()
        )

    def close(self):
        super().put((None, False))
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return self.get()
        except QueueClosed:
            raise StopIteration

    def get(self, *args, **kwargs):
        result, is_open = super().get(*args, **kwargs)
        if not is_open:
            super().put((None, False))
            raise QueueClosed
        return result

    def put(self, val, *args, **kwargs):
        super().put((val, True), *args, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
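For illustration, here is a minimal usage sketch of the class above (my addition; it assumes the class is saved as iterable_queue.py). Consumers now block inside get() rather than spinning, and close() unblocks them:

from iterable_queue import IterableQueue
from multiprocessing import Process

def consumer(q):
    for item in q:  # blocks in get() instead of busy-waiting
        print(item)

if __name__ == "__main__":
    with IterableQueue() as q:
        workers = [Process(target=consumer, args=(q,)) for _ in range(2)]
        for w in workers:
            w.start()
        for i in range(10):
            q.put(i)
    # __exit__ called close(), which enqueued the closed marker;
    # each consumer re-queues it for its siblings before stopping.
    for w in workers:
        w.join()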

The multiprocessing.Queue wrapper only serves to use the default context:
def Queue(self, maxsize=0):
    '''Returns a queue object'''
    from .queues import Queue
    return Queue(maxsize, ctx=self.get_context())
When inheriting, you can replicate this in the __init__ method. This allows you to inherit the entire Queue behaviour. You only need to add the iterator methods:
import time
from multiprocessing.queues import Queue
from multiprocessing import get_context

class IterableQueue(Queue):
    """
    ``multiprocessing.Queue`` that can be iterated to ``get`` values

    :param sentinel: signal that no more items will be received
    """
    def __init__(self, maxsize=0, *, ctx=None, sentinel=None):
        self.sentinel = sentinel
        super().__init__(
            maxsize=maxsize,
            ctx=ctx if ctx is not None else get_context()
        )

    def close(self):
        self.put(self.sentinel)
        # wait until buffer is flushed...
        while self._buffer:
            time.sleep(0.01)
        # before shutting down the sender
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        result = self.get()
        if result == self.sentinel:
            # re-queue sentinel for other listeners
            self.put(result)
            raise StopIteration
        return result
Note that the sentinel indicating end-of-queue is compared by equality, because identity is not preserved across processes. The often-used queue.Queue sentinel, object(), does not work properly with this.
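To illustrate that last point, here is a minimal sketch (my addition, not from the answer): anything that travels through the queue is pickled and unpickled, so the receiving process sees a copy, and object() has no value-based equality.

import multiprocessing as mp

SENTINEL = object()

def check(q):
    item = q.get()
    # item is an unpickled *copy* of SENTINEL, not the same object
    print(item is SENTINEL)   # False
    print(item == SENTINEL)   # False: object() equality falls back to identity

if __name__ == "__main__":
    q = mp.Queue()
    q.put(SENTINEL)
    p = mp.Process(target=check, args=(q,))
    p.start()
    p.join()

A sentinel like None or a plain string survives the round trip because it compares by value, which is why the default sentinel=None above works.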

Related

How could I create a docstring decorator in the presence of properties?

I have a collection of ever more specialized classes which correspond to collections of the same kind of data (temperature, density, etc.) but for different drifts; for example, one subclass has dimensions (nx, ny) and a different subclass has dimensions (ncv). I want to reflect that in the docstrings, for better documentation with Sphinx.
After reading many very useful threads here on Stack Overflow, I have arrived at this model:
import numpy as np
from functools import wraps

def class_decorator(cls):
    import ipdb; ipdb.set_trace()
    clsdict = {}
    mro = cls.mro()
    mro.reverse()
    for tmp in mro[1:]:  # ignore the object class parent
        clsdict.update(tmp.__dict__)
    for name, method in clsdict.items():
        if hasattr(method, '__og_doc__'):
            try:
                method.__doc__ = method.__og_doc__.format(**clsdict)
            except:
                pass
        else:
            try:
                method.__og_doc__ = method.__doc__
                method.__doc__ = method.__doc__.format(**clsdict)
            except:
                pass
    return cls

def mark_documentation(fn):
    if not hasattr(fn, '__og_doc__'):
        try:
            fn.__og_doc__ = fn.__doc__
        except:
            pass
    @wraps(fn)
    def wrapped(*args, **kwargs):
        return fn(*args, **kwargs)
    return wrapped

def documented_property(fn):
    if not hasattr(fn, '__og_doc__'):
        try:
            fn.__og_doc__ = fn.__doc__
        except:
            pass
    @wraps(fn)
    def wrapped(*args, **kwargs):
        return fn(*args, **kwargs)
    prp = property(wrapped)
    prp.__og_doc__ = fn.__og_doc__
    return prp

@class_decorator
class Base(object):
    _GRID_DIM = 'nx, ny'
    _TYPE = 'BaseData'

    def __init__(self, name):
        self.name = name

    def shape(self):
        """ This docstring contains the type '{_TYPE}' of class."""
        print('Simple')

    def operation(self, a, b, oper=np.sum, **kwargs):
        """ Test for functions with args and kwargs in {_TYPE}"""
        return oper([a, b])

    @classmethod
    def help(cls, var):
        try:
            print(getattr(cls, var).__doc__)
        except:
            print("No docstring yet.")

@class_decorator
class Advanced(Base):
    _GRID_DIM = 'ncv'
    _TYPE = 'AdvancedData'

    def __init__(self, name):
        super().__init__(name)

    @property
    @mark_documentation
    # @documented_property
    def arkansas(self):
        """({_GRID_DIM}, ns): Size of Arkansaw."""
        return 'Yeah'
I am aiming to get the correctly formatted docstring when I call the help method or use Sphinx, so that:
>>> adv = Advanced('ADV')
>>> adv.help("arkansas")
(ncv, ns): Size of Arkansaw.
>>> adv.help("operation")
Test for functions with args and kwargs in AdvancedData
I have managed to make it work so far, except for properties: I assign __og_doc__ to the underlying function, but the property object does not have that attribute. My last attempt at monkeypatching this, documented_property, fails because property is immutable (as expected), and I cannot come up with any way to avoid this roadblock.
Is there any way around this problem?
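For what it's worth, one possible way around the immutability (my sketch, not from the original thread): only the built-in property type rejects new attributes; instances of a property subclass have a __dict__ and accept them, so __og_doc__ can ride along:

class DocumentedProperty(property):
    """Subclass instances have a __dict__, so extra attributes are allowed."""

def documented_property(fn):
    prp = DocumentedProperty(fn)
    prp.__og_doc__ = fn.__doc__  # fails on a plain property, works here
    return prp

class_decorator would then also need to assign the formatted text to prp.__doc__, which Python 3 allows on property instances.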

Python3: Subclass of Process, call methods in run()

I've tried to extend the multiprocessing.Process class to use it in a Command-pattern way... There is a scheduler instance where the client invokes commands and calls for execution. But the command code never terminates after self.execute() is called. Here is the command class:
import sys
from multiprocessing import Process

class Command(Process):
    def __init__(self):
        super().__init__()
        self.result = None
        self.command_name = type(self).__name__
        self.shell = False

    # from Process
    def run(self):
        super().run()
        print("running " + self.command_name)
        sys.stdout.flush()
        self.execute()
        print("finished " + self.command_name)
        sys.stdout.flush()
        sys.exit(0)

    def execute(self):
        pass
The idea is simply that each subclass of Command provides its own code in the execute() method. For instance:
class LoadCommand(Command):
    def __init__(self, parameterA, ...):
        super().__init__()
        ...

    def execute(self):
        print("executing LoadCommand")
        ...
        return
This is my scheduler:
from multiprocessing import Queue

class Scheduler:
    _instance = None
    _history_queue = []
    _command_queue = []
    _logger = None
    # IPC; a negative maxsize means infinite size
    _pipe = Queue(maxsize=-1)

    def __init__(self):
        raise RuntimeError('Call getInstance() instead')

    @classmethod
    def getInstance(cls):
        if cls._instance is None:
            cls._instance = cls.__new__(cls)
        return cls._instance

    def getPipe(self):
        print(self._pipe)
        return self._pipe

    def enqueueCommand(self, command):
        # if isinstance(command, Command):
        self._command_queue.append(command)

    def executeQueue(self, synchronicMode):
        while len(self._command_queue) > 0:
            command = self._command_queue.pop(0)
            command.start()
            if synchronicMode:
                # wait until this process is done
                print("Waiting\n")
                command.join(10)
                if command.is_alive():
                    print("process isn't finished")
                else:
                    print("process finished")
            self._history_queue.append(command)
I've tried calling sys.exit(0) immediately after run() begins, and that works (the process terminates), so maybe there is an error in the inheritance hierarchy, but I can't see it.
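One general multiprocessing pitfall worth ruling out here (my note, not a confirmed diagnosis of this question): per the "Joining processes that use queues" section of the Python docs, a child that has put items on a multiprocessing.Queue will not terminate until its feeder thread has flushed them to the underlying pipe, so joining the child before draining the queue can deadlock. A minimal sketch of the pattern:

from multiprocessing import Process, Queue

def worker(q):
    # large enough that it cannot all fit in the pipe buffer
    q.put('x' * 10_000_000)

if __name__ == "__main__":
    q = Queue()
    p = Process(target=worker, args=(q,))
    p.start()
    result = q.get()  # drain BEFORE join(); swapping these two lines can hang
    p.join()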

Pytests with context manager

I am trying to understand how to test a context manager with pytest.
I created a class and need to count how many times the static method do_some_stuff was called:
class Iterator():
    def __init__(self):
        pass

    @staticmethod
    def do_some_stuff():
        pass

    def __enter__(self):
        return [i for i in range(10)]

    def __exit__(self, *args):
        return True

iterator = Iterator()

def f(iterator):
    with iterator as i:
        for _ in i:
            iterator.do_some_stuff()
I have created a py.test file and need to check that the function was called 10 times, but my solution isn't working:
import pytest
from unittest.mock import MagicMock

@pytest.fixture
def iterator():
    return MagicMock(spec=Iterator)

def test_f(iterator):
    f(iterator)
    assert iterator.do_some_stuff.call_count == 10
Thanks in advance
The reason your code doesn't work is that MagicMock(spec=Iterator) replaces the __enter__ method of your Iterator class with a MagicMock object; see the MagicMock documentation. This means that in your test, the value of i in function f is a MagicMock object instead of list(range(10)), so the code inside the for loop is never executed.
To make it work, you will probably only want to mock the do_some_stuff method:
import pytest
from unittest.mock import Mock

@pytest.fixture
def iterator():
    it = Iterator()
    it.do_some_stuff = Mock()
    return it

def test_f(iterator):
    f(iterator)
    assert iterator.do_some_stuff.call_count == 10
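An equivalent approach (my sketch, not from the original answer) is unittest.mock.patch.object, which also restores the real static method once the test finishes:

from unittest.mock import patch

def test_f_patch():
    it = Iterator()
    with patch.object(Iterator, "do_some_stuff") as mock_stuff:
        f(it)
    assert mock_stuff.call_count == 10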

Apply a decorator to all methods of subclasses for timeit

I have a method decorator that looks like this:
def debug_run(fn):
    from functools import wraps

    @wraps(fn)
    def wrapper(self, *args, **kw):
        # log some stuff
        # timeit fn
        res = fn(self, *args, **kw)
        return res
    return wrapper
Until now I have applied it by hand to each method I want to debug. Now I'm trying to apply it to all methods of a class using a class decorator. Rather than doing:
class A():
    @debug_run
    def f(self):
        pass
I do
@decallmethods(debug_run)
class A():
    def f(self):
        pass

def decallmethods(decorator):
    def dectheclass(cls):
        for name, m in inspect.getmembers(cls, inspect.ismethod):
            if name in getattr(cls, 'METHODS_TO_INSPECT', []):
                setattr(cls, name, decorator(m))
        return cls
    return dectheclass
Trying to apply the decorator to the base class does not work as expected: nothing is logged to the console. Now I wonder whether this approach is the right one, or whether I should use something else (apply the debug decorator to selected methods from the base class down through all subclasses).
[EDIT]
I finally found out why no logs were printed: Why is there a difference between inspect.ismethod and inspect.isfunction from python 2 -> 3?
Here is a complete example reflecting my code:
import inspect
import time
import logging as logger
from functools import wraps

logger.basicConfig(format='LOGGER - %(asctime)s %(message)s', level=logger.DEBUG)

def debug_run(fn):
    @wraps(fn)
    def wrapper(self, *args, **kw):
        logger.debug(
            "call method %s of instance %s with %r and %s "
            % (fn.__name__, self, args, kw))
        time1 = time.time()
        res = fn(self, *args, **kw)
        time2 = time.time()
        logger.debug(
            "%s function %0.3f ms" % (fn, (time2 - time1) * 1000.0))
        return res
    return wrapper

def decallmethods(decorator):
    def dectheclass(cls):
        for name, m in inspect.getmembers(
                cls, predicate=lambda x: inspect.isfunction(x) or inspect.ismethod(x)):
            methods_to_inspect = getattr(cls, 'METHODS_TO_INSPECT', [])
            if name in methods_to_inspect:
                setattr(cls, name, decorator(m))
        return cls
    return dectheclass

class B(object):
    METHODS_TO_INSPECT = ["bfoo1", "bfoo2", "foo"]

    def __str__(self):
        return "%s:%s" % (repr(self), id(self))

    def bfoo1(self):
        pass

    def bfoo2(self):
        pass

    def foo(self):
        pass

    def run(self):
        print("print - Base run doing nothing")

class C(object):
    pass

@decallmethods(debug_run)
class A(B, C):
    METHODS_TO_INSPECT = ["bfoo1", "bfoo2", "foo", "run"]

    def foo(self):
        print("print - A foo")

    def run(self):
        self.bfoo1()
        self.bfoo2()
        self.foo()

a = A()
b = B()
a.run()
b.run()
In this case, applying decallmethods to B will not affect A, so I must apply it to both A and B, and thus to all subclasses of B. Is it possible to have a mechanism that applies decallmethods to the methods of all subclasses?
Look at this: How can I decorate all functions of a class without typing it over and over for each method added? Python. delnan has a good answer; just add this rule to it:
if name in getattr(cls, 'METHODS_TO_INSPECT', []):
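As a sketch of pushing this to all subclasses automatically (my addition, assuming Python 3.6+): __init_subclass__ runs on the base class every time a subclass is defined, so no per-class decorator is needed:

import inspect
from functools import wraps

def debug_run(fn):  # same role as the decorator above
    @wraps(fn)
    def wrapper(self, *args, **kw):
        print("call", fn.__name__)
        return fn(self, *args, **kw)
    return wrapper

class Instrumented:
    """Base class; every subclass gets its listed methods decorated."""
    METHODS_TO_INSPECT = []

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Only wrap methods defined on this subclass itself, so methods
        # inherited in already-wrapped form are not wrapped twice.
        for name, member in list(vars(cls).items()):
            if inspect.isfunction(member) and name in cls.METHODS_TO_INSPECT:
                setattr(cls, name, debug_run(member))

class B(Instrumented):
    METHODS_TO_INSPECT = ["foo"]
    def foo(self):
        pass

class A(B):  # no @decallmethods needed; wrapping happens automatically
    METHODS_TO_INSPECT = ["foo", "run"]
    def run(self):
        self.foo()

A().run()  # logs both run (wrapped on A) and foo (wrapped on B)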

how to cache asyncio coroutines

I am using aiohttp to make a simple HTTP request in python 3.4 like this:
response = yield from aiohttp.get(url)
The application requests the same URL over and over again, so naturally I wanted to cache it. My first attempt was something like this:
@functools.lru_cache(maxsize=128)
def cached_request(url):
    return aiohttp.get(url)
The first call to cached_request works fine, but in later calls I end up with None instead of the response object.
I am rather new to asyncio so I tried a lot of combinations of the asyncio.coroutine decorator, yield from and some other things, but none seemed to work.
So how does caching coroutines work?
Maybe a bit late, but I've started a new package that may help: https://github.com/argaen/aiocache. Contributions/comments are always welcome.
An example:
import asyncio
from collections import namedtuple
from aiocache import cached
from aiocache.serializers import PickleSerializer

Result = namedtuple('Result', "content, status")

@cached(ttl=10, serializer=PickleSerializer())
async def async_main():
    print("First ASYNC non cached call...")
    await asyncio.sleep(1)
    return Result("content", 200)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    print(loop.run_until_complete(async_main()))
    print(loop.run_until_complete(async_main()))
    print(loop.run_until_complete(async_main()))
    print(loop.run_until_complete(async_main()))
Note that, as an extra, it can cache any Python object into Redis using pickle serialization. If you just want to work with memory, you can use the SimpleMemoryCache backend :).
A popular async version of lru_cache exists here: async_lru.
To use functools.lru_cache with coroutines, the following code works.
import asyncio
import functools

import aiohttp

class Cacheable:
    def __init__(self, co):
        self.co = co
        self.done = False
        self.result = None
        self.lock = asyncio.Lock()

    def __await__(self):
        with (yield from self.lock):
            if self.done:
                return self.result
            self.result = yield from self.co.__await__()
            self.done = True
            return self.result

def cacheable(f):
    def wrapped(*args, **kwargs):
        r = f(*args, **kwargs)
        return Cacheable(r)
    return wrapped

@functools.lru_cache()
@cacheable
async def foo():
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()
The following is thread-safe:
import asyncio
import threading

class ThreadSafeCacheable:
    def __init__(self, co):
        self.co = co
        self.done = False
        self.result = None
        self.lock = threading.Lock()

    def __await__(self):
        while True:
            if self.done:
                return self.result
            if self.lock.acquire(blocking=False):
                self.result = yield from self.co.__await__()
                self.done = True
                return self.result
            else:
                yield from asyncio.sleep(0.005)
I wrote a simple cache decorator myself:
def async_cache(maxsize=128):
    cache = {}

    def decorator(fn):
        def wrapper(*args):
            key = ':'.join(args)
            if key not in cache:
                if len(cache) >= maxsize:
                    # evict the oldest entry (Python 3 spelling)
                    del cache[next(iter(cache))]
                cache[key] = yield from fn(*args)
            return cache[key]
        return wrapper
    return decorator

@async_cache()
@asyncio.coroutine
def expensive_io():
    ...
This kind of works, but many aspects can probably be improved. For example: if the cached function is called a second time before the first call returns, it will execute a second time.
I'm not that familiar with aiohttp so I'm not sure of exactly what is happening that would cause Nones to be returned, but the lru_cache decorator will not work with async functions.
I use a decorator which does essentially the same thing; note that it is different from tobib's decorator above in that it will always return a future or a task, rather than the value:
import asyncio
from collections import OrderedDict
from functools import _make_key, wraps

def future_lru_cache(maxsize=128):
    # support use as decorator without calling; in that case maxsize is
    # the decorated function, and int() raises TypeError on it
    try:
        real_max_size = int(maxsize)
    except (TypeError, ValueError):
        real_max_size = 128

    cache = OrderedDict()

    async def run_and_cache(func, args, kwargs):
        """Run func with the specified arguments and store the result
        in cache."""
        result = await func(*args, **kwargs)
        cache[_make_key(args, kwargs, False)] = result
        if len(cache) > real_max_size:
            cache.popitem(False)
        return result

    def wrapper(func):
        @wraps(func)
        def decorator(*args, **kwargs):
            key = _make_key(args, kwargs, False)
            if key in cache:
                # Some protection against duplicating calls already in
                # progress: when starting the call cache the future, and if
                # the same thing is requested again return that future.
                if isinstance(cache[key], asyncio.Future):
                    return cache[key]
                else:
                    f = asyncio.Future()
                    f.set_result(cache[key])
                    return f
            else:
                task = asyncio.Task(run_and_cache(func, args, kwargs))
                cache[key] = task
                return task
        return decorator

    if callable(maxsize):
        return wrapper(maxsize)
    else:
        return wrapper
I used _make_key from functools as lru_cache does; I guess it's supposed to be private, so it's probably better to copy it over.
This is how I think it's most easily done, using the built-in lru_cache and futures:
import asyncio
import functools

# parameterless decorator
def async_lru_cache_decorator(async_function):
    @functools.lru_cache
    def cached_async_function(*args, **kwargs):
        coroutine = async_function(*args, **kwargs)
        return asyncio.ensure_future(coroutine)
    return cached_async_function

# decorator with options
def async_lru_cache(*lru_cache_args, **lru_cache_kwargs):
    def async_lru_cache_decorator(async_function):
        @functools.lru_cache(*lru_cache_args, **lru_cache_kwargs)
        def cached_async_function(*args, **kwargs):
            coroutine = async_function(*args, **kwargs)
            return asyncio.ensure_future(coroutine)
        return cached_async_function
    return async_lru_cache_decorator

@async_lru_cache(maxsize=128)
async def your_async_function(...): ...
This is basically taking your original function and wrapping it so I can store the Coroutine it returns and convert it into a Future. This way it can be treated as a regular function and you can lru_cache it as you usually would.
Why is wrapping it in a Future necessary? Python coroutines are low-level constructs, and you can't await one more than once (you would get RuntimeError: cannot reuse already awaited coroutine). Futures, on the other hand, are handy: they can be awaited consecutively and will return the same result.
One caveat is that caching a Future will also cache the outcome when the original function raised an error. The original lru_cache does not cache interrupted executions, so watch out for this edge case with the solution above.
Further tweaking can be done to merge both the parameter-less and the parameterized decorators, like the original lru_cache which supports both usages.
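To illustrate the reuse point, a minimal check (my sketch): a bare coroutine raises on the second await, while a task wrapping it can be awaited repeatedly:

import asyncio

async def answer():
    return 42

async def main():
    coro = answer()
    print(await coro)          # 42
    try:
        await coro             # a bare coroutine cannot be awaited twice
    except RuntimeError as e:
        print(e)               # cannot reuse already awaited coroutine
    task = asyncio.ensure_future(answer())
    print(await task)          # 42
    print(await task)          # 42 again; tasks/futures can be re-awaited

asyncio.get_event_loop().run_until_complete(main())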
Another variant of the lru decorator, which caches coroutines that have not yet finished; this is very useful with parallel requests for the same key:
import asyncio
from collections import OrderedDict
from functools import _make_key, wraps

def async_cache(maxsize=128, event_loop=None):
    cache = OrderedDict()
    if event_loop is None:
        event_loop = asyncio.get_event_loop()
    awaiting = dict()

    async def run_and_cache(func, args, kwargs):
        """await func with the specified arguments and store the result
        in cache."""
        result = await func(*args, **kwargs)
        key = _make_key(args, kwargs, False)
        cache[key] = result
        if len(cache) > maxsize:
            cache.popitem(False)
        cache.move_to_end(key)
        return result

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            key = _make_key(args, kwargs, False)
            if key in cache:
                return cache[key]
            if key in awaiting:
                task = awaiting[key]
                return await asyncio.wait_for(task, timeout=None, loop=event_loop)
            task = asyncio.ensure_future(run_and_cache(func, args, kwargs), loop=event_loop)
            awaiting[key] = task
            result = await asyncio.wait_for(task, timeout=None, loop=event_loop)
            del awaiting[key]
            return result
        return wrapper
    return decorator

async def test_async_cache(event_loop):
    counter = 0
    n, m = 10, 3

    @async_cache(maxsize=n, event_loop=event_loop)
    async def cached_function(x):
        nonlocal counter
        await asyncio.sleep(0)  # make the event loop switch to another coroutine
        counter += 1
        return x

    tasks = [asyncio.ensure_future(cached_function(x), loop=event_loop)
             for x in list(range(n)) * m]
    done, pending = await asyncio.wait(tasks, loop=event_loop, timeout=1)
    assert len(done) == n * m
    assert counter == n

event_loop = asyncio.get_event_loop()
task = asyncio.ensure_future(test_async_cache(event_loop))
event_loop.run_until_complete(task)
I think that the simplest way is to use aiohttp_cache (documentation)
pip install aiohttp-cache
And use it in code:
from aiohttp import web
from aiohttp_cache import cache, setup_cache

@cache()  # <-- DECORATED FUNCTION
async def example_1(request):
    return web.Response(text="Example")

app = web.Application()
app.router.add_route('GET', "/", example_1)
setup_cache(app)  # <-- INITIALIZED aiohttp-cache
web.run_app(app, host="127.0.0.1")
Try async-cache (PyPI, GitHub) for caching async functions in Python.
It also supports functions with parameters of user-defined, object, or otherwise unhashable types, which neither functools.lru_cache nor async_lru supports.
Usage:
pip install async-cache

from cache import AsyncLRU

@AsyncLRU(maxsize=128)
async def func(*args, **kwargs):
    pass
I wrote a simple package named asyncio-cache - https://github.com/matan1008/asyncio-cache.
I tried to keep the code as close as possible to the original python implementation and as simple as possible.
For example:
from asyncio_cache import lru_cache
import aiohttp

@lru_cache(maxsize=128)
async def cached_get(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()
