I've tried to extend the multiprocessing.Process class to use it in a Command-pattern way... There is a scheduler instance on which the client enqueues commands and then triggers their execution. But the command process never terminates after self.execute() is called. Here is the command class:
import sys
from multiprocessing import Process

class Command(Process):
    def __init__(self):
        super().__init__()
        self.result = None
        self.command_name = type(self).__name__
        self.shell = False

    # from Process
    def run(self):
        super().run()
        print("running " + self.command_name)
        sys.stdout.flush()
        self.execute()
        print("finished " + self.command_name)
        sys.stdout.flush()
        sys.exit(0)

    def execute(self):
        pass
The idea is simple: each subclass of Command provides its own code in the execute() method. For instance:
class LoadCommand(Command):
    def __init__(self, parameterA, ...):
        super().__init__()
        ...

    def execute(self):
        print("executing LoadCommand")
        ...
        return
This is my scheduler:
from multiprocessing import Queue

class Scheduler:
    _instance = None
    _history_queue = []
    _command_queue = []
    _logger = None
    # IPC, negative maxsize means infinite size
    _pipe = Queue(maxsize=-1)

    def __init__(self):
        raise RuntimeError('Call getInstance() instead')

    @classmethod
    def getInstance(cls):
        if cls._instance is None:
            cls._instance = cls.__new__(cls)
        return cls._instance

    def getPipe(self):
        print(self._pipe)
        return self._pipe

    def enqueueCommand(self, command):
        # if isinstance(command, Command):
        self._command_queue.append(command)

    def executeQueue(self, synchronicMode):
        while len(self._command_queue) > 0:
            command = self._command_queue.pop(0)
            command.start()
            if synchronicMode:
                # wait until this process is done
                print("Waiting\n")
                command.join(10)
                if command.is_alive():
                    print("process isn't finished")
                else:
                    print("process finished")
            self._history_queue.append(command)
I've tried calling sys.exit(0) immediately after run() begins, and that works (the process terminates). So maybe there is an error in the inheritance hierarchy, but I can't see it.
I found the default implementation of Python's multiprocessing.Queue lacking, in that it's not iterable like other collections. So I went about creating a 'subclass' of it that adds the feature. As you can see from the code below, it's not a proper subclass: multiprocessing.Queue isn't a class itself but a factory function, and the real underlying class is multiprocessing.queues.Queue. I don't have the understanding, nor want to expend the effort, to mimic the factory function just so I can inherit from the class properly, so I simply had the new class create its own instance from the factory and treat it as the superclass. Here is the code:
from multiprocessing import Queue, Value, Lock
import queue

class QueueClosed(Exception):
    pass

class IterableQueue:
    def __init__(self, maxsize=0):
        self.closed = Value('b', False)
        self.close_lock = Lock()
        self.queue = Queue(maxsize)

    def close(self):
        with self.close_lock:
            self.closed.value = True
            self.queue.close()

    def put(self, elem, block=True, timeout=None):
        with self.close_lock:
            if self.closed.value:
                raise QueueClosed()
            else:
                self.queue.put(elem, block, timeout)

    def put_nowait(self, elem):
        self.put(elem, False)

    def get(self, block=True):
        if not block:
            return self.queue.get_nowait()
        elif self.closed.value:
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                return None
        else:
            val = None
            while not self.closed.value:
                try:
                    val = self.queue.get_nowait()
                    break
                except queue.Empty:
                    pass
            return val

    def get_nowait(self):
        return self.queue.get_nowait()

    def join_thread(self):
        return self.queue.join_thread()

    def __iter__(self):
        return self

    def __next__(self):
        val = self.get()
        if val == None:
            raise StopIteration()
        else:
            return val

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
This allows me to instantiate an IterableQueue object just like a normal multiprocessing.Queue, put elements into it as normal, and then, inside child consumers, simply loop over it like so:
from iterable_queue import IterableQueue
from multiprocessing import Process, cpu_count
import os

def fib(n):
    if n < 2:
        return n
    return fib(n-1) + fib(n-2)

def consumer(queue):
    print(f"[{os.getpid()}] Consuming")
    for i in queue:
        print(f"[{os.getpid()}] < {i}")
        n = fib(i)
        print(f"[{os.getpid()}] {i} > {n}")
    print(f"[{os.getpid()}] Closing")

def producer():
    print("Enqueueing")
    with IterableQueue() as queue:
        procs = [Process(target=consumer, args=(queue,)) for _ in range(cpu_count())]
        [p.start() for p in procs]
        [queue.put(i) for i in range(36)]
    print("Finished")

if __name__ == "__main__":
    producer()
and it works almost seamlessly; the consumers exit the loop once the queue has been closed, but only after exhausting all remaining elements. However, I was unsatisfied with the lack of inherited methods. In an attempt to mimic actual inheritance behavior, I tried adding the following __getattr__ fallback to the class:
def __getattr__(self, name):
    if name in self.__dict__:
        return self.__dict__[name]
    else:
        return getattr(self.queue, name)
However, this fails when instances of the IterableQueue class are manipulated inside child multiprocessing.Process processes, as the instance's __dict__ is not preserved within them. I attempted to remedy this in a hacky manner by replacing the default __dict__ with a multiprocessing.Manager().dict(), like so:
def __init__(self, maxsize=0):
    self.closed = Value('b', False)
    self.close_lock = Lock()
    self.queue = Queue(maxsize)
    self.__dict__ = Manager().dict(self.__dict__)
However, on doing so I received an error stating RuntimeError: Synchronized objects should only be shared between processes through inheritance. So my question is: how should I go about inheriting from the Queue class properly, such that the subclass has inherited access to all of its properties? In addition, while the queue is empty but not closed, the consumers all sit in a busy loop instead of a true IO block, taking up valuable CPU resources. If you have any suggestions on concurrency and race-condition issues I might run into with this code, or on how I might solve the busy-loop issue, I'd be willing to take those as well.
Based on code provided by MisterMiyagi, I created this general-purpose IterableQueue class, which can accept arbitrary input, blocks properly, and does not hang on queue close:
from multiprocessing.queues import Queue
from multiprocessing import get_context

class QueueClosed(Exception):
    pass

class IterableQueue(Queue):
    def __init__(self, maxsize=0, *, ctx=None):
        super().__init__(
            maxsize=maxsize,
            ctx=ctx if ctx is not None else get_context()
        )

    def close(self):
        super().put((None, False))
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return self.get()
        except QueueClosed:
            raise StopIteration

    def get(self, *args, **kwargs):
        result, is_open = super().get(*args, **kwargs)
        if not is_open:
            super().put((None, False))
            raise QueueClosed
        return result

    def put(self, val, *args, **kwargs):
        super().put((val, True), *args, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
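For reference, a minimal usage sketch of this class (the worker count and workload are illustrative, and it assumes the class above is importable by the worker processes):

from multiprocessing import Process

def consumer(queue):
    # iteration stops once the queue has been closed and drained
    for item in queue:
        print("got", item)

if __name__ == "__main__":
    with IterableQueue() as queue:
        workers = [Process(target=consumer, args=(queue,)) for _ in range(2)]
        for w in workers:
            w.start()
        for i in range(10):
            queue.put(i)
    # __exit__ closed the queue, so the workers finish on their own
    for w in workers:
        w.join()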
The multiprocessing.Queue wrapper only serves to pass in the default context.
def Queue(self, maxsize=0):
    '''Returns a queue object'''
    from .queues import Queue
    return Queue(maxsize, ctx=self.get_context())
When inheriting, you can replicate this in the __init__ method. This allows you to inherit the entire Queue behaviour. You only need to add the iterator methods:
import time

from multiprocessing.queues import Queue
from multiprocessing import get_context

class IterableQueue(Queue):
    """
    ``multiprocessing.Queue`` that can be iterated to ``get`` values

    :param sentinel: signal that no more items will be received
    """
    def __init__(self, maxsize=0, *, ctx=None, sentinel=None):
        self.sentinel = sentinel
        super().__init__(
            maxsize=maxsize,
            ctx=ctx if ctx is not None else get_context()
        )

    def close(self):
        self.put(self.sentinel)
        # wait until buffer is flushed...
        while self._buffer:
            time.sleep(0.01)
        # before shutting down the sender
        super().close()

    def __iter__(self):
        return self

    def __next__(self):
        result = self.get()
        if result == self.sentinel:
            # re-queue sentinel for other listeners
            self.put(result)
            raise StopIteration
        return result
Note that the sentinel indicating end-of-queue is compared by equality, because identity is not preserved across processes. The sentinel object() often used with queue.Queue does not work properly here.
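A quick standalone illustration of the difference (plain pickle stands in for the transfer between processes):

import pickle

marker = object()
restored = pickle.loads(pickle.dumps(marker))
print(restored is marker)   # False: identity is lost once the object crosses a process boundary
print(restored == marker)   # False: a bare object() only compares equal by identity

# an equality-comparable sentinel such as None or a string survives the round trip
print(pickle.loads(pickle.dumps("END")) == "END")   # True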
I am following A Curious Course on Coroutines and Concurrency to learn coroutines, and I ran into a problem getting the following code to run.
The code mimics an operating system scheduling tasks:
from queue import Queue

class Task:
    taskid = 0
    def __init__(self, target):
        Task.taskid += 1  # count the task
        self.tid = Task.taskid
        self.tartet = target
        self.sendval = None
    def run(self):
        return self.target.send(self.sendval)

class Scheduler:
    def __init__(self):
        self.ready = Queue()  # a queue of tasks that are ready to run
        self.taskmap = {}  # dictionary that keeps track of all active tasks (each task has a unique integer task ID)
    def new(self, target):  # introduce a new task to the scheduler
        newtask = Task(target)
        self.taskmap[newtask.tid] = newtask
    def schedule(self, task):
        self.ready.put(task)
    def mainloop(self):
        while self.taskmap:  # I think the problem is here
            task = self.ready.get()  # I think it should be while self.ready
            result = task.run()
            self.schedule(task)
Test it with
def foo():
    while True:
        print("I'm foo")
        yield

def bar():
    while True:
        print("I'm bar")
        yield
It hangs instead of returning a value:
In [85]: schedule.new(foo())
In [86]: schedule.new(bar())
In [87]: schedule.mainloop()
^C---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
I reviewed the code and found a problem with:
def mainloop(self):
    while self.taskmap:  # I think the problem is here
        task = self.ready.get()  # I think it should be while self.ready
        result = task.run()
        self.schedule(task)
while self.taskmap: nothing ever removes elements from taskmap, so it is an infinite loop.
I changed it to
def mainloop(self):
    while self.taskmap:  # I think the problem is here
        task = self.ready.get()  # I think it should be while self.ready
        result = task.run()
        self.schedule(task)
However, it still does not work.
What's the problem with my code?
I have a cycling threading method like this:
def make_periodic(self, method, period_sec, *args):
    method(*args)
    parameters = [method, period_sec] + list(args)
    threading.Timer(period_sec, self.make_periodic, parameters).start()
What is the best way to stop the cycling of a method of one type?
For example:
import threading

class TestThreading:
    PERIOD = 5

    def __init__(self):
        self.number = 0
        self.text = "t"

    def method_1(self):
        print(self.number)
        self.number += 1

    def method_2(self, text):
        print(self.text)
        self.text += text

    def make_periodic(self, method, period_sec, *args):
        method(*args)
        parameters = [method, period_sec] + list(args)
        threading.Timer(period_sec, self.make_periodic, parameters).start()

if __name__ == '__main__':
    test = TestThreading()
    test.make_periodic(test.method_1, TestThreading.PERIOD)
    test.make_periodic(test.method_2, TestThreading.PERIOD, "t")
    # stops the cycling of method_2, but method_1 continues
    test.stop_threading(test.method_2)
Try keeping a reference to each timer in a dictionary, e.g. my_dict["method_name"] = timer. Then, when you decide to stop a timer, just call my_dict["method_name"].cancel().
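A minimal sketch of that approach (the stop_threading name comes from the question's usage; the _timers dictionary is new, and the race between a cycle that is already executing and cancel() is not handled):

import threading

class TestThreading:
    PERIOD = 5

    def __init__(self):
        self.number = 0
        self._timers = {}  # maps each periodic method to its most recently scheduled Timer

    def method_1(self):
        print(self.number)
        self.number += 1

    def make_periodic(self, method, period_sec, *args):
        method(*args)
        timer = threading.Timer(period_sec, self.make_periodic,
                                [method, period_sec] + list(args))
        self._timers[method] = timer  # remember the newest timer for this method
        timer.start()

    def stop_threading(self, method):
        timer = self._timers.pop(method, None)
        if timer is not None:
            timer.cancel()  # cancel the timer scheduled for the next cycle

if __name__ == '__main__':
    test = TestThreading()
    test.make_periodic(test.method_1, TestThreading.PERIOD)
    test.stop_threading(test.method_1)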
I'm implementing an image downloader with the producer-consumer model. One thread is responsible for generating (url, filename) pairs and putting them in a queue. I want MAX_THREADS threads to pick up the pairs and start downloading. Here are my threads:
class Extractor(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None, items=None):
        super(Extractor, self).__init__()
        self.target = target
        self.name = name
        self.items = items

    def run(self):
        while True:
            for item in self.items:
                if not QUEUE.full():
                    QUEUE.put_nowait(extract(item))
                    logging.debug('Putting ' + str(item) + ' : ' + str(QUEUE.qsize()) + ' items in queue')

class Downloader(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None):
        super(Downloader, self).__init__()
        self.target = target
        self.name = name
        self.seen = set()

    def run(self):
        while True:
            if not QUEUE.empty():
                pair = QUEUE.get_nowait()
                # I have seen the URL
                if pair[0] in self.seen:
                    continue
                else:
                    # Never seen it before
                    self.seen.add(pair[0])
                    logging.debug('Downloading ' + str(pair[1]) + ' : ' + str(QUEUE.qsize()) + ' items in queue')
                    download_one_pic(pair)

if __name__ == '__main__':
    items = None
    items = crawl('username__', items)
    worker_threads = []
    producer = Extractor(name='Extractor', items=items)
    producer.daemon = True
    producer.start()
    consumer = Downloader(name='Downloader[1]')
    consumer2 = Downloader(name='Downloader[2]')
    worker_threads.append(consumer)
    worker_threads.append(consumer2)
    for thread in worker_threads:
        thread.start()
        thread.join()
The queue has a max size of 50, and I want the producer thread to run regardless of the other threads, so I daemonized it. One weird thing is that the consumer2 thread never gets started, and I have no idea why. As my log shows, only Downloader[1] does the work and the queue keeps fluctuating between 49 and 50, so I know Downloader[2] never gets started.
Calling join() on a thread waits until that thread completes before returning. The loop at the end of your code only ever executes its body once, because the Downloader threads loop forever: you start the first consumer and then block in join() on it. Call start() in one loop, then loop over the threads again to join(), so you only wait after all threads have been started.
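A sketch of just that change, using the names from the question:

for thread in worker_threads:
    thread.start()

# join in a second pass, once every worker is already running
for thread in worker_threads:
    thread.join()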