I've tried to extend the multiprocessing.Process class to use it in a Command pattern way... There is a scheduler instance, where the client invokes commands and calls for execution. But the command code never terminates after self.execute() is called. Here is the command class:
class Command(Process):
def __init__(self):
super().__init__()
self.result = None
self.command_name = type(self).__name__
self.shell = False
#from Process
def run(self):
super().run()
print("running "+self.command_name)
sys.stdout.flush()
self.execute()
print("finished "+self.command_name)
sys.stdout.flush()
sys.exit(0)
def execute(self):
pass
the idea is simple that each sub class of Command provides its own code in the execute() method. For instances:
class LoadCommand(Command):
def __init__(self,parameterA,...):
super().__init__()
...
def execute(self):
print("executing LoadCommand")
....
return
this is my scheduler:
class Scheduler:
_instance = None
_history_queue = []
_command_queue = []
_logger = None
#IPC, negative maxsize means infinite size
_pipe = Queue(maxsize=-1)
def __init__(self):
raise RuntimeError('Call getInstance() instead')
#classmethod
def getInstance(cls):
if cls._instance is None:
cls._instance = cls.__new__(cls)
return cls._instance
def getPipe(self):
print(self._pipe)
return self._pipe
def enqueueCommand(self,command):
# if isinstance(command,Command):
self._command_queue.append(command)
def executeQueue(self, synchronicMode):
while len(self._command_queue) > 0:
command = self._command_queue.pop(0)
command.start()
if synchronicMode:
#wait until this process is done
print("Waiting\n")
command.join(10)
if command.is_alive():
print("process isn't finished")
else:
print("process finished")
self._history_queue.append(command)
I've tried to call sys.exit(0) immediately after the run begins with success (process terminates). So maybe there is an error in the inheritance hierarchy, but I can't see it.
Related
I have a long running task, which for example's sake I have made an infinite while loop:
def long_task(parent, progress_callback):
top = 100000
x = 0
while True:
if x < top:
if not parent.stop:
progress_callback.emit(x)
x += 1
else:
break
else:
x = 0
progress_callback.emit(x)
x += 1
I have a Worker class that subclasses QRunnable, and then I can override the run() method with whatever function is passed to the Worker.
class ThreadWorker(QtCore.QRunnable):
def __init__(self, fn, *args, **kwargs):
super(ThreadWorker, self).__init__()
self.fn = fn
self.args = args
self.kwargs = kwargs
self.signals = ThreadWorkerSignals()
self.kwargs['progress_callback'] = self.signals.progress
self.running = False
#QtCore.pyqtSlot()
def run(self):
self.running = True
try:
result = self.fn(*self.args, **self.kwargs)
except:
traceback.print_exc()
exctype, value = sys.exc_info()[:2]
self.signals.error.emit((exctype, value, traceback.format_exc()))
else:
self.signals.result.emit(result) # Return the result of the processing
finally:
self.signals.finished.emit() # Done
I create two instances of Worker within my MainWindow, and pass the same long-running task to each worker. Both workers are added to my MainWindow's QThreadPool and then I call start(worker) on each to begin the worker's run() method. I now have two threads running the infinite loop:
class MainWindow(QtWidgets.QMainWindow):
def __init__(self):
super().__init__()
## NOT SHOWING THE REST OF INIT CODE
def create_workers(self):
self.worker1 = ThreadWorker(self.long_task, parent=self)
self.worker1.signals.progress.connect(lambda x: self.long_label_1.setText(str(x)))
self.worker2 = ThreadWorker(self.long_task, parent=self)
self.worker2.signals.progress.connect(lambda x: self.long_label_2.setText(str(x)))
self.threadpool.start(self.worker1)
self.threadpool.start(self.worker2)
self.stop = False
Please note the self.stop attribute above - this also belongs to the MainWindow class.
All I want to do is break the loop (interrupt the run() method of a worker) when I press a button.
As you can see, I am referencing parent.stop during every iteration of the worker's while loop. Right now, if I press my button, MainWindow's stop attribute turns True and the loop breaks when the worker class sees this change.
def stop_tasks(self):
self.stop = True
This works fine and accomplishes my goal, but I am wondering if this is dangerous and if there is a better way to do this? I only ask because it seems risky to reference an outside class attribute from within a separate running thread, and I don't know what could go wrong.
I found the default implementation of python's multiprocessing.Queue lacking, in that it's not iterable like any other collection. So I went about the effort of creating a 'subclass' of it, adding the feature in. As you can see from the code below, it's not a proper subclass, as multiprocess.Queue isn't a direct class itself, but a factory function, and the real underlying class is multiprocess.queues.Queue. I don't have the understanding nor effort to expend necessary to go about mimicking the factory function just so I can inherit from the class properly, so I simply had the new class create it's own instance from the factory and treat it as the superclass. Here is the code;
from multiprocessing import Queue, Value, Lock
import queue
class QueueClosed(Exception):
pass
class IterableQueue:
def __init__(self, maxsize=0):
self.closed = Value('b', False)
self.close_lock = Lock()
self.queue = Queue(maxsize)
def close(self):
with self.close_lock:
self.closed.value = True
self.queue.close()
def put(self, elem, block=True, timeout=None):
with self.close_lock:
if self.closed.value:
raise QueueClosed()
else:
self.queue.put(elem, block, timeout)
def put_nowait(self, elem):
self.put(elem, False)
def get(self, block=True):
if not block:
return self.queue.get_nowait()
elif self.closed.value:
try:
return self.queue.get_nowait()
except queue.Empty:
return None
else:
val = None
while not self.closed.value:
try:
val = self.queue.get_nowait()
break
except queue.Empty:
pass
return val
def get_nowait(self):
return self.queue.get_nowait()
def join_thread(self):
return self.queue.join_thread()
def __iter__(self):
return self
def __next__(self):
val = self.get()
if val == None:
raise StopIteration()
else:
return val
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
This allows me to instantiate an IterableQueue object just like a normal multiprocessing.Queue, put elements into it like normal, and then inside child consumers, simply loop over it like so;
from iterable_queue import IterableQueue
from multiprocessing import Process, cpu_count
import os
def fib(n):
if n < 2:
return n
return fib(n-1) + fib(n-2)
def consumer(queue):
print(f"[{os.getpid()}] Consuming")
for i in queue:
print(f"[{os.getpid()}] < {i}")
n = fib(i)
print(f"[{os.getpid()}] {i} > {n}")
print(f"[{os.getpid()}] Closing")
def producer():
print("Enqueueing")
with IterableQueue() as queue:
procs = [Process(target=consumer, args=(queue,)) for _ in range(cpu_count())]
[p.start() for p in procs]
[queue.put(i) for i in range(36)]
print("Finished")
if __name__ == "__main__":
producer()
and it works almost seamlessly; the consumers exit the loop once the queue has been closed, but only after exhausting all remaining elements. However, I was unsatisfied with the lack of inherited methods. In an attempt to mimic actual inheritance behavior, I tried adding the following meta function call to the class;
def __getattr__(self, name):
if name in self.__dict__:
return self.__dict__[name]
else:
return self.queue.__getattr__[name]
However, this fails when instances of the IterableQueue class are manipulated inside child multiprocessing.Process threads, as the class's __dict__ property is not preserved within them. I attempted to remedy this in a hacky manner by replacing the class's default __dict__ with a multiprocessing.Manager().dict(), like so;
def __init__(self, maxsize=0):
self.closed = Value('b', False)
self.close_lock = Lock()
self.queue = Queue(maxsize)
self.__dict__ = Manager().dict(self.__dict__)
However on doing so, I received an error stating RuntimeError: Synchronized objects should only be shared between processes through inheritance. So my question is, how should I go about inheriting from the Queue class properly such that the subclass has inherited access to all of it's properties? In addition, while the queue is empty but not closed, the consumers all sit in a busy loop instead of a true IO block, taking up valuable cpu resources. If you have any suggestions on concurrency and race condition issues I might run into with this code, or how I might solve the busy loop issue, I'd be willing to take suggestions therein as well.
Based on code provided by MisterMiyagi, I created this general purpose IterableQueue class which can accept arbitrary input, blocks properly, and does not hang on queue close;
from multiprocessing.queues import Queue
from multiprocessing import get_context
class QueueClosed(Exception):
pass
class IterableQueue(Queue):
def __init__(self, maxsize=0, *, ctx=None):
super().__init__(
maxsize=maxsize,
ctx=ctx if ctx is not None else get_context()
)
def close(self):
super().put((None, False))
super().close()
def __iter__(self):
return self
def __next__(self):
try:
return self.get()
except QueueClosed:
raise StopIteration
def get(self, *args, **kwargs):
result, is_open = super().get(*args, **kwargs)
if not is_open:
super().put((None, False))
raise QueueClosed
return result
def put(self, val, *args, **kwargs):
super().put((val, True), *args, **kwargs)
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
The multiprocess.Queue wrapper only serves to use the default context.
def Queue(self, maxsize=0):
'''Returns a queue object'''
from .queues import Queue
return Queue(maxsize, ctx=self.get_context())
When inheriting, you can replicate this in the __init__ method. This allows you to inherit the entire Queue behaviour. You only need to add the iterator methods:
from multiprocessing.queues import Queue
from multiprocessing import get_context
class IterableQueue(Queue):
"""
``multiprocessing.Queue`` that can be iterated to ``get`` values
:param sentinel: signal that no more items will be received
"""
def __init__(self, maxsize=0, *, ctx=None, sentinel=None):
self.sentinel = sentinel
super().__init__(
maxsize=maxsize,
ctx=ctx if ctx is not None else get_context()
)
def close(self):
self.put(self.sentinel)
# wait until buffer is flushed...
while self._buffer:
time.sleep(0.01)
# before shutting down the sender
super().close()
def __iter__(self):
return self
def __next__(self):
result = self.get()
if result == self.sentinel:
# re-queue sentinel for other listeners
self.put(result)
raise StopIteration
return result
Note that the sentinel to indicate end-of-queue is compared by equality, because identity is not preserved across processes. The often-used queue.Queue sentinel object() does not work properly with this.
I am trying to run a function "generate_model" on thread which takes three arguments.
def thread_for_generate_model(Thread):
def __init__(self, name, job_id, boolean_string, Batch_size):
self.name = name
self.job_id = job_id
self.boolean_string = boolean_string
self.Batch_size = Batch_size
def run(self):
LOGGER.info("vector model create started for job_id: %s on thread %s", self.job_id, self.name)
generate_model(self.job_id, self.boolean_string, self.Batch_size)
LOGGER.info("vector model created for job_id: %s", self.job_id)
def main():
....
thread_for_generate_model("Thread_for_vectormodel", job_id, generate_search_string(job_id,keywords), 5000).start()
# I am trying to run this function on a thread
# generate_model(job_id, generate_search_string(job_id,keywords), 5000)
....
I got an error ,
TypeError: thread_for_generate_model() takes 1 positional argument but 4 were given
by solution in the link, I have modified as below by adding an additional parameter
def run(self, event= None)
but still has the same error. how to rectify it?
Code below should do what you are trying to do - I have just added a few dummy functions etc. to get the code not throw syntax error or undefined functions/variables etc.. This is roughly the structure you can follow.
As pointed out in the comments - use def something to define a method. and class Something to define a class.
from threading import Thread
import logging
import time
LOGGER = logging.getLogger()
logging.basicConfig()
class thread_for_generate_model(Thread):
def __init__(self, name, job_id, boolean_string, Batch_size):
Thread.__init__(self)
self.name = name
self.job_id = job_id
self.boolean_string = boolean_string
self.Batch_size = Batch_size
def run(self):
LOGGER.info("vector model create started for job_id: %s on thread %s", self.job_id, self.name)
generate_model(self.job_id, self.boolean_string, self.Batch_size)
LOGGER.info("vector model created for job_id: %s", self.job_id)
def generate_search_string(job_id, keywords):
return False
def generate_model(job_id, string, batch_size):
while True:
time.sleep(1)
def main():
job_id = 0
keywords = ['a', 'b']
thread_for_generate_model("Thread_for_vectormodel", job_id, generate_search_string(job_id,keywords), 5000).start()
# I am trying to run this function on a thread
# generate_model(job_id, generate_search_string(job_id,keywords), 5000)
if __name__ == '__main__':
main()
I have been trying to get my application to terminate gracefully for quite some time now, but so far none of the answers I have found worked.
The sample code below illustrates the structure of my application. It basically is a chain of threads that passes data to one another using Queues.
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys
class StoppableThread(Thread):
def __init__(self):
super().__init__()
self.stopper = Event()
self.queue = Queue()
#abstractmethod
def actual_job(self):
pass
def stop_running(self):
self.stopper.set()
def run(self):
while not self.stopper.is_set():
print(self.stopper.is_set())
self.actual_job()
self.queue.join()
class SomeObjectOne(StoppableThread):
def __init__(self, name, some_object_two):
super().__init__()
self.name = name
self.obj_two = some_object_two
def actual_job(self):
# print('{} is currently running'.format(self.name))
input_string = 'some string'
print('{} outputs {}'.format(self.name, input_string))
self.obj_two.queue.put(input_string)
sleep(2)
class SomeObjectTwo(StoppableThread):
def __init__(self, name, some_object_three):
super().__init__()
self.name = name
self.some_object_three = some_object_three
def actual_job(self):
# print('{} is currently running'.format(self.name))
some_string = self.queue.get()
inverted = some_string[::-1]
print('{} outputs {}'.format(self.name , inverted))
self.some_object_three.queue.put(inverted)
sleep(2)
class SomeObjectThree(StoppableThread):
def __init__(self, name):
super().__init__()
self.name = name
def actual_job(self):
print('{} is currently running'.format(self.name))
some_string = self.queue.get()
print('{} outputs {}'.format(self.name ,some_string[::-1]))
sleep(2)
class ServiceExit(Exception):
"""
Custom exception which is used to trigger the clean exit
of all running threads and the main program.
"""
pass
def service_shutdown(signum, frame):
print('Caught signal %d' % signum)
raise ServiceExit
signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)
if __name__ == '__main__':
thread_three = SomeObjectThree('SomeObjectThree')
thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
thread_one = SomeObjectOne('SomeObjectOne', thread_two)
try:
thread_three.start()
thread_two.start()
thread_one.start()
# Keep the main thread running, otherwise signals are ignored.
while True:
sleep(0.5)
except ServiceExit:
print('Running service exit')
thread_three.stop_running()
thread_two.stop_running()
thread_one.stop_running()
thread_one.join()
thread_two.join()
thread_three.join()
sys.exit(0)
Now, if I run this code and ctrl-C to terminate, thread_one seems to join as expected, but the code gets stuck at thread_two.join().
Because thread_one is the only thread with a continuous empty queue, I expect it has something to do with the queue.
Any ideas?
In the run() method of StoppableThread you have this:
self.queue.join()
join() is a blocking method:
Blocks until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls
task_done() to indicate that the item was retrieved and all work on it
is complete. When the count of unfinished tasks drops to zero, join()
unblocks.
So in order for join() to return, it's not enough to get() an item in the other thread, you must also indicate that it's been processed with task_done():
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys
class StoppableThread(Thread):
def __init__(self):
super().__init__()
self.stopper = Event()
self.queue = Queue()
#abstractmethod
def actual_job(self):
pass
def stop_running(self):
self.stopper.set()
def run(self):
while not self.stopper.is_set():
print(self.stopper.is_set())
self.actual_job()
self.queue.join()
class SomeObjectOne(StoppableThread):
def __init__(self, name, some_object_two):
super().__init__()
self.name = name
self.obj_two = some_object_two
def actual_job(self):
# print('{} is currently running'.format(self.name))
input_string = 'some string'
print('{} outputs {}'.format(self.name, input_string))
self.obj_two.queue.put(input_string)
sleep(2)
class SomeObjectTwo(StoppableThread):
def __init__(self, name, some_object_three):
super().__init__()
self.name = name
self.some_object_three = some_object_three
def actual_job(self):
# print('{} is currently running'.format(self.name))
some_string = self.queue.get()
inverted = some_string[::-1]
print('{} outputs {}'.format(self.name , inverted))
self.queue.task_done()
self.some_object_three.queue.put(inverted)
sleep(2)
class SomeObjectThree(StoppableThread):
def __init__(self, name):
super().__init__()
self.name = name
def actual_job(self):
print('{} is currently running'.format(self.name))
some_string = self.queue.get()
print('{} outputs {}'.format(self.name ,some_string[::-1]))
self.queue.task_done()
sleep(2)
class ServiceExit(Exception):
"""
Custom exception which is used to trigger the clean exit
of all running threads and the main program.
"""
pass
def service_shutdown(signum, frame):
print('Caught signal %d' % signum)
raise ServiceExit
signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)
if __name__ == '__main__':
thread_three = SomeObjectThree('SomeObjectThree')
thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
thread_one = SomeObjectOne('SomeObjectOne', thread_two)
try:
thread_three.start()
thread_two.start()
thread_one.start()
# Keep the main thread running, otherwise signals are ignored.
while True:
sleep(0.5)
except ServiceExit:
print('Running service exit')
thread_three.stop_running()
thread_two.stop_running()
thread_one.stop_running()
thread_one.join()
thread_two.join()
thread_three.join()
asyncio.Task.all_tasks() gives a list of all tasks for an event loop, but I can't find anything similar for sockets, and in particular, datagram sockets associated with a loop?
The absence of sockets & tasks could then signal "end of life" for the loop.
The question is, in the following example, what to put in loop_not_empty() that makes it return False when the task set is empty and there are no associated sockets (ie after two seconds)
Example:
import asyncio
import socket
import threading
class Handler(asyncio.Protocol):
def connection_made(self, transport):
self.transport = transport
print("connection made")
def datagram_received(self, data, addr):
if data == b'die':
print("shutting down")
self.transport.abort()
#asyncio.coroutine
def sometask():
yield from asyncio.sleep(1)
print("task done")
def loop_not_empty(l):
# if asyncio.Task.all_tasks() == set() and WHAT_GOES_HERE
# return False
return True
def main():
a,b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
l = asyncio.get_event_loop()
asyncio.ensure_future(sometask(), loop=l)
asyncio.ensure_future(l.create_datagram_endpoint(Handler, sock=a), loop=l)
threading.Timer(2, lambda: b.send(b'die')).start()
while loop_not_empty(l):
l.run_until_complete(asyncio.sleep(1, loop=l))
main()
Here is a solution that uses a simple class and asyncio.Event() to count the number of active jobs and signals the loop to stop when all jobs are done:
import asyncio
import random
class UseCounter:
def __init__(self, loop=None):
self.loop = loop
self.event = asyncio.Event(loop=loop)
self.n = 0 # The number of active jobs
def __enter__(self):
self.enter()
def __exit__(self, exc_type, exc_val, exc_tb):
self.exit()
def enter(self):
self.n += 1
def exit(self):
self.n -= 1
if self.n == 0:
self.event.set()
async def wait(self):
return await self.event.wait()
async def my_coroutine(counter, term):
with counter:
print("start", term)
n = random.uniform(0.2, 1.5)
await asyncio.sleep(n)
print("end", term)
loop = asyncio.get_event_loop()
counter = UseCounter(loop)
terms = ["apple", "banana", "melon"]
for term in terms:
asyncio.ensure_future(my_coroutine(counter, term))
loop.run_until_complete(counter.wait())
loop.close()
For your example above, add .enter() to connection_made() and .exit() to connection_lost().