How to gracefully terminate a multithreaded Python application that uses queue.Queue - multithreading

I have been trying to get my application to terminate gracefully for quite some time now, but so far none of the answers I have found worked.
The sample code below illustrates the structure of my application. It basically is a chain of threads that passes data to one another using Queues.
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys
class StoppableThread(Thread):
def __init__(self):
super().__init__()
self.stopper = Event()
self.queue = Queue()
#abstractmethod
def actual_job(self):
pass
def stop_running(self):
self.stopper.set()
def run(self):
while not self.stopper.is_set():
print(self.stopper.is_set())
self.actual_job()
self.queue.join()
class SomeObjectOne(StoppableThread):
def __init__(self, name, some_object_two):
super().__init__()
self.name = name
self.obj_two = some_object_two
def actual_job(self):
# print('{} is currently running'.format(self.name))
input_string = 'some string'
print('{} outputs {}'.format(self.name, input_string))
self.obj_two.queue.put(input_string)
sleep(2)
class SomeObjectTwo(StoppableThread):
def __init__(self, name, some_object_three):
super().__init__()
self.name = name
self.some_object_three = some_object_three
def actual_job(self):
# print('{} is currently running'.format(self.name))
some_string = self.queue.get()
inverted = some_string[::-1]
print('{} outputs {}'.format(self.name , inverted))
self.some_object_three.queue.put(inverted)
sleep(2)
class SomeObjectThree(StoppableThread):
def __init__(self, name):
super().__init__()
self.name = name
def actual_job(self):
print('{} is currently running'.format(self.name))
some_string = self.queue.get()
print('{} outputs {}'.format(self.name ,some_string[::-1]))
sleep(2)
class ServiceExit(Exception):
"""
Custom exception which is used to trigger the clean exit
of all running threads and the main program.
"""
pass
def service_shutdown(signum, frame):
print('Caught signal %d' % signum)
raise ServiceExit
signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)
if __name__ == '__main__':
thread_three = SomeObjectThree('SomeObjectThree')
thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
thread_one = SomeObjectOne('SomeObjectOne', thread_two)
try:
thread_three.start()
thread_two.start()
thread_one.start()
# Keep the main thread running, otherwise signals are ignored.
while True:
sleep(0.5)
except ServiceExit:
print('Running service exit')
thread_three.stop_running()
thread_two.stop_running()
thread_one.stop_running()
thread_one.join()
thread_two.join()
thread_three.join()
sys.exit(0)
Now, if I run this code and ctrl-C to terminate, thread_one seems to join as expected, but the code gets stuck at thread_two.join().
Because thread_one is the only thread with a continuous empty queue, I expect it has something to do with the queue.
Any ideas?

In the run() method of StoppableThread you have this:
self.queue.join()
join() is a blocking method:
Blocks until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls
task_done() to indicate that the item was retrieved and all work on it
is complete. When the count of unfinished tasks drops to zero, join()
unblocks.
So in order for join() to return, it's not enough to get() an item in the other thread, you must also indicate that it's been processed with task_done():
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys
class StoppableThread(Thread):
def __init__(self):
super().__init__()
self.stopper = Event()
self.queue = Queue()
#abstractmethod
def actual_job(self):
pass
def stop_running(self):
self.stopper.set()
def run(self):
while not self.stopper.is_set():
print(self.stopper.is_set())
self.actual_job()
self.queue.join()
class SomeObjectOne(StoppableThread):
def __init__(self, name, some_object_two):
super().__init__()
self.name = name
self.obj_two = some_object_two
def actual_job(self):
# print('{} is currently running'.format(self.name))
input_string = 'some string'
print('{} outputs {}'.format(self.name, input_string))
self.obj_two.queue.put(input_string)
sleep(2)
class SomeObjectTwo(StoppableThread):
def __init__(self, name, some_object_three):
super().__init__()
self.name = name
self.some_object_three = some_object_three
def actual_job(self):
# print('{} is currently running'.format(self.name))
some_string = self.queue.get()
inverted = some_string[::-1]
print('{} outputs {}'.format(self.name , inverted))
self.queue.task_done()
self.some_object_three.queue.put(inverted)
sleep(2)
class SomeObjectThree(StoppableThread):
def __init__(self, name):
super().__init__()
self.name = name
def actual_job(self):
print('{} is currently running'.format(self.name))
some_string = self.queue.get()
print('{} outputs {}'.format(self.name ,some_string[::-1]))
self.queue.task_done()
sleep(2)
class ServiceExit(Exception):
"""
Custom exception which is used to trigger the clean exit
of all running threads and the main program.
"""
pass
def service_shutdown(signum, frame):
print('Caught signal %d' % signum)
raise ServiceExit
signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)
if __name__ == '__main__':
thread_three = SomeObjectThree('SomeObjectThree')
thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
thread_one = SomeObjectOne('SomeObjectOne', thread_two)
try:
thread_three.start()
thread_two.start()
thread_one.start()
# Keep the main thread running, otherwise signals are ignored.
while True:
sleep(0.5)
except ServiceExit:
print('Running service exit')
thread_three.stop_running()
thread_two.stop_running()
thread_one.stop_running()
thread_one.join()
thread_two.join()
thread_three.join()

Related

Python3: Subclass of Process, call methods in run()

I've tried to extend the multiprocessing.Process class to use it in a Command pattern way... There is a scheduler instance, where the client invokes commands and calls for execution. But the command code never terminates after self.execute() is called. Here is the command class:
class Command(Process):
def __init__(self):
super().__init__()
self.result = None
self.command_name = type(self).__name__
self.shell = False
#from Process
def run(self):
super().run()
print("running "+self.command_name)
sys.stdout.flush()
self.execute()
print("finished "+self.command_name)
sys.stdout.flush()
sys.exit(0)
def execute(self):
pass
the idea is simple that each sub class of Command provides its own code in the execute() method. For instances:
class LoadCommand(Command):
def __init__(self,parameterA,...):
super().__init__()
...
def execute(self):
print("executing LoadCommand")
....
return
this is my scheduler:
class Scheduler:
_instance = None
_history_queue = []
_command_queue = []
_logger = None
#IPC, negative maxsize means infinite size
_pipe = Queue(maxsize=-1)
def __init__(self):
raise RuntimeError('Call getInstance() instead')
#classmethod
def getInstance(cls):
if cls._instance is None:
cls._instance = cls.__new__(cls)
return cls._instance
def getPipe(self):
print(self._pipe)
return self._pipe
def enqueueCommand(self,command):
# if isinstance(command,Command):
self._command_queue.append(command)
def executeQueue(self, synchronicMode):
while len(self._command_queue) > 0:
command = self._command_queue.pop(0)
command.start()
if synchronicMode:
#wait until this process is done
print("Waiting\n")
command.join(10)
if command.is_alive():
print("process isn't finished")
else:
print("process finished")
self._history_queue.append(command)
I've tried to call sys.exit(0) immediately after the run begins with success (process terminates). So maybe there is an error in the inheritance hierarchy, but I can't see it.

Mocking REST APIs with Flask_restful using threading

I'm looking to mock a set of REST APIs for some tests. The following main() function works fine (i.e. it returns {"some-data": 1234} as json to the browser when I GET localhost:8099). The issue is it blocks the main thread:
from gevent import monkey, sleep, pywsgi
monkey.patch_all()
import flask
from flask_restful import reqparse, abort, Api, Resource
import queue
import sys
import threading
STUFFS = {"some-data": 1234}
class Stuff(Resource):
def get(self):
return flask.jsonify(STUFFS)
class ControlThread(threading.Thread):
def __init__(self, http_server, stop_event):
threading.Thread.__init__(self)
self.stop_event = stop_event
self.http_server = http_server
self.running = False
def run(self):
try:
while not self.stop_event.is_set():
if not self.running:
self.http_server.start()
self.running = True
sleep(0.001)
except (KeyboardInterrupt, SystemExit):
pass
self.http_server.stop()
class StuffMock:
def __init__(self, port, name=None):
if name is None:
name = __name__
self.app = flask.Flask(name)
self.api = Api(self.app)
self.api.add_resource(Stuff, "/stuff/")
self.stop_event = threading.Event()
self.http_server = pywsgi.WSGIServer(('', port), self.app)
self.serving_thread = ControlThread(self.http_server,
self.stop_event)
self.serving_thread.daemon = True
def start(self):
self.serving_thread.start()
def stop(self):
self.stop_event.set()
self.serving_thread.join()
def main():
mocker = StuffMock(8099)
mocker.start()
try:
while True:
sleep(0.01)
except (KeyboardInterrupt, SystemExit):
mocker.stop()
sys.exit()
if __name__ == "__main__":
main()
Without the sleep() call in the while loop above, nothing resolves. Here is a more succinct usage to demonstrate:
import time
from stuff_mock import StuffMock
mocker = StuffMock(8099)
mocker.start()
while True:
user_text = input("let's do some work on the main thread: ")
# will only resolve the GET request after user input
# (i.e. when the main thread executes this sleep call)
time.sleep(0.1)
if user_text == "q":
break
mocker.stop()
The gevent threading module seems to work differently from the core one. Does anyone have any tips or ideas about what's going on under the hood?
Found that if I switch out threading for multiprocessing (and threading.Thread for multiprocessing.Process), everything works as expected, and I can spin up arbitrary numbers of mockers without blocking.

Running a function on thread gives an error

I am trying to run a function "generate_model" on thread which takes three arguments.
def thread_for_generate_model(Thread):
def __init__(self, name, job_id, boolean_string, Batch_size):
self.name = name
self.job_id = job_id
self.boolean_string = boolean_string
self.Batch_size = Batch_size
def run(self):
LOGGER.info("vector model create started for job_id: %s on thread %s", self.job_id, self.name)
generate_model(self.job_id, self.boolean_string, self.Batch_size)
LOGGER.info("vector model created for job_id: %s", self.job_id)
def main():
....
thread_for_generate_model("Thread_for_vectormodel", job_id, generate_search_string(job_id,keywords), 5000).start()
# I am trying to run this function on a thread
# generate_model(job_id, generate_search_string(job_id,keywords), 5000)
....
I got an error ,
TypeError: thread_for_generate_model() takes 1 positional argument but 4 were given
by solution in the link, I have modified as below by adding an additional parameter
def run(self, event= None)
but still has the same error. how to rectify it?
Code below should do what you are trying to do - I have just added a few dummy functions etc. to get the code not throw syntax error or undefined functions/variables etc.. This is roughly the structure you can follow.
As pointed out in the comments - use def something to define a method. and class Something to define a class.
from threading import Thread
import logging
import time
LOGGER = logging.getLogger()
logging.basicConfig()
class thread_for_generate_model(Thread):
def __init__(self, name, job_id, boolean_string, Batch_size):
Thread.__init__(self)
self.name = name
self.job_id = job_id
self.boolean_string = boolean_string
self.Batch_size = Batch_size
def run(self):
LOGGER.info("vector model create started for job_id: %s on thread %s", self.job_id, self.name)
generate_model(self.job_id, self.boolean_string, self.Batch_size)
LOGGER.info("vector model created for job_id: %s", self.job_id)
def generate_search_string(job_id, keywords):
return False
def generate_model(job_id, string, batch_size):
while True:
time.sleep(1)
def main():
job_id = 0
keywords = ['a', 'b']
thread_for_generate_model("Thread_for_vectormodel", job_id, generate_search_string(job_id,keywords), 5000).start()
# I am trying to run this function on a thread
# generate_model(job_id, generate_search_string(job_id,keywords), 5000)
if __name__ == '__main__':
main()

What is the best way to run a Python function after some PyQt5 QThread classes finish work?

I'm using PyQt5 and Python3, I use 3 QThread classes to run something and after they are done I need to execute a 4th QThread class. But the execution of the 4th need to take place after all of the QThread classes finish work, or only 2 or only 1. It must not run while the first 3 are working.
I looked on the internet but I couldn't find a solution. My code looks like this:
class MyWindow(QtWidgets.QMainWindow):
def __init__(self):
QtWidgets.QMainWindow.__init__(self)
file_path = os.path.abspath('builder_gui.ui')
uic.loadUi(file_path, self)
self.obj1 = TasksThread1(self.comboBox.currentText(),self.comboBox_6.currentText())
self.obj2 = TasksThread2(self.comboBox_2.currentText(),self.comboBox_5.currentText())
self.obj3 = TasksThread3(self.comboBox_3.currentText(),self.comboBox_4.currentText())
self.obj4 = TasksThread4()
self.menubar.setNativeMenuBar(False)
self.progressVal = 1
self.cwd = os.getcwd()
self.obj1.newValueProgress.connect(self.increment_progress)
self.obj1.message.connect(self.status_bar)
self.obj2.newValueProgress.connect(self.increment_progress)
self.obj2.message.connect(self.status_bar)
self.obj3.newValueProgress.connect(self.increment_progress)
self.obj3.message.connect(self.status_bar)
self.obj4.newValueProgress.connect(self.increment_progress)
self.obj4.message.connect(self.status_bar)
self.obj4.doneSignal.connect(self.calculate_done_limit)
self.pushButton.pressed.connect(self.execute_build_script)
def calculate_done_limit(self):
limitCalc = 100 - int(self.progressBar.value())
self.increment_progress(limitCalc)
def run_gits_all(self):
if self.crowdTwistCheck.isChecked():
self.obj1.start()
else:
pass
if self.ThemeCheck.isChecked():
self.obj2.start()
else:
pass
if self.mainAwsCheck.isChecked():
self.obj3.start()
else:
pass
def execute_build_script(self):
self.progressBar.setValue(1)
self.progressVal = 1
self.run_gits_all()
def execute_last_part(self):
self.obj4.start()
def status_bar(self, value_in):
read1 = self.textBrowser.toPlainText()
self.textBrowser.setText(read1 + "\n" + value_in)
def increment_progress(self,valueIn):
self.progressVal += valueIn
self.progressBar.setValue(self.progressVal)
if __name__ == '__main__':
import sys
app = QtWidgets.QApplication(sys.argv)
window = MyWindow()
window.show()
sys.exit(app.exec_())
My first 3 QThreads are like this:
class TasksThread1(QThread):
newValueProgress = QtCore.pyqtSignal(int)
message = QtCore.pyqtSignal(str)
doneSignal = QtCore.pyqtSignal()
def __init__(self, branch, git):
QThread.__init__(self)
self.branch = branch
self.git = git
def remove_folder(self):
do_something_1
def CrowdTwistRepo(self):
do_something_2
def run(self):
self.remove_folder()
self.CrowdTwistRepo()
My last QThread looks like this:
class TasksThread4(QThread):
newValueProgress = QtCore.pyqtSignal(int)
message = QtCore.pyqtSignal(str)
doneSignal = QtCore.pyqtSignal()
def __init__(self):
QThread.__init__(self)
def gulp_sass_function(self):
do_something_1
def gulp_uglify_function(self):
do_something_2
def zipping_function(self):
do_something_3
def run(self):
self.gulp_sass_function()
self.gulp_uglify_function()
self.zipping_function()
If I run the code, all of the QThreads start and I want my 4th QThread to start only after the first 3 have done working. I used QThreads to improve the GUI experience, the GUI froze alot.
thanks,
When your first 3 threads are done, send a signal. Then connect this signal to a function that will start the last thread.

how to check if asyncio loop has any associated sockets

asyncio.Task.all_tasks() gives a list of all tasks for an event loop, but I can't find anything similar for sockets, and in particular, datagram sockets associated with a loop?
The absence of sockets & tasks could then signal "end of life" for the loop.
The question is, in the following example, what to put in loop_not_empty() that makes it return False when the task set is empty and there are no associated sockets (ie after two seconds)
Example:
import asyncio
import socket
import threading
class Handler(asyncio.Protocol):
def connection_made(self, transport):
self.transport = transport
print("connection made")
def datagram_received(self, data, addr):
if data == b'die':
print("shutting down")
self.transport.abort()
#asyncio.coroutine
def sometask():
yield from asyncio.sleep(1)
print("task done")
def loop_not_empty(l):
# if asyncio.Task.all_tasks() == set() and WHAT_GOES_HERE
# return False
return True
def main():
a,b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
l = asyncio.get_event_loop()
asyncio.ensure_future(sometask(), loop=l)
asyncio.ensure_future(l.create_datagram_endpoint(Handler, sock=a), loop=l)
threading.Timer(2, lambda: b.send(b'die')).start()
while loop_not_empty(l):
l.run_until_complete(asyncio.sleep(1, loop=l))
main()
Here is a solution that uses a simple class and asyncio.Event() to count the number of active jobs and signals the loop to stop when all jobs are done:
import asyncio
import random
class UseCounter:
def __init__(self, loop=None):
self.loop = loop
self.event = asyncio.Event(loop=loop)
self.n = 0 # The number of active jobs
def __enter__(self):
self.enter()
def __exit__(self, exc_type, exc_val, exc_tb):
self.exit()
def enter(self):
self.n += 1
def exit(self):
self.n -= 1
if self.n == 0:
self.event.set()
async def wait(self):
return await self.event.wait()
async def my_coroutine(counter, term):
with counter:
print("start", term)
n = random.uniform(0.2, 1.5)
await asyncio.sleep(n)
print("end", term)
loop = asyncio.get_event_loop()
counter = UseCounter(loop)
terms = ["apple", "banana", "melon"]
for term in terms:
asyncio.ensure_future(my_coroutine(counter, term))
loop.run_until_complete(counter.wait())
loop.close()
For your example above, add .enter() to connection_made() and .exit() to connection_lost().

Resources