Producer-consumer in Python with multithreading

For the producer-consumer problem I have come up with this solution:
import threading
import random
import time

class Bucket:
    def __init__(self, size):
        self.size = size
        self.current_size = 0
        self.cond_var = threading.Condition()

    def available_for_put(self):
        return self.current_size < self.size

    def available_for_get(self):
        return self.current_size > 0

    def put(self):
        self.current_size = self.current_size + 1
        print(self)
        self.cond_var.notify_all()

    def get(self):
        self.current_size = self.current_size - 1
        print(self)
        self.cond_var.notify_all()

    def acquire(self):
        self.cond_var.acquire()

    def release(self):
        self.cond_var.release()

    def wait(self):
        self.cond_var.wait()

    def __str__(self):
        return "Size is {0}".format(self.current_size)

class Worker(threading.Thread):
    PRODUCER = 1
    CONSUMER = 0

    def __init__(self, bucket, kind):
        threading.Thread.__init__(self)
        self.kind = kind
        self.bucket = bucket

    def run(self):
        while(1):
            self.bucket.acquire()
            while(((self.kind == Worker.PRODUCER) and (not self.bucket.available_for_put())) or \
                  ((self.kind == Worker.CONSUMER) and (not self.bucket.available_for_get()))):
                self.bucket.wait()
                ### self.bucket.acquire()
            if self.kind == Worker.PRODUCER:
                self.bucket.put()
            else:
                self.bucket.get()
            time.sleep(0.1)
            self.bucket.release()

bucket = Bucket(10)
workers = []
for i in range(10):
    workers.append(Worker(bucket, i % 2))
for w in workers:
    w.start()
    print("Thread started")
for w in workers:
    w.join()
Apparently, if I remove the while(1) loop and make every thread run the block inside it only once, the program reaches a deadlock, and I can't understand why.

The producer-consumer pattern can be readily implemented using Python's built-in queue support:
queue — a synchronized queue class
This could simplify your code. Also very useful is the event scheduler:
sched — event scheduler
And since your question is tagged python-3.x, you should definitely have a look at the concurrent.futures module:
concurrent.futures — launching parallel tasks
Your workers could become tasks and the bucket could become a queue.
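A minimal sketch of that direction, keeping the bucket capacity of 10 from your code (queue.Queue does all the locking and waiting that Bucket does by hand):
import queue
import threading
import time

bucket = queue.Queue(maxsize=10)  # bounded queue: put() blocks when full, get() when empty

def producer():
    for i in range(100):
        bucket.put(i)             # waits automatically while the bucket is full
        print("put", i)
        time.sleep(0.1)

def consumer():
    while True:
        item = bucket.get()       # waits automatically while the bucket is empty
        print("got", item)
        bucket.task_done()

threading.Thread(target=producer).start()
threading.Thread(target=consumer, daemon=True).start()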

Apparently the problem is that wait() reacquires the lock before it returns, so the now commented-out second acquire() would block forever.
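Put differently, Condition.wait() releases the lock while blocked and already holds it again on wakeup, so the loop body must not acquire a second time. A sketch of a corrected Worker.run() for the code above (can_proceed() is a hypothetical helper combining the two availability checks, and the condition variable is used directly as a context manager for brevity):
def run(self):
    while True:
        with self.bucket.cond_var:            # acquired exactly once per iteration
            while not self.can_proceed():     # hypothetical: available_for_put()/available_for_get() by kind
                self.bucket.cond_var.wait()   # releases the lock, holds it again on wakeup
            if self.kind == Worker.PRODUCER:
                self.bucket.put()
            else:
                self.bucket.get()
        time.sleep(0.1)                       # sleep outside the lock so other threads can make progress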

Related

Producer-consumer message sharing not working in multiprocessing

I am trying to run a scenario where a producer captures frames from a webcam and puts them in a queue, and a consumer then reads images from that input queue, does some processing, and puts the output image in an outgoing queue.
The issue is that the consumer's read from the queue is not blocking. Ideally it should be; also, when it reads a value from the queue, the size is always a constant 128, which is wrong. I am sure the size of the image that I am putting in the queue is far greater.
from __future__ import print_function
import multiprocessing
import time
import logging
import sys
import cv2

class Consumer(multiprocessing.Process):
    def __init__(self, incoming_q, outgoing_q):
        multiprocessing.Process.__init__(self)
        self.outgoing_q = outgoing_q
        self.incoming_q = incoming_q

    def run(self):
        proc_name = self.name
        print(f"{proc_name} - inside process_feed..starting")
        while True:
            #print(f"size of incoming_q=>{self.incoming_q.qsize()}")
            try:
                #print(f"{proc_name} - size of B incoming_q=>{self.incoming_q.qsize()}")
                image_np = self.incoming_q.get(True)
                size_of_img = sys.getsizeof(image_np)
                #print(f"{proc_name} - size of A incoming_q=>{self.incoming_q.qsize()}")
                if size_of_img > 128:
                    print(f"{proc_name} - size image=>{size_of_img}")
                    time.sleep(1)
                    self.outgoing_q.put_nowait(image_np)
            except:
                pass
        print("inside process_feed..ending")

class Producer(multiprocessing.Process):
    def __init__(self, incoming_q, outgoing_q):
        multiprocessing.Process.__init__(self)
        self.incoming_q = incoming_q
        self.outgoing_q = outgoing_q

    def run(self):
        proc_name = self.name
        print("inside capture_feed")
        stream = cv2.VideoCapture(0)
        try:
            counter = 0
            while True:
                counter += 1
                if counter == 1:
                    if not self.incoming_q.full():
                        (grabbed, image_np) = stream.read()
                        size_of_img = sys.getsizeof(image_np)
                        print(f"{proc_name}........B.......=>{self.incoming_q.qsize()}")
                        print(f"{proc_name} - size image=>{size_of_img}")
                        self.incoming_q.put(image_np)
                        print(f"{proc_name}........A.......=>{self.incoming_q.qsize()}")
                    counter = 0
                try:
                    image_np = self.outgoing_q.get_nowait()
                    logging.info("reading value for o/p")
                    cv2.imshow('object detection', image_np)
                except:
                    pass
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            stream.release()
            cv2.destroyAllWindows()
            print("inside capture_feed..ending")

if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    stream = cv2.VideoCapture(0)
    incoming_q = multiprocessing.Queue(maxsize=100)
    outgoing_q = multiprocessing.Queue(maxsize=100)
    logging.info("before start of thread")
    max_process = 1
    processes = []
    processes.append(Producer(incoming_q, outgoing_q))
    for i in range(max_process):
        p = Consumer(incoming_q, outgoing_q)
        p.daemon = True
        processes.append(p)
    logging.info("inside main thread..middle")
    for p in processes:
        p.start()
    logging.info("inside main thread..ending")
    logging.info("waiting in main thread too....")
    logging.info("waiting in main thread finished....")
    for p in processes:
        p.join()
    logging.info("inside main thread..ended")
I was able to figure out the issue with my approach: I had missed the whole concept of pickling (serialization).
I changed my code to serialize the numpy array before writing it to the queue and deserialize it after reading, and the code started working as expected.
Also, printing 128 as the size of the np array is fine; I was misinterpreting that number (sys.getsizeof reports only the array object's header, not the underlying pixel buffer, for an array that does not own its data).
import pickle
import numpy as np

def serialize_ndarray(arr: np.ndarray):
    serialized = pickle.dumps(arr)
    return serialized

def deserialize_ndarray(string):
    data = pickle.loads(string)
    return data
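Usage is then symmetric on both sides of the queue; a sketch reusing the queue names from the code above (the zero-filled frame stands in for a real stream.read() capture):
# producer side
frame = np.zeros((480, 640, 3), dtype=np.uint8)   # stand-in for a captured frame
incoming_q.put(serialize_ndarray(frame))

# consumer side
image_np = deserialize_ndarray(incoming_q.get(True))
print(image_np.shape, image_np.nbytes)            # nbytes gives the real buffer size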

Getting a return value from multithreading in Python 3

I'm trying to get one or several return values from a thread in a multithreaded program. The code shown below gets stuck in a cycle, with no way to interrupt it with Ctrl-C or Ctrl-D.
import queue as Queue
import threading

class myThread(threading.Thread):
    def __init__(self, threadID, name, region):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.region = region

    def run(self):
        GetSales(self.region)

def GetSales(strReg):
    print("Thread-" + strReg)
    return "Returning-" + strReg

def Main():
    RegionList = []
    RegionList.append("EMEA")
    RegionList.append("AP")
    RegionList.append("AM")
    # Create threads
    threads = []
    x = 0
    for region in RegionList:
        x += 1
        rthread = myThread(x, "Thread-" + region, region)  # Create new thread
        rthread.start()                                    # Start new thread
        threads.append(rthread)                            # Add new thread to threads list
    que = Queue.Queue()
    # Wait for all threads to complete
    for t in threads:
        t.join()
        result = que.get()
        print(t.name + " -> Done")

Main()
If I comment out the line result = que.get(), the program runs with no issues.
What you are looking for is futures and async task management.
First, your program loops indefinitely because of the line que.get(): since nothing is ever put into the queue, it waits for something that will never happen. You never actually use the queue.
What you want to do instead is run the work as async tasks and await their results:
import asyncio

async def yourExpensiveTask():
    # some long calculation
    return 42

async def main():
    tasks = []
    tasks += [asyncio.create_task(yourExpensiveTask())]
    tasks += [asyncio.create_task(yourExpensiveTask())]
    for task in tasks:
        result = await task
        print(result)

asyncio.run(main())
See also https://docs.python.org/3/library/asyncio-task.html
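If you would rather keep plain threads, the same result can be had by creating the queue before the threads and having each thread put its return value there; a sketch reusing the question's GetSales and region names:
import queue
import threading

def GetSales(strReg, que):
    que.put("Returning-" + strReg)   # deliver the result via the queue instead of return

def Main():
    que = queue.Queue()
    threads = []
    for region in ["EMEA", "AP", "AM"]:
        t = threading.Thread(target=GetSales, args=(region, que))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    while not que.empty():
        print(que.get())

Main()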

How to gracefully terminate a multithreaded Python application that uses queue.Queue

I have been trying to get my application to terminate gracefully for quite some time now, but so far none of the answers I have found worked.
The sample code below illustrates the structure of my application. It is basically a chain of threads that pass data to one another using Queues.
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys

class StoppableThread(Thread):
    def __init__(self):
        super().__init__()
        self.stopper = Event()
        self.queue = Queue()

    @abstractmethod
    def actual_job(self):
        pass

    def stop_running(self):
        self.stopper.set()

    def run(self):
        while not self.stopper.is_set():
            print(self.stopper.is_set())
            self.actual_job()
        self.queue.join()

class SomeObjectOne(StoppableThread):
    def __init__(self, name, some_object_two):
        super().__init__()
        self.name = name
        self.obj_two = some_object_two

    def actual_job(self):
        # print('{} is currently running'.format(self.name))
        input_string = 'some string'
        print('{} outputs {}'.format(self.name, input_string))
        self.obj_two.queue.put(input_string)
        sleep(2)

class SomeObjectTwo(StoppableThread):
    def __init__(self, name, some_object_three):
        super().__init__()
        self.name = name
        self.some_object_three = some_object_three

    def actual_job(self):
        # print('{} is currently running'.format(self.name))
        some_string = self.queue.get()
        inverted = some_string[::-1]
        print('{} outputs {}'.format(self.name, inverted))
        self.some_object_three.queue.put(inverted)
        sleep(2)

class SomeObjectThree(StoppableThread):
    def __init__(self, name):
        super().__init__()
        self.name = name

    def actual_job(self):
        print('{} is currently running'.format(self.name))
        some_string = self.queue.get()
        print('{} outputs {}'.format(self.name, some_string[::-1]))
        sleep(2)

class ServiceExit(Exception):
    """
    Custom exception which is used to trigger the clean exit
    of all running threads and the main program.
    """
    pass

def service_shutdown(signum, frame):
    print('Caught signal %d' % signum)
    raise ServiceExit

signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)

if __name__ == '__main__':
    thread_three = SomeObjectThree('SomeObjectThree')
    thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
    thread_one = SomeObjectOne('SomeObjectOne', thread_two)
    try:
        thread_three.start()
        thread_two.start()
        thread_one.start()
        # Keep the main thread running, otherwise signals are ignored.
        while True:
            sleep(0.5)
    except ServiceExit:
        print('Running service exit')
        thread_three.stop_running()
        thread_two.stop_running()
        thread_one.stop_running()
        thread_one.join()
        thread_two.join()
        thread_three.join()
        sys.exit(0)
Now, if I run this code and press Ctrl-C to terminate, thread_one joins as expected, but the code gets stuck at thread_two.join().
Because thread_one is the only thread with a continuously empty queue, I expect it has something to do with the queue.
Any ideas?
In the run() method of StoppableThread you have this:
self.queue.join()
join() is a blocking method:
Blocks until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls
task_done() to indicate that the item was retrieved and all work on it
is complete. When the count of unfinished tasks drops to zero, join()
unblocks.
So in order for join() to return, it's not enough to get() an item in the other thread, you must also indicate that it's been processed with task_done():
from abc import abstractmethod
from time import sleep
from threading import Thread, Event
from queue import Queue
import signal
import sys

class StoppableThread(Thread):
    def __init__(self):
        super().__init__()
        self.stopper = Event()
        self.queue = Queue()

    @abstractmethod
    def actual_job(self):
        pass

    def stop_running(self):
        self.stopper.set()

    def run(self):
        while not self.stopper.is_set():
            print(self.stopper.is_set())
            self.actual_job()
        self.queue.join()

class SomeObjectOne(StoppableThread):
    def __init__(self, name, some_object_two):
        super().__init__()
        self.name = name
        self.obj_two = some_object_two

    def actual_job(self):
        # print('{} is currently running'.format(self.name))
        input_string = 'some string'
        print('{} outputs {}'.format(self.name, input_string))
        self.obj_two.queue.put(input_string)
        sleep(2)

class SomeObjectTwo(StoppableThread):
    def __init__(self, name, some_object_three):
        super().__init__()
        self.name = name
        self.some_object_three = some_object_three

    def actual_job(self):
        # print('{} is currently running'.format(self.name))
        some_string = self.queue.get()
        inverted = some_string[::-1]
        print('{} outputs {}'.format(self.name, inverted))
        self.queue.task_done()
        self.some_object_three.queue.put(inverted)
        sleep(2)

class SomeObjectThree(StoppableThread):
    def __init__(self, name):
        super().__init__()
        self.name = name

    def actual_job(self):
        print('{} is currently running'.format(self.name))
        some_string = self.queue.get()
        print('{} outputs {}'.format(self.name, some_string[::-1]))
        self.queue.task_done()
        sleep(2)

class ServiceExit(Exception):
    """
    Custom exception which is used to trigger the clean exit
    of all running threads and the main program.
    """
    pass

def service_shutdown(signum, frame):
    print('Caught signal %d' % signum)
    raise ServiceExit

signal.signal(signal.SIGTERM, service_shutdown)
signal.signal(signal.SIGINT, service_shutdown)

if __name__ == '__main__':
    thread_three = SomeObjectThree('SomeObjectThree')
    thread_two = SomeObjectTwo('SomeObjectTwo', thread_three)
    thread_one = SomeObjectOne('SomeObjectOne', thread_two)
    try:
        thread_three.start()
        thread_two.start()
        thread_one.start()
        # Keep the main thread running, otherwise signals are ignored.
        while True:
            sleep(0.5)
    except ServiceExit:
        print('Running service exit')
        thread_three.stop_running()
        thread_two.stop_running()
        thread_one.stop_running()
        thread_one.join()
        thread_two.join()
        thread_three.join()

How to check if an asyncio loop has any associated sockets

asyncio.Task.all_tasks() gives a list of all tasks for an event loop, but I can't find anything similar for sockets, and in particular datagram sockets, associated with a loop.
The absence of sockets and tasks could then signal "end of life" for the loop.
The question is: in the following example, what goes in loop_not_empty() so that it returns False when the task set is empty and there are no associated sockets (i.e. after two seconds)?
Example:
import asyncio
import socket
import threading

class Handler(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        print("connection made")

    def datagram_received(self, data, addr):
        if data == b'die':
            print("shutting down")
            self.transport.abort()

@asyncio.coroutine
def sometask():
    yield from asyncio.sleep(1)
    print("task done")

def loop_not_empty(l):
    # if asyncio.Task.all_tasks() == set() and WHAT_GOES_HERE
    #     return False
    return True

def main():
    a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
    l = asyncio.get_event_loop()
    asyncio.ensure_future(sometask(), loop=l)
    asyncio.ensure_future(l.create_datagram_endpoint(Handler, sock=a), loop=l)
    threading.Timer(2, lambda: b.send(b'die')).start()
    while loop_not_empty(l):
        l.run_until_complete(asyncio.sleep(1, loop=l))

main()
Here is a solution that uses a simple class and asyncio.Event() to count the number of active jobs and signals the loop to stop when all jobs are done:
import asyncio
import random

class UseCounter:
    def __init__(self, loop=None):
        self.loop = loop
        self.event = asyncio.Event(loop=loop)
        self.n = 0  # The number of active jobs

    def __enter__(self):
        self.enter()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.exit()

    def enter(self):
        self.n += 1

    def exit(self):
        self.n -= 1
        if self.n == 0:
            self.event.set()

    async def wait(self):
        return await self.event.wait()

async def my_coroutine(counter, term):
    with counter:
        print("start", term)
        n = random.uniform(0.2, 1.5)
        await asyncio.sleep(n)
        print("end", term)

loop = asyncio.get_event_loop()
counter = UseCounter(loop)
terms = ["apple", "banana", "melon"]
for term in terms:
    asyncio.ensure_future(my_coroutine(counter, term))
loop.run_until_complete(counter.wait())
loop.close()
For your example above, add .enter() to connection_made() and .exit() to connection_lost().
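A sketch of how that might look for the Handler above (the counter has to be supplied through the protocol factory; connection_lost() is part of the asyncio protocol interface):
class Handler(asyncio.Protocol):
    def __init__(self, counter):
        self.counter = counter

    def connection_made(self, transport):
        self.counter.enter()              # one more live transport
        self.transport = transport

    def connection_lost(self, exc):
        self.counter.exit()               # transport gone; may set the event

    def datagram_received(self, data, addr):
        if data == b'die':
            self.transport.abort()

# the endpoint factory now needs to pass the counter in:
# l.create_datagram_endpoint(lambda: Handler(counter), sock=a)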

How to know if a Python Queue's workers are fully active?

I am using the code below for multithreading in Python 3. I tried thread counts of cpu_count() times 2, 3, and 4, but I am not sure all those threads are in use. How can I check whether some of them are never used?
queue = Queue()
for x in range(cpu_count() * 2):
    worker = DownloadWorker(queue)
    worker.daemon = True
    worker.start()
queue.join()

class DownloadWorker(Thread):
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            link, download_path = self.queue.get()
            download_link(link, download_path)
            self.queue.task_done()

def downloadImage(imageServer, imageLocal, queue):
    queue.put((imageServer, imageLocal))
If you want to know whether all your threads are working, you can just print the thread name every time it starts a task:
from threading import Thread
from queue import Queue
import random
import time

class DownloadWorker(Thread):
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            self.queue.get()
            print('Thread: {}'.format(self.name))
            time.sleep(random.random())
            self.queue.task_done()  # mark the item as processed

queue = Queue()
for i in range(100):
    queue.put('data')

for x in range(4):
    worker = DownloadWorker(queue)
    worker.daemon = True
    worker.start()

time.sleep(10)
Queue uses a threading.Condition internally to block and release threads that call get(), and threading.Condition in turn uses a threading.Lock. From the documentation of threading.Lock:
When more than one thread is blocked in acquire() waiting for the
state to turn to unlocked, only one thread proceeds when a release()
call resets the state to unlocked; which one of the waiting threads
proceeds is not defined, and may vary across implementations.
I hope this answers the question.
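For reference, the core of Queue.get() is essentially this condition-variable pattern (simplified from CPython's queue module, not the exact source):
with self.not_empty:              # a threading.Condition
    while not self._qsize():
        self.not_empty.wait()     # which waiter wakes on notify() is unspecified
    item = self._get()
    self.not_full.notify()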
