I was just reading through the threading API in Python 3 and I don't see a method of syncing threads. In my case I'm creating performance testing in a Server and Client environment so I might want to use the thread class call this test with N users:
def tDoTest( ):
doSync() # Wait for all thread creation
doLogin()
doSync() # Wait for all users to login
startTest()
create100Rows()
endTest()
doSync() # Wait for completion
doLogout()
I was hoping there was a built in way to handle this that I missed.
You could use a blocking queue.
import queue
q = queue.Queue()
If you want one thread to wait for all members of a group of three other threads to do some task, the waiting thread could get() three tokens from the queue:
for i in range(3):
q.get()
Each of the three awaited threads could signify that it has completed its task by putting an informationless token into the queue:
q.put(())
The waiting thread will not exit its for loop until it has collected all three tokens.
You could wrap it up in a class:
import queue
import threading
import time
class CountdownLatch:
def __init__(self, N):
self.N = N
self.q = queue.Queue()
def check_in(self):
self.q.put(())
def await_all_checkins(self):
for i in range(self.N):
self.q.get()
def demo_CountDownLatch():
cdl = CountdownLatch(3)
def task(cdl,delay):
time.sleep(delay)
print(delay)
cdl.check_in()
threading.Thread(target=task, args=(cdl,2.0)).start()
threading.Thread(target=task, args=(cdl,1.5)).start()
threading.Thread(target=task, args=(cdl,1.0)).start()
cdl.await_all_checkins()
print("nighty night.")
if __name__ == "__main__":
demo_CountDownLatch()
OK after a bunch of searching this is the answer. Never expected this functionality to be named Barrier(). The is a built in function for it.
b = Barrier(2, timeout=5)
def doSync():
ct = threading.currentThread()
b.wait( )
print('Sync done: ' + ct.getName())
The output looks like as expected now:
0.75
1.0
Sync done: 1
Sync done: 2
0.75
1.0
Sync done: 1
Sync done: 2
0.75
1.0
Sync done: 1
Sync done: 2
Related
Im trying to generate data in two threads and get that data in a separate thread that prints the data.
3 threads, 2 threads generate data , 1 thread consumes the data generated.
The Problem: not getting both generated data into the consumer thread
How can I pass data generated in 2 threads and deliver it in the consumer thread?
#from threading import Thread
import concurrent.futures
import time
# A thread that produces data
def producer(out_q):
while True:
# Produce some data
global data
data = data + 2
out_q.put(data)
# Another thread that produces data
def ac(out2_q):
while True:
global x
x = x + 898934567
out2_q.put(data)
# A thread that consumes data
def consumer(in_q):
while True:
# Get BOTH produced data from 2 threads
data = in_q.get()
# Process the data
time.sleep(.4)
print(data, end=' ', flush=True)
x=0
data = 0
q = Queue()
with concurrent.futures.ThreadPoolExecutor() as executor:
t1 = executor.submit(consumer, q)
t2 = executor.submit(producer,q)
t3 = executor.submit(ac, q)```
I recommend to go with threading.Thread in this case. Please see the code below and follow comments. Feel free to ask questions.
from threading import Thread, Event
from queue import Queue
import time
def producer_one(q: Queue, e: Event):
while not e.is_set():
q.put("one")
time.sleep(1)
print("Producer # one stopped")
def producer_two(q: Queue, e: Event):
while not e.is_set():
q.put("two")
time.sleep(2)
print("Producer # two stopped")
def consumer(q: Queue):
while True:
item = q.get()
print(item)
q.task_done() # is used to unblock queue - all tasks were done
time.sleep(2)
# will never be printed ! - since it is daemon thread
print("All work is done by consumer!")
if __name__ == '__main__':
_q = Queue() # "connects" threads
_e = Event() # is used to stop producers from the Main Thread
# create threads block
producer_th1 = Thread(target=producer_one, args=(_q, _e, ))
producer_th2 = Thread(target=producer_two, args=(_q, _e, ))
# daemon means that thread will be stopped when main thread stops
consumer_th = Thread(target=consumer, args=(_q, ), daemon=True)
try:
# starts block:
producer_th1.start()
producer_th2.start()
consumer_th.start()
time.sleep(20)
_e.set() # ask producers to stop
except KeyboardInterrupt:
_e.set() # ask producer threads to stop
print("Asked Producer Threads to stop")
finally:
producer_th1.join() # main thread is block until producer_th1 is not stopped
producer_th2.join() # main thread is block until producer_th2 is not stopped
_q.join() # now wait consumer to finish all tasks from queue
print("Queue is empty and program will be finished soon")
time.sleep(2) # just wait 2 seconds to show that consumer stops with main thread
print("All done!")
I have a number of certain objects which need to run a specific function at specific ever-changing intervals, again and again, until they decide they are done.
For example, one object may need to wait 30 seconds, run, wait 60 seconds, run, wait 10 seconds, run... You get the point, and this could be going on for 30-120 different objects, running the exact same kind of function.
I was thinking that simply having a function that sleeps for the exact interval would solve my problem, but, correct me if I'm wrong, I remembered that thread pools can only run a certain number of threads at any given time (12 for me). How do I get around this limit?
class Thing(object):
def getCurrentPeriod(self):
return random.randint(5, 30) # Some ever changing period of time
def refresh(self):
doThings() # A long running task that is disk and network intensive
def waitRefresh(self):
period = self.getCurrentPeriod()
time.sleep(period) # Wait that period out
self.refresh()
return self.needRefresh()
# Boolean if it needs to restart - Not sure about how to reschedule,
# or specifically where to connect the worker emit when it finishes
# to make sure this *specific* Thing obj gets it's waitRefresh func called again.
class App(QMainWindow):
def __init__(self, *args, **kwargs):
super(MainWindow, self).__init__(*args, **kwargs)
self.threadpool = QThreadPool()
# Add initial objects to pool (other portions of app may add more over time)
for thing in self.acquireThings():
worker = Worker(thing.waitRefresh)
self.threadpool.start(worker)
Doesn't include the WorkerSignals class nor the QRunnable subclass, this example includes what I usually do. The example is tackling the same problem, but in a (most likely) inefficient way.
edit: New example with complete working example of how time.sleep does not pause the thread and allow others to work. I feel that async may be the only implementation, but is there a quick fix so I don't have to alter my entire app?
Here's what it looks like when you try to sleep more than 12 threads.
The ultimate solution came when I decided to actually try the QTimer class. Perhaps there are more optimized solutions, but this one seems to hit all the checkboxes, even if it's worryingly simple.
import random
import time
import traceback
from functools import partial
from PyQt5.QtCore import *
from PyQt5.QtGui import QFont
from PyQt5.QtWidgets import *
class WorkerSignals(QObject):
"""
Represents the signals a Worker can emit.
"""
finished = pyqtSignal()
starting = pyqtSignal(int) # ID of thread
result = pyqtSignal(tuple) # Tuple refresh result, result and ID
class Worker(QRunnable):
"""
A worker designed to tell when it's starting, when it's finished and the result.
Designed to work around Thread.refresh().
"""
def __init__(self, fn, thread_id, *args, **kwargs):
super(Worker, self).__init__()
# Store constructor arguments (re-used for processing)
self.fn = fn
self.id = thread_id
self.args = args
self.kwargs = kwargs
self.signals = WorkerSignals()
#pyqtSlot()
def run(self):
"""
Runs a given method, and emits the result with the Worker's coordinated ID.
"""
try:
self.signals.starting.emit(self.id) # Thread is now finally ready to work.
result = self.fn(*self.args, **self.kwargs) # Refresh Thread!
self.signals.result.emit(result) # Thread is finished, emit result tuple.
except:
traceback.print_exc()
finally:
self.signals.finished.emit() # Done
class Thread(object):
"""
Basic Rules for a Thread Object:
Cannot store the next timestamp on the object (it's a database object, I don't believe it's good practice
to be creating sessions over and over to simply read/write the access time.
ID and Active are allowed as booleans.
"""
i = -1
def __init__(self):
self.id = Thread.nextID()
self.active = True
self.refreshes = 0
def refresh(self) -> tuple:
"""
'Refreshes' a thread. Waits a specific period, then decides whether Thread object should be deactivated or
returned from additional refreshes. Chance of deactivation lowers with each refresh.
:return: The refresh result, a tuple with a boolean and the thread's ID (for identifying it later)
"""
# Represents my SQL Alchemy Model's refresh() function
self.refreshes += 1
time.sleep(random.randint(2, 5))
if random.random() <= max(0.1, 1.0 - ((self.refreshes + 5) / 10)):
self.active = False
return self.active, self.id
#staticmethod
def getRefreshTime() -> float:
"""
Represents the amount of time before a thread should be refreshed.
Should NOT be used to determine whether the thread is still active or not.
:return: The time period that should be waited.
"""
return random.uniform(10, 300)
#staticmethod
def nextID() -> int:
"""
Returns integer thread IDs in sequence to remove possibility of duplicate IDs.
:return: Integer Thread ID
"""
Thread.i += 1
return Thread.i
def __repr__(self):
return f'Thread(id={self.id} active={self.active})'
class MainWindow(QMainWindow):
"""
GUI containing a Label, Button and ListWidget showing all the active sleeping/working threads.
Manages a threadpool, a number of background singleshot timers, etc.
"""
def __init__(self, *args, **kwargs):
super(MainWindow, self).__init__(*args, **kwargs)
# Widgets Setup
layout = QVBoxLayout()
self.list = QListWidget()
self.l = QLabel("Total Active: 0")
self.button = QPushButton("Refresh List")
self.button.pressed.connect(self.refreshList)
self.button.setDisabled(True)
layout.addWidget(self.l)
layout.addWidget(self.button)
layout.addWidget(self.list)
w = QWidget()
w.setLayout(layout)
self.setCentralWidget(w)
self.show()
# Periodically add threads to the pool.
self.poolTimer = QTimer()
self.poolTimer.setInterval(5_000)
self.poolTimer.timeout.connect(self.addThreads)
# Threading Setup
self.threadpool = QThreadPool()
print("Multithreading with maximum %d threads" % self.threadpool.maxThreadCount())
self.active, self.threads = {}, {}
# Add a number of threads to start with.
for _ in range(random.randint(5, 16)):
self.setupThread(Thread())
self.poolTimer.start()
def refreshList(self):
"""
Refreshes the ListWidget in the GUI with all the active/sleeping/working threads.
"""
self.list.clear()
bold = QFont()
bold.setBold(True)
active = 0
for thread in self.threads.values():
item = QListWidgetItem(
f'Thread {thread.id}/{thread.refreshes}')
# Bold a thread if it's working
if self.active[thread.id]:
active += 1
item.setFont(bold)
self.list.addItem(item)
self.l.setText(f'Total Active: {active}/{len(self.threads)}')
def refreshResult(self, result) -> None:
"""
When a thread is finished, the result determines it's next course of action, which is either
to return to the pool again, or delete itself.
:param result: A tuple containing the result (bool) and the connected Thread ID.
"""
self.active[result[1]] = False
if result[0]:
print(f'Restarting Thread {result[1]}')
self.setupThread(self.threads[result[1]]) # Add by ID, which would normally be a database GET
else:
print(f'Thread {result[1]} shutting down.')
del self.active[result[1]]
del self.threads[result[1]]
self.refreshList()
def updateActivity(self, thread_id) -> None:
"""
Connected to the starting signal, helps signal when a thread is actually being refreshed.
:param thread_id: The Thread ID
"""
print(f'Thread {thread_id} is now active/working.')
self.active[thread_id] = True
def refresh(self, thread):
"""
Adds a new worker to the threadpool to be refreshed.
Can't be considered a real start to the thread.refresh function, as the pool has a max of 12 workers at any time.
The 'starting' signal can tell us when a specific thread is actually being refreshed, and is represented
as a Bold element in the list.
:param thread: A thread instance.
"""
print(f'Adding Thread {thread.id} to the pool.')
worker = Worker(thread.refresh, thread_id=thread.id)
worker.signals.result.connect(self.refreshResult)
worker.signals.starting.connect(self.updateActivity)
self.threadpool.start(worker)
# self.active[thread.id] = True
self.refreshList()
def setupThread(self, thread) -> None:
"""
Adds a new timer designated to start a specific thread.
:param thread: A thread instance.
"""
self.active[thread.id] = False
self.threads[thread.id] = thread
t = QTimer()
period = thread.getRefreshTime()
t.singleShot(period * 1000, partial(self.refresh, thread=thread))
print(f'Thread {thread.id} will start in {period} seconds.')
self.refreshList()
def addThreads(self):
"""
Adds a number of threads to the pool. Called automatically every couple seconds.
"""
add = max(0, 30 + random.randint(-5, 5) - len(self.threads))
if add > 0:
print(f'Adding {add} thread{"s" if add > 1 else ""}.')
for _ in range(add):
self.setupThread(Thread())
app = QApplication([])
window = MainWindow()
app.exec_()
When a Thread is requested, a Timer is created and singleShot is fired on an extra function that will add it to the threadpool. This threadpool can handle up to 12 refreshing continious 'refreshing' threads, and signals allow the GUI to update the moment a change is found.
Thousands of 'Thread' objects can be waiting and it seems singleShot is capable of adding them to the pool exactly when they need to be.
Signals help differentiate when a thread is sleeping, working and active (but inactive Thread objects are immediately removed).
The only caveats I can think of with this program is:
1) Can a QThread implementation beat it?
2) What happens to the QTimer once it's singleshot function has executed and fired? Will they be properly GC'd, or can thousands build up in the background consuming resources?
I'm trying to make an asyncio worker class that will consume jobs from a job queue and process up to N jobs in parallel. Some jobs may queue additional jobs. When the job queue is empty and the worker finishes all of its current jobs, it should end.
I'm still struggling with asyncio conceptually. Here is one of my attempts, where N=3:
import asyncio, logging, random
async def do_work(id_):
await asyncio.sleep(random.random())
return id_
class JobQueue:
''' Maintains a list of all pendings jobs. '''
def __init__(self):
self._queue = asyncio.Queue()
self._max_id = 10
for id_ in range(self._max_id):
self._queue.put_nowait(id_ + 1)
def add_job(self):
self._max_id += 1
self._queue.put_nowait(self._max_id)
async def get_job(self):
return await self._queue.get()
def has_jobs(self):
return self._queue.qsize() > 0
class JobWorker:
''' Processes up to 3 jobs at a time in parallel. '''
def __init__(self, job_queue):
self._current_jobs = set()
self._job_queue = job_queue
self._semaphore = asyncio.Semaphore(3)
async def run(self):
while self._job_queue.has_jobs() or len(self._current_jobs) > 0:
print('Acquiring semaphore...')
await self._semaphore.acquire()
print('Getting a job...')
job_id = await self._job_queue.get_job()
print('Scheduling job {}'.format(job_id))
self._current_jobs.add(job_id)
task = asyncio.Task(do_work(job_id))
task.add_done_callback(self.task_finished)
def task_finished(self, task):
job_id = task.result()
print('Finished job {} / released semaphore'.format(job_id))
self._current_jobs.remove(job_id)
self._semaphore.release()
if random.random() < 0.2:
print('Queuing a new job')
self._job_queue.add_job()
loop = asyncio.get_event_loop()
jw = JobWorker(JobQueue())
print('Starting event loop')
loop.run_until_complete(jw.run())
print('Event loop ended')
loop.close()
An excerpt of the output:
Starting event loop
Acquiring semaphore...
Getting a job...
Scheduling job 1
Acquiring semaphore...
Getting a job...
Scheduling job 2
Acquiring semaphore...
Getting a job...
Scheduling job 3
Acquiring semaphore...
Finished job 2 / released semaphore
Getting a job...
Scheduling job 4
...snip...
Acquiring semaphore...
Finished job 11 / released semaphore
Getting a job...
Finished job 12 / released semaphore
Finished job 13 / released semaphore
It appears to correctly process all jobs while processing no more than 3 jobs at any one time. However, the program hangs after the last job is finished. As indicated by the output, it appears to be hanging at job_id = await self._job_queue.get_job(). Once the job queue is empty, this coroutine will never resume, and the check to see if the job queue is empty (at the top of the loop) isn't reached again.
I've tried working around this in a number of ways but conceptually something just don't quite fit. My current WIP is passing some futures between the queue and the worker and then using some combination of asyncio.wait(...) on all of them, but it's getting ugly and I'm wondering if there is an elegant solution that I'm overlooking.
You could take advantage of queue.task_done that indicates that a formerly enqueued task is complete. Then you can combine queue.join and queue.get using asyncio.wait: if queue.join finishes and queue.get doesn't, this means all the jobs have been completed.
See this example:
class Worker:
def __init__(self, func, n=3):
self.func = func
self.queue = asyncio.Queue()
self.semaphore = asyncio.Semaphore(n)
def put(self, *args):
self.queue.put_nowait(args)
async def run(self):
while True:
args = await self._get()
if args is None:
return
asyncio.ensure_future(self._target(args))
async def _get(self):
get_task = asyncio.ensure_future(self.queue.get())
join_task = asyncio.ensure_future(self.queue.join())
await asyncio.wait([get_task, join_task], return_when='FIRST_COMPLETED')
if get_task.done():
return task.result()
async def _target(self, args):
try:
async with self.semaphore:
return await self.func(*args)
finally:
self.queue.task_done()
You can timeout get_job with simple asyncio.wait_for. For example with 1s, and get back to the beginning of loop on timeout.
async def run(self):
while self._job_queue.has_jobs() or len(self._current_jobs) > 0:
print('Acquiring semaphore...')
await self._semaphore.acquire()
print('Getting a job...')
try:
job_id = await asyncio.wait_for(self._job_queue.get_job(), 1)
except asyncio.TimeoutError:
continue
print('Scheduling job {}'.format(job_id))
self._current_jobs.add(job_id)
task = asyncio.Task(do_work(job_id))
task.add_done_callback(self.task_finished)
I am using code as below for multiple thread in python3, I tried Threads in cpu_count() with 2, 3 and 4 times, but I am not sure if all those threads in using, how can I check if there are some queues are never used?
queue = Queue()
for x in range(cpu_count() * 2):
worker = DownloadWorker(queue)
worker.daemon = True
worker.start()
queue.join()
class DownloadWorker(Thread):
def __init__(self, queue):
Thread.__init__(self)
self.queue = queue
def run(self):
while True:
link, download_path = self.queue.get()
download_link(link, download_path)
self.queue.task_done()
def downloadImage(imageServer, imageLocal, queue):
queue.put((imageServer, imageLocal))
if you want to know if all your threads are working, you can just print the thread name every time it starts a task:
from threading import Thread
from queue import Queue
import random
import time
class DownloadWorker(Thread):
def __init__(self, queue):
Thread.__init__(self)
self.queue = queue
def run(self):
while True:
self.queue.get()
print('Thread: {}'.format(self.name))
time.sleep(random.random())
queue = Queue()
for i in range(100):
queue.put('data')
queue.task_done()
for x in range(4):
worker = DownloadWorker(queue)
worker.daemon = True
worker.start()
time.sleep(10)
Queue uses threading.Condition internally to block/release threads that called get() and threading.Condition uses a threading.Lock. From the documentation of threading.Lock:
When more than one thread is blocked in acquire() waiting for the
state to turn to unlocked, only one thread proceeds when a release()
call resets the state to unlocked; which one of the waiting threads
proceeds is not defined, and may vary across implementations.
I hope this answers the question.
I have created a program to generate data points of functions that I later plot. The program takes a class which defines the function, creates a data outputting object which when called generates the data to a text file. To make the whole process faster I put the jobs in threads, however when I do, the data generated is not always correct. I have attached a picture to show what I mean:
Here are some of the relevant bits of code:
from queue import Queue
import threading
import time
queueLock = threading.Lock()
workQueue = Queue(10)
def process_data(threadName, q, queue_window, done):
while not done.get():
queueLock.acquire() # check whether or not the queue is locked
if not workQueue.empty():
data = q.get()
# data is the Plot object to be run
queueLock.release()
data.parent_window = queue_window
data.process()
else:
queueLock.release()
time.sleep(1)
class WorkThread(threading.Thread):
def __init__(self, threadID, q, done):
threading.Thread.__init__(self)
self.ID = threadID
self.q = q
self.done = done
def get_qw(self, queue_window):
# gets the queue_window object
self.queue_window = queue_window
def run(self):
# this is called when thread.start() is called
print("Thread {0} started.".format(self.ID))
process_data(self.ID, self.q, self.queue_window, self.done)
print("Thread {0} finished.".format(self.ID))
class Application(Frame):
def __init__(self, etc):
self.threads = []
# does some things
def makeThreads(self):
for i in range(1, int(self.threadNum.get()) +1):
thread = WorkThread(i, workQueue, self.calcsDone)
self.threads.append(thread)
# more code which just processes the function etc, sorts out the gui stuff.
And in a separate class (as I'm using tkinter, so the actual code to get the threads to run is called in a different window) (self.parent is the Application class):
def run_jobs(self):
if self.running == False:
# threads are only initiated when jobs are to be run
self.running = True
self.parent.calcsDone.set(False)
self.parent.threads = [] # just to make sure that it is initially empty, we want new threads each time
self.parent.makeThreads()
self.threads = self.parent.threads
for thread in self.threads:
thread.get_qw(self)
thread.start()
# put the jobs in the workQueue
queueLock.acquire()
for job in self.job_queue:
workQueue.put(job)
queueLock.release()
else:
messagebox.showerror("Error", "Jobs already running")
This is all the code which relates to the threads.
I don't know why when I run the program with multiple threads some data points are incorrect, whilst running it with just 1 single thread the data is all perfect. I tried looking up "threadsafe" processes, but couldn't find anything.
Thanks in advance!