Why does this thread never get started? - multithreading

I'm implementing an image downloader with the producer-consumer model. One thread is responsible for generating (url, filename) pairs and putting them in a queue. I want MAX_THREADS threads to pick pairs off the queue and start downloading. Here are my threads:
# assumed setup: the question omits the imports and the shared queue
# (max size 50, per the text below)
from queue import Queue
from threading import Thread
import logging

QUEUE = Queue(maxsize=50)

class Extractor(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None, items=None):
        super(Extractor, self).__init__()
        self.target = target
        self.name = name
        self.items = items

    def run(self):
        while True:
            for item in self.items:
                if not QUEUE.full():
                    QUEUE.put_nowait(extract(item))
                    logging.debug('Putting ' + str(item) + ' : ' + str(QUEUE.qsize()) + ' items in queue')

class Downloader(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, verbose=None):
        super(Downloader, self).__init__()
        self.target = target
        self.name = name
        self.seen = set()

    def run(self):
        while True:
            if not QUEUE.empty():
                pair = QUEUE.get_nowait()
                # I have seen the URL
                if pair[0] in self.seen:
                    continue
                else:
                    # Never seen it before
                    self.seen.add(pair[0])
                    logging.debug('Downloading ' + str(pair[1]) + ' : ' + str(QUEUE.qsize()) + ' items in queue')
                    download_one_pic(pair)

if __name__ == '__main__':
    items = None
    items = crawl('username__', items)
    worker_threads = []
    producer = Extractor(name='Extractor', items=items)
    producer.daemon = True
    producer.start()
    consumer = Downloader(name='Downloader[1]')
    consumer2 = Downloader(name='Downloader[2]')
    worker_threads.append(consumer)
    worker_threads.append(consumer2)
    for thread in worker_threads:
        thread.start()
        thread.join()
The queue has a max size of 50, and I want the producer thread to run regardless of the other threads, so I have daemonized it. One weird thing is that the consumer2 thread never gets started, and I have no idea why. In my log, only Downloader[1] does the job, and the queue keeps fluctuating between 49 and 50, so I know that Downloader[2] never gets started.

Calling join() on a thread waits until it completes before returning. The loop at the end of your code will only ever execute once, because the Downloader threads loop forever: the first join() never returns. Call start() in one loop, then loop over the threads again to join() them, so you only wait after all threads have been started.
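A minimal sketch of that fix, reusing the worker_threads list from the question:

    # Start every consumer first...
    for thread in worker_threads:
        thread.start()
    # ...then wait for them, after every thread is already running.
    for thread in worker_threads:
        thread.join()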

Related

What is the best way to stop (interrupt) QRunnable in QThreadPool?

I have a long running task, which for example's sake I have made an infinite while loop:
def long_task(parent, progress_callback):
top = 100000
x = 0
while True:
if x < top:
if not parent.stop:
progress_callback.emit(x)
x += 1
else:
break
else:
x = 0
progress_callback.emit(x)
x += 1
I have a Worker class that subclasses QRunnable, and then I can override the run() method with whatever function is passed to the Worker.
class ThreadWorker(QtCore.QRunnable):
    def __init__(self, fn, *args, **kwargs):
        super(ThreadWorker, self).__init__()
        self.fn = fn
        self.args = args
        self.kwargs = kwargs
        self.signals = ThreadWorkerSignals()
        self.kwargs['progress_callback'] = self.signals.progress
        self.running = False

    @QtCore.pyqtSlot()
    def run(self):
        self.running = True
        try:
            result = self.fn(*self.args, **self.kwargs)
        except:
            traceback.print_exc()
            exctype, value = sys.exc_info()[:2]
            self.signals.error.emit((exctype, value, traceback.format_exc()))
        else:
            self.signals.result.emit(result)  # Return the result of the processing
        finally:
            self.signals.finished.emit()  # Done
I create two instances of ThreadWorker within my MainWindow and pass the same long-running task to each worker. Both workers are added to my MainWindow's QThreadPool, and then I call threadpool.start(worker) for each to begin the worker's run() method. I now have two threads running the infinite loop:
class MainWindow(QtWidgets.QMainWindow):
    def __init__(self):
        super().__init__()
        ## NOT SHOWING THE REST OF INIT CODE

    def create_workers(self):
        self.worker1 = ThreadWorker(self.long_task, parent=self)
        self.worker1.signals.progress.connect(lambda x: self.long_label_1.setText(str(x)))
        self.worker2 = ThreadWorker(self.long_task, parent=self)
        self.worker2.signals.progress.connect(lambda x: self.long_label_2.setText(str(x)))
        self.threadpool.start(self.worker1)
        self.threadpool.start(self.worker2)
        self.stop = False
Please note the self.stop attribute above - this also belongs to the MainWindow class.
All I want to do is break the loop (interrupt the run() method of a worker) when I press a button.
As you can see, I am referencing parent.stop during every iteration of the worker's while loop. Right now, if I press my button, MainWindow's stop attribute turns True and the loop breaks when the worker class sees this change.
def stop_tasks(self):
    self.stop = True
This works fine and accomplishes my goal, but I am wondering if this is dangerous and if there is a better way to do this? I only ask because it seems risky to reference an outside class attribute from within a separate running thread, and I don't know what could go wrong.
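For reference, the standard-library primitive built for exactly this kind of shared stop flag is threading.Event. A minimal sketch of the same loop rewritten around one (illustrative only; only the names long_task and stop_tasks mirror the question):

    import threading

    stop_event = threading.Event()  # shared between the GUI and the workers

    def long_task(progress_callback):
        x = 0
        while not stop_event.is_set():  # replaces the "if not parent.stop" check
            progress_callback.emit(x)
            x = (x + 1) % 100000  # wrap around, as in the original loop

    def stop_tasks():
        stop_event.set()  # flips the flag for every worker at once

In CPython, reading and writing a plain boolean attribute is effectively atomic because of the GIL, so the original approach is not as dangerous as it looks; an Event simply makes the intent explicit.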

Getting returning value from multithreading in python 3

I'm trying to get one or several return values from a thread in a multithreaded program. The code below gets stuck in a loop, with no way to interrupt it with Ctrl-C or Ctrl-D.
import queue as Queue
import threading

class myThread(threading.Thread):
    def __init__(self, threadID, name, region):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.region = region

    def run(self):
        GetSales(self.region)

def GetSales(strReg):
    print("Thread-" + strReg)
    return "Returning-" + strReg

def Main():
    RegionList = []
    RegionList.append("EMEA")
    RegionList.append("AP")
    RegionList.append("AM")

    # Create threads
    threads = []
    x = 0
    for region in RegionList:
        x += 1
        rthread = myThread(x, "Thread-" + region, region)  # Create new thread
        rthread.start()                                    # Start new thread
        threads.append(rthread)                            # Add new thread to threads list

    que = Queue.Queue()

    # Wait for all threads to complete
    for t in threads:
        t.join()
        result = que.get()
        print(t.name + " -> Done")

Main()
If I comment out the line result = que.get(), the program runs with no issues.
What you are looking for is futures and async task management.
Firstly, your program loops indefinitely because of the line que.get(): since nothing is ever put in the queue, it blocks waiting for an item that will never arrive. You never actually use the queue.
What you want to do is run an async task and get its result:
import asyncio

async def yourExpensiveTask():
    # some long calculation
    return 42

async def main():
    tasks = []
    tasks += [asyncio.create_task(yourExpensiveTask())]
    tasks += [asyncio.create_task(yourExpensiveTask())]
    for task in tasks:
        result = await task
        print(result)

asyncio.run(main())
See also https://docs.python.org/3/library/asyncio-task.html
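If you want to keep plain threads rather than switch to asyncio, the queue idea from the question also works once the worker actually puts its result on the queue. A minimal sketch (illustrative, not the original code):

    import queue
    import threading

    def get_sales(region, results):
        results.put("Returning-" + region)  # hand the value back via the queue

    results = queue.Queue()
    threads = [threading.Thread(target=get_sales, args=(r, results))
               for r in ("EMEA", "AP", "AM")]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    while not results.empty():
        print(results.get())

Here get() can never block forever: by the time the joins return, each thread has already put its result on the queue.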

How to use aiopg pool in multi-threaded application?

I have Python 3.4.3, PostgreSQL 9.4, and aiopg 0.7.0. The multi-threaded example below was taken from this site. How do I use the pool? The thread hangs on the SELECT operation.
import time
import asyncio
import aiopg
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future

class B(Thread):
    def __init__(self, start_event):
        Thread.__init__(self)
        self.loop = None
        self.tid = None
        self.event = start_event

    def run(self):
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self.tid = current_thread()
        self.loop.call_soon(self.event.set)
        self.loop.run_forever()

    def stop(self):
        self.loop.call_soon_threadsafe(self.loop.stop)

    def add_task(self, coro):
        """this method should return a task object, that I
        can cancel, not a handle"""
        def _async_add(func, fut):
            try:
                ret = func()
                fut.set_result(ret)
            except Exception as e:
                fut.set_exception(e)

        # asyncio.async is the Python 3.4-era name for ensure_future()
        f = functools.partial(asyncio.async, coro, loop=self.loop)
        if current_thread() == self.tid:
            return f()  # We can call directly if we're not going between threads.
        else:
            # We're in a non-event loop thread so we use a Future
            # to get the task from the event loop thread once
            # it's ready.
            fut = Future()
            self.loop.call_soon_threadsafe(_async_add, f, fut)
            return fut.result()

    def cancel_task(self, task):
        self.loop.call_soon_threadsafe(task.cancel)

@asyncio.coroutine
def test(pool, name_task):
    while True:
        print(name_task, 'running')
        with (yield from pool.cursor()) as cur:
            print(name_task, " select. ")
            yield from cur.execute("SELECT count(*) FROM test")
            count = yield from cur.fetchone()
            print(name_task, ' Result: ', count)
        yield from asyncio.sleep(3)

@asyncio.coroutine
def connect_db():
    dsn = 'dbname=%s user=%s password=%s host=%s' % ('testdb', 'user', 'passw', '127.0.0.1')
    pool = yield from aiopg.create_pool(dsn)
    print('create pool type =', type(pool))
    # future.set_result(pool)
    return pool

event = Event()
b = B(event)
b.start()
event.wait()  # Let the loop's thread signal us, rather than sleeping

loop_db = asyncio.get_event_loop()
pool = loop_db.run_until_complete(connect_db())

time.sleep(2)
t = b.add_task(test(pool, 'Task1'))  # This is a real task
t = b.add_task(test(pool, 'Task2'))

while True:
    time.sleep(10)
b.stop()
The result never comes back from yield from cur.execute("SELECT count(*) FROM test").
Long story short: you cannot share an aiopg pool object between different event loops.
Every aiopg.Pool is coupled to an event loop. If you don't pass the loop parameter explicitly, it is taken from an asyncio.get_event_loop() call.
So in your example you have a pool coupled to the event loop of the main thread.
When you execute a db query from the separate thread, you are trying to run it on that thread's loop, not the main one. That doesn't work.
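A minimal sketch of the fix, reusing the B class, the test() coroutine, and the dsn string from the question (assumed to be module-level here), in the same Python 3.4-era style: create the pool from a coroutine scheduled on the worker thread's loop, so the pool and every query share one event loop.

    @asyncio.coroutine
    def connect_and_test(name_task):
        # Runs on b.loop, so asyncio.get_event_loop() inside
        # aiopg.create_pool() resolves to that same loop.
        pool = yield from aiopg.create_pool(dsn)
        yield from test(pool, name_task)

    b = B(event)
    b.start()
    event.wait()
    b.add_task(connect_and_test('Task1'))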

Threaded result not giving same result as un-threaded result (python)

I have created a program to generate data points of functions that I later plot. The program takes a class which defines the function, and creates a data-outputting object which, when called, writes the data points to a text file. To make the whole process faster I put the jobs in threads; however, when I do, the generated data is not always correct. I have attached a picture to show what I mean:
Here are some of the relevant bits of code:
from queue import Queue
import threading
import time

queueLock = threading.Lock()
workQueue = Queue(10)

def process_data(threadName, q, queue_window, done):
    while not done.get():
        queueLock.acquire()  # lock the queue before touching it
        if not workQueue.empty():
            data = q.get()
            # data is the Plot object to be run
            queueLock.release()
            data.parent_window = queue_window
            data.process()
        else:
            queueLock.release()
            time.sleep(1)

class WorkThread(threading.Thread):
    def __init__(self, threadID, q, done):
        threading.Thread.__init__(self)
        self.ID = threadID
        self.q = q
        self.done = done

    def get_qw(self, queue_window):
        # gets the queue_window object
        self.queue_window = queue_window

    def run(self):
        # this is called when thread.start() is called
        print("Thread {0} started.".format(self.ID))
        process_data(self.ID, self.q, self.queue_window, self.done)
        print("Thread {0} finished.".format(self.ID))

class Application(Frame):
    def __init__(self, etc):
        self.threads = []
        # does some things

    def makeThreads(self):
        for i in range(1, int(self.threadNum.get()) + 1):
            thread = WorkThread(i, workQueue, self.calcsDone)
            self.threads.append(thread)

    # more code which just processes the function etc, sorts out the gui stuff.
And in a separate class (as I'm using tkinter, so the actual code to get the threads to run is called in a different window) (self.parent is the Application class):
def run_jobs(self):
    if self.running == False:
        # threads are only initiated when jobs are to be run
        self.running = True
        self.parent.calcsDone.set(False)
        self.parent.threads = []  # make sure it is initially empty; we want new threads each time
        self.parent.makeThreads()
        self.threads = self.parent.threads
        for thread in self.threads:
            thread.get_qw(self)
            thread.start()
        # put the jobs in the workQueue
        queueLock.acquire()
        for job in self.job_queue:
            workQueue.put(job)
        queueLock.release()
    else:
        messagebox.showerror("Error", "Jobs already running")
This is all the code which relates to the threads.
I don't know why some data points are incorrect when I run the program with multiple threads, while running it with just a single thread produces perfect data. I tried looking up "thread-safe" processes, but couldn't find anything.
Thanks in advance!

Producers - consumers in python

For the producer-consumer problem I have come up with this solution:
import threading
import random
import time

class Bucket:
    def __init__(self, size):
        self.size = size
        self.current_size = 0
        self.cond_var = threading.Condition()

    def available_for_put(self):
        return self.current_size < self.size

    def available_for_get(self):
        return self.current_size > 0

    def put(self):
        self.current_size = self.current_size + 1
        print(self)
        self.cond_var.notify_all()

    def get(self):
        self.current_size = self.current_size - 1
        print(self)
        self.cond_var.notify_all()

    def acquire(self):
        self.cond_var.acquire()

    def release(self):
        self.cond_var.release()

    def wait(self):
        self.cond_var.wait()

    def __str__(self):
        return "Size is {0}".format(self.current_size)

class Worker(threading.Thread):
    PRODUCER = 1
    CONSUMER = 0

    def __init__(self, bucket, kind):
        threading.Thread.__init__(self)
        self.kind = kind
        self.bucket = bucket

    def run(self):
        while(1):
            self.bucket.acquire()
            while(((self.kind == Worker.PRODUCER) and (not self.bucket.available_for_put())) or \
                  ((self.kind == Worker.CONSUMER) and (not self.bucket.available_for_get()))):
                self.bucket.wait()
                ### self.bucket.acquire()
            if self.kind == Worker.PRODUCER:
                self.bucket.put()
            else:
                self.bucket.get()
            time.sleep(0.1)
            self.bucket.release()

bucket = Bucket(10)
workers = []
for i in range(10):
    workers.append(Worker(bucket, i % 2))
for w in workers:
    w.start()
    print("Thread started")
for w in workers:
    w.join()
Apparently, if I remove the while(1) loop and make every thread run the block inside it only once, the program reaches a deadlock, and I can't understand why.
The Producer-Consumer pattern can be readily implemented using Python's built-in queue support:
queue - a synchronised queue class
This could simplify your code. Also very useful is the event scheduler:
sched - event scheduler
And since your question is tagged with python-3.x, you should definitely have a look at the concurrent.futures module:
concurrent.futures - launching parallel tasks
Your workers could be tasks and the bucket could become a queue.
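For illustration, a minimal sketch of what that queue-based version could look like (names are assumed; a bounded queue.Queue does the locking, waiting, and notifying that Bucket implements by hand):

    import threading
    import queue

    bucket = queue.Queue(maxsize=10)  # bounded, like Bucket(10)

    def producer():
        while True:
            bucket.put(1)  # blocks while the queue is full
            print("Size is", bucket.qsize())

    def consumer():
        while True:
            bucket.get()  # blocks while the queue is empty
            print("Size is", bucket.qsize())

    workers = [threading.Thread(target=producer), threading.Thread(target=consumer)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()  # runs forever, like the while(1) workers above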
Apparently the problem is that wait() already reacquires the lock when it wakes up, so the now commented-out acquire() would lock the bucket a second time and lead to a deadlock.