Python 3 threading, queue

I'm facing a "little" problem with adding data to a queue.Queue from a thread.
Environment: Ubuntu 18.04 / Python 3.6.7 / Python 3.8.5
Below is my simplified code. Any help will be appreciated.
from threading import Thread, Lock
from queue import Queue
from random import randint

thread = None
thread_lock = Lock()
q = Queue()

def worker(number):
    random_number = [str(randint(1, 999)), number]
    q.put(random_number)
def first_function(iteration):
    global thread
    some_list = []
    with thread_lock:
        threads = []
        if thread is None:
            for iterator in range(iteration):
                thread = Thread(target=worker, args=(iterator,))
                threads.append(thread)
                thread.start()
            for thread_ in threads:
                thread_.join()
                thread = None
            while not q.empty():
                some_list.append(q.get())
    return some_list

print(first_function(10))
print(first_function(5))
The second call returns an empty list. Please give me an idea of what is wrong.

The problem is with "thread = None". After the method is executed the first time, the global thread has a <Thread ...> object assigned to it, and on the second call, when you check "if thread is None", surprise: it is not None. The if block is skipped, so no workers run and the queue stays empty.

Related

python3 - Main thread kills child thread after some timeout?

I'm not sure whether this is doable with threads in Python. Basically, I have a function which invokes the GDAL library to open an image file. But this call can get stuck, so after 10 seconds, if the file cannot be opened, it should raise an exception.
import threading
import osgeo.gdal as gdal

def test(filepath):
    # After 10 seconds, if the filepath cannot be opened, this function
    # must end and throw an exception.
    # If the filepath can be opened within 10 seconds, it returns the dataset.
    dataset = gdal.Open(filepath)
    return dataset

filepath = "http://.../test.tif"
t = threading.Thread(target=test, args=[filepath])
t.start()
# Is there something like t.timeout(10), so that if this thread cannot
# finish within 10 seconds, it raises a RuntimeException?
t.join()
I ended up using multiprocessing and its Queue to achieve what I wanted:
import multiprocessing
import time
from multiprocessing import Queue

TIME_OUT = 5

def worker(x, queue):
    time.sleep(15)
    a = (1, 5, 6, 7)
    queue.put(a)

queue = Queue()
process = multiprocessing.Process(target=worker, args=(5, queue))
process.start()
# The main process waits for the child for at most TIME_OUT seconds.
process.join(TIME_OUT)
if process.is_alive():
    print("Process hangs!")
    process.terminate()
print("Process finished")
print(queue.qsize())
if queue.qsize() > 0:
    a, b, _, d = queue.get()
    print(a, b, d)
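Building on that pattern, the exception the question asked for can be raised from the parent process. Below is a sketch (open_with_timeout and _open_worker are names made up here, not GDAL API); it assumes a success flag is enough for the caller, since a GDAL Dataset is a SWIG object and generally cannot be pickled back through the queue:

import multiprocessing

def _open_worker(path, q):
    import osgeo.gdal as gdal  # imported in the child process
    ds = gdal.Open(path)
    q.put(ds is not None)  # send a flag, not the (unpicklable) dataset

def open_with_timeout(filepath, timeout=10):
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=_open_worker, args=(filepath, q))
    p.start()
    p.join(timeout)  # wait at most `timeout` seconds
    if p.is_alive():
        p.terminate()
        p.join()
        raise TimeoutError("could not open %s within %ss" % (filepath, timeout))
    if q.empty():
        return False  # the child exited without reporting success
    return q.get()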

How to pass data between 3 threads that contain while True loops in Python?

I'm trying to generate data in two threads and receive that data in a separate thread that prints it.
Three threads in total: two threads generate data, one thread consumes the data generated.
The problem: the consumer thread does not receive the data generated by both producers.
How can I pass the data generated in the 2 threads and deliver it to the consumer thread?
# from threading import Thread
import concurrent.futures
from queue import Queue
import time

# A thread that produces data
def producer(out_q):
    while True:
        # Produce some data
        global data
        data = data + 2
        out_q.put(data)

# Another thread that produces data
def ac(out2_q):
    while True:
        global x
        x = x + 898934567
        out2_q.put(data)

# A thread that consumes data
def consumer(in_q):
    while True:
        # Get BOTH produced data from the 2 threads
        data = in_q.get()
        # Process the data
        time.sleep(.4)
        print(data, end=' ', flush=True)

x = 0
data = 0
q = Queue()
with concurrent.futures.ThreadPoolExecutor() as executor:
    t1 = executor.submit(consumer, q)
    t2 = executor.submit(producer, q)
    t3 = executor.submit(ac, q)
I recommend going with threading.Thread in this case. See the code below and follow the comments. Feel free to ask questions.
from threading import Thread, Event
from queue import Queue
import time

def producer_one(q: Queue, e: Event):
    while not e.is_set():
        q.put("one")
        time.sleep(1)
    print("Producer # one stopped")

def producer_two(q: Queue, e: Event):
    while not e.is_set():
        q.put("two")
        time.sleep(2)
    print("Producer # two stopped")

def consumer(q: Queue):
    while True:
        item = q.get()
        print(item)
        q.task_done()  # marks the task as done so q.join() can unblock
        time.sleep(2)
    # will never be printed, since this is a daemon thread
    print("All work is done by consumer!")

if __name__ == '__main__':
    _q = Queue()  # "connects" the threads
    _e = Event()  # used to stop the producers from the main thread
    # create the threads
    producer_th1 = Thread(target=producer_one, args=(_q, _e))
    producer_th2 = Thread(target=producer_two, args=(_q, _e))
    # daemon means the thread is stopped when the main thread stops
    consumer_th = Thread(target=consumer, args=(_q,), daemon=True)
    try:
        # start the threads
        producer_th1.start()
        producer_th2.start()
        consumer_th.start()
        time.sleep(20)
        _e.set()  # ask producers to stop
    except KeyboardInterrupt:
        _e.set()  # ask producer threads to stop
        print("Asked producer threads to stop")
    finally:
        producer_th1.join()  # main thread blocks until producer_th1 stops
        producer_th2.join()  # main thread blocks until producer_th2 stops
        _q.join()  # wait for the consumer to finish all tasks in the queue
        print("Queue is empty and the program will finish soon")
        time.sleep(2)  # wait 2 seconds to show that the consumer stops with the main thread
        print("All done!")

In PyGTK, how to start thread and continue calling function only when thread terminates

After reading many questions about threads and the .join() function, I still cannot figure out how to adapt the basic PyGObject threads example from the documentation so that it matches my use case:
#!/bin/python3
import threading
import time
from gi.repository import GLib, Gtk, GObject

def app_main():
    win = Gtk.Window(default_height=50, default_width=300)
    win.connect("destroy", Gtk.main_quit)

    def update_progess(i):
        progress.pulse()
        progress.set_text(str(i))
        return False

    def example_target():
        for i in range(50):
            GLib.idle_add(update_progess, i)
            time.sleep(0.2)

    def start_actions(self):
        print("do a few things before thread starts")
        thread = threading.Thread(target=example_target)
        thread.daemon = True
        thread.start()
        print("do other things after thread finished")

    mainBox = Gtk.Box(spacing=20, orientation="vertical")
    win.add(mainBox)
    btn = Gtk.Button(label="start actions")
    btn.connect("clicked", start_actions)
    mainBox.pack_start(btn, False, False, 0)
    progress = Gtk.ProgressBar(show_text=True)
    mainBox.pack_start(progress, False, False, 0)
    win.show_all()

if __name__ == "__main__":
    app_main()
    Gtk.main()
How can I make this code print "do other things after thread finished" only after my thread terminates, without freezing the main window?
First, just to make it clear, the thread isn't finished after you call its start method.
Look at the definition of the code running in the thread:
def example_target():
    for i in range(50):
        GLib.idle_add(update_progess, i)
        time.sleep(0.2)
What this does is basically repeat the following 50 times:
- tell GTK to execute update_progess the next time the system is idle (has no events to process);
- sleep for 0.2 seconds.
You could define a function after_thread, and have that scheduled when the thread finishes:
def example_target():
    for i in range(50):
        GLib.idle_add(update_progess, i)
        time.sleep(0.2)
    # the loop is finished, the thread will end
    GLib.idle_add(after_thread)
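The answer leaves after_thread undefined; a minimal hypothetical version, defined inside app_main next to update_progess so it can reach the progress widget, might look like this. Because it is scheduled with GLib.idle_add, it runs on the GTK main loop and can touch widgets safely:

def after_thread():
    # Runs on the GTK main loop, scheduled only after
    # example_target's loop has completed.
    progress.set_text("finished")
    print("do other things after thread finished")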

Python - How to use multiprocessing Lock in class instance?

I am using Python 3.7 on Windows.
What I am trying to do:
- lock a method of an instance of a class, while another process has acquired that same lock.
Attempts:
I have already done this successfully, but I don't want a global variable for the lock here; instead I want one completely internal to the class.
from multiprocessing import Lock, freeze_support, Pool
from time import sleep

def do_work(name):
    print(name + ' waiting for lock to work...', end='')
    sleep(2)
    with lock:
        print('done!')
        print(name + ' doing work...', end='')
        sleep(5)
        print('done!')

def init(olock):
    global lock
    lock = olock

if __name__ == '__main__':
    freeze_support()
    args_list = [('a'), ('b'), ('c')]
    lock = Lock()
    p = Pool(8, initializer=init, initargs=(lock,))
    p.map_async(do_work, args_list)
    p.close()
    p.join()
When this last chunk of code runs, it takes ~17.3 seconds because of the lock: the three 5-second critical sections serialize, so roughly 2 + 3 × 5 ≈ 17 seconds. Without the lock they overlap and it takes ~7 seconds (roughly 2 + 5).
I have tried to implement this inside a class, but the lock does nothing, and it always runs in ~7 seconds.
class O():
    def __init__(self):
        self.lock = Lock()

    def __getstate__(self):  # remove multiprocessing object(s) from the instance so it can be pickled
        self_dict = self.__dict__.copy()
        del self_dict['lock']
        return self_dict

    def __setstate__(self, state):  # restore the remaining state after unpickling
        self.__dict__.update(state)

    def _do_work(self, name):
        print(name + ' waiting for lock to work...', end='')
        sleep(2)
        with self.lock:
            print('done!')
            print(name + ' doing work...', end='')
            sleep(5)
            print('done!')

if __name__ == '__main__':
    freeze_support()
    c = O()
    pool = Pool(8)
    pool.apply_async(c._do_work, ('a',))
    pool.apply_async(c._do_work, ('b',))
    pool.apply_async(c._do_work, ('c',))
    pool.close()
    pool.join()
Question:
So, what can I do to lock up this class instance while I call a method that interacts with a resource asynchronously through multiprocessing?
apply_async will pickle the function object and send it to a pool worker process through a queue, but since c._do_work is a bound method, the instance would be pickled too, which results in an error. You could wrap it in a plain function:
c = O()

def w(*args):
    return c._do_work(*args)

if __name__ == '__main__':
    pool = Pool(1)
    pool.apply_async(w, ('a',))
    ...
You should also remove __setstate__/__getstate__.
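Putting the question's initializer trick together with this wrapper, one possible shape (my sketch; it assumes a class attribute counts as "internal to the class") keeps the Lock off the instance entirely while still sharing one lock across all workers:

from multiprocessing import Lock, Pool, freeze_support
from time import sleep

class O():
    _lock = None  # set in each worker process by init()

    def _do_work(self, name):
        print(name + ' waiting for lock to work...', end='')
        sleep(2)
        with self._lock:
            print('done!')
            print(name + ' doing work...', end='')
            sleep(5)
            print('done!')

def init(olock):
    O._lock = olock  # store the shared lock on the class, not the instance

c = O()

def w(*args):
    return c._do_work(*args)

if __name__ == '__main__':
    freeze_support()
    lock = Lock()
    pool = Pool(8, initializer=init, initargs=(lock,))
    pool.apply_async(w, ('a',))
    pool.apply_async(w, ('b',))
    pool.apply_async(w, ('c',))
    pool.close()
    pool.join()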

Python multiprocessing script partial output

I am following the principles laid down in this post to safely output results which will eventually be written to a file. Unfortunately, the code only prints 1 and 2, not 3 to 6.
import os
import argparse
import pandas as pd
import multiprocessing
from multiprocessing import Process, Queue
from time import sleep

def feed(queue, parlist):
    for par in parlist:
        queue.put(par)
    print("Queue size", queue.qsize())

def calc(queueIn, queueOut):
    while True:
        try:
            par = queueIn.get(block=False)
            res = doCalculation(par)
            queueOut.put((res))
            queueIn.task_done()
        except:
            break

def doCalculation(par):
    return par

def write(queue):
    while True:
        try:
            par = queue.get(block=False)
            print("response:", par)
        except:
            break

if __name__ == "__main__":
    nthreads = 2
    workerQueue = Queue()
    writerQueue = Queue()
    considerperiod = [1, 2, 3, 4, 5, 6]
    feedProc = Process(target=feed, args=(workerQueue, considerperiod))
    calcProc = [Process(target=calc, args=(workerQueue, writerQueue)) for i in range(nthreads)]
    writProc = Process(target=write, args=(writerQueue,))
    feedProc.start()
    feedProc.join()
    for p in calcProc:
        p.start()
    for p in calcProc:
        p.join()
    writProc.start()
    writProc.join()
On running the code, it prints:
$ python3 tst.py
Queue size 6
response: 1
response: 2
Also, is it possible to ensure that the write function always outputs 1, 2, 3, 4, 5, 6, i.e. in the same order in which the data is fed into the feed queue?
The error is connected to the task_done() call. If you remove it, the code works; the likely reason is that a plain multiprocessing.Queue has no task_done() method (only JoinableQueue does), so the call raises AttributeError, which the bare except turns into a silent break after each worker's first item. The loop is then only left because the queueIn.get(block=False) call throws an exception once the queue is empty. This might be just enough for your use case, but a better way is to use sentinels (as suggested in the multiprocessing docs; see the last example there). Here's a little rewrite so your program uses sentinels:
from multiprocessing import Process, Queue

def feed(queue, parlist, nthreads):
    for par in parlist:
        queue.put(par)
    for i in range(nthreads):
        queue.put(None)
    print("Queue size", queue.qsize())

def calc(queueIn, queueOut):
    while True:
        par = queueIn.get()
        if par is None:
            break
        res = doCalculation(par)
        queueOut.put((res))

def doCalculation(par):
    return par

def write(queue):
    while not queue.empty():
        par = queue.get()
        print("response:", par)

if __name__ == "__main__":
    nthreads = 2
    workerQueue = Queue()
    writerQueue = Queue()
    considerperiod = [1, 2, 3, 4, 5, 6]
    feedProc = Process(target=feed, args=(workerQueue, considerperiod, nthreads))
    calcProc = [Process(target=calc, args=(workerQueue, writerQueue)) for i in range(nthreads)]
    writProc = Process(target=write, args=(writerQueue,))
    feedProc.start()
    feedProc.join()
    for p in calcProc:
        p.start()
    for p in calcProc:
        p.join()
    writProc.start()
    writProc.join()
A few things to note:
- The sentinel is a None put into the queue. Note that you need one sentinel for every worker process.
- For the write function you don't need sentinel handling, as there is only one process and no concurrency to manage. (If you did the empty()-then-get() dance in your calc function, you would run into a problem: with only one item left in the queue, both workers could see empty() return False at the same time, both would call get(), and one of them would block forever.)
- You don't need to put feed and write into processes; just call them from your main function, as you don't want to run them in parallel anyway.
"how can I have the same order in output as in input? [...] I guess multiprocessing.map can do this"
Yes, map keeps the order. Rewriting your program into something simpler (you don't need the workerQueue and writerQueue) and adding random sleeps to prove that the output is still in order:
from multiprocessing import Pool
import time
import random

def calc(val):
    time.sleep(random.random())
    return val

if __name__ == "__main__":
    considerperiod = [1, 2, 3, 4, 5, 6]
    with Pool(processes=2) as pool:
        print(pool.map(calc, considerperiod))
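Since map returns the results in input order, writing them to a file (the question's stated end goal) is then a straight loop; "results.txt" is a made-up filename for this sketch:

from multiprocessing import Pool
import time
import random

def calc(val):
    time.sleep(random.random())
    return val

if __name__ == "__main__":
    considerperiod = [1, 2, 3, 4, 5, 6]
    with Pool(processes=2) as pool:
        results = pool.map(calc, considerperiod)  # order matches input
    with open("results.txt", "w") as f:  # hypothetical output file
        f.writelines("%s\n" % res for res in results)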
