python manager managed list - python-3.x

I am using Python's multiprocessing module in some of my code. I have a controller class that drives a worker class and performs some action.
import multiprocessing
from multiprocessing import Queue, Process, Manager

class dosomething(multiprocessing.Process):
    def __init__(self, managerList):
        self.mlist = managerList
        print self.mlist

    def run(self):
        self.mlist.append((4,5,6))

class doController:
    def __init__(self):
        mgr = Manager()
        self.mlist = mgr.list()
        self.mlist.append((1,2,3,4))
        t = dosomething(self.mlist)
        #t.daemon = True
        #t.start()

    def printer(self):
        return self.mlist

gd = doController()
print gd.printer()
Printing mlist in the __init__ part of dosomething prints [(1, 2, 3, 4)] as expected, but appending to the list in the run part of dosomething does not work, giving IOError 11. Can anyone tell me what is going wrong here?

The call to Process.__init__ is missing, so the process machinery is never initialized.
You don't necessarily need to create a Process subclass; you could use functions instead:
from multiprocessing import Process, Manager

def dosomething(mlist):
    mlist.append((4,5,6))

def main():
    manager = Manager()
    L = manager.list((1,2,3,4))
    p = Process(target=dosomething, args=(L,))
    p.start()
    p.join()
    print(L)

if __name__ == '__main__':
    main()
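If you do want to keep the subclass approach, here is a minimal sketch of the fix (my illustration reusing the question's names): call the parent constructor before anything else.

from multiprocessing import Process, Manager

class dosomething(Process):
    def __init__(self, managerList):
        super().__init__()  # the missing call: initialize Process first
        self.mlist = managerList

    def run(self):
        self.mlist.append((4,5,6))  # runs in the child process

if __name__ == '__main__':
    mgr = Manager()
    mlist = mgr.list([(1,2,3,4)])
    t = dosomething(mlist)
    t.start()
    t.join()
    print(list(mlist))  # [(1, 2, 3, 4), (4, 5, 6)]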

Related

Why serial code is faster than concurrent.futures in this case?

I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

seqs = []
for seq in range(1, 10):  # len(seqs)+1):
    seq = txt + str(seq)
    seqs.append(seq)
    # serial call of the function
    track_ids(seq)

# parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    ood_id = ex.map(track_ids, seqs)
If I run the code serially it takes 3.0 minutes, but in parallel with concurrent.futures it takes 3.5 minutes.
Can someone please explain why that is, and suggest a way to solve the problem?
By the way, I have 12 cores.
Thanks
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
import multiprocessing as mp
from cProfile import Profile
from pstats import Stats
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

def profile_seq():
    p = Profile()  # one and only profiler instance
    p.enable()
    seqs = []
    for seq in range(1, 10):  # len(seqs)+1):
        seq = txt + str(seq)
        seqs.append(seq)
        # serial call of the function
        track_ids(seq)
    p.disable()
    return Stats(p), seqs

def track_ids_pr(seq):
    p = Profile()  # profile the child tasks
    p.enable()
    retval = track_ids(seq)
    p.disable()
    return (Stats(p, stream="dummy"), retval)

def profile_parallel():
    p = Profile()  # profile stuff in the main process
    p.enable()
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
        retvals = ex.map(track_ids_pr, seqs)
    p.disable()
    s = Stats(p)
    out = []
    for ret in retvals:
        s.add(ret[0])
        out.append(ret[1])
    return s, out

if __name__ == "__main__":
    stat, retval = profile_parallel()
    stat.print_stats()
EDIT: Unfortunately I found out that pstats.Stats objects cannot be sent through a multiprocessing.Queue as-is because they are not picklable (pickling is also required for the operation of concurrent.futures). Evidently a Stats object normally stores a reference to a file for the purpose of writing statistics to that file, and if none is given, it will by default grab a reference to sys.stdout. We don't actually need that reference until we want to print out the statistics, so we can give it a temporary placeholder value to prevent the pickle error, and then restore an appropriate value later. The following example should be copy-paste-able and run just fine, rather than the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys

def isprime(x):
    for d in range(2, int(x**.5) + 1):  # +1 so the square root itself is tested
        if x % d == 0:
            return False
    return True

def foo(retq):
    p = Profile()
    p.enable()
    primes = []
    max_n = 2**20
    for n in range(3, max_n):
        if isprime(n):
            primes.append(n)
    p.disable()
    retq.put(Stats(p, stream="dummy"))  # Dirty hack: set `stream` to something picklable, then override later

if __name__ == "__main__":
    q = Queue()
    p1 = Process(target=foo, args=(q,))
    p1.start()
    p2 = Process(target=foo, args=(q,))
    p2.start()
    s1 = q.get()
    s1.stream = sys.stdout  # restore original file
    s2 = q.get()
    # s2.stream: if we are just adding this `Stats` object to another, the `stream` just gets thrown away anyway.
    s1.add(s2)  # add up the stats from both child processes
    s1.print_stats()  # s1.stream gets used here, but not before. If you provide a file to write to instead of sys.stdout, it will write to that file.
    p1.join()
    p2.join()
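The same placeholder-stream trick should also carry over to concurrent.futures, since the executor likewise pickles return values. Here is a sketch of that variant (my adaptation, not part of the original answer; prime_stats is a made-up helper):

import concurrent.futures
import sys
from cProfile import Profile
from pstats import Stats

def isprime(x):
    for d in range(2, int(x**.5) + 1):
        if x % d == 0:
            return False
    return True

def prime_stats(bounds):
    # profile one chunk of work inside a pool worker
    lo, hi = bounds
    p = Profile()
    p.enable()
    primes = [n for n in range(lo, hi) if isprime(n)]
    p.disable()
    return Stats(p, stream="dummy")  # placeholder stream keeps the Stats picklable

if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor() as ex:
        stats = list(ex.map(prime_stats, [(3, 2**19), (2**19, 2**20)]))
    total = stats[0]
    total.stream = sys.stdout  # restore a real stream before printing
    for s in stats[1:]:
        total.add(s)
    total.print_stats()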

Python freezes when accessing string value in subprocess

I spent nearly the whole day on this and have come to the end of my knowledge:
I want to change a shared multiprocessing.Value string in a subprocess, but Python hangs as soon as the subprocess tries to change the shared value.
Below is some example code:
from multiprocessing import Process, Value, freeze_support
from ctypes import c_wchar_p

def test(x):
    with x.get_lock():
        x.value = 'THE TEST WORKED'
    return

if __name__ == "__main__":
    freeze_support()
    value = Value(c_wchar_p, '')
    p = Process(target=test, args=(value,))
    p.start()
    print(p.pid)
    # this try block is to also allow p.run()
    try:
        p.join()
        p.terminate()
    except:
        pass
    print(value.value)
What I tried that does not work:
I tried ctypes c_wchar_p and c_char_p, but both result in the same freeze.
I also tried without x.get_lock().
I also tried without freeze_support().
What works (but does not help):
Using a float as the shared value (value = Value('d', 0) and x.value = 1).
Running the process without starting a subprocess (replacing p.start() with p.run()).
I am using Windows 10 64 bit and Python 3.6.4 (Spyder, but also tried outside of Spyder).
Any help welcome!
A shared pointer won't work in another process because the pointer is only valid in the process in which it was created. Instead, use an array:
import multiprocessing as mp

def test(x):
    x.value = b'Test worked!'

if __name__ == "__main__":
    x = mp.Array('c', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.value)
Output:
b'Test worked!'
Note that the array type 'c' is specialized and returns a SynchronizedString, whereas other types return a SynchronizedArray. Here's how to use type 'u', for example:
import multiprocessing as mp

def test(x):
    x.get_obj().value = 'Test worked!'

if __name__ == "__main__":
    x = mp.Array('u', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.get_obj().value)
Output:
Test worked!
Note that non-atomic read/modify/write operations on the wrapped value, such as +=, should be protected with a with x.get_lock(): context manager.
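For example, here is a minimal sketch of that locking pattern (my illustration, not part of the original answer), appending to the shared 'u' array from two processes:

import multiprocessing as mp

def appender(x, suffix):
    with x.get_lock():  # serialize the read/modify/write
        x.get_obj().value = x.get_obj().value + suffix

if __name__ == "__main__":
    x = mp.Array('u', 15)
    x.get_obj().value = 'Test'
    procs = [mp.Process(target=appender, args=(x, c)) for c in '!?']
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(x.get_obj().value)  # 'Test!?' or 'Test?!' depending on scheduling

Without the lock, both processes could read 'Test' before either writes back, and one suffix would be lost.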

What is the best way to run a Python function after some PyQt5 QThread classes finish work?

I'm using PyQt5 and Python 3. I use 3 QThread classes to run something, and after they are done I need to execute a 4th QThread class. The 4th must run only after all of the started QThread classes have finished work, whether that is all 3, only 2, or only 1. It must not run while the first 3 are working.
I looked on the internet but couldn't find a solution. My code looks like this:
class MyWindow(QtWidgets.QMainWindow):
    def __init__(self):
        QtWidgets.QMainWindow.__init__(self)
        file_path = os.path.abspath('builder_gui.ui')
        uic.loadUi(file_path, self)
        self.obj1 = TasksThread1(self.comboBox.currentText(), self.comboBox_6.currentText())
        self.obj2 = TasksThread2(self.comboBox_2.currentText(), self.comboBox_5.currentText())
        self.obj3 = TasksThread3(self.comboBox_3.currentText(), self.comboBox_4.currentText())
        self.obj4 = TasksThread4()
        self.menubar.setNativeMenuBar(False)
        self.progressVal = 1
        self.cwd = os.getcwd()
        self.obj1.newValueProgress.connect(self.increment_progress)
        self.obj1.message.connect(self.status_bar)
        self.obj2.newValueProgress.connect(self.increment_progress)
        self.obj2.message.connect(self.status_bar)
        self.obj3.newValueProgress.connect(self.increment_progress)
        self.obj3.message.connect(self.status_bar)
        self.obj4.newValueProgress.connect(self.increment_progress)
        self.obj4.message.connect(self.status_bar)
        self.obj4.doneSignal.connect(self.calculate_done_limit)
        self.pushButton.pressed.connect(self.execute_build_script)

    def calculate_done_limit(self):
        limitCalc = 100 - int(self.progressBar.value())
        self.increment_progress(limitCalc)

    def run_gits_all(self):
        if self.crowdTwistCheck.isChecked():
            self.obj1.start()
        else:
            pass
        if self.ThemeCheck.isChecked():
            self.obj2.start()
        else:
            pass
        if self.mainAwsCheck.isChecked():
            self.obj3.start()
        else:
            pass

    def execute_build_script(self):
        self.progressBar.setValue(1)
        self.progressVal = 1
        self.run_gits_all()

    def execute_last_part(self):
        self.obj4.start()

    def status_bar(self, value_in):
        read1 = self.textBrowser.toPlainText()
        self.textBrowser.setText(read1 + "\n" + value_in)

    def increment_progress(self, valueIn):
        self.progressVal += valueIn
        self.progressBar.setValue(self.progressVal)

if __name__ == '__main__':
    import sys
    app = QtWidgets.QApplication(sys.argv)
    window = MyWindow()
    window.show()
    sys.exit(app.exec_())
My first 3 QThreads are like this:
class TasksThread1(QThread):
    newValueProgress = QtCore.pyqtSignal(int)
    message = QtCore.pyqtSignal(str)
    doneSignal = QtCore.pyqtSignal()

    def __init__(self, branch, git):
        QThread.__init__(self)
        self.branch = branch
        self.git = git

    def remove_folder(self):
        do_something_1

    def CrowdTwistRepo(self):
        do_something_2

    def run(self):
        self.remove_folder()
        self.CrowdTwistRepo()
My last QThread looks like this:
class TasksThread4(QThread):
    newValueProgress = QtCore.pyqtSignal(int)
    message = QtCore.pyqtSignal(str)
    doneSignal = QtCore.pyqtSignal()

    def __init__(self):
        QThread.__init__(self)

    def gulp_sass_function(self):
        do_something_1

    def gulp_uglify_function(self):
        do_something_2

    def zipping_function(self):
        do_something_3

    def run(self):
        self.gulp_sass_function()
        self.gulp_uglify_function()
        self.zipping_function()
If I run the code, all of the QThreads start, but I want my 4th QThread to start only after the first 3 have finished working. I used QThreads to improve the GUI experience; the GUI froze a lot.
Thanks,
When each of your first 3 threads is done, have it emit a signal. Then connect those signals to a slot that counts how many started threads have finished, and start the last thread once they are all done, for example:
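A minimal sketch of that idea (my illustration built on the question's MyWindow attributes; the pending counter and on_worker_finished slot are made-up names), using QThread's built-in finished signal:

# drop-in replacement for run_gits_all inside MyWindow
def run_gits_all(self):
    self.pending = 0  # how many workers were actually started
    for thread, checkbox in ((self.obj1, self.crowdTwistCheck),
                             (self.obj2, self.ThemeCheck),
                             (self.obj3, self.mainAwsCheck)):
        if checkbox.isChecked():
            self.pending += 1
            thread.finished.connect(self.on_worker_finished)  # built-in QThread signal
            thread.start()

def on_worker_finished(self):
    self.pending -= 1
    if self.pending == 0:  # every started worker has finished
        self.execute_last_part()  # starts self.obj4

If execute_build_script can run more than once, make the finished connections once (e.g. in __init__) instead, so they are not duplicated on every click.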

ZeroMQ hangs in a python multiprocessing class/object solution

I'm trying to use ZeroMQ in Python (pyzmq) together with multiprocessing. As a minimal (not) working example I have a server class and a client class, both of which inherit from multiprocessing.Process. The client, as a child process, should send a message to the server child process, which should print the message:
# mpzmq_class.py
from multiprocessing import Process
import zmq

class Server(Process):
    def __init__(self):
        super(Server, self).__init__()
        self.ctx = zmq.Context()
        self.socket = self.ctx.socket(zmq.PULL)
        self.socket.connect("tcp://localhost:6068")

    def run(self):
        msg = self.socket.recv_string()
        print(msg)

class Client(Process):
    def __init__(self):
        super(Client, self).__init__()
        self.ctx = zmq.Context()
        self.socket = self.ctx.socket(zmq.PUSH)
        self.socket.bind("tcp://*:6068")

    def run(self):
        msg = "Hello World!"
        self.socket.send_string(msg)

if __name__ == "__main__":
    s = Server()
    c = Client()
    s.start()
    c.start()
    s.join()
    c.join()
Now if I run this, the server process seems to hang at the receive call msg = self.socket.recv_string(). In another (more complicated) case, it even hung at the socket.connect("...") statement.
If I rewrite the script to use functions instead of classes/objects, it runs just fine:
# mpzmq_function.py
from multiprocessing import Process
import zmq

def server():
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PULL)
    socket.connect("tcp://localhost:6068")
    msg = socket.recv_string()
    print(msg)

def client():
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PUSH)
    socket.bind("tcp://*:6068")
    msg = "Hello World!"
    socket.send_string(msg)

if __name__ == "__main__":
    s = Process(target=server)
    c = Process(target=client)
    s.start()
    c.start()
    s.join()
    c.join()
Output:
paul#AP-X:~$ python3 mpzmq_function.py
Hello World!
Can anybody help me with this? I guess it's something I didn't understand concerning the usage of multiprocessing.
Thank you!
I ran into the same issue.
I guess the problem is that the run method has no access to the context object.
Maybe it has something to do with the C implementation and the fact that processes do not have shared memory.
If you instantiate the context in the run method, it works.
Here is a working example:
# mpzmq_class.py
from multiprocessing import Process
import zmq

class Base(Process):
    """
    Inherits from Process and
    holds the zmq address.
    """
    def __init__(self, address):
        super().__init__()
        self.address = address

class Server(Base):
    def run(self):
        ctx = zmq.Context()
        socket = ctx.socket(zmq.PULL)
        socket.connect(self.address)
        msg = socket.recv_string()
        print(msg)

class Client(Base):
    def run(self):
        ctx = zmq.Context()
        socket = ctx.socket(zmq.PUSH)
        socket.bind(self.address)
        msg = "Hello World!"
        socket.send_string(msg)

if __name__ == "__main__":
    server_addr = "tcp://127.0.0.1:6068"
    client_addr = "tcp://*:6068"
    s = Server(server_addr)
    c = Client(client_addr)
    s.start()
    c.start()
    s.join()
    c.join()
I added a base class to demonstrate that you can still access normal Python objects from the run method. If you put the context object into the __init__ method, it won't work.

Infinite threaded function in class constructor

I have a class that runs an infinite loop using threads to populate a thread-safe queue:
from threading import Thread
from Queue import Queue
import time

class factory:
    def __init__(self):
        self.running = True
        self.q = Queue()
        t = Thread(target=self.count_indefinitely)
        t.start()
        time.sleep(3)
        print self.q.qsize()

    def count_indefinitely(self):
        i = 0
        while self.running:
            i += 1
            self.q.put(i)

if __name__ == '__main__':
    f = factory()
    time.sleep(2)
    print 'Hello!'
    f.running = False
The code reaches the part where I need to print out the size of the queue. However, I can't get it to print "hello" in the main function. How should I go about fixing this?
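One likely culprit (a guess, not a verified diagnosis) is that the tight producer loop fills the unbounded queue, and therefore memory, as fast as it can while hogging the GIL, starving the main thread. Here is a sketch of a common mitigation, bounding the queue and marking the worker thread as a daemon:

from threading import Thread
from Queue import Queue
import time

class factory:
    def __init__(self):
        self.running = True
        self.q = Queue(maxsize=1000)  # bounded: put() blocks when full
        t = Thread(target=self.count_indefinitely)
        t.daemon = True  # a daemon thread cannot keep the interpreter alive
        t.start()
        time.sleep(3)
        print self.q.qsize()

    def count_indefinitely(self):
        i = 0
        while self.running:
            i += 1
            self.q.put(i)  # blocks once the queue is full instead of spinning

if __name__ == '__main__':
    f = factory()
    time.sleep(2)
    print 'Hello!'
    f.running = False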
