I am using Python's multiprocessing module in some of my code. I have a controller class that creates a worker class and performs some action:
import multiprocessing
from multiprocessing import Queue, Process, Manager

class dosomething(multiprocessing.Process):
    def __init__(self, managerList):
        self.mlist = managerList
        print self.mlist
    def run(self):
        self.mlist.append((4,5,6))

class doController:
    def __init__(self):
        mgr = Manager()
        self.mlist = mgr.list()
        self.mlist.append((1,2,3,4))
        t = dosomething(self.mlist)
        #t.daemon = True
        #t.start()
    def printer(self):
        return self.mlist

gd = doController()
print gd.printer()
Printing mlist in the __init__ part of dosomething prints [(1, 2, 3, 4)] as expected, but appending to the list in the run part does not work, giving IOError 11. Can anyone tell me whether this is right or wrong?
The call to Process.__init__ is missing from your subclass's __init__.
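For reference, here is a minimal sketch of the question's class with that one fix applied (same names as the question; the start/join and final print are only there to show the effect):

from multiprocessing import Process, Manager

class dosomething(Process):
    def __init__(self, managerList):
        Process.__init__(self)  # initialize the Process base class first
        self.mlist = managerList
    def run(self):
        self.mlist.append((4,5,6))

if __name__ == '__main__':
    mgr = Manager()
    mlist = mgr.list()
    mlist.append((1,2,3,4))
    t = dosomething(mlist)
    t.start()
    t.join()
    print(list(mlist))  # [(1, 2, 3, 4), (4, 5, 6)]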
Alternatively, you don't necessarily need to create a Process subclass; you could use plain functions:
from multiprocessing import Process, Manager

def dosomething(mlist):
    mlist.append((4,5,6))

def main():
    manager = Manager()
    L = manager.list((1,2,3,4))
    p = Process(target=dosomething, args=(L,))
    p.start()
    p.join()
    print L

if __name__ == '__main__':
    main()
I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The function is so big I cannot put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some_value  # placeholder
    return ood

seqs = []
for seq in range(1, 10):  # len(seqs)+1
    seq = txt + str(seq)
    seqs.append(seq)
    # serial call of the function
    track_ids(seq)

# parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    ood_id = ex.map(track_ids, seqs)
If I run the code serially it takes 3.0 minutes, but the parallel version with concurrent.futures takes 3.5 minutes.
Can someone please explain why that is, and suggest a way to solve the problem?
By the way, I have 12 cores.
Thanks
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
from cProfile import Profile
from pstats import Stats
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The function is so big I cannot put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some_value  # placeholder
    return ood

def profile_seq():
    p = Profile()  # one and only profiler instance
    p.enable()
    seqs = []
    for seq in range(1, 10):  # len(seqs)+1
        seq = txt + str(seq)
        seqs.append(seq)
        # serial call of the function
        track_ids(seq)
    p.disable()
    return Stats(p), seqs

def track_ids_pr(seq):
    p = Profile()  # profile the child tasks
    p.enable()
    retval = track_ids(seq)
    p.disable()
    return (Stats(p, stream="dummy"), retval)

def profile_parallel():
    p = Profile()  # profile stuff in the main process
    p.enable()
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
        retvals = ex.map(track_ids_pr, seqs)  # seqs as built in profile_seq (pseudocode)
    p.disable()
    s = Stats(p)
    out = []
    for ret in retvals:
        s.add(ret[0])
        out.append(ret[1])
    return s, out

if __name__ == "__main__":
    stat, retval = profile_parallel()
    stat.print_stats()
EDIT: Unfortunately I found out that pstats.Stats objects cannot be sent over a multiprocessing.Queue as-is because they are not picklable (which is needed for the operation of concurrent.futures). A Stats object normally stores a reference to a file for the purpose of writing statistics to that file, and if none is given, it will by default grab a reference to sys.stdout. We don't actually need that reference until we want to print out the statistics, so we can give it a temporary value to prevent the pickle error and restore an appropriate value later. The following example should be copy-paste-able and run just fine, rather than the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys

def isprime(x):
    for d in range(2, int(x**.5) + 1):  # include the square root itself
        if x % d == 0:
            return False
    return True

def foo(retq):
    p = Profile()
    p.enable()
    primes = []
    max_n = 2**20
    for n in range(3, max_n):
        if isprime(n):
            primes.append(n)
    p.disable()
    retq.put(Stats(p, stream="dummy"))  # dirty hack: set `stream` to something picklable, then override it later

if __name__ == "__main__":
    q = Queue()
    p1 = Process(target=foo, args=(q,))
    p1.start()
    p2 = Process(target=foo, args=(q,))
    p2.start()

    s1 = q.get()
    s1.stream = sys.stdout  # restore a real file object
    s2 = q.get()
    # s2.stream is not needed: when one `Stats` object is added to another, the `stream` gets thrown away anyway.
    s1.add(s2)  # add up the stats from both child processes
    s1.print_stats()  # s1.stream gets used here, but not before (provide a file instead of sys.stdout to write to that file)

    p1.join()
    p2.join()
I spent nearly the whole day on this and have come to the end of my knowledge:
I want to change a shared multiprocessing.Value string in a subprocess, but Python hangs as soon as the subprocess tries to change the shared value.
Below is an example code:
from multiprocessing import Process, Value, freeze_support
from ctypes import c_wchar_p

def test(x):
    with x.get_lock():
        x.value = 'THE TEST WORKED'
    return

if __name__ == "__main__":
    freeze_support()
    value = Value(c_wchar_p, '')
    p = Process(target=test, args=(value,))
    p.start()
    print(p.pid)
    # this try block is to also allow p.run()
    try:
        p.join()
        p.terminate()
    except:
        pass
    print(value.value)
What I tried that does not work:
I tried ctypes c_wchar_p and c_char_p, but both result in the same freezing.
I also tried without x.get_lock().
I also tried without freeze_support().
What works (but does not help):
Using a float as the shared value (value = Value('d', 0) and x.value = 1).
Running the process without starting a subprocess (replacing p.start() with p.run()).
I am using Windows 10 64-bit and Python 3.6.4 (Spyder, but I also tried outside of Spyder).
Any help welcome!
A shared pointer won't work in another process because the pointer is only valid in the process in which it was created. Instead, use an array:
import multiprocessing as mp

def test(x):
    x.value = b'Test worked!'

if __name__ == "__main__":
    x = mp.Array('c', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.value)
Output:
b'Test worked!'
Note that array type 'c' is specialized and returns a SynchronizedString, whereas other types return a SynchronizedArray. Here's how to use type 'u', for example:
import multiprocessing as mp

def test(x):
    x.get_obj().value = 'Test worked!'

if __name__ == "__main__":
    x = mp.Array('u', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.get_obj().value)
Output:
Test worked!
Note that operations on the wrapped value that are non-atomic, such as += (a read/modify/write), should be protected with a with x.get_lock(): context manager.
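For instance, a minimal sketch of that locking pattern with a hypothetical shared integer counter (not part of the question's code):

import multiprocessing as mp

def add_many(counter):
    for _ in range(10000):
        with counter.get_lock():  # += is a read/modify/write, so hold the lock
            counter.value += 1

if __name__ == "__main__":
    counter = mp.Value('i', 0)
    procs = [mp.Process(target=add_many, args=(counter,)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(counter.value)  # reliably 40000 with the lock; often less without it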
I'm using PyQt5 and Python 3. I use 3 QThread classes to run something, and after they are done I need to execute a 4th QThread class. The 4th must start only after all of the QThreads that were started have finished (whether that is all 3, only 2, or only 1); it must not run while the first 3 are working.
I looked on the internet but I couldn't find a solution. My code looks like this:
class MyWindow(QtWidgets.QMainWindow):
    def __init__(self):
        QtWidgets.QMainWindow.__init__(self)
        file_path = os.path.abspath('builder_gui.ui')
        uic.loadUi(file_path, self)
        self.obj1 = TasksThread1(self.comboBox.currentText(), self.comboBox_6.currentText())
        self.obj2 = TasksThread2(self.comboBox_2.currentText(), self.comboBox_5.currentText())
        self.obj3 = TasksThread3(self.comboBox_3.currentText(), self.comboBox_4.currentText())
        self.obj4 = TasksThread4()
        self.menubar.setNativeMenuBar(False)
        self.progressVal = 1
        self.cwd = os.getcwd()
        self.obj1.newValueProgress.connect(self.increment_progress)
        self.obj1.message.connect(self.status_bar)
        self.obj2.newValueProgress.connect(self.increment_progress)
        self.obj2.message.connect(self.status_bar)
        self.obj3.newValueProgress.connect(self.increment_progress)
        self.obj3.message.connect(self.status_bar)
        self.obj4.newValueProgress.connect(self.increment_progress)
        self.obj4.message.connect(self.status_bar)
        self.obj4.doneSignal.connect(self.calculate_done_limit)
        self.pushButton.pressed.connect(self.execute_build_script)

    def calculate_done_limit(self):
        limitCalc = 100 - int(self.progressBar.value())
        self.increment_progress(limitCalc)

    def run_gits_all(self):
        if self.crowdTwistCheck.isChecked():
            self.obj1.start()
        else:
            pass
        if self.ThemeCheck.isChecked():
            self.obj2.start()
        else:
            pass
        if self.mainAwsCheck.isChecked():
            self.obj3.start()
        else:
            pass

    def execute_build_script(self):
        self.progressBar.setValue(1)
        self.progressVal = 1
        self.run_gits_all()

    def execute_last_part(self):
        self.obj4.start()

    def status_bar(self, value_in):
        read1 = self.textBrowser.toPlainText()
        self.textBrowser.setText(read1 + "\n" + value_in)

    def increment_progress(self, valueIn):
        self.progressVal += valueIn
        self.progressBar.setValue(self.progressVal)

if __name__ == '__main__':
    import sys
    app = QtWidgets.QApplication(sys.argv)
    window = MyWindow()
    window.show()
    sys.exit(app.exec_())
My first 3 QThreads are like this:
class TasksThread1(QThread):
    newValueProgress = QtCore.pyqtSignal(int)
    message = QtCore.pyqtSignal(str)
    doneSignal = QtCore.pyqtSignal()

    def __init__(self, branch, git):
        QThread.__init__(self)
        self.branch = branch
        self.git = git

    def remove_folder(self):
        do_something_1

    def CrowdTwistRepo(self):
        do_something_2

    def run(self):
        self.remove_folder()
        self.CrowdTwistRepo()
My last QThread looks like this:
class TasksThread4(QThread):
    newValueProgress = QtCore.pyqtSignal(int)
    message = QtCore.pyqtSignal(str)
    doneSignal = QtCore.pyqtSignal()

    def __init__(self):
        QThread.__init__(self)

    def gulp_sass_function(self):
        do_something_1

    def gulp_uglify_function(self):
        do_something_2

    def zipping_function(self):
        do_something_3

    def run(self):
        self.gulp_sass_function()
        self.gulp_uglify_function()
        self.zipping_function()
If I run the code, all of the QThreads start; I want my 4th QThread to start only after the first 3 have finished working. I used QThreads to improve the GUI experience, because the GUI froze a lot.
Thanks,
When each of your first 3 threads is done, emit a signal. Then connect those signals to a function that will start the last thread, for example:
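A minimal sketch of that idea, reusing the names from the question inside MyWindow (the counting attributes and the slot name are assumptions; finished is QThread's built-in signal):

# Inside MyWindow: replace run_gits_all and add one slot.
def run_gits_all(self):
    self.started_count = 0
    self.finished_count = 0
    for obj, checkbox in ((self.obj1, self.crowdTwistCheck),
                          (self.obj2, self.ThemeCheck),
                          (self.obj3, self.mainAwsCheck)):
        if checkbox.isChecked():
            obj.finished.connect(self.on_worker_finished)  # built-in QThread signal
            self.started_count += 1
            obj.start()

def on_worker_finished(self):
    self.finished_count += 1
    if self.finished_count == self.started_count:
        self.execute_last_part()  # start obj4 only after every started worker is done

Because run_gits_all can be triggered repeatedly by the button, in a real implementation you would connect the finished signals once in __init__ (or pass QtCore.Qt.UniqueConnection) so duplicate connections don't stack.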
I'm trying to use ZeroMQ in Python (pyzmq) together with multiprocessing. As a minimal (non-)working example, I have a server class and a client class which both inherit from multiprocessing.Process. The client child process should send a message to the server child process, which should print the message:
#mpzmq_class.py
from multiprocessing import Process
import zmq

class Server(Process):
    def __init__(self):
        super(Server, self).__init__()
        self.ctx = zmq.Context()
        self.socket = self.ctx.socket(zmq.PULL)
        self.socket.connect("tcp://localhost:6068")

    def run(self):
        msg = self.socket.recv_string()
        print(msg)

class Client(Process):
    def __init__(self):
        super(Client, self).__init__()
        self.ctx = zmq.Context()
        self.socket = self.ctx.socket(zmq.PUSH)
        self.socket.bind("tcp://*:6068")

    def run(self):
        msg = "Hello World!"
        self.socket.send_string(msg)

if __name__ == "__main__":
    s = Server()
    c = Client()
    s.start()
    c.start()
    s.join()
    c.join()
Now if I run this, the server process seems to hang at the receive call msg = self.socket.recv_string(). In another (more complicated) case, it even hung at the socket.connect("...") statement.
If I rewrite the script to use functions instead of classes/objects, it runs just fine:
# mpzmq_function.py
from multiprocessing import Process
import zmq

def server():
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PULL)
    socket.connect("tcp://localhost:6068")
    msg = socket.recv_string()
    print(msg)

def client():
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PUSH)
    socket.bind("tcp://*:6068")
    msg = "Hello World!"
    socket.send_string(msg)

if __name__ == "__main__":
    s = Process(target=server)
    c = Process(target=client)
    s.start()
    c.start()
    s.join()
    c.join()
Output:
paul#AP-X:~$ python3 mpzmq_function.py
Hello World!
Can anybody help me with this? I guess it's something I didn't understand concerning the usage of multiprocessing.
Thank you!
I ran into the same issue.
I guess the problem is that the run method has no usable access to the context object: the context and sockets are created in the parent process during __init__, and the resources behind them (most likely the context's background IO thread) do not survive into the child process, so the child hangs when it tries to use them.
If you instantiate the context in the run method (which executes in the child process), it works.
Here is a working example:
#mpzmq_class.py
from multiprocessing import Process
import zmq

class Base(Process):
    """
    Inherits from Process and
    holds the zmq address.
    """
    def __init__(self, address):
        super().__init__()
        self.address = address

class Server(Base):
    def run(self):
        ctx = zmq.Context()
        socket = ctx.socket(zmq.PULL)
        socket.connect(self.address)
        msg = socket.recv_string()
        print(msg)

class Client(Base):
    def run(self):
        ctx = zmq.Context()
        socket = ctx.socket(zmq.PUSH)
        socket.bind(self.address)
        msg = "Hello World!"
        socket.send_string(msg)

if __name__ == "__main__":
    server_addr = "tcp://127.0.0.1:6068"
    client_addr = "tcp://*:6068"
    s = Server(server_addr)
    c = Client(client_addr)
    s.start()
    c.start()
    s.join()
    c.join()
I added a base class to demonstrate that you can still access normal Python objects from the run method. If you put the context object into the __init__ method, it won't work.
I have a class that runs an infinite loop using threads to populate a thread-safe queue:
from threading import Thread
from Queue import Queue
import time

class factory:
    def __init__(self):
        self.running = True
        self.q = Queue()
        t = Thread(target=self.count_indefinitely)
        t.start()
        time.sleep(3)
        print self.q.qsize()

    def count_indefinitely(self):
        i = 0
        while self.running:
            i += 1
            self.q.put(i)

if __name__ == '__main__':
    f = factory()
    time.sleep(2)
    print 'Hello!'
    f.running = False
The code reaches the part where it should print out the size of the queue, but I can't get it to print "Hello!" in the main function. How should I go about fixing this?