Python multiprocessing a child function - python-3.x

I am trying to learn multiprocessing.
I have a simple function which generates a list of numbers, and I am trying to use multiprocessing to add up the numbers that are divisible by 10.
My objective is to run the child function in parallel across the available CPUs.
import multiprocessing
import time

def add_multiple_of_10_v0(number):
    number_list = []
    for i in range(1, number):
        x = i**3 + i**2 + i + 1
        number_list.append(x)
    print(number_list)
    pool = multiprocessing.Pool(6)
    result = 0
    for value in pool.map(check_multiple_10_v0, number_list):
        if value > 0:
            result = result + value
        else:
            pass
    pool.close()
    pool.join()
    return result

def check_multiple_10_v0(in_number):
    if in_number % 10 == 0:
        time.sleep(5)
        return in_number
    else:
        return -1
I am getting the below error -
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
I am new to Python and multiprocessing and would appreciate guidance.
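For reference, the idiom the error message is asking for looks roughly like this when applied to the code above. This is only a sketch of the guard, keeping the original function names; the call with 50 at the bottom is an arbitrary example value, not taken from the question:

import multiprocessing
import time

def check_multiple_10_v0(in_number):
    if in_number % 10 == 0:
        time.sleep(5)
        return in_number
    return -1

def add_multiple_of_10_v0(number):
    number_list = [i**3 + i**2 + i + 1 for i in range(1, number)]
    pool = multiprocessing.Pool(6)
    result = 0
    for value in pool.map(check_multiple_10_v0, number_list):
        if value > 0:
            result = result + value
    pool.close()
    pool.join()
    return result

if __name__ == '__main__':  # guard so the pool's worker processes can safely re-import this module
    print(add_multiple_of_10_v0(50))  # 50 is just an example value

Everything that starts processes now happens only when the file is run as the main script, which is what the 'spawn' start method used on Windows and macOS requires.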

Related

Why serial code is faster than concurrent.futures in this case?

I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

seqs = []
for seq in range(1, 10):  # len(seqs)+1):
    seq = txt + str(seq)
    seqs.append(seq)
    # serial call of the function
    track_ids(seq)

# parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    ood_id = ex.map(track_ids, seqs)
If I run the code serially it takes 3.0 minutes, but the parallel version with concurrent.futures takes 3.5 minutes.
Can someone please explain why that is and present a way to solve the problem?
By the way, I have 12 cores.
Thanks
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
import multiprocessing as mp
from cProfile import Profile
from pstats import Stats
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

def profile_seq():
    p = Profile()  # one and only profiler instance
    p.enable()
    seqs = []
    for seq in range(1, 10):  # len(seqs)+1):
        seq = txt + str(seq)
        seqs.append(seq)
        # serial call of the function
        track_ids(seq)
    p.disable()
    return Stats(p), seqs

def track_ids_pr(seq):
    p = Profile()  # profile the child tasks
    p.enable()
    retval = track_ids(seq)
    p.disable()
    return (Stats(p, stream="dummy"), retval)

def profile_parallel(seqs):
    p = Profile()  # profile stuff in the main process
    p.enable()
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
        retvals = ex.map(track_ids_pr, seqs)
    p.disable()
    s = Stats(p)
    out = []
    for ret in retvals:
        s.add(ret[0])
        out.append(ret[1])
    return s, out

if __name__ == "__main__":
    seq_stats, seqs = profile_seq()  # build seqs (and the serial stats) first
    stat, retval = profile_parallel(seqs)
    stat.print_stats()
EDIT: Unfortunately I found out that pstats.Stats objects cannot normally be passed through a multiprocessing.Queue because they are not pickleable (pickling is also needed for the operation of concurrent.futures). Evidently a Stats object will normally store a reference to a file for the purpose of writing statistics to that file, and if none is given, it will by default grab a reference to sys.stdout. We don't actually need that reference until we want to print out the statistics, so we can give it a temporary value to prevent the pickle error, and then restore an appropriate value later. The following example should be copy-paste-able and run just fine, rather than the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys

def isprime(x):
    for d in range(2, int(x**.5) + 1):
        if x % d == 0:
            return False
    return True

def foo(retq):
    p = Profile()
    p.enable()
    primes = []
    max_n = 2**20
    for n in range(3, max_n):
        if isprime(n):
            primes.append(n)
    p.disable()
    retq.put(Stats(p, stream="dummy"))  # Dirty hack: set `stream` to something picklable then override later

if __name__ == "__main__":
    q = Queue()
    p1 = Process(target=foo, args=(q,))
    p1.start()
    p2 = Process(target=foo, args=(q,))
    p2.start()
    s1 = q.get()
    s1.stream = sys.stdout  # restore original file
    s2 = q.get()
    # s2.stream  # if we are just adding this `Stats` object to another, the `stream` just gets thrown away anyway
    s1.add(s2)  # add up the stats from both child processes
    s1.print_stats()  # s1.stream gets used here, but not before. If you provide a file to write to instead of sys.stdout, it will write to that file.
    p1.join()
    p2.join()

My ProcessPoolExecutor does not run the whole function

I need to run a function in parallel in different processes. I am using the code below. My function's argument types are (nested list of binary numbers, int). The problem is that when I run the script the processes start running, but they finish in the middle of the function (check where I put "Here" in the code) without any error.
def CreateP(pattern):
    print("start")
    import pandas as pd
    P = []
    for j in range(n1):
        for i in range(n):
            for i1 in range(n):
                if i1 != i:
                    # Here (my processes end here)
                    if pattern[i][j] == pattern[i1][j]:
                        if (i, i1, j) not in P:
                            P.append((i, i1, j))
                        if (i1, i, j) not in P:
                            P.append((i1, i, j))
                    else:
                        if pattern[i][j] == 1:
                            if (i1, i, j) not in P:
                                P.append((i1, i, j))
                        else:
                            if (i, i1, j) not in P:
                                P.append((i, i1, j))
    pd.DataFrame(P).to_csv("test1.csv", index=False)
    pd.DataFrame(pattern).to_csv("test2.csv", index=False)
    return P

def main():
    import concurrent.futures
    for i in range(a1):
        with concurrent.futures.ProcessPoolExecutor(max_workers=61) as executor:
            f1 = executor.submit(CreateP, (Pbig1[i], 1))
            f2 = executor.submit(CreateP, (Pbig2[i], 2))

if __name__ == "__main__":
    main()
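One thing worth knowing about ProcessPoolExecutor here: an exception raised inside a worker is stored on the returned Future and is only re-raised when .result() is called, so a crash inside CreateP can look like the process simply stopping in the middle with no error. A minimal sketch of surfacing such errors (worker below is a hypothetical stand-in for CreateP, not the original code):

import concurrent.futures

def worker(pattern):
    # stand-in that indexes its argument the way CreateP does
    return pattern[0][0]

if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(worker, [[0, 1], [1, 0]]),
                   executor.submit(worker, 5)]  # this one raises inside the child
        for f in futures:
            try:
                print(f.result())  # re-raises any exception from the child process
            except Exception as exc:
                print("worker failed:", exc)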

Multi-core parallel computing over a for loop in python-3.x

I have a simple for loop which prints the numbers from 1 to 9999 with a 5-second sleep in between. The code is as below:
import time

def run():
    length = 10000
    for i in range(1, length):
        print(i)
        time.sleep(5)

run()
I want to apply multiprocessing to run the for loop concurrently on multiple cores. So I amended the code above to use 5 cores:
import multiprocessing as mp
import time

def run():
    length = 10000
    for i in range(1, length):
        print(i)
        time.sleep(5)

if __name__ == '__main__':
    p = mp.Pool(5)
    p.map(run())
    p.close()
There is no issue in running the job, but it seems like it is not running in parallel on 5 cores. How could I get the code to work as expected?
First, your run function contains the whole 1..9999 loop, so each worker would just repeat the same loop instead of sharing the work, and second, you are calling the run function instead of passing it to the .map() method.
You must prepare your queue of work items before passing it to the Pool instance so that all 5 workers process the same queue:
import multiprocessing as mp
import time

def run(i):
    print(i)
    time.sleep(5)

if __name__ == '__main__':
    length = 10000
    queue = range(1, length)
    p = mp.Pool(5)
    p.map(run, queue)
    p.close()
Note that the numbers will not be handed out one at a time in order; as explained in the documentation, the iterable is split into chunks, so for example worker #1 may process 1..500 while worker #2 processes 501..1000, and so on:
This method chops the iterable into a number of chunks which it submits to the process pool as separate tasks. The (approximate) size of these chunks can be specified by setting chunksize to a positive integer.
If you want to process the numbers more like the single-threaded version, set chunksize to 1:
p.map(run, queue, 1)
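For a feel of how chunksize changes which worker handles which numbers, here is a small self-contained sketch (the pool size and range are arbitrary example values, not taken from the question):

import multiprocessing as mp
import os

def tag(i):
    # return each number together with the pid of the worker that handled it
    return i, os.getpid()

if __name__ == '__main__':
    with mp.Pool(2) as p:
        print(p.map(tag, range(10)))     # default chunksize: each worker receives a block of consecutive numbers
        print(p.map(tag, range(10), 1))  # chunksize=1: numbers are handed out one at a time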

Python freezes when accessing string value in subprocess

I spent nearly the whole day on this and have come to the end of my knowledge:
I want to change a shared multiprocessing.Value string in the subprocess, but Python hangs as soon as the subprocess tries to change the shared value.
Below is an example:
from multiprocessing import Process, Value, freeze_support
from ctypes import c_wchar_p

def test(x):
    with x.get_lock():
        x.value = 'THE TEST WORKED'
    return

if __name__ == "__main__":
    freeze_support()
    value = Value(c_wchar_p, '')
    p = Process(target=test, args=(value,))
    p.start()
    print(p.pid)
    # this try block is to also allow p.run()
    try:
        p.join()
        p.terminate()
    except:
        pass
    print(value.value)
What I tried that does not work:
I tried ctypes c_wchar_p and c_char_p, but both result in the same freezing.
I also tried without x.get_lock().
I also tried without freeze_support().
What works (but does not help):
Using a float as the shared value (value = Value('d', 0) and x.value = 1).
Running the Process without starting a subprocess (replacing p.start() with p.run()).
I am using Windows 10 64 bit and Python 3.6.4 (Spyder, but also tried outside of Spyder).
Any help welcome!
A shared pointer won't work in another process because the pointer is only valid in the process in which it was created. Instead, use an array:
import multiprocessing as mp

def test(x):
    x.value = b'Test worked!'

if __name__ == "__main__":
    x = mp.Array('c', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.value)
Output:
b'Test worked!'
Note that array type 'c' is specialized and returns a SynchronizedString vs. other types that return SynchronizedArray. Here's how to use type 'u' for example:
import multiprocessing as mp
from ctypes import *

def test(x):
    x.get_obj().value = 'Test worked!'

if __name__ == "__main__":
    x = mp.Array('u', 15)
    p = mp.Process(target=test, args=(x,))
    p.start()
    p.join()
    print(x.get_obj().value)
Output:
Test worked!
Note that operations on the wrapped value that are non-atomic, such as += (a read/modify/write), should be protected with a with x.get_lock(): context manager.
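As a minimal illustration of that last point (a generic sketch, not code from the question or the answer above):

import multiprocessing as mp

def add_many(counter, n):
    for _ in range(n):
        with counter.get_lock():  # += is a read/modify/write, so take the lock
            counter.value += 1

if __name__ == "__main__":
    counter = mp.Value('i', 0)
    workers = [mp.Process(target=add_many, args=(counter, 10000)) for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(counter.value)  # reliably 40000 because the increments are locked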

Use of asyncio event loop recursively

I am new to asyncio (used with Python 3.4) and I am not sure I am using it as one should. I have seen in this thread that it can be used to execute a function every n seconds (in my case ms) without having to dive into threading.
I use it to get data from laser sensors through a basic serial protocol every n ms until I get m samples.
Here is the definition of my functions:
def countDown(self,
              loop,
              funcToDo,
              *args,
              counter=[1],
              **kwargs):
    """ At every call, executes funcToDo (passing it args and kwargs)
    and counts counter down to 0. Then, it stops loop """
    if counter[0] == 0:
        loop.stop()
    else:
        funcToDo(*args, **kwargs)
        counter[0] -= 1

def _frangeGen(self, start=0, stop=None, step=1):
    """ used to generate a time frange from start to stop by step step """
    while stop is None or start < stop:
        yield start
        start += step

def callEvery(self,
              loop,
              interval,
              funcToCall,
              *args,
              now=True,
              **kwargs):
    """ repeat funcToCall every interval sec in loop object """
    nb = kwargs.get('counter', [1000])

    def repeat(now=True,
               times=self._frangeGen(start=loop.time(),
                                     stop=loop.time() + nb[0] * interval,
                                     step=interval)):
        if now:
            funcToCall(*args, **kwargs)
        loop.call_at(next(times), repeat)

    repeat(now=now)
And this is how I use it (getAllData is the function that manages serial communication):
ts = 0.01
nbOfSamples = 1000
loop = asyncio.get_event_loop()
callEvery(loop, ts, countDown, loop, getAllData, counter=[nbOfSamples])
loop.run_forever()
I want to put that block into a function and call it as often as I want, something like this:
for i in range(nbOfMeasures):
    myFunction()
    processData()
But the second test does not call getAllData 1000 times, only twice, sometimes three times. The interesting fact is that one time in two I get as much data as I want. I don't really understand, and I can't find anything in the docs, so I am asking for your help. Any explanation or an easier way to do it is gladly welcome :)
You are complicating things too much and, generally speaking, doing recursion when you have an event loop is bad design.
asyncio is fun only when you make use of coroutines. Here's one way of doing it:
import asyncio as aio

def get_laser_data():
    """
    get data from the laser using blocking IO
    """
    ...

@aio.coroutine
def get_samples(loop, m, n):
    """
    loop = asyncio event loop
    m = number of samples
    n = time between samples
    """
    samples = []
    while len(samples) < m:
        sample = yield from loop.run_in_executor(None, get_laser_data)
        samples.append(sample)
        yield from aio.sleep(n)
    return samples

@aio.coroutine
def main(loop):
    for i in range(nbOfMeasures):
        samples = yield from get_samples(loop, 1000, 0.01)
        ...

loop = aio.get_event_loop()
loop.run_until_complete(main(loop))
loop.close()
If you are completely confused by this, consider reading some tutorials/documentation about asyncio.
But I would like to point out that you must use a thread to get the data from the laser sensor. Doing any blocking IO in the same thread that the event loop is running in will block the loop and throw off aio.sleep. This is what yield from loop.run_in_executor(None, get_laser_data) is doing: it runs the get_laser_data function in a separate thread.
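To isolate that point, here is a minimal self-contained sketch of run_in_executor, written with the newer async/await syntax (time.sleep stands in for the blocking serial read; nothing here comes from the original laser code):

import asyncio
import time

def blocking_read():
    # stand-in for a blocking serial read from the sensor
    time.sleep(0.1)
    return 42

async def read_samples(m):
    loop = asyncio.get_running_loop()
    samples = []
    for _ in range(m):
        # hand the blocking call to the default thread pool so the event loop stays free
        sample = await loop.run_in_executor(None, blocking_read)
        samples.append(sample)
    return samples

print(asyncio.run(read_samples(3)))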
In python 3.5, you can make use of the async for syntax and create an asynchronous iterator to control your time frames. It has to implement the __aiter__ and __anext__ methods:
import asyncio
import collections.abc

class timeframes(collections.abc.AsyncIterator):

    def __init__(self, steps, delay=1.0, *, loop=None):
        self.loop = asyncio.get_event_loop() if loop is None else loop
        self.ref = self.loop.time()
        self.delay = delay
        self.steps = steps
        self.iter = iter(range(steps))

    async def __anext__(self):
        try:
            when = self.ref + next(self.iter) * self.delay
        except StopIteration:
            raise StopAsyncIteration
        else:
            future = asyncio.Future()
            self.loop.call_at(when, future.set_result, None)
            await future
            return self.loop.time()

    def __aiter__(self):
        # plain (non-async) method: __aiter__ must return the iterator itself
        return self
Here's a coroutine that simulates an execution:
async def simulate(steps, delay, execution):
    # Prepare timing
    start = loop.time()
    expected = steps * delay - delay + execution
    # Run simulation
    async for t in timeframes(steps, delay):
        await loop.run_in_executor(None, time.sleep, execution)
    # Return error
    result = loop.time() - start
    return result - expected
And this is the kind of result you'll get on a Linux OS:
>>> loop = asyncio.get_event_loop()
>>> simulation = simulate(steps=1000, delay=0.020, execution=0.014)
>>> error = loop.run_until_complete(simulation)
>>> print("Overall error = {:.3f} ms".format(error * 1000))
Overall error = 1.199 ms
It is different on a Windows OS (see this answer), but the event loop will catch up and the overall error should never exceed 15 ms.
