How to use MPIPool inside a loop?

I want to use MPIPool inside a loop on my university cluster, but every time the code freezes at the first iteration. Does anyone know what is happening and what I should do?
The example code is shown below:
import sys
import time
import emcee
import numpy as np
from schwimmbad import MPIPool
from multiprocessing import Pool

def log_prob(theta):
    t = time.time() + np.random.uniform(0.005, 0.008)
    while True:
        if time.time() >= t:
            break
    return -0.5*np.sum(theta**2)

for i in range(5):
    np.random.seed(i+10)
    initial = np.random.randn(32, 5)
    nwalkers, ndim = initial.shape
    nsteps = 100
    pool = MPIPool()
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
    start = time.time()
    sampler.run_mcmc(initial, nsteps)
    end = time.time()
    print(end - start)
    pool.close()
The script is launched with:

mpiexec -n 16 python3 /feynman/home/dap/lceg/yl272379/test.py
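A likely cause, judging from the schwimmbad documentation: under mpiexec every MPI rank executes the entire script, so all 16 ranks try to construct the pool and run the sampler. The documented pattern parks the worker ranks in pool.wait() and lets only the master rank drive the loop. A minimal sketch of that pattern (a suggestion, not a verified fix for this particular cluster):

import sys
import time
import emcee
import numpy as np
from schwimmbad import MPIPool

def log_prob(theta):
    # Busy-wait for 5-8 ms to simulate work
    t = time.time() + np.random.uniform(0.005, 0.008)
    while time.time() < t:
        pass
    return -0.5*np.sum(theta**2)

with MPIPool() as pool:
    # Worker ranks block here, serve tasks, and exit when the pool closes
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    for i in range(5):
        np.random.seed(i+10)
        initial = np.random.randn(32, 5)
        nwalkers, ndim = initial.shape
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
        start = time.time()
        sampler.run_mcmc(initial, 100)
        print(time.time() - start)

Creating the pool once, outside the loop, also avoids re-establishing MPI communication on every iteration.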

Related

Script multiprocessing doesn't finish all tasks, and I also get 100% CPU?

I need to ask whether part of my script is correct. It seems to work, but I think something is wrong, because the CPU stays at 100% and often not all tasks finish: after 50/100 tasks it seems frozen.
Any idea how to fix it, or can you tell me where the error is?
Thank you
P.S. I have included all the modules the script requires, and only the part relevant to multiprocessing (just the first part of the script).
Many thanks
from __future__ import print_function
import sys
import os
import ctypes  # needed for SetConsoleTitleW below; missing from the original imports
import easygui
import pyautogui as py
import datetime
import pwinput
import json
from collections import Counter
import random
import string
import threading
import subprocess
import multiprocessing
import queue
from multiprocessing import cpu_count
from multiprocessing import Value, Lock, Process, Queue, current_process
import numpy as np
import grequests
import requests
from requests.exceptions import ConnectionError
from requests.exceptions import HTTPError
import time
from time import sleep

# filter_data, do_work, username and start are defined in the part of the
# script not shown here.
number_of_processes = cpu_count()

class Counter(object):  # note: shadows collections.Counter imported above
    def __init__(self, initval=0):
        self.val = Value('i', initval)
        self.lock = Lock()
    def increment(self):
        with self.lock:
            self.val.value += 1
    def value(self):
        with self.lock:
            return self.val.value

def updateTitle(number_of_processes, number_of_task, counterhits, counterdone, countersl, countml, username):
    while True:
        hits = int(counterhits.value())
        done = int(counterdone.value())
        shtot = int(countersl.value())
        maitot = int(countml.value())
        remain_scan = number_of_task - hits
        elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - start))
        ctypes.windll.kernel32.SetConsoleTitleW(f'Site Valid For: {number_of_task} | Started: {hits} | Complete: {done} | Remain: {remain_scan} | SL Found: {shtot} | ML Found: {maitot} | Threads: {number_of_processes} | Time elapsed: {elapsed} ! Licensed at: {username}')
        sleep(0.3)

def worker_main(tasks_to_do, tasks_finished, counterhits, counterdone, countersl, countml):
    while True:
        try:
            site = tasks_to_do.get_nowait()
            if site is None:
                break
        except queue.Empty:  # get_nowait() raises queue.Empty; Queue.Empty here was an AttributeError
            break
        except queue.Full:
            sleep(0.5)
            continue
        counterhits.increment()
        do_work(site, counterhits, counterdone, countersl, countml)
        tasks_finished.put(site + current_process().name)
        counterdone.increment()
    return True

def main():
    global username
    number_of_task = int(len(filter_data))
    counterhits = Counter(0)
    counterdone = Counter(0)
    countersl = Counter(0)
    countml = Counter(0)
    tasks_to_do = Queue()
    tasks_finished = Queue()
    processes1 = []
    prefix = ['http://']
    # creating processes
    for w in range(number_of_processes):
        p1 = Process(target=worker_main, args=(tasks_to_do, tasks_finished, counterhits, counterdone, countersl, countml))
        processes1.append(p1)
        p1.start()
    procs = [Process(target=updateTitle, args=(number_of_processes, number_of_task, counterhits, counterdone, countersl, countml, username), daemon=True) for i in range(1)]
    for p in procs: p.start()
    # strip scheme and path from each input line, then re-add the prefix
    for site_il in filter_data:
        site_or = site_il.rstrip("\n")
        if site_or.startswith("http://"):
            site_or = site_or.replace("http://", "")
        elif site_or.startswith("https://"):
            site_or = site_or.replace("https://", "")
        site_or = site_or.rstrip()
        site_or = site_or.split('/')[0]
        if 'www.' in site_or:
            site_or = site_or.replace("www.", "")
        sitexx = [sub + site_or for sub in prefix]
        for site in sitexx:
            tasks_to_do.put(site)
    # completing process
    for p1 in processes1:
        p1.join()
    for p in procs: p.join()
    # print the output
    while not tasks_finished.empty():
        print(tasks_finished.get())
    os.system('pause>nul')
    return True

if __name__ == '__main__':
    if sys.platform.startswith('win'):
        # On Windows calling this function is necessary.
        multiprocessing.freeze_support()
    main()
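A few things in the snippet above can produce exactly this kind of freeze and are worth checking: the workers call get_nowait() and break on an empty queue, but they are started before any task is put on the queue, so some may exit immediately; the `if site is None` check implies a sentinel protocol, yet no None is ever enqueued; and updateTitle() loops forever, so joining its daemon process never returns. A minimal sketch of the sentinel pattern those checks suggest (simplified stand-in code, not the full script):

from multiprocessing import Process, Queue, cpu_count

def worker_main(tasks_to_do, tasks_finished):
    while True:
        site = tasks_to_do.get()          # block until work or a sentinel arrives
        if site is None:                  # sentinel: no more work
            break
        tasks_finished.put(site)          # stand-in for do_work(...)

if __name__ == '__main__':
    tasks_to_do, tasks_finished = Queue(), Queue()
    workers = [Process(target=worker_main, args=(tasks_to_do, tasks_finished))
               for _ in range(cpu_count())]
    for p in workers:
        p.start()
    for site in ['http://example.com', 'http://example.org']:  # hypothetical input
        tasks_to_do.put(site)
    for _ in workers:                     # one sentinel per worker
        tasks_to_do.put(None)
    for p in workers:
        p.join()
    while not tasks_finished.empty():
        print(tasks_finished.get())

The title-bar process can stay a daemon, but it should not be joined; daemon processes are killed automatically when the main process exits.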

Multiprocessing with multiprocessing.Pool

I am having an issue with even the most basic task using the multiprocessing.Pool method.
It seems to be working but never finishes the simplest task.
Could you please help me see what I am doing wrong?
I read some articles and tried to understand them, but could not figure it out. I added a short example (with list(map(squared, range(2_000_000))) it works, but not the code below).
Thanks in advance,
Roland
"""
from multiprocessing import Pool
import time
process_pool = Pool(processes = 4)
def squared(n):
return n ** 2
start = time.perf_counter()
process_pool.apply(squared, range(2_000_000))
end = time.perf_counter() - start
print(f"Run time: {end}")
"""
This looks like a case for multithreading. Have you tried something like:

from concurrent.futures import ThreadPoolExecutor, as_completed

num_of_threads = 50  # number of threads executing at the same time
with ThreadPoolExecutor(max_workers=num_of_threads) as executor:
    tasks = []
    for i in i_list:
        tasks.append(
            executor.submit(
                <Function_to_execute>, i
            )
        )
    for future in as_completed(tasks):
        if future.result():
            yield future.result()  # this can be a plain return; with yield you return a generator
I think you want imap() (and move squared() before you define the Pool):
from multiprocessing import Pool
import time

def squared(n):
    return n ** 2

process_pool = Pool(processes=4)
start = time.perf_counter()
process_pool.imap(squared, range(2))
end = time.perf_counter() - start
print(f"Run time: {end}")
Just keep in mind this is not a very representative example, since you don't do anything with the results; something better would be:

with Pool(4) as pool:
    results = pool.imap(squared, range(2_000_000))
    for result in results:
        pass  # do something here with each result
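Two things worth noting about imap() (general multiprocessing behaviour, not specific to this question): it is lazy, so the bare process_pool.imap(...) call above returns almost immediately and the timing measures essentially nothing until the results are consumed; and for a function as cheap as squared(), most of the wall time goes into pickling arguments and results, which the chunksize parameter of imap() amortizes. A minimal sketch:

from multiprocessing import Pool

def squared(n):
    return n ** 2

if __name__ == '__main__':
    with Pool(4) as pool:
        # send work in batches of 10_000 items to cut inter-process overhead
        for result in pool.imap(squared, range(2_000_000), chunksize=10_000):
            pass  # consume each result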

time.time() returns an unexpected result when using joblib

I have a program that creates several instances of a class, Test, and then does some work on each instance, keeping track of how much time the work took. I recently decided to parallelize this code using the joblib library, and am running into an error: the total_time variable at the end is now 0.0.
The Python environment on my machine is:
$ python3
Python 3.7.0 (default, Sep 18 2018, 18:47:08)
[Clang 10.0.0 (clang-1000.10.43.1)] on darwin
Below is an MCVE for this issue:
import time
import random
import multiprocessing
import joblib

class Test:
    def __init__(self):
        self.name = ""
        self.duration = 0.0

def add_test(a):
    temp = Test()
    temp.name = str(a)
    return temp

def run_test(test):
    test_start = time.time()
    rand = random.randint(1, 3)
    time.sleep(rand)
    test_end = time.time()
    test.duration = round(test_end - test_start, 3)
    print(f"Test {test.name} ran in {test.duration}")

def main():
    tests = []
    for a in range(1, 10):
        tests.append(add_test(a))
    num_cores = multiprocessing.cpu_count()
    joblib.Parallel(n_jobs=num_cores)(joblib.delayed(run_test)(test) for test in tests)
    total_time = round(sum(test.duration for test in tests), 3)
    print(f"This run took {total_time} seconds.")

if __name__ == '__main__':
    main()
If I add a print(list(test.duration for test in tests)) in main(), I see that test.duration is 0.0 after run_test() is called, even though, as can be seen from running the above, test.duration is set to a non-zero value (where appropriate) inside run_test().
I'm not too familiar with Python classes or the joblib library, so I'm not sure if the issue I'm experiencing is related to a misuse of classes or some other issue that's beyond me.
Thank you!
With thanks to num8lock on Reddit, here is the correct way to solve this (the key point: joblib runs each task in a separate process, so run_test() mutates a copy of the Test instance; the duration has to be returned from the worker and collected from Parallel instead):
import time
import random
import multiprocessing
import joblib

class Test:
    def __init__(self, name):
        self.name = name
        self.duration = 0.0
        self.start = time.perf_counter()
    def run(self):
        rand = random.randint(1, 3)
        time.sleep(rand)
        _end = time.perf_counter()
        self.duration = _end - self.start
        print(f"Test {self.name} ran in {self.duration}")
        return self.duration

def add(a):
    return Test(str(a))

def make_test(test):
    return test.run()

def main():
    num_cores = multiprocessing.cpu_count()
    tests = []
    for a in range(1, 10):
        tests.append(add(a))
    jobs = joblib.Parallel(n_jobs=num_cores)(joblib.delayed(make_test)(t) for t in tests)
    total_time = sum(job for job in jobs)
    print(f"This run took {total_time} seconds.")

if __name__ == '__main__':
    main()

Fastest way to call two functions multiple times without using a list of input parameters

I have written a small program that uses a pipe.
The parent takes care of the camera connection while the child processes the images.
The child process calls two functions, FunctionA and FunctionB. Both times the image from the parent is processed.
I want to run the two functions as fast as possible, since they are time consuming (0.1 s and 0.12 s). I tried multiprocessing.Process and multiprocessing pools. The bottleneck in the first case is the creation of the two processes for each image passed by the parent. Pools only make sense if I had a list of images, but I cannot wait to collect one, since that would waste the speed-up from the pipe, which is already quite good (about 3.123 seconds). Does somebody have a smart solution for this sort of problem?
Kind regards :)
Example code:
import multiprocessing
import cv2
import glob
from multiprocessing import pool
from multiprocessing.dummy import Pool as ThreadPool
import time
from ProcA import FunctionA
from ProcB import FunctionB
import Preprocessor  # used in show_loop(); presumably omitted from the excerpt
import Lines
import Feature
#===============================================================================
# Test Pipe
#===============================================================================
def cam_loop(pipe_parent):
    imagePathes = glob.glob("Images\*.jpg")
    for path in imagePathes:
        image = cv2.imread(path)
        pipe_parent.send(image)
        StringFromChild = pipe_parent.recv()
        print("StringFromChild:", StringFromChild)

def show_loop(pipe_child):
    #cv2.namedWindow('pepe')
    proc = Preprocessor.Preprocessor()
    line = Lines.Lines()
    features = Feature.FeatureDetector()
    imgIdx = 0
    Q_Barcode = multiprocessing.Queue(10)
    Q_CapFeatures = multiprocessing.Queue(100)
    while True:
        image = pipe_child.recv()
        start = time.time()
        # Calculating features
        # Create processes
        p1 = multiprocessing.Process(target=FunctionA, args=(proc, image, imgIdx, None, None, None,))
        p1.start()
        p2 = multiprocessing.Process(target=FunctionB, args=(line, proc, features, image, imgIdx,))
        p2.start()
        p1.join()
        p2.join()
        # send features calculated to parent
        pipe_child.send("OK")
        end = time.time()
        print("TimeMultiProc", end - start)
        start = time.time()
        # Calculating the features serially, for comparison
        FunctionA(proc, image, imgIdx, None, None, None)
        FunctionB(line, proc, features, image, imgIdx)
        end = time.time()
        print("TimeSerial", end - start)

if __name__ == '__main__':
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)
    pipe_parent, pipe_child = multiprocessing.Pipe()
    cam_process = multiprocessing.Process(target=cam_loop, args=(pipe_parent,))
    cam_process.start()
    show_process = multiprocessing.Process(target=show_loop, args=(pipe_child,))
    show_process.start()
    cam_process.join()
    show_process.join()  # was show_loop.join(): join the Process object, not the function
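One common pattern for this kind of per-image fan-out (a sketch under assumptions; func_a and func_b are hypothetical stand-ins for FunctionA and FunctionB): start one long-lived process per function and feed each through its own queue, so the process-creation cost is paid once rather than once per image.

import multiprocessing as mp

def worker(func, in_q, out_q):
    # Long-lived worker: the process start-up cost is paid once
    for image in iter(in_q.get, None):    # None is the shutdown sentinel
        out_q.put(func(image))

def func_a(image):
    # hypothetical stand-in for FunctionA
    return ("A", image)

def func_b(image):
    # hypothetical stand-in for FunctionB
    return ("B", image)

if __name__ == '__main__':
    in_a, in_b, out = mp.Queue(), mp.Queue(), mp.Queue()
    pa = mp.Process(target=worker, args=(func_a, in_a, out))
    pb = mp.Process(target=worker, args=(func_b, in_b, out))
    pa.start()
    pb.start()
    for image in range(5):                # stand-in for frames from the camera
        in_a.put(image)
        in_b.put(image)
        results = [out.get(), out.get()]  # one result from each function
        print(results)
    in_a.put(None)                        # shut the workers down
    in_b.put(None)
    pa.join()
    pb.join()

With persistent workers, each image costs only a couple of queue round-trips, which is typically far cheaper than forking two fresh processes per frame.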

Using multiprocessing and ProcessPoolExecutor simultaneously

I am trying to create a simple script for Python 3.5 that can execute heavy computer vision algorithms in parallel. I have created a process with multiprocessing.Process in the main process.
Inside that process I create a concurrent.futures.ProcessPoolExecutor. The spawned process submits tasks to the ProcessPoolExecutor and it works perfectly fine. But when I try to stop and join the spawned process, it hangs on join().
Also, if I replace the ProcessPoolExecutor with a ThreadPoolExecutor, everything works perfectly. What did I miss?
Here is main file:
import multiprocessing as mp
import queue as Queue
import numpy as np
import cv2
from time import sleep
import executer_debug

def worker(queue):
    pExecutor = executer_debug.Worker()
    pExecutor.set()
    while True:
        print("-->{}<--".format(pExecutor.get()))
        sleep(1)
        try:
            income = queue.get_nowait()
            break
        except Queue.Empty:
            pass
        pExecutor.set()
    print("<1>{}<1>".format(pExecutor.get()))
    print("<2>{}<2>".format(pExecutor.get()))

def main():
    queue = mp.Queue()
    currProcess = mp.Process(target=worker, args=(queue,))
    currProcess.start()
    frame = np.zeros((480, 640), dtype=np.uint8)
    while True:
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("stopped")
    queue.put("stop")
    currProcess.join()

if __name__ == "__main__":
    main()
And here is the second file. The code is very simple, just enough to demonstrate the issue.
import collections
from concurrent.futures import ProcessPoolExecutor
from time import sleep
import multiprocessing as mp

def worker():
    return 1

class Worker():
    def __init__(self):
        self.workers_count = 4
        self.poolExecutor = ProcessPoolExecutor(max_workers=self.workers_count)
        self.executors = collections.deque()
    def set(self):
        self.executors.append(self.poolExecutor.submit(worker))
    def get(self):
        if len(self.executors) > 0:
            if self.executors[0].done():
                return self.executors.popleft().result()
            else:
                return 0
        else:
            return -1
Thank you!
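One plausible cause (an assumption, not a confirmed diagnosis): the ProcessPoolExecutor created inside the spawned process is never shut down, so its worker processes and its queue-management thread are still alive when worker() returns, which can keep currProcess.join() from returning. A minimal sketch of an explicit shutdown, using a hypothetical stop() method added to the Worker class in executer_debug.py:

class Worker():
    # ... __init__, set() and get() as above ...
    def stop(self):
        # Block until pending futures finish, then release the pool's
        # worker processes and management thread so the enclosing
        # multiprocessing.Process can exit cleanly.
        self.poolExecutor.shutdown(wait=True)

In worker() in the main file, pExecutor.stop() would then be called right after the while loop breaks, before the final prints.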
