Multiprocessing script doesn't finish all tasks and CPU stays at 100% - python-3.x

I need to ask whether this part of my script is correct. It seems to work, but I think something is really wrong, because the CPU still sits at 100% and it often doesn't finish all the tasks: after 50/100 tasks it seems frozen.
Any advice on how to fix it? Or maybe just tell me where the error is?
Thank you.
P.S. I have included all the modules the script requires, but only the part relevant to multiprocessing, and only the first part of the script.
Many thanks.
from __future__ import print_function
import sys
import os
import ctypes
import easygui
import pyautogui as py
import datetime
import pwinput
import json
from collections import Counter
import random
import string
import threading
import subprocess
import multiprocessing
import queue
from multiprocessing import cpu_count
from multiprocessing import Value, Lock, Process, Queue, current_process
import numpy as np
import grequests
import requests
from requests.exceptions import ConnectionError
from requests.exceptions import HTTPError
import time
from time import sleep

number_of_processes = cpu_count()

class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)
        self.lock = Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    def value(self):
        with self.lock:
            return self.val.value

def updateTitle(number_of_processes, number_of_task, counterhits, counterdone, countersl, countml, username):
    while True:
        hits = int(counterhits.value())
        done = int(counterdone.value())
        shtot = int(countersl.value())
        maitot = int(countml.value())
        remain_scan = number_of_task - hits
        elapsed = time.strftime('%H:%M:%S', time.gmtime(time.time() - start))
        ctypes.windll.kernel32.SetConsoleTitleW(f'Site Valid For: {number_of_task} | Started: {hits} | Complete: {done} | Remain: {remain_scan} | SL Found: {shtot} | ML Found: {maitot} | Threads: {number_of_processes} | Time elapsed: {elapsed} ! Licensed at: {username}')
        sleep(0.3)

def worker_main(tasks_to_do, tasks_finished, counterhits, counterdone, countersl, countml):
    while True:
        try:
            site = tasks_to_do.get_nowait()
            if site is None:
                break
        except Queue.Empty:
            break
        except Queue.Full:
            sleep(0.5)
            continue
        counterhits.increment()
        do_work(site, counterhits, counterdone, countersl, countml)
        tasks_finished.put(site + current_process().name)
        counterdone.increment()
    return True

def main():
    global username
    number_of_task = int(len(filter_data))
    counterhits = Counter(0)
    counterdone = Counter(0)
    countersl = Counter(0)
    countml = Counter(0)
    tasks_to_do = Queue()
    tasks_finished = Queue()
    processes1 = []
    prefix = ['http://']
    # creating processes
    for w in range(number_of_processes):
        p1 = Process(target=worker_main, args=(tasks_to_do, tasks_finished, counterhits, counterdone, countersl, countml))
        processes1.append(p1)
        p1.start()
    procs = [Process(target=updateTitle, args=(number_of_processes, number_of_task, counterhits, counterdone, countersl, countml, username), daemon=True) for i in range(1)]
    for p in procs:
        p.start()
    for site_il in filter_data:
        site_or = site_il.rstrip("\n")
        if site_or.startswith("http://"):
            site_or = site_or.replace("http://", "")
        elif site_or.startswith("https://"):
            site_or = site_or.replace("https://", "")
        site_or = site_or.rstrip()
        site_or = site_or.split('/')[0]
        if 'www.' in site_or:
            site_or = site_or.replace("www.", "")
        sitexx = [sub + site_or for sub in prefix]
        for site in sitexx:
            tasks_to_do.put(site)
    # completing process
    for p1 in processes1:
        p1.join()
    for p in procs:
        p.join()
    # print the output
    while not tasks_finished.empty():
        print(tasks_finished.get())
    os.system('pause>nul')
    return True

if __name__ == '__main__':
    if sys.platform.startswith('win'):
        # On Windows calling this function is necessary.
        multiprocessing.freeze_support()
    main()
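For comparison only, here is a minimal sketch of the same producer/consumer pattern (my own example, not the poster's script): the work plus one None sentinel per worker is queued before joining, the workers block on get() instead of polling with get_nowait(), and nothing spins in a tight loop.

from multiprocessing import Process, Queue, cpu_count

def worker_main(tasks_to_do, tasks_finished):
    while True:
        site = tasks_to_do.get()        # blocks until an item is available
        if site is None:                # sentinel: no more work for this worker
            break
        tasks_finished.put(site)        # stand-in for the real do_work() call

if __name__ == '__main__':
    tasks_to_do = Queue()
    tasks_finished = Queue()
    n_workers = cpu_count()
    for site in ('http://example.com', 'http://example.org'):
        tasks_to_do.put(site)
    for _ in range(n_workers):          # one sentinel per worker
        tasks_to_do.put(None)
    workers = [Process(target=worker_main, args=(tasks_to_do, tasks_finished))
               for _ in range(n_workers)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    while not tasks_finished.empty():
        print(tasks_finished.get())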

Related

How to use MPIPool inside a loop?

I want to use MPIPool inside a loop on my university cluster, but every time the code freezes at the first iteration. Does anyone know what is happening and what I should do?
The example code is shown below:
import sys
import time
import emcee
import numpy as np
from schwimmbad import MPIPool
from multiprocessing import Pool

def log_prob(theta):
    t = time.time() + np.random.uniform(0.005, 0.008)
    while True:
        if time.time() >= t:
            break
    return -0.5 * np.sum(theta**2)

for i in range(5):
    np.random.seed(i + 10)
    initial = np.random.randn(32, 5)
    nwalkers, ndim = initial.shape
    nsteps = 100
    pool = MPIPool()
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
    start = time.time()
    sampler.run_mcmc(initial, nsteps)
    end = time.time()
    print(end - start)
    pool.close()
mpiexec -n 16 python3 /feynman/home/dap/lceg/yl272379/test.py
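For reference, a minimal sketch of how schwimmbad's MPIPool is normally used (an assumption about the cause of the freeze, based on the documented pattern rather than this cluster's setup): every MPI rank runs the whole script, so the non-master ranks must be parked in pool.wait() before sampling starts, otherwise the master blocks waiting for workers that never report in.

import sys
import numpy as np
import emcee
from schwimmbad import MPIPool

def log_prob(theta):
    return -0.5 * np.sum(theta**2)

pool = MPIPool()
if not pool.is_master():
    # Worker ranks wait here and execute tasks sent by the master;
    # without this guard every rank tries to run the sampling loop itself.
    pool.wait()
    sys.exit(0)

initial = np.random.randn(32, 5)
nwalkers, ndim = initial.shape
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
sampler.run_mcmc(initial, 100)
pool.close()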

Process multiple onCreated events in parallel with Python watchdog

I am trying to detect whether any new files are created in a directory; when one is created, I want to process it (which takes about 10 minutes to produce output), while other new files may be created in the folder in the meantime.
How do I register watchdog's on_created with multiprocessing so that, instead of waiting for one file to finish, it spawns a new process every time a file is created?
import time
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import multiprocessing as mp

def on_created(event):
    print(f"hey, {event.src_path} has been created!")
    time.sleep(10)
    doProcessing(event.src_path)
    print(f"hey for {event.src_path}")

if __name__ == "__main__":
    patterns = "*"
    ignore_patterns = ""
    ignore_directories = False
    case_sensitive = True
    my_event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)
    path = "D:\\watcher"
    go_recursively = True
    my_observer = Observer()
    my_observer.schedule(my_event_handler, path, recursive=go_recursively)
    my_observer.start()
    my_event_handler.on_created = on_created
    #my_event_handler.on_deleted = on_deleted
    #my_event_handler.on_modified = on_modified
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        my_observer.stop()
        my_observer.join()

def doProcessing(filename):
    print("Processing")
Sorry for the many commented-out portions of the code; in essence, pool.apply_async(print_func, (event,)) is what solved the problem: once events are pushed onto the queue, the process_load_queue function iterates through the queue and runs print_func asynchronously.
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 21 22:02:55 2019
#author: 1009758
"""
import os
import time
import datetime
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import multiprocessing as mp
from multiprocessing import Process
from multiprocessing import Queue
import threading
from multiprocessing import Pool

PROCESSES = mp.cpu_count() - 1
NUMBER_OF_TASKS = 10

class FileLoaderWatchdog(PatternMatchingEventHandler):
    ''' Watches a nominated directory and when a * type file is
        moved
    '''
    def __init__(self, queue, patterns):
        PatternMatchingEventHandler.__init__(self, patterns=patterns)
        self.queue = queue

    def process(self, event):
        '''
        event.event_type
            'modified' | 'created' | 'moved' | 'deleted'
        event.is_directory
            True | False
        event.src_path
            path/to/observed/file
        '''
        self.queue.put(event)

    def on_created(self, event):
        self.process(event)
        now = datetime.datetime.utcnow()
        #print(f"hey for {event.src_path}")
        print("{0} -- event {1} off the queue ...".format(now.strftime("%Y/%m/%d %H:%M:%S"), event.src_path))

def print_func(event):
    time.sleep(5)
    now = datetime.datetime.utcnow()
    print("{0} -- Pulling {1} off the queue ...".format(now.strftime("%Y/%m/%d %H:%M:%S"), event.src_path))

def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())

def process_load_queue(q):
    '''This is the worker thread function. It is run as a daemon
       thread that only exits when the main thread ends.

       Args
       ==========
         q: Queue() object
    '''
    while True:
        if not q.empty():
            #mp.set_start_method('spawn')
            event = q.get()
            pool = Pool(processes=1)
            pool.apply_async(print_func, (event,))
            ##p = Pool(5)
            #p.map(print_func, (event,))
            #print_func(event)
            #info('main line')
            #procs = []
            #proc = Process(target=print_func, args=(event,))
            #procs.append(proc)
            #proc.start()
            #for proc in procs:
            #    proc.join()
            #print("{0} -- Pulling {1} off the queue ...".format(now.strftime("%Y/%m/%d %H:%M:%S"), event.src_path))
            #time.sleep(5)
            #now2 = datetime.datetime.utcnow()
            #print("{0} -- Replying {1} off the queue ...".format(now2.strftime("%Y/%m/%d %H:%M:%S"), event.src_path))
        else:
            time.sleep(1)

if __name__ == '__main__':
    # create queue
    watchdog_queue = Queue()

    # Set up a worker thread to process database load
    # setup watchdog to monitor directory for trigger files
    #args = sys.argv[1:]
    patt = ["*"]
    path_watch = "D:\\watcher"
    event_handler = FileLoaderWatchdog(watchdog_queue, patterns=patt)
    observer = Observer()
    observer.schedule(event_handler, path=path_watch)
    observer.start()

    #pool = Pool(processes=1)
    #pool.apply_async(process_load_queue, (watchdog_queue,))
    worker = threading.Thread(target=process_load_queue, args=(watchdog_queue,))
    worker.daemon = True
    worker.start()
    #p = Pool(2)
    #p.map(observer, watchdog_queue)
    #asyncio.run(main())

    try:
        while True:
            time.sleep(2)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
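One variation worth noting (a sketch of my own, not part of the original answer): the loop above builds a new Pool(processes=1) for every event and never closes it, so a long-running watcher accumulates worker processes. Creating the pool once and reusing it for every event keeps the number of workers bounded.

import time
from multiprocessing import Pool

def print_func(event):
    time.sleep(5)
    print("Pulling {0} off the queue ...".format(event.src_path))

def process_load_queue(q):
    '''Worker-thread function: pull events off the queue and hand each one
       to a single shared pool instead of building a new Pool per event.'''
    pool = Pool(processes=4)            # created once, reused for every event
    while True:
        if not q.empty():
            event = q.get()
            pool.apply_async(print_func, (event,))
        else:
            time.sleep(1)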

time.time() returns an unexpected result when using joblib

I have a program that creates several instances of a class, Test, and then does some work on each instance of the class, keeping track of how much time the work took. I recently decided to parallelize this code using the joblib library, and I am running into a problem: the total_time variable at the end is now 0.0.
The python environment on my machine is
$ python3
Python 3.7.0 (default, Sep 18 2018, 18:47:08)
[Clang 10.0.0 (clang-1000.10.43.1)] on darwin
Below is an MCVE for this issue:
import time
import random
import multiprocessing
import joblib

class Test:
    def __init__(self):
        self.name = ""
        self.duration = 0.0

def add_test(a):
    temp = Test()
    temp.name = str(a)
    return temp

def run_test(test):
    test_start = time.time()
    rand = random.randint(1, 3)
    time.sleep(rand)
    test_end = time.time()
    test.duration = round(test_end - test_start, 3)
    print(f"Test {test.name} ran in {test.duration}")

def main():
    tests = []
    for a in range(1, 10):
        tests.append(add_test(a))
    num_cores = multiprocessing.cpu_count()
    joblib.Parallel(n_jobs=num_cores)(joblib.delayed(run_test)(test) for test in tests)
    total_time = round(sum(test.duration for test in tests), 3)
    print(f"This run took {total_time} seconds.")

if __name__ == '__main__':
    main()
If I add a print(list(test.duration for test in tests)) in main(), I see that test.duration is 0.0 after run_test() is called. Yet, as can be seen from running the above code, test.duration is set to a non-zero value (where appropriate) inside run_test().
I'm not too familiar with python classes or the joblib library, so I'm not sure if the issue I'm experiencing is related to a misuse of classes or some other issue that's beyond me.
Thank you!
With thanks to num8lock on Reddit, here is the correct way to solve this. The underlying issue is that joblib runs run_test in separate worker processes, so each worker mutates its own copy of the Test object and the parent's tests list never sees the updated duration; the fix is to have each job return the measured duration and sum the values that Parallel collects:
import time
import random
import multiprocessing
import joblib

class Test:
    def __init__(self, name):
        self.name = name
        self.duration = 0.0
        self.start = time.perf_counter()   # measurement starts when the Test is constructed

    def run(self):
        rand = random.randint(1, 3)
        time.sleep(rand)
        _end = time.perf_counter()
        self.duration = _end - self.start
        print(f"Test {self.name} ran in {self.duration}")
        return self.duration

def add(a):
    return Test(str(a))

def make_test(test):
    return test.run()

def main():
    num_cores = multiprocessing.cpu_count()
    tests = []
    for a in range(1, 10):
        tests.append(add(a))
    jobs = joblib.Parallel(n_jobs=num_cores)(joblib.delayed(make_test)(t) for t in tests)
    total_time = sum(job for job in jobs)
    print(f"This run took {total_time} seconds.")

if __name__ == '__main__':
    main()
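An alternative that stays closer to the original structure (a sketch, assuming only the durations need to reach the parent): because joblib executes run_test in worker processes, attribute changes on the Test objects stay in those workers, but values returned from the delayed function are collected by Parallel and can be summed directly.

import time
import random
import multiprocessing
import joblib

def run_test(name):
    start = time.time()
    time.sleep(random.randint(1, 3))
    duration = round(time.time() - start, 3)
    print(f"Test {name} ran in {duration}")
    return duration                      # the returned value is what reaches the parent

if __name__ == '__main__':
    num_cores = multiprocessing.cpu_count()
    durations = joblib.Parallel(n_jobs=num_cores)(
        joblib.delayed(run_test)(str(a)) for a in range(1, 10))
    total_time = round(sum(durations), 3)
    print(f"This run took {total_time} seconds.")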

Using multiprocessing and ProcessPoolExecutor simultaneously

I am trying to create a simple script for Python 3.5 that can execute heavy computer vision algorithms in parallel. I create a process with multiprocessing.Process in the main process.
Inside that process I create a concurrent.futures.ProcessPoolExecutor. The spawned process submits tasks to the ProcessPoolExecutor and it works perfectly fine. But when I try to stop and join the spawned process, it hangs on the join.
Also, if I replace the ProcessPoolExecutor with a ThreadPoolExecutor, everything works perfectly. What did I miss?
Here is the main file:
import multiprocessing as mp
import queue as Queue
import numpy as np
import cv2
from time import sleep
import executer_debug

def worker(queue):
    pExecutor = executer_debug.Worker()
    pExecutor.set()
    while True:
        print("-->{}<--".format(pExecutor.get()))
        sleep(1)
        try:
            income = queue.get_nowait()
            break
        except Queue.Empty:
            pass
        pExecutor.set()
    print("<1>{}<1>".format(pExecutor.get()))
    print("<2>{}<2>".format(pExecutor.get()))

def main():
    queue = mp.Queue()
    currProcess = mp.Process(target=worker, args=(queue,))
    currProcess.start()
    frame = np.zeros((480, 640), dtype=np.uint8)
    while True:
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("stopped")
    queue.put("stop")
    currProcess.join()

if __name__ == "__main__":
    main()
And here is the second file. The code is very simple, just enough to demonstrate the issue.
import collections
from concurrent.futures import ProcessPoolExecutor
from time import sleep
import multiprocessing as mp

def worker():
    return 1

class Worker():
    def __init__(self):
        self.workers_count = 4
        self.poolExecutor = ProcessPoolExecutor(max_workers=self.workers_count)
        self.executors = collections.deque()

    def set(self):
        self.executors.append(self.poolExecutor.submit(worker))

    def get(self):
        if len(self.executors) > 0:
            if self.executors[0].done():
                return self.executors.popleft().result()
            else:
                return 0
        else:
            return -1
Thank you!
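One thing worth checking (an assumption on my part, not a confirmed diagnosis): the ProcessPoolExecutor created inside the spawned process is never shut down, so its own child processes may keep the spawned process from exiting, which would make the outer join hang. A minimal, self-contained sketch of the same structure with an explicit shutdown before the worker returns:

import multiprocessing as mp
import queue
from concurrent.futures import ProcessPoolExecutor

def task():
    return 1

def worker(msg_queue):
    executor = ProcessPoolExecutor(max_workers=4)
    futures = [executor.submit(task) for _ in range(4)]
    while True:
        try:
            msg_queue.get(timeout=1)     # wait for the "stop" message
            break
        except queue.Empty:
            pass
    print([f.result() for f in futures])
    executor.shutdown(wait=True)         # reap the executor's processes before returning

if __name__ == '__main__':
    q = mp.Queue()
    p = mp.Process(target=worker, args=(q,))
    p.start()
    q.put("stop")
    p.join()
    print("joined cleanly")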

Python 3: run two applications in parallel on Windows

One question: I want to run two different exe files in parallel (on Windows).
All my tests start the two applications, but one after the other (the second only after closing the first). What's wrong?
import threading
import subprocess
import os.path

def Worker(aPrg):
    _, name = os.path.split(aPrg)
    if os.path.isfile(aPrg):
        lExe = []
        lExe.append(aPrg)
        print('Start: ' + name)
        lResult = subprocess.call(lExe)
    else:
        print('ERROR: ' + name + ' not available!')
    return

def main():
    t1 = threading.Thread(target=Worker('C:\\windows\\notepad.exe'))
    t2 = threading.Thread(target=Worker('c:\\windows\\explorer.exe'))
    t1.start()
    t2.start()

if __name__ == '__main__':
    main()
Thanks for all ideas!
Geosucher
The reason for this problem is discussed here: Python threading appears to run threads sequentially
This should help you:
import subprocess
import os.path
import multiprocessing

def Worker(aPrg):
    _, name = os.path.split(aPrg)
    if os.path.isfile(aPrg):
        lExe = []
        lExe.append(aPrg)
        print('Start: ' + name)
        lResult = subprocess.call(lExe)
    else:
        print('ERROR: ' + name + ' not available!')
    return

def main():
    p = multiprocessing.Pool(2)
    p.map(Worker, ('c:\\windows\\notepad.exe', 'c:\\windows\\explorer.exe'))

if __name__ == '__main__':
    main()
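For completeness, a sketch based on the linked explanation (not part of the original answer): the threading version runs the programs sequentially because target=Worker('...') calls Worker immediately in the main thread and hands its return value (None) to the Thread. Passing the callable and its argument separately keeps the calls inside the threads:

import threading
import subprocess
import os.path

def Worker(aPrg):
    _, name = os.path.split(aPrg)
    if os.path.isfile(aPrg):
        print('Start: ' + name)
        subprocess.call([aPrg])
    else:
        print('ERROR: ' + name + ' not available!')

def main():
    # The function and its argument are passed separately, so Worker runs
    # inside each thread rather than while the Thread objects are built.
    t1 = threading.Thread(target=Worker, args=('C:\\windows\\notepad.exe',))
    t2 = threading.Thread(target=Worker, args=('C:\\windows\\explorer.exe',))
    t1.start()
    t2.start()
    t1.join()
    t2.join()

if __name__ == '__main__':
    main()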
