How can I speed up this loop using multiprocessing or multithreading? - python-3.x

I am afraid that I'm not doing the multithreading thing the right way, so I came here in search of wisdom. I have two arrays of addresses, and I have to check whether each address in the first array exists in the second array and, if it doesn't, look for the most similar address in array 2.
The array that has the "official" addresses is called directory, and the array that I need to validate is called look_address.
The code goes as follows:
import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from datetime import datetime, timedelta
import threading
import queue

class myThread(threading.Thread):
    def __init__(self, threadID, name, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.q = q

    def run(self):
        print(f"starting {self.name}")
        process_data(self.name, self.q)
        print(f"ending {self.name}")

locs = []
ratios = {}

def process_data(threadName, q):
    while not exitFlag:
        queueLock.acquire()
        if not workQueue.empty():
            d = q.get()
            queueLock.release()
            d = d.strip()
            if directory.isin([d]).any():
                locs.append(d)
            else:
                pos = process.extract(d, directory.values, scorer=fuzz.ratio, limit=50)
                ratios[d] = pos
        else:
            queueLock.release()

threadlist = ["T-1","T-2","T-3","T-4","T-5","T-6","T-7","T-8","T-9","T-10"]
nameList = look_address
queueLock = threading.Lock()
workQueue = queue.Queue(len(nameList) + 1)
threads = []
threadID = 1
exitFlag = 0

for name in threadlist:
    thread = myThread(threadID, name, workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1

queueLock.acquire()
for addr in nameList:
    workQueue.put(addr)
queueLock.release()

total_steps = len(workQueue.queue)
tot_sec = 0
t0 = datetime.now()
while not workQueue.empty():
    total_seconds = (datetime.now() - t0).total_seconds()
    if total_seconds == 0:
        total_seconds = 1e-8
    progress = 1 - len(workQueue.queue) / total_steps
    tot_sec += total_seconds
    print("\rProgress: {pr:.2f}% || Good/Errors: {gb}/{bd}".format(
        pr=progress * 100,
        its=1 / total_seconds,
        elap=timedelta(seconds=np.round(tot_sec)),
        gb=len(locs),
        bd=len(ratios),  # `errors` was undefined in the original; ratios holds the fuzzy-matched entries
        eta=timedelta(seconds=np.round(total_seconds * (total_steps - len(workQueue.queue))))), end="", flush=True)

exitFlag = 1
for t in threads:
    t.join()
print("\nExiting Main Thread")
Each call to process.extract takes around 25 s (I did a %timeit). Now, with the script above, the data processing doesn't seem to speed up. It has been running for about 2 hours and has progressed by around 4.29%.
My two questions are:
Is my multithreading implementation correct?
How can I speed up the data processing? Maybe run this on a VPS on Amazon or Google?
I want to understand why this is so slow and how I can speed things up.
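A note not in the original post: fuzzywuzzy's scoring is pure Python and CPU-bound, so the GIL prevents threads from actually running it in parallel; processes do not share that limit. A minimal sketch with multiprocessing (assuming directory and look_address are pandas Series of strings, and a fork start method so workers inherit directory) might look like this:
# A minimal sketch, not the original code: a process pool sidesteps the GIL
# for CPU-bound scoring. Assumes `directory` and `look_address` are pandas
# Series and a fork start method (the Linux default) so workers see `directory`.
from multiprocessing import Pool
from fuzzywuzzy import fuzz, process

def match_one(d):
    d = d.strip()
    if directory.isin([d]).any():
        return d, None  # exact match, nothing to fuzzy-search
    return d, process.extract(d, directory.values, scorer=fuzz.ratio, limit=50)

if __name__ == "__main__":
    with Pool() as pool:  # defaults to one worker per CPU core
        results = pool.map(match_one, list(look_address))
    locs = [d for d, pos in results if pos is None]
    ratios = {d: pos for d, pos in results if pos is not None}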
EDIT: Changed from:
if not workQueue.empty():
    d = q.get()
    d = d.strip()
    if directory.isin([d]).any():
        locs.append(d)
    else:
        pos = process.extract(d, directory.values, scorer=fuzz.ratio, limit=50)
        ratios[d] = pos
queueLock.release()
to:
if not workQueue.empty():
    d = q.get()
    queueLock.release()
    d = d.strip()
    if directory.isin([d]).any():
        locs.append(d)
    else:
        pos = process.extract(d, directory.values, scorer=fuzz.ratio, limit=50)
        ratios[d] = pos
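Incidentally, queue.Queue does its own locking, so the explicit queueLock is not needed at all; a sketch of the worker without it (an assumption about intent, using a get timeout to avoid busy-waiting):
# Sketch: queue.Queue is already thread-safe, so no external lock is required;
# get() with a timeout also avoids spinning on an empty queue.
def process_data(threadName, q):
    while not exitFlag:
        try:
            d = q.get(timeout=0.1)
        except queue.Empty:
            continue
        d = d.strip()
        if directory.isin([d]).any():
            locs.append(d)
        else:
            ratios[d] = process.extract(d, directory.values, scorer=fuzz.ratio, limit=50)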

Related

Why is serial code faster than concurrent.futures in this case?

I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = ...  # some value
    return ood

seqs = []
for seq in range(1, 10):  # len(seqs)+1
    seq = txt + str(seq)
    seqs.append(seq)
    # serial call of the function
    track_ids(seq)

# parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    ood_id = ex.map(track_ids, seqs)
If I run the code serially it takes 3.0 minutes, but in parallel with concurrent.futures it takes 3.5 minutes.
Can someone please explain why that is, and present a way to solve the problem?
By the way, I have 12 cores. Thanks.
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
import multiprocessing as mp
from cProfile import Profile
from pstats import Stats
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = ...  # some value
    return ood

def profile_seq():
    p = Profile()  # one and only profiler instance
    p.enable()
    seqs = []
    for seq in range(1, 10):  # len(seqs)+1
        seq = txt + str(seq)
        seqs.append(seq)
        # serial call of the function
        track_ids(seq)
    p.disable()
    return Stats(p), seqs

def track_ids_pr(seq):
    p = Profile()  # profile the child tasks
    p.enable()
    retval = track_ids(seq)
    p.disable()
    return (Stats(p, stream="dummy"), retval)

def profile_parallel():
    p = Profile()  # profile stuff in the main process
    p.enable()
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
        retvals = ex.map(track_ids_pr, seqs)  # `seqs` as built in profile_seq()
    p.disable()
    s = Stats(p)
    out = []
    for ret in retvals:
        s.add(ret[0])
        out.append(ret[1])
    return s, out

if __name__ == "__main__":
    stat, retval = profile_parallel()
    stat.print_stats()
EDIT: Unfortunately I found out that pstats.Stats objects cannot be used normally with a multiprocessing.Queue because they are not picklable (which is needed for the operation of concurrent.futures). Evidently a Stats object normally stores a reference to a file for the purpose of writing statistics to that file, and if none is given it will by default grab a reference to sys.stdout. We don't actually need that reference until we want to print out the statistics, however, so we can give it a temporary value to prevent the pickle error and then restore an appropriate value later. The following example should be copy-paste-able and run just fine, rather than the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys

def isprime(x):
    for d in range(2, int(x**.5) + 1):  # +1 so perfect squares are caught
        if x % d == 0:
            return False
    return True

def foo(retq):
    p = Profile()
    p.enable()
    primes = []
    max_n = 2**20
    for n in range(3, max_n):
        if isprime(n):
            primes.append(n)
    p.disable()
    retq.put(Stats(p, stream="dummy"))  # dirty hack: set `stream` to something picklable, then override later

if __name__ == "__main__":
    q = Queue()
    p1 = Process(target=foo, args=(q,))
    p1.start()
    p2 = Process(target=foo, args=(q,))
    p2.start()
    s1 = q.get()
    s1.stream = sys.stdout  # restore original file
    s2 = q.get()
    # s2.stream: if we are just adding this `Stats` object to another, the `stream` gets thrown away anyway
    s1.add(s2)  # add up the stats from both child processes
    s1.print_stats()  # s1.stream gets used here, but not before; if you provide a file instead of sys.stdout, it will write to that file
    p1.join()
    p2.join()
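If the profile points at pickling and IPC overhead (a common result when each task is short or its arguments are large, such as image batches), one mitigation is to pass a chunksize to map so many tasks travel in one round-trip; a sketch using the names from the question:
# Sketch: batching tasks with chunksize amortizes per-task pickling/IPC cost.
# `track_ids` and `seqs` are the function and task list from the question.
import multiprocessing as mp
import concurrent.futures

with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    chunk = max(1, len(seqs) // mp.cpu_count())
    ood_id = list(ex.map(track_ids, seqs, chunksize=chunk))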

Producer Consumer message sharing not working in multiprocessing

I am trying to run a scenario where I have a producer capturing frames from a webcam and putting them in a queue, and a consumer that reads an image from the incoming queue, does some processing, and puts the output image in an outgoing queue.
The issue is that the consumer's read from the queue is not blocking. Ideally it should be. Also, when it reads a value from the queue, the size is always a constant 128, which is wrong. I am sure the size of the image I am putting in the queue is far greater.
from __future__ import print_function
import multiprocessing
import time
import logging
import sys
import cv2

class Consumer(multiprocessing.Process):
    def __init__(self, incoming_q, outgoing_q):
        multiprocessing.Process.__init__(self)
        self.outgoing_q = outgoing_q
        self.incoming_q = incoming_q

    def run(self):
        proc_name = self.name
        print(f"{proc_name} - inside process_feed..starting")
        while True:
            #print(f"size of incoming_q=>{self.incoming_q.qsize()}")
            try:
                #print(f"{proc_name} - size of B incoming_q=>{self.incoming_q.qsize()}")
                image_np = self.incoming_q.get(True)
                size_of_img = sys.getsizeof(image_np)
                #print(f"{proc_name} - size of A incoming_q=>{self.incoming_q.qsize()}")
                if size_of_img > 128:
                    print(f"{proc_name} - size image=>{size_of_img}")
                    time.sleep(1)
                    self.outgoing_q.put_nowait(image_np)
            except:
                pass
        print("inside process_feed..ending")

class Producer(multiprocessing.Process):
    def __init__(self, incoming_q, outgoing_q):
        multiprocessing.Process.__init__(self)
        self.incoming_q = incoming_q
        self.outgoing_q = outgoing_q

    def run(self):
        proc_name = self.name
        print("inside capture_feed")
        stream = cv2.VideoCapture(0)
        try:
            counter = 0
            while True:
                counter += 1
                if counter == 1:
                    if not self.incoming_q.full():
                        (grabbed, image_np) = stream.read()
                        size_of_img = sys.getsizeof(image_np)
                        print(f"{proc_name}........B.......=>{self.incoming_q.qsize()}")
                        print(f"{proc_name} - size image=>{size_of_img}")
                        self.incoming_q.put(image_np)
                        print(f"{proc_name}........A.......=>{self.incoming_q.qsize()}")
                    counter = 0
                try:
                    image_np = self.outgoing_q.get_nowait()
                    logging.info("reading value for o/p")
                    cv2.imshow('object detection', image_np)
                except:
                    pass
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            stream.release()
            cv2.destroyAllWindows()
        print("inside capture_feed..ending")

if __name__ == '__main__':
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    stream = cv2.VideoCapture(0)
    incoming_q = multiprocessing.Queue(maxsize=100)
    outgoing_q = multiprocessing.Queue(maxsize=100)
    logging.info("before start of thread")
    max_process = 1
    processes = []
    processes.append(Producer(incoming_q, outgoing_q))
    for i in range(max_process):
        p = Consumer(incoming_q, outgoing_q)
        p.daemon = True
        processes.append(p)
    logging.info("inside main thread..middle")
    for p in processes:
        p.start()
    logging.info("inside main thread..ending")
    logging.info("waiting in main thread too....")
    logging.info("waiting in main thread finished....")
    for p in processes:
        p.join()
    logging.info("inside main thread..ended")
I was able to figure out the issue with my approach: I missed the whole concept of pickle (serialization).
I changed my code to serialize the numpy array before writing it to the queue and deserialize it after reading. The code started working as expected.
Also, printing 128 as the size of the np array is fine; I was misinterpreting that number.
import pickle
import numpy as np

def serialize_ndarray(arr: np.ndarray) -> bytes:
    serialized = pickle.dumps(arr)
    return serialized

def deserialize_ndarray(string: bytes) -> np.ndarray:
    data = pickle.loads(string)
    return data
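For what it's worth, the constant 128 is consistent with sys.getsizeof counting only the ndarray object itself, not a pixel buffer the array merely references; arr.nbytes is the reliable measure of the frame data. (multiprocessing.Queue also pickles objects automatically, so the explicit serialize/deserialize step mainly makes that behavior visible.) A small demo of the distinction:
# Demo: a view does not own its buffer, so getsizeof reports only the ndarray
# header, while nbytes reflects the actual pixel data it refers to.
import sys
import numpy as np

img = np.zeros((480, 640, 3), dtype=np.uint8)
view = img[:]                # a view referencing img's buffer
print(sys.getsizeof(view))   # ~100-130 bytes: just the object header
print(view.nbytes)           # 921600: the real frame size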

Using multiprocessing inside a function

I want to take the working code below and put it into a function.
import multiprocessing as mp

def parameters(x, n):
    for i in x:
        yield (i, n)

def power(a):
    x, n = a
    return x**n

if __name__ == '__main__':
    p = [i for i in range(1000)]
    p = parameters(p, 2)
    agents = 4
    chunk = 10
    with mp.Pool(processes=agents) as pool:
        o = pool.map(power, p, chunksize=chunk)
    print(o)
So that I can call it whenever I want. I tried doing something like this:
import multiprocessing as mp

def parameters(x, n):
    for i in x:
        yield (i, n)

def power(a):
    x, n = a
    return x**n

def calculate(s, n):
    p = [i for i in range(s)]
    p = parameters(p, n)
    agents = 4
    chunk = 10
    with mp.Pool(processes=agents) as pool:
        o = pool.map(power, p, chunksize=chunk)
    return o

print(calculate(1000, 2))
However, this does not work at all; it tells me that another process has started before one has ended. But the code above did work. Is there a way to properly put this code into a function? If not with this setup, then with what setup?
Make sure to protect code that should only run in the main process with if __name__ == '__main__':. This code works:
import multiprocessing as mp

def parameters(x, n):
    for i in x:
        yield (i, n)

def power(a):
    x, n = a
    return x**n

def calculate(s, n):
    p = [i for i in range(s)]
    p = parameters(p, n)
    agents = 4
    chunk = 10
    with mp.Pool(processes=agents) as pool:
        o = pool.map(power, p, chunksize=chunk)
    return o

if __name__ == '__main__':
    print(calculate(1000, 2))
Without the if, the following error is raised:
RuntimeError:
    An attempt has been made to start a new process before the
    current process has finished its bootstrapping phase.

    This probably means that you are not using fork to start your
    child processes and you have forgotten to use the proper idiom
    in the main module:

        if __name__ == '__main__':
            freeze_support()
            ...

    The "freeze_support()" line can be omitted if the program
    is not going to be frozen to produce an executable.
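The underlying reason, not spelled out in the message, is that under the spawn and forkserver start methods each worker re-imports the main module; any unguarded top-level code, like the print(calculate(1000, 2)) call, then runs again in every child and tries to start another Pool. A minimal self-contained demo of the guarded pattern:
# Minimal demo: under the spawn start method (the default on Windows and
# macOS), workers re-import __main__, so Pool creation must be guarded.
import multiprocessing as mp

def square(x):
    return x * x

if __name__ == "__main__":
    with mp.Pool(processes=2) as pool:
        print(pool.map(square, range(5)))  # runs only in the parent process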

Python multiprocessing update size of iterable

I have the following:
from multiprocessing import Pool

def process_elements(index_of_data_inputs):
    <process>
    if <condition>:
        # I would like to change the size of data_inputs

if __name__ == '__main__':
    pool = Pool()  # create a multiprocessing Pool
    pool.map(process_elements, range(0, len(data_inputs)))  # process the data_inputs iterable with the pool
How can I change the size of data_inputs, and so change the number of times process_elements is called?
The work that I would like to parallelize is:
i = 0
while i < len(elements):
    new_elems = process_some_elements(x, y)
    if len(new_elems) > 0:
        elements = elements + new_elems
    i += 1
Consider a simple example of communication between processes with the multiprocessing module in Python:
import multiprocessing
import queue
import random

def process_elements(num, comq):
    val = random.random()
    if val > 0.5:
        comq.put(1)
    return num, int(1000 * val)

if __name__ == '__main__':
    # initial data
    numbers = list(range(10))
    # data structure for communication between multiple processes
    m = multiprocessing.Manager()
    q = m.Queue()
    with multiprocessing.Pool(processes=4) as pool:
        # get answer for original data
        ans = pool.starmap(process_elements, [(num, q) for num in numbers])
        print(numbers)
        print(ans)
        # create additional data based on the answer for initial data
        new_numbers = numbers[-1:]
        try:
            while True:
                new_numbers.append(new_numbers[-1] + q.get_nowait())
        except queue.Empty:
            pass
        # get answer for additional data
        new_ans = pool.starmap(process_elements, [(num, q) for num in new_numbers[1:]])
        print(new_numbers)
        print(new_ans)
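The Manager().Queue() is the key design choice here: unlike a bare multiprocessing.Queue, its proxy can be pickled and passed to pool workers, so each worker can signal that it generated extra work, and the parent then drains the queue and submits a follow-up starmap batch with the new items.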

multithread pixel plotting lib for python

I am doing a graphics project and have to use a pixel-plotting function only. I came across pygame, but it doesn't seem to allow its calls to be used from threads. Please suggest a workaround, or any other Python library that can be used with threads.
#! /usr/bin/env python
import threading
import time

class plotterthread(threading.Thread):
    def __init__(self, screen, queue):
        threading.Thread.__init__(self)
        self.screen = screen
        self.__queue__ = queue
        self._colorset = False
        self.kill = False

    def setcolor(self, r, g, b):
        self._r = r
        self._g = g
        self._b = b
        self._colorset = True

    def run(self):
        try:
            while not self.__queue__.empty() and not self.kill:
                (x, y) = self.__queue__.get(timeout=1)
                if not self._colorset:
                    # this is where the error occurs:
                    # this calls pygame's set_at() method
                    self.screen.plotpixel((x, y))
                else:
                    self.screen.plotpixel((x, y), (self._r, self._g, self._b))
        except:
            pass

class plotter:
    def __init__(self, screen, min, max, queue, ppt=50):
        self.screen = screen
        self.min = min
        self.max = max
        self.ppt = ppt  # pixels per thread
        self.coords = queue

    def plot(self):
        noOfcoords = self.coords.qsize()
        noOfthreads = int(noOfcoords / self.ppt)
        if noOfthreads > self.max:
            noOfthreads = self.max
        elif noOfthreads < self.min:
            noOfthreads = self.min
        self.threadpool = []
        for i in range(noOfthreads):
            pthread = plotterthread(screen=self.screen, queue=self.coords)
            self.threadpool.append(pthread)
            pthread.start()
        while True:
            try:
                livethreads = 0
                for t in self.threadpool:
                    if t is not None and t.is_alive():  # isAlive() was removed in Python 3.9
                        livethreads += 1
                        t.join(timeout=1)
                if livethreads == 0:
                    break
            except KeyboardInterrupt:
                for threads in self.threadpool:
                    if threads is not None and threads.is_alive():
                        threads.kill = True
                break
Here, screen is the instance of the window on which I am trying to draw, and queue is the Queue which contains all the calculated coordinates to be plotted.
All I wanted to do was first calculate all the pixels and push them into a queue, then share the queue among threads to plot them in parallel to speed up the process.
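Pygame's drawing and display calls are generally only safe from the thread that owns the display, so one workaround is to invert the design: worker threads only compute coordinates, and the main thread alone draws. A self-contained sketch of that pattern (hypothetical names, using pygame's own set_at rather than the plotpixel wrapper above):
# Sketch: workers compute pixels into a thread-safe queue; only the main
# thread touches pygame.
import queue
import threading
import pygame

coords = queue.Queue()

def compute(n):
    # stand-in for the real calculation: push (x, y, color) tuples only
    for i in range(n):
        coords.put((i % 640, (i * 7) % 480, (255, 0, 0)))

pygame.init()
screen = pygame.display.set_mode((640, 480))
threading.Thread(target=compute, args=(100000,), daemon=True).start()

running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
    try:
        while True:
            x, y, color = coords.get_nowait()
            screen.set_at((x, y), color)  # drawing stays on the main thread
    except queue.Empty:
        pass
    pygame.display.flip()
pygame.quit()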
