I have a function with two parameters
reqs =[1223,1456,1243,20455]
url = "pass a url"
def crawl(i,url):
print("%s is %s" % (i, url))
I want to trigger above function by multi processing concept.
from multiprocessing import Pool
if __name__ == '__main__':
p = Pool(5)
print(p.map([crawl(i,url) for i in reqs]))
above code is not working for me. can anyone please help me on this!
----- ADDING NEW CODE ---------
from multiprocessing import Pool
reqs = [1223,1456,1243,20455]
url = "pass a url"
def crawl(combined_args):
print("%s is %s" % (combined_args[0], combined_args[1]))
def main():
p = Pool(5)
print(p.map(crawl, [(i,url) for i in reqs]))
if __name__ == '__main__':
main()
when I am trying to execute above code, I am getting below error
According to the multiprocessing.Pool.map this is the function argument line:
map(func, iterable[, chunksize])
You are trying to pass to the map a iterator instead of (func, iterable).
Please refer to the following example of multiprocessing.pool (source):
import time
from multiprocessing import Pool
work = (["A", 5], ["B", 2], ["C", 1], ["D", 3])
def work_log(work_data):
print(" Process %s waiting %s seconds" % (work_data[0], work_data[1]))
time.sleep(int(work_data[1]))
print(" Process %s Finished." % work_data[0])
def pool_handler():
p = Pool(2)
p.map(work_log, work)
if __name__ == '__main__':
pool_handler()
Please note that he is passing one argument to the work_log function and in the function he use the index to get to the relevant fields.
Refering to your example:
from multiprocessing import Pool
reqs = [1223,1456,1243,20455]
url = "pass a url"
def crawl(combined_args):
print("%s is %s" % (combined_args[0], combined_args[1]))
def main():
p = Pool(5)
print(p.map(crawl, [(i,url) for i in reqs]))
if __name__ == '__main__':
main()
Results with:
1223 is pass a url
1456 is pass a url
1243 is pass a url
20455 is pass a url
[None, None, None, None] # This is the output of the map function
Issue resolved. crawl function should in separate module like below:
crawler.py
def crawl(combined_args):
print("%s is %s" % (combined_args[0], combined_args[1]))
run.py
from multiprocessing import Pool
import crawler
def main():
p = Pool(5)
print(p.map(crawler.crawl, [(i,url) for i in reqs]))
if __name__ == '__main__':
main()
Then output will be like below:
**output :**
1223 is pass a url
1456 is pass a url
1243 is pass a url
20455 is pass a url
[None, None, None, None] # This is the output of the map function
Related
Could someone explain to me why my code doesn't print the expected output [Thread1 36] from the thread? I am currently using python3.7.0 on a mac OS Catalina 10.15.2
Here is my code:
import timeit, _thread
def expmod1(a, n, m):
return (a**n)%m
# avg_time is a higher order function
# argument is the different variations of the expmod(135, 202, 53) function
def avg_time(thread_name, expmod_function):
print("Start")
result = expmod_function(135, 202, 53)
print(thread_name + " " + str(result), flush=True)
return result
# analysis of all three functions based on average timecost using a constant function as defined by avg_time
def analysis1():
try:
_thread.start_new_thread(avg_time, ("Thread1", expmod1))
except:
print("Unable to start thread")
def main():
analysis1()
if __name__ == "__main__":
main()
I spent nearly the whole day with this and came to the end of my knowledge:
I want to change a shared multiprocessing.Value string in the subprocess, but python hangs as soon as the subprocess is trying to change the shared value.
Below an example code:
from multiprocessing import Process, Value, freeze_support
from ctypes import c_wchar_p
def test(x):
with x.get_lock():
x.value = 'THE TEST WORKED'
return
if __name__ == "__main__":
freeze_support()
value = Value(c_wchar_p, '')
p = Process(target=test, args = (value,))
p.start()
print(p.pid)
# this try block is to also allow p.run()
try:
p.join()
p.terminate()
except:
pass
print(value.value)
What I tried and does not work:
I tried ctypes c_wchar_p and c_char_p, but both result in the same freezing.
I tried also without x.get_lock()
I tried also without freeze_support()
What works (but does not help):
Using a float as the shared value (value = Value('d',0) and x.value = 1).
Running the Process without starting a subprocess (replace p.start() with p.run() )
I am using Windows 10 64 bit and Python 3.6.4 (Spyder, but also tried outside of Spyder).
Any help welcome!
A shared pointer won't work in another process because the pointer is only valid in the process in which it was created. Instead, use an array:
import multiprocessing as mp
def test(x):
x.value = b'Test worked!'
if __name__ == "__main__":
x = mp.Array('c',15)
p = mp.Process(target=test, args = (x,))
p.start()
p.join()
print(x.value)
Output:
b'Test worked!'
Note that array type 'c' is specialized and returns a SynchronizedString vs. other types that return SynchronizedArray. Here's how to use type 'u' for example:
import multiprocessing as mp
from ctypes import *
def test(x):
x.get_obj().value = 'Test worked!'
if __name__ == "__main__":
x = mp.Array('u',15)
p = mp.Process(target=test, args = (x,))
p.start()
p.join()
print(x.get_obj().value)
Output:
Test worked!
Note that operations on the wrapped value that are non-atomic such as += that do read/modify/write should be protected with a with x.get_lock(): context manager.
The following returns "NameError: name 'times_2' is not defined", and I can't figure out why:
def pass_data(data): return times_2(data)
def times_2(data): return data*2
import multiprocessing
multiprocessing.pool = Pool()
pool.ncpus = 2
res = pool.map(pass_data, range(5))
print(res)
What I'm actually trying to do is apply a function to a pandas dataframe. However, because I can't use a lambda function to do this:
pool.map(lambda x: x.apply(get_weather, axis=1), df_split)
instead I have this with the following helper methods, but it throws a "NameError: name 'get_weather' is not defined":
def get_weather(df):
*do stuff*
return weather_df
def pass_dataframe(df):
return df.apply(get_weather, axis=1)
results = pool.map(pass_dataframe, df_split)
Try using Pool like this:
from multiprocessing import Pool
def pass_data(data): return times_2(data)
def times_2(data): return data*2
with Pool(processes=4) as pool:
res = pool.map(pass_data, range(5))
print(res)
On Windows:
from multiprocessing import Pool
def pass_data(data): return times_2(data)
def times_2(data): return data*2
if __name__ == '__main__':
with Pool(processes=4) as pool:
res = pool.map(pass_data, range(5))
print(res)
See docs https://docs.python.org/3/library/multiprocessing.html#multiprocessing-programming
from num2words import num2words
import re
from googletrans import Translator
import time, os, glob
import concurrent.futures
start_time = time.time()
translator = Translator()
src_dir="/home/lol/patrika1"
dest_file="/home/lol/df.txt"
counter=1
def n2w(match):
return translator.translate(num2words(int(match.group(1))),dest='hi').text
def clean_file(file_name):
global counter
fil = open(file_name,'r')
lines = fil.read()
fil.close()
# more logic
result=re.sub(r'[\n]+','\n',result2)
counter+=1
print(counter)
print(file_name)
cleaned.write(result)
print("--- %s seconds ---" % (time.time() - start_time))
if __name__ == '__main__':
global cleaned
os.chdir(src_dir)
file_list=glob.glob("*.txt")
cleaned=open(dest_file,'a')
with concurrent.futures.ProcessPoolExecutor() as executor:
executor.map(clean_file,file_list[:10])
print("finish "+ str(counter))
cleaned.close()
Counter has a value 1 in the main function when i print.
How do i maintain a count of how many files have been processed by the function?
As usually global variable use is not advised ( what does it count ? who modifies it ? and the case where 2 scripts to merge use the same "counter" for 2 different things ), you could use that type of construct :
class FileCleaner:
Counter = 0
#classmethod
def clean(cls, file_name):
...
cls.Counter +=1
...
and then access FileCleaner.Counter from anywhere and call function with FileCleaner.clean( ... )
maybe try cleaner code before using ProcessPoolExecutor model, as it does not give easy to read code ( hopefully will be helped soon by subinterpreters) you would need to split the list of files, call executor in try/except , add +1 to the class counter on success all that from a file cleaning pool class. not from main.
import concurrent.futures
import threading
import math
PRIMES = [
112272535095293,
112582705942171,
112272535095293,
115280095190773,
115797848077099,
1099726899285419]
class PrimePoolTester:
Counter = 0
#classmethod
def is_prime(cls,n):
if n % 2 == 0:
return False
sqrt_n = int(math.floor(math.sqrt(n)))
for i in range(3, sqrt_n + 1, 2):
if n % i == 0:
return False
return True
#classmethod
def execute(cls,primes):
with concurrent.futures.ProcessPoolExecutor() as executor:
for number, prime in zip(primes, executor.map(cls.is_prime, primes)):
cls.Counter += 1
print('(%s)-%d : %d is prime: %s' % (threading.current_thread().name, cls.Counter, number, prime))
class Runner_interpreter:
def __init__(self, thread_count, worker):
self.thr = []
for _ in range(thread_count):
t = threading.Thread(target = worker)
t.daemon = True
t.start()
self.thr.append( t )
def join(self):
for th in self.thr:
th.join()
if __name__ == '__main__':
def job():
global worklist
PrimePoolTester.execute( worklist.pop(0) )
worklist = [ PRIMES ] * 4
#use 4 "core"
Runner_interpreter(4,job).join()
How could you set a variable equal to 'O' or '-' and then put that in an if statement like the one below:
if variable == 'O':
print 'hi'
how could you do that for:
import threading
from array import array
from Queue import Queue, Full
import pyaudio
CHUNK_SIZE = 1024
MIN_VOLUME = 500
BUF_MAX_SIZE = CHUNK_SIZE * 10
def main():
stopped = threading.Event()
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
listen_t = threading.Thread(target=listen, args=(stopped, q))
listen_t.start()
record_t = threading.Thread(target=record, args=(stopped, q))
record_t.start()
try:
while True:
listen_t.join(0.1)
record_t.join(0.1)
except KeyboardInterrupt:
stopped.set()
listen_t.join()
record_t.join()
def record(stopped, q):
while True:
if stopped.wait(timeout=0):
break
chunk = q.get()
vol = max(chunk)
if vol >= MIN_VOLUME:
# TODO: write to file
print "O",
else:
print "-",
def listen(stopped, q):
stream = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=2,
rate=44100,
input=True,
frames_per_buffer=1024,
)
while True:
if stopped.wait(timeout=0):
break
try:
q.put(array('h', stream.read(CHUNK_SIZE)))
except Full:
pass # discard
if __name__ == '__main__':
main()
Could you use that so if the output is 'O' then print hi? Will somebody write the code for me because I have been trying for a little bit to write this code and I have still not been able to make the code work for me. Thank You.
In order to use if then statement in Python, first we need to declare variable value with proper syntax according to requirement and type.
s = "O"
if s == 'O':
print 'hi'