Python 3 multiprocessing: shared dictionary consumed by all processes - python-3.x

I'm a beginner with multiprocessing, and I would like to use it to run code in parallel on streaming data.
To get started, I wrote the code below, but it raises an error.
Could you please tell me the correct way to print to the screen?
Code:
import sys
from multiprocessing import Process, Manager
import time

def producer(dic, name):
    for i in range(10000):
        dic["A"] = i
        time.sleep(2)

def consumer(dic, name):
    for i in range(10000):
        aval = dic.get("A")
        # print(f" {name} - Val = {aval}")
        sys.stdout.write(f" {name} - Val = {aval}")
        sys.stdout.flush()
        time.sleep(2.2)

if __name__ == '__main__':
    manager = Manager()
    dic = manager.dict()
    Process(target=producer, args=(dic, "TT")).start()
    time.sleep(1)
    Process(target=consumer, args=(dic, "Con1")).start()
    Process(target=consumer, args=(dic, "Con2")).start()
When I run this in the Windows console, I get the error below. How can I get the consumer's output to print in the console? Thanks.
(base) PS D:\> python .\mulpro.py
Process Process-3:
Process Process-4:
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 811, in _callmethod
    conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "D:\mulpro.py", line 19, in consumer
    aval = dic.get("A")
  File "<string>", line 2, in get
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 815, in _callmethod
    self._connect()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 802, in _connect
    conn = self._Client(self._token.address, authkey=self._authkey)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 490, in Client
    c = PipeClient(address)
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 691, in PipeClient
    _winapi.WaitNamedPipe(address, 1000)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 811, in _callmethod
    conn = self._tls.connection
FileNotFoundError: [WinError 2] The system cannot find the file specified
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "D:\mulpro.py", line 19, in consumer
    aval = dic.get("A")
  File "<string>", line 2, in get
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 815, in _callmethod
    self._connect()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 802, in _connect
    conn = self._Client(self._token.address, authkey=self._authkey)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 490, in Client
    c = PipeClient(address)
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 691, in PipeClient
    _winapi.WaitNamedPipe(address, 1000)
FileNotFoundError: [WinError 2] The system cannot find the file specified
Process Process-2:
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "D:\mulpro.py", line 13, in producer
    dic["A"] = i
  File "<string>", line 2, in __setitem__
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\managers.py", line 818, in _callmethod
    conn.send((self._id, methodname, args, kwds))
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 206, in send
    self._send_bytes(_ForkingPickler.dumps(obj))
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes
    ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

The reason might be that the main process doesn't wait for the two child processes: as soon as it reaches the end of the script it starts shutting down, taking the Manager (and the pipe the children talk to) with it. Restructuring your code like this could work:
def run():
    manager = Manager()
    dic = manager.dict()
    Process(target=producer, args=(dic, "TT")).start()
    time.sleep(1)
    Process(target=consumer, args=(dic, "Con1")).start()
    Process(target=consumer, args=(dic, "Con2")).start()
    while True:
        pass

if __name__ == '__main__':
    run()
However, it's very strange that if I put an endless loop directly in the main block instead of wrapping everything in another function, it still raises that exception. Anyway, the code above should help.
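A less wasteful variant of the same idea (a minimal sketch, not from the original answer) is to keep the Process handles and join() them instead of spinning in a busy loop; join() blocks the main process, so the Manager stays alive for as long as the children need it:

def run():
    manager = Manager()
    dic = manager.dict()
    procs = [
        Process(target=producer, args=(dic, "TT")),
        Process(target=consumer, args=(dic, "Con1")),
        Process(target=consumer, args=(dic, "Con2")),
    ]
    procs[0].start()   # start the producer first
    time.sleep(1)      # give it a head start, as in the original code
    for p in procs[1:]:
        p.start()
    for p in procs:
        p.join()       # block here instead of busy-waiting

if __name__ == '__main__':
    run()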

Related

Why doesn't this simple multiprocessing function work?

I am just learning about multiprocessing in Python, and when I try to run the code I get the problems below.
Code
from multiprocessing import Process, cpu_count
from time import perf_counter

def counter1(num):
    count = 0
    while count < num:
        count += 1

def main():
    t1_start = perf_counter()
    processes = []
    for _ in range(4):
        p = Process(target=counter1, args=(10000,))
        p.start()
        processes.append(p)
    for process in processes:
        process.join()
    t1_stop = perf_counter()
    print("Elapsed time: {}".format(t1_stop - t1_start))

if __name__ == '__main__':
    main()
Problems
Traceback (most recent call last):
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\code.py", line 90, in runcode
    exec(code, self.locals)
  File "<input>", line 1, in <module>
  File "C:\Program Files\JetBrains\PyCharm 2021.3.3\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Program Files\JetBrains\PyCharm 2021.3.3\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "C:/Users/Женя/PycharmProjects/test_loria/mp.py", line 54, in <module>
    main()
  File "C:/Users/Женя/PycharmProjects/test_loria/mp.py", line 43, in main
    p.start()
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\context.py", line 327, in _Popen
    return Popen(process_obj)
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
    reduction.dump(process_obj, to_child)
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function counter1 at 0x000001FBC61EC040>: attribute lookup counter1 on __main__ failed
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\Женя\AppData\Local\Programs\Python\Python310\lib\multiprocessing\spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I have no idea what is going on, because if I run the same code from another file, everything works fine.
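For what it's worth, the pydev frames in the traceback show the script being run through PyCharm's interactive console (runfile). With the spawn start method used on Windows, the child process re-imports __main__ and must find counter1 there by name, which fails in that console environment. A minimal sketch of the usual workaround (the file names counters.py and mp.py are hypothetical): define the worker in a regular importable module and run the script directly with python mp.py:

# counters.py (hypothetical module): the spawn start method pickles the
# target function by reference, so child processes must be able to
# re-import it from a real module.
def counter1(num):
    count = 0
    while count < num:
        count += 1

# mp.py: run with "python mp.py" instead of the IDE's interactive console
from multiprocessing import Process
from time import perf_counter
from counters import counter1

if __name__ == '__main__':
    t1_start = perf_counter()
    processes = [Process(target=counter1, args=(10000,)) for _ in range(4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print("Elapsed time: {}".format(perf_counter() - t1_start))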

Pyttsx3 Voice KeyError: 'sapi5'

I'm getting an unknown voice id error. I tried with pyttsx3 2.6 on Python 3.6, and with pyttsx3 2.9 on Python 3.10.
Traceback (most recent call last):
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\__init__.py", line 44, in init
    eng = _activeEngines[driverName]
  File "E:\anaconda3\envs\dummy\lib\weakref.py", line 137, in __getitem__
    o = self.data[key]()
KeyError: None

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/Dnyanesh/PycharmProjects/dummy/main.py", line 5, in <module>
    engine = pyttsx3.init()
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\__init__.py", line 46, in init
    eng = Engine(driverName, debug)
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\engine.py", line 52, in __init__
    self.proxy = driver.DriverProxy(weakref.proxy(self), driverName, debug)
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\driver.py", line 77, in __init__
    self._driver = self._module.buildDriver(weakref.proxy(self))
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\drivers\sapi5.py", line 22, in buildDriver
    return SAPI5Driver(proxy)
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\drivers\sapi5.py", line 41, in __init__
    self.setProperty('voice', self.getProperty('voice'))
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\drivers\sapi5.py", line 82, in setProperty
    token = self._tokenFromId(value)
  File "E:\anaconda3\envs\dummy\lib\site-packages\pyttsx3\drivers\sapi5.py", line 66, in _tokenFromId
    raise ValueError('unknown voice id %s', id_)
ValueError: ('unknown voice id %s', 'HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices\\Tokens\\MSTTS_V110_enUS_DavidM')
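Note that the token in the final ValueError lives under the Speech_OneCore registry hive; the classic SAPI5 interface that pyttsx3 drives appears to enumerate voices from the regular Speech hive only, so a default voice registered solely under Speech_OneCore cannot be resolved. As a diagnostic, here is a small sketch (assuming pywin32 is installed, which the sapi5 driver already depends on) that lists the voice tokens SAPI5 itself can see; if MSTTS_V110_enUS_DavidM is not in this list, pyttsx3 will not find it either:

# Diagnostic sketch: enumerate the voices visible to the classic SAPI5 API.
import win32com.client

spvoice = win32com.client.Dispatch("SAPI.SpVoice")
for token in spvoice.GetVoices():
    print(token.Id)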

pytorch distributed training fails when use 'gloo' backend

I ran the distributed training code from the PyTorch tutorial. When I use the 'gloo' backend, the code raises a RuntimeError in init_process_group.
"""run.py:"""
#!/usr/bin/env python
import os
import torch
import torch.distributed as dist
from torch.multiprocessing import Process
def run(rank, size):
""" Distributed function to be implemented later. """
pass
def init_process(rank, size, fn, backend='gloo'):
""" Initialize the distributed environment. """
os.environ['MASTER_ADDR'] = '127.0.0.1'
os.environ['MASTER_PORT'] = '29500'
dist.init_process_group(backend, rank=rank, world_size=size)
fn(rank, size)
if __name__ == "__main__":
size = 2
processes = []
for rank in range(size):
p = Process(target=init_process, args=(rank, size, run))
p.start()
processes.append(p)
for p in processes:
p.join()
It fails with the following error:
Process Process-2:
Traceback (most recent call last):
  File "/home/wuyiming/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/wuyiming/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-1-6cc8a94a6551>", line 16, in init_process
    dist.init_process_group(backend, rank=rank, world_size=size)
  File "/home/wuyiming/anaconda3/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 416, in init_process_group
    timeout=timeout)
  File "/home/wuyiming/anaconda3/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 484, in _new_process_group_helper
    timeout=timeout)
RuntimeError: [enforce fail at /pytorch/third_party/gloo/gloo/transport/tcp/device.cc:198] ifa != nullptr. Unable to find interface for: [0.0.0.27]
Process Process-1:
Traceback (most recent call last):
  File "/home/wuyiming/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/wuyiming/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-1-6cc8a94a6551>", line 16, in init_process
    dist.init_process_group(backend, rank=rank, world_size=size)
  File "/home/wuyiming/anaconda3/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 416, in init_process_group
    timeout=timeout)
  File "/home/wuyiming/anaconda3/lib/python3.7/site-packages/torch/distributed/distributed_c10d.py", line 484, in _new_process_group_helper
    timeout=timeout)
RuntimeError: [enforce fail at /pytorch/third_party/gloo/gloo/transport/tcp/device.cc:198] ifa != nullptr. Unable to find interface for: [0.0.0.27]
If I change the backend from 'gloo' to 'nccl', the code runs correctly.
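The "Unable to find interface for: [0.0.0.27]" message suggests gloo resolved the machine's hostname to an address that doesn't belong to any local network interface. A common workaround (a sketch; the interface name 'lo' is an assumption for a single-machine run, substitute your actual NIC) is to pin gloo to a specific interface via the GLOO_SOCKET_IFNAME environment variable before calling init_process_group:

import os
import torch.distributed as dist

def init_process(rank, size, fn, backend='gloo'):
    """ Initialize the distributed environment, pinning gloo to one interface. """
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    # Tell gloo which interface to use instead of resolving the hostname;
    # 'lo' works for single-machine runs, use your real NIC for multi-node.
    os.environ['GLOO_SOCKET_IFNAME'] = 'lo'
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, size)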

Python threading causing issues with Google API

I'm running through a list of locations and trying to find places along my route. This is my first attempt at threading, so any tips would be appreciated! When I run this, it works fine for the first few iterations, but then I start getting a KeyError, and the API response says the route is not found (even though it should be). If I search along a shorter route, everything runs fine; when I extend the route past a couple of hours of drive time, I start getting these errors. Is it possible that I'm overloading it, or does my code look off?
import datetime
import time
import pandas as pd
from threading import Thread
import threading
import requests

start_input = input("start: ")
end_input = input("end: ")
out_way = input("out of the way: ")
out_way_secs = int(out_way) * 60
thread_local = threading.local()

def get_session():
    if not getattr(thread_local, "session", None):
        thread_local.session = requests.Session()
    return thread_local.session

def get_routes(url, start, end, waypoint, idx):
    session = get_session()
    with session.get(url, params={'origins': f'{start}|{waypoint}',
                                  'destinations': f'{start}|{end}',
                                  'key': '# key'}) as response:
        route = response.json()
        if route['rows'][1]['elements'][0]['status'] != 'OK':
            results[idx] = {'# info'}
        else:
            nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
            leg1 = route['rows'][1]['elements'][0]['duration']['value']
            leg2 = route['rows'][1]['elements'][1]['duration']['value']
            time_added = (leg1 + leg2) - nonstop_route
            time_added_mins = str(datetime.timedelta(seconds=(leg1 + leg2) - nonstop_route))
            more_time = time_added_mins.split(':')
            added_time_str = str(f'{more_time[0]}:{more_time[1]}:{more_time[2]} away!')
            if time_added < allowable_time:
                results[idx] = {# info to return}
                return results[idx]

if __name__ == "__main__":
    start_time = time.time()
    output_df = pd.DataFrame(columns=['Location', 'Added Time', 'Notes'])
    threads = [None] * coords[0]
    results = [None] * coords[0]
    for i in range(len(threads)):
        threads[i] = Thread(target=get_routes,
                            args=('https://maps.googleapis.com/maps/api/distancematrix/json',
                                  start_input, end_input, stops[i], i))
        threads[i].start()
    for i in range(len(threads)):
        threads[i].join()
    for x in range(len(results)):
        output_df = output_df.append(results[x], ignore_index=True)
    output_df = output_df.sort_values(['Added Time'], ascending=True)
    output_df.to_csv('output.csv', index=False)
There are 3 errors it can raise; the first one shows up by itself, and the last two come together. The code is the same each time I run it, so I'm not sure why I get different errors.
This is the most common error, which comes by itself (the routing duration works fine when run individually):
Exception in thread Thread-171:
Traceback (most recent call last):
  File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Python37-32\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:program.py", line 46, in get_routes
    nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
KeyError: 'duration'
The two below come together and are less common:
Exception in thread Thread-436:
Traceback (most recent call last):
  File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Python37-32\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:/program.py", line 40, in get_routes
    route = response.json()
  File "C:\requests\models.py", line 897, in json
    return complexjson.loads(self.text, **kwargs)
  File "C:\Python37-32\lib\json\__init__.py", line 348, in loads
    return _default_decoder.decode(s)
  File "C:\Python37-32\lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\Python37-32\lib\json\decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The second error:
Exception in thread Thread-196:
Traceback (most recent call last):
  File "C:\site-packages\urllib3\response.py", line 360, in _error_catcher
    yield
  File "C:\urllib3\response.py", line 442, in read
    data = self._fp.read(amt)
  File "C:\Python37-32\lib\http\client.py", line 447, in read
    n = self.readinto(b)
  File "C:\Python37-32\lib\http\client.py", line 491, in readinto
    n = self.fp.readinto(b)
  File "C:\Python37-32\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
  File "C:\Python37-32\lib\ssl.py", line 1052, in recv_into
    return self.read(nbytes, buffer)
  File "C:\Python37-32\lib\ssl.py", line 911, in read
    return self._sslobj.read(len, buffer)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\site-packages\requests\models.py", line 750, in generate
    for chunk in self.raw.stream(chunk_size, decode_content=True):
  File "C:\site-packages\urllib3\response.py", line 494, in stream
    data = self.read(amt=amt, decode_content=decode_content)
  File "C:\site-packages\urllib3\response.py", line 459, in read
    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
  File "C:\Python37-32\lib\contextlib.py", line 130, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\site-packages\urllib3\response.py", line 378, in _error_catcher
    raise ProtocolError('Connection broken: %r' % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None)", ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
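For what it's worth, the KeyError pattern is consistent with the Distance Matrix API returning a non-OK element (for example when a rate limit kicks in), since 'duration' is only present in an element whose 'status' is 'OK'. Below is a sketch of a bounded worker pool with a status guard on every element; MAX_WORKERS, URL, and the fetch_all signature are assumptions for illustration, not from the original code:

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

URL = 'https://maps.googleapis.com/maps/api/distancematrix/json'
MAX_WORKERS = 10  # assumption: a small pool keeps the request rate under the API quota

def fetch_route(start, end, waypoint):
    response = requests.get(URL, params={'origins': '{}|{}'.format(start, waypoint),
                                         'destinations': '{}|{}'.format(start, end),
                                         'key': '# key'})
    route = response.json()
    elements = route['rows'][1]['elements']
    # Guard every element: 'duration' only exists when status is 'OK'.
    if any(e.get('status') != 'OK' for e in elements):
        return None
    return elements[0]['duration']['value'] + elements[1]['duration']['value']

def fetch_all(start, end, stops):
    results = [None] * len(stops)
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        futures = {pool.submit(fetch_route, start, end, s): i
                   for i, s in enumerate(stops)}
        for fut in as_completed(futures):
            results[futures[fut]] = fut.result()
    return results

Capping the pool at a fixed size, rather than starting one thread per stop, keeps the request volume bounded no matter how long the route gets.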

SyntaxError raised when using joblib with lxml on python 3.5

I am trying to parallelize the task of correcting texts in many documents with Python, so naturally I found "joblib". I want each task to be correcting a given document. Here is the structure of the code:
if __name__ == '__main__':
    lexicon = build_compact_lexicon()
    from joblib import Parallel, delayed
    import multiprocessing
    num_cores = multiprocessing.cpu_count()
    results = Parallel(n_jobs=num_cores)(delayed(find_errors)('GDL', i, 1, lexicon)
                                         for i in range(1798, 1820))
I am using the function find_errors, summarized here:
def find_errors(newspaper, year, month, lexicon):
    # parse the input newspaper text data using the etree parser from lxml
    # detect errors in the text
    return found_errors_type1, found_errors_type2, found_errors_type3
This raises a few errors:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 130, in __call__
    return self.func(*args, **kwargs)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 72, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 72, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "hellowordParallel.py", line 85, in find_errors
    tree = etree.parse(xml_file_path)
  File "src/lxml/lxml.etree.pyx", line 3427, in lxml.etree.parse (src/lxml/lxml.etree.c:79801)
  File "src/lxml/parser.pxi", line 1805, in lxml.etree._parseDocument (src/lxml/lxml.etree.c:116293)
TypeError: cannot parse from 'NoneType'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 392, in find_cookie
    line_string = line.decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 24: invalid start byte

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 139, in __call__
    tb_offset=1)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 373, in format_exc
    frames = format_records(records)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 274, in format_records
    for token in generate_tokens(linereader):
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 514, in _tokenize
    line = readline()
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 265, in linereader
    line = getline(file, lnum[0])
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 16, in getline
    lines = getlines(filename, module_globals)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 47, in getlines
    return updatecache(filename, module_globals)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 136, in updatecache
    with tokenize.open(fullname) as fp:
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 456, in open
    encoding, lines = detect_encoding(buffer.readline)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 433, in detect_encoding
    encoding = find_cookie(first)
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 397, in find_cookie
    raise SyntaxError(msg)
  File "<string>", line None
SyntaxError: invalid or missing encoding declaration for '/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/lxml/etree.so'
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "hellowordParallel.py", line 160, in <module>
    results = Parallel(n_jobs=num_cores)(delayed(find_errors)('GDL', i, 1, lexicon) for i in range(1798, 1820))
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 810, in __call__
    self.retrieve()
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 727, in retrieve
    self._output.extend(job.get())
  File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/multiprocessing/pool.py", line 608, in get
    raise self._value
SyntaxError: invalid or missing encoding declaration for '/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/multiprocessing/pool.py', line 608 belongs here
I don't understand if this is due to something related to configs, or if my function doesn't fit a parallel implementation (I guess it should...).
Has this happened to any of you before?
I hope my question is clear and that there is enough information for someone to give me some help!
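Reading the remote traceback from the inside out, the SyntaxError is arguably secondary: it comes from joblib's traceback formatter trying to read the compiled lxml/etree.so as source text. The original failure is the TypeError: cannot parse from 'NoneType', meaning xml_file_path was None inside the worker. A minimal sketch of a guard that surfaces the real problem (build_xml_path is a hypothetical stand-in for however the path is derived):

from lxml import etree

def find_errors(newspaper, year, month, lexicon):
    # build_xml_path is a hypothetical helper standing in for the real lookup
    xml_file_path = build_xml_path(newspaper, year, month)
    if xml_file_path is None:
        # Fail loudly in the worker instead of letting etree.parse(None) raise,
        # since joblib's error formatting on Python 3.5 garbles the traceback.
        raise ValueError("no XML file found for {} {}-{}".format(newspaper, year, month))
    tree = etree.parse(xml_file_path)
    # ... detect errors in the text as before ...
    return found_errors_type1, found_errors_type2, found_errors_type3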
