Multiproceccing + PyMongo lead to [Errno 111] - python-3.x

Good day!
I've just started playing around with pymongo and multiprocessing. I have received a multicore unit for my experiments, which runs Ubuntu 18.04.4 LTS, codename: bionic. Just for the sake of experiment I have tried it both with python 3.8 and python 3.10, unfortunately the results are similar:
>7lvv_E mol:na length:29 DNA (28-MER)
ELSE 7lvv_E
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
File "LoadDataOnSequence.py", line 54, in createCollectionPDB
x = newCol.insert_one(dict2Write)
File "/home/username/.local/lib/python3.8/site-packages/pymongo/collection.py", line 698, in insert_one
self._insert(document,
File "/home/username/.local/lib/python3.8/site-packages/pymongo/collection.py", line 613, in _insert
return self._insert_one(
File "/home/username/.local/lib/python3.8/site-packages/pymongo/collection.py", line 602, in _insert_one
self.__database.client._retryable_write(
File "/home/username/.local/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1497, in _retryable_write
with self._tmp_session(session) as s:
File "/usr/lib/python3.8/contextlib.py", line 113, in __enter__
return next(self.gen)
File "/home/username/.local/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1829, in _tmp_session
s = self._ensure_session(session)
File "/home/username/.local/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1816, in _ensure_session
return self.__start_session(True, causal_consistency=False)
File "/home/username/.local/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1766, in __start_session
server_session = self._get_server_session()
File "/home/username/.local/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1802, in _get_server_session
return self._topology.get_server_session()
File "/home/username/.local/lib/python3.8/site-packages/pymongo/topology.py", line 496, in get_server_session
self._select_servers_loop(
File "/home/username/.local/lib/python3.8/site-packages/pymongo/topology.py", line 215, in _select_servers_loop
raise ServerSelectionTimeoutError(
pymongo.errors.ServerSelectionTimeoutError: 127.0.0.1:27017: [Errno 111] Connection refused, Timeout: 30s, Topology Description: <TopologyDescription id: 60db2071e53de99692268c6f, topology_type: Single, servers: [<ServerDescription ('127.0.0.1', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('127.0.0.1:27017: [Errno 111] Connection refused')>]>
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "LoadDataOnSequence.py", line 82, in <module>
myPool.map(createCollectionPDB, listFile("datum/pdb_seqres.txt"))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
pymongo.errors.ServerSelectionTimeoutError: 127.0.0.1:27017: [Errno 111] Connection refused, Timeout: 30s, Topology Description: <TopologyDescription id: 60db2071e53de99692268c6f, topology_type: Single, servers: [<ServerDescription ('127.0.0.1', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('127.0.0.1:27017: [Errno 111] Connection refused')>]>
I have been trying multiple times by modifying my code different ways, no luck though.
Also, I have tried both running code from PyCharm via SSH and by creating the local (at multicore machine) folder with all the necessary files.
I count the number of cores and create my MongoClient:
from multiprocessing import *
from pymongo import MongoClient
#Number of cores
x = cpu_count()
print(x)
myClient = MongoClient('mongodb://127.0.0.1:27017/')
I prepare a list to pass, using that function:
def listFile(fileName):
fOpen = open(fileName)
listFile = fOpen.readlines()
arrOfArrs = []
tmp1 = []
for i in listFile:
# print(i)
if i.startswith(">"):
if len(tmp1) > 1:
arrOfArrs.append(tmp1)
tmp1 = []
tmp1.append(i.strip())
else:
tmp1.append(i.strip())
#print(listFile)
return arrOfArrs
That's the way I can prepare a big text file (in reality there's going to be even a larger one, I am just testing using one of the PDB files: https://www.wwpdb.org/ftp/pdb-ftp-sites I use the seqres file, I am not linking the exact file, as it will download immediately). And I suppose everything works till that moment.
Next is the function, which will be used in Pool:
def createCollectionPDB(fP):
lineName = ""
lineFASTA = ""
colName = ""
PDBName = ""
chainIDName = ""
typeOfMol = ""
molLen = ""
proteinName = ""
for i in fP:
print("test", i)
print(lineName)
if ">" in i:
lineName = i.strip()
print("LINE NAME")
colName = lineName.split(" ")[0].strip()[1:]
print("COLNAME", colName)
PDBName = lineName.split("_")[0].strip()
chainIDName = colName.split("_")[-1].strip()
typeOfMol = lineName.split(" ")[1].strip().split(":")[1].strip()
molLen = lineName.split(" ")[2].strip().split(":")[-1].strip()#[3].split(" ")[0].strip()
proteinName = lineName.split(" ")[-1].strip()
print(colName, PDBName, chainIDName, typeOfMol, molLen, proteinName)
else:
print("ELSE", colName)
lineFASTA = i.strip()
dict2Write={"PDB_ID" : PDBName, "Chain_ID" : chainIDName, "Molecule Type" : typeOfMol, "Length" : molLen, "Protein_Name" : proteinName, "FASTA" : lineFASTA}
myNewDB = myClient["MyPrjPrj_PDBs"]
newCol = myNewDB[colName]
x = newCol.insert_one(dict2Write)
print("PDB", x.inserted_id)#'''
That one used to work as well. Finally I multiprocess:
f1 = listFile("datum/pdb_seqres.txt")
myPool = Pool(processes=x)
myPool.map(createCollectionPDB, f1)
myPool.join()
myPool.close()
I have been looking through various solutions, like changing the Python version, trying different (5.0 and 4.x) versions of mongo, as well, as restarting mongo. I have also tried changing the number of processes, which leaves me with pretty much the same error, though stopping at a different line. Another option I've tried was using ssh_pymongo, with no luck as well.
Also it works without multiprocessing, though w/o multiprocessing I use it on a smaller file.

Each process needs to have its own client, therefore you most likely need to create the client in each process instead of creating one prior to invoking multiprocessing.
Forked process: Failure during socket delivery: Broken pipe contains general information on how MongoDB drivers handle forking.

Related

Getting a OSERROR: [Errno 121] Remote I/O error

I am facing problems with my code that has been working perfectly fine and ran everything that I needed it to. This happens from time to time but this time I don't know what my problem is. I recently tried to place a sampling frequency so I can control how many times my data is running in a second but since I made those changes I had nothing but errors so I deleted the changes that I made and now I have errors although I am using the original code that I was using before hand.
My electrical connection is perfect so this is not the issue. I also am not getting any errors in the terminal while using i2cget -y 1
This is my python code (also using INA219 sensor):
#Importing libraries
import csv
from ina219 import INA219
from ina219 import DeviceRangeError
SHUNT_OHMS = 0.1
read_ina = INA219(SHUNT_OHMS)
read_ina.configure()
def read_all():
data = {}
data['Bus Voltage'] = read_ina.voltage()
data['Bus Current'] = read_ina.current()
data['Power'] = read_ina.power()
data['Shunt Voltage'] = read_ina.shunt_voltage()
return data
with open('SensorData.csv', 'w') as f:
data = read_all()
writer = csv.DictWriter(f,
fieldnames = list (data.keys()))
writer.writeheader()
exit = False
while not exit:
try:
writer.writerow(data)
data = read_all()
except KeyboardInterrupt:
exit = True
It is supposed to create a csv file that shows the voltage and all of that in a loop (in the csv file). The code is pretty straightforward. Can anyone help me fix this issue?
This is the error that I keep facing:
Traceback (most recent call last):
File "/home/pi/Downloads/scripts/Assignment2 CreateCSV/SensorData.py", line 40, in <module>
data = read_all()
File "/home/pi/Downloads/scripts/Assignment2 CreateCSV/SensorData.py", line 20, in read_all
data['Bus Voltage'] = read_ina.voltage()
File "/usr/local/lib/python3.5/dist-packages/ina219.py", line 180, in voltage
value = self._voltage_register()
File "/usr/local/lib/python3.5/dist-packages/ina219.py", line 363, in _voltage_register
register_value = self._read_voltage_register()
File "/usr/local/lib/python3.5/dist-packages/ina219.py", line 367, in _read_voltage_register
return self.__read_register(self.__REG_BUSVOLTAGE)
File "/usr/local/lib/python3.5/dist-packages/ina219.py", line 394, in __read_register
register_value = self._i2c.readU16BE(register)
File "/usr/local/lib/python3.5/dist-packages/Adafruit_GPIO/I2C.py", line 190, in readU16BE
return self.readU16(register, little_endian=False)
File "/usr/local/lib/python3.5/dist-packages/Adafruit_GPIO/I2C.py", line 164, in readU16
result = self._bus.read_word_data(self._address,register) & 0xFFFF
File "/usr/local/lib/python3.5/dist-packages/Adafruit_PureIO/smbus.py", line 226, in read_word_data
ioctl(self._device.fileno(), I2C_RDWR, request)
OSError: [Errno 121] Remote I/O error

deploying flask application using gunicorn throws OSError: [Errno 0] Error

deploying flask application using gunicorn throws OSError: [Errno 0] Error everytime.
The beow is the error:
[2021-06-28 12:00:21 +0000] [11] [ERROR] Socket error processing request.
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/sync.py", line 135, in handle
req = next(parser)
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/parser.py", line 42, in __next__
self.mesg = self.mesg_class(self.cfg, self.unreader, self.source_addr, self.req_count)
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/message.py", line 180, in __init__
super().__init__(cfg, unreader, peer_addr)
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/message.py", line 54, in __init__
unused = self.parse(self.unreader)
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/message.py", line 192, in parse
self.get_data(unreader, buf, stop=True)
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/message.py", line 183, in get_data
data = unreader.read()
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/unreader.py", line 37, in read
d = self.chunk()
File "/usr/local/lib/python3.8/dist-packages/gunicorn/http/unreader.py", line 64, in chunk
return self.sock.recv(self.mxchunk)
File "/usr/lib/python3.8/ssl.py", line 1226, in recv
return self.read(buflen)
File "/usr/lib/python3.8/ssl.py", line 1101, in read
return self._sslobj.read(len)
OSError: [Errno 0] Error
Environment details:
Python version: Python 3.8.5
guncorn module version: 20.0.4
gunicorn config file:
"""Gunicorn configurations."""
import os
bind = "0.0.0.0:443"
workers = os.cpu_count() - 1
keyfile = "/home/xyz/certs/psa_xyz.key"
certfile = "/home/xyz/certs/psa_xyz.crt"
timeout = 120
Either another application is already using that port or you have a firewall issue blocking you from accessing that port. To confirm that, you can try changing the port to 5000.
So your config should look something like this.
"""Gunicorn configurations."""
import os
bind = "0.0.0.0:5000"
workers = os.cpu_count() - 1
keyfile = "/home/xyz/certs/psa_xyz.key"
certfile = "/home/xyz/certs/psa_xyz.crt"
timeout = 120

Python threading causing issues with google api

I'm running through a list of locations and trying to find places along my route. This is my first attempt at threading, so any tips would be appreciated! When i run this it'll work fine for the first few iterations, but then i start getting a KeyError and the API response says route is not found (even though it should be). If I search along a shorter route, everything runs fine. When I extend the route past a couple of hours of drive time I start getting these errors. Is it possible that I'm overloading it or does my code look off?
import pandas as pd
from threading import Thread
import threading
import requests
start_input = input("start: ")
end_input = input("end: ")
out_way = input("out of the way: ")
out_way_secs = int(out_way) * 60
thread_local = threading.local()
def get_session():
if not getattr(thread_local, "session", None):
thread_local.session = requests.Session()
return thread_local.session
def get_routes(url, start, end, waypoint, idx):
session = get_session()
with session.get(url, params={'origins': f'{start}|{waypoint}', 'destinations': f'{start}|{end}',
'key': '# key'}) as response:
route = response.json()
if route['rows'][1]['elements'][0]['status'] != 'OK':
results[idx] = {'# info'}
else:
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
leg1 = route['rows'][1]['elements'][0]['duration']['value']
leg2 = route['rows'][1]['elements'][1]['duration']['value']
time_added = (leg1 + leg2) - nonstop_route
time_added_mins = str(datetime.timedelta(seconds=(leg1 + leg2) - nonstop_route))
more_time = time_added_mins.split(':')
added_time_str = str(f'{more_time[0]}:{more_time[1]}:{more_time[2]} away!')
if time_added < allowable_time:
results[idx] = {# info to return}
return results[idx]
if __name__ == "__main__":
start_time = time.time()
output_df = pd.DataFrame(columns=['Location', 'Added Time', 'Notes'])
threads = [None] * coords[0]
results = [None] * coords[0]
for i in range(len(threads)):
threads[i] = Thread(target=get_routes, args=('https://maps.googleapis.com/maps/api/distancematrix/json',
start_input, end_input, stops[i], i))
threads[i].start()
for i in range(len(threads)):
threads[i].join()
for x in range(len(results)):
output_df = output_df.append(results[x], ignore_index=True)
output_df = output_df.sort_values(['Added Time'], ascending=True)
output_df.to_csv('output.csv', index=False)
there are 3 errors that it will get, this first one pops up by itself and the last 2 will come together. The code is the same when I run it, so not sure why i'm getting different errors.
This is the most common error that comes by itself (the routing duration works fine when run individually):
Exception in thread Thread-171:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:program.py", line 46, in get_routes
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
KeyError: 'duration'
The two below I get together and are less common:
Exception in thread Thread-436:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/program.py", line 40, in get_routes
route = response.json()
File "C:\requests\models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Python37-32\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Python37-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python37-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
second error:
Exception in thread Thread-196:
Traceback (most recent call last):
File "C:\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "C:\urllib3\response.py", line 442, in read
data = self._fp.read(amt)
File "C:\Python37-32\lib\http\client.py", line 447, in read
n = self.readinto(b)
File "C:\Python37-32\lib\http\client.py", line 491, in readinto
n = self.fp.readinto(b)
File "C:\Python37-32\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Python37-32\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "C:\Python37-32\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "C:\site-packages\urllib3\response.py", line 494, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "C:\site-packages\urllib3\response.py", line 459, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "C:\Python37-32\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\site-packages\urllib3\response.py", line 378, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None)", ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))

Python Multiprocessing( TypeError: cannot serialize '_io.BufferedReader' object )

I'm trying to make dictionary attack on zip file using Pool to increase speed.
But I face next error in Python 3.6, while it works in Python 2.7:
Traceback (most recent call last):
File "zip_crack.py", line 42, in <module>
main()
File "zip_crack.py", line 28, in main
for result in results:
File "/usr/lib/python3.6/multiprocessing/pool.py", line 761, in next
raise value
File "/usr/lib/python3.6/multiprocessing/pool.py", line 450, in _ handle_tasks
put(task)
File "/usr/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot serialize '_io.BufferedReader' object
I tried to search for same errors but couldn't find answer that can help here.
Code looks like this
def crack(pwd, f):
try:
key = pwd.strip()
f.extractall(pwd=key)
return True
except:
pass
z_file = zipfile.ZipFile("../folder.zip")
with open('words.dic', 'r') as passes:
start = time.time()
lines = passes.readlines()
pool = Pool(50)
results = pool.imap_unordered(partial(crack, f=z_file), lines)
pool.close()
for result in results:
if result:
pool.terminate()
break
pool.join()
I also tried another approach using map
with contextlib.closing(Pool(50)) as pool:
pool.map(partial(crack, f=z_file), lines)
which worked great and found passwords quickly in Python 2.7 but it throws same exception in python 3.6

Don't catch SerialException at pySerial inWaiting()

I work with Python3. My purpose is to write simple script, which may handle connect/disconnect and read data. I use pySerial library.
At initial stage of the script I have
ser = serial.Serial('/dev/rfcomm0')
Later I have the next code:
def readAndPrint():
try:
waitingChar = ser.inWaiting()
except serial.SerialException as e:
print("Got serial exception")
portOpenFlag = False
readException = True
if (waitingChar > 0):
print("Got some data")
data_str = ser.read(ser.inWaiting())
print(data_str)
Everything is fine till I read data, but when I close Bluetooth connection form another side I got
s = fcntl.ioctl(self.fd, TIOCINQ, TIOCM_zero_str)
OSError: [Errno 5] Input/output error
and never actually arrive into except serial.SerialException case.
What is wrong?
EDIT:
This is the traceback:
Traceback (most recent call last):
File "python_scripts/serialTest.py", line 43, in <module>
readAndHandleException()
File "python_scripts/serialTest.py", line 36, in readAndHandleException
readAndPrint()
File "python_scripts/serialTest.py", line 22, in readAndPrint
waitingChar = ser.inWaiting()
File "/usr/lib/python3/dist-packages/serial/serialposix.py", line 435, in inWaiting
s = fcntl.ioctl(self.fd, TIOCINQ, TIOCM_zero_str)
OSError: [Errno 5] Input/output error

Resources