WSQ files not opening with Pillow/wsq when using joblib.Parallel - python-3.x

I am trying to preprocess a large number of WSQ images for model training, using the Pillow and wsq libraries. To speed up my code I am trying to use Parallel, but this causes an UnidentifiedImageError.
I verified that the files are where they should be, and that the function runs without errors in a regular for-loop. Other files (e.g. csv files) can be opened inside the function without errors, so I presume the error lies in the combination of Parallel and Pillow/wsq. All libraries are up to date. As I am just starting out with Pillow and multiprocessing, I have no idea how to fix this, and any help would be highly appreciated.
Code:
from joblib import Parallel, delayed
from PIL import Image
import multiprocessing
import wsq
import numpy as np
def process_image(i):
    path = "/home/user/project/wsq/image_" + str(i) + ".wsq"
    img = np.array(Image.open(path))
    # some preprocessing, saving as npz
    output_path = "/home/user/project/npz/image_" + str(i) + ".npz"
    np.savez_compressed(output_path, img)
    return None
inputs = range(100000)
num_cores = multiprocessing.cpu_count()
Parallel(n_jobs=num_cores)(delayed(process_image)(i) for i in inputs)
Output:
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/user/.local/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 431, in _process_worker
r = call_item()
File "/home/user/.local/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 285, in __call__
return self.fn(*self.args, **self.kwargs)
File "/home/user/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 595, in __call__
return self.func(*args, **kwargs)
File "/home/user/.local/lib/python3.8/site-packages/joblib/parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "/home/user/.local/lib/python3.8/site-packages/joblib/parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "preprocess_images.py", line 9, in process_image
img = np.array(Image.open(path))
File "/home/user/.local/lib/python3.8/site-packages/PIL/Image.py", line 2967, in open
raise UnidentifiedImageError(
PIL.UnidentifiedImageError: cannot identify image file '/home/user/project/wsq/image_1.wsq'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "preprocess_images.py", line 18, in <module>
Parallel(n_jobs=num_cores)(delayed(process_image)(i) for i in inputs)
File "/home/user/.local/lib/python3.8/site-packages/joblib/parallel.py", line 1054, in __call__
self.retrieve()
File "/home/user/.local/lib/python3.8/site-packages/joblib/parallel.py", line 933, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/home/user/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 542, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 439, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 388, in __get_result
raise self._exception
PIL.UnidentifiedImageError: cannot identify image file '/home/user/project/wsq/image_1.wsq'
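One likely culprit (an assumption based on how Pillow plugins work, not something the traceback alone proves): import wsq registers the WSQ format with Pillow as a side effect, and that registration happens in the parent process only. joblib's loky workers receive the pickled function without re-running the script's top-level imports, so Pillow inside each worker never learns about the WSQ plugin and raises UnidentifiedImageError. A minimal sketch of the workaround, moving the import into the worker function:

from joblib import Parallel, delayed
from PIL import Image
import multiprocessing
import numpy as np

def process_image(i):
    import wsq  # registers the WSQ plugin with Pillow inside this worker process
    path = "/home/user/project/wsq/image_" + str(i) + ".wsq"
    img = np.array(Image.open(path))
    # some preprocessing, saving as npz
    output_path = "/home/user/project/npz/image_" + str(i) + ".npz"
    np.savez_compressed(output_path, img)

inputs = range(100000)
num_cores = multiprocessing.cpu_count()
Parallel(n_jobs=num_cores)(delayed(process_image)(i) for i in inputs)

If the plugin registration really is the issue, the key point is simply that every process that decodes a .wsq file must have imported wsq before calling Image.open.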

Related

Pytorch loader throws error after some iterations

The code runs for several iterations and throws the following error.
My Dataset
class Dataset(Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, input_feature_paths, target_feature_folder) -> None:
        self.input_feature_paths = input_feature_paths
        self.target_feature_folder = target_feature_folder

    def __len__(self):
        #return sum(1 for _ in self.input_feature_paths)
        return len(self.input_feature_paths)

    def __getitem__(self, index) -> None:
        input_feature_path = self.input_feature_paths[index]
        input_feature = load(input_feature_path, map_location='cpu')
        target_feature_path = self.target_feature_folder / input_feature_path.parts[-1]
        target_feature = load(target_feature_path, map_location='cpu')
        return input_feature.to(dtype=torch.float64), target_feature.to(dtype=torch.float64)
I set the dtype to torch.float64 because it throws the same error while writing to the TensorBoard summary writer.
Error Stack
Traceback (most recent call last):
File "student_audio_feature_extractor.py", line 178, in <module>
train(dt, input_frame)
File "student_audio_feature_extractor.py", line 164, in train
model, train_loss = train_step(model, train_loader, optimizer, criterion)
File "student_audio_feature_extractor.py", line 80, in train_step
for input_feature, target_feature in train_loader:
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
data = self._next_data()
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1313, in _next_data
return self._process_data(data)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
data.reraise()
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/_utils.py", line 543, in reraise
raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 4.
Original Traceback (most recent call last):
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 61, in fetch
return self.collate_fn(data)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
return collate(batch, collate_fn_map=default_collate_fn_map)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 143, in collate
return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed] # Backwards compatibility.
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 143, in <listcomp>
return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed] # Backwards compatibility.
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 120, in collate
return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 162, in collate_tensor_fn
out = elem.new(storage).resize_(len(batch), *list(elem.size()))
RuntimeError: Trying to resize storage that is not resizable
I had a tensor of shape [] (a 0-dim tensor); that is why it threw this error. I changed it, and it works now.
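That resolution matches what the traceback shows: default_collate stacks the samples of a batch into a single tensor, which only works when every sample has the same shape, and the resize_ call in collate_tensor_fn is where a mismatch (such as a stray 0-dim tensor of shape [] among fixed-size samples) blows up inside the worker. A minimal sketch of a guard for __getitem__ (the target shape here is a hypothetical placeholder):

import torch

def ensure_batchable(t: torch.Tensor, shape=(1,)) -> torch.Tensor:
    # Reshape scalar (0-dim) tensors so every sample collates to a common shape.
    return t.reshape(shape) if t.dim() == 0 else t

# inside Dataset.__getitem__, before returning:
# input_feature = ensure_batchable(input_feature)
# target_feature = ensure_batchable(target_feature)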

Unable to use Graphviz library in Python

Here is the code I try to execute:
from graphviz import Graph
# Instantiate a new Graph object
dot = Graph('Data Science Process', format='png')
# Add nodes
dot.node('A', 'Get Data')
dot.node('B', 'Clean, Prepare, & Manipulate Data')
dot.node('C', 'Train Model')
dot.node('D', 'Test Data')
dot.node('E', 'Improve')
# Connect these nodes
dot.edges(['AB', 'BC', 'CD', 'DE'])
# Save chart
#dot.render('data_science_flowchart', view=True)
The render function won't work, and I have no idea what is wrong. With it commented out the code runs, but of course produces nothing. My goal is to make a visualization of the graph (a PNG image or a PDF file). I am just trying to plot a rudimentary flowchart in Python and am open to using libraries other than graphviz: I am new to this, tried graphviz after reading recommendations, and tested a dozen scripts posted online, but none of them work, always resulting in the same error.
Here is the error:
$ py graph2.py
Traceback (most recent call last):
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\backend\execute.py", line 81, in run_check
proc = subprocess.run(cmd, **kwargs)
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 501, in run
with Popen(*popenargs, **kwargs) as process:
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 966, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 1435, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\vince\graph2.py", line 17, in <module>
dot.render('data_science_flowchart', view=True)
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\_tools.py", line 171, in wrapper
return func(*args, **kwargs)
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\rendering.py", line 122, in render
rendered = self._render(*args, **kwargs)
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\_tools.py", line 171, in wrapper
return func(*args, **kwargs)
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\backend\rendering.py", line 324, in render
execute.run_check(cmd,
File "C:\Users\vince\AppData\Local\Programs\Python\Python310\lib\site-packages\graphviz\backend\execute.py", line 84, in run_check
raise ExecutableNotFound(cmd) from e
graphviz.backend.execute.ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH
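That last line is the actual problem: the graphviz Python package only builds the DOT source, and render() shells out to the Graphviz dot executable, which is a separate system install. On Windows it must be installed (e.g. from graphviz.org or via a package manager) and its bin directory put on PATH. A quick check, where the install location below is an assumption based on the default Windows installer path:

import os
import shutil

if shutil.which('dot') is None:
    # Hypothetical location -- adjust to wherever Graphviz was actually installed.
    os.environ['PATH'] += os.pathsep + r'C:\Program Files\Graphviz\bin'

print(shutil.which('dot'))  # should print the path to dot.exe once it is visible

Once shutil.which('dot') finds the executable, dot.render('data_science_flowchart', view=True) should be able to run it.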

Python3.8--Cartopy/Fiona error: shapefile not recognized as a supported file format

Running into trouble here making a simple plot with Cartopy. Well, the issue suddenly arose out of a more complicated plot, but it comes down to simply having Cartopy ingest a shapefile of state boundaries.
Quick code snapshot:
#!/home/fewx/anaconda3/bin/python3.8
import matplotlib.pyplot as plt
import xarray as xr
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import Reader
from cartopy.feature import ShapelyFeature
import cartopy
import geopandas as gpd
import matplotlib.ticker as mticker
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([235., 290., 20., 55.])
ax.add_feature(cfeature.COASTLINE.with_scale('110m'), linewidth=0.5)
ax.add_feature(cfeature.STATES.with_scale('110m'), linewidth=0.5)
##Print default location of needed natural earth datasets
path = cartopy.config['data_dir']
print(path)
# Display the plot
#plt.show()
plt.savefig('cartopy_test.png')
The coastline shapefile plots perfectly fine, but I fail when trying to access the necessary state shapefile, 'ne_110m_admin_1_states_provinces_lakes.shp', with the error:
ERROR:fiona._env:`/home/fewx/.local/share/cartopy/shapefiles/natural_earth/cultural/ne_110m_admin_1_states_provinces_lakes.shp' not recognized as a supported file format.
Traceback (most recent call last):
File "fiona/_shim.pyx", line 83, in fiona._shim.gdal_open_vector
File "fiona/_err.pyx", line 291, in fiona._err.exc_wrap_pointer
fiona._err.CPLE_OpenFailedError: '/home/fewx/.local/share/cartopy/shapefiles/natural_earth/cultural/ne_110m_admin_1_states_provinces_lakes.shp' not recognized as a supported file format.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "./plot_cartopy_contourf.py", line 35, in <module>
plt.savefig('cartopy_test.png')
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py", line 859, in savefig
res = fig.savefig(*args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/figure.py", line 2311, in savefig
self.canvas.print_figure(fname, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_qt5agg.py", line 81, in print_figure
super().print_figure(*args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py", line 2210, in print_figure
result = print_method(
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py", line 1639, in wrapper
return func(*args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py", line 509, in print_png
FigureCanvasAgg.draw(self)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py", line 407, in draw
self.figure.draw(self.renderer)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/figure.py", line 1863, in draw
mimage._draw_list_compositing_images(
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/mpl/geoaxes.py", line 479, in draw
return matplotlib.axes.Axes.draw(self, renderer=renderer, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/cbook/deprecation.py", line 411, in wrapper
return func(*inner_args, **inner_kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 2747, in draw
mimage._draw_list_compositing_images(renderer, self, artists)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/mpl/feature_artist.py", line 155, in draw
geoms = self._feature.intersecting_geometries(extent)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/feature/__init__.py", line 302, in intersecting_geometries
return super(NaturalEarthFeature, self).intersecting_geometries(extent)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/feature/__init__.py", line 110, in intersecting_geometries
return (geom for geom in self.geometries() if
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/feature/__init__.py", line 287, in geometries
geometries = tuple(shapereader.Reader(path).geometries())
File "/home/fewx/anaconda3/lib/python3.8/site-packages/cartopy/io/shapereader.py", line 197, in __init__
with fiona.open(filename) as f:
File "/home/fewx/anaconda3/lib/python3.8/site-packages/fiona/env.py", line 417, in wrapper
return f(*args, **kwargs)
File "/home/fewx/anaconda3/lib/python3.8/site-packages/fiona/__init__.py", line 256, in open
c = Collection(path, mode, driver=driver, encoding=encoding,
File "/home/fewx/anaconda3/lib/python3.8/site-packages/fiona/collection.py", line 162, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 540, in fiona.ogrext.Session.start
File "fiona/_shim.pyx", line 90, in fiona._shim.gdal_open_vector
fiona.errors.DriverError: '/home/fewx/.local/share/cartopy/shapefiles/natural_earth/cultural/ne_110m_admin_1_states_provinces_lakes.shp' not recognized as a supported file format.
Note that the parent zip file for 'ne_110m_admin_1_states_provinces_lakes.shp' does not exist in the corresponding directory, mainly because NACISCDN is down (again), and I needed to pull the files from GitHub.
Any ideas what would cause this? I'm a bit of a newbie here... and this test code is running on a brand-new conda-based installation of Python built over the past week. Do I have incompatible versions of libraries (e.g. PROJ, GDAL, etc.)? This error seems to have come out of nowhere with this newer installation.
Thanks for your help!
UPDATE:
I had success plotting states, but I needed to explicitly plot the 'ne_110m_admin_1_states_provinces_shp.shp' file that came in the cartopy_offlinedata package via conda. This required creating a new, independent feature via cfeature.NaturalEarthFeature() and specifying which shapefile to use:
states_provinces = cfeature.NaturalEarthFeature(
    category='cultural',
    name='admin_1_states_provinces_shp',
    scale='110m',
    facecolor='none')
ax.add_feature(states_provinces, edgecolor='black')
This makes me think the shapefiles I was trying to plot are corrupted, causing the failure... but I won't know for sure until I can pull the full package down from NACISCDN, which is still down.
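One way to test the corrupted-shapefile hypothesis directly, without waiting for NACISCDN: a .shp file is only one part of a shapefile, and GDAL/Fiona reports exactly this "not recognized as a supported file format" error when the .shx or .dbf sidecars are missing or truncated, which is easy to do when pulling individual files from GitHub. A short diagnostic sketch (the path is taken from the traceback above; the missing-sidecar hypothesis is an assumption):

from pathlib import Path
import fiona

shp = Path('/home/fewx/.local/share/cartopy/shapefiles/natural_earth/'
           'cultural/ne_110m_admin_1_states_provinces_lakes.shp')

# Check that the mandatory sidecar files made it down alongside the .shp.
for ext in ('.shp', '.shx', '.dbf'):
    sidecar = shp.with_suffix(ext)
    print(sidecar.name, 'OK' if sidecar.exists() else 'MISSING')

# If all three exist, open the file directly to separate a bad download
# from a Cartopy or library-version problem.
with fiona.open(str(shp)) as src:
    print(len(src), 'records, driver:', src.driver)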

Dask - trying to read hdfs data getting error ArrowIOError: HDFS file does not exist

I tried creating a dataframe from a csv stored in HDFS. Connecting is successful, but when I try to get the output of the len function I get an error.
Code:
from dask_yarn import YarnCluster
from dask.distributed import Client, LocalCluster
import dask.dataframe as dd
import subprocess
import os
# GET HDFS CLASSPATH
classpath = subprocess.Popen(["/usr/hdp/current/hadoop-client/bin/hadoop", "classpath", "--glob"], stdout=subprocess.PIPE).communicate()[0]
os.environ["HADOOP_HOME"] = "/usr/hdp/current/hadoop-client"
os.environ["ARROW_LIBHDFS_DIR"] = "/usr/hdp/3.1.4.0-315/usr/lib/"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java/"
os.environ["CLASSPATH"] = classpath.decode("utf-8")
# GET HDFS CLASSPATH
classpath = subprocess.Popen(["/usr/hdp/current/hadoop-client/bin/hadoop", "classpath", "--glob"], stdout=subprocess.PIPE).communicate()[0]
cluster = YarnCluster(environment='python:///opt/anaconda3/bin/python3', worker_vcores=32, worker_memory="128GiB", n_workers=10)
client = Client(cluster)
client
df = dd.read_csv('hdfs://masterha/data/batch/82.csv')
len(df)
Error:
>>> len(ddf)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/opt/anaconda3/lib/python3.7/site-packages/dask/dataframe/core.py", line 504, in __len__
len, np.sum, token="len", meta=int, split_every=False
File "/opt/anaconda3/lib/python3.7/site-packages/dask/base.py", line 165, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/opt/anaconda3/lib/python3.7/site-packages/dask/base.py", line 436, in compute
results = schedule(dsk, keys, **kwargs)
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 2539, in get
results = self.gather(packed, asynchronous=asynchronous, direct=direct)
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 1839, in gather
asynchronous=asynchronous,
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 756, in sync
self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/utils.py", line 333, in sync
raise exc.with_traceback(tb)
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/utils.py", line 317, in f
result[0] = yield future
File "/opt/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 735, in run
value = future.result()
File "/opt/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 1695, in _gather
raise exception.with_traceback(traceback)
File "/opt/anaconda3/lib/python3.7/site-packages/dask/bytes/core.py", line 181, in read_block_from_file
with copy.copy(lazy_file) as f:
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/core.py", line 88, in __enter__
f = self.fs.open(self.path, mode=mode)
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/implementations/hdfs.py", line 116, in <lambda>
return lambda *args, **kw: getattr(PyArrowHDFS, item)(self, *args, **kw)
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/spec.py", line 708, in open
path, mode=mode, block_size=block_size, autocommit=ac, **kwargs
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/implementations/hdfs.py", line 116, in <lambda>
return lambda *args, **kw: getattr(PyArrowHDFS, item)(self, *args, **kw)
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/implementations/hdfs.py", line 72, in _open
return HDFSFile(self, path, mode, block_size, **kwargs)
File "/opt/anaconda3/lib/python3.7/site-packages/fsspec/implementations/hdfs.py", line 171, in __init__
self.fh = fs.pahdfs.open(path, mode, block_size, **kwargs)
File "pyarrow/io-hdfs.pxi", line 431, in pyarrow.lib.HadoopFileSystem.open
File "pyarrow/error.pxi", line 83, in pyarrow.lib.check_status
pyarrow.lib.ArrowIOError: HDFS file does not exist: /data/batch/82.csv
It looks like your file "/data/batch/82.csv" doesn't exist. You might want to verify that you have the right path.
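To go one step further, the existence check can be run through the same pyarrow HDFS layer the traceback goes through. A sketch using pyarrow's legacy HDFS interface (which this stack traces through), assuming the same HADOOP_HOME / ARROW_LIBHDFS_DIR / CLASSPATH environment set up above:

import pyarrow as pa

fs = pa.hdfs.connect()  # picks up the nameservice from the Hadoop configuration

print(fs.exists('/data/batch/82.csv'))  # False would confirm the error message
print(fs.ls('/data/batch'))             # see what the directory actually holds

Note also that hdfs://masterha/data/batch/82.csv resolves to the absolute path /data/batch/82.csv on the masterha nameservice, so a file that actually sits under a user home directory (e.g. /user/<name>/data/batch/82.csv) would not be found at that path.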

SyntaxError raised when using joblib with lxml on python 3.5

I am trying to parallelize the task of correcting texts across many documents with Python, so I naturally found "joblib". I want each task to correct one document. Here is the structure of the code:
if __name__ == '__main__':
    lexicon = build_compact_lexicon()

    from joblib import Parallel, delayed
    import multiprocessing

    num_cores = multiprocessing.cpu_count()
    results = Parallel(n_jobs=num_cores)(delayed(find_errors)('GDL', i, 1, lexicon) for i in range(1798, 1820))
I am using the function find_errors, summed up here:
def find_errors(newspaper, year, month, lexicon):
    # parse the input newspaper text data using the etree parser from lxml
    # detect errors in the text
    return found_errors_type1, found_errors_type2, found_errors_type3
This raises the following errors:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 130, in __call__
return self.func(*args, **kwargs)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 72, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 72, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "hellowordParallel.py", line 85, in find_errors
tree = etree.parse(xml_file_path)
File "src/lxml/lxml.etree.pyx", line 3427, in lxml.etree.parse (src/lxml/lxml.etree.c:79801)
File "src/lxml/parser.pxi", line 1805, in lxml.etree._parseDocument (src/lxml/lxml.etree.c:116293)
TypeError: cannot parse from 'NoneType'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 392, in find_cookie
line_string = line.decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 24: invalid start byte
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 139, in __call__
tb_offset=1)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 373, in format_exc
frames = format_records(records)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 274, in format_records
for token in generate_tokens(linereader):
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 514, in _tokenize
line = readline()
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/format_stack.py", line 265, in linereader
line = getline(file, lnum[0])
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 16, in getline
lines = getlines(filename, module_globals)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 47, in getlines
return updatecache(filename, module_globals)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/linecache.py", line 136, in updatecache
with tokenize.open(fullname) as fp:
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 456, in open
encoding, lines = detect_encoding(buffer.readline)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 433, in detect_encoding
encoding = find_cookie(first)
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/tokenize.py", line 397, in find_cookie
raise SyntaxError(msg)
File "<string>", line None
SyntaxError: invalid or missing encoding declaration for '/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/lxml/etree.so'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "hellowordParallel.py", line 160, in <module>
results = Parallel(n_jobs=num_cores)(delayed(find_errors)('GDL', i, 1, lexicon) for i in range(1798, 1820))
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 810, in __call__
self.retrieve()
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/joblib/parallel.py", line 727, in retrieve
self._output.extend(job.get())
File "/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
SyntaxError: invalid or missing encoding declaration for '/home/mbl/anaconda3/envs/OCR_Correction/lib/python3.5/site-packages/lxml/etree.so'
I don't understand whether this is due to something configuration-related, or whether my function simply doesn't fit a parallel implementation... (I guess it should...)
Has this happened to any of you before?
I hope my question is clear and that there is enough information for someone to give me some help!
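Reading the chained tracebacks from the top helps here: the real failure in the worker is TypeError: cannot parse from 'NoneType', i.e. etree.parse was handed None as a path. Everything after that is noise from error reporting on this old stack: joblib's traceback formatter tries to read the worker's source lines, trips over the compiled lxml binary (etree.so), and surfaces the misleading SyntaxError instead. So the function is fine for parallel use; the input path is what goes missing. A minimal sketch of a guard (build_xml_path and its directory layout are hypothetical, since the question does not show how xml_file_path is constructed):

from pathlib import Path
from lxml import etree

def build_xml_path(newspaper, year, month):
    # Hypothetical archive layout -- adjust to the real directory structure.
    p = Path('data') / newspaper / '{}-{:02d}.xml'.format(year, month)
    return str(p) if p.exists() else None

def find_errors(newspaper, year, month, lexicon):
    xml_file_path = build_xml_path(newspaper, year, month)
    if xml_file_path is None:
        # Fail loudly with a clear message instead of letting
        # etree.parse(None) raise TypeError deep inside a worker.
        raise FileNotFoundError('no XML file for {} {}-{:02d}'.format(newspaper, year, month))
    tree = etree.parse(xml_file_path)
    # ... detect errors in the text and return
    # found_errors_type1, found_errors_type2, found_errors_type3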
