Error when trying to run via multiprocessing in Python 3

The following code works fine:
[process_data(item, data_frame_list[item]) for item in data_frame_list if data_frame_list[item].shape[0] > 5]
I'm trying to convert this code to run in parallel:
pool_obj = multiprocessing.Pool()
[pool_obj.map(process_data,item, data_frame_list[item]) for item in data_frame_list if data_frame_list[item].shape[0] > 5]
This results in the following error:
Traceback (most recent call last):
File "/home/pyuser/PycharmProjects/project_sample/testyard_2.py", line 425, in <module>
[pool_obj.map(process_data,item, data_frame_list[item]) for item in data_frame_list if data_frame_list[item].shape[0] > 5]
File "/home/pyuser/PycharmProjects/project_sample/testyard_2.py", line 425, in <listcomp>
[pool_obj.map(process_data,item, data_frame_list[item]) for item in data_frame_list if data_frame_list[item].shape[0] > 5]
File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.8/multiprocessing/pool.py", line 485, in _map_async
result = MapResult(self, chunksize, len(iterable), callback,
File "/usr/lib/python3.8/multiprocessing/pool.py", line 797, in __init__
if chunksize <= 0:
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/ops/common.py", line 69, in new_method
return method(self, other)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/arraylike.py", line 44, in __le__
return self._cmp_method(other, operator.le)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 6849, in _cmp_method
new_data = self._dispatch_frame_op(other, op, axis=axis)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 6888, in _dispatch_frame_op
bm = self._mgr.apply(array_op, right=right)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/internals/managers.py", line 325, in apply
applied = b.apply(f, **kwargs)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/internals/blocks.py", line 382, in apply
result = func(self.values, **kwargs)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py", line 284, in comparison_op
res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
File "/home/pyuser/PycharmProjects/project_sample/venv/lib/python3.8/site-packages/pandas/core/ops/array_ops.py", line 73, in comp_method_OBJECT_ARRAY
result = libops.scalar_compare(x.ravel(), y, op)
File "pandas/_libs/ops.pyx", line 107, in pandas._libs.ops.scalar_compare
TypeError: '<=' not supported between instances of 'str' and 'int'
I can't work out what is wrong with what I've done. Could I please request some guidance?

I used a different library that is easier to work with. All is working now:
from joblib import Parallel, delayed
import multiprocessing
Parallel(n_jobs=multiprocessing.cpu_count())(delayed(process_data)(item, data_frame_list[item]) for item in data_frame_list if data_frame_list[item].shape[0] > 5)
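For reference, the original attempt fails because Pool.map takes (func, iterable, chunksize=None): the DataFrame ends up being passed as chunksize, and the chunksize <= 0 check dispatches into pandas comparison code, which is exactly where the traceback ends. A minimal sketch of the same call using only the standard library, assuming process_data takes (key, frame) as in the question:

from multiprocessing import Pool

# Build the argument tuples first, then let starmap unpack them.
# process_data and data_frame_list are the objects from the question.
if __name__ == '__main__':
    args = [(item, data_frame_list[item])
            for item in data_frame_list
            if data_frame_list[item].shape[0] > 5]
    with Pool() as pool_obj:
        results = pool_obj.starmap(process_data, args)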

Related

Error when using joblib in python with undetected chromedriver

When I use (self.links is a list of strings):
Parallel(n_jobs=2)(delayed(self.buybysize)(link) for link in self.links)
with this function:
def buybysize(self, link):
    browser = self.browser()
    # other commented stuff
def browser(self):
    options = uc.ChromeOptions()
    options.user_data_dir = self.user_data_dir
    options.add_argument(self.add_argument)
    driver = uc.Chrome(options=options)
    return driver
I get the error:
joblib.externals.loky.process_executor._RemoteTraceback:
Traceback (most recent call last):
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 436, in _process_worker
r = call_item()
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 288, in __call__
return self.fn(*self.args, **self.kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 595, in __call__
return self.func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 91, in buybysize
browser = self.browser()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 38, in browser
driver = uc.Chrome(options=options)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/undetected_chromedriver/__init__.py", line 388, in __init__
self.browser_pid = start_detached(
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/undetected_chromedriver/dprocess.py", line 30, in start_detached
multiprocessing.Process(
File "/usr/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/usr/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/process.py", line 39, in _Popen
return Popen(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/popen_loky_posix.py", line 52, in __init__
self._launch(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/popen_loky_posix.py", line 157, in _launch
pid = fork_exec(cmd_python, self._fds, env=process_obj.env)
AttributeError: 'Process' object has no attribute 'env'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/Me/PycharmProjects/zalando_buy/Start.py", line 4, in <module>
class Start:
File "/home/Me/PycharmProjects/zalando_buy/Start.py", line 7, in Start
zalando.startshopping()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 42, in startshopping
self.openlinks()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 50, in openlinks
Parallel(n_jobs=2)(delayed(self.buybysize)(link) for link in self.links)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 1056, in __call__
self.retrieve()
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 935, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 542, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
AttributeError: 'Process' object has no attribute 'env'
Process finished with exit code 1
To me it looks like there are instabilities because undetected_chromedriver may already use multiprocessing internally, but isn't there any way to open multiple browsers with UC and process each iteration in parallel?
Edit: I debugged, and the error appears after trying to execute this line:
driver = uc.Chrome(options=options)
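That reading matches the traceback: uc.Chrome() internally starts a multiprocessing.Process, but under joblib's loky backend the patched Popen expects loky's own Process class (which has an env attribute), hence the AttributeError. One workaround sketch, assuming the scraping work tolerates threads, is to force joblib's thread-based backend inside openlinks so no extra processes are spawned around the driver:

from joblib import Parallel, delayed

# Thread-based backend: everything stays in one process, so the
# multiprocessing.Process that undetected_chromedriver starts is not
# intercepted by loky. (Runs inside the class method, as in the question.)
Parallel(n_jobs=2, backend='threading')(
    delayed(self.buybysize)(link) for link in self.links
)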

Is it possible to iterate through Tensor in graph mode?

I am trying to integrate Aleju's imgaug into the TFOD API. I noticed that you cannot iterate through Tensors in graph mode. I looked for a solution and tried many suggestions, but none of them worked in my case. Do you know of any workaround?
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from tensorflow.python.framework.ops import EagerTensor
import tensorflow.compat.v1 as tf
import numpy as np

augseq = iaa.Sequential([
    # augmentation options
], random_order=True)

@tf.function
def augment(image, boxes):
    image_np = image.numpy().astype(np.uint8) if type(image) == EagerTensor else image
    boxes_np = boxes.numpy() if type(boxes) == EagerTensor else boxes
    width, height, _ = image_np.shape
    bbs = []
    for i in range(len(boxes_np)):
        box = boxes_np[i]
        ymin, xmin, ymax, xmax = box.numpy()
        bbs.append(BoundingBox(
            x1=xmin*width, y1=ymin*height,
            x2=xmax*width, y2=ymax*height))
    bbs = BoundingBoxesOnImage(bbs, shape=image_np.shape)
    image_aug, bbs_aug = augseq(image=image_np, bounding_boxes=bbs)  # float np.ndarray
    bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()
    boxes_aug = []
    for bb in bbs_aug:
        boxes_aug.append([bb.y1/height, bb.x1/width, bb.y2/height, bb.x2/width])
    boxes_aug = np.array(boxes_aug)
    return image_aug, boxes_aug
Stack Trace:
Traceback (most recent call last):
File "/content/models/research/object_detection/model_main_tf2.py", line 115, in <module>
tf.compat.v1.app.run()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "/content/models/research/object_detection/model_main_tf2.py", line 112, in main
record_summaries=FLAGS.record_summaries)
File "/usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py", line 558, in train_loop
train_dataset_fn)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py", line 348, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py", line 1199, in experimental_distribute_datasets_from_function
return self.distribute_datasets_from_function(dataset_fn, options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py", line 1191, in distribute_datasets_from_function
dataset_fn, options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py", line 979, in _distribute_datasets_from_function
options=options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 181, in get_distributed_datasets_from_function
build=build,
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 1618, in __init__
self.build()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 1639, in build
self._input_contexts, self._input_workers, self._dataset_fn))
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 2350, in _create_datasets_from_function_with_input_context
dataset = dataset_fn(ctx)
File "/usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py", line 553, in train_dataset_fn
input_context=input_context)
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 906, in train_input
reduce_to_frame_fn=reduce_to_frame_fn)
File "/usr/local/lib/python3.7/dist-packages/object_detection/builders/dataset_builder.py", line 258, in build
batch_size, input_reader_config)
File "/usr/local/lib/python3.7/dist-packages/object_detection/builders/dataset_builder.py", line 237, in dataset_map_fn
fn_to_map, num_parallel_calls=num_parallel_calls)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py", line 348, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 3886, in map_with_legacy_function
use_legacy_function=True))
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 5505, in __init__
use_legacy_function=use_legacy_function)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4540, in __init__
self._function.add_to_graph(ops.get_default_graph())
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 544, in add_to_graph
self._create_definition_if_needed()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 380, in _create_definition_if_needed
self._create_definition_if_needed_impl()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 407, in _create_definition_if_needed_impl
capture_resource_var_by_value=self._capture_resource_var_by_value)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 970, in func_graph_from_py_func
outputs = func(*func_graph.inputs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4458, in wrapped_fn
ret = wrapper_helper(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4440, in wrapper_helper
ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 699, in wrapper
raise e.ag_error_metadata.to_exception(e)
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 886, in transform_and_pad_input_data_fn *
tensor_dict = pad_input_data_to_static_shapes(
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 272, in transform_input_data *
out_tensor_dict = data_augmentation_fn(out_tensor_dict)
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 623, in augment_input_data *
tensor_dict = preprocessor.preprocess(
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/preprocessor.py", line 4812, in preprocess *
results = func(*args, **params)
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/preprocessor.py", line 4422, in _adjust_imgaug *
adjusted_image, adjusted_boxes = tf.cast(imgaug_utils.augment(image,boxes), tf.float32)
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/imgaug_utils.py", line 24, in augment *
ymin, xmin, ymax, xmax = box.numpy()
AttributeError: 'Tensor' object has no attribute 'numpy'
Here is what I tried that did not work:
- Enabling eager execution (it is the default in TF 2.x).
- Decorating/not decorating the function with @tf.function.
- Creating a TF session and calling eval() or run(), which raised:
  InvalidArgumentError: You must feed a value for placeholder tensor 'while/Placeholder' with dtype int32
- Running on both TPU and CPU.
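One workaround worth sketching here (not from the original post): wrap the NumPy-based augment in tf.py_function, which executes its Python body eagerly inside the graph so .numpy() is available, then restore the static shape information that py_function discards. The output dtypes below are assumptions based on the code above:

import tensorflow as tf

def tf_augment(image, boxes):
    # tf.py_function runs `augment` eagerly, even inside a graph-mode
    # tf.data pipeline, so tensor.numpy() works in its body.
    image_aug, boxes_aug = tf.py_function(
        func=augment,
        inp=[image, boxes],
        Tout=[tf.uint8, tf.float32])
    # py_function erases shape information; restore what is statically known.
    image_aug.set_shape([None, None, 3])
    boxes_aug.set_shape([None, 4])
    return image_aug, boxes_aug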

Scikit-learn StackingClassifier with missing values

When I call model.predict_proba(X) on my StackingClassifier model, execution crashes because the library calls assert_all_finite() to check whether my dataframe contains missing values.
Since the estimators I stacked are able to handle missing values, I don't see why this should happen, and I didn't find anything in the documentation saying that StackingClassifier requires data without missing values.
It's a bit hard for me to come up with a short reproducible snippet of code, given that it comes from several layers of model abstraction, but I can print out the model that effectively raises the error.
p = model.predict_proba(X_loyal)
where model is:
StackingClassifier(estimators=[('ExtraTreesClassifier_117',
ExtraTreesClassifier(bootstrap=True,
class_weight={0: 1, 1: 5},
criterion='entropy',
max_depth=11,
max_features='log2',
max_samples=0.5946040593595099,
min_samples_leaf=2,
n_estimators=163,
random_state=117)),
('RandomForestClassifier_117',
RandomForestClassifier(class_weight={0: 1,
1: 5},
criterion='entropy',
max_depth=11,
max_features='log2',
max_samples=0.5946040593595099,
min_samples_leaf=2,
n_estimators=163,
random_state=117)),
('LGBMClassifier_117',
LGBMClassifier(class_weight={0: 1, 1: 1},
deterministic=True, max_depth=9,
n_estimators=183, num_leaves=3,
subsample=0.2986274713775564,
verbose=-1))])
Error
Traceback (most recent call last):
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-12-eafa75c49322>", line 1, in <module>
model.predict_proba(X_loyal)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/utils/metaestimators.py", line 120, in <lambda>
out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py", line 485, in predict_proba
return self.final_estimator_.predict_proba(self.transform(X))
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py", line 522, in transform
return self._transform(X)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py", line 215, in _transform
predictions = [
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_stacking.py", line 216, in <listcomp>
getattr(est, meth)(X)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_forest.py", line 674, in predict_proba
X = self._validate_X_predict(X)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/ensemble/_forest.py", line 422, in _validate_X_predict
return self.estimators_[0]._validate_X_predict(X, check_input=True)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/tree/_classes.py", line 407, in _validate_X_predict
X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr",
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/base.py", line 421, in _validate_data
X = check_array(X, **check_params)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/utils/validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/utils/validation.py", line 720, in check_array
_assert_all_finite(array,
File "/home/mlpoc/miniconda3/envs/churn/lib/python3.8/site-packages/sklearn/utils/validation.py", line 103, in _assert_all_finite
raise ValueError(
ValueError: Input contains NaN, infinity or a value too large for dtype('float32').
Versions
sklearn.__version__
Out[6]: '0.24.2'
lightgbm.__version__
Out[8]: '3.2.1'
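A note on the traceback, plus a hedged sketch: the ValueError is raised inside the forest estimators' own predict path (_forest.py -> check_array), and scikit-learn's RandomForest/ExtraTrees reject NaN regardless of the meta-estimator, while LGBMClassifier handles missing values natively. One way around this, assuming imputation is acceptable for the tree ensembles, is to wrap only the NaN-intolerant estimators in an imputing pipeline (hyperparameters omitted for brevity):

from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import (StackingClassifier, RandomForestClassifier,
                              ExtraTreesClassifier)
from lightgbm import LGBMClassifier

# Hypothetical rebuild of the stack: impute before the sklearn forests,
# leave LightGBM to consume NaN directly.
model = StackingClassifier(estimators=[
    ('et', make_pipeline(SimpleImputer(strategy='median'),
                         ExtraTreesClassifier(random_state=117))),
    ('rf', make_pipeline(SimpleImputer(strategy='median'),
                         RandomForestClassifier(random_state=117))),
    ('lgbm', LGBMClassifier(verbose=-1)),
])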

How to create a custom keras generator to fit multiple outputs and use workers

I have one input and multiple outputs, like a multilabel classification, but I chose to try another approach to see if I get any improvements.
I have these generators. I'm using flow_from_dataframe because I have a huge dataset (200k images):
self.train_generator = datagen.flow_from_dataframe(
    dataframe=train,
    directory='dataset',
    x_col='Filename',
    y_col=columns,
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    class_mode='raw',
    shuffle=True,
    target_size=(HEIGHT,WIDTH))

self.test_generator = datatest.flow_from_dataframe(
    dataframe=test,
    directory='dataset',
    x_col='Filename',
    y_col=columns,
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    class_mode='raw',
    target_size=(HEIGHT,WIDTH))
I'm passing to fit using this function:
def generator(self, generator):
    while True:
        X, y = generator.next()
        y = [y[:,x] for x in range(len(columns))]
        yield X, [y]
If I fit like this:
self.h = self.model.fit_generator(self.generator(self.train_generator),
                                  steps_per_epoch=self.STEP_SIZE_TRAIN,
                                  validation_data=self.generator(self.test_generator),
                                  validation_steps=self.STEP_SIZE_TEST,
                                  epochs=50,
                                  verbose=1,
                                  workers=2,
                                  )
I get:
RuntimeError: Your generator is NOT thread-safe. Keras requires a thread-safe generator when `use_multiprocessing=False, workers > 1`.
Using use_multiprocessing=True:
self.h = self.model.fit_generator(self.generator(self.train_generator),
                                  steps_per_epoch=self.STEP_SIZE_TRAIN,
                                  validation_data=self.generator(self.test_generator),
                                  validation_steps=self.STEP_SIZE_TEST,
                                  epochs=50,
                                  verbose=1,
                                  workers=2,
                                  use_multiprocessing=True,
                                  )
Results in:
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
Now I'm stuck. How do I solve this?
According to the documentation (https://keras.io/api/preprocessing/image/), the class_mode argument can be set to "multi_output", so you don't need to create a custom generator:
class_mode: one of "binary", "categorical", "input", "multi_output", "raw", "sparse" or None. Default: "categorical". Mode for yielding the targets:
- "binary": 1D numpy array of binary labels,
- "categorical": 2D numpy array of one-hot encoded labels. Supports multi-label output.
- "input": images identical to input images (mainly used to work with autoencoders),
- "multi_output": list with the values of the different columns,
- "raw": numpy array of values in y_col column(s),
- "sparse": 1D numpy array of integer labels,
- None, no targets are returned (the generator will only yield batches of image data, which is useful to use in model.predict()).
I am now able to use workers > 1, but I am not seeing any performance improvement.
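To make the fix concrete, a hedged sketch of the generator with class_mode="multi_output" (same parameters as in the question; with this mode the generator already yields a list of per-column targets, so the thread-unsafe wrapper generator goes away):

self.train_generator = datagen.flow_from_dataframe(
    dataframe=train,
    directory='dataset',
    x_col='Filename',
    y_col=columns,
    batch_size=BATCH_SIZE,
    color_mode='rgb',
    class_mode='multi_output',  # yields [col1_batch, col2_batch, ...]
    shuffle=True,
    target_size=(HEIGHT,WIDTH))

# The generator can now be passed to fit directly, without the wrapper.
self.h = self.model.fit(self.train_generator,
                        steps_per_epoch=self.STEP_SIZE_TRAIN,
                        epochs=50,
                        verbose=1,
                        workers=2)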

Upload Excel file to Python

I'm simply trying to load an Excel .xlsx file with the Python pandas package so I can tokenize the text. I've tried for hours but nothing works. Any help would be great:
import pandas as pd
excel_file = open(r'''C:\Users\farid-PC\Desktop\Tester.xlsx''', errors='ignore')
movies = pd.read_excel(excel_file)
movies.head()
Errors:
Traceback (most recent call last):
File "C:/Users/farid-PC/PycharmProjects/fake_news/fault.py", line 4, in <module>
movies = pd.read_excel(excel_file)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\pandas\util\_decorators.py", line 178, in wrapper
return func(*args, **kwargs)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\pandas\util\_decorators.py", line 178, in wrapper
return func(*args, **kwargs)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\pandas\io\excel.py", line 307, in read_excel
io = ExcelFile(io, engine=engine)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\pandas\io\excel.py", line 392, in __init__
self.book = xlrd.open_workbook(file_contents=data)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\xlrd\__init__.py", line 162, in open_workbook
ragged_rows=ragged_rows,
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\xlrd\book.py", line 91, in open_workbook_xls
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\xlrd\book.py", line 1267, in getbof
opcode = self.get2bytes()
File "C:\Users\farid-PC\PycharmProjects\fake_news\venv\lib\site-packages\xlrd\book.py", line 672, in get2bytes
return (BYTES_ORD(hi) << 8) | BYTES_ORD(lo)
TypeError: unsupported operand type(s) for <<: 'str' and 'int'
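For what it's worth, the TypeError at the bottom is the classic symptom of handing xlrd text (str) where it expects bytes: open(..., errors='ignore') opens the file in text mode. A minimal sketch of the usual fix is to pass the path straight to pandas and let it open the workbook in binary mode itself:

import pandas as pd

# Let pandas open the workbook itself instead of a text-mode file handle.
movies = pd.read_excel(r'C:\Users\farid-PC\Desktop\Tester.xlsx')
print(movies.head())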
