I am trying to integrate Aleju's imgaug into the TFOD API. I noticed that you cannot iterate through Tensors in graph mode. I looked for a solution and tried many suggestions, but none of them worked for my case. Do you know of any workaround?
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from tensorflow.python.framework.ops import EagerTensor
import tensorflow.compat.v1 as tf
import numpy as np
augseq = iaa.Sequential([# augmentation options], random_order=True)
#tf.function
def _augment_numpy(image_np, boxes_np):
    """Run ``augseq`` on one NumPy image and its normalized boxes.

    Args:
        image_np: Rank-3 image array read as (height, width, channels).
        boxes_np: Iterable of ``[ymin, xmin, ymax, xmax]`` rows whose
            coordinates are normalized by the image size.

    Returns:
        ``(image_aug, boxes_aug)``. ``boxes_aug`` is an ``(N, 4)`` array of
        normalized ``[ymin, xmin, ymax, xmax]`` rows; N can be smaller than
        the number of input boxes because boxes that end up outside the
        image are removed.
    """
    # NumPy images are (height, width, channels); unpacking them as
    # (width, height, _) scaled x by the height and y by the width.
    height, width, _ = image_np.shape
    bbs = BoundingBoxesOnImage(
        [
            BoundingBox(x1=xmin * width, y1=ymin * height,
                        x2=xmax * width, y2=ymax * height)
            # Rows are already NumPy values here, so no ``.numpy()`` call.
            for ymin, xmin, ymax, xmax in boxes_np
        ],
        shape=image_np.shape,
    )
    image_aug, bbs_aug = augseq(image=image_np, bounding_boxes=bbs)
    bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()
    boxes_aug = np.array(
        [[bb.y1 / height, bb.x1 / width, bb.y2 / height, bb.x2 / width]
         for bb in bbs_aug]
    )
    # Keep a (0, 4) result instead of shape (0,) when every box was dropped.
    return image_aug, boxes_aug.reshape(-1, 4)


def augment(image, boxes):
    """Augment an image and its normalized bounding boxes with ``augseq``.

    Accepts NumPy arrays, eager tensors, or symbolic (graph-mode) tensors.
    Symbolic tensors have no ``.numpy()`` and cannot be looped over in
    Python, so in that case the NumPy implementation is wrapped with
    ``tf.numpy_function`` and executed when the graph runs.

    Args:
        image: Rank-3 image, (height, width, channels).
        boxes: ``[ymin, xmin, ymax, xmax]`` rows, normalized to the image.

    Returns:
        ``(image_aug, boxes_aug)``. For NumPy/eager inputs these are NumPy
        values. For symbolic inputs they are a ``uint8`` image tensor and a
        ``float32`` ``[None, 4]`` boxes tensor.

    NOTE(review): out-of-image boxes are removed, so the number of returned
    boxes may differ from the input; callers that keep per-box labels must
    filter them the same way -- confirm against the caller.
    """
    def _is_symbolic(value):
        return isinstance(value, tf.Tensor) and not isinstance(value, EagerTensor)

    if _is_symbolic(image) or _is_symbolic(boxes):
        def _graph_fn(img, bxs):
            # numpy_function needs fixed output dtypes, so pin them here.
            img_aug, bxs_aug = _augment_numpy(img.astype(np.uint8), bxs)
            return img_aug.astype(np.uint8), bxs_aug.astype(np.float32)

        image_aug, boxes_aug = tf.numpy_function(
            _graph_fn, [image, boxes], [tf.uint8, tf.float32])
        # numpy_function outputs carry no static shape; restore the ranks.
        image_aug.set_shape([None, None, None])
        boxes_aug.set_shape([None, 4])
        return image_aug, boxes_aug

    if isinstance(image, EagerTensor):
        image_np = image.numpy().astype(np.uint8)
    else:
        image_np = image
    boxes_np = boxes.numpy() if isinstance(boxes, EagerTensor) else boxes
    return _augment_numpy(image_np, boxes_np)
Stack Trace:
Traceback (most recent call last):
File "/content/models/research/object_detection/model_main_tf2.py", line 115, in <module>
tf.compat.v1.app.run()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/platform/app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 303, in run
_run_main(main, args)
File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "/content/models/research/object_detection/model_main_tf2.py", line 112, in main
record_summaries=FLAGS.record_summaries)
File "/usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py", line 558, in train_loop
train_dataset_fn)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py", line 348, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py", line 1199, in experimental_distribute_datasets_from_function
return self.distribute_datasets_from_function(dataset_fn, options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py", line 1191, in distribute_datasets_from_function
dataset_fn, options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/tpu_strategy.py", line 979, in _distribute_datasets_from_function
options=options)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 181, in get_distributed_datasets_from_function
build=build,
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 1618, in __init__
self.build()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 1639, in build
self._input_contexts, self._input_workers, self._dataset_fn))
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/input_lib.py", line 2350, in _create_datasets_from_function_with_input_context
dataset = dataset_fn(ctx)
File "/usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py", line 553, in train_dataset_fn
input_context=input_context)
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 906, in train_input
reduce_to_frame_fn=reduce_to_frame_fn)
File "/usr/local/lib/python3.7/dist-packages/object_detection/builders/dataset_builder.py", line 258, in build
batch_size, input_reader_config)
File "/usr/local/lib/python3.7/dist-packages/object_detection/builders/dataset_builder.py", line 237, in dataset_map_fn
fn_to_map, num_parallel_calls=num_parallel_calls)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/deprecation.py", line 348, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 3886, in map_with_legacy_function
use_legacy_function=True))
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 5505, in __init__
use_legacy_function=use_legacy_function)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4540, in __init__
self._function.add_to_graph(ops.get_default_graph())
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 544, in add_to_graph
self._create_definition_if_needed()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 380, in _create_definition_if_needed
self._create_definition_if_needed_impl()
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 407, in _create_definition_if_needed_impl
capture_resource_var_by_value=self._capture_resource_var_by_value)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/function.py", line 970, in func_graph_from_py_func
outputs = func(*func_graph.inputs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4458, in wrapped_fn
ret = wrapper_helper(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 4440, in wrapper_helper
ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 699, in wrapper
raise e.ag_error_metadata.to_exception(e)
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 886, in transform_and_pad_input_data_fn *
tensor_dict = pad_input_data_to_static_shapes(
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 272, in transform_input_data *
out_tensor_dict = data_augmentation_fn(out_tensor_dict)
File "/usr/local/lib/python3.7/dist-packages/object_detection/inputs.py", line 623, in augment_input_data *
tensor_dict = preprocessor.preprocess(
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/preprocessor.py", line 4812, in preprocess *
results = func(*args, **params)
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/preprocessor.py", line 4422, in _adjust_imgaug *
adjusted_image, adjusted_boxes = tf.cast(imgaug_utils.augment(image,boxes), tf.float32)
File "/usr/local/lib/python3.7/dist-packages/object_detection/core/imgaug_utils.py", line 24, in augment *
ymin, xmin, ymax, xmax = box.numpy()
AttributeError: 'Tensor' object has no attribute 'numpy'
Here is what I tried that did not work:
Enable eager execution (it is the default in TF 2.x)
Decorate/Not Decorate function with #tf.function.
Create Tf session and try to eval() or run():
InvalidArgumentError: You must feed a value for placeholder tensor 'while/Placeholder' with dtype int32
Tried on both TPU and CPU
Related
I have trained a BERTopic model on Colab, and when I try to use it locally I get an IndexError.
IndexError: Failed in nopython mode pipeline (step: analyzing bytecode)
pop from empty list
The code I used is:
from sentence_transformers import SentenceTransformer
sentence_model = SentenceTransformer('KBLab/sentence-bert-swedish-cased')
model = BERTopic.load('bertopic_model')
text = "my text here for example"
text = [text]
embeddings = sentence_model.encode(text)
topic, _ = model.transform(text, embeddings)
The last line gives me the error.
Notably, the same code works just fine on Colab. I am not sure what's going on locally.
My numba and other related libraries are up to date, the same as they were on Colab.
Full Traceback:
Traceback (most recent call last):
File "/home/vaibhav/.local/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app
response = self.full_dispatch_request()
File "/home/vaibhav/.local/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/vaibhav/.local/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request
rv = self.dispatch_request()
File "/home/vaibhav/.local/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "app.py", line 20, in reference_prediction
preds = data_process(input_api)
File "data_process.py", line 63, in data_process
topic, _ = topic_model_mi.transform(text, embeddings)
File "/home/vaibhav/.local/lib/python3.10/site-packages/bertopic/_bertopic.py", line 423, in transform
umap_embeddings = self.umap_model.transform(embeddings)
File "/home/vaibhav/.local/lib/python3.10/site-packages/umap/umap_.py", line 2859, in transform
dmat = pairwise_distances(
File "/home/vaibhav/.local/lib/python3.10/site-packages/sklearn/metrics/pairwise.py", line 2022, in pairwise_distances
return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
File "/home/vaibhav/.local/lib/python3.10/site-packages/sklearn/metrics/pairwise.py", line 1563, in _parallel_pairwise
return func(X, Y, **kwds)
File "/home/vaibhav/.local/lib/python3.10/site-packages/sklearn/metrics/pairwise.py", line 1607, in _pairwise_callable
out[i, j] = metric(X[i], Y[j], **kwds)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 487, in _compile_for_args
raise e
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 420, in _compile_for_args
return_val = self.compile(tuple(argtypes))
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 965, in compile
cres = self._compiler.compile(args, return_type)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 125, in compile
status, retval = self._compile_cached(args, return_type)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 139, in _compile_cached
retval = self._compile_core(args, return_type)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/dispatcher.py", line 152, in _compile_core
cres = compiler.compile_extra(self.targetdescr.typing_context,
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 716, in compile_extra
return pipeline.compile_extra(func)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 452, in compile_extra
return self._compile_bytecode()
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 520, in _compile_bytecode
return self._compile_core()
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 499, in _compile_core
raise e
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 486, in _compile_core
pm.run(self.state)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 368, in run
raise patched_exception
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 356, in run
self._runPass(idx, pass_inst, state)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
return func(*args, **kwargs)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 311, in _runPass
mutated |= check(pss.run_pass, internal_state)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 273, in check
mangled = func(compiler_state)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/untyped_passes.py", line 86, in run_pass
func_ir = interp.interpret(bc)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/interpreter.py", line 1321, in interpret
flow.run()
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/byteflow.py", line 107, in run
runner.dispatch(state)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/byteflow.py", line 282, in dispatch
fn(state, inst)
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/byteflow.py", line 1061, in _binaryop
rhs = state.pop()
File "/home/vaibhav/.local/lib/python3.10/site-packages/numba/core/byteflow.py", line 1344, in pop
return self._stack.pop()
IndexError: Failed in nopython mode pipeline (step: analyzing bytecode)
pop from empty list
I tried to fine-tune a BERT model on GPU using PyTorch Lightning's Trainer class with the following code:
from pytorch_lightning import Trainer
from models import LitAdModel, AdModel
from dataloaders import train_dataloader, test_dataloader
model = AdModel()
litmodel = LitAdModel(model=model)
trainer = Trainer(accelerator='gpu', devices=1)
trainer.fit(model=litmodel, train_dataloaders=train_dataloader,
val_dataloaders=test_dataloader)
in which train_dataloader and test_dataloader, and the AdModel and LitAdModel classes, are defined elsewhere. When I do this without using the GPU, it works (slowly), but with the GPU it gives the following error:
File "/Users/sanjinjuricfot/developer/copy_models/test_pl.py", line
24, in
main() File "/Users/sanjinjuricfot/developer/copy_models/test_pl.py", line 18, in
main
littrain(train=train, test=test) File "/Users/sanjinjuricfot/developer/copy_models/src/_torch/littrain.py",
line 39, in littrain
trainer.fit(model=litmodel, train_dataloaders=train_dataloader, val_dataloaders=test_dataloader) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 582, in fit
call._call_and_handle_interrupt( File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py",
line 38, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 624, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1061, in _run
results = self._run_stage() File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1140, in _run_stage
self._run_train() File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1153, in _run_train
self._run_sanity_check() File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1225, in _run_sanity_check
val_loop.run() File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/loop.py",
line 199, in run
self.advance(*args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py",
line 152, in advance
dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/loop.py",
line 199, in run
self.advance(*args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py",
line 121, in advance
batch = next(data_fetcher) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/utilities/fetching.py",
line 184, in next
return self.fetching_function() File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/utilities/fetching.py",
line 275, in fetching_function
return self.move_to_device(batch) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/utilities/fetching.py",
line 294, in move_to_device
batch = self.batch_to_device(batch) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py",
line 142, in batch_to_device
batch = self.trainer._call_strategy_hook("batch_to_device", batch, dataloader_idx=dataloader_idx) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1443, in _call_strategy_hook
output = fn(*args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py",
line 273, in batch_to_device
return model._apply_batch_transfer_handler(batch, device=device, dataloader_idx=dataloader_idx) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/core/module.py",
line 295, in _apply_batch_transfer_handler
batch = self._call_batch_hook("transfer_batch_to_device", batch, device, dataloader_idx) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/core/module.py",
line 283, in _call_batch_hook
return trainer_method(hook_name, *args) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py",
line 1305, in _call_lightning_module_hook
output = fn(*args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/pytorch_lightning/core/hooks.py",
line 632, in transfer_batch_to_device
return move_data_to_device(batch, device) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/lightning_lite/utilities/apply_func.py",
line 101, in move_data_to_device
return apply_to_collection(batch, dtype=_TransferableDataType, function=batch_to) File
"/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/lightning_utilities/core/apply_func.py",
line 55, in apply_to_collection
v = apply_to_collection( File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/lightning_utilities/core/apply_func.py",
line 47, in apply_to_collection
return function(data, *args, **kwargs) File "/Users/sanjinjuricfot/developer/copy_models/.venv/lib/python3.10/site-packages/lightning_lite/utilities/apply_func.py",
line 95, in batch_to
data_output = data.to(device, **kwargs) TypeError: Cannot convert a MPS Tensor to float64 dtype as the MPS framework doesn't support
float64. Please use float32 instead.
I tried using this command
torch.set_default_dtype(torch.float32)
in all the relevant files and adding
.to(torch.float32)
extension to all the tensors, but it didn't work.
I am using MacBook Pro with M2 processor. Thanks in advance for any help!
I want to try my model. The data is saved in AWS. I use boto3 simply like
self.s3_img = S3Images(boto3.resource('s3'))
self.s3_obj = S3GetObjects()
I get this error when I feed the data and model into the PyTorch training pipeline.
The code looks like
import pytorch_lightning as pl
from pytorch_lightning import Trainer
trainer = Trainer(
checkpoint_callback=checkpoint_callback,
callbacks=get_callbacks(chkpt_path),
fast_dev_run=False,
max_epochs=100,
resume_from_checkpoint=checkpoint_path
)
trainer.fit(model)
The error is
File "main.py", line 191, in <module>
train()
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/hydra/main.py", line 20, in decorated_main
run_hydra(
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/hydra/_internal/utils.py", line 171, in run_hydra
hydra.run(
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 82, in run
return run_job(
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/hydra/plugins/common/utils.py", line 109, in run_job
ret.return_value = task_function(task_cfg)
File "main.py", line 176, in train
trainer.fit(model)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/trainer/states.py", line 48, in wrapped_fn
result = fn(self, *args, **kwargs)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1084, in fit
results = self.accelerator_backend.train(model)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/accelerators/cpu_backend.py", line 39, in train
results = self.trainer.run_pretrain_routine(model)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1224, in run_pretrain_routine
self._run_sanity_check(ref_model, model)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1257, in _run_sanity_check
eval_results = self._evaluate(model, self.val_dataloaders, max_batches, False)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py", line 305, in _evaluate
for batch_idx, batch in enumerate(dataloader):
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 352, in __iter__
return self._get_iterator()
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 294, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 801, in __init__
w.start()
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/admin/opt/anaconda3/envs/kk/lib/python3.8/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'boto3.resources.factory.s3.ServiceResource'>: attribute lookup s3.ServiceResource on boto3.resources.factory failed
Can anyone tell me what's the meaning of this error and how to solve it? Thanks for any suggestions and help!
I have one input, and multiple outputs, like a multilabel classification, but I chose to try another approach to see if I have any improvements.
I have these generators, I'm using flow_from_dataframe because I have a huge dataset (200k):
self.train_generator = datagen.flow_from_dataframe(
dataframe=train,
directory='dataset',
x_col='Filename',
y_col=columns,
batch_size=BATCH_SIZE,
color_mode='rgb',
class_mode='raw',
shuffle=True,
target_size=(HEIGHT,WIDTH))
self.test_generator = datatest.flow_from_dataframe(
dataframe=test,
directory='dataset',
x_col='Filename',
y_col=columns,
batch_size=BATCH_SIZE,
color_mode='rgb',
class_mode='raw',
target_size=(HEIGHT,WIDTH))
I'm passing to fit using this function:
def generator(self, generator):
    """Endlessly re-yield batches from ``generator`` with per-column targets.

    Each batch is pulled with ``generator.next()`` and unpacked as
    ``(X, y)``. ``y`` is sliced into one ``y[:, i]`` array for every entry
    of the externally defined ``columns``, and that list is yielded wrapped
    in an outer single-element list alongside ``X``.
    """
    while True:
        images, targets = generator.next()
        per_column = []
        for col_idx in range(len(columns)):
            per_column.append(targets[:, col_idx])
        yield images, [per_column]
If I fit like this:
self.h = self.model.fit_generator(self.generator(self.train_generator),
steps_per_epoch=self.STEP_SIZE_TRAIN,
validation_data=self.generator(self.test_generator),
validation_steps=self.STEP_SIZE_TEST,
epochs=50,
verbose = 1,
workers = 2,
)
I get :
RuntimeError: Your generator is NOT thread-safe. Keras requires a thread-safe generator when `use_multiprocessing=False, workers > 1`.
Using multiprocessing=True:
self.h = self.model.fit_generator(self.generator(self.train_generator),
steps_per_epoch=self.STEP_SIZE_TRAIN,
validation_data=self.generator(self.test_generator),
validation_steps=self.STEP_SIZE_TEST,
epochs=50,
verbose = 1,
workers = 2,
use_multiprocessing=True,
)
Results in:
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
Now I'm stuck, how to solve this?
According to documentation https://keras.io/api/preprocessing/image/
The argument class_mode can be set as "multi_output" so you don't need to create a custom generator:
class_mode: one of "binary", "categorical", "input", "multi_output", "raw", "sparse" or None. Default: "categorical". Mode for yielding the targets:
- "binary": 1D numpy array of binary labels,
- "categorical": 2D numpy array of one-hot encoded labels. Supports multi-label output.
- "input": images identical to input images (mainly used to work with autoencoders),
- "multi_output": list with the values of the different columns,
- "raw": numpy array of values in y_col column(s),
- "sparse": 1D numpy array of integer labels,
- None, no targets are returned (the generator will only yield batches of image data, which is useful to use in model.predict()).
I am now able to use workers > 1, but I am not seeing any performance improvement.
I am trying to convert the pytorch model in this link to onnx model using the code below :
device=t.device('cuda:0' if t.cuda.is_available() else 'cpu')
print(device)
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()
#trainer = FasterRCNNTrainer(faster_rcnn).to(device)
trainer.load('./checkpoints/model.pth')
dummy_input = t.randn(1, 3, 300, 300, device = 'cuda')
#dummy_input = dummy_input.to(device)
t.onnx.export(faster_rcnn, dummy_input, "model.onnx", verbose = True)
But I get the following error (sorry for the block quote below; Stack Overflow wouldn't let the whole trace be in code format and wouldn't let the question be posted otherwise):
Traceback (most recent call last):
small_object_detection_master_samirsen\onnxtest.py", line 44, in <module>
t.onnx.export(faster_rcnn, dummy_input, "fasterrcnn_10120119_06025842847785781.onnx", verbose = True)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\onnx\__init__.py",
line 132, in export
strip_doc_string, dynamic_axes)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\onnx\utils.py",
line 64, in export
example_outputs=example_outputs, strip_doc_string=strip_doc_string, dynamic_axes=dynamic_axes)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\onnx\utils.py",
line 329, in _export
_retain_param_name, do_constant_folding)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\onnx\utils.py",
line 213, in _model_to_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args, training)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\onnx\utils.py",
line 171, in _trace_and_get_graph_from_model
trace, torch_out = torch.jit.get_trace_graph(model, args, _force_outplace=True)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\jit__init__.py",
line 256, in get_trace_graph
return LegacyTracedModule(f, _force_outplace, return_inputs)(*args, **kwargs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 547, in call
result = self.forward(*input, **kwargs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\jit__init__.py",
line 323, in forward
out = self.inner(*trace_inputs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 545, in call
result = self._slow_forward(*input, **kwargs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 531, in _slow_forward
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 531, in _slow_forward
result = self.forward(*input, **kwargs)
File "D:\smallobject2\export test s\small_object_detection_master_samirsen\model\faster_rcnn.py", line
133, in forward
h, rois, roi_indices)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 545, in call
result = self._slow_forward(*input, **kwargs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 531, in _slow_forward
result = self.forward(*input, **kwargs)
File "D:\smallobject2\export test s\small_object_detection_master_samirsen\model\faster_rcnn_vgg16.py",
line 142, in forward
pool = self.roi(x, indices_and_rois)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 545, in call
result = self._slow_forward(*input, **kwargs)
File "C:\Users\HP\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\nn\modules\module.py",
line 531, in _slow_forward
result = self.forward(*input, **kwargs)
File "D:\smallobject2\export test s\small_object_detection_master_samirsen\model\roi_module.py", line
85, in forward
return self.RoI(x, rois)
RuntimeError: Attempted to trace RoI, but tracing of legacy functions is not supported
This is because tracing for ONNX export does not support legacy torch.autograd.Function objects, as the error message states. The issue comes from the RoI class (refer to this).
To overcome the issue, you have to implement the forward and backward functions as separate function definitions rather than as members of the RoI class.
The call to RoI in FasterRCNNVGG16 should then be changed to explicitly call the forward and backward functions.