I am training an AI model to recognize handwritten hangul characters along with English characters and numbers. It means that I require three datasets custom korean character dataset and other datasets.
I have three datasets and now I am merging three datasets but when I print the train_set path it shows MJSynth only which is wrong.
긴장_1227682.jpg is in my custom korean dataset not in MJSynth
Code
custom_train_set = RecognitionDataset(
parts[0].joinpath("images"),
parts[0].joinpath("labels.json"),
img_transforms=Compose(
[
T.Resize((args.input_size, 4 * args.input_size), preserve_aspect_ratio=True),
# Augmentations
T.RandomApply(T.ColorInversion(), 0.1),
ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
]
),
)
if len(parts) > 1:
for subfolder in parts[1:]:
custom_train_set.merge_dataset(
RecognitionDataset(subfolder.joinpath("images"), subfolder.joinpath("labels.json"))
)
train_set = MJSynth(
train=True,
img_folder='/media/cvpr/CM_22/mjsynth/mnt/ramdisk/max/90kDICT32px',
label_path='/media/cvpr/CM_22/mjsynth/mnt/ramdisk/max/90kDICT32px/imlist.txt',
img_transforms=T.Resize((args.input_size, 4 * args.input_size), preserve_aspect_ratio=True),
)
_train_set = SynthText(
train=True,
recognition_task=True,
download=True, # NOTE: download can take really long depending on your bandwidth
img_transforms=T.Resize((args.input_size, 4 * args.input_size), preserve_aspect_ratio=True),
)
train_set.data.extend([(np_img, target) for np_img, target in _train_set.data])
train_set.data.extend([(np_img, target) for np_img, target in custom_train_set.data])
Traceback
Traceback (most recent call last):
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 485, in <module>
main(args)
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 396, in main
fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=args.amp)
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 118, in fit_one_epoch
for images, targets in progress_bar(train_loader, parent=mb):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/fastprogress/fastprogress.py", line 50, in __iter__
raise e
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/fastprogress/fastprogress.py", line 41, in __iter__
for i,o in enumerate(self.gen):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
data = self._next_data()
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data
return self._process_data(data)
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
data.reraise()
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/_utils.py", line 543, in reraise
raise exception
FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/media/cvpr/CM_22/doctr/doctr/datasets/datasets/base.py", line 48, in __getitem__
img, target = self._read_sample(index)
File "/media/cvpr/CM_22/doctr/doctr/datasets/datasets/pytorch.py", line 37, in _read_sample
else read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float32)
File "/media/cvpr/CM_22/doctr/doctr/io/image/pytorch.py", line 52, in read_img_as_tensor
pil_img = Image.open(img_path, mode="r").convert("RGB")
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/PIL/Image.py", line 2912, in open
fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/media/cvpr/CM_22/mjsynth/mnt/ramdisk/max/90kDICT32px/긴장_1227682.jpg'
Related
My system language is English (USA). I downloaded MJSynth from the Oxford website when I start to train a model it shows [Errno 2] No such file or directory: '/media/cvpr/CM_22/mjsynth/mnt/ramdisk/max/90kDICT32px/신_1174986.jpg' which is extremely strange for me. I am sure that there is no problem with batch size I already checked that
Last shows 신_1174986.jpg which is true how i can change language to english
Traceback (most recent call last):
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 485, in <module>
main(args)
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 396, in main
fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb, amp=args.amp)
File "/media/cvpr/CM_22/doctr/references/recognition/train_pytorch.py", line 118, in fit_one_epoch
for images, targets in progress_bar(train_loader, parent=mb):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/fastprogress/fastprogress.py", line 50, in __iter__
raise e
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/fastprogress/fastprogress.py", line 41, in __iter__
for i,o in enumerate(self.gen):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
data = self._next_data()
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data
return self._process_data(data)
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
data.reraise()
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/_utils.py", line 543, in reraise
raise exception
FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/media/cvpr/CM_22/doctr/doctr/datasets/datasets/base.py", line 48, in __getitem__
img, target = self._read_sample(index)
File "/media/cvpr/CM_22/doctr/doctr/datasets/datasets/pytorch.py", line 37, in _read_sample
else read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float32)
File "/media/cvpr/CM_22/doctr/doctr/io/image/pytorch.py", line 52, in read_img_as_tensor
pil_img = Image.open(img_path, mode="r").convert("RGB")
File "/home/cvpr/anaconda3/envs/pytesseract/lib/python3.9/site-packages/PIL/Image.py", line 2912, in open
fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/media/cvpr/CM_22/mjsynth/mnt/ramdisk/max/90kDICT32px/신_1174986.jpg'
The code runs for several iterations and throws the following error.
My Dataset
class Dataset(Dataset):
'Characterizes a dataset for PyTorch'
def __init__(self, input_feature_paths, target_feature_folder) -> None:
self.input_feature_paths = input_feature_paths
self.target_feature_folder = target_feature_folder
def __len__(self):
#return sum(1 for _ in self.input_feature_paths)
return len(self.input_feature_paths)
def __getitem__(self, index) -> None:
input_feature_path = self.input_feature_paths[index]
input_feature = load(input_feature_path, map_location='cpu')
target_feature_path = self.target_feature_folder / input_feature_path.parts[-1]
target_feature = load(target_feature_path, map_location='cpu')
return input_feature.to(dtype=torch.float64), target_feature.to(dtype=torch.float64)
I set dtype torch float64 because it throws the same error while writing on the tensorboard summary writer.
Error Stack
Traceback (most recent call last):
File "student_audio_feature_extractor.py", line 178, in <module>
train(dt, input_frame)
File "student_audio_feature_extractor.py", line 164, in train
model, train_loss = train_step(model, train_loader, optimizer, criterion)
File "student_audio_feature_extractor.py", line 80, in train_step
for input_feature, target_feature in train_loader:
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
data = self._next_data()
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1313, in _next_data
return self._process_data(data)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
data.reraise()
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/_utils.py", line 543, in reraise
raise exception
RuntimeError: Caught RuntimeError in DataLoader worker process 4.
Original Traceback (most recent call last):
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 61, in fetch
return self.collate_fn(data)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
return collate(batch, collate_fn_map=default_collate_fn_map)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 143, in collate
return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed] # Backwards compatibility.
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 143, in <listcomp>
return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed] # Backwards compatibility.
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 120, in collate
return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
File "/home/visge/miniconda3/envs/zk_torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 162, in collate_tensor_fn
out = elem.new(storage).resize_(len(batch), *list(elem.size()))
RuntimeError: Trying to resize storage that is not resizable
I had a tensor of shape [] that's why it throws this error i changed it and it works now.
I cloned transfer-learning-library repo and working on maximum classifier discrepancy. I am trying to change the augmentation but getting the following error
Traceback (most recent call last):
File "mcd.py", line 378, in <module>
main(args)
File "mcd.py", line 145, in main
results = validate(val_loader, G, F1, F2, args)
File "mcd.py", line 290, in validate
for i, (images, target) in enumerate(val_loader):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/usr/local/lib/python3.7/dist-packages/torch/_utils.py", line 425, in reraise
raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "../../../common/vision/datasets/imagelist.py", line 48, in __getitem__
img = self.transform(img)
File "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/transforms.py", line 60, in __call__
img = t(img)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/transforms.py", line 750, in forward
return F.perspective(img, startpoints, endpoints, self.interpolation, fill)
File "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/functional.py", line 647, in perspective
return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill)
File "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/functional_pil.py", line 289, in perspective
return img.transform(img.size, Image.PERSPECTIVE, perspective_coeffs, interpolation, **opts)
File "/usr/local/lib/python3.7/dist-packages/PIL/Image.py", line 2371, in transform
im = new(self.mode, size, fillcolor)
File "/usr/local/lib/python3.7/dist-packages/PIL/Image.py", line 2578, in new
return im._new(core.fill(mode, size, color))
TypeError: integer argument expected, got float
The previous code was
# Data loading code
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
if args.center_crop:
train_transform = T.Compose([
ResizeImage(256),
T.CenterCrop(224),
T.RandomHorizontalFlip(),
T.ToTensor(),
normalize
])
else:
train_transform = T.Compose([
ResizeImage(256),
T.RandomResizedCrop(224),
T.RandomHorizontalFlip(),
T.ToTensor(),
normalize
])
val_transform = T.Compose([
ResizeImage(256),
T.CenterCrop(224),
T.ToTensor(),
normalize
])
I just added T.RandomPerspective(distortion_scale = 0.8, p=0.5, fill=0.6) for val_transform.
Before this I also added few other transforms for train_transform but still got the same error.
What could be the problem?
The fill argument needs to be an integer.
This transform does not support the fill parameter for Tensor types; therefore, if you wish to use the fill parameter, then you must use this transform before the ToTensor transform. At this point, the data is integral.
I have one input, and multiple outputs, like a multilabel classification, but I chose to try another approach to see if I have any improvements.
I have these generators, I'm using flow_from_dataframe because I have a huge dataset (200k):
self.train_generator = datagen.flow_from_dataframe(
dataframe=train,
directory='dataset',
x_col='Filename',
y_col=columns,
batch_size=BATCH_SIZE,
color_mode='rgb',
class_mode='raw',
shuffle=True,
target_size=(HEIGHT,WIDTH))
self.test_generator = datatest.flow_from_dataframe(
dataframe=test,
directory='dataset',
x_col='Filename',
y_col=columns,
batch_size=BATCH_SIZE,
color_mode='rgb',
class_mode='raw',
target_size=(HEIGHT,WIDTH))
I'm passing to fit using this function:
def generator(self, generator):
while True:
X, y = generator.next()
y = [y[:,x] for x in range(len(columns))]
yield X,[y]
If I fit like this:
self.h = self.model.fit_generator(self.generator(self.train_generator),
steps_per_epoch=self.STEP_SIZE_TRAIN,
validation_data=self.generator(self.test_generator),
validation_steps=self.STEP_SIZE_TEST,
epochs=50,
verbose = 1,
workers = 2,
)
I get :
RuntimeError: Your generator is NOT thread-safe. Keras requires a thread-safe generator when `use_multiprocessing=False, workers > 1`.
Using multiprocessing=True:
self.h = self.model.fit_generator(self.generator(self.train_generator),
steps_per_epoch=self.STEP_SIZE_TRAIN,
validation_data=self.generator(self.test_generator),
validation_steps=self.STEP_SIZE_TEST,
epochs=50,
verbose = 1,
workers = 2,
use_multiprocessing=True,
)
Results in:
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
File "C:\ProgramData\Anaconda3\lib\threading.py", line 932, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 877, in _run
with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 867, in pool_fn
pool = get_pool_class(True)(
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 119, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 212, in __init__
self._repopulate_pool()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 303, in _repopulate_pool
return self._repopulate_pool_static(self._ctx, self.Process,
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 326, in _repopulate_pool_static
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'generator' object
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
Now I'm stuck, how to solve this?
According to documentation https://keras.io/api/preprocessing/image/
The argument class_mode can be set as "multi_output" so you don't need to create a custom generator:
class_mode: one of "binary", "categorical", "input", "multi_output", "raw", sparse" or None. Default: "categorical". Mode for yielding the targets:
- "binary": 1D numpy array of binary labels,
- "categorical": 2D numpy array of one-hot encoded labels. Supports multi-label output.
- "input": images identical to input images (mainly used to work with autoencoders),
- "multi_output": list with the values of the different columns,
- "raw": numpy array of values in y_col column(s),
- "sparse": 1D numpy array of integer labels,
- None, no targets are returned (the generator will only yield batches of image data, which is useful to use in model.predict()).
I am now being able to use workers > 1, but I am not having performance improvements.
I'm trying to carry out the tutorial named "Training a classifier" with PyTorch.
WHen trying to debug this part of the code :
import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
I get this error message :
Files already downloaded and verified Files already downloaded and verified
Files already downloaded and verified Files already downloaded and verified Traceback (most recent call last):
File "<string>", line 1, in <module>
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 277, in
_fixup_main_from_path
run_name="__mp_main__")
File "D:\Anaconda\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "D:\Anaconda\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Anaconda\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "d:\Yggdrasil\Programmation\PyTorch\TutorialCIFAR10.py", line 36, in <module>
dataiter = iter(trainloader)
File "D:\Anaconda\lib\site-packages\torch\utils\data\dataloader.py", line 451, in __iter__
return _DataLoaderIter(self)
File "D:\Anaconda\lib\site-packages\torch\utils\data\dataloader.py", line 239, in __init__
w.start()
File "D:\Anaconda\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "D:\Anaconda\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "D:\Anaconda\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "D:\Anaconda\lib\multiprocessing\popen_spawn_win32.py", line 33, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "D:\Anaconda\lib\multiprocessing\spawn.py", line 136, in
_check_not_importing_main
is not going to be frozen to produce an executable.)
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
Traceback (most recent call last):
File "d:\Yggdrasil\Programmation\PyTorch\TutorialCIFAR10.py", line 36, in <module>
dataiter = iter(trainloader)
File "D:\Anaconda\lib\site-packages\torch\utils\data\dataloader.py", line 451, in __iter__
return _DataLoaderIter(self)
File "D:\Anaconda\lib\site-packages\torch\utils\data\dataloader.py", line 239, in __init__
w.start()
File "D:\Anaconda\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "D:\Anaconda\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj) File "D:\Anaconda\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "D:\Anaconda\lib\multiprocessing\popen_spawn_win32.py", line 65, in
__init__
reduction.dump(process_obj, to_child)
File "D:\Anaconda\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
BrokenPipeError: [Errno 32] Broken pipe
All the previous lines in the tutorial are working perfectly.
Does someone know how to solve this, please ?
Thanks a lot in advance
The question happened because Windows cannot run this DataLoader in 'num_workers' more than 0.
You can see where the trainloader come from.we can see
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
We need to change the 'num_workers' to 0.like this:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=0)
Every trainloaders need to change like this.
Got the same error. The following workaround works for me:
def run():
# code goes here
if __name__ == '__main__':
run()
This doesn't look to be a PyTorch problem. Try executing the code in Jupyter notebooks and other environment troubleshooting.
you need to add a if-clause protection as stated in the pytorch docs:
https://pytorch.org/docs/stable/notes/windows.html#usage-multiprocessing