not enough memory cpu Pytorch - pytorch

for epoch in range(max_epochs):
for i, local_sample in enumerate(training_generator):
I run the above code and I am getting a bug when loading the next batch in the dataloader, but it is not the first batch that causes the problem. I checked and the problem happens after 250 batches have been loaded. Is it possible to clear the old batches from memory so I do not get memory problems?
Traceback:
Input In [11], in train()
27 mode = 'loss'
29 for epoch in range(max_epochs):
---> 30 for i, local_sample in enumerate(training_generator):
31 batch_sample= local_sample['image'].to(device)
32 label_sample = local_sample['label'].to(device)
....
RuntimeError: [enforce fail at C:\cb\pytorch_1000000000000\work\c10\core\impl\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 4320000 bytes.
Here is the Dataset that the DataLoader wraps:
class HRADataset(Dataset):
    """Map-style HRA dataset pairing image files with labels from a CSV.

    Column 0 of the CSV is read as the image file name (relative to
    ``image_dir``) and column 1 as the label.
    """

    def __init__(self, csv_file, image_dir, transform=None, target_transforms=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            image_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on the loaded image.
            target_transforms (callable, optional): Optional transform to be
                applied on the label.
        """
        self.labels = pd.read_csv(csv_file)
        self.target_transforms = target_transforms
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load one sample and return it as ``{'image': ..., 'label': ...}``."""
        position = idx.tolist() if torch.is_tensor(idx) else idx

        file_name = self.labels.iloc[position, 0]
        image = imageio.imread(os.path.join(self.image_dir, file_name))
        label = self.labels.iloc[position, 1]

        # Both transforms are optional; a falsy value means "leave as loaded".
        if self.transform:
            image = self.transform(image)
        if self.target_transforms:
            label = self.target_transforms(label)

        return {'image': image, 'label': label}

Related

torch - subsample each dataset differently and concatenate them

I have two datasets, but one is larger than the other and I want to subsample it (resample in each epoch).
I probably cannot use dataloader argument sampler, as I would pass to Dataloader the already concatenated dataset.
How do I achieve this simply?
I think one solution would be to write a class SubsampledDataset(IterableDataset) which would resample every time __iter__ is called (each epoch).
(Or better use a map-style dataset, but is there a hook that gets called every epoch, like __iter__ gets?)
This is what I have so far (untested). Usage:
# Placeholder for the smaller dataset; its length sets the per-epoch sample count below.
dataset1: Any = ...
# subsample original_dataset2, so that it is equally large in each epoch
dataset2 = RandomSampledDataset(original_dataset2, num_samples=len(dataset1))
concat_dataset = ConcatDataset([dataset1, dataset2])
# The sampler draws indices over concat_dataset and calls dataset2's resampling
# hook every time the sampler is iterated.
data_loader = torch.utils.data.DataLoader(
    concat_dataset,
    sampler=RandomSamplerWithNewEpochHook(dataset2.new_epoch_hook, concat_dataset)
)
The result is that the concat_dataset will be shuffled each epoch (RandomSampler), in addition, the dataset2 component is a new sample of the (possibly larger) original_dataset2, different in each epoch.
You can add more datasets to be subsampled by doing instead of:
sampler=RandomSamplerWithNewEpochHook(dataset2.new_epoch_hook
this:
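sampler=RandomSamplerWithNewEpochHook(lambda: (dataset2.new_epoch_hook(), dataset3.new_epoch_hook(), dataset4.new_epoch_hook()), ...
(Each hook has to be called explicitly. Chaining the bound methods with `and` would only evaluate their truthiness and never call any of them.)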
Code:
class RandomSamplerWithNewEpochHook(RandomSampler):
    """RandomSampler that fires a user callback whenever iteration starts.

    ``new_epoch_hook`` is invoked with no arguments at the top of
    ``__iter__``, before the parent sampler's iterator is created. Every
    other argument is forwarded unchanged to ``RandomSampler``.
    """

    def __init__(self, new_epoch_hook: Callable, data_source: Sized, replacement: bool = False,
                 num_samples: Optional[int] = None, generator=None):
        super().__init__(
            data_source,
            replacement=replacement,
            num_samples=num_samples,
            generator=generator,
        )
        self.new_epoch_hook = new_epoch_hook

    def __iter__(self):
        # Notify first, so the callback runs before any index is produced.
        notify = self.new_epoch_hook
        notify()
        index_iterator = super().__iter__()
        return index_iterator
class RandomSampledDataset(Dataset):
    """ Subsamples a dataset. The sample is different in each epoch.

    This helps when concatenating datasets, as the subsampling rate can be different for each dataset.
    Call new_epoch_hook before each epoch. (This can be done using e.g. RandomSamplerWithNewEpochHook.)
    This would be arguably harder to achieve with a concatenated dataset and a sampler argument to Dataloader. The
    sampler would have to be aware of the indices of subdatasets' items in the concatenated dataset, of the subsampling
    for each subdataset.

    Args:
        dataset: The dataset to draw from; it is indexed with plain ints.
        num_samples: Number of items exposed per epoch (the value of ``len(self)``).
        transform: Callable applied to every fetched item; identity by default.

    NOTE(review): ``new_epoch_hook`` mutates this object in the process that
    calls it. With DataLoader worker processes the workers' copies may not see
    the update — confirm before using ``num_workers > 0``.
    """

    def __init__(self, dataset, num_samples, transform=lambda im: im):
        self.dataset = dataset
        self.transform = transform
        self.num_samples = num_samples
        self.sampler = RandomSampler(dataset, num_samples=num_samples)
        # Filled by new_epoch_hook(); None until the first draw.
        self.current_epoch_samples = None

    def new_epoch_hook(self):
        """Draw a fresh set of ``num_samples`` indices into the wrapped dataset."""
        # torch.tensor() cannot convert an iterator, so the sampler output is
        # materialised into a list first.
        self.current_epoch_samples = torch.tensor(list(self.sampler), dtype=torch.int)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, item):
        """Return the transformed item at position ``item`` of the current subsample.

        Raises:
            IndexError: If ``item`` is outside ``[0, len(self))``.
        """
        if item < 0 or item >= len(self):
            raise IndexError
        if self.current_epoch_samples is None:
            # The hook has not run yet: draw an initial subsample instead of
            # failing on the None placeholder.
            self.new_epoch_hook()
        img = self.dataset[self.current_epoch_samples[item].item()]
        return self.transform(img)
You can stop the iteration by raising StopIteration. This exception is caught by the DataLoader, which simply stops iterating. So you can do something like this:
class SubDataset(Dataset):
    """Wrap a dataset and cut every pass short after ``length`` fetched items.

    A call counter is kept in ``elem``. Once more than ``length`` items have
    been requested, the counter is reset and ``StopIteration`` is raised so
    that the next pass starts counting from zero again. ``len()`` still
    reports the size of the wrapped dataset.
    """

    def __init__(self, dataset, length):
        self.length = length
        self.dataset = dataset
        self.elem = 0

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        served = self.elem + 1
        if served <= self.length:
            self.elem = served
            return self.dataset[index]
        # Budget for this pass is used up: rewind the counter and stop.
        self.elem = 0
        raise StopIteration  # caught by DataLoader
if __name__ == '__main__':
    torch.manual_seed(0)
    # Wrap ten items but allow only five fetches per pass.
    dataloader = DataLoader(SubDataset(torch.arange(10), 5), shuffle=True)
    # Iterate the loader three times, printing every item it yields.
    for _ in range(3):
        for x in dataloader:
            print(x)
    # len() is taken from the wrapped dataset, not from the truncated pass.
    print(len(dataloader)) # 10!!
Output:
Note that making __len__ return self.length will cause a problem, because the DataLoader will then use only indices between 0 and length-1 (which is not what you want). Unfortunately, I found no way to set the actual length without this behaviour (due to a DataLoader restriction). So be careful: len(dataset) is the original length and dataset.length is the new length.

Runtime error when computing the mean and std for image channel: RuntimeError: DataLoader worker (pid(s) 8780,13240,13944,14252) exited unexpectedly

I am writing a custom dataloader for a deep learning model using Pytorch and I am trying to figure out the mean and std values so that I can input them into the transforms.Normalize(mean=[], std=[]) function.
However, when I execute the code, I keep running into the following error:
RuntimeError: DataLoader worker (pid(s) 8780, 13240, 13944, 14252) exited unexpectedly
This is the entire code I am using:
# NOTE(review): nothing in the visible script moves data to this device — confirm it is used elsewhere.
device = torch.device('cuda')
# Worker-process count and batch size, both passed to the DataLoader further down.
num_workers = 4
batch_size = 8
class ImageData(Dataset):
    """Dataset of images whose file names are listed in column 0 of a CSV.

    Each sample is a dict ``{"image": <loaded image>}``; when a ``transform``
    is supplied it receives that dict and its return value becomes the sample.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.filenames = pd.read_csv(csv_file)
        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        relative_name = self.filenames.iloc[index, 0]
        pixels = io.imread(os.path.join(self.root_dir, relative_name))
        sample = {"image": pixels}
        return self.transform(sample) if self.transform else sample
# No transform is passed, so every sample is the raw {"image": ...} dict.
dataset = ImageData(csv_file="archive/names.csv",
                    root_dir="archive/data")
# NOTE(review): num_workers > 0 starts worker processes. On platforms that
# spawn rather than fork (e.g. Windows), code that creates and iterates the
# DataLoader usually has to sit under `if __name__ == "__main__":`; a missing
# guard is a common cause of "DataLoader worker ... exited unexpectedly" —
# confirm against the platform in use.
dataloader = DataLoader(dataset,
                        batch_size = batch_size,
                        shuffle=False,
                        num_workers=num_workers,
                        pin_memory=True)
def show_image(image):
    """Display ``image`` via ``plt.imshow``."""
    plt.imshow(image)
##### COMPUTING MEAN AND STANDARD DEVIATION FOR THE DATASET #####
# Running sum and sum of squares, one slot per channel (three channels assumed).
psum = torch.tensor([0.0, 0.0, 0.0])
psum_sq = torch.tensor([0.0, 0.0, 0.0])
# Loop through images
for inputs in tqdm(dataloader):
    # NOTE(review): each dataset sample is a dict, so `inputs` is presumably a
    # dict keyed by "image" rather than a tensor — confirm before calling
    # .sum() on it. axis=[0, 2, 3] also assumes a (batch, channel, height,
    # width) layout — TODO confirm.
    psum += inputs.sum(axis = [0, 2, 3])
    psum_sq += (inputs**2).sum(axis = [0, 2, 3])
##### PLOTTING IMAGES #####
fig = plt.figure()
# Show the first four samples in a 2x2 grid, then stop.
for i in range(len(dataset)):
    sample = dataset[i]
    print(i, sample['image'].shape)
    ax = plt.subplot(2, 2, i + 1)
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(i))
    ax.axis('off')
    # The sample dict is unpacked so its "image" entry becomes the keyword argument.
    show_image(**sample)
    if i == 3:
        plt.show()
        break
I am using the code presented in the following website if you may need additional context.
How can I fix this issue?

Pytorch Problem with Custom Dataset Class

First, I made a custom dataset to load in images from my dataframe (containing the image filepath and corresponding int label):
class Dataset(torch.utils.data.Dataset):
    """Dataset backed by a dataframe: column 0 is the image file path, column 1 the label."""

    def __init__(self, dataframe, transform=None):
        self.frame = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.frame)

    def __getitem__(self, idx):
        """Return ``{'image': float tensor, 'label': label}`` for row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        filename = self.frame.iloc[idx, 0]
        # Axes are reordered with (2, 0, 1), i.e. the last axis is moved to the
        # front — presumably HWC -> CHW; confirm against the image files.
        image = torch.from_numpy(io.imread(filename).transpose((2, 0, 1))).float()
        label = self.frame.iloc[idx, 1]
        sample = {'image': image, 'label': label}
        # When given, the transform receives the whole sample dict.
        if self.transform:
            sample = self.transform(sample)
        return sample
Then, I use pre-existing model architecture like so:
# DenseNet-161 built with default arguments.
model = models.densenet161()
# Swap the classifier for a linear layer with the same input width and 10 outputs.
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10) # where 10 is my number of classes
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
Finally, for training, I do the following:
model.train() # switch to train mode
for epoch in range(5):
    # The dataset is iterated directly, so every step sees one sample (no DataLoader batching).
    for i, sample in enumerate(train_set): # where train_set is an instance of my Dataset class
        optimizer.zero_grad()
        # unsqueeze(0) gives the image a leading batch dimension of 1.
        # NOTE(review): torch.Tensor(n) with a plain integer n allocates an
        # uninitialised tensor of length n instead of a tensor holding n; a
        # label of 2 would then give a target of size 2, which matches the
        # reported error — confirm the label type. torch.tensor([label]) is
        # the usual way to build a one-element target.
        image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
However, I am experiencing errors with loss = criterion(output, label). It tells me that ValueError: Expected input batch_size (1) to match target batch_size (2).. Can someone teach me how to properly use a custom dataset, especially with loading in batches of data? Also, why am I experiencing that ValueError? Thank you!
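Please check the following lines:
label = self.frame.iloc[idx, 1] in the dataset definition: print it to check whether it returns two ints instead of one.
image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long() in the training code: check the shape of the resulting tensor. Note that torch.Tensor(n), called with a Python int n, creates an uninitialized tensor of length n rather than a tensor containing n, so a label of 2 produces a target of batch size 2. torch.tensor([sample['label']]) builds a one-element target that matches the input batch size of 1.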

Keras : using generators to output trainingset batches and targets but also auxiliary data not used for training

I need to use generators (because of too large datasets) to yield training data and targets to a CNN for training. However, each data sample is normalized (/maxVal) and I need to un-normalize/de-normalize it just before the loss function. I don't know how to output this auxiliary data at the same time as a batch of (X,Y) from the generator?
It is something very similar to https://towardsdatascience.com/keras-data-generators-and-how-to-use-them-b69129ed779c :
import numpy as np
import cv2
from tensorflow.keras.utils import Sequence
class DataGenerator(Sequence):
    """Generates data for Keras

    Sequence based data generator. Suitable for building data generator for training and prediction.
    """

    def __init__(self, list_IDs, labels, image_path, mask_path,
                 to_fit=True, batch_size=32, dim=(256, 256),
                 n_channels=1, n_classes=10, shuffle=True):
        """Initialization

        :param list_IDs: list of all 'label' ids to use in the generator
        :param labels: list of image labels (file names)
        :param image_path: path to images location
        :param mask_path: path to masks location
        :param to_fit: True to return X and y, False to return X only
        :param batch_size: batch size at each iteration
        :param dim: tuple indicating image dimension
        :param n_channels: number of image channels
        :param n_classes: number of output masks
        :param shuffle: True to shuffle label indexes after every epoch
        """
        # Let the Keras base class set up whatever state it needs.
        super().__init__()
        self.list_IDs = list_IDs
        self.labels = labels
        self.image_path = image_path
        self.mask_path = mask_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch

        :return: number of batches per epoch
        """
        # Floor division: a trailing partial batch is dropped.
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data

        :param index: index of the batch
        :return: X and y (each divided by its batch maximum) when fitting.
            X only, unscaled, when predicting
        """
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X = self._generate_X(list_IDs_temp)
        if not self.to_fit:
            return X
        y = self._generate_y(list_IDs_temp)
        return self._scale_by_max(X), self._scale_by_max(y)

    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    @staticmethod
    def _scale_by_max(batch):
        """Divide ``batch`` by its maximum; an all-zero batch is returned as zeros.

        :param batch: array to normalise
        :return: float array
        """
        peak = np.max(batch) if batch.size else 0
        # Dividing by 1 keeps the float result type without producing NaN/inf.
        return batch / (peak if peak else 1)

    def _generate_X(self, list_IDs_temp):
        """Generates data containing batch_size images

        :param list_IDs_temp: list of label ids to load
        :return: batch of images
        """
        # Initialization (sized by the ids actually requested, so no row is left unfilled)
        X = np.empty((len(list_IDs_temp), *self.dim, self.n_channels))
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            image = self._load_grayscale_image(self.image_path + self.labels[ID])
            # The loaded image is 2-D; add a trailing axis so it can be
            # broadcast into the (*dim, n_channels) slot.
            X[i,] = image[..., np.newaxis]
        return X

    def _generate_y(self, list_IDs_temp):
        """Generates data containing batch_size masks

        :param list_IDs_temp: list of label ids to load
        :return: batch of masks
        """
        y = np.empty((len(list_IDs_temp), *self.dim), dtype=int)
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample (values are truncated to int by the array dtype)
            y[i,] = self._load_grayscale_image(self.mask_path + self.labels[ID])
        return y

    def _load_grayscale_image(self, image_path):
        """Load grayscale image

        :param image_path: path to image to load
        :return: loaded image, divided by 255
        :raises FileNotFoundError: if cv2 cannot read the file
        """
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with the
            # path instead of with an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255
        return img
So, if I have understood your need correctly, what you need to do:
Fit a MinMaxScaler on your whole target (y) dataset (if possible)
For each batch
Scale your batch's targets
Yield your batch's targets
Create a custom loss function that takes your scaler as an argument
Call your scaler's inverse_transform on your y_true and y_pred in your custom loss
Call your favorite loss function on your de-normalized y_true and y_pred and return its value

Training custom dataset in TensorFlow gives error

I want to perform image classification on my custom dataset with TensorFlow. I have imported my own dataset but stuck at the training step (not sure if it imports the complete dataset or a single batch of 50 images although list contains all file names).
Dataset Info: image resolution = 88*128 (single channel), batch size = 50.
Here is the list of operations I want to perform:
Import complete dataset (change in code if it only creates a batch of 50 images)
Train the model using my own dataset (train Images and test Images)
Proper way of creating batches.
Here is the complete code, so far:
import tensorflow as tf
import os
def init_weights(shape):
    """Create a weight variable of the given shape.

    Initial values are drawn with ``tf.truncated_normal`` using ``stddev=0.1``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def init_bias(shape):
    """Create a bias variable of the given shape, filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2by2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def convolutional_layer(input_x, shape):
    """Apply a convolution with fresh weights and bias, followed by ReLU.

    ``shape`` is the filter shape; its fourth entry sets the bias length.
    """
    kernel = init_weights(shape)
    offset = init_bias([shape[3]])
    pre_activation = conv2d(input_x, kernel) + offset
    return tf.nn.relu(pre_activation)
def normal_full_layer(input_layer, size):
    """Fully connected layer without activation: ``input_layer @ W + b``.

    The input width is read from dimension 1 of ``input_layer``'s static shape.
    """
    fan_in = int(input_layer.get_shape()[1])
    weights = init_weights([fan_in, size])
    biases = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + biases
def get_labels(path):
    """Return the names of the entries directly inside ``path``."""
    entries = os.listdir(path)
    return entries
def files_list(path):
    """Return the paths of all files found under ``path``, recursively.

    Every entry is joined with the directory it was found in, so it can be
    opened directly. A bare file name would only resolve from inside that
    directory.

    :param path: root directory to walk
    :return: list of file paths (empty if ``path`` contains no files)
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(path)
        for filename in filenames
    ]
def image_tensors(filesQueue):
    """Build the ops that read one file from the queue and turn it into an image.

    The file content is decoded as a single-channel JPEG, cast to float32 and
    resized to 88x128.
    """
    _, raw_bytes = tf.WholeFileReader().read(filesQueue)
    decoded = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=1), tf.float32)
    return tf.image.resize_images(decoded, [88, 128])
path = './data/train'
# Entries of the training directory; len(trainLabels) later sets the width of y_true.
trainLabels = get_labels(path)
trainingFiles = files_list(path)
trainQueue = tf.train.string_input_producer(trainingFiles)
# NOTE(review): tf.train.batch presumably returns a tensor that yields one batch
# of 50 images each time it is evaluated, not the whole dataset — confirm
# against the TF1 input-pipeline docs. No label tensor is batched here.
trainBatch = tf.train.batch([image_tensors(trainQueue)], batch_size=50)
# ^^^^^^^^ a complete dataset or only a single batch? How to check?
path = './data/test'
testLabels = get_labels(path)
testingFiles = files_list(path)
testQueue = tf.train.string_input_producer(testingFiles)
testBatch = tf.train.batch([image_tensors(testQueue)], batch_size=50)
# ^^^^^^^ same here
# NOTE(review): this placeholder has no batch dimension; the reshape below adds
# one — confirm it can accept a batch of 50 images.
x = tf.placeholder(tf.float32,shape=[88, 128])
y_true = tf.placeholder(tf.float32,shape=[None,len(trainLabels)])
x_image = tf.reshape(x,[-1,88,128,1])
# Two conv + 2x2 max-pool stages: 6x6 filters, 1 -> 32 -> 64 channels.
convo_1 = convolutional_layer(x_image,shape=[6,6,1,32])
convo_1_pooling = max_pool_2by2(convo_1)
convo_2 = convolutional_layer(convo_1_pooling,shape=[6,6,32,64])
convo_2_pooling = max_pool_2by2(convo_2)
# Each stride-2 'SAME' pool halves height and width: 88x128 -> 44x64 -> 22x32, with 64 channels.
convo_2_flat = tf.reshape(convo_2_pooling,[-1,22*32*64])
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat,1024))
# Dropout keep probability, fed at run time.
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one,keep_prob=hold_prob)
# NOTE(review): the output width is hard-coded to 10 while y_true has
# len(trainLabels) columns — confirm the two agree.
y_pred = normal_full_layer(full_one_dropout,10)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
steps = 4000
with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        # trainBatch is already the result of tf.train.batch; batching it again
        # and unpacking the result into two names is the statement the reported
        # TypeError points at.
        batch_x , batch_y = tf.train.batch(trainBatch, batch_size=50)
        # ^^^^^^^^^^^ Error
        # NOTE(review): feed_dict values normally have to be concrete data
        # (e.g. numpy arrays), not tf.Tensor objects — confirm.
        sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})
        # Report test accuracy every 400 steps.
        if i%400 == 0:
            print('Currently on step {}'.format(i))
            print('Accuracy is:')
            # NOTE(review): these ops are created anew on every report; they
            # could presumably be built once before the loop — confirm.
            matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))
            acc = tf.reduce_mean(tf.cast(matches,tf.float32))
            # NOTE(review): testLabels is a list of directory entry names, not
            # an array matching y_true's float shape — confirm.
            print(sess.run(acc,feed_dict={x:testBatch,y_true:testLabels,hold_prob:1.0}))
            # ^^^^^^^^^^^^ Test Images?
            print('\n')
This is the error I get:
TypeError Traceback (most recent call last)
<ipython-input-24-5d0dac5724cd> in <module>()
5 sess.run(init)
6 for i in range(steps):
----> 7 batch_x , batch_y = tf.train.batch([trainBatch], batch_size=50)
8 sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})
9
c:\users\TF_User\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in __iter__(self)
503 TypeError: when invoked.
504 """
--> 505 raise TypeError("'Tensor' object is not iterable.")
506
507 def __bool__(self):
TypeError: 'Tensor' object is not iterable.
It seems like the wrong type is being passed where a Tensor or a list is expected, but I can't figure it out. Kindly correct the issue and help me with the issues listed above.
It looks like you are using an unnecessary second call of tf.train.batch.
Generally you would do something like:
...
# Batch once, while building the graph and before the session starts.
images, labels = tf.train.batch([images, labels], batch_size=50)
with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        # NOTE(review): feed_dict values normally have to be concrete data
        # (e.g. numpy arrays), not tf.Tensor objects; with a queue-based
        # pipeline the model is usually built directly on `images`/`labels`
        # instead of placeholders — confirm.
        sess.run(train, feed_dict={x:images,y_true:labels,hold_prob:0.5})
...
I think the question "TensorFlow: does tf.train.batch automatically load the next batch when the batch has finished training?" should give you a better understanding of what tf.train.batch is doing and how it is used. The documentation on Reading Data should help too.

Resources