Image augmentation in Pytorch - pytorch

I would like to augment images alternately.
I have pytorch transform code as follows.
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
#Data transform (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
# Training pipeline: random crop/flip/affine augmentation followed by tensor
# conversion and per-channel normalisation with `stats`.
# NOTE(review): `Image` (presumably PIL's Image module) must be imported for
# the `resample` argument below - confirm against the rest of the script.
train_tfms = tt.Compose([
    tt.RandomCrop(32, padding=4, padding_mode='reflect'),
    tt.RandomHorizontalFlip(),
    tt.RandomAffine(degrees=(10, 30),
                    translate=(0.1, 0.3),
                    scale=(0.7, 1.3),
                    shear=0.1,
                    resample=Image.BICUBIC),  # the comma after this call was missing (syntax error)
    tt.ToTensor(),
    tt.Normalize(*stats),
])
When I create the dataset as follows and train on it, all images are augmented.
train_ds = ImageFolder('content/train', train_tfms)
But I want to augment alternately: the first image should be used for training as the original image, the next image should be augmented, and so on.
How can I do that?

From a single dataset you can create two datasets, one with augmentation and the other without, and then concatenate them. The order is kept because we use PyTorch's Subset class, which handles this for us.
# The "no augmentation" branch still has to produce normalised tensors:
# without a transform ImageFolder yields PIL images, which the default
# DataLoader collation cannot batch.
# NOTE(review): assumes the images on disk already have the size produced by
# the augmented branch (RandomCrop(32)) - confirm, otherwise add a resize/crop.
plain_tfms = tt.Compose([tt.ToTensor(), tt.Normalize(*stats)])
train_ds_no_aug = ImageFolder('content/train', plain_tfms)
train_ds_aug = ImageFolder('content/train', train_tfms)
# Odd positions are augmented, even positions are left as they are, so the
# two index sets cannot overlap.
aug_idx = torch.arange(1, len(train_ds_no_aug), 2)
no_aug_idx = torch.arange(0, len(train_ds_no_aug), 2)
train_ds_no_aug = torch.utils.data.Subset(train_ds_no_aug, no_aug_idx)
train_ds_aug = torch.utils.data.Subset(train_ds_aug, aug_idx)
# ConcatDataset is the map-style concatenation; ChainDataset only accepts
# IterableDataset instances and cannot be indexed or shuffled.
train_ds = torch.utils.data.ConcatDataset([train_ds_no_aug, train_ds_aug])
# Done :=

Related

What am I missing here, using ImageFolder to get the full folder name as labels for MNIST-double dataset images?

I would like to use dataset.ImageFolder to create an Image Dataset.
My current image directory structure looks like this:
1: In the train directory, I have subfolders named after my labels (00, 01, and so on). Each folder contains images of the double digits corresponding to its label.
Here is the code I used, followed by the output, where the labels do not match the images.
paths here
data_dir = "/home/mhamdan/hamdan/MNIST_muldigits/data/double_mnist"
train_dir = data_dir + '/train' # training_set contains training dataset
val_dir = data_dir + '/val' #contains validation dataset
test_dir = data_dir + '/test' #contains test dataset
Loading the data here
#Load the dataset with Image Folder
trainset = datasets.ImageFolder(train_dir, transform = transformation)
valset = datasets.ImageFolder(val_dir, transform = transformation)
testset = datasets.ImageFolder(test_dir, transform = transformation)
Data loaders
#define data loaders
batch_size = 32
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True,num_workers=2)
val_loader = DataLoader(valset, batch_size=batch_size, shuffle=True,num_workers=2)
test_loader = DataLoader(testset, batch_size=batch_size,num_workers=1)
Here is the plotting of random training images
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)
import matplotlib.pyplot as plt
fig = plt.figure()
for i in range(6):
plt.subplot(2,3,i+1)
plt.tight_layout()
plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
plt.title("Ground Truth: {}".format(example_targets[1]))
plt.xticks([])
plt.yticks([])
fig
As you see here, the labels are different than images
labels differ than images
Each subfolder corresponds to one unique label.
here the images in 01 subdirectory
Last update after using the index.
I think the problem is in printing the labels,
Here is the plotting of random training images
For this, the code should be,
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)
import matplotlib.pyplot as plt
fig = plt.figure()
for i in range(6):
plt.subplot(2,3,i+1)
plt.tight_layout()
plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
plt.title("Ground Truth: {}".format(example_targets[i]))
plt.xticks([])
plt.yticks([])
fig
In your code, it was example_targets[1]), instead of i.
Here is the solution to my question by taking the indexes as dictionary labelsdec = trainset.class_to_idx and by extracting the keys as labels/classes using this functions
def getList(dict):
    """Return the keys of a mapping as a list, in the mapping's iteration order.

    Args:
        dict: Any object exposing ``keys()`` (e.g. ``ImageFolder.class_to_idx``).
            The parameter name shadows the builtin and is kept only so that
            existing keyword callers keep working.

    Returns:
        A new list holding every key once.
    """
    # The original defined this function twice and copied the keys with a
    # manual append loop into a variable that shadowed the builtin ``list``.
    return list(dict.keys())
classes = getList(labelsdec)
Then plotting 10 images:
def imshow(img):
    """Rescale a channel-first image and draw it with matplotlib.

    Args:
        img: Array indexed as (channel, row, column).
            # assumes it was normalised with mean 0.5 / std 0.5 - TODO confirm
    """
    # undo the normalisation: x / 2 + 0.5
    unnormalized = img / 2 + 0.5
    # matplotlib wants the channel axis last
    channels_last = np.transpose(unnormalized, (1, 2, 0))
    plt.imshow(channels_last)
# obtain one batch of training images
data_iter = iter(train_loader)
# Python 3 iterators are advanced with the builtin next(), not a .next() method
images, lbls = next(data_iter)
images = images.numpy()  # convert images to numpy for display
# plot the images in the batch, along with the corresponding labels
fig = plt.figure(figsize=(10, 4))
n_images, n_rows = 10, 2
# display 10 images on a 2 x 5 grid; the grid dimensions must be integers,
# hence floor division instead of 10/2
for idx in range(n_images):
    ax = fig.add_subplot(n_rows, n_images // n_rows, idx + 1, xticks=[], yticks=[])
    imshow(images[idx])
    # class_to_idx maps folder name -> index, so `classes` maps the index back
    ax.set_title(classes[lbls[idx]])
Here is how it looks see image

How to calculate the mean and the std of cifar10 data

Pytorch is using the following values as the mean and std for the cifar10 data:
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
I need to understand the concept behind calculating it because this data is 3 channel image and I do not understand what is summed and divided over what and so on.
Also if someone can share a code for calculating the mean and the std, would be so thankful.
The 0.5 values are just approximations of the cifar10 mean and std values over the three channels (r,g,b). The precise values for the cifar10 train set are
mean: 0.49139968, 0.48215827 ,0.44653124
std: 0.24703233 0.24348505 0.26158768
You may calculate these using the following script:
import torch
import numpy
import torchvision.datasets as datasets
from torchvision import transforms
cifar_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
# every dataset item is an (image, label) pair; keep the images only and
# stack them into one array indexed as (sample, channel, row, column)
imgs = torch.stack([image for image, _ in cifar_trainset], dim=0).numpy()
channels = range(3)  # r, g, b
# mean of all pixels of all images, one value per channel
mean_r, mean_g, mean_b = (imgs[:, c, :, :].mean() for c in channels)
print(mean_r, mean_g, mean_b)
# standard deviation of all pixels of all images, one value per channel
std_r, std_g, std_b = (imgs[:, c, :, :].std() for c in channels)
print(std_r, std_g, std_b)
Also, you may find the same mean and std values here and here
Alternative way
from torchvision import datasets
cifar_trainset = datasets.CIFAR10(root='./data', train=True, download=True )
data = cifar_trainset.data / 255 # data is numpy array
mean = data.mean(axis = (0,1,2))
std = data.std(axis = (0,1,2))
print(f"Mean : {mean} STD: {std}") #Mean : [0.491 0.482 0.446] STD: [0.247 0.243 0.261]
The previous answers either relied on loading each image individually (which is incredibly slow) or assuming that the dataset saves the data as one large numpy array.
A more general and fast approach is to use a PyTorch DataLoader.
def get_mean_std(trainLoader):
    """Compute the per-channel mean and standard deviation of a loader's images.

    Args:
        trainLoader: Iterable of batches whose first element is an image
            tensor indexed as (sample, channel, ...), e.g. a DataLoader.

    Returns:
        ``(means, stds)``: two tuples of Python floats, one entry per channel.
        The values are also printed, as the original implementation did.

    Raises:
        ValueError: If ``trainLoader`` yields no batches.
    """
    # Gather the batches first and concatenate once; calling torch.cat inside
    # the loop re-copies everything accumulated so far on every iteration.
    batches = [batch[0].cpu() for batch in trainLoader]
    if not batches:
        raise ValueError("trainLoader yielded no batches")
    imgs = torch.cat(batches, dim=0).numpy()

    # One statistic per channel, whatever the channel count (3 for RGB).
    channels = range(imgs.shape[1])
    means = [imgs[:, c].mean() for c in channels]
    stds = [imgs[:, c].std() for c in channels]
    print(*means)
    print(*stds)
    return tuple(float(m) for m in means), tuple(float(s) for s in stds)

Augmenting both X and Y images with Keras

I know how to use the ImageDataGenerator to augment my data by translating, flipping, rotating, shearing, etc. The question is let's say that I have both a training image and the corresponding segmentation images and I would like to augment both of these images. For example if I rotated a training image by 45 degrees then I would also like to augment the segmentation image by 45 degrees. In essence I want to perform the identical set of transforms to two data sets. Is that possible to do with ImageDataGenerator, or do I have to write all the augmentation functions from scratch? Thanks very much in advance.
You can use augmentations in tf.data.Dataset.map and return the image twice. I don't know of any way to do this with ImageDataGenerator.
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage import data
cats = tf.concat([data.chelsea()[None, ...] for i in range(24)], axis=0)
test = tf.data.Dataset.from_tensor_slices(cats)
def augment(image):
    """Scale an image to floats, jitter hue and brightness, and return it twice.

    The same augmented tensor is returned in both positions of the tuple, so
    the two outputs always carry identical random changes.
    """
    # cast to float32 and divide by 255
    scaled = tf.divide(x=tf.cast(x=image, dtype=tf.float32), y=tf.constant(255.))
    jittered = tf.image.random_hue(image=scaled, max_delta=5e-1)
    jittered = tf.image.random_brightness(image=jittered, max_delta=2e-1)
    return jittered, jittered
test = test.batch(1).map(augment)
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images = next(iter(test.take(1)))
for index, image in enumerate(images):
ax = plt.subplot(1, 2, index + 1)
ax.set_xticks([])
ax.set_yticks([])
ax.imshow(tf.clip_by_value(tf.squeeze(image), clip_value_min=0, clip_value_max=1))
plt.show()

How to get a specific sample from pytorch DataLoader?

In Pytorch, is there any way of loading a specific single sample using the torch.utils.data.DataLoader class? I'd like to do some testing with it.
The tutorial uses
trainloader = torch.utils.data.DataLoader(...)
images, labels = next(iter(trainloader))
to fetch a random batch of samples. Is there a way, using DataLoader, to get a specific sample?
Cheers
Turn off the shuffle in DataLoader
Use batch_size to calculate the batch in which the desired sample you are looking for falls in
Iterate to the desired batch
Code
import torch
import numpy as np
import itertools
X = np.arange(100)
batch_size = 2
# shuffling is off so that the batches come out in dataset order
dataloader = torch.utils.data.DataLoader(X, batch_size=batch_size, shuffle=False)
sample_at = 5
# index of the batch that holds the wanted sample
k = sample_at // batch_size
# skip the first k batches and take the one that follows
batch_iter = itertools.islice(dataloader, k, k + 1)
my_sample = next(batch_iter)
print(my_sample)
Output:
tensor([4, 5])
If you want to get a specific single sample from your dataset,
you should check the Subset class (https://pytorch.org/docs/stable/data.html#torch.utils.data.Subset).
something like this:
indices = [0,1,2] # select your indices here as a list
subset = torch.utils.data.Subset(train_set, indices)
trainloader = DataLoader(subset , batch_size = 16 , shuffle =False) #set shuffle to False
for image , label in trainloader:
print(image.size() , '\t' , label.size())
print(image[0], '\t' , label[0]) # index the specific sample
here is a useful link if you want to learn more about the Pytorch data loading utility
(https://pytorch.org/docs/stable/data.html)

Keras Image Preprocessing

My training images are downscaled versions of their associated HR image. Thus, the input and the output images aren't the same dimension. For now, I'm using a hand-crafted sample of 13 images, but eventually I would like to be able to use my 500-ish HR (high-resolution) images dataset. This dataset, however, does not have images of the same dimension, so I'm guessing I'll have to crop them in order to obtain a uniform dimension.
I currently have this code set up: it takes a bunch of 512x512x3 images and applies a few transformations to augment the data (flips). I thus obtain a basic set of 39 images in their HR form, and then I downscale them by a factor of 4, thus obtaining my trainset which consists of 39 images of dimension 128x128x3.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import skimage
from skimage import transform
from constants import data_path
from constants import img_width
from constants import img_height
from model import setUpModel
def setUpImages():
    """Load the numbered sample JPEGs, augment both splits and hand them to setUpData.

    Files ``<data_path>0.jpg`` .. ``<data_path>10.jpg`` form the training
    sample; the remaining files up to index 12 form the final test sample.
    """
    sample_amnt = 11
    max_amnt = 13

    def load(index):
        # images are stored as <data_path><index>.jpg
        return mpimg.imread(data_path + str(index) + '.jpg')

    # Extracting images (512x512 according to the original note)
    train = [load(i) for i in range(sample_amnt)]
    finalTest = [load(i) for i in range(sample_amnt, max_amnt)]

    # TODO: evaluate keras' ImageDataGenerator (https://keras.io/preprocessing/image/)
    # as a replacement for the hand-written augmentation below.

    # Augmenting data
    trainData = dataAugmentation(train)
    testData = dataAugmentation(finalTest)
    setUpData(trainData, testData)
def setUpData(trainData, testData):
    """Split augmented data into inputs/targets and pass them to setUpModel.

    Each argument is treated as two equal halves: the first half holds the
    unaltered images (targets, Y) and the second half the deteriorated ones
    (inputs, X).
    """
    # TODO: substract mean of all images to all images

    def split(data):
        # -> (deteriorated second half, unaltered first half)
        half = len(data) // 2
        return data[half:], data[:half]

    X_train, Y_train = split(trainData)
    X_test, Y_test = split(testData)

    # Adjusting shapes for Keras input: rebuild each half as its own array
    # TODO: make into a function ?
    X_train = np.array(list(X_train))
    Y_train = np.array(list(Y_train))
    Y_test = np.array(list(Y_test))
    X_test = np.array(list(X_test))

    setUpModel(X_train, Y_train, X_test, Y_test)
# TODO: possibly remove once Keras Preprocessing is integrated?
# TODO: possibly remove once Keras Preprocessing is integrated?
def dataAugmentation(dataToAugment):
    """Augment images with flips and append a 4x-downscaled copy of each result.

    Args:
        dataToAugment: Sequence of equally shaped images with values in 0..255.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``object``. Its first half holds the
        full-size images (originals, left-right flips, then up-down flips of
        both) and its second half holds the same images downscaled by 4.
    """
    print("Starting to augment data")

    # faster computation with values between 0 and 1 ?
    dataToAugment = np.divide(dataToAugment, 255.)

    # TODO: switch from RGB channels to CbCrY
    # TODO: Try GrayScale

    # the normal images
    arrayToFill = list(dataToAugment)
    # vertical axis flip of everything collected so far (doubles the list)
    arrayToFill += [np.fliplr(img) for img in arrayToFill]
    # horizontal axis flip of everything collected so far (doubles it again)
    arrayToFill += [np.flipud(img) for img in arrayToFill]

    # skimage expects output_shape as (rows, cols); floor division gives whole
    # pixel counts.
    # NOTE(review): assumes img_height / img_width are integer pixel sizes - confirm
    low_res_shape = (img_height // 4, img_width // 4)
    # downsizing by scale of 4 (doubles the list once more)
    arrayToFill += [
        skimage.transform.resize(img, low_res_shape, mode='reflect', anti_aliasing=True)
        for img in arrayToFill
    ]

    # The list mixes full-size and downscaled images, so it is ragged.
    # np.array() on ragged input raises on current NumPy; fill an object array
    # element by element instead, which yields what older NumPy built implicitly.
    result = np.empty(len(arrayToFill), dtype=object)
    for position, img in enumerate(arrayToFill):
        result[position] = img
    return result
My question is: in my case, can I use the Preprocessing tool that Keras offers? I would ideally like to be able to input my varying sized images of high quality, crop them (not downsize them) to 512x512x3, and data augment them through flips and whatnot. Substracting the mean would also be part of what I'd like to achieve. That set would represent my validation set.
Reusing the validation set, I want to downscale by a factor of 4 all the images, and that would generate my training set.
Those two sets could then be split appropriately to obtain, ultimately, the famous X_train Y_train X_test Y_test.
I'm just hesitant about throwing out all the work I've done so far to preprocess my mini sample, but I'm thinking if it can all be done with a single built-in function, maybe I should give that a go.
This is my first ML project, hence me not understanding very well Keras, and the documentation isn't always the clearest. I'm thinking that the fact that I'm working with a X and Y that are different in size, maybe this function doesn't apply to my project.
Thank you! :)
Yes you can use keras preprocessing function. Below some snippets to help you...
def cropping_function(x):
...
return cropped_image
# Generator for the inputs: crop, then random horizontal/vertical flips.
X_image_gen = ImageDataGenerator(preprocessing_function=cropping_function,
                                 horizontal_flip=True,
                                 vertical_flip=True)
# Both flows are given the same seed and batch size.
# NOTE(review): the shared seed is presumably what keeps the random flips of
# inputs and targets in step - confirm against the Keras documentation.
X_train_flow = X_image_gen.flow(X_train, batch_size=16, seed=1)
# Generator for the targets: flips only.
Y_image_gen = ImageDataGenerator(horizontal_flip=True,
                                 vertical_flip=True)
# `Y_train` (capitalised like `X_train`); the original `y_train` was undefined
Y_train_flow = Y_image_gen.flow(Y_train, batch_size=16, seed=1)
# pair every input batch with its target batch
train_flow = zip(X_train_flow, Y_train_flow)
model.fit_generator(train_flow)
Christof Henkel's suggestion is very clean and nice. I would just like to offer another way to do it using imgaug, a convenient way to augment images in lots of different ways. It's useful if you want more implemented augmentations or if you ever need to use some ML library other than Keras.
It unfortunately doesn't have a way to make crops that way but it allows implementing custom functions. Here is an example function for generating random crops of a set size from an image that's at least as big as the chosen crop size:
from imgaug import augmenters as iaa
def random_crop(images, random_state, parents, hooks):
    """Cut one random 128x128 crop out of every image.

    Args:
        images: Iterable of arrays indexed as (row, column, ...).
        random_state: Unused; offsets are drawn from numpy's global generator.
        parents: Unused.
        hooks: Unused.

    Returns:
        ``np.ndarray`` holding one crop per input image. Images smaller than
        the crop size in either dimension are replaced by a
        ``(128, 128, 3)`` array of zeros.
    """
    crop_h, crop_w = 128, 128
    new_images = []
    for img in images:
        if (img.shape[0] >= crop_h) and (img.shape[1] >= crop_w):
            # randint excludes its upper bound: the +1 makes the last valid
            # offset reachable and stops an image of exactly the crop size
            # from raising ValueError (randint(0, 0)).
            rand_h = np.random.randint(0, img.shape[0] - crop_h + 1)
            rand_w = np.random.randint(0, img.shape[1] - crop_w + 1)
            new_images.append(img[rand_h:rand_h + crop_h, rand_w:rand_w + crop_w])
        else:
            # too small to crop: emit a blank placeholder instead
            new_images.append(np.zeros((crop_h, crop_w, 3)))
    return np.array(new_images)
def keypoints_dummy(keypoints_on_images, random_state, parents, hooks):
    """Return the keypoints exactly as received; the other arguments are ignored."""
    untouched = keypoints_on_images
    return untouched
cropper = iaa.Lambda(func_images=random_crop, func_keypoints=keypoints_dummy)
You can then combine this function with any other builtin imgaug function, for example the flip functions that you're already using like this:
seq = iaa.Sequential([cropper, iaa.Fliplr(0.5), iaa.Flipud(0.5)])
This function could then generate lots of different crops from each image. An example image with some possible results (note that it would result in actual (128, 128, 3) images, they are just merged into one image here for visualization):
Your image set could then be generated by:
crops_per_image = 10
images = [skimage.io.imread(path) for path in glob.glob('train_data/*.jpg')]
augs = np.array([seq.augment_image(img)/255 for img in images for _ in range(crops_per_image)])
It would also be simple to add new functions to be applied to the images, for example the remove mean functions you mentioned.
Here's another way performing random and center crop before resizing using native ImageDataGenerator and flow_from_directory. You can add it as preprocess_crop.py module into your project.
It first resizes image preserving aspect ratio and then performs crop. Resized image size is based on crop_fraction which is hardcoded but can be changed. See crop_fraction = 0.875 line where 0.875 appears to be the most common, e.g. 224px crop from 256px image.
Note that the implementation has been done by monkey patching keras_preprocessing.image.utils.load_img function as I couldn't find any other way to perform crop before resizing without rewriting many other classes above.
Due to these limitations, the cropping method is enumerated into the interpolation field. Methods are delimited by : where the first part is interpolation and second is crop e.g. lanczos:random. Supported crop methods are none, center, random. When no crop method is specified, none is assumed.
How to use it
Just drop the preprocess_crop.py into your project to enable cropping. The example below shows how you can use random cropping for the training and center cropping for validation:
import preprocess_crop
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input
#...
# Training with random crop
train_datagen = ImageDataGenerator(
rotation_range=20,
channel_shift_range=20,
horizontal_flip=True,
preprocessing_function=preprocess_input
)
train_img_generator = train_datagen.flow_from_directory(
train_dir,
target_size = (IMG_SIZE, IMG_SIZE),
batch_size = BATCH_SIZE,
class_mode = 'categorical',
interpolation = 'lanczos:random', # <--------- random crop
shuffle = True
)
# Validation with center crop
validate_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input
)
validate_img_generator = validate_datagen.flow_from_directory(
validate_dir,
target_size = (IMG_SIZE, IMG_SIZE),
batch_size = BATCH_SIZE,
class_mode = 'categorical',
interpolation = 'lanczos:center', # <--------- center crop
shuffle = False
)
Here's preprocess_crop.py file to include with your project:
import random
import keras_preprocessing.image
def load_and_crop_img(path, grayscale=False, color_mode='rgb', target_size=None,
                      interpolation='nearest'):
    """Wraps keras_preprocessing.image.utils.load_img() and adds cropping.

    The crop method is encoded in the ``interpolation`` argument.

    # Arguments
        path: Path to image file.
        grayscale: Forwarded unchanged to ``load_img``.
        color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb".
            The desired image format.
        target_size: Either `None` (default to original size)
            or tuple of ints `(img_height, img_width)`.
        interpolation: Interpolation and crop methods used to resample and crop the image
            if the target size is different from that of the loaded image.
            Methods are delimited by ":" where first part is interpolation and second is crop
            e.g. "lanczos:random".
            Supported interpolation methods are "nearest", "bilinear", "bicubic", "lanczos",
            "box", "hamming". By default, "nearest" is used.
            Supported crop methods are "none", "center", "random".
            When no crop method is given, "none" is assumed.

    # Returns
        A PIL Image instance.

    # Raises
        ImportError: if PIL is not available.
        ValueError: if the interpolation or crop method is not supported.
    """
    utils = keras_preprocessing.image.utils

    # Decode interpolation string. Allowed crop methods: none, center, random.
    # Splitting once keeps a stray extra ":" inside the crop part, so it is
    # reported as an invalid crop method instead of failing to unpack.
    if ":" in interpolation:
        interpolation, crop = interpolation.split(":", 1)
    else:
        crop = "none"

    if crop == "none":
        return utils.load_img(path,
                              grayscale=grayscale,
                              color_mode=color_mode,
                              target_size=target_size,
                              interpolation=interpolation)

    # Reject unknown crop methods up front rather than only when the image
    # happens to need resizing; the message is now actually formatted.
    if crop not in ("center", "random"):
        raise ValueError('Invalid crop method {} specified.'.format(crop))

    # Load original size image using Keras
    img = utils.load_img(path,
                         grayscale=grayscale,
                         color_mode=color_mode,
                         target_size=None,
                         interpolation=interpolation)

    # Without a target size there is nothing to resize or crop to. The
    # original indexed target_size before this check and raised TypeError.
    if target_size is None:
        return img

    target_height, target_width = target_size[0], target_size[1]
    if img.size == (target_width, target_height):
        return img

    if interpolation not in utils._PIL_INTERPOLATION_METHODS:
        raise ValueError(
            'Invalid interpolation method {} specified. Supported '
            'methods are {}'.format(
                interpolation,
                ", ".join(utils._PIL_INTERPOLATION_METHODS.keys())))
    resample = utils._PIL_INTERPOLATION_METHODS[interpolation]

    # Crop fraction of total image
    crop_fraction = 0.875

    width, height = img.size

    # Resize keeping aspect ratio.
    # The result should be no smaller than the target size, including the
    # crop fraction overhead.
    target_size_before_crop = (target_width / crop_fraction, target_height / crop_fraction)
    ratio = max(target_size_before_crop[0] / width, target_size_before_crop[1] / height)
    target_size_before_crop_keep_ratio = int(width * ratio), int(height * ratio)
    img = img.resize(target_size_before_crop_keep_ratio, resample=resample)

    width, height = img.size

    if crop == "center":
        left_corner = int(round(width / 2)) - int(round(target_width / 2))
        top_corner = int(round(height / 2)) - int(round(target_height / 2))
        return img.crop((left_corner, top_corner,
                         left_corner + target_width, top_corner + target_height))

    # crop == "random": random.randint includes both end points
    left_shift = random.randint(0, int(width - target_width))
    down_shift = random.randint(0, int(height - target_height))
    return img.crop((left_shift, down_shift,
                     target_width + left_shift, target_height + down_shift))
# Monkey patch
keras_preprocessing.image.iterator.load_img = load_and_crop_img

Resources