How can I iterate over all the batches from DataLoader? - pytorch

I wanted to iterate over all the batches and save the images, but with this process it's saving only the images of the first batch:
for batch_idx, (test_data, test_targets) in enumerate(test_loader):
    for i in range(0, test_loader.batch_size-1):
        img = np.array(test_data[i][0])*255
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        low_black = np.array([0,0,0])
        high_black = np.array([360,255,0])
        mask = cv2.inRange(hsv, low_black, high_black)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img[mask>0] = random.choice(list(color_dict.values()))
        cv2.imwrite(f'/content/test_data/{test_targets[i].item()}_{i+1}.png', img)

Since i restarts from 0 at every batch, the saved filenames repeat across batches and later files overwrite earlier ones. One common way to solve this is a running count:
count = 0  # here: a global counter shared across all batches
for batch_idx, (test_data, test_targets) in enumerate(test_loader):
    for i in range(test_data.size(0)):  # iterate the actual batch size; batch_size-1 would skip a sample and break on a smaller last batch
        img = np.array(test_data[i][0])*255
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        low_black = np.array([0,0,0])
        high_black = np.array([360,255,0])
        mask = cv2.inRange(hsv, low_black, high_black)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img[mask>0] = random.choice(list(color_dict.values()))
        cv2.imwrite(f'/content/test_data/{test_targets[i].item()}_{count}.png', img)
        count += 1  # plus one for every sample

The name of each saved image depends only on i and test_targets[i]:
cv2.imwrite(f'/content/test_data/{test_targets[i].item()}_{i+1}.png', img)
So all batches are probably processed, but the files are overwritten batch after batch. That is typically the case when test_targets[i] repeats across batches for a given i.
To fix it, you can put batch_idx (which is unused here!) in the filename too.
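For example, a minimal sketch of that fix, changing only the imwrite call:

cv2.imwrite(f'/content/test_data/{test_targets[i].item()}_{batch_idx}_{i+1}.png', img)

With batch_idx in the name, files from different batches can no longer collide.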

Related

draw_bounding_boxes function of PyTorch not completing bounding boxes

I am working with the torchvision.utils function draw_bounding_boxes to draw boxes on an image as a visualization. I confirmed that the boxes fed to the function are of the form (xmin, ymin, xmax, ymax), but the drawn boxes lack lines on the boundary. See the figure below:
Can anyone please tell me what the possible problem here is? The code is given below:
DATA_DIR = "../Downloads/" if not torch.cuda.is_available() else "../ssd/PascalVOC"
batch_size = 1
image_dim = (512, 512)  # height, width
if device == "cuda":
    datagen = VOCLoader(rootDir=DATA_DIR, target_transform=VOCAnnotationTransform,
                        imSize=image_dim, split='train', scale=True, falseSamplePercentage=100,
                        random_flip=True, boxErrorPercentage=30, random_sampler=0)
    val_datagen = VOCLoader(rootDir=DATA_DIR, target_transform=VOCAnnotationTransform,
                            imSize=image_dim, split='val', scale=True, falseSamplePercentage=100,
                            boxErrorPercentage=30, random_sampler=0)
    trainData = torch.utils.data.DataLoader(datagen, batch_size=batch_size, shuffle=True,
                                            collate_fn=collate_fn)
    valData = torch.utils.data.DataLoader(val_datagen, batch_size=batch_size, shuffle=True,
                                          collate_fn=collate_fn)
else:
    datagen = VOCLoader(rootDir=DATA_DIR, target_transform=VOCAnnotationTransform,
                        imSize=image_dim, split='train', scale=True, falseSamplePercentage=50,
                        random_flip=True, random_sampler=0)
    val_datagen = VOCLoader(rootDir=DATA_DIR, target_transform=VOCAnnotationTransform,
                            imSize=image_dim, split='val', scale=True, falseSamplePercentage=50,
                            random_sampler=0)
    trainData = torch.utils.data.DataLoader(datagen, batch_size=batch_size, shuffle=False,
                                            collate_fn=collate_fn)
    valData = torch.utils.data.DataLoader(val_datagen, batch_size=batch_size, shuffle=False,
                                          collate_fn=collate_fn)
trainBar = tqdm.tqdm(trainData)
valBar = tqdm.tqdm(valData)
for batch, data in enumerate(valBar):
    image = data[0].to(device)
    # grid = make_grid(image.cpu())
    # show(grid)
    targetDict = data[1]
    target = targetDict['boxes']
    falseBoxes = targetDict['falseBoxes'].to(device)
    falseBoxes_list = tensors_to_list(falseBoxes)
    pred_box, pred_var = model(image, falseBoxes_list)
    target = torch.concat([*target], dim=0).to(device)
    pred_coords = decode_pred_bbox_xyxy_xyxy(falseBoxes_list, pred_box, image_dim)
    variance = torch.exp(pred_var)
    image = de_normalize_img(image.squeeze().cpu())
    image = draw_bounding_boxes((image * 255).type(torch.uint8).squeeze(), target,
                                colors=(0, 255, 0))
    image = draw_bounding_boxes(image, pred_coords.type(torch.int), colors=(255, 0, 0))
    image = draw_bounding_boxes(image, falseBoxes[:, 1:], colors=(0, 255, 255))
    grid = make_grid(image)
    show(grid, f"Results/image{batch}.png")
print("Done")
The function show() just converts the tensor to NumPy and uses matplotlib to save it to a file.
Sorry, the problem was with the show() function, which called plt.imshow() with the argument interpolation="nearest". If I do not pass any extra arguments other than the image, it displays correctly.
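A minimal sketch of a fixed show() consistent with that description (the original function is not shown, so the exact signature and details are assumptions):

import matplotlib.pyplot as plt

def show(img_tensor, path=None):
    # Convert the CHW tensor to HWC NumPy and let imshow use its default
    # interpolation; forcing interpolation="nearest" can drop thin
    # one-pixel box edges when the figure downsamples the image.
    img = img_tensor.permute(1, 2, 0).cpu().numpy()
    plt.imshow(img)
    plt.axis("off")
    if path:
        plt.savefig(path, bbox_inches="tight")
    plt.close()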

Pytorch: Custom dataset for segmentation, error with input dimension shape

I would like to know why I get this error: 1only batches of spatial targets supported (3D tensors) but got targets of size [16, 3, 512, 512], with this dataset:
class MyDataset(Dataset):
    def __init__(self, images, masks, mean, std, transforms=None):
        self.images = images
        self.masks = masks
        self.transforms = transforms
        self.mean = mean
        self.std = std

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = Image.open(image_path)
        image = np.array(image)
        mask_path = self.masks[idx]
        mask = Image.open(mask_path)
        mask = np.array(mask)
        if self.transforms is not None:
            aug = self.transforms(image=image, mask=mask)
            image = T.ToPILImage()(aug['image'])
            mask = aug['mask']
        if self.transforms is None:
            image = T.ToPILImage()(image)
        t = T.Compose([T.Resize(256), T.ToTensor(), T.Normalize(self.mean, self.std)])
        image = t(image)
        mask = torch.from_numpy(mask).long().permute(2, 0, 1)
        return image, mask
I have checked that the images and masks have shape [C, H, W], and I don't know why the mini-batch dimension is added as the first dimension.
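The batch dimension itself is normal: the DataLoader stacks [C, H, W] samples into [N, C, H, W]. If the loss in use is nn.CrossEntropyLoss or nn.NLLLoss (an assumption; the training loop is not shown), the error is about the target shape: those losses expect class-index masks of shape [N, H, W], not 3-channel masks. A hedged sketch of collapsing an RGB mask to class indices (the palette is hypothetical and must match your dataset):

import numpy as np
import torch

# Hypothetical palette: RGB value -> class index (adapt to your dataset).
PALETTE = {(0, 0, 0): 0, (255, 0, 0): 1, (0, 255, 0): 2}

def rgb_mask_to_indices(mask_rgb):
    # Collapse an (H, W, 3) RGB mask to an (H, W) LongTensor of class ids.
    out = np.zeros(mask_rgb.shape[:2], dtype=np.int64)
    for rgb, idx in PALETTE.items():
        out[np.all(mask_rgb == rgb, axis=-1)] = idx
    return torch.from_numpy(out)  # no .permute(); the target stays 2-D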

Map function within each batch when parsing tensorflow records

Basically, this code lets me randomly apply image augmentation to my training samples in TFRecords. The following code treats each batch (32 pics) the same way: either flip/rotation, cutout, or nothing. But I would like to apply image_aug within each batch, so that each batch contains 25%, 25% and 50% of the above-mentioned transformed images.
Here are my image-augmentation and parse functions for TFRecords:
def decode_image(image_data, shape):
    image = tf.io.decode_png(image_data, channels=shape[-1])
    image = tf.cast(image, tf.float32) / 255.0  # convert image to floats in [0, 1] range
    image = tf.reshape(image, shape)  # explicit size needed for TPU
    return image

def image_aug(image):
    random_num = np.random.rand()  # NOTE: runs once when map traces this function, not per sample
    if random_num < 0.25:
        data_augmentation = keras.Sequential([
            keras.layers.RandomFlip("horizontal_and_vertical"),
            keras.layers.RandomRotation(0.2),
        ])
        image = data_augmentation(tf.expand_dims(image, axis=0))
    elif random_num < 0.5:
        image = tfa.image.random_cutout(
            # image,
            tf.expand_dims(image, axis=0),
            mask_size=(100, 100),
            constant_values=1
        )
    return tf.squeeze(image)

def parse_example(serialized, shape, data_aug=False):
    features = {'image': tf.io.FixedLenFeature([], tf.string),
                'label': tf.io.FixedLenFeature([], tf.int64)}
    # Parse the serialized data so we get a dict with our data.
    parsed_example = tf.io.parse_single_example(serialized=serialized, features=features)
    image_raw = parsed_example['image']  # Get the image as raw bytes.
    image = decode_image(image_raw, shape)  # Decode the raw bytes so it becomes a typed tensor.
    # label = tf.io.decode_raw(parsed_example['label'], tf.uint8)
    # label = tf.cast(parsed_example['label'], tf.int64)
    if data_aug:
        # image = image.numpy()
        # for i in range(len(image)):
        image = image_aug(image)
    return image, tf.cast(parsed_example['label'], tf.float32)
The tf.data pipeline is built like this:
train_dataset = tf.data.TFRecordDataset(np.asarray(train_val)[tr_idx])
train_dataset = (train_dataset
                 .map(partial(parse_example, data_aug=True, shape=IMAGE_SIZE))
                 .cache()
                 .shuffle(2048)
                 .prefetch(AUTOTUNE)
                 .batch(BATCH_SIZE)
                 .repeat(NUM_EPOCHS))
Any ideas will be appreciated!
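One way to get a per-sample decision (a sketch of my own, not from the original thread): draw the random number with TF ops inside the graph, so it is re-evaluated for every element rather than frozen once when map traces the Python function. With independent per-sample draws, each batch contains roughly the 25%/25%/50% mix in expectation:

import tensorflow as tf
import tensorflow_addons as tfa  # as used in the question's image_aug

def image_aug_per_sample(image):
    # tf.random ops are re-evaluated for every element the map processes,
    # unlike np.random.rand(), which runs once at tracing time.
    r = tf.random.uniform([], 0.0, 1.0)

    def flip_rotate():
        x = tf.image.random_flip_left_right(image)
        x = tf.image.random_flip_up_down(x)
        # Right-angle rotation stands in for RandomRotation here.
        return tf.image.rot90(x, k=tf.random.uniform([], 0, 4, dtype=tf.int32))

    def cutout():
        # tfa.image.random_cutout expects a batch dimension.
        x = tfa.image.random_cutout(tf.expand_dims(image, 0),
                                    mask_size=(100, 100), constant_values=1)
        return tf.squeeze(x, 0)

    # First matching predicate wins: 25% flip/rotate, 25% cutout, 50% unchanged.
    return tf.case([(r < 0.25, flip_rotate), (r < 0.5, cutout)],
                   default=lambda: image)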

Create custom datagenerator in Keras using my own dataset

I want to create my own custom DataGenerator for my own dataset. I have read all the images and stored their locations and labels in two variables named images and labels. I have written this custom generator:
def data_gen(img_folder, y, batch_size):
    c = 0
    n_image = list(np.arange(0, len(img_folder), 1))  # list of training image indices
    random.shuffle(n_image)
    while True:
        img = np.zeros((batch_size, 224, 224, 3)).astype('float')  # zero array to hold a batch of training images
        label = np.zeros((batch_size)).astype('float')  # zero array to hold the batch labels
        for i in range(c, c + batch_size):  # initially from 0 to 16, c = 0
            train_img = imread(img_folder[n_image[i]])
            # row, col = train_img.shape
            train_img = cv2.resize(train_img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
            train_img = train_img.reshape(224, 224, 3)
            # binary_img = binary_img[:, :128//2]
            img[i-c] = train_img  # add to array - img[0], img[1], and so on
            label[i-c] = y[n_image[i]]
        c += batch_size
        if c + batch_size >= len(img_folder):
            c = 0
            random.shuffle(n_image)
            # print "randomizing again"
        yield img, label
What I want to know is: how can I add other augmentations like flip, crop, and rotate to this generator? Moreover, how should I yield the augmented images so that they stay linked with the correct label?
Please let me know.
You can apply flip, crop, and rotate to train_img before putting it into img. That is,
# ....
while True:
    # ....
    # add your data augmentation function here
    train_img = data_augmentor(train_img)
    img[i-c] = train_img
    # ....
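For instance, a minimal sketch of such a data_augmentor (the name comes from the answer's snippet; the specific transforms are my illustration). Flips and right-angle rotations are label-preserving for classification, so the img/label pairing in the generator stays correct:

import random
import numpy as np

def data_augmentor(train_img):
    # Random horizontal flip.
    if random.random() < 0.5:
        train_img = np.fliplr(train_img)
    # Random rotation by 0, 90, 180, or 270 degrees.
    train_img = np.rot90(train_img, k=random.randint(0, 3))
    return np.ascontiguousarray(train_img)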

Correct way of doing data augmentation in TensorFlow with the dataset api?

So, I've been playing around with the TensorFlow dataset API for loading images and segmentation masks (for a semantic segmentation project). I would like to generate batches of images and masks where each image has randomly gone through any combination of pre-processing functions: brightness changes, contrast changes, cropping, saturation changes, etc. So the first image in my batch may have no pre-processing, the second may have saturation changes, the third may have brightness and saturation changes, and so on.
I tried the following:
import tensorflow as tf
from tensorflow.contrib.data import Dataset, Iterator
import random

def _resize_image(image, mask):
    image = tf.image.resize_bicubic(image, [480, 640], True)
    mask = tf.image.resize_bicubic(mask, [480, 640], True)
    return image, mask

def _corrupt_contrast(image, mask):
    image = tf.image.random_contrast(image, 0, 5)
    return image, mask

def _corrupt_saturation(image, mask):
    image = tf.image.random_saturation(image, 0, 5)
    return image, mask

def _corrupt_brightness(image, mask):
    image = tf.image.random_brightness(image, 5)
    return image, mask

def _random_crop(image, mask):
    seed = random.random()
    image = tf.random_crop(image, [240, 320, 3], seed=seed)
    mask = tf.random_crop(mask, [240, 320, 1], seed=seed)
    return image, mask

def _flip_image_horizontally(image, mask):
    seed = random.random()
    image = tf.image.random_flip_left_right(image, seed=seed)
    mask = tf.image.random_flip_left_right(mask, seed=seed)
    return image, mask

def _flip_image_vertically(image, mask):
    seed = random.random()
    image = tf.image.random_flip_up_down(image, seed=seed)
    mask = tf.image.random_flip_up_down(mask, seed=seed)
    return image, mask

def _normalize_data(image, mask):
    image = tf.cast(image, tf.float32)
    image = image / 255.0
    mask = tf.cast(mask, tf.float32)
    mask = mask / 255.0
    return image, mask

def _parse_data(image_paths, mask_paths):
    image_content = tf.read_file(image_paths)
    mask_content = tf.read_file(mask_paths)
    images = tf.image.decode_png(image_content, channels=3)
    masks = tf.image.decode_png(mask_content, channels=1)
    return images, masks

def data_batch(image_paths, mask_paths, params, batch_size=4, num_threads=2):
    # Convert lists of paths to tensors for TensorFlow
    images_name_tensor = tf.constant(image_paths)
    mask_name_tensor = tf.constant(mask_paths)
    # Create dataset out of the 2 files:
    data = Dataset.from_tensor_slices((images_name_tensor, mask_name_tensor))
    # Parse images and labels
    data = data.map(
        _parse_data, num_threads=num_threads, output_buffer_size=6 * batch_size)
    # Normalize images and masks for vals. between 0 and 1
    data = data.map(_normalize_data, num_threads=num_threads, output_buffer_size=6 * batch_size)
    if params['crop'] and not random.randint(0, 1):
        data = data.map(_random_crop, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)
    if params['brightness'] and not random.randint(0, 1):
        data = data.map(_corrupt_brightness, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)
    if params['contrast'] and not random.randint(0, 1):
        data = data.map(_corrupt_contrast, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)
    if params['saturation'] and not random.randint(0, 1):
        data = data.map(_corrupt_saturation, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)
    if params['flip_horizontally'] and not random.randint(0, 1):
        data = data.map(_flip_image_horizontally,
                        num_threads=num_threads, output_buffer_size=6 * batch_size)
    if params['flip_vertically'] and not random.randint(0, 1):
        data = data.map(_flip_image_vertically, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)
    # Shuffle the data queue
    data = data.shuffle(len(image_paths))
    # Create a batch of data
    data = data.batch(batch_size)
    data = data.map(_resize_image, num_threads=num_threads,
                    output_buffer_size=6 * batch_size)
    # Create iterator
    iterator = Iterator.from_structure(data.output_types, data.output_shapes)
    # Next element op
    next_element = iterator.get_next()
    # Dataset init op
    init_op = iterator.make_initializer(data)
    return next_element, init_op
But all batches returned by this have the same transformations applied to them, not different combinations. My guess is that the random.randint result persists and is not actually re-run for each batch. If so, how do I fix this to get the desired result?
An example of how I plan to use it (I feel that's irrelevant to the problem, but people might still want to know) can be found here.
So the problem was indeed that the control flow with the if statements uses Python variables and is executed only once, when the graph is created. To do what I want, I had to define a placeholder containing the boolean values of whether to apply each function (and feed in a new boolean tensor per iteration to change the augmentation), with the control flow handled by tf.cond. I pushed the new code to the GitHub link I posted in the question above, if anyone is interested.
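For readers who prefer not to feed placeholders, here is a sketch of a closely related alternative (my illustration, not the author's pushed code): draw the coin flip in-graph with tf.random_uniform and branch with tf.cond, so the decision is re-evaluated per element instead of being frozen at graph-construction time:

def _maybe(aug_fn, image, mask, prob=0.5):
    # The uniform draw is a graph op, so it runs again for every element,
    # unlike Python's random.randint, which fires once while building the graph.
    coin = tf.random_uniform([], 0.0, 1.0) < prob
    return tf.cond(coin, lambda: aug_fn(image, mask), lambda: (image, mask))

def _augment(image, mask):
    image, mask = _maybe(_flip_image_horizontally, image, mask)
    image, mask = _maybe(_corrupt_brightness, image, mask)
    # ... chain the remaining augmentation functions the same way
    return image, mask

# In data_batch, the if-guarded maps become one unconditional map:
# data = data.map(_augment, num_threads=num_threads, output_buffer_size=6 * batch_size)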
