Transformation parameter as learning target - pytorch

Let's say I am applying a random blur to a torchvision MNIST image, and I want to train the net to guess the blur strength. It looks natural to use a standard transform, but in the dataset class implementation the transformations of data and target are applied independently:
if self.transform is not None:
    img = self.transform(img)
if self.target_transform is not None:
    target = self.target_transform(target)
So I can't see a way to pass the blur value through for insertion into the target.
Well, I could rewrite the dataset class, but maybe there is a more natural way to pass a parameter from the data transform to the target_transform?

Here is my own approach, though I am looking for something more elegant:
import os
import random
from typing import Any, Tuple

import torchvision
from PIL import Image

def myBlur(t):
    # draw a random sigma and return it together with the blurred image
    sigm = random.uniform(0.1, 12.0)
    t = torchvision.transforms.functional.gaussian_blur(t, 15, sigm)
    return (t, sigm)

class MyMNIST(torchvision.datasets.MNIST):
    @property
    def processed_folder(self) -> str:
        # we want files from the 'MNIST' folder, not from __classname__ == 'MyMNIST'
        return os.path.join(self.root, 'MNIST', 'processed')

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        img, target = self.data[index], int(self.targets[index])
        img = Image.fromarray(img.numpy(), mode='L')
        # call all transformations here explicitly
        img = torchvision.transforms.ToTensor()(img)
        img = torchvision.transforms.Resize((96, 96))(img)
        img, dd = myBlur(img)  # the blur sigma becomes the target
        img = torchvision.transforms.Normalize((0.1307,), (0.3081,))(img)
        return img, dd
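For reference, a more generic pattern (a sketch of my own, not an official torchvision mechanism; ParamTargetDataset and random_blur are names I made up): keep the blur as a transform that returns (img, sigma), and wrap any base dataset so the transform's parameter replaces the original target:

import random

import torchvision
from torch.utils.data import Dataset

def random_blur(t):
    # hypothetical transform: returns the blurred image plus the sigma used
    sigma = random.uniform(0.1, 12.0)
    return torchvision.transforms.functional.gaussian_blur(t, 15, sigma), sigma

class ParamTargetDataset(Dataset):
    """Discards the base dataset's label; the transform parameter is the target."""
    def __init__(self, base, param_transform):
        self.base = base
        self.param_transform = param_transform

    def __len__(self):
        return len(self.base)

    def __getitem__(self, index):
        img, _ = self.base[index]          # the original label is unused
        img, param = self.param_transform(img)
        return img, param

Here the base dataset would be the stock MNIST with its usual transform pipeline (ToTensor, Resize), and Normalize can either be folded into param_transform or applied afterwards.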

Related

Problem with extracting labels from my dataset

I have an image dataset with 35 classes; all the images are in one folder, and part of each image's name is its label. An example image name looks like this:
D34_Samsung_GalaxyS3Mini-images-flat-D01_I_flat_0001.jpg
And the label becomes D01 here.
In the Dataset class definition, the target variable should return the image label, right? If we consider the index, 34 should be returned for this example.
Here is my code defining the dataset:
import re

from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('_')[0]
        target = re.findall(r'D\d+.+', target)
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        if target[0] in self.class_to_idx:
            target = [self.class_to_idx[target[0]]]
        else:
            self.class_to_idx[target[0]] = len(self.class_to_idx)
            target = [self.class_to_idx[target[0]]]
        return image, target

    def __len__(self):
        return len(self.imgs)
But when I tested it, I realized it does not extract the labels correctly: although there are 35 classes, every label is a number between 0 and 15 (the batch size, since batch_size=16), and an image may get a different label each time the code is executed!
[screenshot: output of the above code]
So I changed the Dataset code: I removed a few lines and obtained the label directly from the image name instead of using class_to_idx:
class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('_')[0]
        target = target.split('D')[1]
        target = int(target)
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, target

    def __len__(self):
        return len(self.imgs)
When I tested it, the numbers were no longer between 0 and 15; the real image labels appeared:
[screenshot: output of the changed code]
My problem is that with the first code the CNN model trains correctly and does not raise an error.
But with the second code (my edit), even though the output looked correct in the test, the model cannot train and raises:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>
      1 criterion, optimizer, scheduler = lossAndOptim(model=model)
      2
----> 3 losses_val, losses_trn, accs_val, accs_trn = train_model(model,
      4                                                 train_dl, valid_dl,
      5                                                 criterion, optimizer,

(4 frames elided)

/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3024     if size_average is not None or reduce is not None:
   3025         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3026     return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
   3027
   3028

ValueError: Expected input batch_size (16) to match target batch_size (0).
Whatever I search for, the answers I find are related to the model. But there is no problem with the model, and it does not give an error with the first code.
Thank you for your advice.
I edited the source code and I googled, but the answers were not related to my problem.
I changed the code as below, and my problem was solved:
"target" needed to be returned as a list, with the label id derived deterministically from the file name.
class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        self.class_to_idx = {}

    def __getitem__(self, index):
        image_path = self.imgs[index]
        target = image_path.split('-')[0]
        label = target.split('_')[0]
        label = label.split('D')[1]
        name = target
        image = Image.open(image_path)
        if self.transform is not None:
            image = self.transform(image)
        if target in self.class_to_idx:
            target = [self.class_to_idx[target]]
        else:
            self.class_to_idx[target] = (int(label) - 1)
            target = [self.class_to_idx[target]]
        return image, target

    def __len__(self):
        return len(self.imgs)
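For reference, the 0-15 labels in the first version follow from class_to_idx starting empty and assigning ids in encounter order: a 16-image test batch can only ever create ids 0-15, and shuffling changes the encounter order (and thus the ids) on every run. A pattern that avoids the problem entirely (a sketch, my suggestion rather than the poster's code) is to build the mapping once, deterministically, in __init__:

import os

class MyDataset(Dataset):
    def __init__(self, imgs, transform=None):
        self.imgs = imgs
        self.transform = transform or transforms.ToTensor()
        # sorted() makes the id assignment stable across runs and workers
        prefixes = sorted({os.path.basename(p).split('-')[0] for p in imgs})
        self.class_to_idx = {c: i for i, c in enumerate(prefixes)}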

Generating text/csv file for image path and mask path for semantic segmentation

I have a huge set of images (60k) and masks (60k) that need to be loaded into a PyTorch DataLoader for semantic segmentation.
Directory Structure:
- Segmentation
  - images
    - color_left_trajectory_3000_00001.jpg
    - color_left_trajectory_3000_00002.jpg
    ...
  - masks
    - color_segmentation_3000_00001.jpg
    - color_segmentation_3000_00002.jpg
    ...
I want to know the most efficient way to load these into a DataLoader in PyTorch. I was thinking of generating a csv file with the paths to the images and masks. How would I go about generating that? Any other suggestions are appreciated!
I recommend that you make a custom subclass of the Dataset class. In __init__, the paths to the images and masks are generated and saved.
This is an example:
import os

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as F
from PIL import Image

class CustomData(Dataset):
    def __init__(self, data_dir='Segmentation', data_transform=None, split='train'):
        self.imgs = []
        self.labels = []
        self.transform = data_transform
        self.data_dir = data_dir
        # if the data were split into train/val subfolders:
        # self.imgs_dir = os.path.join(data_dir, split, 'images')
        # self.labels_dir = os.path.join(data_dir, split, 'masks')
        self.imgs_dir = os.path.join(data_dir, 'images')
        self.labels_dir = os.path.join(data_dir, 'masks')  # 'masks' with .jpg files, per the tree above
        for img_name in os.listdir(self.imgs_dir):
            img_path = os.path.join(self.imgs_dir, img_name)
            # 'color_left_trajectory_3000_00001.jpg' -> 'color_segmentation_3000_00001.jpg'
            label_name = "color_segmentation_" + "_".join(img_name.split('.')[0].split('_')[-2:]) + '.jpg'
            label_path = os.path.join(self.labels_dir, label_name)
            self.imgs.append(img_path)
            self.labels.append(label_path)

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img = Image.open(self.imgs[idx])
        label = Image.open(self.labels[idx])
        if self.transform is not None:
            img, label = self.transform(img, label)
        return img, label

class ToTensor:
    def __call__(self, image, target=None):
        image = F.to_tensor(image)
        if target is not None:
            target = torch.as_tensor(np.array(target), dtype=torch.int64)
        return image, target

if __name__ == '__main__':
    data = CustomData(data_transform=ToTensor())  # note: an instance, not the class
    dataloader = DataLoader(data, batch_size=10)
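If you do still want the text/csv file the question mentions, it can be generated once up front; a minimal sketch, assuming the naming scheme above ('pairs.csv' is a name I chose):

import csv
import os

images_dir = os.path.join('Segmentation', 'images')
masks_dir = os.path.join('Segmentation', 'masks')

rows = [('image', 'mask')]
for img_name in sorted(os.listdir(images_dir)):
    # 'color_left_trajectory_3000_00001.jpg' -> '3000_00001'
    suffix = '_'.join(img_name.split('.')[0].split('_')[-2:])
    mask_name = 'color_segmentation_' + suffix + '.jpg'
    rows.append((os.path.join(images_dir, img_name),
                 os.path.join(masks_dir, mask_name)))

with open('pairs.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)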

WeightedRandomSampler for custom image dataloader pytorch

I am trying to address class imbalance by using a WeightedRandomSampler with a custom data loader. I can't seem to find the best way to implement this. The images are in a folder and the labels are in a csv file. The dataloader code without the weighted random sampler is given below.
class CassavaDataset(Dataset):
    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()  # data
        self.transforms = transforms
        self.data_root = data_root
        self.output_label = output_label

    def __len__(self):
        return self.df.shape[0]  # or len(self.df)

    def __getitem__(self, index: int):
        # get labels
        if self.output_label:
            target = self.df.iloc[index]['label']
        path = "{}/{}".format(self.data_root, self.df.iloc[index]['image_id'])
        img = get_img(path)
        if self.transforms:
            img = self.transforms(image=img)['image']
        # do label smoothing
        if self.output_label == True:
            return img, target
        else:
            return img
What would be the best way to get the weight of each class and feed it to the sampler before augmentation? Thanks in advance!
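No answer was posted in this excerpt, but a common recipe (a sketch of my own, assuming the labels in df['label'] are integer class ids 0..C-1 and that df/data_root match the dataset above) is to weight each sample by the inverse frequency of its class:

import torch
from torch.utils.data import DataLoader, WeightedRandomSampler

# samples per class, ordered by class id
counts = df['label'].value_counts().sort_index()
class_weights = 1.0 / torch.tensor(counts.values, dtype=torch.float)

# one weight per sample, looked up via its label
sample_weights = class_weights[torch.tensor(df['label'].values)]

sampler = WeightedRandomSampler(weights=sample_weights,
                                num_samples=len(sample_weights),
                                replacement=True)

dataset = CassavaDataset(df, data_root, transforms=None)
loader = DataLoader(dataset, batch_size=16, sampler=sampler)  # sampler and shuffle=True are mutually exclusive

The sampler draws indices before __getitem__ runs, so augmentation is unaffected; over-represented classes are simply drawn less often.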

Does the official Keras sample code for a Transformer applied to time series contain a Position Embedding part?

The sample code I am referring to: https://keras.io/examples/timeseries/timeseries_transformer_classification/
I could not find any mention of "Position Embedding" anywhere on that page. When I look at the Transformer applied to NLP, I can clearly see a class named "TokenAndPositionEmbedding".
If the example does not contain a "Position Embedding", how can I apply Position Embedding to time series in the sample code?
From what I can tell it does not contain the positional embedding. Something like this should work.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Layer

class PositionEmbeddingFixedWeights(Layer):
    def __init__(self, sequence_length, vocab_size, output_dim, **kwargs):
        super(PositionEmbeddingFixedWeights, self).__init__(**kwargs)
        word_embedding_matrix = self.get_position_encoding(vocab_size, output_dim)
        position_embedding_matrix = self.get_position_encoding(sequence_length, output_dim)
        self.word_embedding_layer = Embedding(
            input_dim=vocab_size, output_dim=output_dim,
            weights=[word_embedding_matrix],
            trainable=False
        )
        self.position_embedding_layer = Embedding(
            input_dim=sequence_length, output_dim=output_dim,
            weights=[position_embedding_matrix],
            trainable=False
        )

    def get_position_encoding(self, seq_len, d, n=10000):
        # the fixed sinusoidal table from "Attention Is All You Need"
        P = np.zeros((seq_len, d))
        for k in range(seq_len):
            for i in np.arange(int(d / 2)):
                denominator = np.power(n, 2 * i / d)
                P[k, 2 * i] = np.sin(k / denominator)
                P[k, 2 * i + 1] = np.cos(k / denominator)
        return P

    def call(self, inputs):
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding_layer(inputs)
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_words + embedded_indices
This class originated from https://machinelearningmastery.com/the-transformer-positional-encoding-layer-in-keras-part-2/
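For the time-series example itself, where the inputs are continuous features rather than token ids, one option (my assumption, not part of the Keras example; it assumes the features have already been projected to an even model dimension d) is to skip the word embedding and just add the fixed sinusoidal table to the inputs:

import numpy as np
import tensorflow as tf

def add_sinusoidal_positions(x, n=10000):
    # x: (batch, seq_len, d) float tensor of time-series features
    seq_len, d = int(x.shape[1]), int(x.shape[2])
    P = np.zeros((seq_len, d))
    for k in range(seq_len):
        for i in range(d // 2):
            denominator = np.power(n, 2 * i / d)
            P[k, 2 * i] = np.sin(k / denominator)
            P[k, 2 * i + 1] = np.cos(k / denominator)
    # broadcast the (1, seq_len, d) table over the batch dimension
    return x + tf.constant(P[np.newaxis, ...], dtype=x.dtype)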

Dynamic Data Iterator Value Change During Training in Pytorch

My data loader:
from torch.utils.data import Dataset

class LoadData(Dataset):
    def __init__(self, ..., ...., ...):
        self.ns = (640, 640)
        .....
        .....

    def __getitem__(self, index):
        img = resize(img[index], self.ns)
        .....
        .....
        return img

    def set_size(self, ns):
        self.ns = ns
And the training loop:
for img in dataiterator:
    forward(img)
    backward()
    dataiterator.dataset.set_size(new_ns)
I would like to resize the images dynamically after each iteration. When I do it as above, it does not work. Is there a simple trick to do that? Please help.
Best,
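No answer was posted here, but a likely cause (my assumption): with num_workers > 0, each DataLoader worker process holds its own pickled copy of the dataset, so calling set_size on the main-process object never reaches those copies; with num_workers=0 the code above should work as written. A minimal sketch that instead shares the size across processes (already-prefetched batches will still come out at the old size):

import multiprocessing as mp

import torch.nn.functional as F
from torch.utils.data import Dataset

class LoadData(Dataset):
    def __init__(self, images):
        self.images = images                  # e.g. a list of (C, H, W) tensors
        # an integer in shared memory, visible to every worker process
        self._ns = mp.Value('i', 640)

    def set_size(self, ns):
        with self._ns.get_lock():
            self._ns.value = ns

    def __getitem__(self, index):
        ns = self._ns.value
        img = self.images[index]
        # interpolate wants a batch dimension, hence unsqueeze/squeeze
        return F.interpolate(img.unsqueeze(0), size=(ns, ns)).squeeze(0)

    def __len__(self):
        return len(self.images)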
