Optimizing Pytorch implementation of mix-up augmentation - pytorch

So I have this code here for implementing mix-up augmentation. It's incredibly slow and I'm not sure how to make it faster. It seems like there are some operations that are unavoidable and just by nature slow like scaling images by the weight which is 0.5 then summing up each cell seems like a very slow and unavoidable operation. I'm applying this to Reinforcement Learning so I could be augmenting 64 million images, which is why I need it to be a lot faster.
Note: Here's the original author's implementation but I would assume it's equally as slow as it's essentially the same.
import torch
import utils
import os
import torch.nn.functional as F
import torchvision.transforms as TF
import torchvision.datasets as datasets
dataloader = None
data_iter = None
def _load_data(
sub_path: str, batch_size: int = 256, image_size: int = 84, num_workers: int = 16
):
global data_iter, dataloader
for data_dir in utils.load_config("datasets"):
if os.path.exists(data_dir):
fp = os.path.join(data_dir, sub_path)
if not os.path.exists(fp):
print(f"Warning: path {fp} does not exist, falling back to {data_dir}")
dataloader = torch.utils.data.DataLoader(
datasets.ImageFolder(
fp,
TF.Compose(
[
TF.RandomResizedCrop(image_size),
TF.RandomHorizontalFlip(),
TF.ToTensor(),
]
),
),
batch_size=batch_size,
shuffle=True,
num_workers=num_workers,
pin_memory=True,
)
data_iter = iter(dataloader)
break
if data_iter is None:
raise FileNotFoundError(
"failed to find image data at any of the specified paths"
)
print("Loaded dataset from", data_dir)
def _load_places(batch_size=256, image_size=84, num_workers=16, use_val=False):
partition = "val" if use_val else "train"
sub_path = os.path.join("places365_standard", partition)
print(f"Loading {partition} partition of places365_standard...")
_load_data(
sub_path=sub_path,
batch_size=batch_size,
image_size=image_size,
num_workers=num_workers,
)
def _load_coco(batch_size=256, image_size=84, num_workers=16, use_val=False):
sub_path = "COCO"
print(f"Loading COCO 2017 Val...")
_load_data(
sub_path=sub_path,
batch_size=batch_size,
image_size=image_size,
num_workers=num_workers,
)
def _get_data_batch(batch_size):
global data_iter
try:
imgs, _ = next(data_iter)
if imgs.size(0) < batch_size:
data_iter = iter(dataloader)
imgs, _ = next(data_iter)
except StopIteration:
data_iter = iter(dataloader)
imgs, _ = next(data_iter)
return imgs.cuda()
def load_dataloader(batch_size, image_size, dataset="coco"):
if dataset == "places365_standard":
if dataloader is None:
_load_places(batch_size=batch_size, image_size=image_size)
elif dataset == "coco":
if dataloader is None:
_load_coco(batch_size=batch_size, image_size=image_size)
else:
raise NotImplementedError(
f'overlay has not been implemented for dataset "{dataset}"'
)
def random_mixup(x, dataset="coco"):
"""Randomly overlay an image from Places or COCO"""
global data_iter
alpha = 0.5
load_dataloader(batch_size=x.size(0), image_size=x.size(-1), dataset=dataset)
imgs = _get_data_batch(batch_size=x.size(0)).repeat(1, x.size(1) // 3, 1, 1)
return ((1 - alpha) * (x / 255.0) + (alpha) * imgs) * 255.0

To optimize you need to use the GPU. You need to use PyTorch tensors and operations.
Example of how to do this with PyTorch:
import torch
import torch.nn.functional as F
import torchvision.transforms as TF
import torchvision.datasets as datasets
# Load the data
dataloader = torch.utils.data.DataLoader(
datasets.ImageFolder(
'path/to/data',
TF.Compose(
[
TF.RandomResizedCrop(84),
TF.RandomHorizontalFlip(),
TF.ToTensor(),
]
),
),
batch_size=256,
shuffle=True,
num_workers=16,
pin_memory=True,
)
# Get a batch of data
imgs, _ = next(iter(dataloader))
# Create a tensor of random weights
alpha = torch.rand(imgs.size(0), 1, 1, 1)
# Create a tensor of random indices
indices = torch.randint(0, imgs.size(0), (imgs.size(0),))
# Create a tensor of random images
imgs2 = imgs[indices]
# Mix the images
imgs = (1 - alpha) * imgs + alpha * imgs2
# You can also do this with a single line of code:
imgs = (1 - alpha) * imgs + alpha * imgs[torch.randint(0, imgs.size(0), (imgs.size(0),))]

Related

How to apply image transform to a list of images and maintain the right dimensions?

I'm using the Omniglot dataset, which is a set of 19,280 images, each which is 105 x 105 (grayscale).
I defined a custom Dataset class with the following transform:
class OmniglotDataset(Dataset):
def __init__(self, X, transform=None):
self.X = X
self.transform = transform
def __len__(self):
return self.X.shape[0]
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
img = self.X[idx]
if self.transform:
img = self.transform(img)
return img
img_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
X_train.shape
(19280, 105, 105)
train_dataset = OmniglotDataset(X_train, transform=img_transform)
When I index a single image, it returns the right dimensions:
train_dataset[0].shape
torch.Size([1, 105, 105])
But when I index several images, it returns the dimensions in the wrong order (I expect 3 x 105 x 105):
train_dataset[[1,2,3]].shape
torch.Size([105, 3, 105])
You got the error because try apply transformation of single image to list:
A more convenient way to get a batch of any size is to use Dataloader:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
img_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
omniglot = datasets.Omniglot(root='./data', background=True, download=True, transform = img_transform)
data_loader = DataLoader(omniglot, shuffle=False, batch_size = 8)
for image_batch in data_loader:
# now image_batch contain first eight samples
print(image_batch.shape) # torch.Size([8, 1, 105, 105])
break
If you really need to get images in arbitrary order:
from operator import itemgetter
indexes = [1,3,5]
selected_samples = itemgetter(*b)(omniglot)

RuntimeError: expected type torch.cuda.FloatTensor but got torch.FloatTensor

I keep getting the error message below. I cannot seem to pinpoint to the tensor mentioned. Below you'll find the trainer.py and main.py modules. The model I am developing is GAN on CelebA dataset. I am running the code on a remote server so have spent a handful amount of time debugging my model.
This is the full error message:
Traceback (most recent call last):
File "main.py", line 52, in <module>
main(opt)
File "main.py", line 47, in main
trainer.train(train_loader)
File "/home/path/trainer.py", line 45, in train
d_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_org, size_average=False) / out_cls.size(0)
File "/home/path/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2077, in binary_cross_entropy_with_logits
return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
RuntimeError: expected type torch.cuda.FloatTensor but got torch.FloatTensor
trainer.py
from tqdm import tqdm
import torch
import torch.nn.functional as F
from model import Discriminator, Generator
from tensorboardX import SummaryWriter
class Trainer():
def __init__(self, opt):
# Generator
self.G = Generator(64, 5, 6)
# Discriminator
self.D = Discriminator(128, 64, 5, 6)
# Generator optimizer
self.g_optimizer = torch.optim.Adam(self.G.parameters(), opt.lr)
self.d_optimizer = torch.optim.Adam(self.D.parameters(), opt.lr)
self.opt = opt
if self.opt.cuda:
self.G = self.G.cuda()
self.D = self.D.cuda()
def train(self, data_loader):
"""Function to train the model
"""
print('Training model')
writer_d = SummaryWriter('runs/disc') # discriminator writer
writer_g = SummaryWriter('runs/gen') # generator writer
print('Start training...')
for epoch in tqdm(range(self.opt.epochs)):
for x_real, label_org in tqdm(data_loader):
pass
# Generate target domain labels randomly.
rand_idx = torch.randperm(label_org.size(0))
label_trg = label_org[rand_idx]
c_org = label_org.clone()
c_trg = label_org.clone()
if self.opt.cuda:
x_real = x_real.cuda() # Input images
c_org = c_org.cuda() # Original domain labels
c_trg = c_trg.cuda() # Target domain labels
label_org = label_org.cuda() # Labels for computing classification loss
label_trg = label_trg.cuda() # Labels for computing classification loss
out_src, out_cls = self.D(x_real)
d_loss_real = - torch.mean(out_src)
d_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_org, size_average=False) / out_cls.size(0)
# Compute loss with fake images
x_fake = self.G(x_real, c_trg)
out_src, out_cls = self.D(x_fake.detach())
d_loss_fake = torch.mean(out_src)
# Compute loss for gradient penalty
alpha = torch.rand(x_real.size(0), 1, 1, 1).cuda()
x_hat = (alpha * x_real.data + (1 - alpha) * x_fake.data).requires_grad_(True)
out_src, _ = self.D(x_hat)
# Backward and optimize
d_loss = d_loss_real + d_loss_fake + d_loss_cls
self.g_optimizer.zero_grad()
self.d_optimizer.zero_grad()
d_loss.backward()
self.d_optimizer.step()
if (i + 1) % 2 == 0:
# Original-to-target domain
x_fake = self.G(x_real, c_trg)
out_src, out_cls = self.D(x_fake)
g_loss_fake = - torch.mean(out_src)
g_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_trg, size_average=False) / out_cls.size(0)
# Target-to-original domain
x_reconst = self.G(x_fake, c_org)
g_loss_rec = torch.mean(torch.abs(x_real - x_reconst))
# Backward and optimize
g_loss = g_loss_fake + g_loss_rec
self.g_optimizer.zero_grad()
self.d_optimizer.zero_grad()
g_loss.backward()
self.g_optimizer.step()
# write loss to tensorboard
writer_d.add_scalar('data/loss', d_loss, epoch)
writer_d.add_scalar('data/loss', g_loss, epoch)
print('Finished Training')
def test(self, data_loader):
with torch.no_grad():
for i, (x_real, c_org) in enumerate(data_loader):
# Prepare input images and target domain labels.
if self.opt.cuda:
x_real = x_real.cuda()
# Translate images.
x_fake_list = [x_real]
for c_trg in c_trg_list:
x_fake_list.append(self.G(x_real, c_trg))
main.py
import argparse
import random
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from preprocess import pre_process
from celeb_dataset import CelebDataset
from trainer import Trainer
# Setting up the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
parser.add_argument('--batchSize', type=int, default=8, help='input batch size')
parser.add_argument('--epochs', type=int, default=20, help='number of epochs to train')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('--dataset_path', type=str, default='./data/celeba', help='dataset path')
opt = parser.parse_args()
print(opt)
if opt.manualSeed is None:
opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)
def main(opt):
# Setup the parameters for the training/testing
params = {
'batch_size': opt.batchSize,
'shuffle': True,
'num_workers': opt.workers
}
# preprocess and setup dataset and datalader
processed_data = pre_process(opt.dataset_path)
train_dataset = CelebDataset(processed_data[:-2000])
test_dataset = CelebDataset(processed_data[2000:])
train_loader = DataLoader(train_dataset, **params)
test_loader = DataLoader(test_dataset, **params)
trainer = Trainer(opt)
trainer.train(train_loader)
trainer.test(test_loader)
if __name__ == "__main__":
main(opt)
You are getting that error because one of out_cls, label_org is not on the GPU.
Where does your code enact the parser.add_argument('--cuda', action='store_true', help='enables cuda') option?
Perhaps something like:
trainer = Trainer(opt)
if opt.cuda:
trainer = trainer.cuda()

How to integrate LIME with PyTorch?

Using this mnist image classification model :
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
os.mkdir(root)
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64
train_loader = torch.utils.data.DataLoader(
dataset=train_set,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(
dataset=test_set,
batch_size=batch_size,
shuffle=True)
class NeuralNet(nn.Module):
def __init__(self):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(28*28, 500)
self.fc2 = nn.Linear(500, 256)
self.fc3 = nn.Linear(256, 2)
def forward(self, x):
x = x.view(-1, 28*28)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
print(len(values_0_or_1))
print(len(values_0_or_1_testset))
train_loader_subset = torch.utils.data.DataLoader(
dataset=values_0_or_1,
batch_size=batch_size,
shuffle=True)
test_loader_subset = torch.utils.data.DataLoader(
dataset=values_0_or_1_testset,
batch_size=batch_size,
shuffle=False)
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader_subset):
# Move tensors to the configured device
images = images.reshape(-1, 2).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (epoch) % print_progress_every_n_epochs == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
predicted_test = []
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
probs_l = []
predicted_values = []
actual_values = []
labels_l = []
with torch.no_grad():
for images, labels in test_loader_subset:
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
predicted_test.append(predicted.cpu().numpy())
sm = torch.nn.Softmax()
probabilities = sm(outputs)
probs_l.append(probabilities)
labels_l.append(labels.cpu().numpy())
predicted_values.append(np.concatenate(predicted_test).ravel())
actual_values.append(np.concatenate(labels_l).ravel())
if (epoch) % 1 == 0:
print('test accuracy : ', 100 * len((np.where(np.array(predicted_values[0])==(np.array(actual_values[0])))[0])) / len(actual_values[0]))
I'm to attempting to integrate 'Local Interpretable Model-Agnostic Explanations for machine learning classifiers' : https://marcotcr.github.io/lime/
It appears PyTorch support is not enabled as it is not mentioned in doc and following tutorial :
https://marcotcr.github.io/lime/tutorials/Tutorial%20-%20images.html
With my updated code for PyTorch :
from lime import lime_image
import time
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(images[0].reshape(28,28), model(images[0]), top_labels=5, hide_color=0, num_samples=1000)
Causes error :
/opt/conda/lib/python3.6/site-packages/skimage/color/colorconv.py in gray2rgb(image, alpha)
830 is_rgb = False
831 is_alpha = False
--> 832 dims = np.squeeze(image).ndim
833
834 if dims == 3:
AttributeError: 'Tensor' object has no attribute 'ndim'
So appears tensorflow object is expected here ?
How to integrate LIME with PyTorch image classification ?
Here's my solution:
Lime expects an image input of type numpy. This is why you get the attribute error and a solution would be to convert the image (from Tensor) to numpy before passing it to the explainer object. Another solution would be to select a specific image with the test_loader_subset and convert it with img = img.numpy().
Secondly, in order to make LIME work with pytorch (or any other framework), you'll need to specify a batch prediction function which outputs the prediction scores of each class for each image. The name of this function (here I've called it batch_predict) is then passed to explainer.explain_instance(img, batch_predict, ...). The batch_predict needs to loop through all images passed to it, convert them to Tensor, make a prediction and finally return the prediction score list (with numpy values). This is how I got it working.
Note also that the images need to have shape (... ,... ,3) or (... ,... ,1) in order to be properly segmented by the default segmentation algorithm. This means that you might have to use np.transpose(img, (...)). You may specify the segmentation algorithm as well if the results are poor.
Finally you'll need to display the LIME image mask on top of the original image. This snippet shows how this may be done:
from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
img_boundry = mark_boundaries(temp, mask)
plt.imshow(img_boundry)
plt.show()
This notebook is a good reference:
https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20images%20-%20Pytorch.ipynb

how to fix capsule training problem for a single class of MNIST dataset?

I am training a Capsule Network with both encoder and decoder part. It works perfectly fine with all the classes (10 classes) of the MNIST data set. But when I am extracting a single class say (class 0 or class 5) and then training the capsule network, the reconstruction of the image is very poor.
Where do I need to change the network setting, or do I have an error in my data preparation?
I tried:
I changed the total class from 10 (for ten digits to 1 for 1 digit and even for 2 for 2 digits).
When I am using the default MNIST dataset, I am getting no error or tensor size, but when I am extracting a particular class and then passing it into the network, I am facing issues like a) Dimensional Issues b) Float tensor warning.
I fixed these things but manually adding a dimension and converting the data to data.float().cuda() tensor. I did this for both the case i.e when I am using the 10 Digit Capsules and when I am using the 1 Digit Capsules for training a single class digit.
But after this, the network is running fine, but I am getting really blurred and poor reconstructions. While when I am training the whole MNIST dataset without extracting any class and passing it to the network, it doesn't throw any error and the reconstruction works really fine.
I would love to share the more detail and other parts of the code -
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Adam
from torchvision import datasets, transforms
USE_CUDA = True
### **Here we prepare the data for the complete 10 class digit training**###
class Mnist:
def __init__(self, batch_size):
dataset_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=dataset_transform)
test_dataset = datasets.MNIST('../data', train=False, download=True, transform=dataset_transform)
self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
## **Here is my code for extracting a single class digit extraction**##
class Mnist:
def __init__(self,batch_size):
dataset_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
train_mnist = datasets.MNIST("../data", train=True)
test_mnist = datasets.MNIST("../data", train= False)
train_image, train_label = train_mnist.train_data, train_mnist.train_labels
test_image, test_label = test_mnist.test_data, test_mnist.test_labels
train_0, test_0 = [train_image[key] for (key, label) in enumerate(train_label) if int(label) == 5],[test_image[key] for (key, label) in enumerate(test_label) if int(label) == 5]
train_label_0, test_label_0 = zero__train = [train_label[key] for (key, label) in enumerate(train_label) if int(label) == 5],[test_label[key] for (key, label) in enumerate(test_label) if int(label) == 5]
train_dataset = tuple(zip(train_0, train_label_0))
test_dataset = tuple(zip(test_0, test_label_0))
self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
# Here is the main code for the capsule training.
''' The below code is used for training the 1 class but using the 10 Digit capsules
'''
class ConvLayer(nn.Module):
def __init__(self, in_channels=1, out_channels=256, kernel_size=9):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=1
)
def forward(self, x):
return F.relu(self.conv(x))
class PrimaryCaps(nn.Module):
def __init__(self, num_capsules=8, in_channels=256, out_channels=32, kernel_size=9):
super(PrimaryCaps, self).__init__()
self.capsules = nn.ModuleList([
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=2, padding=0)
for _ in range(num_capsules)])
def forward(self, x):
u = [capsule(x) for capsule in self.capsules]
u = torch.stack(u, dim=1)
u = u.view(x.size(0), 32 * 6 * 6, -1)
return self.squash(u)
def squash(self, input_tensor):
squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
output_tensor = squared_norm * input_tensor / ((1. + squared_norm) * torch.sqrt(squared_norm))
return output_tensor
class DigitCaps(nn.Module):
def __init__(self, num_capsules=10, num_routes=32 * 6 * 6, in_channels=8, out_channels=16):
super(DigitCaps, self).__init__()
self.in_channels = in_channels
self.num_routes = num_routes
self.num_capsules = num_capsules
self.W = nn.Parameter(torch.randn(1, num_routes, num_capsules, out_channels, in_channels))
def forward(self, x):
batch_size = x.size(0)
x = torch.stack([x] * self.num_capsules, dim=2).unsqueeze(4)
# print(f"x at epoch {epoch} is equal to : {x}")
W = torch.cat([self.W] * batch_size, dim=0)
# print(f"W at epoch {epoch} is equal to : {W}")
u_hat = torch.matmul(W, x)
# print(f"u_hatat epoch {epoch} is equal to : {u_hat}")
b_ij = Variable(torch.zeros(1, self.num_routes, self.num_capsules, 1))
if USE_CUDA:
b_ij = b_ij.cuda()
# print(f"b_ij at epoch {epoch} is equal to : {b_ij}")
num_iterations = 3
for iteration in range(num_iterations):
c_ij = F.softmax(b_ij, dim =1)
c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)
s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)
v_j = self.squash(s_j)
# print(f"b_ij at iteration {iteration} is equal to : {b_ij}")
if iteration < num_iterations - 1:
a_ij = torch.matmul(u_hat.transpose(3, 4), torch.cat([v_j] * self.num_routes, dim=1))
b_ij = b_ij + a_ij.squeeze(4).mean(dim=0, keepdim=True)
return v_j.squeeze(1)
def squash(self, input_tensor):
squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
output_tensor = squared_norm * input_tensor / ((1. + squared_norm) * torch.sqrt(squared_norm))
return output_tensor
class Decoder(nn.Module):
def __init__(self):
super(Decoder, self).__init__()
self.reconstraction_layers = nn.Sequential(
nn.Linear(16 * 10, 512),
nn.ReLU(inplace=True),
nn.Linear(512, 1024),
nn.ReLU(inplace=True),
nn.Linear(1024, 784),
nn.Sigmoid()
)
def forward(self, x, data):
classes = torch.sqrt((x ** 2).sum(2))
classes = F.softmax(classes, dim =1)
_, max_length_indices = classes.max(dim=1)
masked = Variable(torch.sparse.torch.eye(10))
if USE_CUDA:
masked = masked.cuda()
masked = masked.index_select(dim=0, index=max_length_indices.squeeze(1).data)
reconstructions = self.reconstraction_layers((x * masked[:, :, None, None]).view(x.size(0), -1))
reconstructions = reconstructions.view(-1, 1, 28, 28)
return reconstructions, masked
class CapsNet(nn.Module):
def __init__(self):
super(CapsNet, self).__init__()
self.conv_layer = ConvLayer()
self.primary_capsules = PrimaryCaps()
self.digit_capsules = DigitCaps()
self.decoder = Decoder()
self.mse_loss = nn.MSELoss()
def forward(self, data):
output = self.digit_capsules(self.primary_capsules(self.conv_layer(data)))
reconstructions, masked = self.decoder(output, data)
return output, reconstructions, masked
def loss(self, data, x, target, reconstructions):
return self.margin_loss(x, target) + self.reconstruction_loss(data, reconstructions)
# return self.reconstruction_loss(data, reconstructions)
def margin_loss(self, x, labels, size_average=True):
batch_size = x.size(0)
v_c = torch.sqrt((x**2).sum(dim=2, keepdim=True))
left = F.relu(0.9 - v_c).view(batch_size, -1)
right = F.relu(v_c - 0.1).view(batch_size, -1)
# print(f"shape of labels, left and right respectively - {labels.size(), left.size(), right.size()}")
loss = labels * left + 0.5 * (1.0 - labels) * right
loss = loss.sum(dim=1).mean()
return loss
def reconstruction_loss(self, data, reconstructions):
loss = self.mse_loss(reconstructions.view(reconstructions.size(0), -1), data.view(reconstructions.size(0), -1))
return loss*0.0005
capsule_net = CapsNet()
if USE_CUDA:
capsule_net = capsule_net.cuda()
optimizer = Adam(capsule_net.parameters())
capsule_net
##### Here is the problem while training####
batch_size = 100
mnist = Mnist(batch_size)
n_epochs = 5
for epoch in range(n_epochs):
capsule_net.train()
train_loss = 0
for batch_id, (data, target) in enumerate(mnist.train_loader):
target = torch.eye(10).index_select(dim=0, index=target)
data, target = Variable(data), Variable(target)
if USE_CUDA:
data, target = data.cuda(), target.cuda()
data, target = data.float().cuda(), target.float().cuda() # Here I changed the data to float and it's required only when I am using my extracted dataset for a single class
data = data[:,:,:] # Use this when 1st MNist data is used
# data = data[:,None,:,:] # Use this when I am using my extracted single class digits
optimizer.zero_grad()
output, reconstructions, masked = capsule_net(data)
loss = capsule_net.loss(data, output, target, reconstructions)
loss.backward()
optimizer.step()
train_loss += loss.item()
# if batch_id % 100 == 0:
# print ("train accuracy:", sum(np.argmax(masked.data.cpu().numpy(), 1) ==
# np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size))
print (train_loss / len(mnist.train_loader))
I used this to see the main data as image and the reconstructed image
import matplotlib
import matplotlib.pyplot as plt
def plot_images_separately(images):
"Plot the six MNIST images separately."
fig = plt.figure()
for j in range(1, 10):
ax = fig.add_subplot(1, 10, j)
ax.matshow(images[j-1], cmap = matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()
plot_images_separately(data[:10,0].data.cpu().numpy())
plot_images_separately(reconstructions[:10,0].data.cpu().numpy())
I checked the normal performing code and then the problematic one, I found that the dataset passed into the network was of not same nature. The problems were -
The MNIST data extracted for a single class was not transformed into tensor and no normalization was applied, although I tried passing it through the transformation.
This is what I did to fix it -
I created transformation objections and tensor objection and then passed by list comprehension elements to it. Below are the codes and the final output of my network -
Preparing class 0 dataset (dataset for the digit 5)
class Mnist:
trans = transforms.ToTensor()
normalize = transforms.Normalize((0.1307,), (0.3081,))
def init(self,batch_size):
dataset_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
trans = transforms.ToTensor()
normalize = transforms.Normalize((0.1307,), (0.3081,))
train_mnist = datasets.MNIST("../data", train=True, transform=dataset_transform)
test_mnist = datasets.MNIST("../data", train= False, transform=dataset_transform)
train_image, train_label = train_mnist.train_data, train_mnist.train_labels
test_image, test_label = test_mnist.test_data, test_mnist.test_labels
train_0, test_0 = [normalize(trans(train_image[key].unsqueeze(2).numpy())) for (key, label) in enumerate(train_label) if int(label) == 5],[test_image[key] for (key, label) in enumerate(test_label) if int(label) == 5]
train_label_0, test_label_0 = zero__train = [train_label[key] for (key, label) in enumerate(train_label) if int(label) == 5],[test_label[key] for (key, label) in enumerate(test_label) if int(label) == 5]
train_dataset = tuple(zip(train_0, train_label_0))
test_dataset = tuple(zip(test_0, test_label_0))
self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
enter image description here

Batchsize in input shape of chainer CNN

I have a training set of 9957 images. The training set has shape (9957, 3, 60, 80).
Is batchsize required when putting training set to model?
If required can the original shape be considered correct for fitting to conv2D layer or do I need to add batchsize to input_shape?
X_train.shape
(9957, 60,80,3)
from chainer.datasets import split_dataset_random
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
def __init__(self, X, labels):
super(MyDataset, self).__init__()
self.X_ = X
self.labels_ = labels
self.size_ = X.shape[0]
def __len__(self):
return self.size_
def get_example(self, i):
return np.transpose(self.X_[i, ...], (2, 0, 1)), self.labels_[i]
batch_size = 3
label_train = y_trainHot1
dataset = MyDataset(X_train1, label_train)
dataset_train, valid = split_dataset_random(dataset, 8000, seed=0)
train_iter = iterators.SerialIterator(dataset_train, batch_size)
valid_iter = iterators.SerialIterator(valid, batch_size, repeat=False,
shuffle=False)
The code below tells you that you do not have to care the batch-size by yourself. You just use DatsetMixin and SerialIterator as is instructed in the tutorial of chainer.
from chainer.dataset import DatasetMixin
from chainer.iterators import SerialIterator
import numpy as np
NUM_IMAGES = 9957
NUM_CHANNELS = 3 # RGB
IMAGE_WIDTH = 60
IMAGE_HEIGHT = 80
NUM_CLASSES = 10
BATCH_SIZE = 32
TRAIN_SIZE = min(8000, int(NUM_IMAGES * 0.9))
images = np.random.rand(NUM_IMAGES, NUM_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT)
labels = np.random.randint(0, NUM_CLASSES, (NUM_IMAGES,))
class MyDataset(DatasetMixin):
def __init__(self, images_, labels_):
# note: input arg.'s tailing underscore is just to avoid shadowing
super(MyDataset, self).__init__()
self.images_ = images_
self.labels_ = labels_
self.size_ = len(labels_)
def __len__(self):
return self.size_
def get_example(self, i):
return self.images_[i, ...], self.labels_[i]
dataset_train = MyDataset(images[:TRAIN_SIZE, ...], labels[:TRAIN_SIZE])
dataset_valid = MyDataset(images[TRAIN_SIZE:, ...], labels[TRAIN_SIZE:])
train_iter = SerialIterator(dataset_train, BATCH_SIZE)
valid_iter = SerialIterator(dataset_valid, BATCH_SIZE, repeat=False, shuffle=False)
###############################################################################
"""This block is just for the confirmation.
.. note: NOT recommended to call :func:`concat_examples` in your code.
Use :class:`chainer.updaters.StandardUpdater` instead.
"""
from chainer.dataset import concat_examples
batch_image, batch_label = concat_examples(next(train_iter))
print("batch_image.shape\n{}".format(batch_image.shape))
print("batch_label.shape\n{}".format(batch_label.shape))
Output
batch_image.shape
(32, 3, 60, 80)
batch_label.shape
(32,)
It should be noted that chainer.dataset.concat_example is a little bit tricky part. Usually, the users do not pay attention to this function, if you use StandardUpdater which conceals the native function chainer.dataset.concat_example.
Since chainer is designed on the scheme of Trainer, (Standard)Updater, some Optimizer, (Serial)Iterator and Dataset(Mixin), if you do not follow this scheme, you have to dive into the sea of chainer source code.

Resources