KL Divergence goes NaN on Bayesian Convolutional Neural Network - pytorch

I'm trying to implement a Bayesian Convolutional Neural Network using Pytorch on Python 3.7. I mainly orient myself on Shridhar's implementation. When running my CNN with normalized and MNIST data, the KL Divergence is NaN after a couple of iterations. I already implemented linear layers the same way and they worked perfectly fine.
I normalized the data as follows:
train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])), batch_size=BATCH_SIZE, shuffle=True, **LOADER_KWARGS)
eval_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])), batch_size=EVAL_BATCH_SIZE, shuffle=False, **LOADER_KWARGS)
My implementation of the Conv-Layer looks as follows:
class BayesianConv2d(nn.Module):
def __init__(self, in_channels, out_channels, prior_sigma, kernel_size, stride=1, padding=0, dilation=1, groups=1):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.normal = torch.distributions.Normal(0,1)
# conv-parameters
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
# Weight parameters
self.weight_mu = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size).uniform_(0, 0.1))
self.weight_rho = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size).uniform_(-3,0.1))
self.weight_sigma = 0
self.weight = 0
# Bias parameters
self.bias_mu = nn.Parameter(torch.Tensor(out_channels).uniform_(0, 0.1))
self.bias_rho = nn.Parameter(torch.Tensor(out_channels).uniform_(-3,0.1))
self.bias_sigma = 0
self.bias = 0
# prior
self.prior_sigma = prior_sigma
def forward(self, input, sample=False, calculate_log_probs=False):
# compute sigma out of rho: sigma = log(1+e^rho)
self.weight_sigma = torch.log1p(torch.exp(self.weight_rho))
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
# sampling process -> use local reparameterization trick
activations_mu = F.conv2d(input.to(DEVICE), self.weight_mu, self.bias_mu, self.stride, self.padding, self.dilation, self.groups)
activations_sigma = torch.sqrt(1e-16 + F.conv2d((input**2).to(DEVICE), self.weight_sigma**2, self.bias_sigma**2, self.stride, self.padding, self.dilation, self.groups))
activation_epsilon = Variable(self.weight_mu.data.new(activations_sigma.size()).normal_(mean=0, std=1))
outputs = activations_mu + activations_sigma * activation_epsilon
if self.training or calculate_log_probs:
self.kl_div = 0.5 * ((2 * torch.log(self.prior_sigma / self.weight_sigma) - 1 + (self.weight_sigma / self.prior_sigma).pow(2) + ((0 - self.weight_mu) / self.prior_sigma).pow(2)).sum() \
+ (2 * torch.log(0.1 / self.bias_sigma) - 1 + (self.bias_sigma / 0.1).pow(2) + ((0 - self.bias_mu) / 0.1).pow(2)).sum())
return outputs
The implementation of the corresponding Conv-Net looks as follows:
class BayesianConvNetwork(nn.Module):
# Set up network by definining layers
def __init__(self):
super().__init__()
self.conv1 = layers.BayesianConv2d(1, 24, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.conv2 = layers.BayesianConv2d(24, 48, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.conv3 = layers.BayesianConv2d(48, 64, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool3 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.fcl1 = layers.BayesianLinearWithLocalReparamTrick(4*4*64, 256, prior_sigma=0.1)
self.fcl2 = layers.BayesianLinearWithLocalReparamTrick(256, 10, prior_sigma=0.1)
# define forward function by assigning corresponding activation functions to layers
def forward(self, x, sample=False):
x = F.relu(self.conv1(x, sample))
x = self.pool1(x)
x = F.relu(self.conv2(x, sample))
x = self.pool2(x)
x = F.relu(self.conv3(x, sample))
x = self.pool3(x)
x = x.view(-1, 4*4*64)
x = F.relu(self.fcl1(x, sample))
x = F.log_softmax(self.fcl2(x, sample), dim=1)
return x
# summing up KL-divergences to obtain overall KL-divergence-value
def total_kl_div(self):
return (self.conv1.kl_div + self.conv2.kl_div + self.conv3.kl_div + self.fcl1.kl_div + self.fcl2.kl_div)
# sampling prediction: perform prediction for each of the "different networks" that result from the weight distributions
def sample_elbo(self, input, target, batch_idx, nmbr_batches, samples=SAMPLES):
outputs = torch.zeros(samples, target.shape[0], CLASSES).to(DEVICE)
kl_divs = torch.zeros(samples).to(DEVICE)
for i in range(samples): # sample through networks
outputs[i] = self(input, sample=True) # perform prediction
kl_divs[i] = self.total_kl_div() # calculate total kl_div of the network
kl_div = kl_divs.mean() # compute mean kl_div from all samples
negative_log_likelihood = F.nll_loss(outputs.mean(0), target, size_average=False)
loss = kl_weighting * kl_div + negative_log_likelihood
return loss
Has anyone faced the same issue or knows how to solve it?
Many thanks in advance!

I figured out that it appears to be an issue with the SGD-optimizer. Using Adam as optimizer solved the problem though I don't know the reason for that. If anyone has an answer on why it works with Adam but not with SGD, feel free to comment.

Related

How to increase PyTorch's AI's accuracy in image classifier?

I am trying to build a powerful image classifier.
But I have an issue. I use CIFRAS-100 dataset, and I trained a model from it.
Issue here that the correct classificatons are equal to 15%.
I tried continuing learn process, but after 2-3 attempts, model has not changed.
Code that I used for training:
import torch
import sys,os
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
batch_size = 4
trainset = torchvision.datasets.CIFAR100(root='./dataone', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR100(root='./dataone', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
shuffle=False, num_workers=2)
classes = ('aquatic mammals','fish','flowers','food containers','fruit and vegetables','household electrical devices','household furniture','insects','large carnivores','large man-made outdoor things','large natural outdoor scenes','large omnivores and herbivores','medium-sized mammals','non-insect invertebrates','people','reptiles','small mammals','trees','vehicles 1','vehicles 2')
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 100)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = torch.flatten(x, 1) # flatten all dimensions except batch
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
import torch.optim as optim
PATH = "./model.pt"
model = Net()
net = Net()
print(os.path.exists(PATH))
if os.path.exists(PATH):
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
print("using checkpoint")
#model.eval()
# - or -
model.train()
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
print("training..")
# print statistics
#running_loss += loss.item()
#if i % 2000 == 1999: # print every 2000 mini-batches
# print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
# running_loss = 0.0
print('Finished Training')
#PATH = './cifar_net.pth'
#torch.save(net.state_dict(), PATH)
EPOCH = 5
LOSS = 0.4
torch.save({
'epoch': EPOCH,
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': LOSS,
}, PATH)```
It's based on PyTorch tutorial about image cassifiers, that can be found [here](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html).
I took code for resuming training from [here.](https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html)
Code that I used for testing model:
import torch
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
batch_size = 4
trainset = torchvision.datasets.CIFAR100(root='./dataone', train=False,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR100(root='./dataone', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
shuffle=False, num_workers=2)
classes = ('aquatic mammals','fish','flowers','food containers','fruit and vegetables','household electrical devices','household furniture','insects','large carnivores','large man-made outdoor things','large natural outdoor scenes','large omnivores and herbivores','medium-sized mammals','non-insect invertebrates','people','reptiles','small mammals','trees','vehicles 1','vehicles 2')
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 100)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = torch.flatten(x, 1) # flatten all dimensions except batch
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
net = Net()
PATH = './cifar_net.pth'
net.load_state_dict(torch.load(PATH))
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
for data in testloader:
images, labels = data
# calculate outputs by running images through the network
outputs = net(images)
# the class with the highest energy is what we choose as prediction
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(correct)
print(total)
print(f'Accuracy of the network on the 100000 test images: {100 * correct // total} %')```
It's from the same image classifier tutorial by PyTorch. I added printing total and correct detected images for testing.
How can I increase accuracy, so it will be at least around 50-70%?
Or is this normal, and it means that these 15% are incorrect?
Please help.
Have you tried increasing the number of epochs? Training usually requires hundreds to thousands of iterations to obtain good results.
You could also improve the architecture by continuing the convolutional layers until you are left with a 1×1×N image where N is the number of filters in the final convolution. Then flatten and add linear layer(s). Batch Normalization and LeakyReLU activation before pooling layers may also help. Finally, you should use Softmax activation on the output since you are dealing with a classifier.
I highly recommend looking into popular classifiers such as VGG and ResNet. ResNet in particular has a feature called "residual/skip connections" that passes a copy of the output of a layer forward down the line to compensate for feature loss.
Could you provide accuracies and loss plots so we can understand better what is happening in the training (or maybe the list of accuracies and losses during training).
Also, it is a good practice to compute the validation accuracy and loss after every epoch to monitor the behaviour of the network on unseen data.
Although, as it has been said by Xynias, there are some improvements you could do on your architecture I believe the first step would be to investigate from the accuracies and losses.
Given CIFAR100 having 100 classes, this is expectable. You'll need a resonably complex network to perform well on this task. Definitely more feature maps, starting with 64 or more channels.
This Q&D architecture surpasses 50% overall accuracy after 10 epochs or so (using learning rate of 0.1 and batch size of 256, I also added RandomHorizontalFlip() transform):
class Net(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Conv2d(3, 128, 3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(128, 128, 3, stride=1, padding=1),
nn.ReLU(),
nn.AvgPool2d(2, 2),
nn.Conv2d(128, 256, 3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(256, 256, 3, stride=1, padding=1),
nn.ReLU(),
nn.AvgPool2d(2, 2),
nn.Flatten(),
nn.Dropout(0.5),
nn.Linear(16384, 100),
)
def forward(self, x):
return self.layers(x)
For a better result you may try implementing something ResNet-like, or utilize a premade (and possibly pretrained) model, for example, using timm:
import timm
net = timm.create_model('resnet18d', pretrained=True, num_classes=100)
It achieves your target metrics pretty fast with the same parameters as above.

RuntimeError: shape '[64, 3, 32, 32]' is invalid for input of size 49152

I'm trying to train a CNN model with Cifar10 dataset and I get this error:
8 optimizer.zero_grad()
9 input, target = batch
---> 10 input = input.view(batch_size, n_channel, 32, 32)
11 output = model(input)
12 loss = loss_fn(output, target)
RuntimeError: shape '[64, 3, 32, 32]' is invalid for input of size 49152
Can someone help me? I cant solve this error.
49512=64x3x16x16 but ı dont understand where does this number come from? (I'm beginner with pytorch)
My code:
...
class CNNModel(nn.Module):
def __init__(self) -> None:
super().__init__()
self.cnn1 = nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1)
self.relu1 = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(2)
self.cnn2 = nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1)
self.relu2 = nn.ReLU()
self.maxpool2 = nn.MaxPool2d(2)
self.fc1 = nn.Linear(8*8*16, 256)
self.fc2 = nn.Linear(256, 10)
def forward(self, x):
x = self.cnn1(x)
x = self.relu1(x)
x = self.maxpool1(x)
x = self.cnn2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
out = self.fc2(x)
return out
model = CNNModel()
model
def train_model(model, train_loader, test_loader, loss_fn, optimizer, epochs = epochs):
for epoch in range(1, epochs + 1):
training_loss = .0
validation_loss = .0
model.train()
for batch in train_loader:
optimizer.zero_grad()
input, target = batch
input = input.view(batch_size, n_channel, 32, 32)
output = model(input)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
training_loss += loss.data
model.eval()
num_correct = 0
num_examples = len(test_loader.dataset)
for batch in test_loader:
input, target = batch
input = input.view(batch_size, n_channel, 32, 32)
output = model(input)
loss = loss_fn(output, target)
validation_loss += loss.data
predicted = torch.max(output.data, 1)[1]
num_correct += (predicted == target).sum()
accuracy = 100 * num_correct / num_examples
print("Epoch: {}".format(epoch), "\n",
"Training loss: {:.2f}".format(training_loss), "\n",
"Accuracy: {:.2f}".format(accuracy), "\n",
"Validation loss: {:.2f}".format(validation_loss)
)
I need to write some more to get Stackoverflow to accept my question. ignore this sentence :)
Think view() as a rearrangement. For example, let's say we have input like
input = torch.randn(1,3,32,32)
with declaring the input we say input should have 1 batch size, 3 channels, and 32x32 width and height.
With view, we can rearrange these dimensions like
input=input.view(1,3*2*2,16,16)
So what we've seen from here is that without changing the total number (in our case this is equal to 133232 = 13221616) you can change your shape.
For the solution,
print(input.shape)
and then look at your dimension and then change accordingly with keeping in mind the total number must not be changed.

Why am I getting the error ValueError: Expected input batch_size (4) to match target batch_size (64)?

Why am I getting the error ValueError: Expected input batch_size (4) to match target batch_size (64)?
Is it something to do with an incorrect number of channels(?) in the first linear layer? In this example I have 128 *4 *4 as the channel.
I have tried looking online and on this site for the answer but I have not been able to find it. So, I asked here.
Here is the network:
class Net(nn.Module):
"""A representation of a convolutional neural network comprised of VGG blocks."""
def __init__(self, n_channels):
super(Net, self).__init__()
# VGG block 1
self.conv1 = nn.Conv2d(n_channels, 64, (3,3))
self.act1 = nn.ReLU()
self.pool1 = nn.MaxPool2d((2,2), stride=(2,2))
# VGG block 2
self.conv2 = nn.Conv2d(64, 64, (3,3))
self.act2 = nn.ReLU()
self.pool2 = nn.MaxPool2d((2,2), stride=(2,2))
# VGG block 3
self.conv3 = nn.Conv2d(64, 128, (3,3))
self.act3 = nn.ReLU()
self.pool3 = nn.MaxPool2d((2,2), stride=(2,2))
# Fully connected layer
self.f1 = nn.Linear(128 * 4 * 4, 1000)
self.act4 = nn.ReLU()
# Output layer
self.f2 = nn.Linear(1000, 10)
self.act5 = nn.Softmax(dim=1)
def forward(self, X):
"""This function forward propagates the input."""
# VGG block 1
X = self.conv1(X)
X = self.act1(X)
X = self.pool1(X)
# VGG block 2
X = self.conv2(X)
X = self.act2(X)
X = self.pool2(X)
# VGG block 3
X = self.conv3(X)
X = self.act3(X)
X = self.pool3(X)
# Flatten
X = X.view(-1, 128 * 4 * 4)
# Fully connected layer
X = self.f1(X)
X = self.act4(X)
# Output layer
X = self.f2(X)
X = self.act5(X)
return X
Here is the training loop:
def training_loop(
n_epochs,
optimizer,
model,
loss_fn,
train_loader):
for epoch in range(1, n_epochs + 1):
loss_train = 0.0
for i, (imgs, labels) in enumerate(train_loader):
outputs = model(imgs)
loss = loss_fn(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loss_train += loss.item()
if epoch == 1 or epoch % 10 == 0:
print('{} Epoch {}, Training loss {}'.format(
datetime.datetime.now(),
epoch,
loss_train / len(train_loader)))
As nerveless_child said, your dimensions are off!
For the other folks who are reviewing / studying Neural Networks, more generally, you can calculate the output dimension of a single convolutional layer by
[(W−K+2P)/S]+1
where
W is the input volume - in your case you have not given us this
K is the Kernel size - in your case 2 == "filter"
P is the padding - in your case 2
S is the stride - in your case 3
Another, prettier formulation:
That's because you're getting the dimensions wrong. From the error and your comment, I take it that your input is of the shape (64, 1, 28, 28).
Now, the shape of X at X = self.pool3(X) is (64, 128, 1, 1), which you then reshaped on the next line to (4, 128 * 4 * 4).
Long story short, the output of your model is (4, 10) i.e batch_size (4), which you're comparing on this line loss = loss_fn(outputs, labels) with a tensor of batch_size (64) as the error said.
I don't know what you're trying to do but I'm guessing that you'd want to change this line self.f1 = nn.Linear(128 * 4 * 4, 1000) to this self.f1 = nn.Linear(128 * 1 * 1, 1000)

Why do I always get the same value as the result in a CNN in pytorch?

Here is my code
dataset = pd.read_csv('augmented_data.csv')
dataset = dataset.sample(frac=1)
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.conv1 = nn.Conv2d(3,6,5)
self.pool = nn.MaxPool2d(2,2)
self.conv2 = nn.Conv2d(6,16,5)
self.fc1 = nn.Linear(1024144, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84,1)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 1024144)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
print(x)
x = self.fc3(x)
return x
files_read = 0
preprocess = transforms.Compose([
transforms.Resize(1024),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])
# device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
device = torch.device('cpu')
# model = ConvNet().to(device)
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = optim.Adam(model.parameters(), lr=0.001)
results = []
for index, row in dataset.iterrows():
try:
image = load_img('padded_images/' + row['image_name'] +'.jpg')
except:
image = load_img('augmented_images/' + row['image_name'] +'.jpeg')
files_read += 1
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0).to(device)
if files_read <= 80 * len(dataset) // 100:
output = model(input_batch)
optimizer.zero_grad()
y = torch.tensor([[float(row['target'])]]).to(device)
loss = criterion(output, y)
loss.backward()
optimizer.step()
else:
model.eval()
output = model(input_batch)
results.append([1.0 if output[0][0].double() > 0.5 else 0, float(row['target'])])
So i am using pytorch CNN to classify 60k images in 2 classes. When i print the output after the model has trained, whatever the image as input, the ouput is always "tensor([[0.6384]], grad_fn=)". Always the same value. So it predicts only 1 (because it's greater than 0.5). The thing is, when i print the ouput while training, the results vary (16, 1 , 0, 4 ,0.6 etc) but when i print the output (with the same model but not trained) the results don't vary that much (0.5, 0.51, 0.49 0.52, 0.55). So I think it's safe to say that it is converging to a single value. I just don't know why. what could i do differently?

Pytorch couldn't build multi scaled kernel nested model

I'm trying to create a modified MNIST model which takes input 1x28x28 MNIST tensor images, and it kind of branches into different models with different sized kernels, and accumulates at the end, so as to give a multi-scale-kerneled response in the spatial domain of the images. I'm worried about the model, since, I'm unable to construct it.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F
import timeit
import unittest
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)
# check availability of GPU and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# define a transforms for preparing the dataset
transform = transforms.Compose([
transforms.ToTensor(), # convert the image to a pytorch tensor
transforms.Normalize((0.1307,), (0.3081,)) # normalise the images with mean and std of the dataset
])
# Load the MNIST training, test datasets using `torchvision.datasets.MNIST` using the transform defined above
train_dataset = datasets.MNIST('./data',train=True,transform=transform,download=True)
test_dataset = datasets.MNIST('./data',train=False,transform=transform,download=True)
# create dataloaders for training and test datasets
# use a batch size of 32 and set shuffle=True for the training set
train_dataloader = Data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = Data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)
# My Net
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# define a conv layer with output channels as 16, kernel size of 3 and stride of 1
self.conv11 = nn.Conv2d(1, 16, 3, 1) # Input = 1x28x28 Output = 16x26x26
self.conv12 = nn.Conv2d(1, 16, 5, 1) # Input = 1x28x28 Output = 16x24x24
self.conv13 = nn.Conv2d(1, 16, 7, 1) # Input = 1x28x28 Output = 16x22x22
# define a conv layer with output channels as 32, kernel size of 3 and stride of 1
self.conv21 = nn.Conv2d(16, 32, 3, 1) # Input = 16x26x26 Output = 32x24x24
self.conv22 = nn.Conv2d(16, 32, 5, 1) # Input = 16x24x24 Output = 32x20x20
self.conv23 = nn.Conv2d(16, 32, 7, 1) # Input = 16x22x22 Output = 32x16x16
# define a conv layer with output channels as 64, kernel size of 3 and stride of 1
self.conv31 = nn.Conv2d(32, 64, 3, 1) # Input = 32x24x24 Output = 64x22x22
self.conv32 = nn.Conv2d(32, 64, 5, 1) # Input = 32x20x20 Output = 64x16x16
self.conv33 = nn.Conv2d(32, 64, 7, 1) # Input = 32x16x16 Output = 64x10x10
# define a max pooling layer with kernel size 2
self.maxpool = nn.MaxPool2d(2), # Output = 64x11x11
# define dropout layer with a probability of 0.25
self.dropout1 = nn.Dropout(0.25)
# define dropout layer with a probability of 0.5
self.dropout2 = nn.Dropout(0.5)
# define a linear(dense) layer with 128 output features
self.fc11 = nn.Linear(64*11*11, 128)
self.fc12 = nn.Linear(64*8*8, 128) # after maxpooling 2x2
self.fc13 = nn.Linear(64*5*5, 128)
# define a linear(dense) layer with output features corresponding to the number of classes in the dataset
self.fc21 = nn.Linear(128, 10)
self.fc22 = nn.Linear(128, 10)
self.fc23 = nn.Linear(128, 10)
self.fc33 = nn.Linear(30,10)
def forward(self, x1):
# Use the layers defined above in a sequential way (folow the same as the layer definitions above) and
# write the forward pass, after each of conv1, conv2, conv3 and fc1 use a relu activation.
x = F.relu(self.conv11(x1))
x = F.relu(self.conv21(x))
x = F.relu(self.maxpool(self.conv31(x)))
#x = torch.flatten(x, 1)
x = x.view(-1,64*11*11)
x = self.dropout1(x)
x = F.relu(self.fc11(x))
x = self.dropout2(x)
x = self.fc21(x)
y = F.relu(self.conv12(x1))
y = F.relu(self.conv22(y))
y = F.relu(self.maxpool(self.conv32(y)))
#x = torch.flatten(x, 1)
y = y.view(-1,64*8*8)
y = self.dropout1(y)
y = F.relu(self.fc12(y))
y = self.dropout2(y)
y = self.fc22(y)
z = F.relu(self.conv13(x1))
z = F.relu(self.conv23(z))
z = F.relu(self.maxpool(self.conv33(z)))
#x = torch.flatten(x, 1)
z = z.view(-1,64*5*5)
z = self.dropout1(z)
z = F.relu(self.fc13(z))
z = self.dropout2(z)
z = self.fc23(z)
out = self.fc33(torch.cat((x, y, z), 0))
output = F.log_softmax(out, dim=1)
return output
import unittest
class TestImplementations(unittest.TestCase):
# Dataloading tests
def test_dataset(self):
self.dataset_classes = ['0 - zero',
'1 - one',
'2 - two',
'3 - three',
'4 - four',
'5 - five',
'6 - six',
'7 - seven',
'8 - eight',
'9 - nine']
self.assertTrue(train_dataset.classes == self.dataset_classes)
self.assertTrue(train_dataset.train == True)
def test_dataloader(self):
self.assertTrue(train_dataloader.batch_size == 32)
self.assertTrue(test_dataloader.batch_size == 32)
def test_total_parameters(self):
model = Net().to(device)
#self.assertTrue(sum(p.numel() for p in model.parameters()) == 1015946)
suite = unittest.TestLoader().loadTestsFromModule(TestImplementations())
unittest.TextTestRunner().run(suite)
def train(model, device, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
# send the image, target to the device
data, target = data.to(device), target.to(device)
# flush out the gradients stored in optimizer
optimizer.zero_grad()
# pass the image to the model and assign the output to variable named output
output = model(data)
# calculate the loss (use nll_loss in pytorch)
loss = F.nll_loss(output, target)
# do a backward pass
loss.backward()
# update the weights
optimizer.step()
if batch_idx % 100 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
# send the image, target to the device
data, target = data.to(device), target.to(device)
# pass the image to the model and assign the output to variable named output
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
model = Net().to(device)
## Define Adam Optimiser with a learning rate of 0.01
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
start = timeit.default_timer()
for epoch in range(1, 11):
train(model, device, train_dataloader, optimizer, epoch)
test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)) )
Here is my full code. I couldn't understand what could possibly go wrong...
It is giving
<ipython-input-72-194680537dcc> in forward(self, x1)
46 x = F.relu(self.conv11(x1))
47 x = F.relu(self.conv21(x))
---> 48 x = F.relu(self.maxpool(self.conv31(x)))
49 #x = torch.flatten(x, 1)
50 x = x.view(-1,64*11*11)
TypeError: 'tuple' object is not callable
Error.
P.S.: Pytorch Noob here.
You have mistakenly placed a comma at the end of the line where you define self.maxpool : self.maxpool = nn.MaxPool2d(2), # Output = 64x11x11 see?
This comma makes self.maxpool a tuple instead of a torch.nn.modules.pooling.MaxPool2d. Drop the comma at the end and this error is fixed.
I see you haven't given the stride argument in you definition of self.maxpool = nn.MaxPool2d(2). Choose one: e.g. self.maxpool = nn.MaxPool2d(2, stride = 2).

Resources