Related
I am using PyTorch version: 1.9.0+cu102 with Convolutional Autoencoder for CIFAR-10 dataset as follows:
# Define transformations for training and test sets-
transform_train = transforms.Compose(
[
# transforms.RandomCrop(32, padding = 4),
# transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
# transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
)
transform_test = transforms.Compose(
[
transforms.ToTensor(),
# transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
)
# Load dataset-
train_dataset = torchvision.datasets.CIFAR10(
root = './data', train = True,
download = True, transform = transform_train
)
test_dataset = torchvision.datasets.CIFAR10(
root = './data', train = False,
download = True, transform = transform_test
)
print(f"len(train_dataset) = {len(train_dataset)} & len(test_dataset) = {len(test_dataset)}")
# len(train_dataset) = 50000 & len(test_dataset) = 10000
batch_size = 64
# Create training and testing loaders-
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size = batch_size,
shuffle = True
)
test_loader = torch.utils.data.DataLoader(
test_dataset, batch_size = batch_size,
shuffle = False
)
print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 782 & len(test_loader) = 157
# Sanity check-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (781.25, 156.25)
# Get some random training images-
images, labels = next(iter(train_loader))
print(f"images.shape: {images.shape} & labels.shape: {labels.shape}")
# images.shape: torch.Size([64, 3, 32, 32]) & labels.shape: torch.Size([64])
LEARNING_RATE = 0.001
num_epochs = 20
class Reshape(nn.Module):
def __init__(self, *args):
super().__init__()
self.shape = args
def forward(self, x):
return x.view(self.shape)
class Trim(nn.Module):
def __init__(self, *args):
super().__init__()
def forward(self, x):
return x[:, :, :32, :32]
encoder = nn.Sequential(
nn.Conv2d(
in_channels = 3, out_channels = 32,
kernel_size = 3, padding = 1,
stride = 1, bias = True
),
nn.LeakyReLU(negative_slope = 0.01),
nn.Conv2d(
in_channels = 32, out_channels = 64,
kernel_size = 3, padding = 1,
stride = 2, bias = True
),
nn.LeakyReLU(negative_slope = 0.01),
nn.Conv2d(
in_channels = 64, out_channels = 64,
kernel_size = 3, padding = 1,
stride = 2, bias = True
),
nn.LeakyReLU(negative_slope = 0.01),
nn.Conv2d(
in_channels = 64, out_channels = 64,
kernel_size = 3, padding = 1,
stride = 1, bias = True
),
nn.LeakyReLU(negative_slope = 0.01),
nn.Flatten(),
nn.Linear(
in_features = 4096, out_features = 1500,
bias = True
),
nn.Linear(
in_features = 1500, out_features = 500,
bias = True
),
nn.Linear(
in_features = 500, out_features = 100,
bias = True
)
)
# Sanity check-
x = torch.rand(size = (32, 3, 32, 32))
print(f"x.shape = {x.shape}")
encoder_op = encoder(x)
print(f"encoder_op.shape = {encoder_op.shape}")
# x.shape = torch.Size([32, 3, 32, 32])
# encoder_op.shape = torch.Size([32, 100])
decoder = nn.Sequential(
nn.Linear(
in_features = 100, out_features = 500,
bias = True),
nn.Linear(
in_features = 500, out_features = 1500,
bias = True),
nn.Linear(
in_features = 1500, out_features = 4096,
bias = True),
Reshape(-1, 64, 8, 8),
nn.ConvTranspose2d(
in_channels = 64, out_channels = 64,
kernel_size = 3, stride = 1,
padding = 1, bias = True),
# output: torch.Size([32, 64, 8, 8])
nn.ConvTranspose2d(
in_channels = 64, out_channels = 64,
kernel_size = 3, stride = 2,
padding = 1, bias = True),
# output: torch.Size([32, 64, 15, 15])
nn.ConvTranspose2d(
in_channels = 64, out_channels = 32,
kernel_size = 3, stride = 2,
padding = 0, bias = True),
# torch.Size([32, 32, 31, 31])
nn.ConvTranspose2d(
in_channels = 32, out_channels = 3,
kernel_size = 3, stride = 1,
padding = 0, bias = True),
# output: torch.Size([32, 3, 33, 33])
Trim(),
# (3, 33, 33) -> (3, 32, 32)
nn.Sigmoid()
)
# Sanity check-
decoder(encoder_op).shape
# torch.Size([32, 3, 32, 32])
class AutoEncoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = encoder
self.decoder = decoder
def forward(self, x):
x = self.encoder(x)
x = self.decoder(x)
return x
# Initialize an autoencoder instance-
model = AutoEncoder()
# Move model to (GPU) device-
model.to(device)
# Specify optimizer and loss function-
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fn = F.mse_loss
num_epochs = 15
# Python3 lists to hold training metrics-
trainining_loss = []
validation_loss = []
def compute_epoch_loss_autoencoder(model, data_loader, loss_fn, device):
model.eval()
curr_loss, num_examples = 0., 0
with torch.no_grad():
for features, _ in data_loader:
features = features.to(device)
logits = model(features)
loss = loss_fn(logits, features, reduction='sum')
num_examples += features.size(0)
curr_loss += loss
curr_loss = curr_loss / num_examples
return curr_loss
start_time = time.time()
for epoch in range(num_epochs):
running_loss = 0.0
model.train()
for batch_idx, (features, _) in enumerate(train_loader):
features = features.to(device)
# forward and back prop-
logits = model(features) # make predictions using model
loss = loss_fn(logits, features)
optimizer.zero_grad()
# Perform backprop-
loss.backward()
# Update model parameters-
optimizer.step()
# Compute model's performance-
running_loss += loss.item() * features.size(0)
# Compute loss using training dataset-
epoch_loss = running_loss / len(train_dataset)
trainining_loss.append(epoch_loss)
# Compute loss using validation dataset-
val_loss = compute_epoch_loss_autoencoder(
model, test_loader,
loss_fn, device
)
validation_loss.append(val_loss)
print(f"Epoch = {epoch + 1}: Autoencoder train loss = {epoch_loss:.4f} & val loss = {val_loss:.4f}")
end_time = time.time()
# Get some validation images-
for img, label in test_loader:
break
img.shape, label.shape
# (torch.Size([64, 3, 32, 32]), torch.Size([64]))
img.to(device)
# Pass batch size = 64 images through encoder to get latent space representations-
model.encoder(img)
This line gives me the error:
RuntimeError Traceback (most recent call
last)
in ()
----> 1 model.encoder(img)
4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in
_conv_forward(self, input, weight, bias)
438 _pair(0), self.dilation, self.groups)
439 return F.conv2d(input, weight, bias, self.stride,
--> 440 self.padding, self.dilation, self.groups)
441
442 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Input type (torch.FloatTensor) and weight type
(torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor
What's going wrong?
Thanks!
I know my images have only 1 channel so the first conv layer is (1,16,3,1) , but I have no idea why I got such an error.
Here is my code (I post only the related part).
org_x = train_csv.drop(['id', 'digit', 'letter'], axis=1).values
org_x = org_x.reshape(-1, 28, 28, 1)
org_x = org_x/255
org_x = np.array(org_x)
org_x = org_x.reshape(-1, 1, 28, 28)
org_x = torch.Tensor(org_x).float()
x_test = test_csv.drop(['id','letter'], axis=1).values
x_test = x_test.reshape(-1, 28, 28, 1)
x_test = x_test/255
x_test = np.array(x_test)
x_test = x_test.reshape(-1, 1, 28, 28)
x_test = torch.Tensor(x_test).float()
y = train_csv['digit']
y = list(y)
print(len(y))
org_y = np.zeros([len(y), 1])
for i in range(len(y)):
org_y[i] = y[i]
org_y = np.array(org_y)
org_y = torch.Tensor(org_y).float()
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(
org_x, org_y, test_size=0.2, random_state=42)
I checked the x_train shape is [1638, 1, 28, 28] and the x_valid shape is [410, 1, 28, 28].
transform = transforms.Compose([transforms.ToPILImage(),
transforms.ToTensor(),
transforms.Normalize((0.5, ), (0.5, )) ])
class kmnistDataset(data.Dataset):
def __init__(self, images, labels, transforms=None):
self.x = images
self.y = labels
self.transforms = transforms
def __len__(self):
return (len(self.x))
def __getitem__(self, idx):
data = np.asarray(self.x[idx][0:]).astype(np.uint8)
if self.transforms:
data = self.transforms(data)
if self.y is not None:
return (data, self.y[idx])
else:
return data
train_data = kmnistDataset(x_train, y_train, transforms=transform)
valid_data = kmnistDataset(x_valid, y_valid, transforms=transform)
# dataloaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=16, shuffle = False)
And here is my model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
self.bn1 = nn.BatchNorm2d(16)
self.pool = nn.MaxPool2d(2, 2)
unit = 64 * 14 * 14
self.fc1 = nn.Linear(unit, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = self.pool(F.relu(self.bn1(self.conv1(x))))
x = F.relu(self.conv2(x))
x = F.relu(self.conv3(x))
x = x.view(-1, 128 * 28 * 28)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
model = Net()
print(model)
Lastly,
n_epochs = 30
valid_loss_min = np.Inf
for epoch in range(1, n_epochs+1):
train_loss = 0
valid_loss = 0
###################
# train the model #
###################
model.train()
for data in train_loader:
inputs, labels = data[0], data[1]
optimizer.zero_grad()
output = model(inputs)
loss = criterion(output, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
#####################
# validate the model#
#####################
model.eval()
for data in valid_loader:
inputs, labels = data[0], data[1]
output = model(inputs)
loss = criterion(output, labels)
valid_loss += loss.item()*data.size(0)
train_loss = train_loss/ len(train_loader.dataset)
valid_loss = valid_loss / len(valid_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
epoch, train_loss, valid_loss))
When I run it, I got this error message
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead
To be specific,
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-14-b8783819421f> in <module>
14 inputs, labels = data[0], data[1]
15 optimizer.zero_grad()
---> 16 output = model(inputs)
17 loss = criterion(output, labels)
18 loss.backward()
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-12-500e34c49306> in forward(self, x)
26
27 def forward(self, x):
---> 28 x = self.pool(F.relu(self.bn1(self.conv1(x))))
29 x = F.relu(self.conv2(x))
30 x = F.relu(self.conv3(x))
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
421
422 def forward(self, input: Tensor) -> Tensor:
--> 423 return self._conv_forward(input, self.weight)
424
425 class Conv3d(_ConvNd):
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
418 _pair(0), self.dilation, self.groups)
419 return F.conv2d(input, weight, self.bias, self.stride,
--> 420 self.padding, self.dilation, self.groups)
421
422 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead
I tried a small demo with your code. and it works fine until your code had x = x.view(-1, 64*14*14) and input shape of torch.Size([1, 1, 28 ,28])
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
self.bn1 = nn.BatchNorm2d(16)
self.pool = nn.MaxPool2d(2, 2)
unit = 64 * 14 * 14
self.fc1 = nn.Linear(unit, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = self.pool(F.relu(self.bn1(self.conv1(x))))
x = F.relu(self.conv2(x))
x = F.relu(self.conv3(x))
#print(x.shape)
x = x.view(-1, 64*14*14)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
model = Net()
print(model)
data = torch.rand((1,1,28,28))
pred = model(data)
And if i give my data tensor as data = torch.rand((1,3,28,28)) i get your error i.e RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead
So please check your channel dim of your data just before passing it to your model i.e here (highlighted by ** **)
for data in train_loader:
inputs, labels = data[0], data[1]
optimizer.zero_grad()
**print(inputs.shape)**
output = model(inputs)
loss = criterion(output, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
I think the problem is with the BatchNorm() layer ==> self.bn1 = nn.BatchNorm2d(16).
the parameter in this layer should be the number of channels of the input. So if you look at your last conv layer conv3, It produces a feature map of 64 channels, thus when you're feeding this feature map to your BatchNorm(), It should be 64 as well. So you can simply do the following:
self.bn1 = nn.BatchNorm2d(64)
I am using PyTorch 1.7 and Python 3.8 with CIFAR-10 dataset. I am trying to create a block with: conv -> conv -> pool -> fc. Fully connected layer (fc) has 256 neurons. The code for this is as follows:
# Testing-
conv1 = nn.Conv2d(
in_channels = 3, out_channels = 64,
kernel_size = 3, stride = 1,
padding = 1, bias = True
)
conv2 = nn.Conv2d(
in_channels = 64, out_channels = 64,
kernel_size = 3, stride = 1,
padding = 1, bias = True
)
pool = nn.MaxPool2d(
kernel_size = 2, stride = 2
)
fc1 = nn.Linear(
in_features = 64 * 16 * 16, out_features = 256
bias = True
)
images.shape
# torch.Size([32, 3, 32, 32])
x = conv1(images)
x.shape
# torch.Size([32, 64, 32, 32])
x = conv2(x)
x.shape
# torch.Size([32, 64, 32, 32])
x = pool(x)
x.shape
# torch.Size([32, 64, 16, 16])
# This line of code gives error-
x = fc1(x)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32768x16 and
16384x256)
What is going wrong?
You are nearly there! As you will have noticed nn.MaxPool returns a shape (32, 64, 16, 16) which is incompatible with a nn.Linear's input: a 2D dimensional tensor (batch, in_features). You need to broadcast to (batch, 64*16*16).
I would recommend using a nn.Flatten layer rather than broadcasting yourself. It will act as x.view(x.size(0), -1) but is clearer. By default it preserves the first dimension:
conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)
flatten = nn.Flatten()
fc1 = nn.Linear(in_features=64*16*16, out_features=256)
x = conv1(images)
x = conv2(x)
x = pool(x)
x = flatten(x)
x = fc1(x)
Alternatively, you could use the functional alternative torch.flatten, where you will have to provide the start_dim as 1: x = torch.flatten(x, start_dim=1).
When you're done debugging, you could assemble your layers with nn.Sequential:
model = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
nn.Linear(in_features=64*16*16, out_features=256)
)
x = model(images)
you need to flat the output of nn.MaxPool2d layer for giving input in nn.Linear layer.
try to use x = x.view(x.size(0), -1) before giving input to fc layer for flatten tensor.
I have my training dataset as below, where X_train is 3D with 3 channels
Shape of X_Train: (708, 256, 3)
Shape of Y_Train: (708, 4)
Then I convert them into a tensor and input into the dataloader:
X_train=torch.from_numpy(X_data)
y_train=torch.from_numpy(y_data)
training_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(training_dataset, batch_size=50, shuffle=False)
However when training the model, I get the following error:
RuntimeError: Given groups=1, weight of size 24 3 5, expected input[708, 256, 3] to have 3 channels, but got 256 channels instead
I suppose this is due to the position of the channel? In Tensorflow, the channel position is at the end, but in PyTorch the format is "Batch Size x Channel x Height x Width"? So how do I swap the positions in the x_train tensor to match the expected format in the dataloader?
class TwoLayerNet(torch.nn.Module):
def __init__(self):
super(TwoLayerNet,self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(3, 3*8, kernel_size=5, stride=1),
nn.Sigmoid(),
nn.AvgPool1d(kernel_size=2, stride=0))
self.conv2 = nn.Sequential(
nn.Conv1d(3*8, 12, kernel_size=5, stride=1),
nn.Sigmoid(),
nn.AvgPool1d(kernel_size=2, stride = 0))
#self.drop_out = nn.Dropout()
self.fc1 = nn.Linear(708, 732)
self.fc2 = nn.Linear(732, 4)
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
out = out.reshape(out.size(0), -1)
out = self.drop_out(out)
out = self.fc1(out)
out = self.fc2(out)
return out
Use permute.
X_train = torch.rand(708, 256, 3)
X_train = X_train.permute(2, 0, 1)
X_train.shape
# => torch.Size([3, 708, 256])
I want to try some toy examples in pytorch, but the training loss does not decrease in the training.
Some info is provided here:
The model is vgg16, consisted of 13 conv layers and 3 dense layers.
The data is cifar100 in pytorch.
I choose cross entropy as the loss function.
The code is as follows
# encoding: utf-8
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision
import numpy as np
class VGG16(torch.nn.Module):
def __init__(self, n_classes):
super(VGG16, self).__init__()
# construct model
self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
self.fc6 = nn.Linear(512, 512)
self.fc7 = nn.Linear(512, 512)
self.fc8 = nn.Linear(512, n_classes)
def forward(self, x):
x = F.relu(self.conv1_1(x))
x = F.relu(self.conv1_2(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv2_1(x))
x = F.relu(self.conv2_2(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv3_1(x))
x = F.relu(self.conv3_2(x))
x = F.relu(self.conv3_3(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv4_1(x))
x = F.relu(self.conv4_2(x))
x = F.relu(self.conv4_3(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv5_1(x))
x = F.relu(self.conv5_2(x))
x = F.relu(self.conv5_3(x))
x = F.max_pool2d(x, (2, 2))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))
x = self.fc8(x)
return x
def num_flat_features(self, x):
size = x.size()[1:]
num_features = 1
for s in size:
num_features *= s
return num_features
if __name__ == '__main__':
BATCH_SIZE = 128
LOG_INTERVAL = 5
# data
transform = transforms.Compose([
transforms.ToTensor()
])
trainset = torchvision.datasets.CIFAR100(
root='./data',
train=True,
download=True,
transform=transform
)
testset = torchvision.datasets.CIFAR100(
root='./data',
train=False,
download=True,
transform=transform
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
# model
vgg16 = VGG16(100)
vgg16.cuda()
# optimizer
optimizer = optim.SGD(vgg16.parameters(), lr=0.01)
# loss
criterion = nn.CrossEntropyLoss()
print('———— Train Start —————')
for epoch in range(20):
running_loss = 0.
for step, (batch_x, batch_y) in enumerate(trainloader):
batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
#
optimizer.zero_grad()
output = vgg16(batch_x)
loss = criterion(output, batch_y)
loss.backward()
optimizer.step()
running_loss += loss.item()
if step % LOG_INTERVAL == 0:
print('[%d, %4d] loss: %.4f' % (epoch, step, running_loss / LOG_INTERVAL))
running_loss = 0.
def test():
print('———— Test Start ————')
correct = 0
total = 0
#
with torch.no_grad():
for test_x, test_y in testloader:
images, labels = test_x.cuda(), test_y.cuda()
output = vgg16(images)
_, predicted = torch.max(output.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print('Accuracy of the network is: %.4f %%' % accuracy)
print('———— Test Finish ————')
test()
print('———— Train Finish —————')
The loss stays around 4.6060 and never decrease. I have tried different learning rate but does not work.
I have noticed that you are not using Batch normalization in between your convolution layers. I have added batch normalization layers and it seems to work. Following is the modified code:
class VGG16(torch.nn.Module):
def __init__(self, n_classes):
super(VGG16, self).__init__()
# construct model
self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
self.conv11_bn = nn.BatchNorm2d(64)
self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
self.conv12_bn = nn.BatchNorm2d(64)
self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
self.conv21_bn = nn.BatchNorm2d(128)
self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
self.conv22_bn = nn.BatchNorm2d(128)
self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
self.conv31_bn = nn.BatchNorm2d(256)
self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
self.conv32_bn = nn.BatchNorm2d(256)
self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
self.conv33_bn = nn.BatchNorm2d(256)
self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
self.conv41_bn = nn.BatchNorm2d(512)
self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv42_bn = nn.BatchNorm2d(512)
self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv43_bn = nn.BatchNorm2d(512)
self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
self.conv51_bn = nn.BatchNorm2d(512)
self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv52_bn = nn.BatchNorm2d(512)
self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv53_bn = nn.BatchNorm2d(512)
self.fc6 = nn.Linear(512, 512)
self.fc7 = nn.Linear(512, 512)
self.fc8 = nn.Linear(512, n_classes)
def forward(self, x):
x = F.relu(self.conv11_bn(self.conv1_1(x)))
x = F.relu(self.conv12_bn(self.conv1_2(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv22_bn(self.conv2_1(x)))
x = F.relu(self.conv21_bn(self.conv2_2(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv31_bn(self.conv3_1(x)))
x = F.relu(self.conv32_bn(self.conv3_2(x)))
x = F.relu(self.conv33_bn(self.conv3_3(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv41_bn(self.conv4_1(x)))
x = F.relu(self.conv42_bn(self.conv4_2(x)))
x = F.relu(self.conv43_bn(self.conv4_3(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv51_bn(self.conv5_1(x)))
x = F.relu(self.conv52_bn(self.conv5_2(x)))
x = F.relu(self.conv53_bn(self.conv5_3(x)))
x = F.max_pool2d(x, (2, 2))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))
x = self.fc8(x)
return x
However, a more elegant version of the same could be found here