Access individual gradients - TensorFlow2 - python-3.x

For a toy LeNet-5 CNN on MNIST, implemented in TensorFlow 2.10 with Python 3.10 and a batch size of 256:
class LeNet5(Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = Conv2D(
            filters = 6, kernel_size = (5, 5),
            strides = (1, 1), activation = None,
            input_shape = (28, 28, 1)
        )
        self.pool1 = AveragePooling2D(
            pool_size = (2, 2), strides = (2, 2)
        )
        self.conv2 = Conv2D(
            filters = 16, kernel_size = (5, 5),
            strides = (1, 1), activation = None
        )
        self.pool2 = AveragePooling2D(
            pool_size = (2, 2), strides = (2, 2)
        )
        self.flatten = Flatten()
        self.dense1 = Dense(
            units = 120, activation = None
        )
        self.dense2 = Dense(
            units = 84, activation = None
        )
        self.output_layer = Dense(
            units = 10, activation = None
        )

    def call(self, x):
        x = tf.nn.relu(self.conv1(x))
        x = self.pool1(x)
        x = tf.nn.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.flatten(x)
        x = tf.nn.relu(self.dense1(x))
        x = tf.nn.relu(self.dense2(x))
        x = tf.nn.softmax(self.output_layer(x))
        return x

    def shape_computation(self, x):
        print(f"Input shape: {x.shape}")
        x = self.conv1(x)
        print(f"conv1 output shape: {x.shape}")
        x = self.pool1(x)
        print(f"pool1 output shape: {x.shape}")
        x = self.conv2(x)
        print(f"conv2 output shape: {x.shape}")
        x = self.pool2(x)
        print(f"pool2 output shape: {x.shape}")
        x = self.flatten(x)
        print(f"flattened shape: {x.shape}")
        x = self.dense1(x)
        print(f"dense1 output shape: {x.shape}")
        x = self.dense2(x)
        print(f"dense2 output shape: {x.shape}")
        x = self.output_layer(x)
        print(f"output shape: {x.shape}")
        del x
        return None
# Initialize an instance of the LeNet-5 CNN-
model = LeNet5()
model.build(input_shape = (None, 28, 28, 1))

# Define loss and optimizer-
loss_fn = tf.keras.losses.CategoricalCrossentropy(reduction = tf.keras.losses.Reduction.NONE)
# optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0003)
optimizer = tf.keras.optimizers.SGD(
    learning_rate = 10e-3, momentum = 0.0,
    nesterov = False
)

with tf.GradientTape() as grad_tape:
    pred = model(x)
    loss = loss_fn(y, pred)

loss.shape
# TensorShape([256])
This computes an individual loss for each of the 256 training images in a given batch.
# Compute gradient of loss wrt parameters-
grads = grad_tape.gradient(loss, model.trainable_variables)

type(grads), len(grads)
# (list, 10)

for i in range(len(grads)):
    print(f"i: {i}, grads.shape: {grads[i].shape}")
"""
i: 0, grads.shape: (5, 5, 1, 6)
i: 1, grads.shape: (6,)
i: 2, grads.shape: (5, 5, 6, 16)
i: 3, grads.shape: (16,)
i: 4, grads.shape: (256, 120)
i: 5, grads.shape: (120,)
i: 6, grads.shape: (120, 84)
i: 7, grads.shape: (84,)
i: 8, grads.shape: (84, 10)
i: 9, grads.shape: (10,)
"""
Since the loss is computed individually for each training example, how can I also compute the gradient corresponding to each training example?
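One option (a sketch, not from the original post) is tf.GradientTape.jacobian: since the loss has shape (256,), its Jacobian with respect to each trainable variable stacks one gradient per example along the leading axis.

# Per-example gradients via the tape's jacobian method.
# Assumes x, y, model and loss_fn (with Reduction.NONE) as defined above.
with tf.GradientTape() as grad_tape:
    pred = model(x)
    loss = loss_fn(y, pred)  # shape: (256,)

per_example_grads = grad_tape.jacobian(loss, model.trainable_variables)
# per_example_grads[0].shape == (256, 5, 5, 1, 6), i.e. one conv1-kernel
# gradient per training example, and (256, *var.shape) for the other nine.

Note that jacobian is considerably more expensive than gradient, since it effectively differentiates each of the 256 loss entries separately, so smaller batches may be needed for larger models.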

Related

RuntimeError: Given groups=1, weight of size [16, 1, 3, 3, 3], expected input[250, 64, 4, 4, 4] to have 1 channels, but got 64 channels instead

This architecture is based on a research paper.
class NBV_Net(nn.Module):
    def __init__(self, dropout_prob):
        super(NBV_Net, self).__init__()
        # dropout_prob = 0.0 # 1 - 0.7

        # Four 3D convolutional layers
        self.conv1 = nn.Conv3d(1, 16, 3, stride=1, padding=1)
        self.pool1 = nn.MaxPool3d(kernel_size=(2,2,2), stride=(2,2,2))
        self.conv2 = nn.Conv3d(16, 32, 3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=(2,2,2), stride=(2,2,2))
        self.conv3 = nn.Conv3d(32, 64, 3, stride=1, padding=1)
        self.conv3_drop = nn.Dropout(dropout_prob)
        self.pool3 = nn.MaxPool3d(kernel_size=(2,2,2), stride=(2,2,2))
        self.conv4 = nn.Conv3d(64, 64, 3, stride=1, padding=1)
        self.conv4_drop = nn.Dropout(dropout_prob)

        # Five fully connected layers
        self.fc1 = nn.Linear(4096, 1500)
        self.fc1_drop = nn.Dropout(dropout_prob)
        self.fc2 = nn.Linear(1500, 500)
        self.fc2_drop = nn.Dropout(dropout_prob)
        self.fc3 = nn.Linear(500, 100)
        self.fc3_drop = nn.Dropout(dropout_prob)
        self.fc4 = nn.Linear(100, 50)
        self.fc4_drop = nn.Dropout(dropout_prob)
        self.fc5 = nn.Linear(50, 3)

    def forward(self, x):
        ## feedforward behavior of NBV-net
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self(F.relu(self.conv4(x)))
        # Flatten
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_drop(x)
        x = F.relu(self.fc3(x))
        x = self.fc3_drop(x)
        x = F.relu(self.fc4(x))
        x = self.fc4_drop(x)
        x = F.tanh(self.fc5(x))
        return x
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3, 3], expected input[250, 64, 4, 4, 4] to have 1 channels, but got 64 channels instead
But this code gives the RuntimeError above. Similar errors appear in other questions, but I could not understand what groups=1 and the other dimensions mentioned actually mean. Any idea about the background of this error?
The input shape for nn.Conv3d(1, 16, 3, stride=1, padding=1) is (batch, channels, depth, height, width).
You defined the channel size as 1, but your input tensor has 64 channels.
self.conv1 = nn.Conv3d(64, 16, 3, stride=1, padding=1) will resolve your error.
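One more thing worth checking (an observation on the posted code, not part of the answer above): the line x = self(F.relu(self.conv4(x))) calls the module itself, re-entering forward with a 64-channel activation, so conv1 (declared with 1 input channel) receives 64 channels, which is exactly the mismatch the error reports. If the intent was to apply the fourth block's dropout, a plausible fix is:

# Apply conv4's dropout instead of recursively calling the whole module.
x = self.conv4_drop(F.relu(self.conv4(x)))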

Convolutional Autoencoder CIFAR10 PyTorch - RuntimeError

I am using PyTorch version 1.9.0+cu102 with a convolutional autoencoder for the CIFAR-10 dataset, as follows:
# Define transformations for training and test sets-
transform_train = transforms.Compose(
    [
        # transforms.RandomCrop(32, padding = 4),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

# Load dataset-
train_dataset = torchvision.datasets.CIFAR10(
    root = './data', train = True,
    download = True, transform = transform_train
)
test_dataset = torchvision.datasets.CIFAR10(
    root = './data', train = False,
    download = True, transform = transform_test
)

print(f"len(train_dataset) = {len(train_dataset)} & len(test_dataset) = {len(test_dataset)}")
# len(train_dataset) = 50000 & len(test_dataset) = 10000

batch_size = 64

# Create training and testing loaders-
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size = batch_size,
    shuffle = True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size = batch_size,
    shuffle = False
)

print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 782 & len(test_loader) = 157

# Sanity check-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (781.25, 156.25)

# Get some random training images-
images, labels = next(iter(train_loader))
print(f"images.shape: {images.shape} & labels.shape: {labels.shape}")
# images.shape: torch.Size([64, 3, 32, 32]) & labels.shape: torch.Size([64])
LEARNING_RATE = 0.001
num_epochs = 20

class Reshape(nn.Module):
    def __init__(self, *args):
        super().__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)

class Trim(nn.Module):
    def __init__(self, *args):
        super().__init__()

    def forward(self, x):
        return x[:, :, :32, :32]
encoder = nn.Sequential(
    nn.Conv2d(
        in_channels = 3, out_channels = 32,
        kernel_size = 3, padding = 1,
        stride = 1, bias = True
    ),
    nn.LeakyReLU(negative_slope = 0.01),
    nn.Conv2d(
        in_channels = 32, out_channels = 64,
        kernel_size = 3, padding = 1,
        stride = 2, bias = True
    ),
    nn.LeakyReLU(negative_slope = 0.01),
    nn.Conv2d(
        in_channels = 64, out_channels = 64,
        kernel_size = 3, padding = 1,
        stride = 2, bias = True
    ),
    nn.LeakyReLU(negative_slope = 0.01),
    nn.Conv2d(
        in_channels = 64, out_channels = 64,
        kernel_size = 3, padding = 1,
        stride = 1, bias = True
    ),
    nn.LeakyReLU(negative_slope = 0.01),
    nn.Flatten(),
    nn.Linear(
        in_features = 4096, out_features = 1500,
        bias = True
    ),
    nn.Linear(
        in_features = 1500, out_features = 500,
        bias = True
    ),
    nn.Linear(
        in_features = 500, out_features = 100,
        bias = True
    )
)

# Sanity check-
x = torch.rand(size = (32, 3, 32, 32))
print(f"x.shape = {x.shape}")
encoder_op = encoder(x)
print(f"encoder_op.shape = {encoder_op.shape}")
# x.shape = torch.Size([32, 3, 32, 32])
# encoder_op.shape = torch.Size([32, 100])
decoder = nn.Sequential(
    nn.Linear(
        in_features = 100, out_features = 500,
        bias = True),
    nn.Linear(
        in_features = 500, out_features = 1500,
        bias = True),
    nn.Linear(
        in_features = 1500, out_features = 4096,
        bias = True),
    Reshape(-1, 64, 8, 8),
    nn.ConvTranspose2d(
        in_channels = 64, out_channels = 64,
        kernel_size = 3, stride = 1,
        padding = 1, bias = True),
    # output: torch.Size([32, 64, 8, 8])
    nn.ConvTranspose2d(
        in_channels = 64, out_channels = 64,
        kernel_size = 3, stride = 2,
        padding = 1, bias = True),
    # output: torch.Size([32, 64, 15, 15])
    nn.ConvTranspose2d(
        in_channels = 64, out_channels = 32,
        kernel_size = 3, stride = 2,
        padding = 0, bias = True),
    # output: torch.Size([32, 32, 31, 31])
    nn.ConvTranspose2d(
        in_channels = 32, out_channels = 3,
        kernel_size = 3, stride = 1,
        padding = 0, bias = True),
    # output: torch.Size([32, 3, 33, 33])
    Trim(),
    # (3, 33, 33) -> (3, 32, 32)
    nn.Sigmoid()
)

# Sanity check-
decoder(encoder_op).shape
# torch.Size([32, 3, 32, 32])
class AutoEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Initialize an autoencoder instance-
model = AutoEncoder()

# Move model to (GPU) device-
model.to(device)

# Specify optimizer and loss function-
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fn = F.mse_loss

num_epochs = 15

# Python3 lists to hold training metrics-
training_loss = []
validation_loss = []

def compute_epoch_loss_autoencoder(model, data_loader, loss_fn, device):
    model.eval()
    curr_loss, num_examples = 0., 0
    with torch.no_grad():
        for features, _ in data_loader:
            features = features.to(device)
            logits = model(features)
            loss = loss_fn(logits, features, reduction='sum')
            num_examples += features.size(0)
            curr_loss += loss
        curr_loss = curr_loss / num_examples
        return curr_loss
start_time = time.time()

for epoch in range(num_epochs):
    running_loss = 0.0
    model.train()
    for batch_idx, (features, _) in enumerate(train_loader):
        features = features.to(device)

        # Forward and back prop-
        logits = model(features)  # make predictions using model
        loss = loss_fn(logits, features)
        optimizer.zero_grad()

        # Perform backprop-
        loss.backward()

        # Update model parameters-
        optimizer.step()

        # Compute model's performance-
        running_loss += loss.item() * features.size(0)

    # Compute loss using training dataset-
    epoch_loss = running_loss / len(train_dataset)
    training_loss.append(epoch_loss)

    # Compute loss using validation dataset-
    val_loss = compute_epoch_loss_autoencoder(
        model, test_loader,
        loss_fn, device
    )
    validation_loss.append(val_loss)

    print(f"Epoch = {epoch + 1}: Autoencoder train loss = {epoch_loss:.4f} & val loss = {val_loss:.4f}")

end_time = time.time()

# Get some validation images-
for img, label in test_loader:
    break

img.shape, label.shape
# (torch.Size([64, 3, 32, 32]), torch.Size([64]))

img.to(device)

# Pass batch size = 64 images through encoder to get latent space representations-
model.encoder(img)
This line gives me the error:
RuntimeError                              Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 model.encoder(img)

4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    438                             _pair(0), self.dilation, self.groups)
    439         return F.conv2d(input, weight, bias, self.stride,
--> 440                         self.padding, self.dilation, self.groups)
    441
    442     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor
What's going wrong?
Thanks!
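A detail worth checking (an observation on the code above, not from the original thread): Tensor.to() is not in-place, so the bare img.to(device) leaves img on the CPU while the model's weights live on the GPU, which is exactly the type mismatch the error reports. A minimal sketch of the fix:

# Tensor.to() returns a new tensor; reassign to actually move the data.
img = img.to(device)
latent = model.encoder(img)  # input and weights are now on the same device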

expected string or bytes-like object when calling model subclass

I have attempted to write a generative adversarial network. Below is the code for one of the discriminators.
class D1(Layer):
    def __init__(self, input_shape=(256, 256, 3), name='d1', **kwargs):
        super(D1, self).__init__(name=name, **kwargs)
        self.h1 = Conv2D(64, (3, 3), strides=(1, 1), padding='same')
        self.h2 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h3 = LeakyReLU(alpha=0.2)
        self.h4 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h5 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h6 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h7 = LeakyReLU(alpha=0.2)
        self.h8 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h9 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h10 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h11 = LeakyReLU(alpha=0.2)
        self.h12 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h13 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h14 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h15 = Flatten()
        self.h16 = Dropout(0.4)
        self.D1R = Dense(1, activation='sigmoid')
        self.h17 = Dense(4096, activation='relu')
        self.h18 = Dense(4096, activation='relu')
        self.D1C = Dense(16, activation='sigmoid')

    def call(self, inputs):
        x = self.h1(inputs)
        x = self.h2(x)
        x = self.h3(x)
        x = self.h4(x)
        x = self.h5(x)
        x = self.h6(x)
        x = self.h7(x)
        x = self.h8(x)
        x = self.h9(x)
        x = self.h10(x)
        x = self.h11(x)
        x = self.h12(x)
        x = self.h13(x)
        x = self.h14(x)
        x = self.h15(x)
        x = self.h16(x)
        d1r = self.D1R(x)
        x = self.h17(x)
        x = self.h18(x)
        d1c = self.D1C(x)
        return d1r, d1c
class Discriminator1(Model):
    def __init__(
        self,
        input_shape=(None, 256, 256, 3),
        name='disc1',
        **kwargs
    ):
        super(Discriminator1, self).__init__(name=name, **kwargs)
        self.d1 = D1(input_shape=input_shape)

    def call(self, inputs):
        image = inputs
        d1r, d1c = self.d1(image)
        d1_loss = d1_loss(d1r, d1c)
        self.add_loss(d1_loss)
        return out
When I call it in training, it throws TypeError: expected string or bytes-like object. I cannot figure out what is causing it.
Any help? None of my functions are supposed to use strings.
def generate_latent_noise(latent_dim, n_samples):
    x_input = randn(latent_dim * n_samples)
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input

def generate_fake_samples(g, latent_dim, n_samples, y_i, y_l):
    x_input = generate_latent_noise(latent_dim, n_samples)
    X = g.predict(x_input)
    y = zeros((n_samples, 1))
    for i in range(n_samples-1):
        intent = y_i[i]
        bio = y_l[i]
    return X, y, intent, bio
epochs = 200
opt = SGD(learning_rate=1e-3, momentum=0.99)
metric = Accuracy()
yi, yl = retrieve_target_labels('/content/drive/My Drive/Project/input.xlsx')

g = Generator(100)
d1 = D1((256, 256, 3))
d2 = D2((256, 256, 3))
gen = G_Model((256, 256, 3), 100, yi, yl)
disc1 = Discriminator1((256, 256, 3), 100)
disc2 = Discriminator2((256, 256, 3), 100)

art, yc_real, yi_real, yl_real = load_real_samples('/content/drive/MyDrive/Project/TrainSA.xlsx')

half_batch = yi.shape[0]
n_batch = half_batch * 2
batch_per_epoch = int(art.shape[0] / n_batch)

for epoch in range(epochs):
    for batch in range(batch_per_epoch):
        fake, y, yi, yl = generate_fake_samples(g, 100, half_batch, yi, yl)
        real, y_real, c_real, i_real, l_real = generate_real_samples(art, half_batch, yc_real, yi_real, yl_real)
        fake_image = tf.convert_to_tensor(fake)
        d1r, d1c = d1(fake_image)  # error!
        d1_loss_fake = d1.losses
        d1r, d1c = d1(real)
        d1_loss = d1.losses
        d2i_fake, d2l_fake = d2(fake_image)
        d2_loss_fake = d2.losses
        d2i, d2l = d2(real)
        d2_loss = d2.losses
        g_loss = gen.losses
It is a bit difficult to provide a minimal working example, since the error occurs at the end of my code after many functions have been called, but I have tried to include the ones that might be involved.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-95-d4bb0da7c68f> in <module>()
     23 #d1_loss_fake = d1.losses
     24 real_image = tf.convert_to_tensor(real, dtype = tf.float32)
---> 25 d1r, d1c = disc1(real_image)
     26 d1_loss = d1.losses
     27 d2i_fake, d2l_fake = d2(fake_image)

3 frames
/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py in name_scope(self, name)
   4126     # Scopes created in the root must match the more restrictive
   4127     # op name regex, which constrains the initial character.
-> 4128     if not _VALID_OP_NAME_REGEX.match(name):
   4129       raise ValueError("'%s' is not a valid scope name" % name)
   4130     old_stack = self._name_stack

TypeError: expected string or bytes-like object
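One thing that stands out (an observation on the code above, not a confirmed diagnosis): Discriminator1.__init__ has the signature (self, input_shape=..., name='disc1', **kwargs), so the positional call Discriminator1((256, 256, 3), 100) binds 100 to name. Keras then hands that integer to tf.name_scope, whose validation regex expects a string, which matches the traceback. A sketch of the fix:

# Keep `name` a string; pass arguments by keyword so nothing lands in `name`.
disc1 = Discriminator1(input_shape=(256, 256, 3), name='disc1')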

Convolution - Deconvolution for even and odd sizes

I have two tensors of different sizes to put through the network.
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = TC(C(a)), TC(C(b))
The results are:
a_out.shape = torch.Size([1, 1, 99])   # What I want is [1, 1, 100]
b_out.shape = torch.Size([1, 1, 101])
Is there any method to handle this problem?
I need your help.
Thanks
This is expected behaviour per the documentation. Padding can be applied when an even input length is detected, to recover the same length as the input.
Something like this:
class PadEven(nn.Module):
    def __init__(self, conv, deconv, pad_value=0, padding=(0, 1)):
        super().__init__()
        self.conv = conv
        self.deconv = deconv
        self.pad = nn.ConstantPad1d(padding=padding, value=pad_value)

    def forward(self, x):
        nd = x.size(-1)
        x = self.deconv(self.conv(x))
        if nd % 2 == 0:
            x = self.pad(x)
        return x

C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
P = PadEven(C, TC)

a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = P(a), P(b)
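To see why only the even case loses a sample, one can plug the sizes into the length formulas from the PyTorch docs (a quick check, with kernel_size=1, stride=2, padding=0):

# Conv1d:          L_out = (L_in - 1) // 2 + 1   -> 100 -> 50, 101 -> 51
# ConvTranspose1d: L_out = (L_in - 1) * 2 + 1    ->  50 -> 99,  51 -> 101
for L in (100, 101):
    down = (L - 1) // 2 + 1
    up = (down - 1) * 2 + 1
    print(L, '->', down, '->', up)  # 100 -> 50 -> 99, 101 -> 51 -> 101

Setting output_padding=1 on the transposed convolution would fix even inputs (50 -> 100) but overshoot odd ones (51 -> 102), which is why PadEven pads conditionally instead.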

The training loss of VGG16 implemented in PyTorch does not decrease

I want to try some toy examples in PyTorch, but the training loss does not decrease during training.
Some info is provided here:
The model is VGG16, consisting of 13 conv layers and 3 dense layers.
The data is CIFAR-100 from torchvision.
I chose cross entropy as the loss function.
The code is as follows:
# encoding: utf-8
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision
import numpy as np

class VGG16(torch.nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        # construct model
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.fc6 = nn.Linear(512, 512)
        self.fc7 = nn.Linear(512, 512)
        self.fc8 = nn.Linear(512, n_classes)

    def forward(self, x):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
if __name__ == '__main__':
    BATCH_SIZE = 128
    LOG_INTERVAL = 5

    # data
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
    trainset = torchvision.datasets.CIFAR100(
        root='./data',
        train=True,
        download=True,
        transform=transform
    )
    testset = torchvision.datasets.CIFAR100(
        root='./data',
        train=False,
        download=True,
        transform=transform
    )
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

    # model
    vgg16 = VGG16(100)
    vgg16.cuda()

    # optimizer
    optimizer = optim.SGD(vgg16.parameters(), lr=0.01)

    # loss
    criterion = nn.CrossEntropyLoss()

    print('———— Train Start —————')
    for epoch in range(20):
        running_loss = 0.
        for step, (batch_x, batch_y) in enumerate(trainloader):
            batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            optimizer.zero_grad()
            output = vgg16(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % LOG_INTERVAL == 0:
                print('[%d, %4d] loss: %.4f' % (epoch, step, running_loss / LOG_INTERVAL))
                running_loss = 0.

    def test():
        print('———— Test Start ————')
        correct = 0
        total = 0
        with torch.no_grad():
            for test_x, test_y in testloader:
                images, labels = test_x.cuda(), test_y.cuda()
                output = vgg16(images)
                _, predicted = torch.max(output.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print('Accuracy of the network is: %.4f %%' % accuracy)
        print('———— Test Finish ————')

    test()
    print('———— Train Finish —————')
The loss stays around 4.6060 (roughly ln(100), i.e. the loss of a uniform prediction over 100 classes) and never decreases. I have tried different learningrates, but it does not help.
I noticed that you are not using batch normalization between your convolution layers. I added batch normalization layers and it seems to work. The following is the modified code:
class VGG16(torch.nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        # construct model
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv11_bn = nn.BatchNorm2d(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv12_bn = nn.BatchNorm2d(64)
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv21_bn = nn.BatchNorm2d(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv22_bn = nn.BatchNorm2d(128)
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv31_bn = nn.BatchNorm2d(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv32_bn = nn.BatchNorm2d(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv33_bn = nn.BatchNorm2d(256)
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv41_bn = nn.BatchNorm2d(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv42_bn = nn.BatchNorm2d(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv43_bn = nn.BatchNorm2d(512)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv51_bn = nn.BatchNorm2d(512)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv52_bn = nn.BatchNorm2d(512)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv53_bn = nn.BatchNorm2d(512)
        self.fc6 = nn.Linear(512, 512)
        self.fc7 = nn.Linear(512, 512)
        self.fc8 = nn.Linear(512, n_classes)
    def forward(self, x):
        x = F.relu(self.conv11_bn(self.conv1_1(x)))
        x = F.relu(self.conv12_bn(self.conv1_2(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv21_bn(self.conv2_1(x)))
        x = F.relu(self.conv22_bn(self.conv2_2(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv31_bn(self.conv3_1(x)))
        x = F.relu(self.conv32_bn(self.conv3_2(x)))
        x = F.relu(self.conv33_bn(self.conv3_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv41_bn(self.conv4_1(x)))
        x = F.relu(self.conv42_bn(self.conv4_2(x)))
        x = F.relu(self.conv43_bn(self.conv4_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv51_bn(self.conv5_1(x)))
        x = F.relu(self.conv52_bn(self.conv5_2(x)))
        x = F.relu(self.conv53_bn(self.conv5_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return x
However, a more elegant version of the same could be found here
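For reference, the usual way to cut down the repetition (a sketch of the common config-driven pattern, not necessarily the linked version) is to generate the conv/BN/ReLU blocks from a layer specification:

# VGG-16 feature extractor built from a config list; 'M' marks max-pooling.
import torch.nn as nn

CFG = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
       512, 512, 512, 'M', 512, 512, 512, 'M']

def make_features(cfg, in_channels=3):
    layers = []
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers += [nn.Conv2d(in_channels, v, kernel_size=3, padding=1),
                       nn.BatchNorm2d(v),
                       nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

features = make_features(CFG)  # followed by the fc6/fc7/fc8 classifier head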
