linear layer in pytorch not able to forward using torch-summary - audio

from torch import nn
from torchsummary import summary
class CNNNetwork(nn.Module):
    """CNN classifier: three conv blocks -> flatten -> linear -> softmax.

    Each conv block is ``Conv2d(kernel_size=3, stride=1, padding=2)`` followed
    by ``ReLU`` and ``MaxPool2d(2)``.  For a spatial dimension of size ``n``
    the convolution yields ``(n + 2*2 - 3) // 1 + 1 = n + 2`` and the pooling
    yields ``(n + 2) // 2``.  For a ``(1, 11, 8)`` sample this gives
    11 -> 6 -> 4 -> 3 and 8 -> 5 -> 3 -> 2, i.e. ``64 * 3 * 2 = 384`` flattened
    features.  A hard-coded ``64 * 8 * 11`` does not match that, which is why
    the linear layer could not be run; the size is now measured instead.

    Args:
        input_shape: ``(channels, height, width)`` of one sample, without the
            batch dimension.
        num_classes: Number of output classes.
    """

    def __init__(self, input_shape=(1, 11, 8), num_classes=5):
        super().__init__()
        # Local import: only `nn` is imported at file level.
        import torch

        self.conv1 = self._conv_block(input_shape[0], 16)
        self.conv2 = self._conv_block(16, 32)
        self.conv3 = self._conv_block(32, 64)
        self.flatten = nn.Flatten()

        # Push a dummy sample through the conv stack so the linear layer's
        # in_features always matches the real flattened size.
        with torch.no_grad():
            probe = torch.zeros(1, *input_shape)
            probe = self.conv3(self.conv2(self.conv1(probe)))
            flat_features = self.flatten(probe).shape[1]

        self.linear = nn.Linear(flat_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return one Conv2d -> ReLU -> MaxPool2d block."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, input_data):
        """Return class probabilities of shape ``(batch, num_classes)``."""
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return predictions
# Script entry point: build the network, move it to the target device and
# print a per-layer summary with torchsummary.
if __name__ == "__main__":
    cnn = CNNNetwork()
    # NOTE(review): `dml` is not defined in this snippet — presumably a
    # DirectML device object created elsewhere; confirm.
    cnn.to(device=dml)
    # (1, 11, 8) is the per-sample input size handed to torchsummary
    # (no batch dimension).
    summary(cnn,(1,11,8),device=dml)
In this code, the line
logits = self.linear(x) causes a problem when generating the summary.
There is no error message, only this:
enter image description here
I want to generate a summary of the model and later train an audio classifier whose MFCC input size is (30, 281).
Could you also explain how the input size of the linear layer should be calculated?

Related

Convolutional Autoencoder outputs black images

I need to train an autoencoder on Adaptiope dataset. I am using a ResNet18 backbone for my encoder part.
The issue I encounter is that even after many epochs, the reconstructed image is always completely black.
On the other hand, when I use a simpler Autoencoder without the resnet18 backbone, reconstructed images turn out close to what I need them to be.
I am trying to understand why is this the case. I am a novice in the field and still cannot grasp the problem. It looks like an architectural problem but I cannot wrap my head around it.
This is my "vanilla" Encoder, with no resnet18 backbone:
`
class Encoder(nn.Module):
    """Plain convolutional encoder with a softmax head on the latent code."""

    def __init__(self,
                 num_input_channels : int,
                 base_channel_size : int,
                 latent_dim : int
                 ):
        """
        Inputs:
            - num_input_channels : Number of channels of the input image
            - base_channel_size : Channels produced by the first convolutions; later ones use twice as many
            - latent_dim : Dimensionality of latent representation z
        """
        super().__init__()
        narrow = base_channel_size
        wide = 2 * narrow
        # (in_channels, out_channels, stride) per 3x3 convolution; every
        # stride-2 entry halves the spatial resolution.
        conv_specs = (
            (num_input_channels, narrow, 2),
            (narrow, narrow, 1),
            (narrow, wide, 2),
            (wide, wide, 1),
            (wide, wide, 2),
        )
        stages = []
        for c_in, c_out, stride in conv_specs:
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, stride=stride))
            stages.append(nn.ReLU())
        # Flatten the feature grid, then project it to the latent vector.
        # The flattened size is fixed at 351232 features.
        stages.append(nn.Flatten())
        stages.append(nn.Linear(351232, latent_dim))
        self.layer1 = nn.Sequential(*stages)
        self.linear2 = nn.Linear(latent_dim, 20*8)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return ``(latent code, softmax over the 160-way head)``."""
        latent = self.layer1(x)
        return latent, self.softmax(self.linear2(latent))
This is the Encoder with Resnet18 backbone:
class Encoder(nn.Module):
    """Encoder built on a frozen, pretrained ResNet18 followed by small trainable heads."""

    def __init__(self,
                 num_input_channels : int,
                 base_channel_size : int,
                 latent_dim : int
                 ):
        """
        Inputs:
            - num_input_channels : Number of input channels of the image (not read by this constructor)
            - base_channel_size : Only stored in the local `c_hid`, which is not used afterwards
            - latent_dim : Dimensionality of latent representation z
        """
        super().__init__()
        c_hid = base_channel_size
        self.fc_hidden1, self.fc_hidden2, self.CNN_embed_dim = 224, 768, 224
        # CNN architectures
        # NOTE(review): the ch*/k*/s*/pd* attributes below are stored but not
        # referenced anywhere else in this class.
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3) # 2d kernel size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2) # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0) # 2d padding
        # encoding components
        model = models.resnet18(pretrained=True)
        # Freeze every backbone parameter so only the heads receive gradients.
        # NOTE(review): freezing parameters does not put BatchNorm layers in
        # eval mode; their running statistics still update under .train().
        for param in model.parameters():
            param.requires_grad = False
        modules = list(model.children())[:-1] # delete the last fc layer.
        self.resnet_modules=modules
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(model.fc.in_features, self.fc_hidden1)
        self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        self.relu = nn.ReLU(inplace=True)
        self.layer = nn.Sequential(
            nn.Flatten(), # Image grid to single feature vector
            nn.Linear(224, latent_dim)) #8x224
        #self.flatten = nn.Flatten(), # Image grid to single feature vector
        #self.linear1 = nn.Linear(351232, latent_dim)
        self.linear2 = nn.Linear(latent_dim, 20*8)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return ``(enc, p)``: the latent code and a softmax over ``linear2(enc)``."""
        x = self.resnet(x)
        # Collapse the backbone output to (batch, 512) before the dense head.
        x = x.reshape(x.shape[0], 512)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        enc = self.layer(x)
        #x = self.fc2(x)
        #x = self.bn(x)
        # enc = self.layer1(x)
        lin_p = self.linear2(enc)
        p = self.softmax(lin_p)
        return enc, p
The decoder is the same for both.
class Decoder_N(nn.Module):
    """Decoder mapping a latent vector back to a 3-channel image in [-1, 1]."""

    def __init__(self,
                 num_input_channels : int,
                 base_channel_size : int,
                 latent_dim : int,
                 act_fn : object = nn.GELU):
        """
        Inputs:
            - num_input_channels : Accepted for interface compatibility; not read here
            - base_channel_size : Accepted for interface compatibility; the width is fixed at 224
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Accepted for interface compatibility; ReLU is used throughout
        """
        super().__init__()
        narrow = 224
        wide = 2 * narrow
        # Expand the latent vector to 351232 features.
        self.linear = nn.Sequential(nn.Linear(latent_dim, 351232), nn.ReLU())

        # Keyword set shared by the three stride-2 transposed convolutions.
        upsample = dict(kernel_size=3, output_padding=1, padding=1, stride=2)
        layers = [
            nn.ConvTranspose2d(wide, wide, **upsample),
            nn.ReLU(),
            nn.Conv2d(wide, wide, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(wide, narrow, **upsample),
            nn.ReLU(),
            nn.Conv2d(narrow, narrow, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(narrow, 3, **upsample),
            # Inputs are scaled to [-1, 1], so the output is bounded likewise.
            nn.Tanh(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Decode latent batch ``x`` into images."""
        expanded = self.linear(x)
        # Reshape to a 28x28 grid; the channel count is inferred.
        grid = expanded.reshape(expanded.shape[0], -1, 28, 28)
        return self.net(grid)
`
num_input_channels : 224,
base_channel_size : 3
latent_dim : 64
I expected the "advanced" autoencoder to extract my features better, but apparently this is not the case.
I solved the issue: there were issues with the normalization of images and the BatchNorm layer. I accidentally used mean and std of ImageNet for the dataset instead of the correct ones. Additionally, during training I forgot to add regularizers for the different components of my loss, leading my model to learn literally nothing.

conv2d() received an invalid combination of arguments

After the ResNet convolutions, I want to further compress the 256 dimensions down to 20. I added a layer at the end for this, but forward propagation fails at that layer and I don't know why.
def forward(self, x):
    # First stage: the output of downsample1 is added back onto the output
    # of layer1_1 (skip connection).
    x = self.conv1(x)
    dif_residual1 = self.downsample1(x)
    x = self.layer1_1(x)
    x =x + dif_residual1
    # Two further stages, each adding its own input back onto its output.
    residual = x
    x = self.layer1_2(x)
    x = x + residual
    residual = x
    x = self.layer1_3(x)
    x = x + residual
    if self.out_channel != 256:
        # NOTE(review): this binds `x` to the module object itself rather
        # than to its output; from here on `x` is no longer a Tensor.
        x = self.layer2
    # All-ones 1x1 kernel built on every call; shape is (batch_size, out_channel, 1, 1).
    filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
    x = F.conv2d(x, weight=filters, padding=0)
    # NOTE(review): no return statement is visible in this snippet.
The dimension of x before I do if is:
x = {Tensor:(1,256,117,240)}
But after the if statement is executed, it becomes what the picture shows.
The error I get is this:
x = F.conv2d(feature, weight=filters, padding=0)
TypeError: conv2d() received an invalid combination of arguments - got (Sequential, weight=Tensor, padding=int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
Encounter a new problem:
File "D:\software\Anaconda\envs\torch1.10\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 117, 240]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My code:
class VGG(nn.Module):
    """VGG-style stack of 3x3 Conv -> BatchNorm -> LeakyReLU blocks.

    ``forward`` returns ``(x, feature)``: ``feature`` is the output of the last
    conv block and ``x`` is that feature map convolved with an all-ones
    1x1 kernel.
    """

    def __init__(self, in_channel, out_channel=None, init_weights=True, device='gpu',batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        # Resolve the device used for the filter tensor created in forward().
        if device == 'gpu':
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")
        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        # Without an explicit out_channel the network ends at 256 channels;
        # otherwise one more conv block maps 256 -> out_channel.
        if out_channel == None:
            self.out_channel = 256
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
                          padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )
        # NOTE(review): `_init_weight` is not defined in the visible class,
        # so the default init_weights=True would fail here unless it exists
        # elsewhere.
        if init_weights:
            self._init_weight()

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        # NOTE(review): F.conv2d expects weight shaped
        # (out_channels, in_channels, kH, kW); here batch_size sits in the
        # out_channels position — confirm this is intended.
        filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
        x = F.conv2d(x, weight = filters, padding = 0)
        return x,feature
# Training / validation driver.
# NOTE(review): the nesting below was reconstructed from a paste that lost its
# indentation; it assumes a checkpoint and a validation pass happen once per
# epoch — confirm. `batch_size`, `start_epoch`, `epochs`, the loaders,
# `optimizer`, `loss_function`, `tonser_nolmal`, `Evaluation`, `device` and
# `save_path` are defined outside this snippet.
out_channel = 20
model = VGG(in_channel=12, out_channel=out_channel, init_weights=True, batch_size=batch_size)
for epoch in range(start_epoch+1,epochs):
    # train
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs,feature = model(images.to(device))
        # Post-process the model output before the loss (helper defined elsewhere).
        outputs = tonser_nolmal(outputs)
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)
    # Save model and optimizer state together with the epoch index.
    checkpoint = {
        "net": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch
    }
    torch.save(checkpoint, save_path + "/model-{}.pth".format(epoch))
    # validate
    model.eval()
    # Despite its name, count_acc accumulates the validation loss.
    count_acc = 0.0
    count_mae = 0.0
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs,_ = model(val_images.to(device))
            # outputs = F.normalize(outputs,dim=3)
            outputs = tonser_nolmal(outputs)
            loss = loss_function(outputs, val_labels.to(device))
            count_acc = count_acc + loss.item()
            mae = Evaluation().MAE(outputs, val_labels.to(device))
            count_mae = count_mae + mae.item()
The error is likely to be caused by the following variable assignment:
if self.out_channel != 256:
x = self.layer2
which can be easily fixed by changing it to
x = self.layer2(x)
Update:
As OP updated his code, I did some test. There were several things which I found problematic:
self._init_weight was not provided, so I commented it out;
filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device). The filter weight should have a shape of (c_out, c_in, kernel_size, kernel_size). However, batch_size appeared in the position of out_channels.
The role of filter in the forward was not clear to me. If you wanted to reduce the out_channels further from 256 to 20, then initializing your model with VGG(..., out_channel=20) is sufficient. Basically, self.conv3_3 would do the job.
On my end, I modified the code a little bit and it ran successfully:
import sys
import torch
import torch.nn as nn
from tqdm import tqdm
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import torch.nn.functional as F
# Synthetic stand-in data: 1000 random tensors of shape (12, 64, 64),
# served one per batch in shuffled order.
dataset = [torch.randn(12, 64, 64) for _ in range(1000)]
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
class VGG(nn.Module):
    """VGG-style feature extractor built from 3x3 Conv -> BatchNorm -> LeakyReLU blocks.

    Args:
        in_channel: Number of channels of the input tensor.
        out_channel: Channels of the final feature map. ``None`` keeps the
            256-channel output of the third stage.
        init_weights: Accepted for interface compatibility; no custom
            initialisation is applied because ``_init_weight`` is not defined.
        device: Stored on ``self.device`` for callers; ``forward`` follows the
            input tensor's device instead.
        batch_size: Stored on ``self.batch_size``; not used by ``forward``.

    ``forward`` returns ``(x, feature)``.  When the last block yields 256
    channels, ``x`` is ``feature`` reduced to 20 channels by an all-ones 1x1
    convolution (each output channel is the sum over input channels);
    otherwise ``x`` and ``feature`` are the same tensor.
    """

    def __init__(self, in_channel, out_channel=None, init_weights=True, device='cpu', batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        self.device = device
        modes = 'reflect'

        out_channel1 = 64
        self.conv1_1 = nn.Sequential(*self._conv_layers(in_channel, out_channel1, modes))
        self.conv1_2 = nn.Sequential(*self._conv_layers(out_channel1, out_channel1, modes))

        out_channel2 = 128
        self.conv2_1 = nn.Sequential(*self._conv_layers(out_channel1, out_channel2, modes))
        self.conv2_2 = nn.Sequential(*self._conv_layers(out_channel2, out_channel2, modes))

        self.out_channel3 = out_channel3 = 256
        self.conv3_1 = nn.Sequential(*self._conv_layers(out_channel2, out_channel3, modes))
        self.conv3_2 = nn.Sequential(*self._conv_layers(out_channel3, out_channel3, modes))

        last_layers = self._conv_layers(out_channel3, out_channel3, modes)
        if out_channel is not None:
            # One extra block maps the 256 channels to the requested width.
            last_layers += self._conv_layers(out_channel3, out_channel, modes)
        self.conv3_3 = nn.Sequential(*last_layers)

    @staticmethod
    def _conv_layers(in_channels, out_channels, padding_mode):
        """Return the layers of one Conv(3x3, no bias) -> BatchNorm -> LeakyReLU block."""
        return [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1,
                      padding=1, padding_mode=padding_mode, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
        ]

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        if x.shape[1] == self.out_channel3:
            # Weight layout is (out_channels, in_channels, kH, kW).  Building
            # it from `x` keeps device and dtype in sync with the input even
            # after model.to(...) / model.double().
            filters = x.new_ones(20, self.out_channel3, 1, 1)
            x = F.conv2d(x, weight=filters, padding=0)
        return x, feature
# Smoke test: run one batch through both configurations and print the shape
# of the first returned tensor.
out_channel = 20
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Case 1: no explicit out_channel.
model = VGG(in_channel=12, out_channel=None, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
# Case 2: out_channel=20.
model = VGG(in_channel=12, out_channel=20, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
Outputs:
torch.Size([1, 20, 64, 64])
torch.Size([1, 20, 64, 64])

`*** RuntimeError: mat1 dim 1 must match mat2 dim 0` whenever I run model(images)

def __init__(self):
    # NOTE(review): only the convolutional part is shown; the `self.fc`
    # mentioned in the question is not part of this snippet.
    super().__init__()
    # Three unpadded stride-2 convolutions (kernel sizes 5, 3, 3) on a
    # single-channel input, each followed by batch norm; the last block has
    # no ReLU.
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )
How can I deal with this error? I think the error is with self.fc, but I can't say how to fix it.
The output from self.conv(x) is of shape torch.Size([32, 64, 2, 2]): 32*64*2*2 = 8192 (this is equivalent to self.conv_out_size). The fully connected layer expects a flat feature vector per sample, i.e. you need to flatten the convolution output in the forward function before passing it to the fully connected layer.
i.e.
class Network():
    ...

    def forward(self, x):
        """Run the conv stack, flatten its output and apply the fully connected layer."""
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        # NOTE: view(-1, 32*64*2*2) folds the batch dimension into the
        # feature vector, giving one row of 8192 values; see the EDIT below
        # for the per-sample flatten.
        conv_out = conv_out.view(-1, 32*64*2*2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x
output
torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])
EDIT:
I think you're using self._get_conv_out function wrong.
It should be
def _get_conv_out(self, shape):
    """Return the number of elements self.conv produces for one sample of ``shape``."""
    # A single dummy sample is enough: batch of 1, not (32, *shape).
    probe = torch.zeros(1, *shape)
    return self.conv(probe).numel()
then, in the forward pass, you can use
conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)
For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
I had the same problem however I have solved it by using a batch of 32 and tensor size of [3, 32, 32] for my images and the following configurations on my model. I am using ResNet with 9 CNN and looking for 4 outputs.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])
def conv_block(in_channels, out_channels, pool=False):
    """Return a 3x3 Conv -> BatchNorm -> ReLU block, optionally ending in a 2x2 max-pool."""
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    norm = nn.BatchNorm2d(out_channels)
    act = nn.ReLU(inplace=True)
    tail = [nn.MaxPool2d(2)] if pool else []
    return nn.Sequential(conv, norm, act, *tail)
class ResNet9(ImageClassificationBase):
    """Small residual CNN: four conv blocks, two residual pairs and a linear classifier."""

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        # Head: 4x4 max-pool, flatten, dropout, then a 512 -> num_classes projection.
        head = [
            nn.MaxPool2d(4),
            nn.Flatten(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, xb):
        """Return the classifier output for input batch ``xb``."""
        features = self.conv2(self.conv1(xb))
        features = self.res1(features) + features  # first residual connection
        features = self.conv4(self.conv3(features))
        features = self.res2(features) + features  # second residual connection
        return self.classifier(features)

Pytorch: The size of tensor a (24) must match the size of tensor b (48) at non-singleton dimension 3

Below code works fine and generate proper results.
import torch
import torch.nn as nn
import torch.nn.functional as F
from modules import ConvLSTMCell, Sign
class EncoderCell(nn.Module):
    """Encoder step: one strided convolution followed by three ConvLSTM cells."""

    def __init__(self):
        super(EncoderCell, self).__init__()
        self.conv = nn.Conv2d(
            3, 64, kernel_size=3, stride=2, padding=1, bias=False)

        # Settings shared by all three recurrent cells.
        lstm_args = dict(
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn1 = ConvLSTMCell(64, 256, **lstm_args)
        self.rnn2 = ConvLSTMCell(256, 512, **lstm_args)
        self.rnn3 = ConvLSTMCell(512, 512, **lstm_args)

    def forward(self, input, hidden1, hidden2, hidden3):
        """Return ``(x, hidden1, hidden2, hidden3)`` after one encoder step."""
        x = self.conv(input)
        new_states = []
        stages = ((self.rnn1, hidden1), (self.rnn2, hidden2), (self.rnn3, hidden3))
        for rnn, state in stages:
            state = rnn(x, state)
            # The first element of a cell's state feeds the next layer.
            x = state[0]
            new_states.append(state)
        return (x, *new_states)
class Binarizer(nn.Module):
    """Map 512-channel features to 32 channels, squash with tanh and apply ``Sign``."""

    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        """Return ``sign(tanh(conv(input)))``."""
        squashed = F.tanh(self.conv(input))
        return self.sign(squashed)
class DecoderCell(nn.Module):
    """Decoder step: 1x1 conv, four ConvLSTM cells with pixel-shuffle upsampling, 1x1 conv to RGB."""

    def __init__(self):
        super(DecoderCell, self).__init__()
        self.conv1 = nn.Conv2d(
            32, 512, kernel_size=1, stride=1, padding=0, bias=False)

        # Settings shared by all four cells; only hidden_kernel_size differs.
        lstm_args = dict(kernel_size=3, stride=1, padding=1, bias=False)
        self.rnn1 = ConvLSTMCell(512, 512, hidden_kernel_size=1, **lstm_args)
        self.rnn2 = ConvLSTMCell(128, 512, hidden_kernel_size=1, **lstm_args)
        self.rnn3 = ConvLSTMCell(128, 256, hidden_kernel_size=3, **lstm_args)
        self.rnn4 = ConvLSTMCell(64, 128, hidden_kernel_size=3, **lstm_args)

        self.conv2 = nn.Conv2d(
            32, 3, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        """Return ``(x, hidden1, hidden2, hidden3, hidden4)`` after one decoder step."""
        x = self.conv1(input)
        new_states = []
        stages = (
            (self.rnn1, hidden1),
            (self.rnn2, hidden2),
            (self.rnn3, hidden3),
            (self.rnn4, hidden4),
        )
        for rnn, state in stages:
            state = rnn(x, state)
            # pixel_shuffle(…, 2) trades 4x channels for 2x spatial size.
            x = F.pixel_shuffle(state[0], 2)
            new_states.append(state)
        x = F.tanh(self.conv2(x)) / 2
        return (x, *new_states)
Now I have changed self.conv and added a layer taken from a pretrained ResNet. It now shows a tensor size mismatch error after training. Everything else is the same; I only added these lines to the code. I have marked the changed lines with **.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from modules import ConvLSTMCell, Sign
class EncoderCell(nn.Module):
    """Encoder step: one strided convolution followed by three ConvLSTM cells.

    The lines wrapped in ``**`` are the asker's highlight markers for the
    changed code; they are not Python syntax.
    """

    def __init__(self):
        super(EncoderCell, self).__init__()
        #self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        # NOTE(review): `layer4` of the loaded ResNet is overwritten with a
        # newly constructed Conv2d before being used, so `self.conv` carries
        # no pretrained weights.
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        # Each cell returns its new state; element 0 of that state is fed to
        # the next layer.
        x = self.conv(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        return x, hidden1, hidden2, hidden3
class Binarizer(nn.Module):
    """Map 512-channel features to 32 channels, squash with tanh and apply ``Sign``."""

    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        """Return ``sign(tanh(conv(input)))``."""
        squashed = F.tanh(self.conv(input))
        return self.sign(squashed)
class DecoderCell(nn.Module):
    """Decoder step: input conv, four ConvLSTM cells with pixel-shuffle upsampling, output conv.

    The lines wrapped in ``**`` are the asker's highlight markers for the
    changed code; they are not Python syntax.
    """

    def __init__(self):
        super(DecoderCell, self).__init__()
        # NOTE(review): as in the encoder, `layer4` is replaced by a newly
        # constructed Conv2d, so no pretrained weights are used. This conv
        # also uses kernel_size=3, stride=2, padding=1, whereas the earlier
        # working version used kernel_size=1, stride=1, padding=0; a stride
        # of 2 halves the spatial size — likely the source of the
        # 24-vs-48 mismatch, confirm.
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(32, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv1 = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        # Second full ResNet-50 is loaded only to have its layer4 replaced.
        **resConv2 = models.resnet50(pretrained=True)
        resConv2.layer4 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = resConv2.layer4**

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)
        # After every cell, pixel_shuffle(…, 2) trades 4x channels for 2x
        # spatial size.
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)
        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)
        # tanh bounds the output to (-1, 1); dividing by 2 gives (-0.5, 0.5).
        x = F.tanh(self.conv2(x)) / 2
        return x, hidden1, hidden2, hidden3, hidden4
You are doing it the wrong way. Here is some explanation:
**resConv = models.resnet50(pretrained=True) # you are reading a model
Now you are replacing a layer in that model with a newly initialized layer. Secondly, layer4 in resnet50 is a sequential block containing multiple layers. Use print to see the exact layers in the model.
resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
here you are using new layer.
self.conv = resConv.layer4**
As per your query regarding usage of pretrained layer, you should do it like this,
resConv = models.resnet50(pretrained=True)
print(resConv) #see the layer which you want to use
self.conv = resConv.conv1 # replace conv1 with that layer
# note: conv1 is the name of first conv layer in resnet
To add to this, I would also recommend acquiring and adding this layer (or the weights and biases) outside of the object initialization. Something like:
enc = EncoderCell()
resnet50 = models.resnet50(pretrained=True)
and then either
enc.conv = resnet50.conv1
or more ideally
enc.conv.load_state_dict(resnet50.layer1.state_dict())
The reason being, calling state_dict() on a nn.Module class creates a clone of the parameters (weights and biases in this case) which can be loaded via nn.Module.load_state_dict() method as long as the two instances of nn.Module share the same shape. So you get the pretrained weights and they are completely detached from the pretrained model. Then you can get rid of the pretrained model since it could be rather large in memory.
del resnet50
I submitted a potential improvement to the other answer, but to address the errors you are getting I am answering here also. If the code runs before your edits, and the layer you are trying to change is the same shape as the previous one, then my guess is that it may have to do with the computational graph that is formed from creating the resnet50 object. I would recommend the approach I mentioned in my edit to the other answer, but I will state it here again (note, this assumes you keep the code as it was originally):
# instantiate your encoder (repeat these steps with the decoder as well)
enc = EncoderCell()
# get the pretrained model
resnet = models.resnet50(pretrained=True)
# load the state dict into the regular conv layer
# NOTE(review): load_state_dict only succeeds when parameter names and shapes
# match; check that the chosen ResNet sub-module matches enc.conv before
# relying on this.
enc.conv.load_state_dict(resnet.layer4.state_dict())
This should load the pretrained weights and biases from the resnet50 model into your conv layer, and this can be done to the decoder conv layer as well as long as they all share the same shape.
To do more testing with your mismatch error I would recommend either using a debugger or print statements in the forward() method of the models to see the shape of the tensor after each layer is applied, like so
def forward(self, input, hidden1, hidden2, hidden3, hidden4):
    """Decoder step instrumented with shape prints for debugging size mismatches."""
    # Print the incoming shape; `x` does not exist before the first layer runs.
    print(input.size())
    x = self.conv1(input)
    print(x.size())
    hidden1 = self.rnn1(x, hidden1)
    x = hidden1[0]
    x = F.pixel_shuffle(x, 2)
    hidden2 = self.rnn2(x, hidden2)
    x = hidden2[0]
    x = F.pixel_shuffle(x, 2)
    hidden3 = self.rnn3(x, hidden3)
    x = hidden3[0]
    x = F.pixel_shuffle(x, 2)
    hidden4 = self.rnn4(x, hidden4)
    x = hidden4[0]
    x = F.pixel_shuffle(x, 2)
    x = F.tanh(self.conv2(x)) / 2
    return x, hidden1, hidden2, hidden3, hidden4
and of course you can put the print statements where ever else in the forward method. I would also highly recommend a debugger; pycharm makes this quite easy, and also makes it easy to see the state of variables in scientific mode beside the python console it gives. It might be worth looking up ways to calculate size of variables after they pass through certain layers like convolutional layers. This is well understood and formulas exist to calculate the size of the dimensions based on the initial size, the filter size, stride width, and padding.

How to re-use old weights in a slightly modified model?

I have a CNN network built like this for a particular task.
class Net(nn.Module):
    """Three 3x3 convolutions (1 -> 128 -> 256 -> 2 channels) with ReLU, then a softmax."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the softmax (taken over dimension 2) of the conv stack output."""
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        # The softmax dimension is unchanged; it is only spelled out as a keyword.
        x = F.softmax(x, dim=2)
        return x
The model is stored using the torch built-in method like this.
# Build the model and optimizer, then save both state dicts in one file.
net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)
state = {
    'state_dict': net.state_dict(),
    'opt': optimizer.state_dict(),
}
torch.save(state, 'model.pt')
I have increased a single layer in the network while the rest of the model was kept the same.
class Net(nn.Module):
    """Extended network: a 256 -> 256 convolution is inserted before the final 256 -> 2 convolution."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (new added)
        self.conv14 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the softmax (taken over dimension 2) of the conv stack output."""
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))  # (new added)
        x = F.relu(self.conv14(x))
        x = F.softmax(x, dim=2)
        return x
Since the other conv layers are kept the same, is there any way I can re-use the saved model to load the weights to conv11, conv12 and conv14 ? Instead of starting to train from beginning ?
Assume you trained the following model and now you make a minor modification to it (like adding a layer) and want to use your trained weights
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    """Three 3x3 convolutions (1 -> 128 -> 256 -> 2 channels) with ReLU, then a softmax."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the softmax (taken over dimension 2) of the conv stack output."""
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        # The softmax dimension is unchanged; it is only spelled out as a keyword.
        x = F.softmax(x, dim=2)
        return x
# Instantiate the model and a plain SGD optimizer over all of its parameters.
net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)
you save the model (and the optimizer state) with:
# Bundle model weights and optimizer state in one dict and write it to 'state.pt'.
state = {'state_dict': net.state_dict(),
         'opt': optimizer.state_dict()
         }
torch.save(state, 'state.pt')
Your new model is (note that corresponding layers keep the same name, so you don't make conv13 -> conv14):
class NewNet(nn.Module):
    """Extended network: `convnew` is inserted while the existing layers keep their names.

    Keeping the names `conv11`, `conv12` and `conv13` unchanged lets their
    saved weights be matched by key when loading an older checkpoint.
    """

    def __init__(self):
        super(NewNet, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.convnew = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (new added)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        """Return the softmax (taken over dimension 2) of the conv stack output."""
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.convnew(x))  # (new added)
        x = F.relu(self.conv13(x))
        x = F.softmax(x, dim=2)
        return x
Now you can load your state.pt file:
state = torch.load('state.pt')
state is a dict, state['opt'] contains all the parameters that you had for your optimizer, for example state['opt']['param_groups'][0]['lr'] gives
0.001
Assuming corresponding layers kept the same name, you can recover your parameters and initialize the appropriate layers by:
net = NewNet()
# strict=False loads every saved entry whose name exists in the new model and
# tolerates entries that are absent on either side; shapes of the entries
# that do match must still agree.
missing_keys, unexpected_keys = net.load_state_dict(state['state_dict'], strict=False)
# `missing_keys` names the parameters that had no saved value (here the newly
# added layer); they keep their fresh initialisation.

Resources