from torch import nn
from torchsummary import summary


class CNNNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # 3 conv blocks / flatten / linear / softmax
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(64 * 8 * 11, 5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return predictions


if __name__ == "__main__":
    cnn = CNNNetwork()
    cnn.to(device=dml)
    summary(cnn, (1, 11, 8), device=dml)
In this code, logits = self.linear(x) seems to be where the summary generation breaks. There are no error messages, only the truncated output shown in the attached screenshot.
I want to generate a summary of the model and later train an audio classifier where the MFCC size is (30, 281). Could you also explain how the input size for the linear layer is properly calculated?
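One common way to get this number without manual arithmetic is to push a dummy tensor through the conv blocks and read off the flattened size; a minimal sketch, assuming the (30, 281) MFCC enters the network as a single-channel image of shape (1, 30, 281):

import torch

# probe the conv stack with a dummy input to read off the flattened size
cnn = CNNNetwork()
with torch.no_grad():
    dummy = torch.zeros(1, 1, 30, 281)            # (batch, channels, n_mfcc, time) - assumed layout
    out = cnn.conv3(cnn.conv2(cnn.conv1(dummy)))
print(out.shape)    # (1, channels, height, width) after the conv blocks
print(out.numel())  # with batch size 1, this product is the in_features the Linear layer needs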
I am unable to find the error. Input: 32x32 gray-scale images:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,     # gray-scale images
                out_channels=16,
                kernel_size=5,     # 5x5 convolutional kernel
                stride=1,          # no. of pixels the kernel moves at a time
                padding=2,         # to preserve the size of the input image
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer
        self.out = nn.Linear(32 * 7 * 7, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output


cnn = CNN()
cnn
Your linear layer expects an input of size 32x7x7. Given that your conv1 and conv2 layers perform max pooling with stride=2, that means your network is configured for an input size of 28x28 (the usual MNIST input size) and not the 32x32 you expect.
Moreover, considering the values in your error message (64x2304), I assume you are working with batch_size=64, but your images are NOT 32x32, but rather 32x?? with one side slightly larger than 32, resulting in a feature map of 32x8x9 after the pooling.
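For reference, the expected flatten size can be checked with the standard output-size formula for convolutions and pooling, out = floor((in + 2*padding - kernel) / stride) + 1; a minimal sketch applying it to the layers above with a 28x28 input:

def out_size(size, kernel, stride=1, padding=0):
    # output-size formula shared by nn.Conv2d and nn.MaxPool2d (dilation = 1)
    return (size + 2 * padding - kernel) // stride + 1

h = 28                                # MNIST-style input
h = out_size(h, kernel=5, padding=2)  # conv1: 28 -> 28 (padding preserves the size)
h = out_size(h, kernel=2, stride=2)   # pool1: 28 -> 14
h = out_size(h, kernel=5, padding=2)  # conv2: 14 -> 14
h = out_size(h, kernel=2, stride=2)   # pool2: 14 -> 7
print(32 * h * h)                     # 32 * 7 * 7 = 1568, the in_features of self.out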
def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )
How can I deal with this error? I think the error is with self.fc, but I can't say how to fix it.
The output from self.conv(x) has shape torch.Size([32, 64, 2, 2]): 32*64*2*2 = 8192 (this is equivalent to self.conv_out_size). A fully connected layer expects a single-dimension vector as input, so you need to flatten the conv output in the forward function before passing it to the fully connected layer,
i.e.
class Network(nn.Module):
    ...
    def forward(self, x):
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        conv_out = conv_out.view(-1, 32 * 64 * 2 * 2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x
output
torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])
EDIT:
I think you're using the self._get_conv_out function incorrectly. It should be
def _get_conv_out(self, shape):
    output = self.conv(torch.zeros(1, *shape))  # not (32, *size)
    return int(numpy.prod(output.size()))
then, in the forward pass, you can use
conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)
For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
I had the same problem, but I solved it by using a batch size of 32 and a tensor size of [3, 32, 32] for my images, with the following model configuration. I am using a ResNet with 9 convolutional layers and 4 output classes.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])


def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)


class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out
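To see why nn.Linear(512, num_classes) is the right size here, you can trace the shapes for a 32x32 input; a minimal sketch reusing the conv_block helper above (the residual blocks are skipped because they preserve the shape):

import torch
from torch import nn

x = torch.randn(2, 3, 32, 32)            # a dummy batch of 32x32 RGB images
x = conv_block(3, 64)(x)                 # -> [2, 64, 32, 32]
x = conv_block(64, 128, pool=True)(x)    # -> [2, 128, 16, 16]
x = conv_block(128, 256, pool=True)(x)   # -> [2, 256, 8, 8]
x = conv_block(256, 512, pool=True)(x)   # -> [2, 512, 4, 4]
x = nn.MaxPool2d(4)(x)                   # -> [2, 512, 1, 1]
print(nn.Flatten()(x).shape)             # torch.Size([2, 512]) -> matches nn.Linear(512, num_classes)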
The code below works fine and generates correct results.
import torch
import torch.nn as nn
import torch.nn.functional as F

from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        self.conv = nn.Conv2d(
            3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        self.conv1 = nn.Conv2d(
            32, 512, kernel_size=1, stride=1, padding=0, bias=False)
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.conv2 = nn.Conv2d(
            32, 3, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2

        return x, hidden1, hidden2, hidden3, hidden4
Now I have changed self.conv and added a pretrained ResNet layer. After training it shows a tensor mismatch error. Everything else is the same; I only added these lines, marked with ** below.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        # self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(32, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv1 = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        **resConv2 = models.resnet50(pretrained=True)
        resConv2.layer4 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = resConv2.layer4**

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2

        return x, hidden1, hidden2, hidden3, hidden4
You are doing it the wrong way; here is some explanation.
resConv = models.resnet50(pretrained=True)  # you are reading a model
Next, you replace a layer in that model with a newly initialized layer. Note that layer4 in resnet50 is a sequential block containing multiple layers; use print to see the exact layers in the model.
resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
And here you are using the new layer.
self.conv = resConv.layer4
As for your question about how to use a pretrained layer, you should do it like this:
resConv = models.resnet50(pretrained=True)
print(resConv) #see the layer which you want to use
self.conv = resConv.conv1 # replace conv1 with that layer
# note: conv1 is the name of first conv layer in resnet
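To make this concrete, printing the relevant pieces shows what each layer actually is (the shapes below are what torchvision's resnet50 reports):

import torchvision.models as models

resConv = models.resnet50(pretrained=True)
print(resConv.conv1)
# Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
print(resConv.layer4)
# a Sequential of Bottleneck blocks whose first conv expects 1024 input channels;
# assigning a fresh Conv2d to resConv.layer4 and reading it back only gives you
# that new, untrained layer, not any pretrained weights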
To add to this, I would also recommend acquiring and adding this layer (or the weights and biases) outside of the object initialization. Something like:
enc = EncoderCell()
resnet50 = models.resnet50(pretrained=True)
and then either
enc.conv = resnet50.conv1
or more ideally
enc.conv.load_state_dict(resnet50.layer1.state_dict())
The reason being, calling state_dict() on a nn.Module class creates a clone of the parameters (weights and biases in this case) which can be loaded via nn.Module.load_state_dict() method as long as the two instances of nn.Module share the same shape. So you get the pretrained weights and they are completely detached from the pretrained model. Then you can get rid of the pretrained model since it could be rather large in memory.
del resnet50
I submitted a potential improvement to the other answer, but to address the errors you are getting I am answering here as well. If the code ran before your edits, and the layer you are trying to change has the same shape as the previous one, then my guess is that the problem has to do with the computational graph that is formed when creating the resnet50 object. I would recommend the approach I mentioned in my edit to the other answer, but I will state it here again (note, this assumes you keep the code as it was originally):
# instantiate your encoder (repeat these steps with the decoder as well)
enc = EncoderCell()

# get the pretrained model
resnet = models.resnet50(pretrained=True)

# load the state dict into the regular conv layer
enc.conv.load_state_dict(resnet.layer4.state_dict())
This should load the pretrained weights and biases from the resnet50 model into your conv layer, and this can be done to the decoder conv layer as well as long as they all share the same shape.
To do more testing with your mismatch error I would recommend either using a debugger or print statements in the forward() method of the models to see the shape of the tensor after each layer is applied, like so
def forward(self, input, hidden1, hidden2, hidden3, hidden4):
    print(input.size())
    x = self.conv1(input)
    print(x.size())
    hidden1 = self.rnn1(x, hidden1)
    x = hidden1[0]
    x = F.pixel_shuffle(x, 2)

    hidden2 = self.rnn2(x, hidden2)
    x = hidden2[0]
    x = F.pixel_shuffle(x, 2)

    hidden3 = self.rnn3(x, hidden3)
    x = hidden3[0]
    x = F.pixel_shuffle(x, 2)

    hidden4 = self.rnn4(x, hidden4)
    x = hidden4[0]
    x = F.pixel_shuffle(x, 2)

    x = F.tanh(self.conv2(x)) / 2

    return x, hidden1, hidden2, hidden3, hidden4
and of course you can put print statements wherever else you like in the forward method. I would also highly recommend a debugger; PyCharm makes this quite easy, and also makes it easy to inspect the state of variables in scientific mode beside the Python console it provides. It might be worth looking up how to calculate the size of a tensor after it passes through certain layers, such as convolutional layers. This is well understood, and a standard formula gives the output dimension from the input size, the filter size, the stride, and the padding: out = floor((in + 2*padding - kernel_size) / stride) + 1.
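As a lighter-weight alternative to scattering print statements through forward(), you can register forward hooks that report every leaf module's output shape during one pass; a small sketch (register_shape_hooks is just an illustrative helper name, not part of any library):

import torch

def register_shape_hooks(model):
    # print the output shape of every leaf module during the next forward pass
    hooks = []
    for name, module in model.named_modules():
        if len(list(module.children())) == 0:  # leaf modules only
            def hook(m, inp, out, name=name):
                shape = tuple(out.shape) if torch.is_tensor(out) else type(out).__name__
                print(f"{name}: {shape}")
            hooks.append(module.register_forward_hook(hook))
    return hooks

# usage: hooks = register_shape_hooks(model); run one forward pass;
# then call h.remove() on each hook to stop the printing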
I am trying to understand why my classifier has a dimension issue. Here is my code:
class convnet(nn.Module):
    def __init__(self, num_classes=1000):
        super(convnet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(576, 128),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Linear(64, num_classes),
            nn.Softmax(),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)  # x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x


def neuralnet(num_classes, **kwargs):
    model = convnet(**kwargs)
    return model
So my issue is: expected 4D input (got 2D input)
I'm quite sure that the error arises from the flatten command, but I don't really understand why, since the classifier uses fully connected layers. If someone knows where I'm going wrong, that would be very helpful!
Thank you
After flattening, the input to the classifier has 2 dimensions (size: [batch_size, 576]), therefore the output of the first linear layer will also have 2 dimensions (size: [batch_size, 128]). That output is then passed to nn.BatchNorm2d, which requires its input to have 4 dimensions (size: [batch_size, channels, height, width]).
If you want to use batch norm on a 2D input, you need to use nn.BatchNorm1d, which accepts either a 3D input (size: [batch_size, channels, length]) or a 2D input (size: [batch_size, length]).
self.classifier = nn.Sequential(
    nn.Linear(576, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(inplace=True),
    nn.Linear(128, 64),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(64),
    nn.Linear(64, num_classes),
    nn.Softmax(),
)
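A quick standalone check makes the difference visible; a minimal sketch:

import torch
from torch import nn

x2d = torch.randn(8, 128)                # [batch_size, features], e.g. the output of nn.Linear(576, 128)
print(nn.BatchNorm1d(128)(x2d).shape)    # works: torch.Size([8, 128])

x4d = torch.randn(8, 128, 7, 7)          # [batch_size, channels, height, width]
print(nn.BatchNorm2d(128)(x4d).shape)    # works: torch.Size([8, 128, 7, 7])

# nn.BatchNorm2d(128)(x2d) would raise: expected 4D input (got 2D input)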