Convolutional Autoencoder outputs black images

I need to train an autoencoder on the Adaptiope dataset. I am using a ResNet18 backbone for the encoder part.
The issue I encounter is that even after many epochs, the reconstructed image is always completely black.
On the other hand, when I use a simpler autoencoder without the ResNet18 backbone, the reconstructed images turn out close to what I need them to be.
I am trying to understand why this is the case. I am a novice in the field and still cannot grasp the problem. It looks like an architectural problem, but I cannot wrap my head around it.
This is my "vanilla" Encoder, with no resnet18 backbone:
class Encoder(nn.Module):
    def __init__(self,
                 num_input_channels: int,
                 base_channel_size: int,
                 latent_dim: int):
        """
        Inputs:
            - num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3
            - base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it.
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Activation function used throughout the encoder network
        """
        super().__init__()
        c_hid = base_channel_size
        self.layer1 = nn.Sequential(
            nn.Conv2d(num_input_channels, c_hid, kernel_size=3, padding=1, stride=2),  # 32x32 => 16x16
            nn.ReLU(),
            nn.Conv2d(c_hid, c_hid, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(c_hid, 2*c_hid, kernel_size=3, padding=1, stride=2),  # 16x16 => 8x8
            nn.ReLU(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1, stride=2),  # 8x8 => 4x4
            nn.ReLU(),
            nn.Flatten(),  # Image grid to single feature vector
            nn.Linear(351232, latent_dim))
        self.linear2 = nn.Linear(latent_dim, 20*8)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        enc = self.layer1(x)
        lin_p = self.linear2(enc)
        p = self.softmax(lin_p)
        return enc, p
This is the Encoder with Resnet18 backbone:
class Encoder(nn.Module):
    def __init__(self,
                 num_input_channels: int,
                 base_channel_size: int,
                 latent_dim: int):
        """
        Inputs:
            - num_input_channels : Number of input channels of the image. For CIFAR, this parameter is 3
            - base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers might use a duplicate of it.
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Activation function used throughout the encoder network
        """
        super().__init__()
        c_hid = base_channel_size
        self.fc_hidden1, self.fc_hidden2, self.CNN_embed_dim = 224, 768, 224
        # CNN architectures
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)  # 2d kernel size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)  # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding
        # encoding components
        model = models.resnet18(pretrained=True)
        for param in model.parameters():
            param.requires_grad = False
        modules = list(model.children())[:-1]  # delete the last fc layer
        self.resnet_modules = modules
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(model.fc.in_features, self.fc_hidden1)
        self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        self.relu = nn.ReLU(inplace=True)
        self.layer = nn.Sequential(
            nn.Flatten(),  # Image grid to single feature vector
            nn.Linear(224, latent_dim))  # 8x224
        # self.flatten = nn.Flatten()  # Image grid to single feature vector
        # self.linear1 = nn.Linear(351232, latent_dim)
        self.linear2 = nn.Linear(latent_dim, 20*8)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        x = self.resnet(x)
        x = x.reshape(x.shape[0], 512)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        enc = self.layer(x)
        # x = self.fc2(x)
        # x = self.bn(x)
        # enc = self.layer1(x)
        lin_p = self.linear2(enc)
        p = self.softmax(lin_p)
        return enc, p
The decoder is the same for both.
class Decoder_N(nn.Module):
    def __init__(self,
                 num_input_channels: int,
                 base_channel_size: int,
                 latent_dim: int,
                 act_fn: object = nn.GELU):
        """
        Inputs:
            - num_input_channels : Number of channels of the image to reconstruct. For CIFAR, this parameter is 3
            - base_channel_size : Number of channels we use in the last convolutional layers. Early layers might use a duplicate of it.
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Activation function used throughout the decoder network
        """
        super().__init__()
        c_hid = 224
        self.linear = nn.Sequential(
            nn.Linear(latent_dim, 351232),
            nn.ReLU()
        )
        self.net = nn.Sequential(
            nn.ConvTranspose2d(2*c_hid, 2*c_hid, kernel_size=3, output_padding=1, padding=1, stride=2),  # 4x4 => 8x8
            nn.ReLU(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(2*c_hid, c_hid, kernel_size=3, output_padding=1, padding=1, stride=2),  # 8x8 => 16x16
            nn.ReLU(),
            nn.Conv2d(c_hid, c_hid, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(c_hid, 3, kernel_size=3, output_padding=1, padding=1, stride=2),  # 16x16 => 32x32
            nn.Tanh()  # The input images are scaled between -1 and 1, hence the output has to be bounded as well
        )

    def forward(self, x):
        x = self.linear(x)
        x = x.reshape(x.shape[0], -1, 28, 28)
        x = self.net(x)
        return x
The parameters I use are:
num_input_channels: 224
base_channel_size: 3
latent_dim: 64
I expected the "advanced" autoencoder to extract my features better, but apparently this is not the case.

I solved the issue: there were problems with the normalization of the images and with the BatchNorm layer. I had accidentally normalized the dataset with the ImageNet mean and std instead of the dataset's own statistics. Additionally, during training I forgot to add the regularizers for the different components of my loss, so the model learned literally nothing.
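For anyone hitting the same thing, the gist of the fix was to normalize with statistics computed on the dataset itself rather than the ImageNet ones. A minimal sketch of how that could look (the "adaptiope/" ImageFolder path and the 224x224 resize are assumptions, not part of the original code):

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# hypothetical path: point ImageFolder at wherever the Adaptiope images live
dataset = datasets.ImageFolder(
    "adaptiope/",
    transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]))
loader = DataLoader(dataset, batch_size=64, num_workers=2)

# accumulate per-channel mean and std over the whole dataset
n_pixels = 0
channel_sum = torch.zeros(3)
channel_sq_sum = torch.zeros(3)
for images, _ in loader:
    n_pixels += images.numel() / 3            # pixels per channel in this batch
    channel_sum += images.sum(dim=[0, 2, 3])
    channel_sq_sum += (images ** 2).sum(dim=[0, 2, 3])
mean = channel_sum / n_pixels
std = (channel_sq_sum / n_pixels - mean ** 2).sqrt()

# use these instead of the ImageNet statistics in the training transform
normalize = transforms.Normalize(mean=mean.tolist(), std=std.tolist())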

Related

linear layer in pytorch not able to forward using torch-summary

from torch import nn
from torchsummary import summary

class CNNNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # 4 conv blocks / flatten / linear / softmax
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(64*8*11, 5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return predictions

if __name__ == "__main__":
    cnn = CNNNetwork()
    cnn.to(device=dml)
    summary(cnn, (1, 11, 8), device=dml)
In this code, the line
logits = self.linear(x)
is causing a problem when generating the summary. There are no error messages, only the truncated output shown in the screenshot.
I want to produce a summary of the model and later train an audio classifier where the MFCC size is (30, 281).
Could you also explain how the input size for the linear layer is properly calculated?
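For what it's worth, one way to get the in_features the linear layer needs, rather than hand-computing 64*8*11, is to push a dummy tensor of the real input size through the convolutional part and read off the flattened shape. A minimal sketch, assuming the CNNNetwork above and treating the (30, 281) MFCC mentioned in the question as a single-channel input:

import torch

cnn = CNNNetwork()
with torch.no_grad():
    # one dummy sample: (batch, channels, height, width)
    dummy = torch.zeros(1, 1, 30, 281)
    features = cnn.conv3(cnn.conv2(cnn.conv1(dummy)))
print(features.shape)              # torch.Size([1, 64, 5, 36]) for a 30x281 input
print(features.flatten(1).shape)   # torch.Size([1, 11520]) -> nn.Linear(64*5*36, 5)

The in_features of the linear layer must match that flattened size for whatever input shape is actually fed to the network.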

Runtime Error: mat1 and mat2 shapes cannot be multiplied (62x2304 and 1568x3)

I am unable to find the error; the input is 32x32 gray images:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,    # gray-scale images
                out_channels=16,
                kernel_size=5,    # 5x5 convolutional kernel
                stride=1,         # no. of pixels the kernel moves at a time
                padding=2,        # to preserve the size of the input image
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer
        self.out = nn.Linear(32*7*7, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
cnn
Your linear layer expects an input of size 32x7x7. Given that your conv1 and conv2 layers perform max pooling with stride=2, your network is configured for an input size of 28x28 (the usual MNIST size), not the 32x32 you expect.
Moreover, considering the values in your error message (62x2304), 62 is the size of the offending batch, and your images are NOT 32x32 but rather 32x??, with one side slightly larger than 32, resulting in a feature map of 32x8x9 after the pooling.
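A quick way to verify this is to push a dummy batch through the convolutional part and look at the resulting shape; a minimal sketch, assuming the CNN class above:

import torch

cnn = CNN()
with torch.no_grad():
    out = cnn.conv2(cnn.conv1(torch.zeros(1, 1, 32, 32)))
print(out.shape)             # torch.Size([1, 32, 8, 8]) for a true 32x32 input
print(out.flatten(1).shape)  # torch.Size([1, 2048]) -> the Linear would need 32*8*8
# the 2304 (= 32*8*9) in the error message means the real inputs are not exactly 32x32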

`*** RuntimeError: mat1 dim 1 must match mat2 dim 0` whenever I run model(images)

def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )
How can I deal with this error? I think the error is in self.fc, but I can't tell how to fix it.
The output of self.conv(x) has shape torch.Size([32, 64, 2, 2]): 32*64*2*2 = 8192 (this is equivalent to self.conv_out_size). A fully connected layer expects a flattened input, i.e. you need to flatten the tensor before passing it to the fully connected layer in the forward function.
i.e.
class Network(nn.Module):
    ...
    def forward(self, x):
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        conv_out = conv_out.view(-1, 32*64*2*2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x
output
torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])
EDIT:
I think you're using self._get_conv_out function wrong.
It should be
def _get_conv_out(self, shape):
    output = self.conv(torch.zeros(1, *shape))  # not (32, *size)
    return int(numpy.prod(output.size()))
then, in the forward pass, you can use
conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)
For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
I had the same problem; I solved it by using a batch size of 32 and image tensors of size [3, 32, 32], with the following configuration for my model. I am using a ResNet with 9 convolutional layers and 4 outputs.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out

Pytorch: The size of tensor a (24) must match the size of tensor b (48) at non-singleton dimension 3

The code below works fine and generates proper results.
import torch
import torch.nn as nn
import torch.nn.functional as F
from modules import ConvLSTMCell, Sign

class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()
        self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.rnn1 = ConvLSTMCell(64, 256, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn2 = ConvLSTMCell(256, 512, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn3 = ConvLSTMCell(512, 512, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        return x, hidden1, hidden2, hidden3

class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)

class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()
        self.conv1 = nn.Conv2d(32, 512, kernel_size=1, stride=1, padding=0, bias=False)
        self.rnn1 = ConvLSTMCell(512, 512, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn2 = ConvLSTMCell(128, 512, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn3 = ConvLSTMCell(128, 256, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=3, bias=False)
        self.rnn4 = ConvLSTMCell(64, 128, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=3, bias=False)
        self.conv2 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)
        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)
        x = F.tanh(self.conv2(x)) / 2
        return x, hidden1, hidden2, hidden3, hidden4
Now I have changed self.conv to use a layer from a pretrained ResNet. After this change, it shows a tensor mismatch error during training. Everything else is the same; I only added the lines marked with ** below.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from modules import ConvLSTMCell, Sign

class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()
        # self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv = resConv.layer4**
        self.rnn1 = ConvLSTMCell(64, 256, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn2 = ConvLSTMCell(256, 512, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn3 = ConvLSTMCell(512, 512, kernel_size=3, stride=2, padding=1,
                                 hidden_kernel_size=1, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        return x, hidden1, hidden2, hidden3

class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)

class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(32, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv1 = resConv.layer4**
        self.rnn1 = ConvLSTMCell(512, 512, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn2 = ConvLSTMCell(128, 512, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=1, bias=False)
        self.rnn3 = ConvLSTMCell(128, 256, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=3, bias=False)
        self.rnn4 = ConvLSTMCell(64, 128, kernel_size=3, stride=1, padding=1,
                                 hidden_kernel_size=3, bias=False)
        **resConv2 = models.resnet50(pretrained=True)
        resConv2.layer4 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = resConv2.layer4**

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)
        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)
        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)
        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)
        x = F.tanh(self.conv2(x)) / 2
        return x, hidden1, hidden2, hidden3, hidden4
You are doing it the wrong way. Some explanation:
**resConv = models.resnet50(pretrained=True)  # you are loading a pretrained model
Then you are replacing a layer in that model with a newly initialized layer, so no pretrained weights are actually used. Also, layer4 in resnet50 is a sequential block containing multiple layers; use print to see the exact layers in the model.
resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
Here you are creating a brand-new layer.
self.conv = resConv.layer4**
As for your question about using a pretrained layer, you should do it like this:
resConv = models.resnet50(pretrained=True)
print(resConv)                 # see the layer which you want to use
self.conv = resConv.conv1      # replace conv1 with that layer
# note: conv1 is the name of the first conv layer in resnet
To add to this, I would also recommend acquiring and adding this layer (or the weights and biases) outside of the object initialization. Something like:
enc = EncoderCell()
resnet50 = models.resnet50(pretrained=True)
and then either
enc.conv = resnet50.conv1
or more ideally
enc.conv.load_state_dict(resnet50.conv1.state_dict())
The reason is that calling state_dict() on an nn.Module creates a clone of its parameters (weights and biases in this case), which can then be loaded via the nn.Module.load_state_dict() method as long as the two instances of nn.Module share the same shapes. So you get the pretrained weights, completely detached from the pretrained model, and you can then get rid of the pretrained model since it could be rather large in memory.
del resnet50
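To make the shape requirement concrete, here is a tiny sketch (illustrative only): load_state_dict only succeeds when the parameter tensors of the two modules match exactly.

import torch.nn as nn
import torchvision.models as models

resnet = models.resnet50(pretrained=True)
# resnet.conv1 is Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
my_conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
print(my_conv.weight.shape == resnet.conv1.weight.shape)  # True
my_conv.load_state_dict(resnet.conv1.state_dict())        # copies the pretrained weights
# a Conv2d with a different kernel size or channel count would raise a size mismatch error here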
I submitted a potential improvement to the other answer, but to address the errors you are getting I am answering here as well. If the code runs before your edits, and the layer you are trying to change has the same shape as the previous one, then my guess is that the problem may have to do with the computational graph that is formed when creating the resnet50 object. I recommend the approach I mentioned in my edit to the other answer, but I will state it here again (note: this assumes you keep the code as it was originally):
# instantiate you encoder (repeat these steps with the decoder as well)
enc = EncoderCell()
# get the pretrained model
resnet = models.resnet50(pretrained=True)
# load the state dict into the regular conv layer
enc.conv.load_state_dict(resnet.layer4.state_dict())
This should load the pretrained weights and biases from the resnet50 model into your conv layer, and this can be done to the decoder conv layer as well as long as they all share the same shape.
To do more testing with your mismatch error I would recommend either using a debugger or print statements in the forward() method of the models to see the shape of the tensor after each layer is applied, like so
def forward(self, input, hidden1, hidden2, hidden3, hidden4):
    print(input.size())
    x = self.conv1(input)
    print(x.size())
    hidden1 = self.rnn1(x, hidden1)
    x = hidden1[0]
    x = F.pixel_shuffle(x, 2)
    hidden2 = self.rnn2(x, hidden2)
    x = hidden2[0]
    x = F.pixel_shuffle(x, 2)
    hidden3 = self.rnn3(x, hidden3)
    x = hidden3[0]
    x = F.pixel_shuffle(x, 2)
    hidden4 = self.rnn4(x, hidden4)
    x = hidden4[0]
    x = F.pixel_shuffle(x, 2)
    x = F.tanh(self.conv2(x)) / 2
    return x, hidden1, hidden2, hidden3, hidden4
and of course you can put print statements wherever else you like in the forward method. I would also highly recommend a debugger; PyCharm makes this quite easy, and in scientific mode it also shows the state of variables beside the Python console. It is also worth looking up how to calculate the size of a tensor after it passes through layers such as convolutions; this is well understood, and formulas exist to compute the output dimensions from the input size, the filter size, the stride, and the padding.
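For reference, the formula alluded to above for a convolution (or max pooling) along one spatial dimension is out = floor((in + 2*padding - kernel_size) / stride) + 1; a small helper, purely illustrative:

def conv2d_out_size(size, kernel_size, stride=1, padding=0):
    # spatial output size of a Conv2d / MaxPool2d along one dimension (dilation ignored)
    return (size + 2 * padding - kernel_size) // stride + 1

# e.g. the EncoderCell's first layer, Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
# maps a 32-pixel dimension to:
print(conv2d_out_size(32, kernel_size=3, stride=2, padding=1))  # 16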

Dimension error in implementation of a convolutional network

I am trying to understand why my classifier has a dimension issue. Here is my code:
class convnet(nn.Module):
    def __init__(self, num_classes=1000):
        super(convnet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(576, 128),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Linear(64, num_classes),
            nn.Softmax(),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)  # x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

def neuralnet(num_classes, **kwargs):
    model = convnet(**kwargs)
    return model
So my issue is the error: expected 4D input (got 2D input).
I'm quite sure the error arises from the flatten command, but I don't really understand why, since the classifier only has fully connected layers. If someone can point out where I'm going wrong, that would be very helpful!
Thank you
After flattening, the input to the classifier has 2 dimensions (size: [batch_size, 576]), therefore the output of the first linear layer will also have 2 dimensions (size: [batch_size, 128]). That output is then passed to nn.BatchNorm2d, which requires its input to have 4 dimensions (size: [batch_size, channels, height, width]).
If you want to use batch norm on a 2D input, you need to use nn.BatchNorm1d, which accepts either a 3D input (size: [batch_size, channels, length]) or a 2D input (size: [batch_size, length]).
self.classifier = nn.Sequential(
    nn.Linear(576, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(inplace=True),
    nn.Linear(128, 64),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(64),
    nn.Linear(64, num_classes),
    nn.Softmax(),
)
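A quick sanity check of the difference (illustrative): BatchNorm1d accepts the 2D [batch_size, features] tensor that comes out of a Linear layer, while BatchNorm2d raises the error from the question.

import torch
import torch.nn as nn

x = torch.randn(8, 128)                # output of nn.Linear(576, 128) for a batch of 8
print(nn.BatchNorm1d(128)(x).shape)    # torch.Size([8, 128])
# nn.BatchNorm2d(128)(x)  ->  RuntimeError: expected 4D input (got 2D input)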
