load_state_dict() missing 1 required positional argument: 'state_dict' - pytorch

I am trying to load my CNN model. I have written the lines of code below.
This is my CNN model architecture.
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=4, kernel_size=4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 4)
        self.fc1 = nn.Linear(8 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        # -> n, 3, 32, 32
        x = self.pool(F.relu(self.conv1(x)))  # -> n, 6, 14, 14
        x = self.pool(F.relu(self.conv2(x)))  # -> n, 16, 5, 5
        x = x.flatten()                       # -> n, 400
        x = F.relu(self.fc1(x))               # -> n, 120
        x = F.relu(self.fc2(x))               # -> n, 84
        # x = nn.LeakyReLU(0.1)(self.fc3(x))  # -> n, 10
        x = self.fc3(x)                       # -> n, 10
        return x
I trained this model on one dataset and want to test it on another dataset, so I saved the model with the lines below:
filepath = r'C:/Users/Q559366/Desktop/code check/CNN_Model/cnnMLmodel300Epoch.pth'
torch.save(model.state_dict(), filepath)
However, while loading the model, I got an error.
import torch
from src.data import CarBonnetSource
from src.model import ConvNet
model = ConvNet
model.load_state_dict(torch.load(filepath))
model.eval()
But I got an error: load_state_dict() missing 1 required positional argument: 'state_dict'
What can I do to successfully load my model?

Your ConvNet has not been instantiated yet: model = ConvNet assigns the class itself rather than an instance, so calling load_state_dict on the class leaves the state_dict argument unfilled and raises this error. To fix it, create an instance first:
model = ConvNet()
model.load_state_dict(torch.load(filepath))
model.eval()
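For reference, the full save/load round trip then looks like this (a minimal sketch; the file name is a placeholder, and map_location is only needed if you trained on a GPU and load on a CPU):

import torch

filepath = 'cnnMLmodel300Epoch.pth'   # placeholder path

# Save only the parameters, not the whole module
torch.save(model.state_dict(), filepath)

# Load: create a fresh instance, then copy the parameters into it
model = ConvNet()                                      # note the parentheses: an instance, not the class
state_dict = torch.load(filepath, map_location='cpu')  # remap GPU tensors to CPU if necessary
model.load_state_dict(state_dict)
model.eval()                                           # switch to evaluation mode before inference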

Related

How to run a pytorch neural network as if it were on a server

I have a model, shown below, that I've already trained:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

transform = transforms.Compose([
    transforms.Resize(30),
    transforms.ToTensor()
])

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 10, 3)
        self.fc1 = nn.Linear(780, 70)
        self.fc2 = nn.Linear(70, 50)
        self.fc3 = nn.Linear(50, 5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
net.load_state_dict(torch.load('classifier.pth'))
net.eval()

test_img = Image.open('test.jpg')
processed_img = transform(test_img)
output = net(processed_img)
predicted = torch.max(output.data, 1)
print(predicted)
When I run this, I get
File "XXX.py", line 26, in forward
x = F.relu(self.fc1(x))
File "/Users/XXX/env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/XXX/env/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x36 and 780x70)
I know it means the tensor shapes don't line up, but I didn't see this problem during training or testing with the data from the loaders.
So my main question is: how would I get this network working in, for example, a Flask API endpoint? How should I process the image so that the network works, or what am I doing wrong?
Update:
I think I found the issue. I assumed transforms.Resize made the image into a square. It doesn't: with a single integer argument it only resizes the smaller edge and keeps the aspect ratio.
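For what it's worth, here is a minimal inference-time sketch (reusing the Net and weights from the question) that avoids the two usual pitfalls: resize to a fixed (height, width) instead of a single integer, and add a batch dimension before calling the model. The size (30, 58) is an assumption chosen only because it makes the flattened features 10 * 6 * 13 = 780, matching fc1; use whatever fixed size your training pipeline actually produced.

import torch
from torchvision import transforms
from PIL import Image

infer_transform = transforms.Compose([
    transforms.Resize((30, 58)),   # fixed (height, width); a single int keeps the aspect ratio instead
    transforms.ToTensor(),
])

net.eval()
img = Image.open('test.jpg')
batch = infer_transform(img).unsqueeze(0)   # (1, 3, 30, 58): add the missing batch dimension

with torch.no_grad():
    output = net(batch)
    predicted = output.argmax(dim=1)
print(predicted)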

How can I connect layers between two networks in Python and PyTorch?

For speaker verification from speech, I want to implement the figure below. I have some questions; please guide me.
First I define a class for the first 4 layers (code, model #1). Then I use the ECAPA-TDNN code from GitHub (code, model #2; link: https://github.com/TaoRuijie/ECAPA-TDNN). I don't know how to connect models 1 and 2, or, in other words, how to implement the figure below.
Model #1: the code for the first 4 layers:
import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self, num_class):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(1, 80, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=2))
        self.conv2 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv3 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv4 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv5 = nn.Sequential(nn.Conv2d(128, 20, T = flatten),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=2))
Model #2: ECAPA-TDNN from GitHub:
import math, torch, torchaudio
import torch.nn as nn
import torch.nn.functional as F

class ECAPA_TDNN(nn.Module):
    def __init__(self, C):
        super(ECAPA_TDNN, self).__init__()
        self.torchfbank = torch.nn.Sequential(
            PreEmphasis(),
            torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160,
                                                 f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80),
        )
        self.specaug = FbankAug()  # Spec augmentation
        self.conv1 = nn.Conv1d(80, C, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(C)
        self.layer1 = Bottle2neck(C, C, kernel_size=3, dilation=2, scale=8)
        self.layer2 = Bottle2neck(C, C, kernel_size=3, dilation=3, scale=8)
        self.layer3 = Bottle2neck(C, C, kernel_size=3, dilation=4, scale=8)
        # I fixed the shape of the output from MFA layer, that is close to the setting from ECAPA paper.
        self.layer4 = nn.Conv1d(3 * C, 1536, kernel_size=1)
        self.attention = nn.Sequential(
            nn.Conv1d(4608, 256, kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Tanh(),  # I add this layer
            nn.Conv1d(256, 1536, kernel_size=1),
            nn.Softmax(dim=2),
        )
        self.bn5 = nn.BatchNorm1d(3072)
        self.fc6 = nn.Linear(3072, 192)
        self.bn6 = nn.BatchNorm1d(192)

    def forward(self, x, aug):
        with torch.no_grad():
            x = self.torchfbank(x) + 1e-6
            x = x.log()
            x = x - torch.mean(x, dim=-1, keepdim=True)
            if aug == True:
                x = self.specaug(x)

        x = self.conv1(x)
        x = self.relu(x)
        x = self.bn1(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x + x1)
        x3 = self.layer3(x + x1 + x2)

        x = self.layer4(torch.cat((x1, x2, x3), dim=1))
        x = self.relu(x)

        t = x.size()[-1]

        global_x = torch.cat((x, torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t),
                              torch.sqrt(torch.var(x, dim=2, keepdim=True).clamp(min=1e-4)).repeat(1, 1, t)), dim=1)

        w = self.attention(global_x)

        mu = torch.sum(x * w, dim=2)
        sg = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-4))

        x = torch.cat((mu, sg), 1)
        x = self.bn5(x)
        x = self.fc6(x)
        x = self.bn6(x)
        return x
2. I am using log Mel filterbank energies for feature extraction, but my feature dimension isn't 80. How can I set the dimension to 80? (Through the batch size? How?)
3. C is a constant (128), but how can I calculate T? For one speech signal I used the code below, but for all the speech files in my dataset the second snippet didn't work. Or is T something else entirely?
# Number of Channel
ch = file_info.channels('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(ch)

# Number of Frame in One Audio File
frame_one_wav = audiosegment.from_file('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(frame_one_wav)

# All Frame
all_frame = []
for i in range(number_of_files_train):
    wavs_info = rate_train, sig_train = wavfile.read(wavs_train[i])
    frame_wav = audiosegment.from_file(wavs_info)
    all_frame.append(frame_wav())
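As a general PyTorch pattern (independent of ECAPA-TDNN), two models are usually connected by wrapping them in a third nn.Module whose forward feeds the output of the first into the second. The sketch below only illustrates that pattern; every layer size, the FrontEnd module, and the placeholder back-end are assumptions that would have to be matched to the real output/input shapes of your front-end and of the ECAPA-TDNN backbone.

import torch
import torch.nn as nn

class FrontEnd(nn.Module):
    # Stand-in for model #1; emits a (batch, 80, T) feature map. The 40 input bins are an assumption.
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(40, 80, kernel_size=5, padding=2),
            nn.BatchNorm1d(80),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.net(x)

class Combined(nn.Module):
    # Chains a front-end and a back-end; back_end stands in for ECAPA_TDNN here.
    def __init__(self, front_end, back_end):
        super().__init__()
        self.front_end = front_end
        self.back_end = back_end

    def forward(self, x):
        feats = self.front_end(x)    # (batch, 80, T)
        return self.back_end(feats)  # speaker embedding

# Usage with dummy shapes
front = FrontEnd()
back = nn.Sequential(nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Linear(80, 192))  # placeholder back-end
model = Combined(front, back)
emb = model(torch.randn(4, 40, 200))   # batch of 4 utterances, 40 bins, 200 frames
print(emb.shape)                       # torch.Size([4, 192])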

Pytorch Transformer won't train due to tensor sizes

I tried following this tutorial for transformers:
https://www.youtube.com/watch?v=U0s0f995w14
However, when I try to train the code with my own vectors, I get the following error message:
Traceback (most recent call last):
  File "C:\Users\rreichel\Desktop\Smaragd_local\Programming\Scripts\Transformer_selfbuilt.py", line 279, in <module>
    loss = loss_func(outputs, target)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\modules\loss.py", line 1047, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\functional.py", line 2693, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\functional.py", line 2397, in nll_loss
    raise ValueError("Expected target size {}, got {}".format(out_size, target.size()))
ValueError: Expected target size (3, 199), got torch.Size([3, 119])
when calculating the loss during training.
The code:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 6 08:13:38 2021
#author: rreichel
"""
import torch
import torch.nn as nn
import pickle
import glob
import os
from SelfbuiltDataset_test import myDataset
import torch.optim as optim


class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (self.head_dim * heads == embed_size), "Embedding size needs to be divisible by heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        # Get number of training examples
        N = query.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], \
            query.shape[1]

        # Split the embedding into self.heads different pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        query = query.reshape(N, query_len, self.heads, self.head_dim)

        # (N, value_len, heads, head_dim)
        values = self.values(values)
        # (N, key_len, heads, head_dim)
        keys = self.keys(keys)
        # (N, query_len, heads, heads_dim)
        queries = self.queries(query)

        energy = torch.einsum("nqhd, nkhd -> nhqk", [queries, keys])
        # queries shape: (N, query_len, heads, heads_dim),
        # keys shape: (N, key_len, heads, heads_dim)
        # energy: (N, heads, query_len, key_len)

        # Mask padded indices so their weights become 0
        if mask is not None:
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # Normalize energy values
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)
        # attention shape: (N, heads, query_len, key_len)

        out = torch.einsum("nhql, nlhd -> nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim)
        # attention shape: (N, heads, query_len, key_len)
        # values shape: (N, value_len, heads, heads_dim)
        # out after matrix multiply: (N, query_len, heads, head_dim), then
        # we reshape and flatten the last two dimensions.

        out = self.fc_out(out)
        return out


class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size))
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        # Add skip connection, run through normalization and finally dropout
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out


class Encoder(nn.Module):
    def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
                 forward_expansion, dropout, max_length):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList([TransformerBlock(embed_size, heads,
                                                      dropout=dropout,
                                                      forward_expansion=forward_expansion)
                                     for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(
            (self.word_embedding(x) +
             self.position_embedding(positions))
        )

        # In the Encoder the query, key, value are all the same, it's in the
        # decoder this will change. This might look a bit odd in this case.
        for layer in self.layers:
            out = layer(out, out, out, mask)
        return out


class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.norm = nn.LayerNorm(embed_size)
        self.attention = SelfAttention(embed_size, heads=heads)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout,
                                                  forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out


class Decoder(nn.Module):
    def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
                 forward_expansion, dropout, device, max_length):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList([DecoderBlock(embed_size, heads,
                                                  forward_expansion, dropout,
                                                  device)
                                     for _ in range(num_layers)])
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout((self.word_embedding(x) +
                          self.position_embedding(positions)))

        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        out = self.fc_out(x)
        return out


class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx,
                 trg_pad_idx, embed_size=512, num_layers=6,
                 forward_expansion=4, heads=8, dropout=0, device="cpu",
                 max_length=100):
        super(Transformer, self).__init__()
        self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
                               device, forward_expansion, dropout, max_length)
        self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
                               forward_expansion, dropout, device, max_length)
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        # (N, 1, 1, src_len)
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1,
                                                                     trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out


def nextMultiple(n, x):
    n = n + x / 2
    n = n - (n % x)
    return int(n)


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    """
    # These are the one-hot encoded sentences (word 1, word 4 etc. as sentence)
    train = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0, 1, 11],
                          [1, 8, 7, 3, 4, 5, 6, 11, 2, 1, 3]]).to(device)
    target = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0, 2, 2],
                           [1, 5, 6, 2, 4, 7, 6, 2, 9, 1]]).to(device)
    max_len = max([len(x) for x in train]) + 1
    """

    # Loading in data
    data = pickle.load(open('Testdaten.pkl', 'rb'))
    tmp = myDataset(data, 'POS')

    # Calculating maximum sentence length (+ 1 because of start tag)
    max_len = max([len(x) for x in tmp.sent_encoded]) + 1
    pad_element = len(tmp.lookup_words)

    # Padding everything out to maximum sentence length
    train_tmp = []
    for sent in tmp.sent_encoded:
        train_tmp.append([pad_element] + sent + [pad_element] * (max_len - len(sent) - 1))
    target_tmp = []
    for sent in tmp.tags_encoded:
        target_tmp.append(sent + [pad_element] * (max_len - len(sent) - 1))

    # Creating tensors for model
    train = torch.squeeze(torch.tensor(train_tmp))
    target = torch.squeeze(torch.tensor(target_tmp))
    # """

    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = int(torch.max(train)) + 1
    trg_vocab_size = int(torch.max(target)) + 1
    heads = 8
    es = nextMultiple(max(src_vocab_size, trg_vocab_size), heads)

    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx,
                        trg_pad_idx, es, 3, 2, heads, 0.1, device,
                        max_len).to(device)

    # Defining loss function and optimizer
    lr = 0.001
    num_epochs = 2
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    loss_func = nn.CrossEntropyLoss()

    # optimization algorithm
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # train and evaluation
    for cnt in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(train, target)
        outputs = outputs
        # Outputs now are size [3, 119, 119]
        # CrossEntropyLoss doesn't like one-hot encoding, how do I deal with this?
        loss = loss_func(outputs, target)
        loss.backward()
        optimizer.step()

    # out = model(train, target)
    # print(out.shape)
I am confused since the code works with the vectors from the tutorial, but once I try to run the model with my own vocabulary, it produces this strange error. The data is just integer values encoding the corresponding words, e.g. "Hello World" would result in the training vector [1 2].
There are no differences between my data and the data from the tutorial as far as I can see. The tensor types are the same (torch.LongTensor), both are integer values within a specified range. The only difference is dimensionality: the tutorial uses vectors with dimension (2, 10), while mine are (3, 199).
Also, I am sorry, but I can't reduce the code any further, since otherwise the error might not be reproducible.
Has anyone encountered this error before?
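For reference, a minimal sketch of the shape contract that nn.CrossEntropyLoss expects for sequence tagging (the class dimension must come second for 3-D inputs, or both tensors can be flattened); the batch size, sequence length and vocabulary size below are made-up numbers, not taken from the question:

import torch
import torch.nn as nn

batch, seq_len, vocab = 3, 119, 200                  # made-up sizes
outputs = torch.randn(batch, seq_len, vocab)         # (N, seq_len, vocab), as the decoder returns
target = torch.randint(0, vocab, (batch, seq_len))   # class indices, not one-hot vectors

loss_func = nn.CrossEntropyLoss()

# Option 1: move the class dimension to position 1 -> input (N, C, seq_len), target (N, seq_len)
loss_a = loss_func(outputs.permute(0, 2, 1), target)

# Option 2: flatten batch and sequence -> input (N*seq_len, C), target (N*seq_len)
loss_b = loss_func(outputs.reshape(-1, vocab), target.reshape(-1))

print(loss_a.item(), loss_b.item())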

How to obtain sequence of submodules from a pytorch module?

For a PyTorch module, I suppose I could use .named_children, .named_modules, etc. to obtain a list of the submodules. However, the list does not seem to be given in order of execution, right? An example:
In [19]: import transformers
In [20]: model = transformers.DistilBertForSequenceClassification.from_pretrained('distilbert-base-cased')
In [21]: [name for name, _ in model.named_children()]
Out[21]: ['distilbert', 'pre_classifier', 'classifier', 'dropout']
The order of .named_children() in the above model is given as distilbert, pre_classifier, classifier, and dropout. However, if you examine the code, it is evident that dropout happens before classifier. So how do I get the order of these submodules?
In PyTorch, the results of print(model), .named_children(), etc. are listed in the order the submodules are declared in the __init__ of the model's class, e.g.
Case 1
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.conv2_drop = nn.Dropout2d()

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.6)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Model()
print(model)
[name for name, _ in model.named_children()]
# output
['conv1', 'conv2', 'fc1', 'fc2', 'conv2_drop']
Case 2
Changed order of fc1 and fc2 layers in constructor.
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc2 = nn.Linear(50, 10)
        self.fc1 = nn.Linear(320, 50)
        self.conv2_drop = nn.Dropout2d()

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.6)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Model()
print(model)
[name for name, _ in model.named_children()]
# output
['conv1', 'conv2', 'fc2', 'fc1', 'conv2_drop']
That's why classifier is printed before dropout: it is declared earlier in the constructor:
class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    ...
        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier = nn.Linear(config.dim, config.num_labels)
        self.dropout = nn.Dropout(config.seq_classif_dropout)
Nevertheless, you can inspect the model's submodules using .modules(), etc., but they will only ever be listed in the order they are declared in __init__. If you want the structure as used by the forward method, you may try pytorch-summary, or trace the execution order yourself with forward hooks, as sketched below.
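A small sketch (not part of the original answer) of recovering the execution order with forward hooks; it records the name of each leaf submodule as it runs during one forward pass with a dummy input:

import torch

def execution_order(model, *dummy_inputs):
    # Run one forward pass and return submodule names in the order they execute.
    order = []
    handles = []
    for name, module in model.named_modules():
        if name == "" or list(module.children()):   # skip the root and non-leaf containers
            continue
        handles.append(module.register_forward_hook(
            lambda mod, inp, out, name=name: order.append(name)))
    with torch.no_grad():
        model(*dummy_inputs)
    for h in handles:
        h.remove()
    return order

# e.g. for the Model from Case 1 above:
# print(execution_order(Model(), torch.randn(1, 1, 28, 28)))
# -> ['conv1', 'conv2', 'conv2_drop', 'fc1', 'fc2']

Note that purely functional calls such as F.dropout or F.max_pool2d have no module object, so they cannot show up in this trace.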

Error: shape '[-1, 270000]' is invalid for input of size 1440000

I got an error:
shape '[-1, 270000]' is invalid for input of size 1440000
while running my code for a CNN; the input tensor size is 64.
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 48, 2)
        self.conv2 = nn.Conv2d(48, 108, 2)
        self.conv3 = nn.Conv2d(108, 192, 2)
        self.conv4 = nn.Conv2d(192, 300, 2)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(300 * 30 * 30, 864)
        self.fc2 = nn.Linear(864, 288)
        self.fc3 = nn.Linear(288, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        # x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(x)
        x = x.view(-1, 300 * 30 * 30)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x)
Any idea why I am getting the above error?
Because after your max pooling layer, the shape of the feature map is (300, height, width), and 300*height*width != 300*30*30. If you want to reshape a tensor, you must keep the same total number of elements.
The view operation that is supposed to flatten x is throwing this error, since 300*30*30 does not match your activation size. Most likely your custom dataset has a different spatial size, so the view fails.
Based on the shape given in the error message, it looks like your activation should have the shape [batch_size=3, channels=300, height=40, width=40], which results in 1440000 values. Try changing the input size of your linear layer to 300*40*40:
self.fc1 = nn.Linear(300*40*40, 864)
and the flattening to:
x = x.view(x.size(0), 300*40*40)
Please let me know if this doesn't work.
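More generally, the flatten step can be made independent of the exact spatial size by keeping the batch dimension and inferring the rest, and by measuring the flattened size once with a dummy input before defining fc1. This is only a sketch of that idea; the 128x128 input size is an assumption, not taken from the question:

import torch
import torch.nn as nn
import torch.nn.functional as F

class MyNet(nn.Module):
    def __init__(self, input_size=(3, 128, 128)):   # assumed input shape (C, H, W)
        super().__init__()
        self.conv1 = nn.Conv2d(3, 48, 2)
        self.conv2 = nn.Conv2d(48, 108, 2)
        self.conv3 = nn.Conv2d(108, 192, 2)
        self.conv4 = nn.Conv2d(192, 300, 2)
        self.pool = nn.MaxPool2d(2, 2)

        # Run a dummy tensor through the conv stack once to measure the flattened size
        with torch.no_grad():
            n_flat = self._features(torch.zeros(1, *input_size)).numel()
        self.fc1 = nn.Linear(n_flat, 864)
        self.fc2 = nn.Linear(864, 288)
        self.fc3 = nn.Linear(288, 2)

    def _features(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        return self.pool(x)

    def forward(self, x):
        x = self._features(x)
        x = x.view(x.size(0), -1)    # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

# model = MyNet()
# out = model(torch.randn(64, 3, 128, 128))   # batch of 64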
