The size of tensor a (10) must match the size of tensor b (9) at non-singleton - pytorch

i am using this code earlier in Jupiter notebook it was not showing error but accuracy was veryless then i have tried the same code in google colab there it is showing error, please suggest some way to increase accuracy . i am trying to perform multilevel CNN for detecting leaf with downsampling of image
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1,32,2)
self.conv2 = nn.Conv2d(32,64,2)
self.conv2_bn = nn.BatchNorm2d(64)
x= torch.randn(256,256).view(-1,1,256,256)
self._to_linear = None
self.convs(x)
self.fc1= nn.Linear(self._to_linear, 512)
self.fc2 = nn.Linear(512,6)
def convs(self,x):
y=torch.nn.functional.interpolate(x, size=([128,128]), scale_factor=None, mode='nearest', align_corners=None)
z=torch.nn.functional.interpolate(x, size=([64,64]), scale_factor=None, mode='nearest', align_corners=None)
w=torch.nn.functional.interpolate(x, size=([32,32]), scale_factor=None, mode='nearest', align_corners=None)
# print(x[0].shape)
x= F.relu(self.conv1(x))
m = nn.ConstantPad2d(1,0)
x=m(x)
x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), 2))
# print(x[0].shape)
y= F.relu(self.conv1(y))
m = nn.ConstantPad2d(1,0)
y=m(y)
y = F.relu(self.conv2_bn(self.conv2(y)), 2)
# print(y[0].shape)
CAT_1=torch.cat((x,y),1)
CAT_1=F.max_pool2d(CAT_1,(2,2))
# print(CAT_1[0].shape)
z= F.relu(self.conv1(z))
m = nn.ConstantPad2d(1,0)
z=m(z)
z= F.relu(self.conv2_bn(self.conv2(z)))
# print(z[0].shape)
CAT_2=torch.cat((CAT_1,z),1)
CAT_2=F.max_pool2d(CAT_2,(2,2))
# print(CAT_2[0].shape)
w= F.relu(self.conv1(w))
m = nn.ConstantPad2d(1,0)
w=m(w)
w = F.relu((self.conv2_bn(self.conv2(w))))
# print(w[0].shape)
x=torch.cat((CAT_2,w),1)
x=F.max_pool2d(x,(2,2))
# print("i lov pp")
# print(x[0].shape)
x=torch.nn.functional.avg_pool2d(x, (2,2))
# print("i lov pp")
# print(x[0].shape)
if self._to_linear is None:
self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
return x
def forward(self, x):
# print("i lov pp")
x=self.convs(x)
x=x.view(-1, self._to_linear)
x= F.relu(self.fc1(x))
x= self.fc2(x)
return F.softmax(x, dim=1)
# print(x[0].shape)
net=Net()
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,256,256)
X=X/255.0
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1
val_size=int (len(X)*VAL_PCT)
print(val_size)
train_X= X[:-val_size]
train_y= y[:-val_size]
test_X=X[-val_size:]
test_y = y[-val_size:]
print(len(train_X))
print(len(test_X))
BATCH_SIZE =10
EPOCHS = 1
for epoch in range(EPOCHS):
for i in (range(0, len(train_X), BATCH_SIZE)):
#print(i, i+BATCH_SIZE)
batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,256,256)
# print(batch_X.shape)
batch_y = train_y[i:i+BATCH_SIZE]
#print(batch_y.shape)
net.zero_grad()
outputs = net(batch_X)
#print (outputs.shape)
loss = loss_function(outputs, batch_y)
loss.backward()
optimizer.step()
#print(loss)
#print(f"Epoch: {epoch}. Loss: {loss}")
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py:432: UserWarning: Using a target size (torch.Size([10, 256, 256, 3])) that is different to the input size (torch.Size([10, 6])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-38-a154b102127f> in <module>()
15 outputs = net(batch_X)
16 #print (outputs.shape)
---> 17 loss = loss_function(outputs, batch_y)
18 loss.backward()
19 optimizer.step()
3 frames
/usr/local/lib/python3.6/dist-packages/torch/functional.py in broadcast_tensors(*tensors)
60 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
61 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 62 return _VF.broadcast_tensors(tensors)
63
64
RuntimeError: The size of tensor a (10) must match the size of tensor b (9) at non-singleton dimension 3

Related

Custom layer caused the batch dimension mismatch in pytorch. How to fix this problem?

I have tried to train a GCN model.I defined the custom layer I needed. However, It cause some dimension mismatch when I do some batch training.
the codes are as following :
import math
import numpy as np
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
# =============================================================================
# model define
# =============================================================================
class GraphConvolution(Module):
"""
Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, bias=True):
super(GraphConvolution, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.FloatTensor(in_features, out_features))
if bias:
self.bias = Parameter(torch.FloatTensor(out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input, adj):
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
if self.bias is not None:
return output + self.bias
else:
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
class GCN(nn.Module):
def __init__(self, nfeat, nhid, nclass):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.linear = nn.Linear(nclass, 1)
# self.dropout = dropout
def forward(self, x, adj):
x = F.relu(self.gc1(x, adj))
# x = F.dropout(x, self.dropout, training=self.training)
x = F.relu(self.gc2(x, adj))
x = self.linear(x)
return x
def train(dataloader, model, loss_fn, optimizer,adj):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X,adj)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn,adj):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X,adj)
test_loss += loss_fn(pred, y).item()
# correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
# correct /= size
# Accuracy: {(100*correct):>0.1f}%,
print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")
when I run the code :
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = GCN(1,1,1).to(device)
print(model)
# model(X).shape
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer,Adjacency_matrix)
test(test_dataloader, model, loss_fn,Adjacency_matrix)
print("Done!")
I got the error :
when I looking inside this ,I find the model is working well when I drop the dimension of batch-size. How I need to do to tell the model that this dimension is the batch-size which don't need to compute?
the error you're seeing is due to you trying to matrix multiple a 3d tensor (your input) by your 2D weights.
To get around this you can simply reshape your data, as we only really care about the last dim when doing matmuls:
def forward(self, input, adj):
b_size = input.size(0)
input = input.view(-1, input.shape[-1])
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
output = output.view(b_size,-1,output.shape[-1])
if self.bias is not None:
return output + self.bias
else:
return output

RuntimeError: input must have 2 dimensions, got 1

I am new to PyTorch, and I am trying to build a BiLSTM model to insert its output to a MaxPool1d layer and an AvgPool1d layer each before concatenating the outputs from both layers for a binary classification task. I am working with pretrained Word2Vec embeddings as input:
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as F
class LSTM(nn.Module):
# define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim , num_classes, lstm_layers, weights):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.embedding.weight.data.copy_(torch.from_numpy(weights))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embedding_dim,
lstm_units,
num_layers=lstm_layers,
bidirectional=True,
batch_first=True)
num_directions = 2 #if bidirectional else 1
self.m1 = nn.MaxPool1d(1,stride= 1)
self.m2 = nn.AvgPool1d(1,stride= 1)
self.fc1 = nn.Linear(lstm_units * num_directions, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, num_classes)
self.relu = nn.ReLU()
self.softmax = nn.Softmax()
self.lstm_layers = lstm_layers
self.num_directions = num_directions
self.lstm_units = lstm_units
def forward(self, text, text_lengths):
batch_size = text.shape[0]
h_0, c_0 = (Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)),
Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)))
embedded = self.embedding(text)
packed_embedded = pack_padded_sequence(embedded, text_lengths.to("cpu"), batch_first=True)
output, (h_n, c_n) = self.lstm(packed_embedded, (h_0, c_0))
output_unpacked, output_lengths = pad_packed_sequence(output, batch_first=True, enforce_sorted=False)
# out = output_unpacked[:, -1, :]
#return self.linear(ht[-1])
out = output_unpacked
out1 = self.m1(out)
out2 = self.m2(out)
out = torch.cat((out1, out2), 1)
out = F.relu(self.fc1(out))
preds = F.softmax(self.fc2(out))
return preds
My training function is as below:
import time
def train(dataloader):
model.train()
total_acc, total_count = 0, 0
log_interval = 500
text_lengths = np.dtype('int64').type(200)
start_time = time.time()
for idx, (label, text) in enumerate(dataloader):
optimizer.zero_grad()
predited_label = model(text, text_lengths = torch.tensor([text_lengths]))
loss = criterion(predited_label, label)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc/total_count))
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
model.eval()
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (label, text) in enumerate(dataloader):
predited_label = model(text)
loss = criterion(predited_label, label)
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc/total_count
And I try to run the code like this:
from torch.utils.data import DataLoader
# Hyperparameters
EPOCHS = 1 # epoch
LR =1 # learning rate
BATCH_SIZE = 1 # batch size for training
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
total_accu = None
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
for epoch in range(1, EPOCHS + 1):
epoch_start_time = time.time()
train(train_dataloader)
accu_val = evaluate(valid_dataloader)
if total_accu is not None and total_accu > accu_val:
scheduler.step()
else:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
accu_val))
print('-' * 59)
However, I get the error below. I am not sure what the input size here refers to, and I can't find anyone else with the same error anywhere. Can anyone advise me please?
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-111-14ba1dd26348> in <module>()
27 for epoch in range(1, EPOCHS + 1):
28 epoch_start_time = time.time()
---> 29 train(train_dataloader)
30 accu_val = evaluate(valid_dataloader)
31 if total_accu is not None and total_accu > accu_val:
6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
201 raise RuntimeError(
202 'input must have {} dimensions, got {}'.format(
--> 203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
205 raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1

Training 1D CNN in Pytorch

I want to train the model given below. I am developing 1D CNN model in PyTorch. Usually we use dataloaders in PyTorch. But I am not using dataloaders for my implementation. I need guidance on how i can train my model in pytorch.
import torch
import torch.nn as nn
import torch.nn.functional as F
class CharCNN(nn.Module):
def __init__(self,num_labels=11):
super(CharCNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(num_channels, depth_1, kernel_size=kernel_size_1, stride=stride_size),
nn.ReLU(),
nn.MaxPool1d(kernel_size=kernel_size_1, stride=stride_size),
nn.Dropout(0.1),
)
self.conv2 = nn.Sequential(
nn.Conv1d(depth_1, depth_2, kernel_size=kernel_size_2, stride=stride_size),
nn.ReLU(),
nn.MaxPool1d(kernel_size=kernel_size_2, stride=stride_size),
nn.Dropout(0.25)
)
self.fc1 = nn.Sequential(
nn.Linear(depth_2*kernel_size_2, num_hidden),
nn.ReLU(),
nn.Dropout(0.5)
)
self.fc2 = nn.Sequential(
nn.Linear(num_hidden, num_labels),
nn.ReLU(),
nn.Dropout(0.5)
)
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
# collapse
out = x.view(x.size(0), -1)
# linear layer
out = self.fc1(out)
# output layer
out = self.fc2(out)
#out = self.log_softmax(x,dim=1)
return out
I am training my network like this:
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(),lr=learning_rate)
for e in range(training_epochs):
if(train_on_gpu):
net.cuda()
train_losses = []
for batch in iterate_minibatches(train_x, train_y, batch_size):
x, y = batch
inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
if(train_on_gpu):
inputs, targets = inputs.cuda(), targets.cuda()
opt.zero_grad()
output = model(inputs, batch_size)
loss = criterion(output, targets.long())
train_losses.append(loss.item())
loss.backward()
opt.step()
val_losses = []
accuracy=0
f1score=0
print("Epoch: {}/{}...".format(e+1, training_epochs),
"Train Loss: {:.4f}...".format(np.mean(train_losses)))
But i am getting the following error
TypeError Traceback (most recent call last)
<ipython-input-60-3a3df06ef2f8> in <module>
14 inputs, targets = inputs.cuda(), targets.cuda()
15 opt.zero_grad()
---> 16 output = model(inputs, batch_size)
17
18 loss = criterion(output, targets.long())
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self,
* input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
TypeError: forward() takes 2 positional arguments but 3 were given
Please guide me how i can resolve this issue.
The forward method of your model only takes one argument, but you are calling it with two arguments:
output = model(inputs, batch_size)
It should be:
output = model(inputs)
The time series data uses a 5 element window. The target is a rolling window of 5. The convolution 1d model receives a Sales tensor 3 dimensional structure containing all the sales for a certain duration of time (https://krzjoa.github.io/2019/12/28/pytorch-ts-v1.html) The kernel is set at 5 to match the moving window size. input and output are 1. The loss function is calculated over 1000 epochs. The prediction tensor is then converted to a numpy array and displayed comparing it to the actual moving average. I did find iterate_minibatches code but it does not work with time series data because the dimensions are different (32 target vs 36 source)
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
df=pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/shampoo.csv')
#created a three dimensional tensor
#1. number of samples
#2. number of channels
#3. -1 means infer value from dimension
X=data.Sales.copy()
y=data.Sales.rolling(5).mean().copy()
net = nn.Conv1d(1, 1, 5, bias = False)
optimizer=optim.Adam(net.parameters(), lr=0.01) #l2
running_loss=0.0
X=data.Sales.copy()
y=data.Sales.rolling(5).mean().copy()
X_tensor = torch.Tensor(X).reshape(1, 1, -1)
print("Sales", X_tensor)
y=y[4:,].to_numpy()
y_tensor = torch.Tensor(y).reshape(1, 1, -1)
print("Avg", y_tensor)
ts_tensor = torch.Tensor(X).reshape(1, 1, -1)
kernel = [0.5, 0.5]
kernel_tensor = torch.Tensor(kernel).reshape(1, 1, -1)
print("Kernel", F.conv1d(ts_tensor, kernel_tensor))
for epoch in range(1000):
optimizer.zero_grad()
outputs=net(X_tensor)
#print("Outputs",outputs)
loss_value = torch.mean((outputs - y_tensor)**2)
loss_value.backward()
optimizer.step()
running_loss += loss_value.item()
if epoch % 100 == 0:
print('[%d] loss: %.3f' % (epoch, loss_value.item()))
print(net.weight.data.numpy())
prediction = (net(X_tensor).data).float()
prediction=(prediction.numpy().flatten())
data.Sales.plot()
plt.plot(prediction)
#actual moving average
data.Sales.plot()
plt.plot(y)

Maxpool2d error is showing while there is no Maxpool2d

I run the following code to train a neural network that contains a CNN with max pooling and two fully-connected layers:
class Net(nn.Module):
def __init__(self, vocab_size, embedding_size):
torch.manual_seed(0)
super(Net, self).__init__()
self.word_embeddings = nn.Embedding(vocab_size, embedding_size)
self.conv1 = nn.Conv1d(embedding_size, 64, 3)
self.drop1 = nn.Dropout(0.5)
self.max_pool1 = nn.MaxPool1d(2)
self.flat1 = nn.Flatten()
self.fc1 = nn.Linear(64*99, 100)
self.fc2 = nn.Linear(100, 1)
def forward(self, sentence):
embedding = self.word_embeddings(sentence).permute(0, 2, 1)
conv1 = F.relu(self.conv1(embedding))
drop1 = self.drop1(conv1)
max_pool1 = self.max_pool1(drop1)
flat1 = self.flat1(max_pool1)
fc1 = F.relu(self.fc1(flat1))
fc2 = torch.sigmoid(self.fc2(fc1))
return fc2
net = Net(vocab_size, EMBEDDING_SIZE)
EPOCHS = 10
net.cuda()
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
loader = DataLoader(train, batch_size=32)
net.train()
for epoch in range(EPOCHS):
progress = tqdm_notebook(loader, leave=False)
for inputs, target in progress:
net.zero_grad()
output = net(inputs.to(device))
loss = criterion(output, target.to(device))
loss.backward()
optimizer.step()
print(loss)
and I get the following error (the error trace has been updated and it includes the complete trace):
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py:498: UserWarning: Using a target size (torch.Size([32])) that is different to the input size (torch.Size([32, 1])) is deprecated. Please ensure they have the same size.
return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-2-3c8a885417ba> in <module>()
33 for inputs, target in progress:
34 net.zero_grad()
---> 35 output = net(inputs.to(device))
36 loss = criterion(output, target.to(device))
37 loss.backward()
5 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in _max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)
455 stride = torch.jit.annotate(List[int], [])
456 return torch.max_pool1d(
--> 457 input, kernel_size, stride, padding, dilation, ceil_mode)
458
459 max_pool1d = boolean_dispatch(
RuntimeError: max_pool2d_with_indices_out_cuda_frame failed with error code 0
I do not have any Maxpool2ds in my code! Could anybody help me with this problem?

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and getting the error shown at extreme bottom. I would like to know how to resolve this. thanks
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
Creating model for execution
class Model(nn.Module):
def init(self):
super().init()
self.hidden = nn.Linear(784, 128)
self.output = nn.Linear(128, 10)
def forward(self, x):
x = self.hidden(x)
x = F.sigmoid(x)
x = self.output(x)
return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
model.train()
for data, target in trainloader:
optimizer.zero_grad()
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
loss.backward()
#4. weight optimization
optimizer.step()
train_loss.append(loss.item())
# evaluation part
model.eval()
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
valid_loss.append(loss.item())
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
**kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
else:
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
/pytorch/aten/src/TH/generic/THTensorMath.cpp:940
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
optimizer.zero_grad()
...
The same is needed to be done in the validation loop.

Resources