I am coding a dqn from scratch and therefore have written my loss function. While calling backward on my loss function, I get the following error - RuntimeError: grad can be implicitly created only for scalar outputs
Here's my code -
import numpy as np
import gym
import matplotlib.pyplot as plt
import os
import torch
import random
from torch import nn
from import DataLoader
from torchvision import datasets, transforms
from collections import deque
import sys
env = gym.make("CliffWalking-v0")
# In[103]:
episodes = 5000
eps = 1.0
learning_rate = 0.1
discount_factor = 0.99
tot_rewards = []
decay_val = 0.001
mem_size = 50000
batch_size = 2
gamma = 0.99
# In[104]:
class NeuralNetwork(nn.Module):
def __init__(self, state_size, action_size):
super(NeuralNetwork, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_relu_stack = nn.Sequential(
nn.Linear(1, 30),
nn.Linear(30, 30),
nn.Linear(30, action_size)
def forward(self, x):
x = self.linear_relu_stack(x)
return x
# In[105]:
model = NeuralNetwork(env.observation_space.n, env.action_space.n)
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss = nn.MSELoss()
replay_buffer = deque(maxlen=mem_size)
# In[106]:
state = torch.tensor(env.reset(), dtype=torch.float32)
state = state.unsqueeze(dim=0)
out = model(state)
# In[111]:
def compute_td_loss(batch_size):
state, next_state, reward, done, action = zip(*random.sample(replay_buffer, batch_size))
state = torch.from_numpy(np.array(state).reshape(-1, 1)).unsqueeze(dim = 0).type(torch.float32)
next_state = torch.from_numpy(np.array(next_state).reshape(-1, 1)).unsqueeze(dim = 0).type(torch.float32)
reward = torch.from_numpy(np.array(reward))
done = torch.from_numpy(np.array(done))
action = torch.from_numpy(np.array(action)).type(torch.int64)
q_values = model(state)
next_q_values = model(next_state)
q_vals = q_values.squeeze().gather(dim=-1, index=action.reshape(-1,1)).reshape(1, -1)
max_next_q_values = torch.max(next_q_values,2)[0].detach()
print("q_vals = ", q_vals)
print("max_next_q_values = ", max_next_q_values)
loss = 0.5*(reward + gamma*max_next_q_values - q_vals)**2
print("reward = ", reward)
print("loss = ", loss)
return loss
# In[112]:
for i in range(episodes):
state = env.reset()
done = False
steps = 0
eps_rew = 0
while not done and steps<50:
if np.random.uniform(0,1)<eps:
action = env.action_space.sample()
state = torch.tensor(state, dtype=torch.float32)
state = state.unsqueeze(dim=0)
action = np.argmax(model(state).detach().numpy())
next_state, reward, done, info = env.step(action)
replay_buffer.append((state, next_state, reward, done, action))
if len(replay_buffer)>batch_size:
loss = compute_td_loss(batch_size)
eps = eps/(1 + 0.001)
eps_rew += reward
if done:
state = next_state
Here's the error that I get -
RuntimeError Traceback (most recent call last)
<ipython-input-112-015fd74c95d9> in <module>
14 replay_buffer.append((state, next_state, reward, done, action))
15 if len(replay_buffer)>batch_size:
---> 16 loss = compute_td_loss(batch_size)
17 sys.exit()
18 eps = eps/(1 + 0.001)
<ipython-input-111-3e1e02c32b4f> in compute_td_loss(batch_size)
16 print("loss = ", loss)
17 opt.zero_grad()
---> 18 loss.backward()
19 opt.step()
20 return loss
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\ in backward(self, gradient, retain_graph, create_graph, inputs)
253 create_graph=create_graph,
254 inputs=inputs)
--> 255 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
257 def register_hook(self, hook):
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\ in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
142 grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
--> 143 grad_tensors_ = _make_grads(tensors, grad_tensors_)
144 if retain_graph is None:
145 retain_graph = create_graph
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\ in _make_grads(outputs, grads)
48 if out.requires_grad:
49 if out.numel() != 1:
---> 50 raise RuntimeError("grad can be implicitly created only for scalar outputs")
51 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
52 else:
RuntimeError: grad can be implicitly created only for scalar outputs

Given that your batch_size = 2 and looking at your code your loss will likely be of size batch_size x 1. Given that what you are likely trying to do is to compute the gradient of expected Q loss you can use monte carlo estimator where instead of computing an expectation we use a mean over a finite sample (here - your batch). Consequently what you are missing is taking a mean of your loss before calling backwards.


RuntimeError: input must have 2 dimensions, got 1

I am new to PyTorch, and I am trying to build a BiLSTM model to insert its output to a MaxPool1d layer and an AvgPool1d layer each before concatenating the outputs from both layers for a binary classification task. I am working with pretrained Word2Vec embeddings as input:
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as F
class LSTM(nn.Module):
# define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim , num_classes, lstm_layers, weights):
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embedding_dim,
num_directions = 2 #if bidirectional else 1
self.m1 = nn.MaxPool1d(1,stride= 1)
self.m2 = nn.AvgPool1d(1,stride= 1)
self.fc1 = nn.Linear(lstm_units * num_directions, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, num_classes)
self.relu = nn.ReLU()
self.softmax = nn.Softmax()
self.lstm_layers = lstm_layers
self.num_directions = num_directions
self.lstm_units = lstm_units
def forward(self, text, text_lengths):
batch_size = text.shape[0]
h_0, c_0 = (Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)),
Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)))
embedded = self.embedding(text)
packed_embedded = pack_padded_sequence(embedded,"cpu"), batch_first=True)
output, (h_n, c_n) = self.lstm(packed_embedded, (h_0, c_0))
output_unpacked, output_lengths = pad_packed_sequence(output, batch_first=True, enforce_sorted=False)
# out = output_unpacked[:, -1, :]
#return self.linear(ht[-1])
out = output_unpacked
out1 = self.m1(out)
out2 = self.m2(out)
out =, out2), 1)
out = F.relu(self.fc1(out))
preds = F.softmax(self.fc2(out))
return preds
My training function is as below:
import time
def train(dataloader):
total_acc, total_count = 0, 0
log_interval = 500
text_lengths = np.dtype('int64').type(200)
start_time = time.time()
for idx, (label, text) in enumerate(dataloader):
predited_label = model(text, text_lengths = torch.tensor([text_lengths]))
loss = criterion(predited_label, label)
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (label, text) in enumerate(dataloader):
predited_label = model(text)
loss = criterion(predited_label, label)
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc/total_count
And I try to run the code like this:
from import DataLoader
# Hyperparameters
EPOCHS = 1 # epoch
LR =1 # learning rate
BATCH_SIZE = 1 # batch size for training
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
total_accu = None
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
for epoch in range(1, EPOCHS + 1):
epoch_start_time = time.time()
accu_val = evaluate(valid_dataloader)
if total_accu is not None and total_accu > accu_val:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
print('-' * 59)
However, I get the error below. I am not sure what the input size here refers to, and I can't find anyone else with the same error anywhere. Can anyone advise me please?
RuntimeError Traceback (most recent call last)
<ipython-input-111-14ba1dd26348> in <module>()
27 for epoch in range(1, EPOCHS + 1):
28 epoch_start_time = time.time()
---> 29 train(train_dataloader)
30 accu_val = evaluate(valid_dataloader)
31 if total_accu is not None and total_accu > accu_val:
6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/ in check_input(self, input, batch_sizes)
201 raise RuntimeError(
202 'input must have {} dimensions, got {}'.format(
--> 203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
205 raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1

what if the size of training set is not the integer multiple of batch size

I am running the following code against the dataset of PV_Elec_Gas3.csv, the network architecture is designed as follows
class CNN_ForecastNet(nn.Module):
def __init__(self):
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
The train function is defined as follows,
def Train():
running_loss = .0
for idx, (inputs,labels) in enumerate(train_loader):
inputs =
labels =
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
running_loss += loss
train_loss = running_loss/len(train_loader)
print(f'train_loss {train_loss}')
the train_loader is defined as train_loader =,batch_size=2,shuffle=False) here the batch_size is set as 2. When running the train function, I got error message as follows. The reason is becaause when the code iterate through the train_loader, the last iteration only have one training point instead of two as batch_size requires. For this kind of scenario, besides changing the batch size, are there any other options?
This is the error message. I also include the full code to reproduce the error
RuntimeError Traceback (most recent call last)
<ipython-input-82-78a49fb8c068> in <module>
99 for epoch in range(epochs):
100 print('epochs {}/{}'.format(epoch+1,epochs))
--> 101 Train()
102 gc.collect()
<ipython-input-82-78a49fb8c068> in Train()
81 optimizer.zero_grad()
82 #print('inputs ',inputs)
---> 83 preds = model(inputs.float())
84 loss = criterion(preds,labels.float())
85 loss.backward()
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\ in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-82-78a49fb8c068> in forward(self, x)
57 x = x.view(-1)
58 #print('x size',x.size())
---> 59 x = self.fc1(x)
60 x = self.relu(x)
61 x = self.fc2(x)
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\ in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\ in forward(self, input)
92 def forward(self, input: Tensor) -> Tensor:
---> 93 return F.linear(input, self.weight, self.bias)
95 def extra_repr(self) -> str:
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\ in linear(input, weight, bias)
1690 ret = torch.addmm(bias, input, weight.t())
1691 else:
-> 1692 output = input.matmul(weight.t())
1693 if bias is not None:
1694 output += bias
RuntimeError: mat1 dim 1 must match mat2 dim 0
the following is the code for reproduction of error
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from numpy import array
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from import Dataset,DataLoader
solar_power = pd.read_csv('PV_Elec_Gas3.csv').rename(columns={'date':'timestamp'}).set_index('timestamp')
train_set = solar_power[:'8/10/2016']
def split_sequence(sequence, n_steps):
x, y = list(), list()
for i in range(len(sequence)):
end_ix = i + n_steps
if end_ix > len(sequence)-1:
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
return array(x), array(y)
n_steps = 3
train_x,train_y = split_sequence(train_set.loc[:,"kWh electricity/day"].values,n_steps)
class ElecDataset(Dataset):
def __init__(self,feature,target):
self.feature = feature = target
def __len__(self):
return len(self.feature)
def __getitem__(self,idx):
item = self.feature[idx]
label =[idx]
return item,label
class CNN_ForecastNet(nn.Module):
def __init__(self):
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()
train_losses = []
def Train():
running_loss = .0
for idx, (inputs,labels) in enumerate(train_loader):
inputs =
labels =
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
running_loss += loss
train_loss = running_loss/len(train_loader)
print(f'train_loss {train_loss}')
train = ElecDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
train_loader =,batch_size=2,shuffle=False)
epochs = 1
for epoch in range(epochs):
print('epochs {}/{}'.format(epoch+1,epochs))
In your forward method you x.view(-1) before passing it to a nn.Linear layer. This "flattens" not only the spatial dimensions on x, but also the batch dimension! You basically mix together all samples in the batch, making your model dependant on the batch size and in general making the predictions depend on the batch as a whole rather than on the individual data points.
Instead, you should:
def forward(self, x):
x = self.conv1d(x)
x = self.relu(x)
x = x.flatten(start_dim=1) # flatten all BUT batch dimension
x = self.fc1(x) # you'll probably have to modify in_features of fc1 now
x = self.relu(x)
x = self.fc2(x)
return x
Please see flatten() for more details.
If, for some reason, you must process only "full batches", you can tell DataLoader to drop the last batch by changing the argument drop_last from the default False to True:
train_loader =, batch_size=2, shuffle=False, drop_last=True)

Expected more than 1 value per channel when training, got input size torch.Size([1, **])

I met an error when I use BatchNorm1d, code:
##% first I set a model
class net(nn.Module):
def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
super(net, self).__init__()
self.max_len = max_len
self.feature_linear = feature_linear
self.input_size = input_size
self.hidden_size = hidden_size
self.bidirectional = bidirectional
self.num_directions = 2 if bidirectional == True else 1
self.p = p
self.batch_first = batch_first
self.linear1 = nn.Linear(max_len, feature_linear)
init.kaiming_normal_(self.linear1.weight, mode='fan_in')
self.BN1 = BN(feature_linear)
def forward(self, xb, seq_len_crt):
rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
for i in range(self.input_size):
out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(1,34), out.shape(1,100)
out = F.relu(out) # 输入:out.shape(1,100), 输出:out.shape(1,100)
out = self.BN1(out) # 输入:out.shape(1,100),输出:out.shape(1,100)
return y_hat.squeeze(-1)
##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size
feature_linear = 60
BN = nn.BatchNorm1d
model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)
##% use this model to predict data
def predict(xb, model, seq_len):
# xb's shape should be (batch_size, seq_len, n_features)
if xb.ndim == 2: # suitable for both ndarray and Tensor
# add a {batch_size} dim
xb = xb[None, ]
if not isinstance(xb, torch.Tensor):
xb = torch.Tensor(xb)
return model(xb, seq_len) # xb.shape(1,34,5)
##% create training/valid/test data
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
if i + batch_size <= len(seq_len_train):
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
if i + batch_size <= len(seq_len_valid):
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
if i + batch_size <= len(seq_len_test):
##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
train_loss_record = []
valid_loss_record = []
mean_pct_final = []
mean_abs_final = []
is_better = False
last_epoch_abs_error = 0
last_epoch_pct_error = 0
mean_pct_final_train = []
mean_abs_final_train = []
for epoch in range(epochs):
# seq_len_crt: current batch seq len
for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
if isinstance(seq_len_crt, np.int64):
seq_len_crt = [seq_len_crt]
y_hat = model(xb, seq_len_crt)
packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
final_yb = final_yb.permute(1, 0)
# assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
loss = loss_func(y_hat, final_yb)
batch_size_crt = final_yb.shape[0]
loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt
# scheduler.step()
# print(i)
with torch.no_grad():
if batches % 50 == 0 and epoch % 1 == 0:
# print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
label = yb[0][:len(y_hat)]
# plt.ion()
plt.plot(y_hat, label='predicted')
plt.plot(label, label='label')
plt.legend(loc='upper right')
plt.title('training mode')
plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
return train_loss_record
but I met:Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
the error message is:
ValueError Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
38 # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
---> 40 y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
41 label = yb[0][:len(y_hat)]
42 # plt.ion()
<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
7 if not isinstance(xb, torch.Tensor):
8 xb = torch.Tensor(xb)
----> 9 return model(xb, seq_len) # xb.shape(None,34,5)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\ in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
50 out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
51 out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52 out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
54 out = self.linear2(out)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\ in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\ in forward(self, input)
129 used for normalization (i.e. in eval mode when buffers are not None).
130 """
--> 131 return F.batch_norm(
132 input,
133 # If buffers are not to be tracked, ensure that they won't be updated
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\ in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2052 bias=bias, training=training, momentum=momentum, eps=eps)
2053 if training:
-> 2054 _verify_batch_size(input.size())
2056 return torch.batch_norm(
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\ in _verify_batch_size(size)
2035 size_prods *= size[i + 2]
2036 if size_prods == 1:
-> 2037 raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
I have checked and I found that in out = self.BN1(out),out.shape = (1,60),it seems that batchsize=1 is not permitted in BatchNorm1d .But I don't know how to modify it.
what does BatchNorm1d do mathematically?
try and write down the equation for the case of batch_size=1 and you'll understand why pytorch is angry with you.
How to solve it?
It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.
When evaluating your model use model.eval() before and model.train() after.
I met this problem when I load the model and started to test. Add the model.eval() before you fill in your data. This can solve the problem.
If you are using the DataLoader class, sometimes the last batch in an epoch will have only a single training example (imagine a training set of 33 examples with a batch size of 32). This can trigger the error if the network is in training mode and a batch norm layer is present.
Set the drop_last argument in the DataLoader to True like:
from import DataLoader
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
to discard the last incomplete batch in each epoch.

The size of tensor a (10) must match the size of tensor b (9) at non-singleton

i am using this code earlier in Jupiter notebook it was not showing error but accuracy was veryless then i have tried the same code in google colab there it is showing error, please suggest some way to increase accuracy . i am trying to perform multilevel CNN for detecting leaf with downsampling of image
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
self.conv1 = nn.Conv2d(1,32,2)
self.conv2 = nn.Conv2d(32,64,2)
self.conv2_bn = nn.BatchNorm2d(64)
x= torch.randn(256,256).view(-1,1,256,256)
self._to_linear = None
self.fc1= nn.Linear(self._to_linear, 512)
self.fc2 = nn.Linear(512,6)
def convs(self,x):
y=torch.nn.functional.interpolate(x, size=([128,128]), scale_factor=None, mode='nearest', align_corners=None)
z=torch.nn.functional.interpolate(x, size=([64,64]), scale_factor=None, mode='nearest', align_corners=None)
w=torch.nn.functional.interpolate(x, size=([32,32]), scale_factor=None, mode='nearest', align_corners=None)
# print(x[0].shape)
x= F.relu(self.conv1(x))
m = nn.ConstantPad2d(1,0)
x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), 2))
# print(x[0].shape)
y= F.relu(self.conv1(y))
m = nn.ConstantPad2d(1,0)
y = F.relu(self.conv2_bn(self.conv2(y)), 2)
# print(y[0].shape),y),1)
# print(CAT_1[0].shape)
z= F.relu(self.conv1(z))
m = nn.ConstantPad2d(1,0)
z= F.relu(self.conv2_bn(self.conv2(z)))
# print(z[0].shape),z),1)
# print(CAT_2[0].shape)
w= F.relu(self.conv1(w))
m = nn.ConstantPad2d(1,0)
w = F.relu((self.conv2_bn(self.conv2(w))))
# print(w[0].shape),w),1)
# print("i lov pp")
# print(x[0].shape)
x=torch.nn.functional.avg_pool2d(x, (2,2))
# print("i lov pp")
# print(x[0].shape)
if self._to_linear is None:
self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
return x
def forward(self, x):
# print("i lov pp")
x=x.view(-1, self._to_linear)
x= F.relu(self.fc1(x))
x= self.fc2(x)
return F.softmax(x, dim=1)
# print(x[0].shape)
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,256,256)
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1
val_size=int (len(X)*VAL_PCT)
train_X= X[:-val_size]
train_y= y[:-val_size]
test_y = y[-val_size:]
for epoch in range(EPOCHS):
for i in (range(0, len(train_X), BATCH_SIZE)):
#print(i, i+BATCH_SIZE)
batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,256,256)
# print(batch_X.shape)
batch_y = train_y[i:i+BATCH_SIZE]
outputs = net(batch_X)
#print (outputs.shape)
loss = loss_function(outputs, batch_y)
#print(f"Epoch: {epoch}. Loss: {loss}")
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ UserWarning: Using a target size (torch.Size([10, 256, 256, 3])) that is different to the input size (torch.Size([10, 6])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
RuntimeError Traceback (most recent call last)
<ipython-input-38-a154b102127f> in <module>()
15 outputs = net(batch_X)
16 #print (outputs.shape)
---> 17 loss = loss_function(outputs, batch_y)
18 loss.backward()
19 optimizer.step()
3 frames
/usr/local/lib/python3.6/dist-packages/torch/ in broadcast_tensors(*tensors)
60 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
61 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 62 return _VF.broadcast_tensors(tensors)
RuntimeError: The size of tensor a (10) must match the size of tensor b (9) at non-singleton dimension 3

multi-variable linear regression with pytorch

I'm working on a linear regression problem with Pytorch.
I've had success with the single variable case, however when I perform multi-variable linear regression I get the following error. How should I perform linear regression with multiple variables?
TypeError Traceback (most recent call
last) in ()
9 optimizer.zero_grad() #gradient
10 outputs = model(inputs) #output
---> 11 loss = criterion(outputs,targets) #loss function
12 loss.backward() #backward propogation
13 optimizer.step() #1-step optimization(gradeint descent)
in call(self, *input, **kwargs)
205 def call(self, *input, **kwargs):
--> 206 result = self.forward(*input, **kwargs)
207 for hook in self._forward_hooks.values():
208 hook_result = hook(self, input, result)
in forward(self, input, target)
22 _assert_no_grad(target)
23 backend_fn = getattr(self._backend, type(self).name)
---> 24 return backend_fn(self.size_average)(input, target)
in forward(self, input, target)
39 output =
40 getattr(self._backend,, input, target,
---> 41 output, *self.additional_args)
42 return output
TypeError: FloatMSECriterion_updateOutput received an invalid
combination of arguments - got (int, torch.FloatTensor,
torch.DoubleTensor, torch.FloatTensor, bool), but expected (int state,
torch.FloatTensor input, torch.FloatTensor target, torch.FloatTensor
output, bool sizeAverage)
here is code
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
#input_size = 1
input_size = 3
output_size = 1
num_epochs = 300
learning_rate = 0.002
#Data set
#x_train = np.array([[1.564],[2.11],[3.3],[5.4]], dtype=np.float32)
x_train = np.array([[73.,80.,75.],[93.,88.,93.],[89.,91.,90.],[96.,98.,100.],[73.,63.,70.]],dtype=np.float32)
#y_train = np.array([[8.0],[19.0],[25.0],[34.45]], dtype= np.float32)
y_train = np.array([[152.],[185.],[180.],[196.],[142.]])
class LinearRegression(nn.Module):
def __init__(self,input_size,output_size):
self.linear = nn.Linear(input_size,output_size)
def forward(self,x):
out = self.linear(x) #Forward propogation
return out
model = LinearRegression(input_size,output_size)
#Lost and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
#train the Model
for epoch in range(num_epochs):
#convert numpy array to torch Variable
inputs = Variable(torch.from_numpy(x_train)) #convert numpy array to torch tensor
#inputs = Variable(torch.Tensor(x_train))
targets = Variable(torch.from_numpy(y_train)) #convert numpy array to torch tensor
#forward+ backward + optimize
optimizer.zero_grad() #gradient
outputs = model(inputs) #output
loss = criterion(outputs,targets) #loss function
loss.backward() #backward propogation
optimizer.step() #1-step optimization(gradeint descent)
if(epoch+1) %5 ==0:
print('epoch [%d/%d], Loss: %.4f' % (epoch +1, num_epochs,[0]))
predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
plt.plot(x_train,y_train,'ro',label='Original Data')
plt.plot(x_train,predicted,label='Fitted Line')
You need to make sure that the data has the same type. In this case x_train is a 32 bit float while y_train is a Double. You have to use:
y_train = np.array([[152.],[185.],[180.],[196.],[142.]],dtype=np.float32)
