Expected more than 1 value per channel when training, got input size torch.Size([1, **]) - pytorch

I met an error when I use BatchNorm1d, code:
##% first I set a model
class net(nn.Module):
def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
super(net, self).__init__()
self.max_len = max_len
self.feature_linear = feature_linear
self.input_size = input_size
self.hidden_size = hidden_size
self.bidirectional = bidirectional
self.num_directions = 2 if bidirectional == True else 1
self.p = p
self.batch_first = batch_first
self.linear1 = nn.Linear(max_len, feature_linear)
init.kaiming_normal_(self.linear1.weight, mode='fan_in')
self.BN1 = BN(feature_linear)
def forward(self, xb, seq_len_crt):
rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
for i in range(self.input_size):
out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(1,34), out.shape(1,100)
out = F.relu(out) # 输入:out.shape(1,100), 输出:out.shape(1,100)
out = self.BN1(out) # 输入:out.shape(1,100),输出:out.shape(1,100)
return y_hat.squeeze(-1)
##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size
feature_linear = 60
BN = nn.BatchNorm1d
model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)
##% use this model to predict data
def predict(xb, model, seq_len):
# xb's shape should be (batch_size, seq_len, n_features)
if xb.ndim == 2: # suitable for both ndarray and Tensor
# add a {batch_size} dim
xb = xb[None, ]
if not isinstance(xb, torch.Tensor):
xb = torch.Tensor(xb)
return model(xb, seq_len) # xb.shape(1,34,5)
##% create training/valid/test data
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
if i + batch_size <= len(seq_len_train):
seq_len_train_iter.append(seq_len_train[i:i+batch_size])
else:
seq_len_train_iter.append(seq_len_train[i:])
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
if i + batch_size <= len(seq_len_valid):
seq_len_valid_iter.append(seq_len_valid[i:i+batch_size])
else:
seq_len_valid_iter.append(seq_len_valid[i:])
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
if i + batch_size <= len(seq_len_test):
seq_len_test_iter.append(seq_len_test[i:i+batch_size])
else:
seq_len_test_iter.append(seq_len_test[i:])
##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
train_loss_record = []
valid_loss_record = []
mean_pct_final = []
mean_abs_final = []
is_better = False
last_epoch_abs_error = 0
last_epoch_pct_error = 0
mean_pct_final_train = []
mean_abs_final_train = []
for epoch in range(epochs):
# seq_len_crt: current batch seq len
for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
if isinstance(seq_len_crt, np.int64):
seq_len_crt = [seq_len_crt]
y_hat = model(xb, seq_len_crt)
packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
final_yb = final_yb.permute(1, 0)
# assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
loss = loss_func(y_hat, final_yb)
batch_size_crt = final_yb.shape[0]
loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt
loss.backward()
optimizer.step()
# scheduler.step()
optimizer.zero_grad()
# print(i)
with torch.no_grad():
train_loss_record.append(loss.item())
if batches % 50 == 0 and epoch % 1 == 0:
# print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
label = yb[0][:len(y_hat)]
# plt.ion()
plt.plot(y_hat, label='predicted')
plt.plot(label, label='label')
plt.legend(loc='upper right')
plt.title('training mode')
plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
plt.show()
return train_loss_record
but I met:Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
the error message is:
ValueError Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
38 # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
39
---> 40 y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
41 label = yb[0][:len(y_hat)]
42 # plt.ion()
<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
7 if not isinstance(xb, torch.Tensor):
8 xb = torch.Tensor(xb)
----> 9 return model(xb, seq_len) # xb.shape(None,34,5)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
50 out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
51 out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52 out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
53
54 out = self.linear2(out)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
129 used for normalization (i.e. in eval mode when buffers are not None).
130 """
--> 131 return F.batch_norm(
132 input,
133 # If buffers are not to be tracked, ensure that they won't be updated
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2052 bias=bias, training=training, momentum=momentum, eps=eps)
2053 if training:
-> 2054 _verify_batch_size(input.size())
2055
2056 return torch.batch_norm(
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in _verify_batch_size(size)
2035 size_prods *= size[i + 2]
2036 if size_prods == 1:
-> 2037 raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
2038
2039
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
I have checked and I found that in out = self.BN1(out),out.shape = (1,60),it seems that batchsize=1 is not permitted in BatchNorm1d .But I don't know how to modify it.

what does BatchNorm1d do mathematically?
try and write down the equation for the case of batch_size=1 and you'll understand why pytorch is angry with you.
How to solve it?
It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.
When evaluating your model use model.eval() before and model.train() after.

I met this problem when I load the model and started to test. Add the model.eval() before you fill in your data. This can solve the problem.

If you are using the DataLoader class, sometimes the last batch in an epoch will have only a single training example (imagine a training set of 33 examples with a batch size of 32). This can trigger the error if the network is in training mode and a batch norm layer is present.
Set the drop_last argument in the DataLoader to True like:
from torch.utils.data import DataLoader
...
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
to discard the last incomplete batch in each epoch.

Related

How to make a mlflow model predict?

I recently made a GNN model using TransformerConv and TopKPooling, it is smooth while training, but I have problems when I want to use it to predict, it kept telling me that the TransformerConv doesn't have the 'aggr_module' attribute
This is my network:
class GNN(torch.nn.Module):
def __init__(self, feature_size, model_params):
super(GNN, self).__init__()
embedding_size = model_params["model_embedding_size"]
n_heads = model_params["model_attention_heads"]
self.n_layers = model_params["model_layers"]
dropout_rate = model_params["model_dropout_rate"]
top_k_ratio = model_params["model_top_k_ratio"]
self.top_k_every_n = model_params["model_top_k_every_n"]
dense_neurons = model_params["model_dense_neurons"]
self.conv_layers = ModuleList([])
self.transf_layers = ModuleList([])
self.pooling_layers = ModuleList([])
self.bn_layers = ModuleList([])
# Transformation layer: transform original node features to embedding vector(size: embedding_size(defined in config.py))
self.conv1 = TransformerConv(feature_size,
embedding_size,
heads=n_heads,
dropout=dropout_rate,
#edge_dim=edge_dim,
beta=True)
self.transf1 = Linear(embedding_size*n_heads, embedding_size)
self.bn1 = BatchNorm1d(embedding_size)
# Other layers: message passing and pooling
for i in range(self.n_layers):
self.conv_layers.append(TransformerConv(embedding_size,
embedding_size,
heads=n_heads,
dropout=dropout_rate,
#edge_dim=edge_dim,
beta=True))
# map conv_layer output size back to emgedding_size(embedding_size*n_heads -> embedding_size)
self.transf_layers.append(Linear(embedding_size*n_heads, embedding_size))
# Batch normalization
self.bn_layers.append(BatchNorm1d(embedding_size))
# Top-k pooling to reduce the size of the graph
if i % self.top_k_every_n == 0:
self.pooling_layers.append(TopKPooling(embedding_size, ratio=top_k_ratio))
# Linear output layers: feed graph representation in & reduce until single value left
self.linear1 = Linear(embedding_size*2, dense_neurons)
self.linear2 = Linear(dense_neurons, int(dense_neurons/2))
self.linear3 = Linear(int(dense_neurons/2), 3) # same as the general form
def forward(self, x, edge_index, batch_index):
# Initial transformation
x = self.conv1(x, edge_index)
x = torch.relu(self.transf1(x))
x = torch.relu((x))
x = self.bn1(x)
# Holds the intermediate graph representations
global_representation = []
for i in range(self.n_layers):
x = self.conv_layers[i](x, edge_index)
x = torch.relu(self.transf_layers[i](x))
x = torch.relu((x))
x = self.bn_layers[i](x)
# Always aggregate last layer
if i % self.top_k_every_n == 0 or i == self.n_layers:
x , edge_index, edge_attr, batch_index, _, _ = self.pooling_layers[int(i/self.top_k_every_n)]( x,
edge_index,
None,
batch_index)
# Add current representation
global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))
x = sum(global_representation)
# Output block
x = torch.relu(self.linear1(x))
x = F.dropout(x, p=0.8, training=self.training)
x = torch.relu(self.linear2(x))
x = F.dropout(x, p=0.8, training=self.training)
x = self.linear3(x)
return x
One training:
def run_one_training(params):
params = params[0]
with mlflow.start_run() as run:
# Log parameters used in this experiment
for key in params.keys():
mlflow.log_param(key, params[key])
# Loading the dataset
print("Loading dataset...")
full_dataset = ProcessedDataset(root = "data/", filename = "fixtures_full.csv")
full_dataset.shuffle()
train_dataset = full_dataset[:3400] # around 80% of the full dataset
test_dataset = full_dataset[3400:3800]
# Prepare training
print("Preparing Training")
batch_size=params["batch_size"]
train_loader = DataLoader(train_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
# Loading the model
print("Loading model...")
model_params = {k: v for k, v in params.items() if k.startswith("model_")}
model = GNN(feature_size=train_dataset[0].x.shape[1], model_params=model_params)
print(model)
model = model.to(device)
print(f"Number of parameters: {count_parameters(model)}")
mlflow.log_param("num_params", count_parameters(model))
class_weights = [1.0239, 1.2753, 0.8070]
class_weights= torch.tensor(class_weights,dtype=torch.float)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),
lr=params["learning_rate"],
momentum=params["sgd_momentum"],
weight_decay=params["weight_decay"])
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params["scheduler_gamma"])
# Start training
best_loss = 1000
early_stopping_counter = 0
for epoch in range(300):
if early_stopping_counter <= 10: # = x * 5
# Training
model.train()
loss = train_one_epoch(epoch, model, train_loader, optimizer, loss_fn)
print(f"Epoch {epoch} | Train Loss {loss}")
mlflow.log_metric(key="Train loss", value=float(loss), step=epoch)
# Testing
model.eval()
if epoch % 5 == 0:
loss = test(epoch, model, test_loader, loss_fn)
print(f"Epoch {epoch} | Test Loss {loss}")
mlflow.log_metric(key="Test loss", value=float(loss), step=epoch)
# Update best loss
if float(loss) < best_loss:
best_loss = loss
# Save the currently best model
mlflow.pytorch.log_model(model, "model", signature=SIGNATURE)
early_stopping_counter = 0
else:
early_stopping_counter += 1
scheduler.step()
else:
print("Early stopping due to no improvement.")
return [best_loss]
print(f"Finishing training with best test loss: {best_loss}")
return [best_loss]
Train and Test
def train_one_epoch(epoch, model, train_loader, optimizer, loss_fn):
# Enumerate over the data
all_preds = []
all_labels = []
running_loss = 0.0
step = 0
for _, batch in enumerate(tqdm(train_loader)):
batch.x = torch.tensor(batch.x)
batch.x = batch.x.reshape((-1, *batch.x.shape[2:]))
# Use GPU
batch.to(device)
# Reset gradients
optimizer.zero_grad()
# Passing the node features and the connection info
pred = model(torch.tensor(batch.x).float(),
#batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculating the loss and gradients
loss = torch.sqrt(loss_fn(pred, batch.y.long()))
loss.backward()
optimizer.step()
# Update tracking
running_loss += loss.item()
step += 1
all_preds.append(np.argmax(pred.cpu().detach().numpy(), axis=1))
all_labels.append(batch.y.cpu().detach().numpy())
all_preds = np.concatenate(all_preds).ravel()
all_labels = np.concatenate(all_labels).ravel()
calculate_metrics(all_preds, all_labels, epoch, "train")
return running_loss/step
def test(epoch, model, test_loader, loss_fn):
all_preds = []
all_preds_raw = []
all_labels = []
running_loss = 0.0
step = 0
for batch in test_loader:
batch.x = torch.tensor(batch.x)
batch.x = batch.x.reshape((-1, *batch.x.shape[2:]))
batch.to(device)
pred = model(torch.tensor(batch.x).float(),
#batch.edge_attr.float(),
batch.edge_index,
batch.batch)
loss = torch.sqrt(loss_fn(pred, batch.y.long()))
# Update tracking
running_loss += loss.item()
step += 1
all_preds.append(np.argmax(pred.cpu().detach().numpy(), axis=1))
all_preds_raw.append(torch.sigmoid(pred).cpu().detach().numpy())
all_labels.append(batch.y.cpu().detach().numpy())
all_preds = np.concatenate(all_preds).ravel()
all_labels = np.concatenate(all_labels).ravel()
print(all_preds_raw[0][:10])
print(all_preds[:10])
print(all_labels[:10])
calculate_metrics(all_preds, all_labels, epoch, "test")
log_conf_matrix(all_preds, all_labels, epoch)
return running_loss/step
Predict:
def predict(model, test_loader):
all_preds = []
all_preds_raw = []
all_labels = []
for batch in test_loader:
batch.x = torch.tensor(batch.x)
batch.x = batch.x.reshape((-1, *batch.x.shape[2:]))
batch.to(device)
pred = model(torch.tensor(batch.x).float(),
#batch.edge_attr.float(),
batch.edge_index,
batch.batch)
all_preds.append(np.argmax(pred.cpu().detach().numpy(), axis=1))
all_preds_raw.append(torch.sigmoid(pred).cpu().detach().numpy())
all_labels.append(batch.y.cpu().detach().numpy())
all_preds = np.concatenate(all_preds).ravel()
all_labels = np.concatenate(all_labels).ravel()
return all_preds, all_preds_raw, all_labels
I was using mlflow to load my model and this is what I did:
import mlflow
logged_model = 'runs:/b18929aa871047f9892aa3c84a998d28/model'
# Load model
loaded_model = mlflow.pytorch.load_model(logged_model)
loaded_model = loaded_model.to(device)
loaded_model.eval()
loader = DataLoader(dataset, batch_size=len(dataset))
all_pred, all_pred_raw, all_label = predict(loaded_model, loader)
This is the error message
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [23], in <cell line: 7>()
3 dataset = full_dataset[3800:]
5 loader = DataLoader(dataset, batch_size=len(dataset))
----> 7 all_pred, all_pred_raw, all_label = predict(loaded_model, loader)
Input In [20], in predict(epoch, model, test_loader, loss_fn)
143 batch.x = batch.x.reshape((-1, *batch.x.shape[2:]))
144 batch.to(device)
--> 145 pred = model(torch.tensor(batch.x).float(),
146 #batch.edge_attr.float(),
147 batch.edge_index,
148 batch.batch)
150 all_preds.append(np.argmax(pred.cpu().detach().numpy(), axis=1))
151 all_preds_raw.append(torch.sigmoid(pred).cpu().detach().numpy())
File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File ~\FIFA_PROJECT\model.py:67, in GNN.forward(self, x, edge_index, batch_index)
63 def forward(self, x, edge_index, batch_index):
64 #def forward(self, x, edge_attr=None, edge_index, batch_index):
65 # Initial transformation
66 #x = self.conv1(x, edge_index, edge_attr)
---> 67 x = self.conv1(x, edge_index)
68 x = torch.relu(self.transf1(x))
69 x = torch.relu((x))
File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File ~\anaconda3\lib\site-packages\torch_geometric\nn\conv\gcn_conv.py:198, in GCNConv.forward(self, x, edge_index, edge_weight)
195 x = self.lin(x)
197 # propagate_type: (x: Tensor, edge_weight: OptTensor)
--> 198 out = self.propagate(edge_index, x=x, edge_weight=edge_weight,
199 size=None)
201 if self.bias is not None:
202 out = out + self.bias
File ~\anaconda3\lib\site-packages\torch_geometric\nn\conv\message_passing.py:454, in MessagePassing.propagate(self, edge_index, size, **kwargs)
451 if res is not None:
452 aggr_kwargs = res[0] if isinstance(res, tuple) else res
--> 454 out = self.aggregate(out, **aggr_kwargs)
456 for hook in self._aggregate_forward_hooks.values():
457 res = hook(self, (aggr_kwargs, ), out)
File ~\anaconda3\lib\site-packages\torch_geometric\nn\conv\message_passing.py:578, in MessagePassing.aggregate(self, inputs, index, ptr, dim_size)
565 def aggregate(self, inputs: Tensor, index: Tensor,
566 ptr: Optional[Tensor] = None,
567 dim_size: Optional[int] = None) -> Tensor:
568 r"""Aggregates messages from neighbors as
569 :math:`\square_{j \in \mathcal{N}(i)}`.
570
(...)
576 as specified in :meth:`__init__` by the :obj:`aggr` argument.
577 """
--> 578 return self.aggr_module(inputs, index, ptr=ptr, dim_size=dim_size,
579 dim=self.node_dim)
File ~\anaconda3\lib\site-packages\torch\nn\modules\module.py:1265, in Module.__getattr__(self, name)
1263 if name in modules:
1264 return modules[name]
-> 1265 raise AttributeError("'{}' object has no attribute '{}'".format(
1266 type(self).__name__, name))
AttributeError: 'TransformerConv' object has no attribute 'aggr_module'
Please I'm begging :(
I wrote the predict function but it didn't come out as expected.
Pls send help, would be grateful for any suggestions.
I’ve gotten the solution from pyg discussion here
So basically you can get around this by iterating over all `MessagePassing layers and setting:
loaded_model = mlflow.pytorch.load_model(logged_model)
for conv in loaded_model.conv_layers:
conv.aggr_module = SumAggregation()
This should fix the problem!

what if the size of training set is not the integer multiple of batch size

I am running the following code against the dataset of PV_Elec_Gas3.csv, the network architecture is designed as follows
class CNN_ForecastNet(nn.Module):
def __init__(self):
super(CNN_ForecastNet,self).__init__()
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
The train function is defined as follows,
def Train():
running_loss = .0
model.train()
for idx, (inputs,labels) in enumerate(train_loader):
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
loss.backward()
optimizer.step()
running_loss += loss
train_loss = running_loss/len(train_loader)
train_losses.append(train_loss.detach().numpy())
print(f'train_loss {train_loss}')
the train_loader is defined as train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False) here the batch_size is set as 2. When running the train function, I got error message as follows. The reason is becaause when the code iterate through the train_loader, the last iteration only have one training point instead of two as batch_size requires. For this kind of scenario, besides changing the batch size, are there any other options?
This is the error message. I also include the full code to reproduce the error
RuntimeError Traceback (most recent call last)
<ipython-input-82-78a49fb8c068> in <module>
99 for epoch in range(epochs):
100 print('epochs {}/{}'.format(epoch+1,epochs))
--> 101 Train()
102 gc.collect()
<ipython-input-82-78a49fb8c068> in Train()
81 optimizer.zero_grad()
82 #print('inputs ',inputs)
---> 83 preds = model(inputs.float())
84 loss = criterion(preds,labels.float())
85 loss.backward()
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-82-78a49fb8c068> in forward(self, x)
57 x = x.view(-1)
58 #print('x size',x.size())
---> 59 x = self.fc1(x)
60 x = self.relu(x)
61 x = self.fc2(x)
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
91
92 def forward(self, input: Tensor) -> Tensor:
---> 93 return F.linear(input, self.weight, self.bias)
94
95 def extra_repr(self) -> str:
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1690 ret = torch.addmm(bias, input, weight.t())
1691 else:
-> 1692 output = input.matmul(weight.t())
1693 if bias is not None:
1694 output += bias
RuntimeError: mat1 dim 1 must match mat2 dim 0
the following is the code for reproduction of error
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from numpy import array
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset,DataLoader
solar_power = pd.read_csv('PV_Elec_Gas3.csv').rename(columns={'date':'timestamp'}).set_index('timestamp')
train_set = solar_power[:'8/10/2016']
def split_sequence(sequence, n_steps):
x, y = list(), list()
for i in range(len(sequence)):
end_ix = i + n_steps
if end_ix > len(sequence)-1:
break
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
x.append(seq_x)
y.append(seq_y)
return array(x), array(y)
n_steps = 3
train_x,train_y = split_sequence(train_set.loc[:,"kWh electricity/day"].values,n_steps)
class ElecDataset(Dataset):
def __init__(self,feature,target):
self.feature = feature
self.target = target
def __len__(self):
return len(self.feature)
def __getitem__(self,idx):
item = self.feature[idx]
label = self.target[idx]
return item,label
class CNN_ForecastNet(nn.Module):
def __init__(self):
super(CNN_ForecastNet,self).__init__()
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()
train_losses = []
def Train():
running_loss = .0
model.train()
for idx, (inputs,labels) in enumerate(train_loader):
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
loss.backward()
optimizer.step()
running_loss += loss
train_loss = running_loss/len(train_loader)
train_losses.append(train_loss.detach().numpy())
print(f'train_loss {train_loss}')
train = ElecDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)
epochs = 1
for epoch in range(epochs):
print('epochs {}/{}'.format(epoch+1,epochs))
Train()
gc.collect()
NO!!!!
In your forward method you x.view(-1) before passing it to a nn.Linear layer. This "flattens" not only the spatial dimensions on x, but also the batch dimension! You basically mix together all samples in the batch, making your model dependant on the batch size and in general making the predictions depend on the batch as a whole rather than on the individual data points.
Instead, you should:
...
def forward(self, x):
x = self.conv1d(x)
x = self.relu(x)
x = x.flatten(start_dim=1) # flatten all BUT batch dimension
x = self.fc1(x) # you'll probably have to modify in_features of fc1 now
x = self.relu(x)
x = self.fc2(x)
return x
Please see flatten() for more details.
If, for some reason, you must process only "full batches", you can tell DataLoader to drop the last batch by changing the argument drop_last from the default False to True:
train_loader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=False, drop_last=True)

Multi class classification - RuntimeError: 1D target tensor expected, multi-target not supported

My goal is to build a multi-class image classifier using Pytorch and based on the EMNIST dataset (black and white pictures of letters).
The shape of my training data X_train is (124800, 28, 28).
The shape of the original target variables y_train is (124800, 1), however I created a one-hot encoding so that now the shape is (124800, 26).
The model that I am building should have 26 output variables, each representing the probability of one letter.
I read in my data as follows:
import scipy .io
emnist = scipy.io.loadmat(DATA_DIR + '/emnist-letters.mat')
data = emnist ['dataset']
X_train = data ['train'][0, 0]['images'][0, 0]
X_train = X_train.reshape((-1,28,28), order='F')
y_train = data ['train'][0, 0]['labels'][0, 0]
Then, I created a one-hot-encoding as follows:
y_train_one_hot = np.zeros([len(y_train), 27])
for i in range (0, len(y_train)):
y_train_one_hot[i, y_train[i][0]] = 1
y_train_one_hot = np.delete(y_train_one_hot, 0, 1)
I create the dataset with:
train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train_one_hot))
batch_size = 128
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
And then I build my model as follows:
class CNNModel(nn.Module):
def __init__(self):
super(CNNModel, self).__init__()
# Convolution 1
self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
self.relu1 = nn.ReLU()
# Max pool 1
self.maxpool1 = nn.MaxPool2d(2,2)
# Convolution 2
self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
self.relu2 = nn.ReLU()
# Max pool 2
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
# Fully connected 1 (readout)
self.fc1 = nn.Linear(32 * 4 * 4, 26)
def forward(self, x):
# Convolution 1
out = self.cnn1(x.float())
out = self.relu1(out)
# Max pool 1
out = self.maxpool1(out)
# Convolution 2
out = self.cnn2(out)
out = self.relu2(out)
# Max pool 2
out = self.maxpool2(out)
# Resize
# Original size: (100, 32, 7, 7)
# out.size(0): 100
# New out size: (100, 32*7*7)
out = out.view(out.size(0), -1)
# Linear function (readout)
out = self.fc1(out)
return out
model = CNNModel()
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
And then I train the model as follows:
iter = 0
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Add a single channel dimension
# From: [batch_size, height, width]
# To: [batch_size, 1, height, width]
images = images.unsqueeze(1)
# Forward pass to get output/logits
outputs = model(images)
# Clear gradients w.r.t. parameters
optimizer.zero_grad()
# Forward pass to get output/logits
outputs = model(images)
# Calculate Loss: softmax --> cross entropy loss
loss = criterion(outputs, labels)
# Getting gradients w.r.t. parameters
loss.backward()
# Updating parameters
optimizer.step()
iter += 1
if iter % 500 == 0:
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images = images.unsqueeze(1)
# Forward pass only to get logits/output
outputs = model(images)
# Get predictions from the maximum value
_, predicted = torch.max(outputs.data, 1)
# Total number of labels
total += labels.size(0)
correct += (predicted == labels).sum()
accuracy = 100 * correct / total
# Print Loss
print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data[0], accuracy))
However, when I run this, I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-c26c43bbc32e> in <module>()
21
22 # Calculate Loss: softmax --> cross entropy loss
---> 23 loss = criterion(outputs, labels)
24
25 # Getting gradients w.r.t. parameters
3 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
930 def forward(self, input, target):
931 return F.cross_entropy(input, target, weight=self.weight,
--> 932 ignore_index=self.ignore_index, reduction=self.reduction)
933
934
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2315 if size_average is not None or reduce is not None:
2316 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2318
2319
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: 1D target tensor expected, multi-target not supported
I expect that I do something wrong when I initialize/use my loss function. What can I do so that I can start training my model?
If you are using crossentropy loss you shouldn't one-hot encode your target variable y.
Pytorch crossentropy expects just the class indices as target not their one-hot encoded version.
To cite the doc https://pytorch.org/docs/master/generated/torch.nn.CrossEntropyLoss.html :
This criterion expects a class index in the range [0, C-1] as the target for each value of a 1D tensor of size minibatch;

Pytorch: ValueError: Expected input batch_size (32) to match target batch_size (64)

Tried to run the CNN examples on MNIST dataset, batch size=64, channel =1, n_h=28, n_w=28, n_iters = 1000. The program runs for first 500 interation and then gives the above mentioned error.
There are same topics already being discussed on the forum such as : topic 1
and topic 2, but none of them could help me identify the mistake in the following code:
class CNN_MNIST(nn.Module):
def __init__(self):
super(CNN_MNIST,self).__init__()
# convolution layer 1
self.cnn1 = nn.Conv2d(in_channels=1, out_channels= 32, kernel_size=5,
stride=1,padding=2)
# ReLU activation
self.relu1 = nn.ReLU()
# maxpool 1
self.maxpool1 = nn.MaxPool2d(kernel_size=2,stride=2)
# convolution 2
self.cnn2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5,
stride=1,padding=2)
# ReLU activation
self.relu2 = nn.ReLU()
# maxpool 2
self.maxpool2 = nn.MaxPool2d(kernel_size=2,stride=2)
# fully connected 1
self.fc1 = nn.Linear(7*7*64,1000)
# fully connected 2
self.fc2 = nn.Linear(1000,10)
def forward(self,x):
# convolution 1
out = self.cnn1(x)
# activation function
out = self.relu1(out)
# maxpool 1
out = self.maxpool1(out)
# convolution 2
out = self.cnn2(out)
# activation function
out = self.relu2(out)
# maxpool 2
out = self.maxpool2(out)
# flatten the output
out = out.view(out.size(0),-1)
# fully connected layers
out = self.fc1(out)
out = self.fc2(out)
return out
# model trainning
count = 0
loss_list = []
iteration_list = []
accuracy_list = []
for epoch in range(int(n_epochs)):
for i, (image,labels) in enumerate(train_loader):
train = Variable(image)
labels = Variable(labels)
# clear gradient
optimizer.zero_grad()
# forward propagation
output = cnn_model(train)
# calculate softmax and cross entropy loss
loss = error(output,label)
# calculate gradients
loss.backward()
# update the optimizer
optimizer.step()
count += 1
if count % 50 ==0:
# calculate the accuracy
correct = 0
total = 0
# iterate through the test data
for image, labels in test_loader:
test = Variable(image)
# forward propagation
output = cnn_model(test)
# get prediction
predict = torch.max(output.data,1)[1]
# total number of labels
total += len(labels)
# correct prediction
correct += (predict==labels).sum()
# accuracy
accuracy = 100*correct/float(total)
# store loss, number of iteration, and accuracy
loss_list.append(loss.data)
iteration_list.append(count)
accuracy_list.append(accuracy)
# print loss and accurcay as the algorithm progresses
if count % 500 ==0:
print('Iteration :{} Loss :{} Accuracy :
{}'.format(count,loss.item(),accuracy))
The error is as follows:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-9e93a242961b> in <module>
18
19 # calculate softmax and cross entropy loss
---> 20 loss = error(output,label)
21
22 # calculate gradients
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
914 def forward(self, input, target):
915 return F.cross_entropy(input, target, weight=self.weight,
--> 916 ignore_index=self.ignore_index, reduction=self.reduction)
917
918
~\Anaconda3\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
1993 if size_average is not None or reduce is not None:
1994 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 1995 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
1996
1997
~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1820 if input.size(0) != target.size(0):
1821 raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 1822 .format(input.size(0), target.size(0)))
1823 if dim == 2:
1824 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
ValueError: Expected input batch_size (32) to match target batch_size (64).
You are providing the wrong target to your loss:
loss = error(output, label)
While your loader gives you
for i, (image,labels) in enumerate(train_loader):
train = Variable(image)
labels = Variable(labels)
So you have a variable name labels (with s) from the loader, yet you feed label (no s) to your loss.
Batch size is the least of your worries.

Unexpected data types when trying to train a pytorch model

I'm putting together a basic neural network to learn pytorch. Attempting to train it always fails with the message "Expected object of scalar type Float but got scalar type Double for argument #4 'mat1'". I suspect I'm doing something wrong with putting the data together, but I don't know what.
The data in question is a couple of one-dimensional lists of numbers that I've generated, which should be linearly separable.
I've pasted my code below.
class MyDataset(Dataset):
def __init__(self, xs, ys):
assert len(xs) == len(ys), "Input and output tensors must be the same length"
self.xs = np.array(xs, dtype=np.double)
self.ys = np.array(ys, dtype=np.double)
def __getitem__(self, idx):
return (self.xs[idx], self.ys[idx])
def __len__(self):
return len(self.xs)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.layer1 = nn.Linear(1, 1)
def forward(self, x):
x = F.relu(self.layer1(x))
return x
def train(data, validation, net, epochs=100):
learning_rate = 0.01
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
for epoch in range(0, epochs):
print('Beginning epoch ', epoch+1)
training_losses = []
validation_losses = []
for x_batch, y_batch in data:
optimizer.zero_grad()
yhat = net(x_batch)
loss = criterion(y_batch, yhat)
loss.backward()
optimizer.step()
optimizer.zero_grad()
training_losses.append(loss)
with torch.no_grad():
for x_batch, y_batch in validation:
net.eval()
yhat = net(x_batch)
loss = criterion(y_batch, yhat)
validation_losses.append(loss)
print('Ending epoch ', epoch+1, 'Training loss: ', np.mean(training_losses), 'Validation loss: ', np.mean(validation_losses))
And this is how I'm generating the data and attempting to train it:
num_samples = 10000
foos = [100 + np.random.normal(scale=20) for x in range(0, num_samples)]
bars = [200 + np.random.normal(scale=20) for x in range(0, num_samples)]
xs = foos + bars
xs = torch.tensor([[x] for x in xs])
ys = np.concatenate([np.zeros(num_samples), np.ones(num_samples)])
ys = torch.tensor([[y] for y in ys])
dataset = MyDataset(xs, ys)
train_dataset, val_dataset = random_split(dataset, [16000, 4000])
train_loader = DataLoader(dataset=train_dataset, batch_size=16)
val_loader = DataLoader(dataset=val_dataset, batch_size=20)
net = Net()
train(train_loader, val_loader, net)
Finally, here's the stack trace:
<ipython-input-114-ab674ae015a5> in train(data, validation, net, epochs)
13 print('x_batch: ', type(x_batch[0].item()))
14 print('y_batch: ', type(y_batch[0].item()))
---> 15 yhat = net(x_batch)
16 loss = criterion(y_batch, yhat)
17 loss.backward()
/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
--> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
<ipython-input-58-ec2e6d981760> in forward(self, x)
5
6 def forward(self, x):
----> 7 x = F.relu(self.layer1(x))
8 return x
/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
--> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input)
65 #weak_script_method
66 def forward(self, input):
---> 67 return F.linear(input, self.weight, self.bias)
68
69 def extra_repr(self):
/usr/local/lib/python3.6/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1350 if input.dim() == 2 and bias is not None:
1351 # fused op is marginally faster
-> 1352 ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
1353 else:
1354 output = input.matmul(weight.t())
RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #4 'mat1'
I've attempted to debug by logging the types of x_batch and y_batch from within the train method, but they're both showing as float, so I'm stumped as to where the Double is coming from.
Any suggestions?
PyTorch uses single-precision floats by default.
In the lines:
self.xs = np.array(xs, dtype=np.double)
self.ys = np.array(ys, dtype=np.double)
Replace np.double with np.float32.

Resources