**My complete code is here: I am following github code for accomplishing my task. I am getting dimensions mismatch error. The code is giving me dimension mismatch error. I am receiving the following error: ValueError: Expected input batch_size (1) to match target batch_size (64).I am confused, i don't know what should i change in this code. Please help me in resolving this issue. **
def windowz(data, size):
start = 0
while start < len(data):
yield start, start + size
start += (size // 2)
def segment_pa2(x_train,y_train,window_size):
segments = np.zeros(((len(x_train)//(window_size//2))-1,window_size,52))
labels = np.zeros(((len(y_train)//(window_size//2))-1))
i_segment = 0
i_label = 0
for (start,end) in windowz(x_train,window_size):
if(len(x_train[start:end]) == window_size):
m = stats.mode(y_train[start:end])
segments[i_segment] = x_train[start:end]
labels[i_label] = m[0]
i_label+=1
i_segment+=1
return segments, labels
print ('starting...')
start_time = time.time()
dataset = sys.argv[1]
path = '/Users/tehreem/Desktop/PAMAP2/PAMAP2_Dataset/pamap2.h5'
f = h5py.File(path, 'r')
print(f)
x_train = f.get('train').get('inputs')[()]
y_train = f.get('train').get('targets')[()]
x_test = f.get('test').get('inputs')[()]
y_test = f.get('test').get('targets')[()]
print ("x_train shape =",x_train.shape)
print ("y_train shape =",y_train.shape)
print ("x_test shape =" ,x_test.shape)
print ("y_test shape =",y_test.shape)
x_train = x_train[::3,:]
y_train = y_train[::3]
x_test = x_test[::3,:]
y_test = y_test[::3]
print ("x_train shape(downsampled) = ", x_train.shape)
print ("y_train shape(downsampled) =",y_train.shape)
print ("x_test shape(downsampled) =" ,x_test.shape)
print ("y_test shape(downsampled) =",y_test.shape)
print (np.unique(y_train))
print (np.unique(y_test))
unq = np.unique(y_test)
input_width = 52
print("segmenting signal...")
train_x, train_y = segment_pa2(x_train,y_train,input_width)
test_x, test_y = segment_pa2(x_test,y_test,input_width)
print ("signal segmented.")
train = pd.get_dummies(train_y)
test = pd.get_dummies(test_y)
train, test = train.align(test, join='inner', axis=1)
train_y = np.asarray(train)
test_y = np.asarray(test)
input_height = 1
input_width = input_width
num_labels = 11
num_channels = 52
batch_size = 64
stride_size = 2
kernel_size_1 = 7
kernel_size_2 = 3
kernel_size_3 = 1
depth_1 = 128
depth_2 = 128
depth_3 = 128
num_hidden = 512
dropout_1 = 0.1 #0.1
dropout_2 = 0.25 #0.25
dropout_3 = 0.5 #0.5
learning_rate = 0.0005
training_epochs = 50
total_batches = train_x.shape[0] // batch_size
train_x = train_x.reshape(len(train_x),1,input_width,num_channels)
test_x = test_x.reshape(len(test_x),1,input_width,num_channels)
print ("test_x_reshaped = " , test_x.shape)
print ("train_x shape =",train_x.shape)
print ("train_y shape =",train_y.shape)
print ("test_x shape =",test_x.shape)
print ("test_y shape =",test_y.shape)
train_x = train_x.reshape(-1,input_width,num_channels)
test_x = test_x.reshape(-1,input_width,num_channels)
def init_weights(m):
if type(m) == nn.LSTM:
for name, param in m.named_parameters():
if 'weight_ih' in name:
torch.nn.init.orthogonal_(param.data)
elif 'weight_hh' in name:
torch.nn.init.orthogonal_(param.data)
elif 'bias' in name:
param.data.fill_(0)
elif type(m) == nn.Conv1d or type(m) == nn.Linear:
torch.nn.init.orthogonal_(m.weight)
m.bias.data.fill_(0)
model.apply(init_weights)
import torch
import torch.nn as nn
import torch.nn.functional as F
class CharCNN(nn.Module):
def __init__(self):
super(CharCNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(num_channels, depth_1, kernel_size=kernel_size_1, stride=stride_size),
nn.ReLU(),
nn.MaxPool1d(kernel_size=kernel_size_1, stride=stride_size),
nn.Dropout(0.1),
)
self.conv2 = nn.Sequential(
nn.Conv1d(depth_1, depth_2, kernel_size=kernel_size_2, stride=stride_size),
nn.ReLU(),
nn.MaxPool1d(kernel_size=kernel_size_2, stride=stride_size),
nn.Dropout(0.25)
)
self.fc1 = nn.Sequential(
nn.Linear(128*64, num_hidden),
nn.ReLU(),
nn.Dropout(0.5)
)
self.fc2 = nn.Sequential(
nn.Linear(num_hidden, 11),
nn.ReLU(),
nn.Dropout(0.5)
)
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
# collapse
out = out.reshape(-1,128*64)
#out = out.view(out.size(0), -1)
# linear layer
out = self.fc1(out)
# output layer
out = self.fc2(out)
#out = self.log_softmax(x,dim=1)
return out
model = CharCNN()
print(model)
def iterate_minibatches(inputs, targets, batch_size, shuffle=True):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
if shuffle:
excerpt = indices[start_idx:start_idx + batch_size]
else:
excerpt = slice(start_idx, start_idx + batch_size)
yield inputs[excerpt], targets[excerpt]
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(),lr=learning_rate)
for e in range(training_epochs):
if(train_on_gpu):
net.cuda()
train_losses = []
for batch in iterate_minibatches(train_x, train_y, batch_size):
x, y = batch
inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
if(train_on_gpu):
inputs, targets = inputs.cuda(), targets.cuda()
#inputs= inputs.view(batch_size, 128*64)
#targets = targets.view(batch_size)
opt.zero_grad()
output = model(inputs)
loss = criterion(output, targets.long())
train_losses.append(loss.item())
loss.backward()
opt.step()
val_losses = []
accuracy=0
f1score=0
print("Epoch: {}/{}...".format(e+1, training_epochs),
"Train Loss: {:.4f}...".format(np.mean(train_losses)))
I received the following error:
<ipython-input-468-7a508893b28d> in <module>
21 output = model(inputs)
22
---> 23 loss = criterion(output, targets.long())
24 train_losses.append(loss.item())
25 loss.backward()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
914 def forward(self, input, target):
915 return F.cross_entropy(input, target, weight=self.weight,
--> 916 ignore_index=self.ignore_index, reduction=self.reduction)
917
918
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2019 if size_average is not None or reduce is not None:
2020 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2022
2023
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1834 if input.size(0) != target.size(0):
1835 raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 1836 .format(input.size(0), target.size(0)))
1837 if dim == 2:
1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
ValueError: Expected input batch_size (1) to match target batch_size (64).```
The code you have posted hasn't been pasted correctly with proper indentations (it's different in every part of the code), so it's hard to go through the code.
But from what I understand from your error message, the problem lies with the size of your 'output' tensor. For a batch size of 64, the 'output' tensor should have the dimension (64, num_classes). But the first dimension of your 'output' tensor is 1 according to the error message. I suspect that there is an extra dimension getting added to your tensor somehow.
I would suggest printing out the size of your 'output' tensor using output.size() and that should give you an idea where the bug lies. If my intuition is correct and if it is indeed (1, 64, num_classes), then a simple output = output.squeeze(0) should do the trick.
Related
I am using nn.CrossEntropyLoss() in as my criterion in a model that I am developing. The problem that I am having is that the model outputs a vector of size (batchsize, #classes) when it is supposed to output a (batchsize) vector.
Isn't CrossEntropyLoss supposed to apply LogSoftmax?
Here's my Dataset:
class DatasetPlus(Dataset):
def __init__(self, root_img, root_data, width, hight, transform=None):
self.root_img = root_img
self.root_data = root_data
self.width = width
self.hight = hight
self.transform = transform
# labels are stored in a csv file
self.labels = pd.read_csv(self.root_data)
self.imgs = [image for image in sorted(
os.listdir(self.root_img)) if image[-4:] == '.jpg']
self.len = len(self.imgs)
def __len__(self):
return self.len
def __getitem__(self, idx):
img_name = self.imgs[idx]
img_path = os.path.join(self.root_img, img_name)
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
img = cv2.resize(img, (self.width, self.hight), cv2.INTER_AREA)
img = np.array(img) / 255.0
if self.transform is not None:
img = self.transform(img)
img_id = int(img_name[6:-4])
label = self.labels.where(self.labels['ID'] == img_id)['Label'].dropna().to_numpy()[0]
label = torch.tensor(label, dtype=torch.float32)
return img, label
Here is my model:
class Net(nn.Module):
def __init__(self, h, w):
super().__init__()
nw = (((w - 4) // 2) -4) // 2
nh = (((h - 4) // 2) -4) // 2
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * nh * nw, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 3)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = torch.flatten(x, 1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = (self.fc3(x))
return x
Here's my training code:
model = Net(224, 224)
trainloader = DataLoader(ds, batch_size=4, shuffle=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
def train_model(epochs):
for epoch in range(epochs):
losses = 0.0
for i, data in enumerate(trainloader, 0):
optimizer.zero_grad()
img, label = data
yhat = model(img)
loss = criterion(yhat, label)
loss.backward()
optimizer.step()
losses += loss.item()
# if i % 5 == 99:
print(f'[{epoch + 1}, {i + 1:5d}] loss: {losses:.3f}')
losses = 0.0
train_model(5)
I have explained the problem but here's the error anyways:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[9], line 1
----> 1 train_model(5)
Cell In[8], line 13, in train_model(epochs)
11 print(yhat.size())
12 print(label.size())
---> 13 loss = criterion(yhat, label)
14 loss.backward()
15 optimizer.step()
File c:\Users\Yasamin\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File c:\Users\Yasamin\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\loss.py:720, in BCEWithLogitsLoss.forward(self, input, target)
719 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 720 return F.binary_cross_entropy_with_logits(input, target,
721 self.weight,
722 pos_weight=self.pos_weight,
723 reduction=self.reduction)
File c:\Users\Yasamin\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\functional.py:3160, in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
3157 reduction_enum = _Reduction.get_enum(reduction)
3159 if not (target.size() == input.size()):
-> 3160 raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
3162 return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
ValueError: Target size (torch.Size([4])) must be the same as input size (torch.Size([4, 3]))
And finally, these are the outputs and the labels that raise this error:
yhat=
tensor([[ 0.0097, 0.0184, -0.1236],
[ 0.0020, 0.0135, -0.1324],
[ 0.0095, 0.0136, -0.1261],
[ 0.0027, 0.0176, -0.1285]], grad_fn=<AddmmBackward0>)
torch.Size([4, 3])
label=
tensor([2., 1., 0., 2.])
torch.Size([4])
from what I found out, CrossEntropyLoss works in two ways.
If you pass it Long labels, it treats the labels as integer class labels and the shape of (batchsize) is correct.
But if you pass CrossEntropyLoss labels of type Float (as I have in my code) CrossEntropyLoss therefore treats your labels as probabilistic (“soft”) labels and expects labels to have shape (nBatch, #classes), that is, to have the same shape
as yhat.
So to fix the error, label should be converted to Long, before
being passed to CrossEntropyLoss (or set it to int64 when creating the tensor)
Also it is worth noting that labels should be from zero to )#classes -1) for CrossEntropyLoss to operate correctly.
I am new to PyTorch, and I am trying to build a BiLSTM model to insert its output to a MaxPool1d layer and an AvgPool1d layer each before concatenating the outputs from both layers for a binary classification task. I am working with pretrained Word2Vec embeddings as input:
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as F
class LSTM(nn.Module):
# define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim , num_classes, lstm_layers, weights):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.embedding.weight.data.copy_(torch.from_numpy(weights))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embedding_dim,
lstm_units,
num_layers=lstm_layers,
bidirectional=True,
batch_first=True)
num_directions = 2 #if bidirectional else 1
self.m1 = nn.MaxPool1d(1,stride= 1)
self.m2 = nn.AvgPool1d(1,stride= 1)
self.fc1 = nn.Linear(lstm_units * num_directions, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, num_classes)
self.relu = nn.ReLU()
self.softmax = nn.Softmax()
self.lstm_layers = lstm_layers
self.num_directions = num_directions
self.lstm_units = lstm_units
def forward(self, text, text_lengths):
batch_size = text.shape[0]
h_0, c_0 = (Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)),
Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)))
embedded = self.embedding(text)
packed_embedded = pack_padded_sequence(embedded, text_lengths.to("cpu"), batch_first=True)
output, (h_n, c_n) = self.lstm(packed_embedded, (h_0, c_0))
output_unpacked, output_lengths = pad_packed_sequence(output, batch_first=True, enforce_sorted=False)
# out = output_unpacked[:, -1, :]
#return self.linear(ht[-1])
out = output_unpacked
out1 = self.m1(out)
out2 = self.m2(out)
out = torch.cat((out1, out2), 1)
out = F.relu(self.fc1(out))
preds = F.softmax(self.fc2(out))
return preds
My training function is as below:
import time
def train(dataloader):
model.train()
total_acc, total_count = 0, 0
log_interval = 500
text_lengths = np.dtype('int64').type(200)
start_time = time.time()
for idx, (label, text) in enumerate(dataloader):
optimizer.zero_grad()
predited_label = model(text, text_lengths = torch.tensor([text_lengths]))
loss = criterion(predited_label, label)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc/total_count))
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
model.eval()
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (label, text) in enumerate(dataloader):
predited_label = model(text)
loss = criterion(predited_label, label)
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc/total_count
And I try to run the code like this:
from torch.utils.data import DataLoader
# Hyperparameters
EPOCHS = 1 # epoch
LR =1 # learning rate
BATCH_SIZE = 1 # batch size for training
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
total_accu = None
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
for epoch in range(1, EPOCHS + 1):
epoch_start_time = time.time()
train(train_dataloader)
accu_val = evaluate(valid_dataloader)
if total_accu is not None and total_accu > accu_val:
scheduler.step()
else:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
accu_val))
print('-' * 59)
However, I get the error below. I am not sure what the input size here refers to, and I can't find anyone else with the same error anywhere. Can anyone advise me please?
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-111-14ba1dd26348> in <module>()
27 for epoch in range(1, EPOCHS + 1):
28 epoch_start_time = time.time()
---> 29 train(train_dataloader)
30 accu_val = evaluate(valid_dataloader)
31 if total_accu is not None and total_accu > accu_val:
6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
201 raise RuntimeError(
202 'input must have {} dimensions, got {}'.format(
--> 203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
205 raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1
I am running the following code against the dataset of PV_Elec_Gas3.csv, the network architecture is designed as follows
class CNN_ForecastNet(nn.Module):
def __init__(self):
super(CNN_ForecastNet,self).__init__()
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
The train function is defined as follows,
def Train():
running_loss = .0
model.train()
for idx, (inputs,labels) in enumerate(train_loader):
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
loss.backward()
optimizer.step()
running_loss += loss
train_loss = running_loss/len(train_loader)
train_losses.append(train_loss.detach().numpy())
print(f'train_loss {train_loss}')
the train_loader is defined as train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False) here the batch_size is set as 2. When running the train function, I got error message as follows. The reason is becaause when the code iterate through the train_loader, the last iteration only have one training point instead of two as batch_size requires. For this kind of scenario, besides changing the batch size, are there any other options?
This is the error message. I also include the full code to reproduce the error
RuntimeError Traceback (most recent call last)
<ipython-input-82-78a49fb8c068> in <module>
99 for epoch in range(epochs):
100 print('epochs {}/{}'.format(epoch+1,epochs))
--> 101 Train()
102 gc.collect()
<ipython-input-82-78a49fb8c068> in Train()
81 optimizer.zero_grad()
82 #print('inputs ',inputs)
---> 83 preds = model(inputs.float())
84 loss = criterion(preds,labels.float())
85 loss.backward()
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-82-78a49fb8c068> in forward(self, x)
57 x = x.view(-1)
58 #print('x size',x.size())
---> 59 x = self.fc1(x)
60 x = self.relu(x)
61 x = self.fc2(x)
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
91
92 def forward(self, input: Tensor) -> Tensor:
---> 93 return F.linear(input, self.weight, self.bias)
94
95 def extra_repr(self) -> str:
~\Anaconda3\envs\pytorchenv\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1690 ret = torch.addmm(bias, input, weight.t())
1691 else:
-> 1692 output = input.matmul(weight.t())
1693 if bias is not None:
1694 output += bias
RuntimeError: mat1 dim 1 must match mat2 dim 0
the following is the code for reproduction of error
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from numpy import array
import torch
import gc
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
from torch.utils.data import Dataset,DataLoader
solar_power = pd.read_csv('PV_Elec_Gas3.csv').rename(columns={'date':'timestamp'}).set_index('timestamp')
train_set = solar_power[:'8/10/2016']
def split_sequence(sequence, n_steps):
x, y = list(), list()
for i in range(len(sequence)):
end_ix = i + n_steps
if end_ix > len(sequence)-1:
break
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
x.append(seq_x)
y.append(seq_y)
return array(x), array(y)
n_steps = 3
train_x,train_y = split_sequence(train_set.loc[:,"kWh electricity/day"].values,n_steps)
class ElecDataset(Dataset):
def __init__(self,feature,target):
self.feature = feature
self.target = target
def __len__(self):
return len(self.feature)
def __getitem__(self,idx):
item = self.feature[idx]
label = self.target[idx]
return item,label
class CNN_ForecastNet(nn.Module):
def __init__(self):
super(CNN_ForecastNet,self).__init__()
self.conv1d = nn.Conv1d(3,64,kernel_size=1)
self.relu = nn.ReLU(inplace=True)
self.fc1 = nn.Linear(64*2,50)
self.fc2 = nn.Linear(50,1)
def forward(self,x):
x = self.conv1d(x)
x = self.relu(x)
x = x.view(-1)
#print('x size',x.size())
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()
train_losses = []
def Train():
running_loss = .0
model.train()
for idx, (inputs,labels) in enumerate(train_loader):
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
#print('inputs ',inputs)
preds = model(inputs.float())
loss = criterion(preds,labels.float())
loss.backward()
optimizer.step()
running_loss += loss
train_loss = running_loss/len(train_loader)
train_losses.append(train_loss.detach().numpy())
print(f'train_loss {train_loss}')
train = ElecDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)
epochs = 1
for epoch in range(epochs):
print('epochs {}/{}'.format(epoch+1,epochs))
Train()
gc.collect()
NO!!!!
In your forward method you x.view(-1) before passing it to a nn.Linear layer. This "flattens" not only the spatial dimensions on x, but also the batch dimension! You basically mix together all samples in the batch, making your model dependant on the batch size and in general making the predictions depend on the batch as a whole rather than on the individual data points.
Instead, you should:
...
def forward(self, x):
x = self.conv1d(x)
x = self.relu(x)
x = x.flatten(start_dim=1) # flatten all BUT batch dimension
x = self.fc1(x) # you'll probably have to modify in_features of fc1 now
x = self.relu(x)
x = self.fc2(x)
return x
Please see flatten() for more details.
If, for some reason, you must process only "full batches", you can tell DataLoader to drop the last batch by changing the argument drop_last from the default False to True:
train_loader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=False, drop_last=True)
I met an error when I use BatchNorm1d, code:
##% first I set a model
class net(nn.Module):
def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
super(net, self).__init__()
self.max_len = max_len
self.feature_linear = feature_linear
self.input_size = input_size
self.hidden_size = hidden_size
self.bidirectional = bidirectional
self.num_directions = 2 if bidirectional == True else 1
self.p = p
self.batch_first = batch_first
self.linear1 = nn.Linear(max_len, feature_linear)
init.kaiming_normal_(self.linear1.weight, mode='fan_in')
self.BN1 = BN(feature_linear)
def forward(self, xb, seq_len_crt):
rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
for i in range(self.input_size):
out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(1,34), out.shape(1,100)
out = F.relu(out) # 输入:out.shape(1,100), 输出:out.shape(1,100)
out = self.BN1(out) # 输入:out.shape(1,100),输出:out.shape(1,100)
return y_hat.squeeze(-1)
##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size
feature_linear = 60
BN = nn.BatchNorm1d
model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)
##% use this model to predict data
def predict(xb, model, seq_len):
# xb's shape should be (batch_size, seq_len, n_features)
if xb.ndim == 2: # suitable for both ndarray and Tensor
# add a {batch_size} dim
xb = xb[None, ]
if not isinstance(xb, torch.Tensor):
xb = torch.Tensor(xb)
return model(xb, seq_len) # xb.shape(1,34,5)
##% create training/valid/test data
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
if i + batch_size <= len(seq_len_train):
seq_len_train_iter.append(seq_len_train[i:i+batch_size])
else:
seq_len_train_iter.append(seq_len_train[i:])
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
if i + batch_size <= len(seq_len_valid):
seq_len_valid_iter.append(seq_len_valid[i:i+batch_size])
else:
seq_len_valid_iter.append(seq_len_valid[i:])
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
if i + batch_size <= len(seq_len_test):
seq_len_test_iter.append(seq_len_test[i:i+batch_size])
else:
seq_len_test_iter.append(seq_len_test[i:])
##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
train_loss_record = []
valid_loss_record = []
mean_pct_final = []
mean_abs_final = []
is_better = False
last_epoch_abs_error = 0
last_epoch_pct_error = 0
mean_pct_final_train = []
mean_abs_final_train = []
for epoch in range(epochs):
# seq_len_crt: current batch seq len
for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
if isinstance(seq_len_crt, np.int64):
seq_len_crt = [seq_len_crt]
y_hat = model(xb, seq_len_crt)
packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
final_yb = final_yb.permute(1, 0)
# assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
loss = loss_func(y_hat, final_yb)
batch_size_crt = final_yb.shape[0]
loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt
loss.backward()
optimizer.step()
# scheduler.step()
optimizer.zero_grad()
# print(i)
with torch.no_grad():
train_loss_record.append(loss.item())
if batches % 50 == 0 and epoch % 1 == 0:
# print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
label = yb[0][:len(y_hat)]
# plt.ion()
plt.plot(y_hat, label='predicted')
plt.plot(label, label='label')
plt.legend(loc='upper right')
plt.title('training mode')
plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
plt.show()
return train_loss_record
but I met:Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
the error message is:
ValueError Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
38 # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
39
---> 40 y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
41 label = yb[0][:len(y_hat)]
42 # plt.ion()
<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
7 if not isinstance(xb, torch.Tensor):
8 xb = torch.Tensor(xb)
----> 9 return model(xb, seq_len) # xb.shape(None,34,5)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
50 out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
51 out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52 out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
53
54 out = self.linear2(out)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
129 used for normalization (i.e. in eval mode when buffers are not None).
130 """
--> 131 return F.batch_norm(
132 input,
133 # If buffers are not to be tracked, ensure that they won't be updated
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2052 bias=bias, training=training, momentum=momentum, eps=eps)
2053 if training:
-> 2054 _verify_batch_size(input.size())
2055
2056 return torch.batch_norm(
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in _verify_batch_size(size)
2035 size_prods *= size[i + 2]
2036 if size_prods == 1:
-> 2037 raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
2038
2039
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
I have checked and I found that in out = self.BN1(out),out.shape = (1,60),it seems that batchsize=1 is not permitted in BatchNorm1d .But I don't know how to modify it.
what does BatchNorm1d do mathematically?
try and write down the equation for the case of batch_size=1 and you'll understand why pytorch is angry with you.
How to solve it?
It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.
When evaluating your model use model.eval() before and model.train() after.
I met this problem when I load the model and started to test. Add the model.eval() before you fill in your data. This can solve the problem.
If you are using the DataLoader class, sometimes the last batch in an epoch will have only a single training example (imagine a training set of 33 examples with a batch size of 32). This can trigger the error if the network is in training mode and a batch norm layer is present.
Set the drop_last argument in the DataLoader to True like:
from torch.utils.data import DataLoader
...
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
to discard the last incomplete batch in each epoch.
My goal is to build a multi-class image classifier using Pytorch and based on the EMNIST dataset (black and white pictures of letters).
The shape of my training data X_train is (124800, 28, 28).
The shape of the original target variables y_train is (124800, 1), however I created a one-hot encoding so that now the shape is (124800, 26).
The model that I am building should have 26 output variables, each representing the probability of one letter.
I read in my data as follows:
import scipy .io
emnist = scipy.io.loadmat(DATA_DIR + '/emnist-letters.mat')
data = emnist ['dataset']
X_train = data ['train'][0, 0]['images'][0, 0]
X_train = X_train.reshape((-1,28,28), order='F')
y_train = data ['train'][0, 0]['labels'][0, 0]
Then, I created a one-hot-encoding as follows:
y_train_one_hot = np.zeros([len(y_train), 27])
for i in range (0, len(y_train)):
y_train_one_hot[i, y_train[i][0]] = 1
y_train_one_hot = np.delete(y_train_one_hot, 0, 1)
I create the dataset with:
train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train_one_hot))
batch_size = 128
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
And then I build my model as follows:
class CNNModel(nn.Module):
def __init__(self):
super(CNNModel, self).__init__()
# Convolution 1
self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
self.relu1 = nn.ReLU()
# Max pool 1
self.maxpool1 = nn.MaxPool2d(2,2)
# Convolution 2
self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
self.relu2 = nn.ReLU()
# Max pool 2
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
# Fully connected 1 (readout)
self.fc1 = nn.Linear(32 * 4 * 4, 26)
def forward(self, x):
# Convolution 1
out = self.cnn1(x.float())
out = self.relu1(out)
# Max pool 1
out = self.maxpool1(out)
# Convolution 2
out = self.cnn2(out)
out = self.relu2(out)
# Max pool 2
out = self.maxpool2(out)
# Resize
# Original size: (100, 32, 7, 7)
# out.size(0): 100
# New out size: (100, 32*7*7)
out = out.view(out.size(0), -1)
# Linear function (readout)
out = self.fc1(out)
return out
model = CNNModel()
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
And then I train the model as follows:
iter = 0
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Add a single channel dimension
# From: [batch_size, height, width]
# To: [batch_size, 1, height, width]
images = images.unsqueeze(1)
# Forward pass to get output/logits
outputs = model(images)
# Clear gradients w.r.t. parameters
optimizer.zero_grad()
# Forward pass to get output/logits
outputs = model(images)
# Calculate Loss: softmax --> cross entropy loss
loss = criterion(outputs, labels)
# Getting gradients w.r.t. parameters
loss.backward()
# Updating parameters
optimizer.step()
iter += 1
if iter % 500 == 0:
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images = images.unsqueeze(1)
# Forward pass only to get logits/output
outputs = model(images)
# Get predictions from the maximum value
_, predicted = torch.max(outputs.data, 1)
# Total number of labels
total += labels.size(0)
correct += (predicted == labels).sum()
accuracy = 100 * correct / total
# Print Loss
print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data[0], accuracy))
However, when I run this, I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-c26c43bbc32e> in <module>()
21
22 # Calculate Loss: softmax --> cross entropy loss
---> 23 loss = criterion(outputs, labels)
24
25 # Getting gradients w.r.t. parameters
3 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
930 def forward(self, input, target):
931 return F.cross_entropy(input, target, weight=self.weight,
--> 932 ignore_index=self.ignore_index, reduction=self.reduction)
933
934
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2315 if size_average is not None or reduce is not None:
2316 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2318
2319
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: 1D target tensor expected, multi-target not supported
I expect that I do something wrong when I initialize/use my loss function. What can I do so that I can start training my model?
If you are using crossentropy loss you shouldn't one-hot encode your target variable y.
Pytorch crossentropy expects just the class indices as target not their one-hot encoded version.
To cite the doc https://pytorch.org/docs/master/generated/torch.nn.CrossEntropyLoss.html :
This criterion expects a class index in the range [0, C-1] as the target for each value of a 1D tensor of size minibatch;