multi-variable linear regression with pytorch - pytorch

I'm working on a linear regression problem with Pytorch.
I've had success with the single variable case, however when I perform multi-variable linear regression I get the following error. How should I perform linear regression with multiple variables?
TypeError Traceback (most recent call
last) in ()
9 optimizer.zero_grad() #gradient
10 outputs = model(inputs) #output
---> 11 loss = criterion(outputs,targets) #loss function
12 loss.backward() #backward propogation
13 optimizer.step() #1-step optimization(gradeint descent)
/anaconda/envs/tensorflow/lib/python3.6/site-packages/torch/nn/modules/module.py
in call(self, *input, **kwargs)
204
205 def call(self, *input, **kwargs):
--> 206 result = self.forward(*input, **kwargs)
207 for hook in self._forward_hooks.values():
208 hook_result = hook(self, input, result)
/anaconda/envs/tensorflow/lib/python3.6/site-packages/torch/nn/modules/loss.py
in forward(self, input, target)
22 _assert_no_grad(target)
23 backend_fn = getattr(self._backend, type(self).name)
---> 24 return backend_fn(self.size_average)(input, target)
25
26
/anaconda/envs/tensorflow/lib/python3.6/site-packages/torch/nn/_functions/thnn/auto.py
in forward(self, input, target)
39 output = input.new(1)
40 getattr(self._backend, update_output.name)(self._backend.library_state, input, target,
---> 41 output, *self.additional_args)
42 return output
43
TypeError: FloatMSECriterion_updateOutput received an invalid
combination of arguments - got (int, torch.FloatTensor,
torch.DoubleTensor, torch.FloatTensor, bool), but expected (int state,
torch.FloatTensor input, torch.FloatTensor target, torch.FloatTensor
output, bool sizeAverage)
Here is the code:
#import
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
#input_size = 1
input_size = 3
output_size = 1
num_epochs = 300
learning_rate = 0.002
#Data set
#x_train = np.array([[1.564],[2.11],[3.3],[5.4]], dtype=np.float32)
x_train = np.array([[73.,80.,75.],[93.,88.,93.],[89.,91.,90.],[96.,98.,100.],[73.,63.,70.]],dtype=np.float32)
#y_train = np.array([[8.0],[19.0],[25.0],[34.45]], dtype= np.float32)
y_train = np.array([[152.],[185.],[180.],[196.],[142.]])
print('x_train:\n',x_train)
print('y_train:\n',y_train)
class LinearRegression(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of predicted values per sample.
    """

    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Forward propagation: apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
model = LinearRegression(input_size,output_size)
#Lost and Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
#train the Model
for epoch in range(num_epochs):
#convert numpy array to torch Variable
inputs = Variable(torch.from_numpy(x_train)) #convert numpy array to torch tensor
#inputs = Variable(torch.Tensor(x_train))
targets = Variable(torch.from_numpy(y_train)) #convert numpy array to torch tensor
#forward+ backward + optimize
optimizer.zero_grad() #gradient
outputs = model(inputs) #output
loss = criterion(outputs,targets) #loss function
loss.backward() #backward propogation
optimizer.step() #1-step optimization(gradeint descent)
if(epoch+1) %5 ==0:
print('epoch [%d/%d], Loss: %.4f' % (epoch +1, num_epochs, loss.data[0]))
predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()
plt.plot(x_train,y_train,'ro',label='Original Data')
plt.plot(x_train,predicted,label='Fitted Line')
plt.legend()
plt.show()

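You need to make sure that the input and target data have the same type. In this case x_train is a 32-bit float array, while y_train is a double (64-bit float) array, because NumPy defaults to float64 when no dtype is given. You have to use: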
y_train = np.array([[152.],[185.],[180.],[196.],[142.]],dtype=np.float32)

Related

Expected more than 1 value per channel when training, got input size torch.Size([1, **])

I met an error when I use BatchNorm1d, code:
##% first I set a model
class net(nn.Module):
# Model from the question: a Linear -> ReLU -> BatchNorm stage applied to each
# input feature column.
# NOTE(review): indentation was lost when this snippet was pasted, and the body
# is truncated -- forward() returns `y_hat`, which is never assigned in the
# lines shown here.
# NOTE(review): `BN` and `init` are not defined inside this class; `BN` is
# assigned to nn.BatchNorm1d later in the same script, `init` is presumably
# torch.nn.init -- confirm against the full source.
def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num__rnn_layers, bidirectional, batch_first=True, p=0.2):
super(net, self).__init__()
self.max_len = max_len
self.feature_linear = feature_linear
self.input_size = input_size
self.hidden_size = hidden_size
self.bidirectional = bidirectional
self.num_directions = 2 if bidirectional == True else 1
self.p = p
self.batch_first = batch_first
# Maps the max_len axis of one feature column to feature_linear values.
self.linear1 = nn.Linear(max_len, feature_linear)
init.kaiming_normal_(self.linear1.weight, mode='fan_in')
self.BN1 = BN(feature_linear)
def forward(self, xb, seq_len_crt):
# xb is indexed as xb[:, :, i] with i < input_size, i.e. it is 3-dimensional
# with the feature index last.
rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
for i in range(self.input_size):
out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(1,34), out.shape(1,100)
out = F.relu(out) # input: out.shape(1,100), output: out.shape(1,100)
out = self.BN1(out) # input: out.shape(1,100), output: out.shape(1,100)
# NOTE(review): `y_hat` is not computed in the visible body; the code that
# produces it was omitted from the post.
return y_hat.squeeze(-1)
##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size
feature_linear = 60
BN = nn.BatchNorm1d
model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim, num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)
##% use this model to predict data
def predict(xb, model, seq_len):
    """Run ``model`` on one sequence or on a batch of sequences.

    Args:
        xb: ndarray or tensor. The model expects
            (batch_size, seq_len, n_features); a 2-D input is treated as a
            single sequence and gets a leading batch dimension of size 1.
        model: Callable invoked as ``model(xb, seq_len)``.
        seq_len: Passed through to ``model`` unchanged.

    Returns:
        Whatever ``model(xb, seq_len)`` returns.
    """
    if xb.ndim == 2:  # suitable for both ndarray and Tensor
        # Add a batch dimension in front. The result is a batch of exactly one
        # sample, which a BatchNorm layer in training mode will reject.
        xb = xb[None, ]
    if not isinstance(xb, torch.Tensor):
        xb = torch.Tensor(xb)
    return model(xb, seq_len)
##% create training/valid/test data
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
if i + batch_size <= len(seq_len_train):
seq_len_train_iter.append(seq_len_train[i:i+batch_size])
else:
seq_len_train_iter.append(seq_len_train[i:])
seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
if i + batch_size <= len(seq_len_valid):
seq_len_valid_iter.append(seq_len_valid[i:i+batch_size])
else:
seq_len_valid_iter.append(seq_len_valid[i:])
seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
if i + batch_size <= len(seq_len_test):
seq_len_test_iter.append(seq_len_test[i:i+batch_size])
else:
seq_len_test_iter.append(seq_len_test[i:])
##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
# Training loop from the question: runs `epochs` passes over train_dl, plots a
# prediction for the first sample of every 50th batch, and returns the list of
# per-batch training losses.
# NOTE(review): indentation was lost when this snippet was pasted, so the
# original nesting (in particular of the torch.no_grad() / plotting section)
# cannot be recovered from the text alone.
# NOTE(review): valid_dl, valid_ds and seq_len_valid_iter are accepted but not
# used in the lines shown.
train_loss_record = []
# The following accumulators are initialised but never read or updated in the
# visible body.
valid_loss_record = []
mean_pct_final = []
mean_abs_final = []
is_better = False
last_epoch_abs_error = 0
last_epoch_pct_error = 0
mean_pct_final_train = []
mean_abs_final_train = []
for epoch in range(epochs):
# seq_len_crt: current batch seq len
for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
# A lone np.int64 length is wrapped in a list so it can be used like the
# per-batch length sequences.
if isinstance(seq_len_crt, np.int64):
seq_len_crt = [seq_len_crt]
y_hat = model(xb, seq_len_crt)
# Pack and re-pad the targets with the batch's sequence lengths.
packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
# pad_packed_sequence is called without batch_first, so the result is
# presumably time-major; the permute swaps the two axes back -- TODO confirm.
final_yb = final_yb.permute(1, 0)
# assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
loss = loss_func(y_hat, final_yb)
batch_size_crt = final_yb.shape[0]
# Sum the element-wise loss over the last axis, divide by input_sizes, then
# average over the batch.
loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt
loss.backward()
optimizer.step()
# scheduler.step()
optimizer.zero_grad()
# print(i)
with torch.no_grad():
train_loss_record.append(loss.item())
if batches % 50 == 0 and epoch % 1 == 0:
# print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
# This call passes a single sample (xb[0]) through the model; it is the line
# the traceback in the question points at.
y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
label = yb[0][:len(y_hat)]
# plt.ion()
plt.plot(y_hat, label='predicted')
plt.plot(label, label='label')
plt.legend(loc='upper right')
plt.title('training mode')
plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
plt.show()
return train_loss_record
But I got the error: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
The full error message is:
ValueError Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
38 # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
39
---> 40 y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze() # xb[0].shape(34,5)
41 label = yb[0][:len(y_hat)]
42 # plt.ion()
<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
7 if not isinstance(xb, torch.Tensor):
8 xb = torch.Tensor(xb)
----> 9 return model(xb, seq_len) # xb.shape(None,34,5)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
50 out = self.linear1(xb[:, :, i]) # xb[:,:,i].shape:(None,34), out.shape(None,100)
51 out = F.relu(out) # 输入:out.shape(None,100), 输出:out.shape(None,100)
---> 52 out = self.BN1(out) # 输入:out.shape(None,100),输出:out.shape(None,100)
53
54 out = self.linear2(out)
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
129 used for normalization (i.e. in eval mode when buffers are not None).
130 """
--> 131 return F.batch_norm(
132 input,
133 # If buffers are not to be tracked, ensure that they won't be updated
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
2052 bias=bias, training=training, momentum=momentum, eps=eps)
2053 if training:
-> 2054 _verify_batch_size(input.size())
2055
2056 return torch.batch_norm(
D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in _verify_batch_size(size)
2035 size_prods *= size[i + 2]
2036 if size_prods == 1:
-> 2037 raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
2038
2039
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
I checked and found that at out = self.BN1(out), out.shape = (1, 60). It seems that batch_size=1 is not permitted in BatchNorm1d, but I don't know how to fix it.
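What does BatchNorm1d do mathematically?
Try writing down the equation for the case of batch_size=1 and you'll understand why PyTorch is angry with you: in training mode each feature is normalized as $y = \gamma \, \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} + \beta$, with the mean and variance taken over the batch. With a single sample, $\mathrm{E}[x] = x$ and $\mathrm{Var}[x] = 0$, so the output is just $\beta$ regardless of the input.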
How to solve it?
It is simple: BatchNorm has two "modes of operation": one is for training where it estimates the current batch's mean and variance (this is why you must have batch_size>1 for training).
The other "mode" is for evaluation: it uses accumulated mean and variance to normalize new inputs without re-estimating the mean and variance. In this mode there is no problem processing samples one by one.
When evaluating your model use model.eval() before and model.train() after.
I ran into this problem when I loaded the model and started testing. Add model.eval() before you feed in your data; this solves the problem.
If you are using the DataLoader class, sometimes the last batch in an epoch will have only a single training example (imagine a training set of 33 examples with a batch size of 32). This can trigger the error if the network is in training mode and a batch norm layer is present.
Set the drop_last argument in the DataLoader to True like:
from torch.utils.data import DataLoader
...
trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
to discard the last incomplete batch in each epoch.

(pytorch / mse) How can I change the shape of tensor?

Problem definition:
I have to use the MSELoss function to define the loss for a classification problem, but it keeps raising an error regarding the shape of the tensors.
Entire error message:
torch.Size([32, 10]) torch.Size([32])
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call
last) in
53 output = model.forward(images)
54 print(output.shape, labels.shape)
---> 55 loss = criterion(output, labels)
56 loss.backward()
57 optimizer.step()
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in
call(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/loss.py in
forward(self, input, target)
429
430 def forward(self, input, target):
--> 431 return F.mse_loss(input, target, reduction=self.reduction)
432
433
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in
mse_loss(input, target, size_average, reduce, reduction) 2213
ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
2214 else:
-> 2215 expanded_input, expanded_target = torch.broadcast_tensors(input, target) 2216 ret =
torch._C._nn.mse_loss(expanded_input, expanded_target,
_Reduction.get_enum(reduction)) 2217 return ret
/opt/conda/lib/python3.7/site-packages/torch/functional.py in
broadcast_tensors(*tensors)
50 [0, 1, 2]])
51 """
---> 52 return torch._C._VariableFunctions.broadcast_tensors(tensors)
53
54
> RuntimeError: The size of tensor a (10) must match the size of tensor
b (32) at non-singleton dimension 1
How can I reshape the tensor, and which tensor (output or labels) should I change to calculate the loss?
Entire code is attached below.
import numpy as np
import torch
# Loading the Fashion-MNIST dataset
from torchvision import datasets, transforms
# Get GPU Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))])
# Download and load the training data
trainset = datasets.FashionMNIST('MNIST_data/', download = True, train = True, transform = transform)
testset = datasets.FashionMNIST('MNIST_data/', download = True, train = False, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 32, shuffle = True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size = 32, shuffle = True, num_workers=4)
# Examine a sample
dataiter = iter(trainloader)
images, labels = dataiter.next()
# Define the network architecture
from torch import nn, optim
import torch.nn.functional as F
model = nn.Sequential(nn.Linear(784, 128),
nn.ReLU(),
nn.Linear(128, 10),
nn.LogSoftmax(dim = 1))
model.to(device)
# Define the loss
criterion = nn.MSELoss()
# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr = 0.001)
# Define the epochs
epochs = 5
train_losses, test_losses = [], []
for e in range(epochs):
running_loss = 0
for images, labels in trainloader:
# Flatten Fashion-MNIST images into a 784 long vector
images = images.to(device)
labels = labels.to(device)
images = images.view(images.shape[0], -1)
# Training pass
optimizer.zero_grad()
output = model.forward(images)
print(output.shape, labels.shape)
loss = criterion(output, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
else:
test_loss = 0
accuracy = 0
# Turn off gradients for validation, saves memory and computation
with torch.no_grad():
# Set the model to evaluation mode
model.eval()
# Validation pass
for images, labels in testloader:
images = images.to(device)
labels = labels.to(device)
images = images.view(images.shape[0], -1)
ps = model(images)
test_loss += criterion(ps, labels)
top_p, top_class = ps.topk(1, dim = 1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor))
model.train()
print("Epoch: {}/{}..".format(e+1, epochs),
"Training loss: {:.3f}..".format(running_loss/len(trainloader)),
"Test loss: {:.3f}..".format(test_loss/len(testloader)),
"Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
From the output you printed just before the error: torch.Size([32, 10]) torch.Size([32]).
The left one is the shape of what the model gives you and the right one is the shape of the labels from trainloader; labels of this shape are normally used with something like nn.CrossEntropyLoss.
And from the full error log, the error is from this line
loss = criterion(output, labels)
The way to make this work is called one-hot encoding. If it were me, for the sake of laziness, I'd write it like this:
ones = torch.sparse.torch.eye(10).to(device) # number of class class
labels = ones.index_select(0, labels)
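Alternatively, you can change your loss function from nn.MSELoss() to nn.CrossEntropyLoss(). Cross entropy loss is generally preferable to MSE for categorical tasks like this, and in PyTorch's implementation this loss function takes care of a lot of the shape conversion under the hood so you can provide it with a vector of per-class scores and a single class label. Note that nn.CrossEntropyLoss() applies log-softmax internally and therefore expects raw, unnormalized scores (logits); because the model above already ends in nn.LogSoftmax, either remove that layer or use nn.NLLLoss() instead.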
Fundamentally, your model attempts to predict what class the input belongs to by calculating a score (you might call it a 'confidence score') for each possible class. So if you have 10 classes, the model's output will be a 10-dimensional list (in PyTorch, a tensor of shape [10]) and the prediction would be the index of the highest score. Often one would apply the softmax (https://en.wikipedia.org/wiki/Softmax_function) function to convert these scores to a probability distribution, so all scores will be between 0 and 1 and the elements all sum to 1.
Then cross entropy is a common choice of loss function for this task: it compares the list of predictions to the one-hot encoded label. E.g. if you have 3 classes, a label would look like [1, 0, 0] to represent the first class. This is also called the "one-hot encoding". Meanwhile a prediction might look like [0.7, 0.1, 0.2]. In PyTorch, nn.CrossEntropyLoss() expects each label to be a single integer class index rather than a one-hot vector, since there's no real need to move long, sparse vectors around memory. So this loss function accomplishes the comparison you want to do and, I'm guessing, is implemented more efficiently than actually creating one-hot encodings.

The size of tensor a (10) must match the size of tensor b (9) at non-singleton

I was using this code earlier in a Jupyter notebook, where it did not show an error but the accuracy was very low. Then I tried the same code in Google Colab, where it shows an error. Please suggest some way to increase the accuracy. I am trying to build a multi-level CNN for leaf detection with downsampling of the image.
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Multi-scale CNN over 1x256x256 inputs that ends in a 6-way softmax.

    The input and three nearest-neighbour downsampled copies (128, 64 and 32
    pixels square) each pass through the same conv1/conv2/batch-norm stack.
    Each coarser feature map is concatenated channel-wise onto the running
    result, and a 2x2 max-pool follows every concatenation.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 2)
        self.conv2 = nn.Conv2d(32, 64, 2)
        self.conv2_bn = nn.BatchNorm2d(64)
        # Push one dummy image through the conv stages so that convs() can
        # record the flattened feature size that fc1 needs.
        x = torch.randn(256, 256).view(-1, 1, 256, 256)
        self._to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 6)

    def _conv_stack(self, t):
        """Apply conv1 -> ReLU -> 1-pixel zero padding -> conv2 -> batch norm."""
        t = F.relu(self.conv1(t))
        t = nn.ConstantPad2d(1, 0)(t)
        return self.conv2_bn(self.conv2(t))

    def convs(self, x):
        """Return the fused multi-scale feature map for a batch of images."""
        # Build the downsampled copies from the raw input before x is
        # overwritten by its own feature map.
        y = F.interpolate(x, size=[128, 128], mode='nearest')
        z = F.interpolate(x, size=[64, 64], mode='nearest')
        w = F.interpolate(x, size=[32, 32], mode='nearest')

        # Full-resolution branch is pooled once so it matches the 128x128 branch.
        x = F.relu(F.max_pool2d(self._conv_stack(x), 2))
        # The original passed a stray positional `2` to F.relu here, which was
        # silently interpreted as the `inplace` flag; it is dropped.
        y = F.relu(self._conv_stack(y))
        merged = F.max_pool2d(torch.cat((x, y), 1), (2, 2))

        z = F.relu(self._conv_stack(z))
        merged = F.max_pool2d(torch.cat((merged, z), 1), (2, 2))

        w = F.relu(self._conv_stack(w))
        x = F.max_pool2d(torch.cat((merged, w), 1), (2, 2))
        x = F.avg_pool2d(x, (2, 2))

        if self._to_linear is None:
            # Per-sample feature count (channels * height * width).
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1)."""
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
# print(x[0].shape)
net=Net()
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,256,256)
X=X/255.0
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1
val_size=int (len(X)*VAL_PCT)
print(val_size)
train_X= X[:-val_size]
train_y= y[:-val_size]
test_X=X[-val_size:]
test_y = y[-val_size:]
print(len(train_X))
print(len(test_X))
BATCH_SIZE =10
EPOCHS = 1
for epoch in range(EPOCHS):
for i in (range(0, len(train_X), BATCH_SIZE)):
#print(i, i+BATCH_SIZE)
batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,256,256)
# print(batch_X.shape)
batch_y = train_y[i:i+BATCH_SIZE]
#print(batch_y.shape)
net.zero_grad()
outputs = net(batch_X)
#print (outputs.shape)
loss = loss_function(outputs, batch_y)
loss.backward()
optimizer.step()
#print(loss)
#print(f"Epoch: {epoch}. Loss: {loss}")
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py:432: UserWarning: Using a target size (torch.Size([10, 256, 256, 3])) that is different to the input size (torch.Size([10, 6])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-38-a154b102127f> in <module>()
15 outputs = net(batch_X)
16 #print (outputs.shape)
---> 17 loss = loss_function(outputs, batch_y)
18 loss.backward()
19 optimizer.step()
3 frames
/usr/local/lib/python3.6/dist-packages/torch/functional.py in broadcast_tensors(*tensors)
60 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
61 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 62 return _VF.broadcast_tensors(tensors)
63
64
RuntimeError: The size of tensor a (10) must match the size of tensor b (9) at non-singleton dimension 3

Training 1D CNN in Pytorch

I want to train the model given below. I am developing a 1D CNN model in PyTorch. Usually we use DataLoaders in PyTorch, but I am not using them in my implementation. I need guidance on how I can train my model in PyTorch.
import torch
import torch.nn as nn
import torch.nn.functional as F
class CharCNN(nn.Module):
    """Two-stage 1D CNN followed by two fully connected stages.

    The layer sizes (``num_channels``, ``depth_1``, ``depth_2``,
    ``kernel_size_1``, ``kernel_size_2``, ``stride_size``, ``num_hidden``)
    are read from module-level names that must be defined before the class
    is instantiated.

    Args:
        num_labels: Size of the final output layer.
    """

    def __init__(self, num_labels=11):
        super(CharCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(num_channels, depth_1, kernel_size=kernel_size_1, stride=stride_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=kernel_size_1, stride=stride_size),
            nn.Dropout(0.1),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(depth_1, depth_2, kernel_size=kernel_size_2, stride=stride_size),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=kernel_size_2, stride=stride_size),
            nn.Dropout(0.25)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(depth_2*kernel_size_2, num_hidden),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(num_hidden, num_labels),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

    def forward(self, x):
        """Run ``x`` through both conv stages and both linear stages."""
        out = self.conv1(x)
        out = self.conv2(out)
        # Collapse everything but the batch dimension. The original flattened
        # the raw input `x` here, which threw away both conv stages.
        out = out.view(out.size(0), -1)
        # linear layer
        out = self.fc1(out)
        # output layer
        out = self.fc2(out)
        return out
I am training my network like this:
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(),lr=learning_rate)
for e in range(training_epochs):
if(train_on_gpu):
net.cuda()
train_losses = []
for batch in iterate_minibatches(train_x, train_y, batch_size):
x, y = batch
inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
if(train_on_gpu):
inputs, targets = inputs.cuda(), targets.cuda()
opt.zero_grad()
output = model(inputs, batch_size)
loss = criterion(output, targets.long())
train_losses.append(loss.item())
loss.backward()
opt.step()
val_losses = []
accuracy=0
f1score=0
print("Epoch: {}/{}...".format(e+1, training_epochs),
"Train Loss: {:.4f}...".format(np.mean(train_losses)))
But i am getting the following error
TypeError Traceback (most recent call last)
<ipython-input-60-3a3df06ef2f8> in <module>
14 inputs, targets = inputs.cuda(), targets.cuda()
15 opt.zero_grad()
---> 16 output = model(inputs, batch_size)
17
18 loss = criterion(output, targets.long())
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self,
* input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
TypeError: forward() takes 2 positional arguments but 3 were given
Please guide me how i can resolve this issue.
The forward method of your model only takes one argument, but you are calling it with two arguments:
output = model(inputs, batch_size)
It should be:
output = model(inputs)
The time series data uses a 5-element window: the target is a rolling mean over 5 values. The 1D convolution model receives the sales series as a 3-dimensional tensor containing all the sales for a certain duration of time (https://krzjoa.github.io/2019/12/28/pytorch-ts-v1.html). The kernel size is set to 5 to match the moving-window size, and the numbers of input and output channels are both 1. The model is trained for 1000 epochs, computing the loss at each one. The prediction tensor is then converted to a NumPy array and plotted against the actual moving average. I did find iterate_minibatches code, but it does not work with this time series data because the dimensions are different (32 target vs 36 source).
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
df=pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/shampoo.csv')
#created a three dimensional tensor
#1. number of samples
#2. number of channels
#3. -1 means infer value from dimension
X=data.Sales.copy()
y=data.Sales.rolling(5).mean().copy()
net = nn.Conv1d(1, 1, 5, bias = False)
optimizer=optim.Adam(net.parameters(), lr=0.01) #l2
running_loss=0.0
X=data.Sales.copy()
y=data.Sales.rolling(5).mean().copy()
X_tensor = torch.Tensor(X).reshape(1, 1, -1)
print("Sales", X_tensor)
y=y[4:,].to_numpy()
y_tensor = torch.Tensor(y).reshape(1, 1, -1)
print("Avg", y_tensor)
ts_tensor = torch.Tensor(X).reshape(1, 1, -1)
kernel = [0.5, 0.5]
kernel_tensor = torch.Tensor(kernel).reshape(1, 1, -1)
print("Kernel", F.conv1d(ts_tensor, kernel_tensor))
for epoch in range(1000):
optimizer.zero_grad()
outputs=net(X_tensor)
#print("Outputs",outputs)
loss_value = torch.mean((outputs - y_tensor)**2)
loss_value.backward()
optimizer.step()
running_loss += loss_value.item()
if epoch % 100 == 0:
print('[%d] loss: %.3f' % (epoch, loss_value.item()))
print(net.weight.data.numpy())
prediction = (net(X_tensor).data).float()
prediction=(prediction.numpy().flatten())
data.Sales.plot()
plt.plot(prediction)
#actual moving average
data.Sales.plot()
plt.plot(y)

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and am getting the error shown at the very bottom. I would like to know how to resolve this. Thanks.
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
# create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
# create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
# create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
# Creating model for execution
class Model(nn.Module):
    """Two-layer fully connected classifier: 784 -> 128 -> 10.

    ``forward`` expects input whose last dimension is 784, so images must be
    flattened by the caller before they are passed in.
    """

    # The pasted snippet read `def init` / `super().init()`: the double
    # underscores had been stripped, so the constructor never ran and the
    # layers were never created.
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(784, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for each row of ``x``."""
        x = self.hidden(x)
        x = F.sigmoid(x)
        x = self.output(x)
        return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
model.train()
for data, target in trainloader:
optimizer.zero_grad()
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
loss.backward()
#4. weight optimization
optimizer.step()
train_loss.append(loss.item())
# evaluation part
model.eval()
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
valid_loss.append(loss.item())
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
**kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
else:
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
/pytorch/aten/src/TH/generic/THTensorMath.cpp:940
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
optimizer.zero_grad()
...
The same must be done in the validation loop.

Resources