Im dealing with titanic data with pytorch
these are my model & training code
import torch.nn.functional as F
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1_1=torch.nn.Linear(13, 512)
self.fc1_2=torch.nn.Linear(512, 64)
self.fc1_3=torch.nn.Linear(64, 10)
self.fc2_1=torch.nn.Linear(13, 64)
self.fc2_2=torch.nn.Linear(64, 512)
self.fc2_3=torch.nn.Linear(512, 10)
self.fc3_1=torch.nn.Linear(13, 128)
self.fc3_2=torch.nn.Linear(128, 128)
self.fc3_3=torch.nn.Linear(128, 10)
self.fc_full_1=torch.nn.Linear(30, 64)
self.fc_full_2=torch.nn.Linear(64, 128)
self.fc_full_3=torch.nn.Linear(128, 2)
def forward(self, x):
x1=self.fc1_1(x)
x1=F.relu(x1)
x1=self.fc1_2(x1)
x1=F.relu(x1)
x1=self.fc1_3(x1)
x1=F.relu(x1)
x2=self.fc2_1(x)
x2=F.relu(x2)
x2=self.fc2_2(x2)
x2=F.relu(x2)
x2=self.fc2_3(x2)
x2=F.relu(x2)
x3=self.fc3_1(x)
x3=F.relu(x3)
x3=self.fc3_2(x3)
x3=F.relu(x3)
x3=self.fc3_3(x3)
x3=F.relu(x3)
x=torch.cat((x1, x2, x3), dim=1)
x=self.fc_full_1(x)
x=F.relu(x)
x=self.fc_full_2(x)
x=F.relu(x)
x=self.fc_full_3(x)
return x
model=Net()
as seen above, they are just fully connected layers
model loss function and optimization
cross ehtropy loss and adam
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.05)
these are training code
for epoch in range(100):
model.train()
x_var = Variable(torch.FloatTensor(x_train))
y_var = Variable(torch.LongTensor(y_train))
optimizer.zero_grad()
train_pred = model(x_var)
loss =criterion(train_pred, y_var)
loss.backward()
optimizer.step()
train_acc=calc_accuracy(train_pred, y_var)
loss=loss.data.numpy()
and finaly, the accuracy and loss printed
Epoch 0
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 10
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 20
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 30
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 40
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 50
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 60
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 70
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 80
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
Epoch 90
0.6900209 0.531578947368421
valid: 0.692668 0.4621212121212121
as seen above, model training loss and valid loss does not change at all.
what does seems to be the problem?
Your optimizer does not use your model's parameters, but some other model1's.
optimizer = torch.optim.Adam(model1.parameters(), lr=0.05)
BTW, you do not have to use model.train() for each epoch.
Related
Running following code if device = 'cpu' then loss decreases as expected. However, if device = 'cuda' the loss wont decrease, and model training fails. How should I correct this?
device = 'cpu'
device = 'cuda'
import sys
from typing import Optional
import torch
from torch import Tensor
from torch_geometric.nn import Node2Vec
class Node2VecV2(Node2Vec):
def __init__(
self,
edge_index: Tensor,
embedding_dim: int,
walk_length: int,
context_size: int,
walks_per_node: int = 1,
p: float = 1.0,
q: float = 1.0,
num_negative_samples: int = 1,
num_nodes: Optional[int] = None,
sparse: bool = False,
batch_size: int = 128,
shuffle: bool = True,
num_workers: int = 0,
device: str = 'cuda'
):
super().__init__(edge_index, embedding_dim, walk_length, context_size, walks_per_node,
p, q, num_negative_samples, num_nodes, sparse)
self.device = device
self.batch_size = batch_size
self.shuffle = shuffle
self.num_workers = 0 if sys.platform.startswith('win') else num_workers
self.loader = self.loader(batch_size=128, shuffle=True,
num_workers=num_workers)
# def make_loader(
# self,
# batch_size: int = 128,
# shuffle: bool = True,
# num_workers: int = 0,
# device: str = 'cpu'
# ):
# self.device = device
# self.batch_size = batch_size
# self.shuffle = shuffle
# self.num_workers = 0 if sys.platform.startswith('win') else num_workers
# self.loader = super().loader(batch_size=128, shuffle=True,
# num_workers=num_workers)
def __repr__(self) -> str:
return (f'{self.__class__.__name__}({self.embedding.weight.size(0)}, '
f'{self.embedding.weight.size(1)})')
def train_(self):
self.train()
total_loss = 0
for pos_rw, neg_rw in self.loader:
self.optimizer.zero_grad()
loss = self.loss(pos_rw.to(device), neg_rw.to(device))
loss.backward()
self.optimizer.step()
total_loss += loss.item()
#print(total_loss / len(self.loader))
return total_loss / len(self.loader)
def train_dw_model(dw_model, dataset):
data = dataset[0]
data = data.to(device)
best_val = 0
for epoch in range(1, 101):
loss = dw_model.train_()
with torch.no_grad():
dw_model.eval()
z = dw_model()
val_acc = dw_model.test(
z[data.train_mask], data.y[data.train_mask],
z[data.val_mask], data.y[data.val_mask],
max_iter=150)
if val_acc> best_val:
best_val = val_acc
torch.save(dw_model.state_dict(), 'dw.pt')
print(f'{dataset}_{dw_model} Epoch: {epoch:02d}, Loss: {loss:.4f},'
f' Val: {val_acc*100:.2f} best Val: {best_val*100:.2f} ')
if __name__ == '__main__':
import os.path as osp
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, 'Cora', transform=T.NormalizeFeatures())
data = dataset[0].to(device)
dw_model = Node2VecV2(data.edge_index, embedding_dim=128, walk_length=20,
context_size=10, walks_per_node=10,
num_negative_samples=1, p=1, q=1, sparse=True,batch_size=128, shuffle=True, num_workers=4, device=device).to(device)
#dw_model.make_loader(batch_size=128, shuffle=True, num_workers=4, device=args.device)
dw_model.optimizer = torch.optim.SparseAdam(list(dw_model.parameters()), lr=0.01)
for i in range(10):
print(f'Epoch {i} loss: ',dw_model.train_())
cpu output:
Epoch 0 loss: 8.111482880332254
Epoch 1 loss: 6.081473242152821
Epoch 2 loss: 4.976185473528775
Epoch 3 loss: 4.138110041618347
Epoch 4 loss: 3.4765207875858652
Epoch 5 loss: 2.960351337086071
Epoch 6 loss: 2.5505979494615034
Epoch 7 loss: 2.2174546501853247
Epoch 8 loss: 1.955638435753909
Epoch 9 loss: 1.7383252869952808
Process finished with exit code 0
cuda output:
Epoch 0 loss: 1.3862942511385137
Epoch 1 loss: 1.3862942511385137
Epoch 2 loss: 1.3862942511385137
Epoch 3 loss: 1.3862942511385137
Epoch 4 loss: 1.3862942511385137
Epoch 5 loss: 1.3862942511385137
Epoch 6 loss: 1.3862942511385137
Epoch 7 loss: 1.3862942511385137
Epoch 8 loss: 1.3862942511385137
Epoch 9 loss: 1.3862942511385137
Process finished with exit code 0
I’m new to pytorch and my problem may be a little naive
I’m training a pretrained VGG16 network on my dataset which it’s size is near 33000 images in 8 classes with labels [1,2,…,8] and my classes are imbalanced. my problem is that during training, validation and training accuracy is low and doesn’t increase, is there any problem in my code?
if not, what do you suggest to improve training?
'''
import torch
import time
import torch.nn as nn
import numpy as np
from sklearn.model_selection import train_test_split
from torch.optim import Adam
import cv2
import torchvision.models as models
from classify_dataset import Classification_dataset
from torchvision import transforms
transform = transforms.Compose([transforms.Resize((224,224)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomRotation(degrees=45),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
dataset = Classification_dataset(root_dir=r'//home/arisa/Desktop/Hamid/IQA/Hamid_Dataset',
csv_file=r'/home/arisa/Desktop/Hamid/IQA/new_label.csv',transform=transform)
target = dataset.labels - 1
train_indices, test_indices = train_test_split(np.arange(target.shape[0]), stratify=target)
test_dataset = torch.utils.data.Subset(dataset, indices=test_indices)
train_dataset = torch.utils.data.Subset(dataset, indices=train_indices)
class_sample_count = np.array([len(np.where(target[train_indices] == t)[0]) for t in np.unique(target)])
weight = 1. / class_sample_count
samples_weight = np.array([weight[t] for t in target[train_indices]])
samples_weight = torch.from_numpy(samples_weight)
samples_weight = samples_weight.double()
sampler = torch.utils.data.WeightedRandomSampler(samples_weight, len(samples_weight), replacement = True)
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=64,
sampler=sampler)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=64,
shuffle=False)
for param in model.parameters():
param.requires_grad = False
num_ftrs = model.classifier[0].in_features
model.classifier = nn.Linear(num_ftrs,8)
optimizer = Adam(model.parameters(), lr = 0.0001 )
criterion = nn.CrossEntropyLoss()
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.01)
path = '/home/arisa/Desktop/Hamid/IQA/'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
def train_model(model, train_loader,valid_loader, optimizer, criterion, scheduler=None, num_epochs=10 ):
min_valid_loss = np.inf
model.train()
start = time.time()
TrainLoss = []
model = model.to(device)
for epoch in range(num_epochs):
total = 0
correct = 0
train_loss = 0
#lr_scheduler.step()
print('Epoch {}/{}'.format(epoch+1, num_epochs))
print('-' * 10)
train_loss = 0.0
for x,y in train_loader:
x = x.to(device)
#print(y.shape)
y = y.view(y.shape[0],).to(device)
y = y.to(device)
y -= 1
out = model(x)
loss = criterion(out, y)
optimizer.zero_grad()
loss.backward()
TrainLoss.append(loss.item()* y.shape[0])
train_loss += loss.item() * y.shape[0]
_,predicted = torch.max(out.data,1)
total += y.size(0)
correct += (predicted == y).sum().item()
optimizer.step()
lr_scheduler.step()
accuracy = 100*correct/total
valid_loss = 0.0
val_loss = []
model.eval()
val_correct = 0
val_total = 0
with torch.no_grad():
for x_val, y_val in test_loader:
x_val = x_val.to(device)
y_val = y_val.view(y_val.shape[0],).to(device)
y_val -= 1
target = model(x_val)
loss = criterion(target, y_val)
valid_loss += loss.item() * y_val.shape[0]
_,predicted = torch.max(target.data,1)
val_total += y_val.size(0)
val_correct += (predicted == y_val).sum().item()
val_loss.append(loss.item()* y_val.shape[0])
val_acc = 100*val_correct / val_total
print(f'Epoch {epoch + 1} \t\t Training Loss: {train_loss / len(train_loader)} \t\t Validation Loss: {valid_loss / len(test_loader)} \t\t Train Acc:{accuracy} \t\t Validation Acc:{val_acc}')
if min_valid_loss > (valid_loss / len(test_loader)):
print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss / len(test_loader):.6f}) \t Saving The Model')
min_valid_loss = valid_loss / len(test_loader)
state = {'state_dict': model.state_dict(),'optimizer': optimizer.state_dict(),}
torch.save(state,'/home/arisa/Desktop/Hamid/IQA/checkpoint.t7')
end = time.time()
print('TRAIN TIME:')
print('%.2gs'%(end-start))
train_model(model=model, train_loader=train_loader, optimizer=optimizer, criterion=criterion, valid_loader= test_loader,num_epochs=500 )
Thanks in advance
here is the result of 15 epoch
Epoch 1/500
----------
Epoch 1 Training Loss: 205.63448420514916 Validation Loss: 233.89266112356475 Train Acc:39.36360386127994 Validation Acc:24.142040038131555
Epoch 2/500
----------
Epoch 2 Training Loss: 199.05699240435197 Validation Loss: 235.08799531243065 Train Acc:41.90998291820601 Validation Acc:24.27311725452812
Epoch 3/500
----------
Epoch 3 Training Loss: 199.15626737127448 Validation Loss: 236.00033430619672 Train Acc:41.1035633416756 Validation Acc:23.677311725452814
Epoch 4/500
----------
Epoch 4 Training Loss: 199.02581041173886 Validation Loss: 233.60767459869385 Train Acc:41.86628530568466 Validation Acc:24.606768350810295
Epoch 5/500
----------
Epoch 5 Training Loss: 198.61493769454472 Validation Loss: 233.7503859202067 Train Acc:41.53656695665991 Validation Acc:25.0
Epoch 6/500
----------
Epoch 6 Training Loss: 198.71323942956585 Validation Loss: 234.17176149830675 Train Acc:41.639852222619474 Validation Acc:25.369399428026693
Epoch 7/500
----------
Epoch 7 Training Loss: 199.9395153770592 Validation Loss: 234.1744423635078 Train Acc:40.98041552456998 Validation Acc:24.84509056244042
Epoch 8/500
----------
Epoch 8 Training Loss: 199.3533399020355 Validation Loss: 235.4645173188412 Train Acc:41.26643626107337 Validation Acc:24.165872259294567
Epoch 9/500
----------
Epoch 9 Training Loss: 199.6451746921249 Validation Loss: 233.33387595956975 Train Acc:40.96452548365312 Validation Acc:24.59485224022879
Epoch 10/500
----------
Epoch 10 Training Loss: 197.9305159737011 Validation Loss: 233.76405122063377 Train Acc:41.8782028363723 Validation Acc:24.6186844613918
Epoch 11/500
----------
Epoch 11 Training Loss: 199.33247244055502 Validation Loss: 234.41085289463854 Train Acc:41.59218209986891 Validation Acc:25.119161105815063
Epoch 12/500
----------
Epoch 12 Training Loss: 199.87399289874256 Validation Loss: 234.23621463775635 Train Acc:41.028085647320545 Validation Acc:24.49952335557674
Epoch 13/500
----------
Epoch 13 Training Loss: 198.85540591944292 Validation Loss: 234.33149099349976 Train Acc:41.206848607635166 Validation Acc:24.857006673021925
Epoch 14/500
----------
Epoch 14 Training Loss: 199.92641723337513 Validation Loss: 233.37722391070741 Train Acc:41.15520597465539 Validation Acc:24.988083889418494
Epoch 15/500
----------
Epoch 15 Training Loss: 197.82172771698328 Validation Loss: 234.4943131533536 Train Acc:41.69943987605768 Validation Acc:24.380362249761678
You freezed your model through
for param in model.parameters():
param.requires_grad = False
which basically says "do not calculate any gradient for any weight" which is equivalent of not updating weights - hence no optimization
my problem was in model.train(). This phrase should be inside the training loop. but in my case I put it outside the training loop and when it comes to model.eval(), model maintained in this mode
I'm training a binary classification model on a series of images.
The model was derived from resnet18 in torchvision and I made the last FC as nn.Linear(512, 1)
The loss function is BCELoss
However, the model doesn't show any sign of converging even after 5000 iterations.
I'm suspecting I might do something wrong in the training stage? But I can't find where's the bug.
Here's my code:
Model:
## Model
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
resnet18 = models.resnet18(pretrained= True)
resnet18.fc = nn.Linear(512, 1)
Parameters, loss, optimizers:
## parameter
epochs = 200
learning_rate = 0.1
momen = 0.9
batch = 8
criterion = nn.BCELoss()
resnet18.to(device)
opt = optim.SGD(resnet18.parameters(), lr = learning_rate, momentum = momen)
Dataloaders:
# Generators
training_set = Dataset(X_train)
training_generator = torch.utils.data.DataLoader(training_set, batch_size= batch, shuffle=True)
validation_set = Dataset(X_test)
validation_generator = torch.utils.data.DataLoader(validation_set, batch_size=1, shuffle=False)
Training:
# training
history = []
for t in range(epochs):
for i, data in enumerate(training_generator, 0):
inputs, labels = data
# check if input size == batch size #
if inputs.shape[0] < batch:
break
# print("labels", labels, labels.dtype)
# move data to GPU #
inputs, labels = inputs.to(device), labels.to(device)
opt.zero_grad()
# Prediction #
y_pred = resnet18(inputs).view(batch,)
y_pred = (y_pred > 0).float().requires_grad_()
# print("y_pred", y_pred, y_pred.dtype)
# Calculating loss #
loss = criterion(y_pred, labels.view(batch,))
loss.backward()
opt.step()
if i % 10 == 0:
history.append(loss.item())
print("Epoch: {}, iter: {}, loss: {}".format(t, i, loss.item())
torch.save(resnet18, 'trained_resnet18.pt')
Edit:
The loss values are like this:
Epoch: 3, iter: 310, loss: 0.0
Epoch: 3, iter: 320, loss: 37.5
Epoch: 3, iter: 330, loss: 37.5
Epoch: 3, iter: 340, loss: 0.0
Epoch: 3, iter: 350, loss: 37.5
Epoch: 3, iter: 360, loss: 50.0
Epoch: 3, iter: 370, loss: 37.5
Epoch: 3, iter: 380, loss: 25.0
Epoch: 3, iter: 390, loss: 12.5
I belive the error lies in the following line:
y_pred = (y_pred > 0).float().requires_grad_()
You try to binarize the model prediction in a weird way, I suggest do the following instead:
y_pred = torch.sigmoid(y_pred)
And pass this to the loss function.
Explanation
The output of the model can be any value, but we want to normalize that values to reside in the [0,1] range. This is exactly what the sigmoid function does. Once we have the values in the range of [0,1] the comparison with the binary labels will make sense, closer to 1 will be "1" and the opposite.
You can refer to the following link: https://www.youtube.com/watch?v=WsFasV46KgQ
I m try to train an rbf network... I used MNIST database. And pytorch framework...
The results are the same in each epoch...
The results....:
Epoch: 1
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 2
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 3
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 4
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
My Code... I think that the problem is somewhere in the linear layer. The model has no improve after the training epoch, maybe it's the linear layer. It seems like the weights no change...! But i don't know why...?
class RBF(nn.Module):
def __init__(self, in_layers, centers, sigmas):
super(RBF, self).__init__()
self.in_layers = in_layers
self.centers = nn.Parameter(centers)
self.sigmas = nn.Parameter(torch.Tensor(self.centers.size(0)))
torch.nn.init.constant_(self.sigmas, sigmas)
def forward(self, x):
x = x.view(-1, self.in_layers)
size = [self.centers.size(0), x.size(0)]
sigma = self.sigmas.view(-1).to(device)**2
dists = torch.empty(size).to(device)
for i,c in enumerate(self.centers):
c = c.reshape(-1,c.size(0))
temp = (x-c).pow(2).sum(-1).pow(0.5)
dists[i] = temp
dists = dists.permute(1,0)
phi = torch.exp(-1*(dists/(2*sigma))) #gaussian
return phi
class Net(nn.Module):
def __init__(self, in_layers, centers, sigmas):
super(Net, self).__init__()
self.rbf_layers = nn.ModuleList()
self.linear_layers = nn.ModuleList()
for i in range(len(in_layers) - 1):
self.rbf_layers.append(RBF(in_layers[i], centers, sigmas))
self.linear_layers.append(nn.Linear(centers.size(0), in_layers[i+1], bias = True))
def forward(self, x):
out = x
for i in range(len(self.rbf_layers)):
out = self.rbf_layers[i](out)
out = F.sigmoid( self.linear_layers[i](out.float()) )
return out
def training(engine, batch, device, model, criterion, optimizer):
inputs, labels = batch[0].to(device), batch[1].to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
return outputs, labels
Ofcourse the code continuous but i think these are enough to solve the problem(if you want smthing extra 'i m here').... Do you have any ideas???
and the training part of code....
def training(engine, batch, device, model, criterion, optimizer):
inputs, labels = batch[0].to(device), batch[1].to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
return outputs, labels
def nn_run1(batch, classes, dim, learning_rate, epochs, clusters):
# ---Load Model's Parameters---
train_loader, test_loader = data_loading(batch, shuffle=False)
kmeans_input = train_loader.dataset.train_data
kmeans_input = torch.reshape(kmeans_input.double(), (kmeans_input.size(0), -1))
_, centers = Kmeans(kmeans_input, clusters)
centers = centers.to(device)
sigma = Sigmas(centers)
layers = in_layers(dim, len(classes), layers = 1)
# ---Model Setup---
model = Net(layers, centers, sigma)
model.cuda()
criterion = nn.CrossEntropyLoss()
print(model.parameters)
optimizer = torch.optim.SGD(model.parameters(), learning_rate)
I'm writing a code example to do a simple linear projection (like PCA) in PyTorch. Everything appears to be OK except that the loss does not change as training progresses. Changing the learning rate doesn't affect this, and it's a simple one-dimensional problem so the loss should certainly be changing. What am I missing here?
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as nnF
class PCArot2D(nn.Module):
"2D PCA rotation, expressed as a gradient-descent problem"
def __init__(self):
super(PCArot2D, self).__init__()
self.theta = nn.Parameter(torch.tensor(np.random.random() * 2 * np.pi))
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
def forward(self, x):
xmeans = torch.mean(x, dim=1, keepdim=True)
rot = self.getrotation()
return torch.mm(rot, x - xmeans)
def covariance(y):
"Calculates the covariance matrix of its input (as torch variables)"
ymeans = torch.mean(y, dim=1, keepdim=True)
ycentred = y - ymeans
return torch.mm(ycentred, ycentred.T) / ycentred.shape[1]
net = PCArot2D()
example2 = torch.tensor(np.random.randn(2, 33))
# define a loss function and an optimiser
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.1)
# train the network
num_epochs = 1000
for epoch in range(num_epochs):
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(torch.DoubleTensor(example2))
# the covariance between output channels is the measure that we wish to minimise
covariance = (outputs[0, :] * outputs[1, :]).mean()
loss = criterion(covariance, torch.tensor(0, dtype=torch.double))
loss.backward()
optimizer.step()
running_loss = loss.item()
if ((epoch & (epoch - 1)) == 0) or epoch==(num_epochs-1): # don't print on all epochs
# print statistics
print('[%d] loss: %.8f' %
(epoch, running_loss))
print('Finished Training')
Output:
[0] loss: 0.00629047
[1] loss: 0.00629047
[2] loss: 0.00629047
[4] loss: 0.00629047
[8] loss: 0.00629047
etc
It seems the problem is in your getrotation function. When creating a new tensor from the other tensors it is not back-probable anymore:
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
So you need to find some other way to construct your return tensor.
Here is one suggestion that seems to work using torch.cat:
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
#return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
A = torch.cat([costheta.unsqueeze(0), -sintheta.unsqueeze(0)], dim=0)
B = torch.cat([sintheta.unsqueeze(0), costheta.unsqueeze(0)], dim=0)
return torch.cat([A.unsqueeze(0), B.unsqueeze(0)], dim=0).double()
After implementing this change the loss changes:
[0] loss: 0.00765365
[1] loss: 0.00764726
[2] loss: 0.00764023
[4] loss: 0.00762607
[8] loss: 0.00759777
[16] loss: 0.00754148
[32] loss: 0.00742997
[64] loss: 0.00721117
[128] loss: 0.00679025
[256] loss: 0.00601233
[512] loss: 0.00469085
[999] loss: 0.00288501
Finished Training
I hope this helps!
Edit:
A simpler and prettier version by #DanStowell:
def getrotation(self):
sintheta = torch.sin(net.theta).double().unsqueeze(0)
costheta = torch.cos(net.theta).double().unsqueeze(0)
return torch.cat([costheta, -sintheta, sintheta, costheta]).reshape((2,2))