Model not saved after training in PyTorch - pytorch

I encounter the following problem.
I perform an increasing cross-validation; I have 20 subjects in my dataset and try to classify images. I start with 3 subjects and perform a cross-validation with k=3; that is I train 3 different models and validate on the subject left out. And this is what I do for 4, 5, ..., 20 drivers. Hence, I have a lot of models trained.
Now I wanted to check the performance of all models on another dataset, but for some reason the accuracy is the same for all models, which must be a bug somewhere.
I already use copy.deepcopy(), so I must have an error somewhere else.
I'm open for any hints!
Here is the code for the training function:
def train_model(model, num_classes, dirname, trainloader, valloader, trainset_size, valset_size, criterion, optimizer, scheduler, patience, min_delta, num_epochs, fold):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
since = time.time()
train_loss, train_acc, val_loss, val_acc = [], [], [], []
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
early_stopping = False
counter = 0
last_train_epoch = 0
for epoch in range(num_epochs):
if early_stopping:
print('\nEarly Stopping')
break
print('Epoch {}/{}'.format(epoch+1, num_epochs))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
dataloader = trainloader
dataset_size = trainset_size
else:
model.eval() # Set model to evaluate mode
dataloader = valloader
dataset_size = valset_size
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloader:
model = model.to(device)
inputs = inputs.to(device)
#labels = labels.long().to(device)
labels = labels.to(device) #test_tensor.type(torch.FloatTensor)
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# zero the parameter gradients
optimizer.zero_grad()
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if phase == 'train':
scheduler.step()
epoch_loss = running_loss / dataset_size
epoch_acc = running_corrects.double() / dataset_size
if phase == 'train':
train_loss.append(epoch_loss)
train_acc.append(epoch_acc)
else:
val_loss.append(epoch_loss)
val_acc.append(epoch_acc)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# early stopping
if phase == 'val':
if counter == patience:
early_stopping = True
break
if epoch == 0:
best_loss = epoch_loss
else:
if best_loss >= epoch_loss + min_delta:
print('Validation loss decreased ({:.4f} --> {:.4f}). Saving model ...'.format(best_loss,epoch_loss))
best_model_wts = copy.deepcopy(model.state_dict())
torch.save(model.state_dict(), '{}/weights/model_fold_{}.pth'.format(dirname,fold))
last_train_epoch = epoch + 1
best_loss = epoch_loss
counter = 0
else:
counter += 1
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
# save best model
return model, train_acc, train_loss, val_acc, val_loss, last_train_epoch
Here is how I call the function:
model = net
model = model.to(device)
# train
[model, train_acc, train_loss, val_acc, val_loss, last_train_epoch] = train_model(model, num_classes, dirname,\
trainloader, valloader,\
trainset_size, valset_size,\
criterion, optimizer, \
exp_lr_scheduler, patience, \
min_delta, num_epochs, fold=val_index)
# test model
[preds_val, labels_val, idx_false_val, pred_time_val_fold] = test(model, valloader)
[preds_tr, labels_tr, idx_false_train, pred_time_train_fold] = test(model, trainloader)
[preds_all, labels_all, idx_false_all, pred_time_all_fold] = test(model, allloader)
print('Accuracy on all data: ', accuracy_score(labels_all, preds_all))
and for the sake of completeness, this is what the test() function looks like:
def test(model, dataloader):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
pred_labels, gt_labels, idx_false, pred_time = [], [], [], []
was_training = model.training
model.eval()
with torch.no_grad():
for i, (inputs, labels) in enumerate(dataloader):
inputs = inputs.to(device)
labels = labels.to(device)
start_pred = time.clock()
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
end_pred = time.clock()
pred_time.append(end_pred-start_pred)
for j in range(inputs.size()[0]):
pred_labels.append(preds[j].item())
gt_labels.append(labels[j].item())
for i in range(len(pred_labels)):
if pred_labels[i] != gt_labels[i]:
idx_false.append(i)
model.train(mode=was_training)
return pred_labels, gt_labels, idx_false, pred_time
Edit: It looks as if it always saves the same models even though I try to make sure that only the updated weights of the best model are saved.

Related

Find Training/Validation Accuracy & Loss of Faster-RCNN PyTorch model

I am trying to find the training/validation accuracy and loss of my model for each epoch as I train it to find the best epoch to use from now on. I appreciate that there is lots of information on this now but this topic is very new to me, and I find it very difficult to find the right answer for my situation.
I assume that I need to add in one or two bits to the train_one_epoch() and evaluate() functions in order to do this?
My model setup is:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=models.detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.02, momentum=0.9, weight_decay=0.0001)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20,40], gamma=0.1)
And my training function is:
epochs = 50
for epoch in range(epochs):
train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=20)
lr_scheduler.step()
evaluate(model, val_data_loader, device=device)
print("\n\n")
torch.save(model, f'./Models/trained_{ds}_model_Epoch{epochs}_LR0_02.pt')
I am using coco-like annotations, for example:
{'boxes': tensor([[316.9700, 242.5500, 464.1000, 442.1700], [ 39.2200, 172.6700, 169.8400, 430.9600]]), 'labels': tensor([2, 2]), 'image_id': tensor(1416), 'area': tensor([29370.1094, 33738.3789]), 'iscrowd': tensor([0, 0])}
The train_one_epoch and evaluate functions are from 'engine.py' from Torchvision.
It seems like using Tensorboard is a good tool to use, but I don't really know how to use it.
The engine.py is:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
model.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
header = f"Epoch: [{epoch}]"
lr_scheduler = None
if epoch == 0:
warmup_factor = 1.0 / 1000
warmup_iters = min(1000, len(data_loader) - 1)
lr_scheduler = torch.optim.lr_scheduler.LinearLR(
optimizer, start_factor=warmup_factor, total_iters=warmup_iters
)
for images, targets in metric_logger.log_every(data_loader, print_freq, header):
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
optimizer.zero_grad()
if scaler is not None:
scaler.scale(losses).backward()
scaler.step(optimizer)
scaler.update()
else:
losses.backward()
optimizer.step()
if lr_scheduler is not None:
lr_scheduler.step()
metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
metric_logger.update(lr=optimizer.param_groups[0]["lr"])
return metric_logger
The evaluate function is:
def evaluate(model, data_loader, device):
n_threads = torch.get_num_threads()
# FIXME remove this and make paste_masks_in_image run on the GPU
torch.set_num_threads(1)
cpu_device = torch.device("cpu")
model.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
header = "Test:"
coco = get_coco_api_from_dataset(data_loader.dataset)
iou_types = _get_iou_types(model)
coco_evaluator = CocoEvaluator(coco, iou_types)
for images, targets in metric_logger.log_every(data_loader, 100, header):
images = list(img.to(device) for img in images)
if torch.cuda.is_available():
torch.cuda.synchronize()
model_time = time.time()
outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
model_time = time.time() - model_time
res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
evaluator_time = time.time()
coco_evaluator.update(res)
evaluator_time = time.time() - evaluator_time
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
# gather the stats from all processes
metric_logger.synchronize_between_processes()
print("Averaged stats:", metric_logger)
coco_evaluator.synchronize_between_processes()
# accumulate predictions from all images
coco_evaluator.accumulate()
coco_evaluator.summarize()
torch.set_num_threads(n_threads)
return coco_evaluator

MNIST dataset overfitting

I am working with the MNIST dataset and I have created the following network. I want to overfit the training data and I think I am doing that here. My training loss is lower than my validation loss. This is the code that I have come up with. Please look at it and let me know if I am overfitting the training data, if I am not then how do I go about it?
class NN(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Flatten(),
nn.Linear(784,4096),
nn.ReLU(),
nn.Linear(4096,2048),
nn.ReLU(),
nn.Linear(2048,1024),
nn.ReLU(),
nn.Linear(1024,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,32),
nn.ReLU(),
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,10))
def forward(self,x):
return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
total_correct = 0
total_loss = 0
total_examples = 0
n_batches = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = model(images)
batch_loss = loss_function(outputs,labels)
n_batches += 1
total_loss += batch_loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
accuracy = total_correct / total_examples
mean_loss = total_loss / n_batches
return (accuracy, mean_loss)
def define_and_train(model,dataset_training, dataset_test):
trainloader = torch.utils.data.DataLoader( small_trainset, batch_size=500, shuffle=True)
testloader = torch.utils.data.DataLoader( dataset_test, batch_size=500, shuffle=True)
values = [1e-8,1e-7,1e-6,1e-5]
model = NN()
for params in values:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = 1e-7)
train_acc = []
val_acc = []
train_loss = []
val_loss = []
for epoch in range(100):
total_loss = 0
total_correct = 0
total_examples = 0
n_mini_batches = 0
for i,mini_batch in enumerate(trainloader,0):
images,labels = mini_batch
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
n_mini_batches += 1
total_loss += loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
epoch_training_accuracy = total_correct / total_examples
epoch_training_loss = total_loss / n_mini_batches
epoch_val_accuracy, epoch_val_loss = accuracy_and_loss( model, loss_function, testloader )
print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
%(params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))
train_loss.append( epoch_training_loss )
train_acc.append( epoch_training_accuracy )
val_loss.append( epoch_val_loss )
val_acc.append( epoch_val_accuracy )
history = { 'train_loss': train_loss,
'train_acc': train_acc,
'val_loss': val_loss,
'val_acc': val_acc }
return ( history, model )
history1, net1 = define_and_train(model,dataset_training,dataset_test)
I am trying to overfit the training data so that later i can apply regularization and then reduce the overfitting which will give me a better understanding of the process
Although I won't attempt to provide a rigorous definition, the term "overfit" typically means that the training loss continues to decrease whereas the validation loss stays stagnant at a position higher than the training loss, or continues to increase with more iterations.
Therefore, it is difficult to know whether your network is overfitting solely based on your code alone. Since dense, fully-connected networks tend to overfit easily in the absence of dropout layers or other regularizers, my hunch would be that your network is indeed overfitting according to your intention. However, we would have to see your tensorboard logs or loss plot to determine whether the model is overfitting.
If you want to overfit your network to the dataset, I suggest that you construct a much larger model with more hidden layers. Overfitting occurs when the dataset is "too easy" for the model and it starts to remember the training set itself without learning generalizable patterns that can be applied to the validation set.

Need help regarding Transfer Learning a Faster RCNN ResNet50FPN in PyTorch

I am new to PyTorch. I'm trying to use a pre-trained Faster RCNN torchvision.models.detection.fasterrcnn_resnet50_fpn() for object detection project. I have created a CustomDataset(Dataset) class to handle the custom dataset.
Here is the custom class implementation
class ToTensor(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample):
image, landmarks = sample['image'], sample['meta_data']
# swap color axis because
# numpy image: H x W x C
# torch image: C X H X W
image = image.transpose((2, 0, 1))
return {'image': torch.from_numpy(image),
'meta_data': landmarks}
class CustomDataset(Dataset):
"""Custom Landmarks dataset."""
def __init__(self, data_dir, root_dir, transform=None):
"""
Args:
data_dir (string): Directory with all the labels(json).
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.data_dir = data_dir
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(os.listdir(self.data_dir))
def __getitem__(self, idx):
img_name = sorted(os.listdir(self.root_dir))[idx]
image = io.imread(self.root_dir+'/'+img_name, plugin='matplotlib')
json_file = sorted(os.listdir(self.data_dir))[idx]
with open(self.data_dir+'/'+json_file) as f:
meta_data = json.load(f)
meta_data = meta_data['annotation']['object']
sample = {'image': image, 'meta_data': meta_data}
to_tensor = ToTensor()
transformed_sample = to_tensor(sample)
if self.transform:
sample = self.transform(sample)
return transformed_sample
Here is the train_model function
def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
since = time.time()
best_model = model
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'test']:
if phase == 'train':
optimizer = lr_scheduler(optimizer, epoch)
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
for data in dset_loaders[phase]:
# get the inputs
inputs, labels = data['image'], data['meta_data']
inputs= inputs.to(device) # ,
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs, labels)
_, preds = torch.max(outputs.data, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item()
running_corrects += torch.sum(preds == labels).item()
epoch_loss = running_loss / dset_sizes[phase]
epoch_acc = running_corrects / dset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'test' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model = copy.deepcopy(model)
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
return best_model
While performing model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25) I am getting "RuntimeError: _thnn_upsample_bilinear2d_forward not supported on CUDAType for Byte"
It appears your datapoints are byte tensors, i.e type uint8. Try casting your data into float32
# Replace this
inputs = inputs.to(device)
# With this
inputs = inputs.float().to(device)
Note that the torchvision models expect data to be normalized in a specific way. Check here for the procedure, which basically entails using
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
for normalizing your data.

Pytorch: Illegal instruction core dumped when loading ResNet50 model

I'm new to pytorch and I'm using ResNet50 model to train.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet50(pretrained=True)
for param in model.parameters():
param.requires_grad = False
model.fc = nn.Sequential(nn.Linear(2048, 512),nn.ReLU(),nn.Dropout(0.2),nn.Linear(512, 10),nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
model.to(device)
epochs = 1
steps = 0
running_loss = 0
print_every = 10
train_losses, test_losses = [], []
for epoch in range(epochs):
for inputs, labels in trainloader:
steps += 1
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
logps = model.forward(inputs)
loss = criterion(logps, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if steps % print_every == 0:
test_loss = 0
accuracy = 0
model.eval()
with torch.no_grad():
for inputs, labels in testloader:
inputs, labels = inputs.to(device),labels.to(device)
logps = model.forward(inputs)
batch_loss = criterion(logps, labels)
test_loss += batch_loss.item()
ps = torch.exp(logps)
top_p, top_class = ps.topk(1, dim=1)
equals = top_class == labels.view(*top_class.shape)
accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
train_losses.append(running_loss/len(trainloader))
test_losses.append(test_loss/len(testloader))
print(f"Epoch {epoch+1}/{epochs}.. "
f"Train loss: {running_loss/print_every:.3f}.. "
f"Test loss: {test_loss/len(testloader):.3f}.. "
f"Test accuracy: {accuracy/len(testloader):.3f}")
running_loss = 0
model.train()
This code works fine in Google Colaboratory but in my local machine(CPU), on execution of "model.forward(inputs)", it gives an error "Illegal instruction core dumped". I've tried updating my pytorch version, but the problem persists.
please try with: CPU:#-GPU (e.g.:0)
example
import torch
import torch
import pycuda.driver as cuda
cuda.init()
torch.cuda.is_available()
# True
## Get Id of default device
torch.cuda.current_device()
# 0
cuda.Device(0).name() # '0' is the id of your GPU
# Tesla K80
source: full ref at
https://medium.com/p/speed-up-your-algorithms-part-1-pytorch-56d8a4ae7051

PyTorch Getting Custom Loss Function Running

I'm trying to use a custom loss function by extending nn.Module, but I can't get past the error
element 0 of variables does not require grad and does not have a grad_fn
Note: my labels are lists of size: num_samples, but each batch will have the same labels throughout the batch, so we shrink labels for the whole batch to be a single label by calling .diag()
My code is as follows and is based on the transfer learning tutorial:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train(True) # Set model to training mode
else:
model.train(False) # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for data in dataloaders[phase]:
# get the inputs
inputs, labels = data
inputs = inputs.float()
# wrap them in Variable
if use_gpu:
inputs = Variable(inputs.cuda())
labels = Variable(labels.cuda())
else:
inputs = Variable(inputs)
labels = Variable(labels)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs)
#outputs = nn.functional.sigmoid(outputs).round()
_, preds = torch.max(outputs, 1)
label = labels.diag().float()
preds = preds.float()
loss = criterion(preds, label)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.data[0] * inputs.size(0)
running_corrects += torch.sum(pred == label.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
and my loss function is defined below:
class CustLoss(nn.Module):
def __init__(self):
super(CustLoss, self).__init__()
def forward(self, outputs, labels):
return cust_loss(outputs, labels)
def cust_loss(pred, targets):
'''preds are arrays of size classes with floats in them'''
'''targets are arrays of all the classes from the batch'''
'''we sum the classes from the batch and find the num correct'''
r = torch.sum(pred == targets)
return r
Then I run the following to run the model:
model_ft = models.resnet18(pretrained=True)
for param in model_ft.parameters():
param.requires_grad = False
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 3)
if use_gpu:
model_ft = model_ft.cuda()
criterion = CustLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=25)
I tried getting it to work with other loss functions to no avail. I always get the same error when loss.backward() is called.
It was my understanding that I wouldn't need a custom implementation of loss.backward if I extend nn.Module.
You are subclassing nn.Module to define a function, in your case Loss function. So, when you compute loss.backward(), it tries to store the gradients in the loss itself, instead of the model and there is no variable in the loss for which to store the gradients. Your loss needs to be a function and not a module. See Extending autograd.
You have two options here -
The easiest one is to directly pass cust_loss function as criterion parameter to train_model.
You can extend torch.autograd.Function to define the custom loss (and if you wish, the backward function as well).
P.S. - It is mentioned that you need to implement the backward of the custom loss functions. This is not always the case. It is required only when your loss function is non-differentiable at some point. But, I do not think so that you’ll need to do that.

Resources