Find Training/Validation Accuracy & Loss of Faster-RCNN PyTorch model - pytorch

I am trying to find the training/validation accuracy and loss of my model for each epoch as I train it to find the best epoch to use from now on. I appreciate that there is lots of information on this now but this topic is very new to me, and I find it very difficult to find the right answer for my situation.
I assume that I need to add in one or two bits to the train_one_epoch() and evaluate() functions in order to do this?
My model setup is:
# Build Faster R-CNN (ResNet-50 FPN v2) with its DEFAULT pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(
    weights=models.detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
)

# Replace the box predictor with one that outputs `num_classes` classes,
# reusing the input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# Optimise only the parameters that are still trainable.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.02,
    momentum=0.9,
    weight_decay=1e-4,
)
# Multiply the learning rate by 0.1 at epochs 20 and 40.
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[20, 40],
    gamma=0.1,
)
And my training function is:
epochs = 50
for epoch in range(epochs):
    # One pass over the training data, printing progress every 20 iterations.
    train_one_epoch(
        model,
        optimizer,
        train_data_loader,
        device,
        epoch,
        print_freq=20,
    )
    # Advance the epoch-level learning-rate schedule.
    lr_scheduler.step()
    # Run the evaluation routine on the validation loader.
    evaluate(model, val_data_loader, device=device)
    print("\n\n")

# NOTE(review): the save is placed after the loop because the filename embeds
# `epochs` (the total), not `epoch` -- confirm against the original layout.
torch.save(model, f'./Models/trained_{ds}_model_Epoch{epochs}_LR0_02.pt')
I am using coco-like annotations, for example:
{'boxes': tensor([[316.9700, 242.5500, 464.1000, 442.1700], [ 39.2200, 172.6700, 169.8400, 430.9600]]), 'labels': tensor([2, 2]), 'image_id': tensor(1416), 'area': tensor([29370.1094, 33738.3789]), 'iscrowd': tensor([0, 0])}
The train_one_epoch and evaluate functions are from 'engine.py' from Torchvision.
It seems like using Tensorboard is a good tool to use, but I don't really know how to use it.
The engine.py is:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
    """Run one training pass over ``data_loader`` and return the metric logger.

    Args:
        model: Called as ``model(images, targets)``; must return a dict of losses.
        optimizer: Optimizer stepped once per batch.
        data_loader: Yields ``(images, targets)`` pairs; each target is a dict
            whose values support ``.to(device)``.
        device: Device the images and target values are moved to.
        epoch: Epoch index; a linear learning-rate warm-up is used when it is 0.
        print_freq: Logging interval handed to ``metric_logger.log_every``.
        scaler: Optional gradient scaler; when given, autocast is enabled and
            the scaler drives the backward pass and optimizer step.

    Returns:
        The ``utils.MetricLogger`` updated with the loss terms and learning rate.

    Exits the process with status 1 if the reduced loss is not finite.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"

    # Per-iteration linear warm-up, first epoch only.
    warmup_scheduler = None
    if epoch == 0:
        warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer,
            start_factor=1.0 / 1000,
            total_iters=min(1000, len(data_loader) - 1),
        )

    use_amp = scaler is not None
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        with torch.cuda.amp.autocast(enabled=use_amp):
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

        # Reduce losses over all GPUs for logging purposes.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if use_amp:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if warmup_scheduler is not None:
            warmup_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
The evaluate function is:
def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` with the COCO evaluator.

    Args:
        model: Called as ``model(images)`` in eval mode; must return one dict
            of tensors per image.
        data_loader: Yields ``(images, targets)``; each target must carry an
            ``"image_id"`` tensor. ``data_loader.dataset`` is converted to a
            COCO API object.
        device: Device the images are moved to before the forward pass.

    Returns:
        The ``CocoEvaluator`` after accumulate/summarize have been called.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = "Test:"

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = [img.to(device) for img in images]

            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            # Inference only: do not build an autograd graph for the outputs.
            with torch.no_grad():
                outputs = model(images)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Key every prediction by the image id of its target.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Restore the caller's thread setting even if evaluation raised.
        torch.set_num_threads(n_threads)
    return coco_evaluator

Related

Why loss is not decreasing in a Siamese BERT-Network training (Entity matching task)

I'm trying to finetune a model for an entity matching task (kind of a sentence similarity task).
The idea is that if I give as input two sentences the model should output if they represent the same entity or not. I'm interested in the products' domain.
So for example:
sentences_left = ('logitech harmony 890 advanced universal remote control h890', 'sony silver digital voice recorder icdb600')
sentences_right = ('logitech harmony 890 advanced universal remote hdtv , tv , dvd player ( s ) , lighting , audio system 100 ft universal remote 966193-0403', 'canon black ef 70-300mm f/4 -5.6 is usm telephoto zoom lens 0345b002')
The output should be 1 for the first left-right pair of sentences and 0 for the second.
I want to test two approaches. The first is a sequence classification setup. So I take a pair of sentences, concat them with a [SEP] token in-between, encode it and feed it to BERT.
This approach kind of works, but I wanted to explore a second one that, in theory, should work too.
In few words, using mpnet as pre-trained language model I'm trying to implement this setup:
This is taken from the paper Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. The idea is to compute not a single embedding as before, but two separate embeddings, one for each sentence. The two embeddings are then concatenated and fed to a softmax classifier.
After lots of struggles I'm still unable to make it work, since the loss has no intention of decreasing. It starts at 0.25 and goes neither up nor down.
I'm using the Abt-Buy, Amazon-Google and Walmart-Amazon datasets.
This is my model:
class FinalClassifier(nn.Module):
    """Siamese sentence-pair classifier built on a shared pretrained encoder.

    Both texts are encoded with the same encoder and pooled with
    ``_mean_pooling``; the features ``(u, v, |u - v|)`` are concatenated and
    passed through a single linear layer that produces one logit per pair.
    """

    def __init__(self, pos_neg=None, frozen=False):
        """
        Args:
            pos_neg: Optional weight for the positive class, passed to
                ``BCEWithLogitsLoss`` as ``pos_weight``. When falsy, the loss
                is unweighted.
            frozen: When true, the encoder parameters are excluded from
                gradient updates.
        """
        super().__init__()
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.encoder = AutoModel.from_pretrained(
            'all-mpnet-base-v2')
        if frozen:
            for param in self.encoder.parameters():
                param.requires_grad = False
        self.tokenizer = AutoTokenizer.from_pretrained(
            'all-mpnet-base-v2')
        # Always define the criterion so forward() works without pos_neg too.
        pos_weight = torch.Tensor([pos_neg]) if pos_neg else None
        self.criterion = BCEWithLogitsLoss(pos_weight=pos_weight)
        self.linear = nn.Linear(3*768, 1)
        self.relu = nn.ReLU()

    def _embed(self, texts):
        """Tokenize ``texts``, run the encoder and return the pooled embeddings."""
        encoded_inputs = self.tokenizer(texts, padding='max_length',
                                        truncation=True, return_tensors='pt')
        encoded_inputs = encoded_inputs.to(self.device)
        output = self.encoder(**encoded_inputs)
        # _mean_pooling is defined elsewhere; presumably a mask-aware mean over
        # the token embeddings -- verify against its definition.
        return _mean_pooling(output, encoded_inputs['attention_mask'])

    def forward(self, texts_left, texts_right, labels=None):
        """Score each (left, right) text pair.

        Args:
            texts_left: Batch of left-hand texts.
            texts_right: Batch of right-hand texts, aligned with ``texts_left``.
            labels: Optional tensor of 0/1 targets, one per pair.

        Returns:
            ``(loss, relu_output)``. ``loss`` is the BCE-with-logits loss on the
            raw linear output, or ``None`` when ``labels`` is not given.
            ``relu_output`` is the ReLU of the linear output.
        """
        output_left = self._embed(texts_left)
        output_right = self._embed(texts_right)

        # Look at sBERT paper (u, v, |u-v|)
        pooled_output = torch.cat(
            (output_left, output_right, torch.abs(output_left - output_right)), -1)
        linear_output = self.linear(pooled_output)
        # NOTE(review): callers threshold this value at 0.5; a sigmoid of the
        # logit may be what is intended instead of a ReLU -- confirm.
        relu_output = self.relu(linear_output)

        loss = None
        if labels is not None:
            labels = labels.to(self.device)
            loss = self.criterion(linear_output.view(-1), labels.float())
        return (loss, relu_output)
Here's the Dataset
class FinalDataset(torch.utils.data.Dataset):
    """Dataset of (left text, right text, label) triples backed by a DataFrame.

    The frame must provide the columns ``text_left``, ``text_right`` and
    ``label``; labels are converted to ``int`` once at construction.
    """

    def __init__(self, df):
        self.examples = df
        self.labels = list(map(int, df['label']))

    def classes(self):
        """Return the list of integer labels."""
        return self.labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(text_left, text_right, label)`` for row ``idx``; the label is a 0-d array."""
        row = self.examples.iloc[idx]
        return row['text_left'], row['text_right'], np.array(self.labels[idx])
and finally the training loop
def train(model, train, val, learning_rate=1e-6, epochs=5, batch_size=8):
    """Train ``model`` and log losses and F1 to the module-level ``writer``.

    Args:
        model: Called as ``model(left_batch, right_batch, labels)``; must
            return ``(loss, output)``.
        train: Training dataset yielding ``(text_left, text_right, label)``.
        val: Validation dataset with the same item layout.
        learning_rate: Learning rate for Adam.
        epochs: Number of passes over the training data.
        batch_size: Batch size used for both data loaders.

    Every 100 mini-batches the mean loss of that window is printed and logged;
    during validation the F1 of the same window is logged as well. After each
    epoch the mean per-batch losses, the validation F1 and a classification
    report are printed.
    """
    log_every = 100  # mini-batches per logging window

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=batch_size)

    optimizer = Adam(model.parameters(), lr=learning_rate)
    if torch.cuda.is_available():
        model = model.cuda()

    for epoch_num in range(epochs):
        # ---- training phase ----
        total_loss_train = 0.0
        window_loss = 0.0
        model.train()
        for i, data in enumerate(tqdm(train_dataloader)):
            left_batch, right_batch, labels = data
            batch_loss, _ = model(left_batch, right_batch, labels)

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Accumulate plain floats so no autograd graph is kept alive.
            loss_value = batch_loss.item()
            total_loss_train += loss_value
            window_loss += loss_value

            # every 100 mini-batches
            if i % log_every == log_every - 1:
                mean_window_loss = window_loss / log_every
                print(f' Loss/train at epoch {epoch_num+1} (batch {i}): {mean_window_loss}')
                writer.add_scalar('Loss/train',
                                  mean_window_loss,
                                  epoch_num * len(train_dataloader) + i)
                window_loss = 0.0

        # ---- validation phase ----
        total_loss_val = 0.0
        window_loss = 0.0  # start clean; do not carry over training batches
        window_start = 0   # index of the first batch in the current window
        batch_predictions = []
        batch_labels = []
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(val_dataloader):
                left_batch, right_batch, labels = data
                batch_loss, linear_output = model(left_batch, right_batch, labels)

                scores = linear_output.detach().cpu().numpy().reshape(-1)
                batch_predictions.append(np.where(scores > 0.5, 1, 0))
                batch_labels.append(labels.detach().cpu().numpy().reshape(-1))

                loss_value = batch_loss.item()
                total_loss_val += loss_value
                window_loss += loss_value

                # every 100 mini-batches
                if i % log_every == log_every - 1:
                    global_step = epoch_num * len(val_dataloader) + i
                    mean_window_loss = window_loss / log_every
                    print(f' Loss/val at epoch {epoch_num+1} (batch {i}): {mean_window_loss}')
                    writer.add_scalar('Loss/val', mean_window_loss, global_step)
                    # F1 over exactly the samples of the batches in this window.
                    writer.add_scalar(
                        'F1/val',
                        f1_score(y_true=np.concatenate(batch_labels[window_start:]),
                                 y_pred=np.concatenate(batch_predictions[window_start:])),
                        global_step)
                    window_loss = 0.0
                    window_start = i + 1

        predictions = np.concatenate(batch_predictions)
        total_labels = np.concatenate(batch_labels)
        f1 = f1_score(y_true=total_labels, y_pred=predictions)
        report = classification_report(total_labels, predictions, zero_division=0)

        # plot all the pr curves
        for class_index in (0, 1):
            add_pr_curve_tensorboard(class_index, predictions, total_labels)
        for name, p in model.named_parameters():
            writer.add_histogram(name, p, bins='auto')

        # Each batch loss is already a batch mean, so average over batches.
        mean_train_loss = total_loss_train / max(len(train_dataloader), 1)
        mean_val_loss = total_loss_val / max(len(val_dataloader), 1)
        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {mean_train_loss: .3f} '
            f'| Val Loss: {mean_val_loss: .3f} '
            f'| Val F1: {f1: .3f}')
        tqdm.write(report)
# TensorBoard writer used by train() through the module-level name `writer`.
writer = SummaryWriter(log_dir=tensorboard_path)
EPOCHS = 5
LR = 1e-6
train_pos_neg_ratio = 9
try:
    model = FinalClassifier(train_pos_neg_ratio, frozen=False)
    train_data, val_data = FinalDataset(df_train), FinalDataset(df_dev)
    train(model, train_data, val_data, LR, EPOCHS)
finally:
    # Flush and close the event file even when training fails part-way.
    writer.flush()
    writer.close()
The issue is that the loss does NOT decrease, and as a result the F1 score does not improve either. I tried normalizing the outputs, adding a dropout layer, and analyzing the dataset to be sure the problem wasn't there, but I have now run out of ideas. Any help would be extremely valuable.

mini-batch gradient descent bad accuracy/loss

I’m trying mini-batch gradient descent on the popular iris dataset, but somehow I don’t manage to get the accuracy of the model above 75-80%. Also, I’m not certain if I’m calculating the loss as well as the accuracy correctly. Any suggestions on how to improve my code or mistakes I’m doing are appreciated.
batch_size = 10
# Both loaders wrap the same `dataset`, use the same batch size and shuffle.
train_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for _ in range(2)
)
Training loop:
n_iters = 1000
steps = n_iters/10  # report ten times over the run
LOSS = []  # every batch loss seen so far, across all epochs
for epochs in range(n_iters):
    for i, (inputs, labels) in enumerate(train_loader):
        logits = model(inputs)
        train_labels = transform_label(labels)
        batch_loss = loss(logits, train_labels)
        batch_loss.backward()
        # Apply the update, then clear gradients for the next batch.
        optim.step()
        optim.zero_grad()
        LOSS.append(batch_loss.item())
    # NOTE(review): assumed to run once per epoch rather than per batch --
    # confirm against the original layout.
    if epochs%steps == 0:
        print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
Calculate accuracy:
def accuracy(model, test_loader):
    """Return the fraction of samples in ``test_loader`` classified correctly.

    Args:
        model: Callable mapping a batch of inputs to per-class scores of
            shape ``(batch, classes)``.
        test_loader: Iterable of ``(inputs, labels)`` batches where the labels
            are the iris species names.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` if the loader is empty.

    Raises:
        KeyError: If a label is not one of the three known species names.
    """
    # map labels with 0,1,2
    label_to_index = {
        "Iris-setosa": 0,
        "Iris-versicolor": 1,
        "Iris-virginica": 2,
    }

    def transform_label(label_data):
        return torch.tensor([label_to_index[name] for name in label_data], dtype=torch.long)

    number_correct = 0
    number_seen = 0
    with torch.no_grad():  # evaluation only, no autograd graph needed
        for X_test, test_labels in test_loader:
            targets = transform_label(test_labels)
            predicted = torch.argmax(model(X_test), dim=1).cpu()
            number_correct += (predicted == targets).sum().item()
            number_seen += len(targets)

    if number_seen == 0:
        return 0.0
    return number_correct / number_seen
# Hyper-parameter and training objects for the iris classifier.
lr = 0.01
model = NeuralNetwork()

# Adam is the active optimiser; SGD with momentum=0.9 was the alternative tried.
optim = torch.optim.Adam(
    model.parameters(),
    lr=lr,
)

# Cross-entropy is the active criterion; MSELoss was the alternative tried.
loss = torch.nn.CrossEntropyLoss()

# Weights are float32 by default, not float64 --> error message
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 4 inputs -> 128 -> 64 -> 3 outputs, ReLU between layers."""

    def __init__(self):
        super().__init__()
        layers = [
            nn.Linear(4, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 3),
        ]
        self.linear_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw output of the layer stack for ``x``."""
        return self.linear_stack(x)

Bigger batch size improves training by too much

I am writing a classifier that takes a surname and predicts a language it belongs to. I found that small batch sizes (256 and less) perform poorly compared to big batch sizes (2048 and more). Could someone give me some insight on why this is happening and how to fix it? Thank you.
Training code:
def indices_to_packed(names, input_size):
    """One-hot encode each index sequence and pack them into one PackedSequence.

    Args:
        names: Sequence of integer index tensors, one per name.
        input_size: Number of classes for the one-hot encoding.

    Returns:
        A ``PackedSequence`` of float one-hot vectors; the inputs do not need
        to be sorted by length.
    """
    one_hot_names = []
    for index_sequence in names:
        one_hot_names.append(F.one_hot(index_sequence, input_size).float())
    return pack_sequence(one_hot_names, enforce_sorted=False)
def infer(model, data, labels, lengths, device):
    """Run ``model`` on one batch and return ``(loss, preds)``.

    The index sequences in ``data`` are one-hot encoded to
    ``model.rnn.input_size`` classes and packed, everything is moved to
    ``device``, and the loss is computed with the module-level ``loss_fn``.
    """
    packed_batch = indices_to_packed(data, model.rnn.input_size).to(device)
    labels = labels.to(device)
    lengths = lengths.to(device)

    preds = model(packed_batch, lengths)
    return loss_fn(preds, labels), preds
results = {}
epochs = 100
for BATCH_SIZE in [4096, 2048, 256]:
    train_loader = data.DataLoader(train_data, BATCH_SIZE, sampler=train_sampler, collate_fn=partial(my_collate, input_size=input_size, output_size=output_size))
    val_loader = data.DataLoader(val_data, BATCH_SIZE, sampler=val_sampler, collate_fn=partial(my_collate, input_size=input_size, output_size=output_size))
    model = LSTM(input_size, HIDDEN_SIZE, NUM_LAYERS, DROPOUT, output_size)
    optimizer = torch.optim.Adam(model.parameters())
    model.to(device)
    train_losses = []
    val_losses = []
    cur_losses = {}
    duration = 0
    for epoch in range(epochs):
        start = time.time()

        # ---- training ----
        train_loss = 0.0
        train_batches = 0
        model.train()
        # Using PackedSequence
        for names, langs, lengths in train_loader:
            optimizer.zero_grad()
            loss, _ = infer(model, names, langs, lengths, device)
            loss.backward()
            optimizer.step()
            # .item() keeps the running sum free of autograd history.
            train_loss += loss.item()
            train_batches += 1
        # Each loss comes from one batch, so normalise by the number of
        # batches; dividing by the dataset size made the reported value
        # depend on BATCH_SIZE.
        train_loss /= max(train_batches, 1)
        train_losses.append(train_loss)

        # ---- validation ----
        model.eval()
        val_loss = 0.0
        val_batches = 0
        with torch.no_grad():
            for names, langs, lengths in val_loader:
                loss, _ = infer(model, names, langs, lengths, device)
                val_loss += loss.item()
                val_batches += 1
        val_loss /= max(val_batches, 1)
        val_losses.append(val_loss)

        cur_duration = time.time() - start
        duration += cur_duration
        log_line = (f"BATCH_SIZE: {BATCH_SIZE} epoch: {epoch} train loss: "
                    f"{train_loss:.5f} val loss: {val_loss:.5f}")
        print(log_line)
    cur_losses["train_losses"] = train_losses
    cur_losses["val_losses"] = val_losses
    results[BATCH_SIZE] = {"losses" : cur_losses, "duration" : duration, "model": model}
Model:
class LSTM(nn.Module):
    """LSTM classifier over packed sequences.

    The LSTM output at the last valid time step of every sequence is fed
    through a linear layer followed by a log-softmax.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size):
        """
        Args:
            input_size: Feature size of each time step.
            hidden_size: Hidden state size of the LSTM.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout probability between LSTM layers.
            output_size: Number of output classes.
        """
        super().__init__()
        # Use the constructor argument, not a module-level DROPOUT constant.
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, lengths):
        """Return log-probabilities of shape ``(batch, output_size)``.

        Args:
            x: ``PackedSequence`` of inputs.
            lengths: Integer tensor with the length of each sequence, in the
                original (unsorted) batch order.
        """
        lstm_out, _ = self.rnn(x)
        device = lstm_out.data.device

        # https://discuss.pytorch.org/t/get-each-sequences-last-item-from-packed-sequence/41118/7
        # sum_batch_sizes[L] is the offset in lstm_out.data of time step L - 1;
        # the two leading zeros shift the cumulative sum accordingly.
        sum_batch_sizes = torch.cat((
            torch.zeros(2, dtype=torch.int64),
            torch.cumsum(lstm_out.batch_sizes, 0)
        )).to(device)
        sorted_lengths = lengths.to(device)[lstm_out.sorted_indices]
        # Within a time step the i-th longest sequence sits at position i.
        last_seq_idxs = sum_batch_sizes[sorted_lengths] + torch.arange(lengths.size(0), device=device)
        last_seq_items = lstm_out.data[last_seq_idxs]
        # Undo the length sort so rows line up with the original batch order.
        lstm_last_out = last_seq_items[lstm_out.unsorted_indices]

        linear_out = self.linear(lstm_last_out)
        softmax_out = self.softmax(linear_out)
        return softmax_out
Losses with different batch sizes:
It looks like the issue is how the loss is calculated.
train_loss += loss line accumulates the loss. When batch size is higher, there will be fewer steps to do. The code normalizes this by dividing by the length of train data, train_loss /= len(train_data), but should probably take into account the batch size: train_loss /= (len(train_data) / BATCH_SIZE).
The same for validation loss, but the effect is different probably because of smaller data size compared to training data.

Almost non-existent training accuracy and low test accuracy

I am really new to Machine Learning and I am not so well versed in coding in general. However there is need to look through the customers feedback at our store, that average quite a lot each year, yet we cannot tell % of positive, negative and neutral.
Currently I am trying to train a Bert Model to do simple multi labeled sentiment analysis. The input is our store's customers feedback. The customers feedback is not always so clearly defined since customers do tend to tell long and long about their experience and their sentiment is not always so clear. However we managed to get positive, negative and neutral, each set 2247 samples.
But when I try to train it, the training accuracy is around 0.4%, which is super low. Validation accuracy is around 60%, and the F1-score is around 60% for each of the labels. I wonder what can be done to improve the training accuracy. I have been stuck for a while. Please take a look at my code and help me out with this.
I have tried changing learning rate (tried all learning rate Bert suggested and 1e-5),changing Max LEN, changing amount of EPOCH, changing drop out rate (0.1, 0.2, 0.3, 0.4, 0.5), but so far nothing yielded results.
#read dataset
# Read the feedback file: no header row, ';'-separated, '\r' line terminator.
# The separator is a raw string because it is a regular expression and '\;'
# is not a valid escape sequence in a normal string literal.
df = pd.read_csv(
    "data.csv",
    header=None,
    names=['content', 'sentiment'],
    sep=r'\;',
    lineterminator='\r',
    encoding="ISO-8859-1",
    engine="python",
)
from sklearn.utils import shuffle
df = shuffle(df)
# Map the sentiment codes -1/0/1 to the class indices 0/1/2.
df['sentiment'] = df['sentiment'].replace(to_replace=[-1, 0, 1], value=[0, 1, 2])
df.head()

# Load the pretrained Finnish BERT tokenizer.
PRE_TRAINED_MODEL_NAME = 'TurkuNLP/bert-base-finnish-cased-v1'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

# Choose the sequence length from the distribution of token counts.
token_lens = [len(tokenizer.encode(txt, max_length=512)) for txt in df.content]
sns.distplot(token_lens)
plt.xlim([0, 256])
plt.xlabel('Token count')
MAX_LEN = 260
#Make a PyTorch dataset
class FIDataset(Dataset):
    """Dataset that tokenizes one text per item for sequence classification."""

    def __init__(self, texts, targets, tokenizer, max_len):
        """
        Args:
            texts: Indexable collection of texts.
            targets: Indexable collection of integer class labels, aligned
                with ``texts``.
            tokenizer: Tokenizer providing ``encode_plus``.
            max_len: Length every encoded sequence is padded or truncated to.
        """
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        """Return the text, its ``input_ids``, ``attention_mask`` and target tensor."""
        text = str(self.texts[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            # Pad short texts and cut long ones so every item has exactly
            # max_len tokens and batches can be stacked.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
#split test and train
# Hold out 10% of the rows, then halve the hold-out into validation and test.
df_train, df_holdout = train_test_split(df, test_size=0.1, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_holdout, test_size=0.5, random_state=RANDOM_SEED)
df_train.shape, df_val.shape, df_test.shape
#data loader function
def create_data_loader(df, tokenizer, max_len, batch_size):
    """Wrap the ``content`` / ``sentiment`` columns of ``df`` in a DataLoader.

    The loader uses ``batch_size`` and four worker processes.
    """
    texts = df.content.to_numpy()
    targets = df.sentiment.to_numpy()
    dataset = FIDataset(texts=texts, targets=targets, tokenizer=tokenizer, max_len=max_len)
    return DataLoader(dataset, batch_size=batch_size, num_workers=4)
#Load data into train, test, val
BATCH_SIZE = 16
# One loader per split, built in train / val / test order.
train_data_loader, val_data_loader, test_data_loader = (
    create_data_loader(split_frame, tokenizer, MAX_LEN, BATCH_SIZE)
    for split_frame in (df_train, df_val, df_test)
)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
    """Pretrained BERT encoder followed by dropout and a linear output layer."""

    def __init__(self, n_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.1)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return one score per class, computed from the encoder's ``pooler_output``."""
        encoder_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        dropped = self.drop(encoder_outputs["pooler_output"])
        return self.out(dropped)
#Create a Classifier instance and move to GPU
# Three-class classifier, moved to the target device.
model = SentimentClassifier(3)
model = model.to(device)

# AdamW with a linear schedule and no warm-up steps.
EPOCHS = 5
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = EPOCHS * len(train_data_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=total_steps
)

loss_fn = nn.CrossEntropyLoss().to(device)
#Train each Epoch function
def train_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    n_examples
):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch is a dict with ``input_ids``, ``attention_mask`` and
    ``targets``. Gradients are clipped to a norm of 1.0 before every
    optimizer step, and the scheduler is stepped once per batch.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided
        by ``n_examples`` (a double tensor) and the mean of the batch losses.
    """
    model = model.train()
    batch_losses = []
    num_correct = 0

    for batch in data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        targets = batch["targets"].to(device)

        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        predicted = torch.max(logits, dim=1)[1]
        loss = loss_fn(logits, targets)

        num_correct += torch.sum(predicted == targets)
        batch_losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    accuracy = num_correct.double() / n_examples
    return accuracy, np.mean(batch_losses)
#Eval model function
def eval_model(model, data_loader, loss_fn, device, n_examples):
    """Evaluate ``model`` on ``data_loader`` without gradient tracking.

    Each batch is a dict with ``input_ids``, ``attention_mask`` and
    ``targets``.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided
        by ``n_examples`` (a double tensor) and the mean of the batch losses.
    """
    model = model.eval()
    batch_losses = []
    num_correct = 0

    with torch.no_grad():
        torch.cuda.empty_cache()
        for batch in data_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            targets = batch["targets"].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask)
            predicted = torch.max(logits, dim=1)[1]
            loss = loss_fn(logits, targets)

            num_correct += torch.sum(predicted == targets)
            batch_losses.append(loss.item())

    accuracy = num_correct.double() / n_examples
    return accuracy, np.mean(batch_losses)
#training loop through each epochs
import torch

torch.cuda.empty_cache()
history = defaultdict(list)  # metric name -> one value per epoch
best_accuracy = 0

if __name__ == '__main__':
    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)

        train_acc, train_loss = train_epoch(
            model, train_data_loader, loss_fn, optimizer, device, scheduler, len(df_train)
        )
        print(f'Train loss {train_loss} accuracy {train_acc}')

        val_acc, val_loss = eval_model(
            model, val_data_loader, loss_fn, device, len(df_val)
        )
        print(f'Val loss {val_loss} accuracy {val_acc}')
        print()

        history['train_acc'].append(train_acc)
        history['train_loss'].append(train_loss)
        history['val_acc'].append(val_acc)
        history['val_loss'].append(val_loss)

        # Save the weights whenever validation accuracy beats the best so far.
        if val_acc > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_acc
-- Edit: I have printed out preds and targets as well as train and val accuracy
Here _, preds = torch.max(outputs, dim=1), you probably want argmax, not max?
Print out preds and targets to better see what's going on.
Edit after preds and targets printed out. For epochs 4 and 5, preds matches targets exactly, so train accuracy should be 1. I think the issue is that the accuracy is divided by n_examples, which is a number of examples in the whole train dataset, while it should be divided by the number of examples in the epoch.

Need help regarding Transfer Learning a Faster RCNN ResNet50FPN in PyTorch

I am new to PyTorch. I'm trying to use a pre-trained Faster RCNN torchvision.models.detection.fasterrcnn_resnet50_fpn() for object detection project. I have created a CustomDataset(Dataset) class to handle the custom dataset.
Here is the custom class implementation
class ToTensor(object):
    """Convert the ndarray image of a sample into a tensor."""

    def __call__(self, sample):
        """Return a new sample whose image is a channel-first tensor.

        ``sample['image']`` is reordered from H x W x C (numpy layout) to
        C x H x W (torch layout); ``sample['meta_data']`` is passed through.
        """
        channel_first = sample['image'].transpose(2, 0, 1)
        return {
            'image': torch.from_numpy(channel_first),
            'meta_data': sample['meta_data'],
        }
class CustomDataset(Dataset):
    """Custom Landmarks dataset."""

    def __init__(self, data_dir, root_dir, transform=None):
        """
        Args:
            data_dir (string): Directory with all the labels(json).
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample. It receives and must return the raw sample dict
                (ndarray image, H x W x C) because ``ToTensor`` is applied to
                its result.
        """
        self.data_dir = data_dir
        self.root_dir = root_dir
        self.transform = transform
        # List and sort both directories once instead of on every item access.
        # Image i and label i are paired purely by sorted position.
        self._image_names = sorted(os.listdir(root_dir))
        self._label_names = sorted(os.listdir(data_dir))

    def __len__(self):
        return len(self._label_names)

    def __getitem__(self, idx):
        """Return ``{'image': tensor, 'meta_data': ...}`` for item ``idx``."""
        image_path = os.path.join(self.root_dir, self._image_names[idx])
        image = io.imread(image_path, plugin='matplotlib')

        label_path = os.path.join(self.data_dir, self._label_names[idx])
        with open(label_path) as f:
            meta_data = json.load(f)
        meta_data = meta_data['annotation']['object']

        sample = {'image': image, 'meta_data': meta_data}
        # Apply the optional transform and keep its result; previously the
        # transformed sample was computed and then thrown away.
        if self.transform:
            sample = self.transform(sample)
        return ToTensor()(sample)
Here is the train_model function
def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
    """Train ``model`` and return the copy with the best test-phase accuracy.

    Args:
        model: Called as ``model(inputs, labels)``.
        criterion: Called as ``criterion(outputs, labels)``.
        optimizer: Optimizer; replaced each epoch by
            ``lr_scheduler(optimizer, epoch)``.
        lr_scheduler: Callable ``(optimizer, epoch) -> optimizer``.
        num_epochs: Number of epochs to run.

    Batches come from the module-level ``dset_loaders`` and are dicts with
    ``'image'`` and ``'meta_data'``; ``dset_sizes`` gives the denominator for
    the per-phase loss and accuracy.

    NOTE(review): torchvision detection models return a dict of losses in
    training mode and a list of detections in eval mode, so the
    ``torch.max`` / ``criterion`` steps below look written for a classifier --
    confirm they fit the model being trained.
    """
    since = time.time()
    best_model = model
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            if is_train:
                optimizer = lr_scheduler(optimizer, epoch)
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for data in dset_loaders[phase]:
                # get the inputs
                inputs, labels = data['image'], data['meta_data']
                # Cast to float32: uint8 image tensors are rejected by the
                # model's bilinear upsampling on CUDA.
                inputs = inputs.float().to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track gradients only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs, labels)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model = copy.deepcopy(model)
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    return best_model
While performing model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25) I am getting "RuntimeError: _thnn_upsample_bilinear2d_forward not supported on CUDAType for Byte"
It appears your data points are byte tensors, i.e. of type uint8. Try casting your data to float32:
# Replace this
inputs = inputs.to(device)
# With this
inputs = inputs.float().to(device)
Note that the torchvision models expect data to be normalized in a specific way. Check here for the procedure, which basically entails using
# Normalization transform with three mean and three std values.
# NOTE(review): these look like the ImageNet statistics commonly used with
# torchvision classification models -- confirm they are appropriate for the
# detection model in use before applying them.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
for normalizing your data.

Resources