This is the code I was working on for image classification using PyTorch, and I'm not able to get the accuracy right.
The accuracy is exceeding 100%. Can anyone help me find the error?
def trained_model(criterion, optimizer, epochs=5):
    epoch_loss = 0.0
    epoch_accuracy = 0
    running_loss = 0
    running_accuracy = 0
    total = 0
    for epoch in range(epochs):
        print('epoch : {}/{}'.format(epoch+1, epochs))
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            _, predictions = torch.max(outputs, dim=1)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_accuracy += torch.sum(predictions == labels.data)
        epoch_loss = running_loss / len(train_dataset)
        epoch_accuracy = running_accuracy / len(train_dataset)
        print('Loss:{:.4f} , Accuracy : {:.4f} '.format(epoch_loss, epoch_accuracy))
    return model
You should probably use torch.argmax to get the class predictions from your model output, instead of torch.max.
Assuming you are working with class indices as labels, something like the following will get you the average accuracy of the current batch:
>>> outputs = torch.rand(16, 5)                 # batch of 16 samples, 5 classes
>>> pred = torch.argmax(outputs, dim=1)         # one predicted class index per sample
>>> pred.shape
torch.Size([16])
>>> labels = torch.randint(0, 5, (16,))         # ground-truth class indices
>>> accuracy = (pred == labels).float().mean()  # e.g. tensor(0.2500)
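For reference, here is a minimal sketch of an epoch loop where the running counters are reset at the start of every epoch, so the reported accuracy stays between 0 and 1. It reuses the model, train_loader, device, criterion, and optimizer names from the question and is only meant as an illustration:

epochs = 5
for epoch in range(epochs):
    running_loss = 0.0       # reset at the start of every epoch
    running_correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        running_correct += (torch.argmax(outputs, dim=1) == labels).sum().item()
        total += labels.size(0)

    print('epoch {}/{}  loss: {:.4f}  accuracy: {:.4f}'.format(
        epoch + 1, epochs, running_loss / total, running_correct / total))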
Related
So I'm studying PyTorch, coming from a TensorFlow background.
I'm trying to replicate a simple convnet, which I developed successfully in TensorFlow, to classify cat vs. dog images.
In PyTorch I see some strange behavior:
Using a learning rate of 0.001 makes the CNet predict only 0 after the first batch (exploding gradients, maybe?).
Using a learning rate of 0.0005 gives a smooth learning curve and the CNet converges.
Can anyone help me understand what I'm doing wrong? Here is the code:
import pathlib
import torch
import torch.nn.functional as F
import torchvision
from torch.utils.data.dataloader import DataLoader
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class CNet(torch.nn.Module):
    def __init__(self):
        super(CNet, self).__init__()  # input is a 180x180 image
        self.conv1 = torch.nn.Conv2d(3, 32, 3)  # out -> 178x178x32
        self.conv2 = torch.nn.Conv2d(32, 64, 3)
        self.conv3 = torch.nn.Conv2d(64, 128, 3)
        self.conv4 = torch.nn.Conv2d(128, 256, 3)
        self.conv5 = torch.nn.Conv2d(256, 256, 3)
        self.flatten = torch.nn.Flatten()
        #self.fc = torch.nn.LazyLinear(1)
        self.fc = torch.nn.Linear(7*7*256, 1)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv4(x)), (2, 2))
        x = F.relu(self.conv5(x))
        x = self.flatten(x)
        o = torch.sigmoid(self.fc(x))
        return o
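# Note (shape check, added for illustration): with a 180x180 input, each 3x3 Conv2d
# above (no padding) shrinks each spatial dimension by 2 and each 2x2 max-pool
# halves it (rounding down): 180 -> 178 -> 89 -> 87 -> 43 -> 41 -> 20 -> 18 -> 9 -> 7,
# which is why self.fc expects 7*7*256 input features.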
def train(model: CNet, train_data: DataLoader, criterion, optimizer: torch.optim.Optimizer, epochs=10, validation_data: DataLoader = None):
    losses = []
    for epoch in range(epochs):
        epoch_loss = 0.0
        running_loss = 0.0
        for i, data in enumerate(train_data, 0):
            imgs, labels = data
            imgs, labels = imgs.to(device), labels.to(device, dtype=torch.float)
            labels = labels.unsqueeze(-1)
            # run
            output = model(imgs)
            loss = criterion(output, labels)
            # zero out accumulated grads
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            epoch_loss += loss.item()
            #if i % 50 == 49:
            #    print(f'[{epoch+1}, {i:5d}] loss: {running_loss / 50.0:.3f}')
            #    running_loss = 0.0
        losses.append(epoch_loss / len(train_data.dataset))
        print(f'[{epoch+1}, {epochs:5d}] loss: {losses[-1]:.3f}')
    return losses
if __name__ == "__main__":
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.Resize((180, 180)),
        torchvision.transforms.ToTensor(),
    ])

    dataset_dir = pathlib.Path(r"E:\Datasets\torch\Cat_Dog\cats_vs_dogs_small")
    train_data = torchvision.datasets.ImageFolder(dataset_dir / "train", transform=transforms)
    validation_data = torchvision.datasets.ImageFolder(dataset_dir / "validation", transform=transforms)
    test_data = torchvision.datasets.ImageFolder(dataset_dir / "test", transform=transforms)

    train_data_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2, persistent_workers=True, pin_memory=True)
    validation_data_loader = DataLoader(validation_data, batch_size=32, num_workers=2, shuffle=True, pin_memory=True)
    test_data_loader = DataLoader(test_data, batch_size=32, shuffle=True, pin_memory=True, num_workers=2)

    import matplotlib.pyplot as plt

    #plt.figure()
    #for i in range(1, 10):
    #    plt.subplot(3, 3, i)
    #    plt.axis('off')
    #    rand_idx = np.random.random_integers(0, len(train_data))
    #    plt.imshow(np.moveaxis(test_data[rand_idx][0].numpy(), 0, 2))
    #plt.show()

    net = CNet()
    net = net.to(device)

    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.RMSprop(net.parameters(), 0.001)

    net.train()
    # TODO save best model
    losses = train(net, train_data_loader, criterion, optimizer, epochs=30)

    epochs = range(1, len(losses) + 1)
    plt.plot(epochs, losses, 'bo', label='Training Loss')
    plt.show()
    print('Training Finished')

    correct_count, all_count = 0, 0
    for images, labels in test_data_loader:
        images, labels = images.to(device), labels.to(device, dtype=torch.float)
        with torch.no_grad():
            ps = net(images)
            pred_label = (ps > 0.5).to(torch.float)
            true_label = labels.unsqueeze(1)
            correct_count += (pred_label == true_label).sum().item()
            all_count += len(labels)

    print("Number Of Images Tested =", all_count)
    print("\nModel Accuracy =", (correct_count/all_count))
And here are some screenshots of the loss for each run:
LR=0.001 (not converging in PyTorch, converging in TensorFlow)
LR=0.0005 (converging in 30 epochs) [I know the validation loss is not 0 and the accuracy is only ~70%, but that is expected]
As you can see, the losses in the two experiments are on very different scales. What might cause such weird behavior? I call it 'weird' because I have never seen it happen in TensorFlow.
Is such a different behavior between the two frameworks typical, or am I missing something?
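One way to check the exploding-gradients hypothesis raised above is to log the total gradient norm after each backward pass. This is only a diagnostic sketch, reusing the net, criterion, optimizer, train_data_loader, and device names from the code above:

# Diagnostic sketch: log the total gradient norm each step to see whether it blows up.
for imgs, labels in train_data_loader:
    imgs, labels = imgs.to(device), labels.to(device, dtype=torch.float)
    optimizer.zero_grad()
    loss = criterion(net(imgs), labels.unsqueeze(1))
    loss.backward()
    # clip_grad_norm_ returns the total gradient norm before clipping;
    # a very large max_norm turns it into a pure measurement here.
    total_norm = torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1e9)
    print(f'loss: {loss.item():.4f}  grad norm: {total_norm:.2f}')
    optimizer.step()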
First of all, thanks for visiting my question.
In a multi-label classification problem, I wonder if I am measuring accuracy correctly.
The labels are one-hot (strictly, multi-hot) encoded, with shape (1000,), e.g. (0, 1, 0, 0, ..., 0, 1).
I used a ResNet-50 (on 3 GPUs) for training, as implemented in torchvision.models.
However, the accuracy of the model is higher than expected, and even early epochs already output a high value.
Am I measuring the accuracy of the model correctly?
Code below:
def train(log_interval, model, device, train_loader, criterion, optimizer, epoch):
    model.train()
    running_loss = 0
    running_correct = 0
    _iter = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        model.zero_grad()

        #output = F.softmax(model(data), dim=1)
        output = model(data)
        loss = criterion(output, target.type(torch.cuda.LongTensor))
        loss.backward()
        optimizer.step()

        pred = torch.sigmoid(output)
        pred[pred >= 0.5] = 1
        pred[pred < 0.5] = 0

        running_loss += loss.item() * data.size(0)
        running_correct += (pred == target).sum()/(target.size(0)*target.size(1))
        _iter += 1

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = running_correct / _iter
    print('Epochs : {}, train loss : {:4f}, train acc : {:4f}'.format(epoch, epoch_loss, epoch_acc))
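For comparison, here is one common way to accumulate element-wise multi-label accuracy over an epoch: count correct 0/1 predictions and divide by the total number of label entries at the end. This is only a sketch, assuming sigmoid outputs thresholded at 0.5 and multi-hot targets as described above, and reusing the model, train_loader, and device names from the code:

correct_elements = 0
total_elements = 0
with torch.no_grad():
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        pred = (torch.sigmoid(model(data)) >= 0.5).float()   # 0/1 predictions, shape (batch, 1000)
        correct_elements += (pred == target).sum().item()
        total_elements += target.numel()                     # batch_size * 1000
epoch_acc = correct_elements / total_elements  # fraction of individual label entries predicted correctly
print('element-wise accuracy: {:.4f}'.format(epoch_acc))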
I am working with the MNIST dataset and I have created the following network. I want to overfit the training data, and I think I am doing that here: my training loss is lower than my validation loss. This is the code that I have come up with. Please look at it and let me know whether I am overfitting the training data; if I am not, how do I go about it?
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 4096),
            nn.ReLU(),
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 10))

    def forward(self, x):
        return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
    total_correct = 0
    total_loss = 0
    total_examples = 0
    n_batches = 0
    with torch.no_grad():
        for data in dataloader:
            images, labels = data
            outputs = model(images)
            batch_loss = loss_function(outputs, labels)
            n_batches += 1
            total_loss += batch_loss.item()
            _, predicted = torch.max(outputs, dim=1)
            total_examples += labels.size(0)
            total_correct += (predicted == labels).sum().item()
    accuracy = total_correct / total_examples
    mean_loss = total_loss / n_batches
    return (accuracy, mean_loss)
def define_and_train(model, dataset_training, dataset_test):
    trainloader = torch.utils.data.DataLoader(small_trainset, batch_size=500, shuffle=True)
    testloader = torch.utils.data.DataLoader(dataset_test, batch_size=500, shuffle=True)

    values = [1e-8, 1e-7, 1e-6, 1e-5]
    model = NN()

    for params in values:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-7)

        train_acc = []
        val_acc = []
        train_loss = []
        val_loss = []

        for epoch in range(100):
            total_loss = 0
            total_correct = 0
            total_examples = 0
            n_mini_batches = 0

            for i, mini_batch in enumerate(trainloader, 0):
                images, labels = mini_batch
                optimizer.zero_grad()
                outputs = model(images)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()

                n_mini_batches += 1
                total_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                total_examples += labels.size(0)
                total_correct += (predicted == labels).sum().item()

            epoch_training_accuracy = total_correct / total_examples
            epoch_training_loss = total_loss / n_mini_batches
            epoch_val_accuracy, epoch_val_loss = accuracy_and_loss(model, loss_function, testloader)

            print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
                  % (params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))

            train_loss.append(epoch_training_loss)
            train_acc.append(epoch_training_accuracy)
            val_loss.append(epoch_val_loss)
            val_acc.append(epoch_val_accuracy)

    history = {'train_loss': train_loss,
               'train_acc': train_acc,
               'val_loss': val_loss,
               'val_acc': val_acc}

    return (history, model)
history1, net1 = define_and_train(model, dataset_training, dataset_test)
I am trying to overfit the training data so that later I can apply regularization to reduce the overfitting, which will give me a better understanding of the process.
Although I won't attempt to provide a rigorous definition, the term "overfit" typically means that the training loss continues to decrease whereas the validation loss stays stagnant at a position higher than the training loss, or continues to increase with more iterations.
Therefore, it is difficult to know whether your network is overfitting based on your code alone. Since dense, fully-connected networks tend to overfit easily in the absence of dropout layers or other regularizers, my hunch is that your network is indeed overfitting, as you intend. However, we would have to see your TensorBoard logs or a loss plot to determine whether the model is overfitting.
If you want to overfit your network to the dataset, I suggest that you construct a much larger model with more hidden layers. Overfitting occurs when the dataset is "too easy" for the model and it starts to remember the training set itself without learning generalizable patterns that can be applied to the validation set.
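To make that call, it helps to plot the training and validation curves from the history returned above. A minimal sketch with matplotlib, assuming the history1 dict returned by define_and_train:

import matplotlib.pyplot as plt

# Plot training vs. validation loss; a growing gap with rising validation loss suggests overfitting.
epochs = range(1, len(history1['train_loss']) + 1)
plt.plot(epochs, history1['train_loss'], label='train loss')
plt.plot(epochs, history1['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()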
I am new to PyTorch, and I am trying to train my model (a CNN) using the following code.
The program runs fine, but it does not display the Epoch/Step/Loss/Accuracy part:
print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
It is as if (i+1) % 100 == 0 never becomes true.
Training part:
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(dataloaders['train']):
        images = Variable(images)
        labels = Variable(labels)

        # Clear the gradients
        optimizer.zero_grad()
        # Forward propagation
        outputs = model(images)
        # Calculating loss with softmax to obtain cross entropy loss
        loss = criterion(outputs, labels)
        # Backward prop
        loss.backward()
        # Updating gradients
        optimizer.step()

        iter += 1

        # Total number of labels
        total = labels.size(0)
        # Obtaining predictions from max value
        _, predicted = torch.max(outputs.data, 1)
        # Calculate the number of correct answers
        correct = (predicted == labels).sum().item()

        # Print loss and accuracy
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, len(dataloaders['train']), loss.item(),
                          (correct / total) * 100))
Full Code:
https://pastebin.com/dshNmhRL
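A quick way to see whether that condition can ever fire is to check how many batches the training loader actually produces; if it is fewer than 100, (i+1) % 100 == 0 is never true. A small sketch, assuming the dataloaders dict from the code above:

n_batches = len(dataloaders['train'])       # mini-batches per epoch
print('batches per epoch:', n_batches)

# If this is below 100, replace the hard-coded 100 with a smaller interval,
# e.g. to log roughly ten times per epoch:
log_every = max(1, n_batches // 10)
# ...and in the loop:  if (i + 1) % log_every == 0: print(...)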
I am trying to classify skin images. The following code is used to train the model, but partway through training I run into the problem described in the title.
My setup:
python: 3.7
pytorch: 1.1
system: win10
start_epoch = 0
for epoch in range(num_epochs):
    print('Starting train epoch %d / %d' % (start_epoch + epoch + 1, num_epochs))
    model = model.to(device)
    print(device)
    #model.train()

    running_loss = 0
    count = 0
    epoch_loss = 0

    #for i, (input, depth) in enumerate(train_loader):
    for step, (input, depth) in enumerate(train_loader):
        # input, depth = data
        input = input.to(device)
        depth = depth.to(device)

        input_var = torch.tensor(input)
        depth_var = torch.tensor(depth).squeeze(1)
        #input_tensor = input_var.to(device)
        #depth_tensor = depth_var.to(device)

        output = model.forward(input_var)
        #new_output = output.to(device)
        #result = output.type(torch.FloatTensor)

        loss = loss_fn(output, depth_var)
        #loss = loss.type(torch.FloatTensor)
        print('count: ', count, ' loss:', loss.item())
        count += 1

        running_loss += loss.data.cpu().numpy()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if count % 100 == 0:
            torch.save(model, "./pkl/cifar.pkl")
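As an aside, saving only the model's parameters is the approach generally recommended in PyTorch, since serializing the whole model object ties the checkpoint to the exact class definition. A minimal sketch (the file path and the MyModel constructor are placeholders, not from the code above):

# Save just the weights instead of the whole model object.
torch.save(model.state_dict(), "./pkl/cifar_state.pkl")

# Later, reload them into a freshly constructed model of the same architecture:
# model = MyModel()                                     # hypothetical constructor
# model.load_state_dict(torch.load("./pkl/cifar_state.pkl"))
# model.to(device)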