vgg pytorch is probability distribution supposed to add up to 1? - conv-neural-network

I've trained a vgg16 model to predict 102 classes of flowers.
It works however now that I'm trying to understand one of it's predictions I feel it's not acting normally.
model layout
# Imports here
import os
import numpy as np
import torch
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import json
from pprint import pprint
from scipy import misc
%matplotlib inline
data_dir = 'flower_data'
train_dir = data_dir + '/train'
test_dir = data_dir + '/valid'
json_data=open('cat_to_name.json').read()
main_classes = json.loads(json_data)
main_classes = {int(k):v for k,v in classes.items()}
train_transform_2 = transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomRotation(30),
transforms.RandomHorizontalFlip(),
transforms.ToTensor()])
test_transform_2= transforms.Compose([transforms.RandomResizedCrop(224),
transforms.ToTensor()])
# TODO: Load the datasets with ImageFolder
train_data = datasets.ImageFolder(train_dir, transform=train_transform_2)
test_data = datasets.ImageFolder(test_dir, transform=test_transform_2)
# define dataloader parameters
batch_size = 20
num_workers=0
# TODO: Using the image datasets and the trainforms, define the dataloaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
num_workers=num_workers, shuffle=True)
vgg16 = models.vgg16(pretrained=True)
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
param.requires_grad = False
import torch.nn as nn
n_inputs = vgg16.classifier[6].in_features
# add last linear layer (n_inputs -> 102 flower classes)
# new layers automatically have requires_grad = True
last_layer = nn.Linear(n_inputs, len(classes))
vgg16.classifier[6] = last_layer
import torch.optim as optim
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.001
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001)
pre_trained_model=torch.load("model.pt")
new=list(pre_trained_model.items())
my_model_kvpair=vgg16.state_dict()
count=0
for key,value in my_model_kvpair.items():
layer_name, weights = new[count]
my_model_kvpair[key] = weights
count+=1
# number of epochs to train the model
n_epochs = 6
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf # set initial "min" to infinity
for epoch in range(1, n_epochs+1):
# keep track of training and validation loss
train_loss = 0.0
valid_loss = 0.0
###################
# train the model #
###################
# model by default is set to train
vgg16.train()
for batch_i, (data, target) in enumerate(train_loader):
# clear the gradients of all optimized variables
optimizer.zero_grad()
# forward pass: compute predicted outputs by passing inputs to the model
output = vgg16(data)
# calculate the batch loss
loss = criterion(output, target)
# backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# perform a single optimization step (parameter update)
optimizer.step()
# update training loss
train_loss += loss.item()
if batch_i % 20 == 19: # print training loss every specified number of mini-batches
print('Epoch %d, Batch %d loss: %.16f' %
(epoch, batch_i + 1, train_loss / 20))
train_loss = 0.0
######################
# validate the model #
######################
vgg16.eval() # prep model for evaluation
for data, target in test_loader:
# forward pass: compute predicted outputs by passing inputs to the model
output = vgg16(data)
# calculate the loss
loss = criterion(output, target)
# update running validation loss
valid_loss += loss.item()
# print training/validation statistics
# calculate average loss over an epoch
train_loss = train_loss/len(train_loader.dataset)
valid_loss = valid_loss/len(test_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
epoch+1,
train_loss,
valid_loss
))
# save model if validation loss has decreased
if valid_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
valid_loss_min,
valid_loss))
torch.save(vgg16.state_dict(), 'model.pt')
valid_loss_min = valid_loss
testing on a single image
tensor = torch.from_numpy(test_image)
reshaped = tensor.permute(2, 0, 1).unsqueeze(0)
floatified = reshaped.to(torch.float32) / 255
vgg16(floatified)
>>>
tensor([[ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291]],
grad_fn=<ThAddmmBackward>)
sum([ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291])
>>>
5.325799999999998
given this as how I test it on a single image (and the model as usual is trained and tested on batches it returns a prediction matrix that doesn't seem to be normalized or add up to 1.
Is this normal?

I cannot tell with certainty without seeing your training code, but it's most likely your model was trained with cross-entropy loss and as such it outputs logits rather than class probabilities. You can turn them into proper probabilities by applying the softmax function.

Related

How to calculate the f1-score?

I have a pyTorch-code to train a model that should be able to detect placeholder-images among product-images. I didn't write the code by myself as I am very unexperienced with CNNs and Machine Learning.
My boss told me to calculate the f1-score for that model and i found out that the formula for that is ((precision * recall)/(precision + recall)) but I don't know how I get precision and recall. Is someone able to tell me how I can get those two parameters from that following code?
(Sorry for the long piece of code, but I didn't really know what is necessary and what isn't)
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)
data_dir = "data"
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "resnet"
# Number of classes in the dataset [we have four classes A-Balik-Duz-Princess]
num_classes = 2
# Batch size for training (change depending on how much memory you have)
batch_size = 25
# Number of epochs to train for (This will need to be calculated in order to address under and over fitting issue)
num_epochs = 20
# Flag for feature extracting. When False, we fine tune the whole model,
# when True we only update the reshaped layer params
feature_extract = True
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
since = time.time()
print("model is : ",model)
val_acc_history = []
val_loss_history = []
train_acc_history = []
train_loss_history = []
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients (This can be changed to the Adam and other optimizers)
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# Get model outputs and calculate loss
# Special case for inception because in training it has an auxiliary output. In train
# mode we calculate the loss by summing the final output and the auxiliary output
# but in testing we only consider the final output.
if is_inception and phase == 'train':
# From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
outputs, aux_outputs = model(inputs)
loss1 = criterion(outputs, labels)
loss2 = criterion(aux_outputs, labels)
loss = loss1 + 0.4*loss2
else:
outputs = model(inputs)
loss = criterion(outputs, labels)
_, preds = torch.max(outputs, 1)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
if phase == 'val':
val_acc_history.append(epoch_acc)
val_loss_history.append(epoch_loss)
if phase == 'train':
train_acc_history.append(epoch_acc)
train_loss_history.append(epoch_loss)
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model, val_acc_history, train_acc_history,val_loss_history,train_loss_history
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
###############################################
### Initialize and Reshape the Networks
###############################################
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
# Initialize these variables which will be set in this if statement. Each of these
# variables is model specific.
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet18
"""
model_ft = models.resnet152(pretrained=use_pretrained)
#we can select any possible variation of ResNet such as Resnet18, Resnet34, Resnet50, Resnet101, and Resnet152
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "alexnet":
""" Alexnet
"""
model_ft = models.alexnet(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "vgg":
""" VGG11_bn
"""
model_ft = models.vgg11_bn(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "squeezenet":
""" Squeezenet
"""
model_ft = models.squeezenet1_0(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
model_ft.num_classes = num_classes
input_size = 224
elif model_name == "densenet":
""" Densenet
"""
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
Be careful, expects (299,299) sized images and has auxiliary output
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
# Handle the auxilary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
# Print the model we just instantiated
#print(model_ft)
########################
### LOAD DATA
########################
# Data augmentation and normalization for training
# there are multiple approaches for data augmentation which can be added in the future
# Just normalization for validation
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(input_size),
#transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(input_size),
transforms.CenterCrop(input_size),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
print("Initializing Datasets and Dataloaders...")
# Create training and validation datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#############################
### Create the Optimizer
#############################
# Send the model to GPU
model_ft = model_ft.to(device)
# Gather the parameters to be optimized/updated in this run. If we are
# fine tuning we will be updating all parameters. However, if we are
# doing feature extract method, we will only update the parameters
# that we have just initialized, i.e. the parameters with requires_grad
# is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
params_to_update = []
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
params_to_update.append(param)
print("\t",name)
else:
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
print("\t",name)
# Observe that all parameters are being optimized we can add leaky ReLU and much more
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
###########################
### Run Training and Validation Step
###########################
%time
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()
# Train and evaluate
model_ft, hist, loss_t,vloss_acc, tloss_acc = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))
...
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
# Add these lines to obtain f1_score
from sklearn.metrics import f1_score
f1_score = f1_score(labels.data, preds)
#or: f1_score = f1_score(labels.cpu().data, preds.cpu())
...
You can use sklearn to calculate f1_score
from sklearn.metrics import f1_score
X, y = get_data(...)
y_pred = model.predict(X)
f1_score(y, y_pred)
I think it's better to call f1-score with macro/micro.
from sklearn.metrics import f1_score
print('F1-Score macro: ',f1_score(outputs, labels, average='macro'))
print('F1-Score micro: ',f1_score(outputs, labels, average='micro'))
The key difference between micro and macro F1 score is their behavior on imbalanced datasets. Micro F1 score often doesn't return an objective measure of model performance when the classes are imbalanced, whilst Macro F1 score is able to do so.Read More

TensorFlow 2.0 GradientTape with EarlyStopping

I am using Python 3.7.5 and TensorFlow 2.0's 'GradientTape' API for classification of MNIST dataset using 300 100 dense fully connected architecture. I would like to use TensorFlow's 'EarlyStopping' with GradientTape() so that the training stops according to the variable being watched or monitored and according to patience parameters.
The code I have is below:
# Use tf.data to batch and shuffle the dataset
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(100).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)
# Choose an optimizer and loss function for training-
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(lr = 0.001)
def create_nn_gradienttape():
"""
Function to create neural network for use
with GradientTape API following MNIST
300 100 architecture
"""
model = Sequential()
model.add(
Dense(
units = 300, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal,
input_shape = (784,)
)
)
model.add(
Dense(
units = 100, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal
)
)
model.add(
Dense(
units = 10, activation = 'softmax'
)
)
return model
# Instantiate the model to be trained using GradientTape-
model = create_nn_gradienttape()
# Select metrics to measure the error & accuracy of model.
# These metrics accumulate the values over epochs and then
# print the overall result-
train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')
test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')
# Use tf.GradientTape to train the model-
#tf.function
def train_step(data, labels):
"""
Function to perform one step of Gradient
Descent optimization
"""
with tf.GradientTape() as tape:
# 'training=True' is only needed if there are layers with different
# behavior during training versus inference (e.g. Dropout).
# predictions = model(data, training=True)
predictions = model(data)
loss = loss_fn(labels, predictions)
# 'gradients' is a list variable!
gradients = tape.gradient(loss, model.trainable_variables)
# IMPORTANT:
# Multiply mask with computed gradients-
# List to hold element-wise multiplication between-
# computed gradient and masks-
grad_mask_mul = []
# Perform element-wise multiplication between computed gradients and masks-
for grad_layer, mask in zip(gradients, mask_model_stripped.trainable_weights):
grad_mask_mul.append(tf.math.multiply(grad_layer, mask))
# optimizer.apply_gradients(zip(gradients, model.trainable_variables))
optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
#tf.function
def test_step(data, labels):
"""
Function to test model performance
on testing dataset
"""
# training=False is only needed if there are layers with different
# behavior during training versus inference (e.g. Dropout).
predictions = model(data)
t_loss = loss_fn(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
EPOCHS = 15
for epoch in range(EPOCHS):
# Reset the metrics at the start of the next epoch
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
for x, y in train_ds:
train_step(x, y)
for x_t, y_t in test_ds:
test_step(x_t, y_t)
template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'
print(template.format(epoch + 1,
train_loss.result(), train_accuracy.result()*100,
test_loss.result(), test_accuracy.result()*100))
# Count number of non-zero parameters in each layer and in total-
# print("layer-wise manner model, number of nonzero parameters in each layer are: \n")
model_sum_params = 0
for layer in model.trainable_weights:
# print(tf.math.count_nonzero(layer, axis = None).numpy())
model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()
print("Total number of trainable parameters = {0}\n".format(model_sum_params))
In the code above, How can I use 'tf.keras.callbacks.EarlyStopping' with GradientTape() API ?
Thanks!

vgg probability doesn't add up to 1, pytorch

I've trained a vgg16 model to predict 102 classes of flowers.
It works however now that I'm trying to understand one of it's predictions I feel it's not acting normally.
model layout
# Imports here
import os
import numpy as np
import torch
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import json
from pprint import pprint
from scipy import misc
%matplotlib inline
data_dir = 'flower_data'
train_dir = data_dir + '/train'
test_dir = data_dir + '/valid'
json_data=open('cat_to_name.json').read()
main_classes = json.loads(json_data)
main_classes = {int(k):v for k,v in classes.items()}
train_transform_2 = transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomRotation(30),
transforms.RandomHorizontalFlip(),
transforms.ToTensor()])
test_transform_2= transforms.Compose([transforms.RandomResizedCrop(224),
transforms.ToTensor()])
# TODO: Load the datasets with ImageFolder
train_data = datasets.ImageFolder(train_dir, transform=train_transform_2)
test_data = datasets.ImageFolder(test_dir, transform=test_transform_2)
# define dataloader parameters
batch_size = 20
num_workers=0
# TODO: Using the image datasets and the trainforms, define the dataloaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
num_workers=num_workers, shuffle=True)
vgg16 = models.vgg16(pretrained=True)
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
param.requires_grad = False
import torch.nn as nn
n_inputs = vgg16.classifier[6].in_features
# add last linear layer (n_inputs -> 102 flower classes)
# new layers automatically have requires_grad = True
last_layer = nn.Linear(n_inputs, len(classes))
vgg16.classifier[6] = last_layer
import torch.optim as optim
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.001
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001)
pre_trained_model=torch.load("model.pt")
new=list(pre_trained_model.items())
my_model_kvpair=vgg16.state_dict()
count=0
for key,value in my_model_kvpair.items():
layer_name, weights = new[count]
my_model_kvpair[key] = weights
count+=1
# number of epochs to train the model
n_epochs = 6
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf # set initial "min" to infinity
for epoch in range(1, n_epochs+1):
# keep track of training and validation loss
train_loss = 0.0
valid_loss = 0.0
###################
# train the model #
###################
# model by default is set to train
vgg16.train()
for batch_i, (data, target) in enumerate(train_loader):
# clear the gradients of all optimized variables
optimizer.zero_grad()
# forward pass: compute predicted outputs by passing inputs to the model
output = vgg16(data)
# calculate the batch loss
loss = criterion(output, target)
# backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# perform a single optimization step (parameter update)
optimizer.step()
# update training loss
train_loss += loss.item()
if batch_i % 20 == 19: # print training loss every specified number of mini-batches
print('Epoch %d, Batch %d loss: %.16f' %
(epoch, batch_i + 1, train_loss / 20))
train_loss = 0.0
######################
# validate the model #
######################
vgg16.eval() # prep model for evaluation
for data, target in test_loader:
# forward pass: compute predicted outputs by passing inputs to the model
output = vgg16(data)
# calculate the loss
loss = criterion(output, target)
# update running validation loss
valid_loss += loss.item()
# print training/validation statistics
# calculate average loss over an epoch
train_loss = train_loss/len(train_loader.dataset)
valid_loss = valid_loss/len(test_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
epoch+1,
train_loss,
valid_loss
))
# save model if validation loss has decreased
if valid_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
valid_loss_min,
valid_loss))
torch.save(vgg16.state_dict(), 'model.pt')
valid_loss_min = valid_loss
testing on a single image
tensor = torch.from_numpy(test_image)
reshaped = tensor.permute(2, 0, 1).unsqueeze(0)
floatified = reshaped.to(torch.float32) / 255
vgg16(floatified)
>>>
tensor([[ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291]],
grad_fn=<ThAddmmBackward>)
sum([ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291])
>>>
5.325799999999998
given this as how I test it on a single image (and the model as usual is trained and tested on batches it returns a prediction matrix that doesn't seem to be normalized or add up to 1.
Is this normal?
Yes, official network implementations in PyTorch don't apply softmax to the last linear layer. Check the code for VGG. You can use nn.softmax to achieve what you want:
m = nn.Softmax()
out = vgg16(floatified)
out = m(out)
You can also use nn.functional.softmax:
out = nn.functional.softmax(vgg16(floatified))

InvalidArgumentError Sentiment analyser with keras

I have built a sentiment analyzer using Keras as a binary classification problem. I am using the Imdb dataset using GRU.
My code is:
# coding=utf-8
# ==========
# MODEL
# ==========
# imports
from __future__ import print_function
from timeit import default_timer as timer
from datetime import timedelta
from keras.models import Sequential
from keras.preprocessing import sequence
from keras import regularizers
from keras.layers import Dense, Embedding
from keras.layers import GRU, LeakyReLU, Bidirectional
from keras.datasets import imdb
#start a timer
start = timer()
# Hyperparameters
Model_Name = 'my_model.h5'
vocab_size = 5000
maxlen = 1000
batch_size = 512
hidden_layer_size = 2
test_split = 0.3
dropout = 0.1
num_epochs = 1
alpha = 0.2
validation_split = 0.25
l1 = 0.01
l2 = 0.01
# Dataset loading
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
maxlen=maxlen)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
# Data preprocessing
# Sequence padding
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
# Network building
print('Build model...')
model = Sequential()
model.add(Embedding(vocab_size, hidden_layer_size))
model.add(Bidirectional(GRU(hidden_layer_size, kernel_initializer='uniform', kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2), dropout=dropout, recurrent_dropout=dropout,return_sequences=True)))
model.add(LeakyReLU())
model.add(Bidirectional(GRU(hidden_layer_size, kernel_initializer='uniform', dropout=dropout, kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2), recurrent_dropout=dropout)))
model.add(LeakyReLU())
model.add(Dense(1, activation='softmax', kernel_initializer='uniform', kernel_regularizer=regularizers.l1_l2(l1=l1,l2=l2)))
# Compile my model
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print('Train...')
# Fit the model
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_split=validation_split)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)
# Create a summary, a plot and print the scores of the model
model.summary()
print('Test score:', score)
print('Test accuracy:', acc)
# Save model architecture, weights, training configuration (loss,optimizer),
# and also the state of the optimizer, so you can resume where you stopped
model.save(Model_Name)
end = timer()
print('Running time: ' + str(timedelta(seconds=(end - start))) + ' in Hours:Minutes:Seconds')
I keep receiving an Error message which I don't completely understand:
InvalidArgumentError (see above for traceback): indices[502,665] = 5476 is not in [0, 5000)
[[Node: embedding_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device=/job:localhost/replica:0/task:0/device:CPU:0](embedding_1/embeddings/read, embedding_1/Cast)]]
Can anyone help me understand what causes this error and how to solve it?
The error complains about a non-existent word index. That's because you are only limiting the number of Emedding features (i.e. there is a word with index 5476 which is not in the range [0, 5000), which 5000 refers to the vocab_size you have set). To resolve this, you also need to pass the vocab_size as num_words argument of load_data function, like this:
... = imdb.load_data(num_words=vocab_size, ...)
This way you are limiting the words to the most frequent words (i.e. top vocab_size words with the most frequency in the dataset) with their indices in range [0, vocab_size).

PyTorch: Add validation error in training

I am using PyTorch to train a cnn model. Here is my Network architecture:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as I
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 5)
self.pool = nn.MaxPool2d(2,2)
self.conv1_bn = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, 5)
self.conv2_drop = nn.Dropout2d()
self.conv2_bn = nn.BatchNorm2d(64)
self.fc1 = torch.nn.Linear(53*53*64, 256)
self.fc2 = nn.Linear(256, 136)
def forward(self, x):
x = F.relu(self.conv1_bn(self.pool(self.conv1(x))))
x = F.relu(self.conv2_bn(self.pool(self.conv2_drop(self.conv2(x)))))
x = x.view(-1, 53*53*64)
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return x
Then I train the model like below:
# prepare the net for training
net.train()
for epoch in range(n_epochs): # loop over the dataset multiple times
running_loss = 0.0
# train on batches of data, assumes you already have train_loader
for batch_i, data in enumerate(train_loader):
# get the input images and their corresponding labels
images = data['image']
key_pts = data['keypoints']
# flatten pts
key_pts = key_pts.view(key_pts.size(0), -1)
# wrap them in a torch Variable
images, key_pts = Variable(images), Variable(key_pts)
# convert variables to floats for regression loss
key_pts = key_pts.type(torch.FloatTensor)
images = images.type(torch.FloatTensor)
# forward pass to get outputs
output_pts = net(images)
# calculate the loss between predicted and target keypoints
loss = criterion(output_pts, key_pts)
# zero the parameter (weight) gradients
optimizer.zero_grad()
# backward pass to calculate the weight gradients
loss.backward()
# update the weights
optimizer.step()
# print loss statistics
running_loss += loss.data[0]
I am wondering if it is possible to add the validation error in the training? I mean something like this (validation split) in Keras:
myModel.fit(trainX, trainY, epochs=50, batch_size=1, verbose=2, validation_split = 0.1)
Here is an example how to split your dataset for training and validation, then switch between the two phases every epoch:
import numpy as np
import torch
from torchvision import datasets
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
# Examples:
my_dataset = datasets.MNIST(root="/home/benjamin/datasets/mnist", train=True, download=True)
validation_split = 0.1
dataset_len = len(my_dataset)
indices = list(range(dataset_len))
# Randomly splitting indices:
val_len = int(np.floor(validation_split * dataset_len))
validation_idx = np.random.choice(indices, size=val_len, replace=False)
train_idx = list(set(indices) - set(validation_idx))
# Contiguous split
# train_idx, validation_idx = indices[split:], indices[:split]
## Defining the samplers for each phase based on the random indices:
train_sampler = SubsetRandomSampler(train_idx)
validation_sampler = SubsetRandomSampler(validation_idx)
train_loader = torch.utils.data.DataLoader(my_dataset, sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(my_dataset, sampler=validation_sampler)
data_loaders = {"train": train_loader, "val": validation_loader}
data_lengths = {"train": len(train_idx), "val": val_len}
# Training with Validation (your code + code from Pytorch tutorial: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)
n_epochs = 40
net = ...
for epoch in range(n_epochs):
print('Epoch {}/{}'.format(epoch, n_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
optimizer = scheduler(optimizer, epoch)
net.train(True) # Set model to training mode
else:
net.train(False) # Set model to evaluate mode
running_loss = 0.0
# Iterate over data.
for data in data_loaders[phase]:
# get the input images and their corresponding labels
images = data['image']
key_pts = data['keypoints']
# flatten pts
key_pts = key_pts.view(key_pts.size(0), -1)
# wrap them in a torch Variable
images, key_pts = Variable(images), Variable(key_pts)
# convert variables to floats for regression loss
key_pts = key_pts.type(torch.FloatTensor)
images = images.type(torch.FloatTensor)
# forward pass to get outputs
output_pts = net(images)
# calculate the loss between predicted and target keypoints
loss = criterion(output_pts, key_pts)
# zero the parameter (weight) gradients
optimizer.zero_grad()
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
# update the weights
optimizer.step()
# print loss statistics
running_loss += loss.data[0]
epoch_loss = running_loss / data_lengths[phase]
print('{} Loss: {:.4f}'.format(phase, epoch_loss))

Resources