Feature Extraction Using Pretrained Models - PyTorch

I want to use pretrained models to extract features from images. I used the resnet50, inception_v3, Xception, and inception_resnet models, removed the classifier/FC head from each (depending on the architecture, some models expose model.fc, others model.classifier or model.classif), concatenated the extracted features, and then trained a deep neural network on the concatenated features. However, I only got 1% validation accuracy, although when I built the same network in TensorFlow I got 95% accuracy. Can someone clarify what I did wrong in my PyTorch notebook?
model_resnet = models.resnet50(pretrained=True).cuda()
model_resnet.fc = nn.Sequential()

model_incep = models.inception_v3(pretrained=True).cuda()
model_incep.fc = nn.Sequential()

model_xcep = timm.create_model('xception', pretrained=True).cuda()
model_xcep.fc = nn.Sequential()

model_incep_res = timm.create_model('ens_adv_inception_resnet_v2', pretrained=True).cuda()
model_incep_res.classif = nn.Sequential()
def features_extract(loader, model, features):
    num_correct = 0
    num_samples = 0
    model.eval()
    i = 0
    batchsize = 64
    score = torch.empty((10222, features))
    with torch.no_grad():
        for x, y in iter(loader):
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            score[i*64:(i+1)*64] = scores
            i += 1
    model.train()
    return score
score_res=features_extract(trainloader,model_resnet,2048)
score_incep=features_extract(trainloader,model_incep,2048)
score_incep_res=features_extract(trainloader,model_incep_res,1536)
score_xcep=features_extract(trainloader,model_xcep,2048)
features_extracted=torch.cat([score_res,score_incep,score_incep_res,score_xcep],axis=1)
from collections import OrderedDict

my_model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(7680, 5000)),
    ('drop', nn.Dropout(p=0.5)),
    ('fc2', nn.Linear(5000, 120)),
    ('output', nn.Softmax(dim=0)),
])).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(my_model.parameters(), 0.001 )
from tqdm import tqdm
epochs = 5
check_every = 1
iters = 0
train_loss=0
modle.to(device)
for epoch in range(epochs):
    for i in range(features_extracted.shape[0]):
        # Get data to cuda if possible
        data = features_extracted[i].to(device=device)
        targets = y_train[i].to(device=device)
        # forward
        scores = modle(torch.reshape(data, (-1, 7680)))
        scores = max(scores)
        loss = criterion(scores, targets)
        # backward
        optimizer.zero_grad()
        loss.backward()
        # gradient descent or adam step
        optimizer.step()
def check_accuracy(features_extracted, model, y_train):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for i in range(features_extracted.shape[0]):
            # Get data to cuda if possible
            data = features_extracted[i].to(device=device)
            targets = y_train[i].to(device=device)
            global scores
            scores = model(data)
            predictions = torch.argmax(scores)
            if predictions == y_train[i]:
                num_correct += 1
    model.train()
    return num_correct / features_extracted.shape[0]

print(f"Accuracy on Validation set: {check_accuracy(features_val, modle, y_val):.2f}")

Related

Strange loss curve while training EfficientNetV2 with Pytorch

I'm new to PyTorch. For a regression task, I use a pretrained EfficientNetV2 model whose classifier is replaced by a single fully connected layer with one neuron, followed by a ReLU activation. However, both the training and validation losses suddenly increase after the first epoch, stay at roughly the same value for about 50 epochs, and then suddenly drop back to about the same value as in the first epoch. Can anyone help me figure out what's happening?
Some code for the model and training process:
# hyper-parameters
image_size = 256
learning_rate = 1e-3
batch_size = 32
epochs = 60

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = models.efficientnet_v2_m(pretrained=True, weights='DEFAULT')
        self.net.classifier[1] = nn.Linear(in_features=1280, out_features=1, bias=True)
        self.net.classifier = nn.Sequential(self.net.classifier, nn.ReLU())

    def forward(self, input):
        output = self.net(input)
        return output

model = Model()
# Define the loss function (L1 loss for the regression task) and an Adam optimizer
loss_fn = nn.L1Loss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
# Function to evaluate the model on the validation set and return the mean absolute error
def testAccuracy():
    model.eval()
    loss = 0.0
    total = 0.0
    with torch.no_grad():
        for data in validation_loader:
            images, labels = data
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            # run the model on the validation batch
            outputs = model(images)
            # accumulate the L1 loss for this batch
            loss += loss_fn(outputs, labels.unsqueeze(1)).item()
            total += 1
    # compute the mean absolute error over all validation batches
    mae = loss / total
    return mae
# Training function. We simply loop over our data iterator, feed the inputs to the network, and optimize.
def train(num_epochs):
    best_accuracy = 0.0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    train_loss_all = []
    val_loss_all = []
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        total = 0
        for i, (images, labels) in tqdm(enumerate(train_loader, 0), total=len(train_loader)):
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            # zero the parameter gradients
            optimizer.zero_grad()
            # predict values for the images in the training batch
            outputs = model(images)
            # compute the loss between the model output and the real labels
            loss = loss_fn(outputs, labels.unsqueeze(1))
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()
            # accumulate the loss for this batch
            running_loss += loss.item()
            total += 1
        train_loss = running_loss / total
        train_loss_all.append(train_loss)
        accuracy = testAccuracy()
        val_loss_all.append(accuracy)
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy
    history = {'train_loss': train_loss_all, 'val_loss': val_loss_all}
    return history
Loss curve: (figure omitted)
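One thing that may be worth checking (this is an assumption, since the targets aren't shown): the trailing nn.ReLU() on the regression head clamps every prediction at zero from below, and if the last linear layer drifts negative, the gradient through the ReLU becomes zero and the loss can sit on a long flat plateau at roughly the mean absolute value of the targets. A minimal sketch of the same head without the final activation:

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = models.efficientnet_v2_m(weights='DEFAULT')
        # single-output regression head, no activation on the output
        self.net.classifier[1] = nn.Linear(in_features=1280, out_features=1, bias=True)

    def forward(self, input):
        return self.net(input)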

Can the output labels follow the order of the input text?

I am new to NLP.
In PyTorch, I'm training BertForSequenceClassification for a multi-label task.
Is it possible for the order of the output labels to match the order of the input text?
The input is a text like: name, phone, address, and the output is: label_name, label_phone, label_address.
In this case, for the input: phone, address, name, the output is still: label_name, label_phone, label_address.
However, I'd like the output to look something like this: label_phone, label_address, label_name.
Is there anything I can change?
## Package
# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
import torch.utils.data as data
# BERT Related Libraries
from transformers import BertTokenizer, BertForSequenceClassification
# Python
import pandas as pd
import numpy as np
import os
import time
import matplotlib.pyplot as plt
# ML Parameters
epoch = 8
device = 'cuda'
num_labels = 9
batch_size = 32
epsilon = 1e-8
learning_rate = 5e-5
## Define model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
optimizer = optim.AdamW(model.parameters(), lr = learning_rate, eps = epsilon, weight_decay = 1e-2)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma=0.1, last_epoch=-1)
criterion = nn.BCEWithLogitsLoss()
train_losses = []
train_acces = []
val_losses = []
val_acces = []
## Train with training set
def train(model, iterator, optimizer, criterion, device):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (sentences, labels) in enumerate(iterator):
        # tokenize the sentences
        encoding = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)
        input_ids = encoding['input_ids']
        attention_mask = encoding['attention_mask']
        # generate prediction
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask)  # NOT USING INTERNAL CrossEntropyLoss
        # compute gradients and update weights
        loss = criterion(outputs.logits, labels)  # BCEWithLogitsLoss applies the sigmoid
        loss.backward()
        optimizer.step()
        # accumulate train loss
        train_loss += loss.item()
        # record processed data count
        prob = outputs.logits.sigmoid()
        total += labels.size(0) * labels.size(1)
        # threshold the probabilities to get binary predictions
        THRESHOLD = 0.7
        prediction = prob.detach().clone()
        prediction[prediction > THRESHOLD] = 1
        prediction[prediction <= THRESHOLD] = 0
        correct += prediction.eq(labels).sum().item()
    train_acc = correct / total
    print('train_loss: %f\t' % train_loss)
    print('train_acc: %f\t' % train_acc)
    return train_loss, train_acc
## Validate with testing set
def test(model, iterator, optimizer, criterion, device):
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (sentences, labels) in enumerate(iterator):
            # tokenize the sentences
            encoding = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)
            input_ids = encoding['input_ids']
            attention_mask = encoding['attention_mask']
            # generate prediction
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            val_loss += loss.item()
            # record processed data count
            prob = outputs.logits.sigmoid()
            total += labels.size(0) * labels.size(1)
            # threshold the probabilities to get binary predictions
            THRESHOLD = 0.7
            prediction = prob.detach().clone()
            prediction[prediction > THRESHOLD] = 1
            prediction[prediction <= THRESHOLD] = 0
            correct += prediction.eq(labels).sum().item()
    val_acc = correct / total
    print('val_loss: %f\t' % val_loss)
    print('val_acc: %f\t' % val_acc)
    print('correct: %i , total: %i' % (correct, total))
    return val_loss, val_acc
for e in range(epoch):
    print("===== Epoch %i =====" % e)
    print("Training started ...")
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    train_losses.append(train_loss / len(train_loader))
    train_acces.append(train_acc / len(train_loader))
    # validation testing
    print("Testing started ...")
    val_loss, val_acc = test(model, test_loader, optimizer, criterion, device)
    val_losses.append(val_loss / len(test_loader))
    val_acces.append(val_acc / len(test_loader))
    scheduler.step()
If you need more information, please let me know.
Thanks for your help.

How to check accuracy on BCELoss Pytorch?

I'm trying to use PyTorch to take a HeartDisease.csv and predict whether the patient has heart disease or not... the .csv provides 13 inputs and 1 target.
I'm using BCELoss and I'm having trouble understanding how to write an accuracy check function.
My num_samples is correct but not my num_correct. I think this is a result of not understanding the predictions tensor. Right now my num_correct is usually over 8000 while my num_samples is 303...
Any insight on how to write this check accuracy function is much appreciated.
I wrote this in a Google Colab notebook.
#imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
#create fully connected network
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.outputs = nn.Linear(input_size, 1)

    def forward(self, x):
        x = self.outputs(x)
        return torch.sigmoid(x)
#set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#hyperparameters
input_size = 13 # 13 inputs
num_classes = 1 # heartdisease or not
learning_rate = 0.001
batch_size = 64
num_epochs = 1
#load data
class MyDataset(Dataset):
    def __init__(self, root, n_inp):
        self.df = pd.read_csv(root)
        self.data = self.df.to_numpy()
        self.x, self.y = (torch.from_numpy(self.data[:, :n_inp]),
                          torch.from_numpy(self.data[:, n_inp:]))

    def __getitem__(self, idx):
        return self.x[idx, :], self.y[idx, :]

    def __len__(self):
        return len(self.data)

train_dataset = MyDataset("heart.csv", input_size)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = MyDataset("heart.csv", input_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
#initialize network
model = NN(input_size=input_size, num_classes=num_classes).to(device)
#loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#train network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        #get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)
        #forward
        scores = model(data.float())
        targets = targets.float()
        loss = criterion(scores, targets)
        #backward
        optimizer.zero_grad()
        loss.backward()
        #grad descent or adam step
        optimizer.step()
#check accuracy of model
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x.float())
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    print("Got {} / {} with accuracy {}".format(num_correct, num_samples, float(num_correct)/float(num_samples)*100))
    model.train()

print("checking accuracy on training data")
check_accuracy(train_loader, model)
print("checking accuracy on test data")
check_accuracy(test_loader, model)
Note: Don't fool yourself. A single linear layer + a sigmoid + BCE loss = logistic regression. This is a linear model, so just take note of that when referring to it as a "neural network", which is a term usually reserved for similar networks but with at least one hidden layer and nonlinear activations.
The sigmoid layer at the end of your model's forward() function returns an (N,1)-sized tensor, where N is the batch size. In other words, it returns a scalar for every data point. Each scalar is a value between 0 and 1 (this is the range of the sigmoid function).
The idea is to interpret those scalars as probabilities corresponding to the positive class. Suppose 1 corresponds to heart disease, and 0 corresponds to no heart disease; heart disease is the positive class, and no heart disease is the negative class. Now suppose a score is 0.6. This might be interpreted as a 60% chance that the associated label is heart disease, and a 40% chance that the associated label is no heart disease. This interpretation of the sigmoid output is what motivates the BCE loss to begin with (it's ultimately just a negative log likelihood).
So what you might do is check if your scores are greater than 0.5. If so, predict heart disease. If not, predict no heart disease.
Right now, you're computing maximums from the scores across dimension 1, which does nothing because dimension 1 is already of size 1; taking the maximum of a single value simply gives you that value.
Try something like this:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x.float())
            # Create a Boolean tensor (True for scores > 0.5, False otherwise)
            # and then cast it to a long tensor (True -> 1, False -> 0)
            predictions = (scores > 0.5).long()
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    print("Got {} / {} with accuracy {}".format(num_correct, num_samples, float(num_correct)/float(num_samples)*100))
    model.train()
You may also want to squeeze your prediction and target tensors to size (N) instead of (N,1), though I'm not sure it's necessary in your case.
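For example (a hypothetical illustration of why the shapes matter): if predictions ends up with shape (N,) while y is still (N, 1), the == comparison broadcasts to an (N, N) matrix and the correct count silently inflates, which is the same kind of blow-up described in the question. Squeezing both tensors keeps the comparison element-wise:

predictions = (scores > 0.5).long().squeeze(1)   # (N, 1) -> (N,)
targets = y.squeeze(1)                           # (N, 1) -> (N,)
num_correct += (predictions == targets).sum()    # element-wise over N values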

PyTorch: Add validation error in training

I am using PyTorch to train a CNN model. Here is my network architecture:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as I
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv2_drop = nn.Dropout2d()
        self.conv2_bn = nn.BatchNorm2d(64)
        self.fc1 = torch.nn.Linear(53*53*64, 256)
        self.fc2 = nn.Linear(256, 136)

    def forward(self, x):
        x = F.relu(self.conv1_bn(self.pool(self.conv1(x))))
        x = F.relu(self.conv2_bn(self.pool(self.conv2_drop(self.conv2(x)))))
        x = x.view(-1, 53*53*64)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x
Then I train the model like below:
# prepare the net for training
net.train()

for epoch in range(n_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    # train on batches of data, assumes you already have train_loader
    for batch_i, data in enumerate(train_loader):
        # get the input images and their corresponding labels
        images = data['image']
        key_pts = data['keypoints']
        # flatten pts
        key_pts = key_pts.view(key_pts.size(0), -1)
        # wrap them in a torch Variable
        images, key_pts = Variable(images), Variable(key_pts)
        # convert variables to floats for regression loss
        key_pts = key_pts.type(torch.FloatTensor)
        images = images.type(torch.FloatTensor)
        # forward pass to get outputs
        output_pts = net(images)
        # calculate the loss between predicted and target keypoints
        loss = criterion(output_pts, key_pts)
        # zero the parameter (weight) gradients
        optimizer.zero_grad()
        # backward pass to calculate the weight gradients
        loss.backward()
        # update the weights
        optimizer.step()
        # print loss statistics
        running_loss += loss.data[0]
I am wondering if it is possible to add the validation error to the training loop. I mean something like this (validation_split) in Keras:
myModel.fit(trainX, trainY, epochs=50, batch_size=1, verbose=2, validation_split = 0.1)
Here is an example of how to split your dataset into training and validation sets, and then switch between the two phases every epoch:
import numpy as np
import torch
from torchvision import datasets
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
# Examples:
my_dataset = datasets.MNIST(root="/home/benjamin/datasets/mnist", train=True, download=True)
validation_split = 0.1
dataset_len = len(my_dataset)
indices = list(range(dataset_len))
# Randomly splitting indices:
val_len = int(np.floor(validation_split * dataset_len))
validation_idx = np.random.choice(indices, size=val_len, replace=False)
train_idx = list(set(indices) - set(validation_idx))
# Contiguous split
# train_idx, validation_idx = indices[split:], indices[:split]
## Defining the samplers for each phase based on the random indices:
train_sampler = SubsetRandomSampler(train_idx)
validation_sampler = SubsetRandomSampler(validation_idx)
train_loader = torch.utils.data.DataLoader(my_dataset, sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(my_dataset, sampler=validation_sampler)
data_loaders = {"train": train_loader, "val": validation_loader}
data_lengths = {"train": len(train_idx), "val": val_len}
# Training with Validation (your code + code from the PyTorch tutorial: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)
n_epochs = 40
net = ...

for epoch in range(n_epochs):
    print('Epoch {}/{}'.format(epoch, n_epochs - 1))
    print('-' * 10)
    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            optimizer = scheduler(optimizer, epoch)
            net.train(True)  # Set model to training mode
        else:
            net.train(False)  # Set model to evaluate mode
        running_loss = 0.0
        # Iterate over data.
        for data in data_loaders[phase]:
            # get the input images and their corresponding labels
            images = data['image']
            key_pts = data['keypoints']
            # flatten pts
            key_pts = key_pts.view(key_pts.size(0), -1)
            # wrap them in a torch Variable
            images, key_pts = Variable(images), Variable(key_pts)
            # convert variables to floats for regression loss
            key_pts = key_pts.type(torch.FloatTensor)
            images = images.type(torch.FloatTensor)
            # forward pass to get outputs
            output_pts = net(images)
            # calculate the loss between predicted and target keypoints
            loss = criterion(output_pts, key_pts)
            # zero the parameter (weight) gradients
            optimizer.zero_grad()
            # backward + optimize only if in training phase
            if phase == 'train':
                loss.backward()
                # update the weights
                optimizer.step()
            # print loss statistics
            running_loss += loss.data[0]
        epoch_loss = running_loss / data_lengths[phase]
        print('{} Loss: {:.4f}'.format(phase, epoch_loss))
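As a side note beyond the original answer: if you do not need the sampler machinery, torch.utils.data.random_split can also produce the two subsets directly (a minimal sketch, assuming a 10% validation split and a batch size of 32):

from torch.utils.data import DataLoader, random_split

val_len = int(0.1 * len(my_dataset))
train_set, val_set = random_split(my_dataset, [len(my_dataset) - val_len, val_len])

data_loaders = {"train": DataLoader(train_set, batch_size=32, shuffle=True),
                "val": DataLoader(val_set, batch_size=32, shuffle=False)}
data_lengths = {"train": len(train_set), "val": len(val_set)}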

Failing to train SkipGram word embedding in Pytorch

I am training skip-gram word embeddings using the well-known model described in https://arxiv.org/abs/1310.4546. I want to train it in PyTorch, but I am getting errors and I can't figure out where they are coming from. Below I have provided my model class, training loop, and batching method. Does anyone have any insight into what's going on?
I am getting an error on the output = loss(data, target) line. It complains about <class 'torch.LongTensor'>, which is weird because CrossEntropyLoss is supposed to take a long tensor as its target. The output shape might also be wrong: it is torch.Size([1000, 100, 1000]) after the feedforward.
I have my model defined as:
import torch
import torch.nn as nn

torch.manual_seed(1)

class SkipGram(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(SkipGram, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_layer = nn.Linear(embedding_dim, vocab_size)
        # Loss needs input: (minibatch N, C) and target: (minibatch, 1), each label is a class
        # The loss is calculated in the training loop

    def forward(self, x):
        embeds = self.embeddings(x)
        x = self.hidden_layer(embeds)
        return x
My training is defined as:
import torch.optim as optim
from torch.autograd import Variable

net = SkipGram(1000, 300)
optimizer = optim.SGD(net.parameters(), lr=0.01)
batch_size = 100
size = len(train_ints)
batches = batch_index_gen(batch_size, size)
inputs, targets = build_tensor_from_batch_index(batches[0], train_ints)

for i in range(100):
    running_loss = 0.0
    for batch_idx, batch in enumerate(batches):
        data, target = build_tensor_from_batch_index(batch, train_ints)
        # if (torch.cuda.is_available()):
        #     data, target = data.cuda(), target.cuda()
        #     net = net.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = net.forward(data)
        loss = nn.CrossEntropyLoss()
        output = loss(data, target)
        output.backward()
        optimizer.step()
        running_loss += loss.data[0]
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            i, batch_idx * len(batch_size), len(size),
            100. * (batch_idx * len(batch_size)) / len(size), loss.data[0]))
If it's useful, my batching function is:
def build_tensor_from_batch_index(index, train_ints):
    minibatch = []
    for i in range(index[0], index[1]):
        input_arr = np.zeros((1000, 1), dtype=np.int)
        target_arr = np.zeros((1000, 1), dtype=np.int)
        input_index, target_index = train_ints[i]
        input_arr[input_index] = 1
        target_arr[input_index] = 1
        input_tensor = torch.from_numpy(input_arr)
        target_tensor = torch.from_numpy(target_arr)
        minibatch.append((input_tensor, target_tensor))
    # Concatenate all tensors into a minibatch
    #x = [tensor[0] for tensor in minibatch]
    #print(x)
    input_minibatch = torch.cat([tensor[0] for tensor in minibatch], 1)
    target_minibatch = torch.cat([tensor[1] for tensor in minibatch], 1)
    #target_minibatch = minibatch[0][1]
    return input_minibatch, target_minibatch
I'm not sure about this since I did not read the paper, but it seems weird that you are computing the loss with the original data and the targets:
output = loss(data, target)
Considering that the output of the network is output = net.forward(data) I think you should compute your loss as:
error = loss(output, target)
If this doesn't help, briefly point out what the paper says about the loss function.
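A side note beyond the original answer, stated as an assumption since the full data pipeline isn't shown: nn.Embedding expects integer word indices (a LongTensor) rather than one-hot vectors, and nn.CrossEntropyLoss expects logits of shape (N, vocab_size) together with a target of shape (N,) containing class indices. A minimal sketch of what one batch could look like under those assumptions:

import torch
import torch.nn as nn

vocab_size, embedding_dim, batch = 1000, 300, 100
net = SkipGram(vocab_size, embedding_dim)
criterion = nn.CrossEntropyLoss()

# hypothetical batch: center-word indices and their context-word indices
center_words = torch.randint(0, vocab_size, (batch,))    # LongTensor, shape (N,)
context_words = torch.randint(0, vocab_size, (batch,))   # LongTensor, shape (N,)

logits = net(center_words)                 # shape (N, vocab_size)
error = criterion(logits, context_words)   # scalar loss
error.backward()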
