Apply Torchvision Transforms to a numpy array - pytorch

I wrote the following code:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
val_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float())

# Define the dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
I would like to apply the transform Compose to my dataset (X_train and X_val), which are both NumPy arrays. How can I apply the transform to augment and normalize my dataset?
Should I apply it before model training or during training?

You can simply add the transforms to your training pipeline:
train_transformed = transform(torch.from_numpy(X_train).float())
val_transformed = transform(torch.from_numpy(X_val).float())

train_dataset = torch.utils.data.TensorDataset(train_transformed, torch.from_numpy(y_train).float())
val_dataset = torch.utils.data.TensorDataset(val_transformed, torch.from_numpy(y_val).float())
This way the transform is applied once, when the datasets are built, before training starts.
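Keep in mind that ToPILImage converts one image at a time, not a whole batch, and that transforming the arrays up front bakes a single random crop/flip into the data permanently. If you want the augmentation re-drawn every epoch, a common alternative is a small custom Dataset that applies the transform per sample in __getitem__. A minimal sketch, assuming X_train has shape (N, H, W, C) with uint8 pixels (NumpyImageDataset is a made-up name, not from the original code):

import torch
from torch.utils.data import Dataset

class NumpyImageDataset(Dataset):
    """Wraps NumPy arrays and applies a torchvision transform per sample,
    so random augmentations are re-drawn on every access (every epoch)."""
    def __init__(self, X, y, transform=None):
        self.X = X                       # assumed (N, H, W, C) uint8 images
        self.y = torch.from_numpy(y).float()
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        if self.transform is not None:
            x = self.transform(x)        # ToPILImage accepts an HWC ndarray
        return x, self.y[idx]

train_dataset = NumpyImageDataset(X_train, y_train, transform=transform)
val_dataset = NumpyImageDataset(X_val, y_val, transform=transform)

The DataLoader lines from the question then work unchanged with these datasets.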

Related

trained model for image classification always returns the 0th index in pytorch

I've used transfer learning to classify images (ants vs. bees). To be honest, I just copy-pasted the exact code available here, saved the model using the PyCharm IDE, then loaded it and passed a single image to predict the output. But instead of the actual output, I keep getting the same zeroth-index value defined inside my classes.
This is the code base of the whole model:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pickle
def main():
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    data_dir = 'hymenoptera_data'
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['train', 'val']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    class_names = image_datasets['train'].classes
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def imshow(inp, title=None):
        """Imshow for Tensor."""
        inp = inp.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        plt.imshow(inp)
        if title is not None:
            plt.title(title)
        plt.pause(0.001)  # pause a bit so that plots are updated

    # Get a batch of training data
    inputs, classes = next(iter(dataloaders['train']))
    # Make a grid from batch
    out = torchvision.utils.make_grid(inputs)
    imshow(out, title=[class_names[x] for x in classes])

    def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0
        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)
            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode
                running_loss = 0.0
                running_corrects = 0
                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()
                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
            print()
        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')
        # load best model weights
        model.load_state_dict(best_model_wts)
        return model

    def visualize_model(model, num_images=6):
        was_training = model.training
        model.eval()
        images_so_far = 0
        fig = plt.figure()
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(dataloaders['val']):
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_images // 2, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title(f'predicted: {class_names[preds[j]]}')
                    imshow(inputs.cpu().data[j])
                    if images_so_far == num_images:
                        model.train(mode=was_training)
                        return
            model.train(mode=was_training)

    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # Here the size of each output sample is set to 2.
    # Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
    model_ft.fc = nn.Linear(num_ftrs, 2)
    model_ft = model_ft.to(device)
    criterion = nn.CrossEntropyLoss()
    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

    ###
    # save using pickle
    # pickle.dump(model_ft, open('model.pkl', 'wb'))
    ###
    # save using torch
    # def save_model(model, best_acc):
    #     state = {
    #         'model': model_ft,
    #         'acc': best_acc,
    #     }
    torch.save(model_ft, './best_model.pth')

if __name__ == '__main__':
    main()
Now, loading the saved model and passing an image to be worked on:
from __future__ import print_function, division
import torch
import numpy as np
from torchvision import transforms
import PIL.Image as Image

classes = [
    "ants",
    "bees",
]

# loading model
model = torch.load('best_model.pth')

# transform the image
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(torch.Tensor(mean), torch.Tensor(std))
])

def classify(model, image_transforms, image_path, classes):
    model = model.eval()
    image = Image.open(image_path)
    image = image_transforms(image).float()
    image = image.unsqueeze(0)
    output = model(image)
    _, predicted = torch.max(output.data, 1)
    print(classes[predicted.item()])

classify(model, image_transforms, "beeimage.jpg", classes)
The output should be bees, but instead it returns ants. The console output is:
C:\Users\prasa\Desktop\DL\venv\Scripts\python.exe C:\Users\prasa\Desktop\DL\callmod1.py
ants
Process finished with exit code 0
Well, I realized I made a silly mistake; I should have debugged it before posting here.
My train_model function was never called in the training code. As a result, the network was never fine-tuned and simply acted on the pretrained weights.
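Concretely, main() needs a call like this before saving, using the objects already defined there (same train_model signature as above):

# Fine-tune the network before saving; without this call, torch.save
# writes out the ResNet with only its ImageNet-pretrained weights.
model_ft = train_model(model_ft, criterion, optimizer_ft,
                       exp_lr_scheduler, num_epochs=25)
torch.save(model_ft, './best_model.pth')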

pytorch transforms are not applied when downloading dataset from torchvision datasets

I am importing the CIFAR-10 dataset, but my transforms are not applied.
Here are my transform and my dataset:
from torchvision import transforms
transform_train = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

train = torchvision.datasets.CIFAR10("./data", train=True, download=True, transform=transform_train)
test = torchvision.datasets.CIFAR10("./data", train=False, download=True)
The transform is not being applied. Why is that happening?
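For what it's worth: torchvision dataset transforms run lazily inside __getitem__, so nothing is applied at download time; they only fire when a sample is indexed. A minimal check, sketched under that assumption (note that CIFAR10 already yields PIL images, so the leading ToPILImage, which expects a tensor or ndarray, would raise a TypeError on access and should be dropped):

import torchvision
from torchvision import transforms

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # CIFAR10 samples are PIL images already
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])
train = torchvision.datasets.CIFAR10("./data", train=True, download=True,
                                     transform=transform_train)

img, label = train[0]        # the transform pipeline fires here, on access
print(type(img), img.shape)  # <class 'torch.Tensor'> torch.Size([3, 32, 32])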

Low Validation Score on Pretrained Alexnet from Pytorch models for ImageNet 2012 dataset

I am using a pre-trained AlexNet network to validate some prior work.
The code is as follows:
import os
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
model.eval()

batchsize = 50000
workers = 1
dataset_path = 'data/imagenet_2012/'

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
val_data = datasets.ImageFolder(root=os.path.join(dataset_path, 'val'),
                                transform=transforms.Compose([
                                    transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    normalize,
                                ]))
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, num_workers=workers)

batch = next(iter(val_loader))
images, labels = batch
with torch.no_grad():
    output = model(images)
    for i in output:
        out_soft = torch.nn.functional.softmax(i, dim=0)
        print(int(torch.argmax(out_soft)))
When I execute this and compare against ILSVRC2012_validation_ground_truth.txt, I get a top-1 accuracy of only 5%.
What am I doing wrong here?
Thank you.
So, PyTorch/Caffe have their own "ground truth" files, which can be obtained here:
https://gist.github.com/ksimonyan/fd8800eeb36e276cd6f9#note
I manually checked the images in the validation folder of the ImageNet dataset against the val.txt file in the tar archive provided at the link above to verify the ordering.
Update:
New validation accuracy based on the ground truth in the zip file at the link:
Top_1 = 56.522%
Top_5 = 79.066%
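For reference, here is one way to score top-1/top-5 in this setup; a sketch assuming labels holds the corrected, Caffe-ordered ground-truth class indices and output is the logits batch from the question:

import torch

def topk_accuracy(output, target, ks=(1, 5)):
    """Fraction of samples whose true label appears in the top-k logits."""
    _, pred = output.topk(max(ks), dim=1)     # (N, max_k) predicted class ids
    correct = pred.eq(target.unsqueeze(1))    # (N, max_k) boolean hit matrix
    return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]

top1, top5 = topk_accuracy(output, labels)
print(f'Top-1: {top1:.3%}  Top-5: {top5:.3%}')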

Pytorch: Add input normalization to model (division layer)

I want to add the image normalization to an existing pytorch model, so that I don't have to normalize the input image anymore.
Say I have an existing model
model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
model.eval()
Now I can add new layers (for example a relu) using torch.nn.Sequential:
new_model = nn.Sequential(
    model,
    nn.ReLU()
)
However, I couldn't find a layer that performs just a division or subtraction, as needed for the input normalization, shown here in NumPy:
import cv2
import numpy as np
img = cv2.imread("my_img.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = img.astype(np.float32)
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
img = img / 255.0
img = img - mean
img = img / std
img = np.transpose(img, (2, 0, 1))
img = np.expand_dims(img, axis=0)
The goal is for the normalization to eventually run on the GPU, to save time during inference. I also cannot use torchvision transforms, because those operations are not stored inside the model itself. For example, if I save the model to disk (in order to convert it to tflite using onnx), the torchvision transform operations will not be saved along with the model. Is there an elegant way of doing this?
(Preferably without a linear layer, which would fix my model's input size; that should stay flexible, as my real model is fully convolutional.)
Here is untested code which hopefully you can vet yourself:
import torch
import torch.nn as nn

class Normalize(nn.Module):
    def __init__(self, mean, std):
        super(Normalize, self).__init__()
        # Buffers move with .to(device)/.cuda() and are saved with the model;
        # shape (1, C, 1, 1) lets the values broadcast over an (N, C, H, W) batch.
        self.register_buffer('mean', torch.tensor(mean).view(1, -1, 1, 1))
        self.register_buffer('std', torch.tensor(std).view(1, -1, 1, 1))

    def forward(self, input):
        x = input / 255.0
        x = x - self.mean
        x = x / self.std
        return x
In your model you can do:
new_model = nn.Sequential(
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    model,
    nn.ReLU()
)
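Since the normalization is now an ordinary module inside the graph, it travels with the model when serialized. A minimal sketch of the ONNX export mentioned in the question (the dummy input shape here is an assumption):

import torch

# The Normalize module is part of new_model, so the exported graph
# contains the /255, mean-subtract and std-divide operations.
dummy = torch.randn(1, 3, 224, 224)
torch.onnx.export(new_model.eval(), dummy, "model_with_norm.onnx")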
The right way of doing this in PyTorch is using dataset transformations. In your specific case, you need torchvision transforms. You can see examples in the official tutorials; copying part of the code here, for completeness:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

PyTorch Dataloader - List is not callable error when enumerating

When iterating over a PyTorch dataloader, e.g.
# define dataset, dataloader
train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)
test_data = datasets.ImageFolder(data_dir + '/test', transform=test_transforms)
trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=64)

# define model, optimizer, loss
# not included - irrelevant to the question

for ii, (inputs, labels) in enumerate(trainloader):
    # Move input and label tensors to the GPU
    inputs, labels = inputs.to(device), labels.to(device)
    start = time.time()
    outputs = model.forward(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
I get a TypeError: 'list' object is not callable on this line
for ii, (inputs, labels) in enumerate(trainloader):
What dumb thing am I forgetting?
Did you remember to call transforms.Compose on your list of transforms?
In this line
train_data = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)
the transform parameter is expecting a callable object, not a list.
So, for example, this is wrong:
train_transforms = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
It should look like this
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
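Alternatively, if you want to keep the plain list around, you can wrap it at the call site, since transforms.Compose takes the list and returns a single callable (reusing the names from the question):

# Compose turns the list of transforms into one callable object.
train_data = datasets.ImageFolder(data_dir + '/train',
                                  transform=transforms.Compose(train_transforms))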
