Classification with pretrained PyTorch VGG16 model and its classes

I wrote an image classification script using PyTorch's pretrained VGG16 model.
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
import urllib
from skimage.transform import resize
from skimage import io
import yaml
# Downloading imagenet 1000 classes list
file = urllib.request.urlopen("https://gist.githubusercontent.com/yrevar/942d3a0ac09ec9e5eb3a/raw/238f720ff059c1f82f368259d1ca4ffa5dd8f9f5/imagenet1000_clsidx_to_labels.txt")
classes = ''
for f in file:
    classes = classes + f.decode("utf-8")
classes = yaml.load(classes, Loader=yaml.FullLoader)
# Downloading pretrained vgg16 model
model = torch.hub.load('pytorch/vision:v0.6.0', 'vgg16', pretrained=True)
print(model)
for param in model.parameters():
    param.requires_grad = False
url, filename = ("https://raw.githubusercontent.com/pytorch/hub/master/dog.jpg", "dog.jpg")
image = io.imread(url)
plt.imshow(image)
plt.show()
# resize to 224x224x3
img = resize(image,(224,224,3))
plt.imshow(img)
plt.show()
# Normalizing input for vgg16
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img1 = mean*img+std
img1 = np.clip(img1,0,1)
img1 = torch.from_numpy(img1).unsqueeze(0)
img1 = img1.permute(0,3,2,1) # batch_size x channels x height x width
model.eval()
pred = model(img1.float())
print(classes[torch.argmax(pred).numpy().tolist()])
The code runs fine, but it outputs the wrong classes. I am not sure where I went wrong, but if I had to guess, it might be the ImageNet YAML class list or the normalization of the input image. Can anyone tell me where I am making a mistake?

There are some issues with the image preprocessing. Firstly, the normalisation is calculated as (value - mean) / std, not value * mean + std. Secondly, the values should not be clipped to [0, 1]; the normalisation purposely shifts the values away from [0, 1]. Thirdly, the image as a NumPy array has shape [height, width, 3], so when you permute the dimensions with (0, 3, 2, 1) you swap the height and width dimensions, creating a tensor with shape [batch_size, channels, width, height].
img = resize(image,(224,224,3))
# Normalizing input for vgg16
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img1 = (img - mean) / std
img1 = torch.from_numpy(img1).unsqueeze(0)
img1 = img1.permute(0, 3, 1, 2) # batch_size x channels x height x width
Instead of doing that manually, you can use torchvision.transforms.
from torchvision import transforms
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
img = resize(image,(224,224,3))
img1 = preprocess(img)
img1 = img1.unsqueeze(0)
If you use PIL to load the images, you could also resize the images by adding transforms.Resize((224, 224)) to the preprocessing pipeline, or you could even add transforms.ToPILImage() to first convert the image to a PIL image (transforms.Resize requires a PIL image).
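For completeness, a minimal sketch of that PIL-based variant (untested here, reusing the dog.jpg URL from the question):
import urllib.request
from PIL import Image
from torchvision import transforms

urllib.request.urlretrieve("https://raw.githubusercontent.com/pytorch/hub/master/dog.jpg", "dog.jpg")
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
img1 = preprocess(Image.open("dog.jpg")).unsqueeze(0)  # [1, 3, 224, 224]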

Related

Low Validation Score on Pretrained Alexnet from Pytorch models for ImageNet 2012 dataset

I am using pre-trained AlexNet network to validate some prior work.
The code is as follows:
import os
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
model.eval()
batchsize = 50000
workers = 1
dataset_path = 'data/imagenet_2012/'
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
val_data = datasets.ImageFolder(root=os.path.join(dataset_path, 'val'),
                                transform=transforms.Compose([
                                    transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    normalize,
                                ]))
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, num_workers=workers)
batch = next(iter(val_loader))
images, labels = batch
with torch.no_grad():
    output = model(images)
for i in output:
    out_soft = torch.nn.functional.softmax(i, dim=0)
    print(int(torch.argmax(out_soft)))
When I execute this and compare against ILSVRC2012_validation_ground_truth.txt, I get a top-1 accuracy of only 5%.
What am I doing wrong here?
Thank you.
So, PyTorch/Caffe use their own "ground truth" ordering, which can be obtained from here:
https://gist.github.com/ksimonyan/fd8800eeb36e276cd6f9#note
I manually checked the images in the validation folder of the ImageNet dataset against the val.txt file in the tar archive provided at the link above to verify the order.
Update:
New validation accuracy based on the ground truth in the zip file at the link:
Top_1 = 56.522%
Top_5 = 79.066%
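For reference, once the images and labels follow the same ordering, top-1 and top-5 accuracy can be computed directly from the logits; a minimal sketch (untested, reusing the images/labels batch from the question):
with torch.no_grad():
    top5 = model(images).topk(5, dim=1).indices  # [N, 5] best classes per image
top1_acc = (top5[:, 0] == labels).float().mean().item()
top5_acc = (top5 == labels.unsqueeze(1)).any(dim=1).float().mean().item()
print(f"Top-1: {top1_acc:.3%}  Top-5: {top5_acc:.3%}")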

Pytorch: Add input normalization to model (division layer)

I want to add the image normalization to an existing pytorch model, so that I don't have to normalize the input image anymore.
Say I have an existing model
model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
model.eval()
Now I can add new layers (for example a relu) using torch.nn.Sequential:
new_model = nn.Sequential(
    model,
    nn.ReLU()
)
However, I couldn't find a layer that performs just a division or subtraction as needed for the input normalization, shown here in NumPy:
import cv2
import numpy as np
img = cv2.imread("my_img.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = img.astype(np.float32)
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
img = img / 255.0
img = img - mean
img = img / std
img = np.transpose(img, (2, 0, 1))
img = np.expand_dims(img, axis=0)
The goal is that the normalization is eventually done on the GPU to save time during inference. Also, I cannot use torchvision transforms, as those operations are not stored inside the model itself. For example, if I want to save the model to disk (in order to convert it to tflite using onnx), the torchvision transform operations will not be saved along with the model. Is there an elegant way of doing this?
(preferably without using a linear layer, which would fix my model's input size; the input size should stay flexible, as my real model is fully convolutional)
Untested code which hopefully you can vet yourself.
import torch
import torch.nn as nn

class Normalize(nn.Module):
    def __init__(self, mean, std):
        super(Normalize, self).__init__()
        # register mean/std as buffers so they are saved with the model and
        # move to the GPU together with it via new_model.to('cuda:0');
        # reshape to [1, C, 1, 1] so they broadcast over [N, C, H, W] inputs
        self.register_buffer('mean', torch.tensor(mean).view(1, -1, 1, 1))
        self.register_buffer('std', torch.tensor(std).view(1, -1, 1, 1))

    def forward(self, input):
        x = input / 255.0
        x = x - self.mean
        x = x / self.std
        return x
In your model you can do
new_model = nn.Sequential(
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    model,
    nn.ReLU()
)
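Because the normalization now lives inside the model, it travels with it when you export. A sketch (untested, assuming a fixed 224x224 input for tracing) of exporting the wrapped model to ONNX:
import torch
dummy = torch.randn(1, 3, 224, 224)
new_model.eval()
# the exported graph includes the division/subtraction of the Normalize layer
torch.onnx.export(new_model, dummy, "model_with_norm.onnx", opset_version=11)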
The right way of doing this in PyTorch is using dataset transformations. In your specific case, you need torchvision transforms. You can find examples in the official PyTorch tutorials. Copying part of the code here for completeness:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

Keras: KeyError: 'acc' during plotting

I'm trying to visualize the training plot of my model, and I get this error:
Traceback (most recent call last):
  File "train.py", line 120, in <module>
    plt.plot(H.history['acc'])
KeyError: 'acc'
Here's the full code:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from pyimagesearch.resnet import ResNet
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.utils import np_utils
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
help="path to input dataset")
ap.add_argument("-a", "--augment", type=int, default=-1,
help="whether or not 'on the fly' data augmentation should be used")
ap.add_argument("-p", "--plot", type=str, default="plot.png",
help="path to output loss/accuracy plot")
args = vars(ap.parse_args())
# initialize the initial learning rate, batch size, and number of
# epochs to train for
INIT_LR = 1e-1
BS = 8
EPOCHS = 50
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class images
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args["dataset"]))
data = []
labels = []
# loop over the image paths
for imagePath in imagePaths:
    # extract the class label from the filename, load the image, and
    # resize it to be a fixed 64x64 pixels, ignoring aspect ratio
    label = imagePath.split(os.path.sep)[-2]
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (64, 64))
    # update the data and labels lists, respectively
    data.append(image)
    labels.append(label)
# convert the data into a NumPy array, then preprocess it by scaling
# all pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
# encode the labels (which are currently strings) as integers and then
# one-hot encode them
le = LabelEncoder()
labels = le.fit_transform(labels)
labels = np_utils.to_categorical(labels, 3)
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.25, random_state=42)
# initialize an our data augmenter as an "empty" image data generator
aug = ImageDataGenerator()
# check to see if we are applying "on the fly" data augmentation, and
# if so, re-instantiate the object
if args["augment"] > 0:
print("[INFO] performing 'on the fly' data augmentation")
aug = ImageDataGenerator(
rotation_range=20,
zoom_range=0.15,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.15,
horizontal_flip=True,
fill_mode="nearest")
# initialize the optimizer and model
print("[INFO] compiling model...")
opt = SGD(lr=INIT_LR, momentum=0.9, decay=INIT_LR / EPOCHS)
model = ResNet.build(64, 64, 3, 2, (2, 3, 4),
                     (32, 64, 128, 256), reg=0.0001)
model.compile(loss="categorical_crossentropy", optimizer=opt,
              metrics=["accuracy"])
# train the network
print("[INFO] training network for {} epochs...".format(EPOCHS))
H = model.fit_generator(
    aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS)
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=BS)
print(classification_report(testY.argmax(axis=1),
                            predictions.argmax(axis=1), target_names=le.classes_))
# plot the training loss and accuracy
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history['val_loss'], label="val_loss")
plt.plot(N, H.history['acc'], label="accuracy")
plt.plot(N, H.history['val_acc'], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig(args["plot"])
This code is originally from the PyImageSearch blog post on data augmentation, so I decided to give it a try. The original pipeline was meant for binary classification, and since I was interested in a multi-class task, I made some changes.
But the plotting part of the code seems correct to me. I checked multiple posts here on Stack Overflow and the Keras documentation, and I still do not understand why it does not work.
Any suggestion would be much appreciated.
Thank you.
Have you tried H.history['accuracy']? Since you compiled using 'accuracy' it will probably have the same string.
Now you can always inspect what you've got:
for key in H.history.keys():
    print(key)
You will see what is logged there.
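If you want the plot to be robust across Keras versions (older versions log 'acc', newer ones 'accuracy'), you could pick the key defensively; a small sketch, reusing N and plt from your script:
acc_key = 'accuracy' if 'accuracy' in H.history else 'acc'
plt.plot(N, H.history[acc_key], label="train_acc")
plt.plot(N, H.history['val_' + acc_key], label="val_acc")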

Prediction and class activation map works just on some pictures

I have retrained a VGG16 classifier and want to show the class activation map. Unfortunately, this only works with some pictures, even though the images are preprocessed. It is only a binary classifier.
I have seen that some pictures do not have the desired width and height, despite setting target_size while loading the image. Manual resizing did not help either. z has the desired shape.
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.engine.input_layer import Input
from keras.layers import Dropout, Flatten, Dense
from keras.preprocessing import image
from keras.models import load_model, Model
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
import cv2
# Load weights from retrained classifier
top_model_weights_path = 'retrained_weights.h5'
# Create model with VGG16 base
input_tensor = Input(shape=(224, 224, 3))
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
x = Flatten()(base_model.output)
x = Dense(4096, activation='relu')(x)
x = Dense(4096, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(input=base_model.input, output=predictions)
model.load_weights(top_model_weights_path, by_name=True)
# load and preprocess image
img_path = './picture.jpg'
img = image.load_img(img_path, target_size=(224, 224))
z = image.img_to_array(img)
z = np.expand_dims(z, axis=0)
z = preprocess_input(z)
# make Prediction
preds = model.predict(z)
print(preds)
maximum_model_output = model.output[:, 0]
last_conv_layer = model.layers[17]
# pooled grads of last convolutional layer and iterate over image
grads = K.gradients(model.output[:, 0], last_conv_layer.output)[0]
pooled_grads = K.mean(grads, axis=(0, 1, 2))
iterate = K.function([model.input],
                     [pooled_grads, last_conv_layer.output[0]])
pooled_grads_value, conv_layer_output_value = iterate([z])
for i in range(512):
    conv_layer_output_value[:, :, i] *= pooled_grads_value[i]
# create heatmap
heatmap = np.mean(conv_layer_output_value, axis=-1)
heatmap = np.maximum(heatmap, 0)
heatmap /= np.max(heatmap)
plt.matshow(heatmap)
img = cv2.imread(img_path)
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = heatmap * 0.4 + img
cv2.imwrite('./Images/picture_cam.jpg', superimposed_img)
As stated, with some pictures I get a valid prediction and a class activation map; most pictures do not work, and I get this error (last_conv_layer.output is all 0, pooled_grads are all 0, and the value of model.predict is 1.0):
Using TensorFlow backend.
Backend TkAgg is interactive backend. Turning interactive mode on.
2019-08-11 21:13:16.868637: I tensorflow/core/common_runtime/process_util.cc:69] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
<input>:20: UserWarning: Update your `Model` call to the Keras 2 API: `Model(inputs=Tensor("in..., outputs=Tensor("de...)`
[[1.]]
0
<input>:63: RuntimeWarning: invalid value encountered in true_divide
/home/zuse/Projekte/deep-learning-convolutional-network/venv/lib/python3.6/site-packages/matplotlib/image.py:395: UserWarning: Warning: converting a masked element to nan.
dv = (np.float64(self.norm.vmax) -
/home/zuse/Projekte/deep-learning-convolutional-network/venv/lib/python3.6/site-packages/matplotlib/image.py:396: UserWarning: Warning: converting a masked element to nan.
np.float64(self.norm.vmin))
/home/zuse/Projekte/deep-learning-convolutional-network/venv/lib/python3.6/site-packages/matplotlib/image.py:403: UserWarning: Warning: converting a masked element to nan.
a_min = np.float64(newmin)
/home/zuse/Projekte/deep-learning-convolutional-network/venv/lib/python3.6/site-packages/matplotlib/image.py:408: UserWarning: Warning: converting a masked element to nan.
a_max = np.float64(newmax)
/home/zuse/Projekte/deep-learning-convolutional-network/venv/lib/python3.6/site-packages/matplotlib/colors.py:918: UserWarning: Warning: converting a masked element to nan.
dtype = np.min_scalar_type(value)
I am running out of ideas about what the problem could be.
The issue was connected to preprocess_input from keras.applications.vgg16. Setting
z = preprocess_input(z, mode='tf')
solved it. Perhaps this helps someone.
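For context, the two modes preprocess the pixels quite differently; roughly (my own summary of the Keras behaviour, so verify the exact constants against your Keras version):
import numpy as np

def preprocess_tf(x):
    # mode='tf': scale pixels from [0, 255] to [-1, 1]
    return x / 127.5 - 1.0

def preprocess_caffe(x):
    # mode='caffe' (the VGG16 default): RGB -> BGR, then subtract the
    # ImageNet mean pixel (BGR order), without scaling
    x = x[..., ::-1].astype(np.float32)
    return x - np.array([103.939, 116.779, 123.68], dtype=np.float32)
So if the classifier was retrained on inputs scaled to [-1, 1], the default 'caffe' preprocessing would feed it out-of-distribution values, which would explain the saturated predictions.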

How to implement Grad-CAM on your own network?

I want to implement Grad-CAM on my own network. Should I save my model and load it, then treat my saved model like VGG-16 and do similar operations?
I tried searching the internet, and all the methods I found are based on famous models, not on custom ones.
So I wonder: maybe I just need to treat my own model like VGG-16 and then do similar things?
Hi, I have one solution in PyTorch:
import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np
# use the ImageNet transformation
transform = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
# define a 1 image dataset
dataset = datasets.ImageFolder(root='./data/Elephant/', transform=transform)
# define the dataloader to load that single image
dataloader = data.DataLoader(dataset=dataset, shuffle=False, batch_size=1)
vgg19 = Mymodel() ## create an object of your model
vgg19.load_state_dict(torch.load("your_vgg19_weights"))
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        # get the pretrained VGG19 network
        self.vgg = vgg19
        # dissect the network to access its last convolutional layer
        self.features_conv = self.vgg.features[:36]  # 36th layer was my last conv layer
        # get the max pool of the features stem
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        # get the classifier of the vgg19
        self.classifier = self.vgg.classifier
        # placeholder for the gradients
        self.gradients = None

    # hook for the gradients of the activations
    def activations_hook(self, grad):
        self.gradients = grad

    def forward(self, x):
        x = self.features_conv(x)
        # register the hook
        h = x.register_hook(self.activations_hook)
        # apply the remaining pooling
        x = self.max_pool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x

    # method for the gradient extraction
    def get_activations_gradient(self):
        return self.gradients

    # method for the activation extraction
    def get_activations(self, x):
        return self.features_conv(x)
vgg = VGG()
# set the evaluation mode
vgg.eval()
# get the image from the dataloader
img, _ = next(iter(dataloader))
# get the most likely prediction of the model
pred_class = vgg(img).argmax(dim=1).numpy()[0]
pred = vgg(img)
pred[:, pred_class].backward()
# pull the gradients out of the model
gradients = vgg.get_activations_gradient()
# pool the gradients across the channels
pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
# get the activations of the last convolutional layer
activations = vgg.get_activations(img).detach()
# weight the channels by corresponding gradients
for i in range(512):
    activations[:, i, :, :] *= pooled_gradients[i]
# average the channels of the activations
heatmap = torch.mean(activations, dim=1).squeeze()
# relu on top of the heatmap
# expression (2) in https://arxiv.org/pdf/1610.02391.pdf
heatmap = np.maximum(heatmap, 0)
# normalize the heatmap
heatmap /= torch.max(heatmap)
heatmap = heatmap.numpy()
import cv2
img = cv2.imread('./data/Elephant/data/05fig34.jpg')
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = heatmap * 0.4 + img
cv2.imwrite('./map.jpg', superimposed_img) ###saves gradcam visualization image
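When adapting this to your own network, the slice index in self.vgg.features[:36] will differ. A small sketch (assuming your model is built from standard nn.Conv2d submodules) to locate the last convolutional layer:
# list all conv layers with their names so you can pick the index of the last one
for name, module in vgg19.named_modules():
    if isinstance(module, nn.Conv2d):
        print(name, module)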
