I want to add image normalization to an existing PyTorch model so that I don't have to normalize the input image separately anymore.
Say I have an existing model
model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
model.eval()
Now I can add new layers (for example a relu) using torch.nn.Sequential:
new_model = nn.Sequential(
model,
nn.ReLU()
)
However, I couldn't find a layer that performs just the division and subtraction needed for the input normalization, shown here in NumPy:
import cv2
import numpy as np
img = cv2.imread("my_img.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = img.astype(np.float32)
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
img = img / 255.0
img = img - mean
img = img / std
img = np.transpose(img, (2, 0, 1))
img = np.expand_dims(img, axis=0)
The goal is that normalization is eventually done on the GPU to save time during inference. Also, I cannot use torchvision transforms, as those operations are not stored inside the model itself. For example, if I want to save the model to disk (in order to convert it to tflite via ONNX), the torchvision transform operations will not be saved along with the model. Is there an elegant way of doing this?
(preferably without using a linear layer, which would fix my model's input size; the input size should stay flexible, as my real model is fully convolutional)
Untested code which hopefully you can vet yourself.
import torch
import torch.nn as nn

cuda0 = torch.device('cuda:0')

class Normalize(nn.Module):
    def __init__(self, mean, std):
        super(Normalize, self).__init__()
        # shape (1, C, 1, 1) so the constants broadcast over NCHW batches
        self.mean = torch.tensor(mean, device=cuda0).view(1, -1, 1, 1)
        self.std = torch.tensor(std, device=cuda0).view(1, -1, 1, 1)

    def forward(self, input):
        x = input / 255.0
        x = x - self.mean
        x = x / self.std
        return x
In your model you can do
new_model = nn.Sequential(
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
model,
nn.ReLU()
)
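If you then export this combined module, the normalization is baked into the graph, which was the point of the ONNX/tflite route. An untested sketch (the output file name and the axis names are placeholders I made up):
import torch
# Move the whole pipeline to the GPU (the Normalize constants already live
# there) and trace-export it. The dummy input only fixes the shape used for
# tracing; dynamic_axes keeps batch, height and width flexible for the fully
# convolutional case.
new_model = new_model.to(cuda0).eval()
dummy = torch.randn(1, 3, 224, 224, device=cuda0)
torch.onnx.export(
    new_model, dummy, "model_with_norm.onnx",
    input_names=["image"], output_names=["output"],
    dynamic_axes={"image": {0: "batch", 2: "height", 3: "width"}},
)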
The right way of doing this in PyTorch is using dataset transformations. In your specific case, you need torchvision transforms; the official tutorials show several examples. Copying part of the code here, for completeness:
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
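The normalized data is then consumed through a DataLoader; a minimal continuation of the snippet above (batch size and worker count are arbitrary):
import torch

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

# each batch comes out already normalized by the transform above
images, labels = next(iter(trainloader))
print(images.shape)  # torch.Size([4, 3, 32, 32])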
Related
I am using the pre-trained AlexNet network to validate some prior work.
The code is as follows:
import os
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
model.eval()
batchsize = 50000
workers = 1
dataset_path = 'data/imagenet_2012/'
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
val_data = datasets.ImageFolder(root=os.path.join(dataset_path, 'val'),
                                transform=transforms.Compose([
                                    transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    normalize,
                                ]))
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batchsize, num_workers=workers)
batch = next(iter(val_loader))
images, labels = batch
with torch.no_grad():
    output = model(images)

for i in output:
    out_soft = torch.nn.functional.softmax(i, dim=0)
    print(int(torch.argmax(out_soft)))
When I execute this and compare against ILSVRC2012_validation_ground_truth.txt, I get a top-1 accuracy of only 5%.
What am I doing wrong here?
Thank you.
So, PyTorch/Caffe have their own "ground truth" files, which can be obtained from here:
https://gist.github.com/ksimonyan/fd8800eeb36e276cd6f9#note
I manually tested the images in the validation folder of the Imagenet dataset against the val.txt file in the tar file provided at the link above to verify the order.
Update:
New validation accuracy based on the ground truth in the zip file in the link:
Top_1 = 56.522%
Top_5 = 79.066%
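For reference, once output holds the logits from the snippet in the question and labels holds the ground-truth class indices reordered to the PyTorch/Caffe convention, the two numbers can be computed roughly like this:
# top-1: the argmax must equal the label; top-5: the label must appear
# among the five largest logits
top5 = output.topk(5, dim=1).indices
top1_acc = (top5[:, 0] == labels).float().mean().item()
top5_acc = (top5 == labels.unsqueeze(1)).any(dim=1).float().mean().item()
print(f"Top_1 = {top1_acc:.3%}  Top_5 = {top5_acc:.3%}")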
I've been trying to implement a custom Keras image data generator so that I can do hair and microscope image augmentation.
This is the Datagenerator class:
class DataGenerator(Sequence):
    def __init__(self, image_paths, labels, augmentations, batch_size=32,
                 image_dimension=(224, 224, 3), shuffle=False):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.image_dimension = image_dimension
        self.shuffle = shuffle
        self.augment = augmentations

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def _getitem__(self, index):
        indexes = self.indexes[index*self.batch_size : (index+1)*self.batch_size]
        batch_y = np.array([self.labels[k] for k in indexes])
        batch_x = [cv2.cvtColor(cv2.imread(self.image_paths[k]), cv2.COLOR_RGB2BGR)
                   for k in indexes]
        return np.stack([
            self.augment(image=x)["image"] for x in batch_x
        ], axis=0), np.array(batch_y)
The code below is for the albumentations augmentation (just trying albumentations to test whether the data generator works or not):
AUGMENTATIONS_TRAIN = Compose([
HorizontalFlip(p=0.5),
RandomContrast(limit=0.2, p=0.5),
RandomGamma(gamma_limit=(80, 120), p=0.5),
RandomBrightness(limit=0.2, p=0.5),
HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20,
val_shift_limit=10, p=.9),
# CLAHE(p=1.0, clip_limit=2.0),
ShiftScaleRotate(
shift_limit=0.0625, scale_limit=0.1,
rotate_limit=15, border_mode=cv2.BORDER_REFLECT_101, p=0.8),
ToFloat(max_value=255)
])
AUGMENTATIONS_TEST = Compose([
# CLAHE(p=1.0, clip_limit=2.0),
ToFloat(max_value=255)
])
Now creating the DataGenerator objects:
train_datagen = DataGenerator(train['images'],
                              train['target'],
                              augmentations=AUGMENTATIONS_TRAIN,
                              batch_size=32,
                              image_dimension=(224, 224, 3))

val_datagen = DataGenerator(validation['images'],
                            validation['target'],
                            augmentations=AUGMENTATIONS_TEST,
                            batch_size=16,
                            image_dimension=(224, 224, 3))
A NotImplementedError comes when I run model.fit_generator(generator=train_datagen, steps_per_epoch=30, epochs=30, validation_data=val_datagen, validation_steps=15).
I have shared my kernel, and I was taking help from an existing example. I have also looked at other ways to augment, which were all much the same.
I will be thankful if someone can tell me why and where the problem is, and whether there is any other good way to do custom image augmentation in Keras.
You can have a look at the imgaug library; albumentations and imgaug are almost the same. Write the sequence of operations and then just put it in the ImageDataGenerator preprocessing_function. I tried using the albumentations library but faced some errors.
from imgaug import augmenters as iaa

# helper used below: apply the wrapped augmenter to only 50% of the images
sometimes = lambda aug: iaa.Sometimes(0.5, aug)

seq = iaa.Sequential([
iaa.Fliplr(0.5), # horizontally flip
# sometimes(iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05), per_channel=0.5)),
iaa.OneOf([
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
# iaa.Noop(),
iaa.GaussianBlur(sigma=(0.0, 1.0)),
# iaa.Noop(),
iaa.Affine(rotate=(-10, 10), translate_percent={"x": (-0.25, 0.25)}, mode='symmetric', cval=(0)),
# iaa.Noop(),
# iaa.PerspectiveTransform(scale=(0.04, 0.08)),
# # iaa.Noop(),
# iaa.PiecewiseAffine(scale=(0.05, 0.1), mode='edge', cval=(0)),
]),
sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)),
# More as you want ...
], random_order=True)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(preprocessing_function=seq.augment_image)
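A rough usage sketch, assuming the training images sit in class subfolders under a placeholder directory train_dir, the task is binary, and model is already compiled:
# flow_from_directory loads and resizes each image, then runs the imgaug
# sequence on it via preprocessing_function before yielding the batch
train_generator = datagen.flow_from_directory(
    "train_dir", target_size=(224, 224), batch_size=32, class_mode="binary")

model.fit_generator(train_generator, steps_per_epoch=30, epochs=30)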
There are some advanced data augmentation practices such as cutout, random erasing and mixup. They are easy to implement in Keras. For mixup, the example is below (MixupGenerator comes from a separate open-source Keras implementation):
training_generator = MixupGenerator(trainX, trainY, batch_size=8, alpha=0.2, datagen=datagen)()
x, y = next(training_generator)
# To visualize the batch images
for i in range(9):
    plt.subplot(330 + 1 + i)
    # batch = it.next()
    img = x[i]
    plt.imshow(img.reshape(224, 224, 3))
plt.savefig("mixup_batch.png")
H = model.fit_generator(
# datagen.flow(trainX, trainY, batch_size=args.batch_size),
training_generator,
steps_per_epoch=len(trainX) // args.batch_size,
validation_data=(valX, valY),
validation_steps=len(valX) // args.batch_size,
epochs=args.epochs,
# workers=4,
callbacks=[model_checkpoint, lr_reducer, stopping, lr_schedule],
)
The problem I faced with this, though, is that random erasing also needs to go into the ImageDataGenerator preprocessing_function, and we have already put the imgaug augmentation there. One possible alternative is to use two data generators, or to compose both steps into a single function as sketched below.
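A hedged sketch of that composition, where random_eraser stands for whatever random-erasing callable you use (a hypothetical name, e.g. the eraser returned by the helper that usually accompanies MixupGenerator) and seq is the imgaug pipeline from above:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def combined_preprocessing(image):
    # run the imgaug pipeline first, then the hypothetical
    # random-erasing callable on the augmented image
    image = seq.augment_image(image)
    image = random_eraser(image)
    return image

datagen = ImageDataGenerator(preprocessing_function=combined_preprocessing)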
I wrote an image classification model with PyTorch's pretrained VGG16 model.
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
import urllib.request
from skimage.transform import resize
from skimage import io
import yaml
# Downloading imagenet 1000 classes list
file = urllib.request.urlopen("https://gist.githubusercontent.com/yrevar/942d3a0ac09ec9e5eb3a/raw/238f720ff059c1f82f368259d1ca4ffa5dd8f9f5/imagenet1000_clsidx_to_labels.txt")
classes = ''
for f in file:
    classes = classes + f.decode("utf-8")
classes = yaml.load(classes)
# Downloading pretrained vgg16 model
model = torch.hub.load('pytorch/vision:v0.6.0', 'vgg16', pretrained=True)
print(model)
for param in model.parameters():
    param.requires_grad = False
url, filename = ("https://raw.githubusercontent.com/pytorch/hub/master/dog.jpg", "dog.jpg")
image=io.imread(url)
plt.imshow(image)
plt.show()
# resize to 224x224x3
img = resize(image,(224,224,3))
plt.imshow(img)
plt.show()
# Normalizing input for vgg16
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img1 = mean*img+std
img1 = np.clip(img1,0,1)
img1 = torch.from_numpy(img1).unsqueeze(0)
img1 = img1.permute(0,3,2,1) # batch_size x channels x height x width
model.eval()
pred = model(img1.float())
print(classes[torch.argmax(pred).numpy().tolist()])
The code runs fine, but it's outputting the wrong classes. I am not sure where I went wrong, but if I have to guess, it might be the ImageNet YAML classes list or the normalization of the input image. Can anyone tell me where I am making mistakes?
There are some issues with the image preprocessing. Firstly, the normalisation is calculated as (value - mean) / std, not value * mean + std. Secondly, the values should not be clipped to [0, 1]; the normalisation purposely shifts the values away from [0, 1]. Thirdly, the image as a NumPy array has shape [height, width, 3]; when you permute the dimensions the way you do, you swap the height and width dimensions, creating a tensor with shape [batch_size, channels, width, height].
img = resize(image, (224, 224, 3))

# Normalizing input for vgg16
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img1 = (img - mean) / std
img1 = torch.from_numpy(img1).unsqueeze(0)
img1 = img1.permute(0, 3, 1, 2)  # batch_size x channels x height x width
Instead of doing that manually, you can use torchvision.transforms.
from torchvision import transforms
preprocess = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
img = resize(image,(224,224,3))
img1 = preprocess(img)
img1 = img1.unsqueeze(0)
If you use PIL to load the images, you could also resize the images by adding transforms.Resize((224, 224)) to the preprocessing pipeline, or you could even add transforms.ToPILImage() to first convert the image to a PIL image (transforms.Resize requires a PIL image).
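Putting those pieces together, a possible end-to-end version that stays entirely in PIL/torchvision might look like this (an untested sketch; it assumes the downloaded dog image was saved locally as dog.jpg, and model and classes come from the question's snippet):
import torch
from PIL import Image
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# PIL loads the image as RGB, so no channel reordering is needed
image = Image.open("dog.jpg").convert("RGB")
img1 = preprocess(image).unsqueeze(0)  # shape: [1, 3, 224, 224]

model.eval()
pred = model(img1)
print(classes[torch.argmax(pred).item()])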
I am trying to write simple ML code to classify the MNIST dataset in TensorFlow 2.0. I didn't use Keras for now since I just want to use the lower-level API to help me understand how TensorFlow works. However, after I defined the cross entropy, it seems impossible to continue. All the TF 2.0 optimizers are moved to Keras and I don't know how to train a model without Keras in TF 2.0. Is there a way to bypass Keras in TF 2.0?
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
print(train_images.shape)
print(len(train_labels))
print(train_images[1,:,:].shape)
# plt.figure()
# plt.imshow(train_images[0])
# plt.colorbar()
# plt.grid(False)
# plt.show()
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
for i in range(1):
    x = tf.constant(train_images[1, :, :].reshape(784), dtype=tf.float32)
    x = tf.reshape(x, [1, 784])
    print(tf.shape(x), tf.shape(W))

    # define the model
    y = tf.nn.softmax(tf.matmul(x, W) + b)
    print(y)

    # correct labels
    y_ = np.zeros(10)
    y_[train_labels[i]] = 1.0
    y_ = tf.constant(y_, dtype=tf.float32)
    y_ = tf.reshape(y_, [1, 10])

    cross_entropy = -tf.reduce_sum(y_ * tf.math.log(y))
    print(cross_entropy)
I don't know how to continue from here.
Backpropagation-based training of models is totally possible in TensorFlow 2.x without using the Keras API. The usage is centered around the tf.GradientTape API and the optimizer objects under the tf.optimizers namespace.
Your example can be modified as follows. Note that it's simplistic code meant to illustrate basic usage in a short snippet; it's not meant to illustrate machine learning best practices in TF2.
import tensorflow as tf
from tensorflow.keras import datasets

(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Flatten the 28x28 images into length-784 vectors and one-hot encode the labels.
x_train = tf.constant(train_images.reshape(-1, 784), dtype=tf.float32)
y_train = tf.one_hot(train_labels, 10)

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

@tf.function
def my_model(x):
    # This is a hand-rolled logistic regressor.
    y = tf.matmul(x, W) + b
    return tf.nn.softmax(y)

@tf.function
def loss(x, labels):
    # This is a hand-rolled categorical cross-entropy loss.
    logits = my_model(x)
    diff = -(labels * tf.math.log(logits))
    return tf.reduce_mean(diff)

optimizer = tf.optimizers.Adam(learning_rate=1e-3)

num_steps = 100
for i in range(num_steps):
    # A single training step.
    with tf.GradientTape() as tape:
        # This is atypical, in that you would normally want to do this in
        # mini-batches, instead of using all examples in x_train and y_train
        # at once. But again, this is just a simple example.
        loss_value = loss(x_train, y_train)
    gradients = tape.gradient(loss_value, [W, b])
    optimizer.apply_gradients(zip(gradients, [W, b]))
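Once trained, evaluation follows the same pattern without Keras; a rough sketch using the test split loaded above:
# flatten the test images the same way as the training data and compare
# the argmax of the softmax outputs with the integer labels
x_test = tf.constant(test_images.reshape(-1, 784), dtype=tf.float32)
y_test = tf.constant(test_labels, dtype=tf.int64)

predictions = tf.argmax(my_model(x_test), axis=1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y_test), tf.float32))
print("test accuracy:", float(accuracy))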
I want to implement Grad-CAM on my own network. Should I save my model and load it, then treat my saved model like VGG-16 and do similar operations?
I tried to search on the internet, and I found that all the methods are based on famous models, not on people's own models.
So I wonder, maybe I just need to treat my own model as VGG-16, then do similar things.
Hi, I have one solution in PyTorch:
import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np
# use the ImageNet transformation
transform = transforms.Compose([transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
# define a 1 image dataset
dataset = datasets.ImageFolder(root='./data/Elephant/', transform=transform)
# define the dataloader to load that single image
dataloader = data.DataLoader(dataset=dataset, shuffle=False, batch_size=1)
vgg19 = Mymodel() ## create an object of your model
vgg19.load_state_dict(torch.load("your_vgg19_weights"))
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()

        # get the pretrained VGG19 network
        self.vgg = vgg19

        # dissect the network to access its last convolutional layer
        self.features_conv = self.vgg.features[:36]  # the 36th layer was my last conv layer

        # get the max pool of the features stem
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0,
                                     dilation=1, ceil_mode=False)

        # get the classifier of the vgg19
        self.classifier = self.vgg.classifier

        # placeholder for the gradients
        self.gradients = None

    # hook for the gradients of the activations
    def activations_hook(self, grad):
        self.gradients = grad

    def forward(self, x):
        x = self.features_conv(x)

        # register the hook
        h = x.register_hook(self.activations_hook)

        # apply the remaining pooling
        x = self.max_pool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x

    # method for the gradient extraction
    def get_activations_gradient(self):
        return self.gradients

    # method for the activation extraction
    def get_activations(self, x):
        return self.features_conv(x)
vgg = VGG()
# set the evaluation mode
vgg.eval()
# get the image from the dataloader
img, _ = next(iter(dataloader))
# get the most likely prediction of the model
pred_class = vgg(img).argmax(dim=1).numpy()[0]
pred = vgg(img)
pred[:, pred_class].backward()
# pull the gradients out of the model
gradients = vgg.get_activations_gradient()
# pool the gradients across the channels
pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
# get the activations of the last convolutional layer
activations = vgg.get_activations(img).detach()
# weight the channels by corresponding gradients
for i in range(512):
    activations[:, i, :, :] *= pooled_gradients[i]
# average the channels of the activations
heatmap = torch.mean(activations, dim=1).squeeze()
# relu on top of the heatmap
# expression (2) in https://arxiv.org/pdf/1610.02391.pdf
heatmap = np.maximum(heatmap, 0)
# normalize the heatmap
heatmap /= torch.max(heatmap)
heatmap = heatmap.numpy()
import cv2
img = cv2.imread('./data/Elephant/data/05fig34.jpg')
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = heatmap * 0.4 + img
cv2.imwrite('./map.jpg', superimposed_img) ###saves gradcam visualization image
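If your own network does not share VGG19's exact layout, the main thing to adapt is the features_conv slice; a quick way to find the right index, assuming your model exposes an indexed feature extractor like vgg19.features:
# print every indexed submodule of the feature extractor so you can pick
# the index of the last convolutional layer for the slice above
for idx, layer in enumerate(vgg19.features):
    print(idx, layer)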