How to calculate the mean and the std of cifar10 data - pytorch

PyTorch examples commonly use the following values as the mean and std for the CIFAR-10 data:
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
I need to understand the concept behind calculating these values, because this data consists of 3-channel images and I do not understand what is summed and divided over what.
Also, if someone can share code for calculating the mean and the std, I would be thankful.

The 0.5 values are just approximations of the CIFAR-10 mean and std values over the three channels (r, g, b). The precise values for the CIFAR-10 train set are:
mean: 0.49139968, 0.48215827, 0.44653124
std: 0.24703233, 0.24348505, 0.26158768
You may calculate these using the following script:
import torch
import torchvision.datasets as datasets
from torchvision import transforms

cifar_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
imgs = [item[0] for item in cifar_trainset]  # item[0] is the image tensor, item[1] is its label
imgs = torch.stack(imgs, dim=0).numpy()

# calculate mean over each channel (r, g, b)
mean_r = imgs[:, 0, :, :].mean()
mean_g = imgs[:, 1, :, :].mean()
mean_b = imgs[:, 2, :, :].mean()
print(mean_r, mean_g, mean_b)

# calculate std over each channel (r, g, b)
std_r = imgs[:, 0, :, :].std()
std_g = imgs[:, 1, :, :].std()
std_b = imgs[:, 2, :, :].std()
print(std_r, std_g, std_b)
Also, you may find the same mean and std values here and here
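Once you have these numbers, they would typically be plugged straight into the transform pipeline in place of the 0.5 placeholders; a minimal sketch using the train-set statistics above:
from torchvision import transforms

# ToTensor scales pixels to [0, 1], which is the range the statistics above were computed in
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                         (0.24703233, 0.24348505, 0.26158768)),
])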

Alternative way
from torchvision import datasets
cifar_trainset = datasets.CIFAR10(root='./data', train=True, download=True)
data = cifar_trainset.data / 255  # cifar_trainset.data is a uint8 numpy array of shape (50000, 32, 32, 3)
mean = data.mean(axis=(0, 1, 2))  # average over images and both spatial dimensions, one value per channel
std = data.std(axis=(0, 1, 2))
print(f"Mean : {mean} STD: {std}") #Mean : [0.491 0.482 0.446] STD: [0.247 0.243 0.261]

The previous answers either relied on loading each image individually (which is incredibly slow) or assumed that the dataset stores its data as one large numpy array.
A more general and faster approach is to use a PyTorch DataLoader.
import torch

def get_mean_std(trainLoader):
    imgs = None
    for batch in trainLoader:
        image_batch = batch[0]
        if imgs is None:
            imgs = image_batch.cpu()
        else:
            imgs = torch.cat([imgs, image_batch.cpu()], dim=0)
    imgs = imgs.numpy()

    # calculate mean over each channel (r, g, b)
    mean_r = imgs[:, 0, :, :].mean()
    mean_g = imgs[:, 1, :, :].mean()
    mean_b = imgs[:, 2, :, :].mean()
    print(mean_r, mean_g, mean_b)

    # calculate std over each channel (r, g, b)
    std_r = imgs[:, 0, :, :].std()
    std_g = imgs[:, 1, :, :].std()
    std_b = imgs[:, 2, :, :].std()
    print(std_r, std_g, std_b)
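Concatenating every batch still keeps the whole dataset in memory at once. A sketch of a running-statistics variant (function name and structure are my own), assuming the loader yields (image, label) batches of shape (N, C, H, W) in the [0, 1] range:
import torch

def get_mean_std_streaming(loader):
    # accumulate per-channel sum and sum of squares, then use std = sqrt(E[x^2] - E[x]^2)
    channel_sum = torch.zeros(3)
    channel_sq_sum = torch.zeros(3)
    num_pixels = 0
    for images, _ in loader:
        num_pixels += images.size(0) * images.size(2) * images.size(3)
        channel_sum += images.sum(dim=[0, 2, 3])
        channel_sq_sum += (images ** 2).sum(dim=[0, 2, 3])
    mean = channel_sum / num_pixels
    std = (channel_sq_sum / num_pixels - mean ** 2).sqrt()
    return mean, std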

Related

Question about skimage.transform.PolynomialTransform

I have two images (channel 1 and channel 2) and I'm trying to compute the polynomial transform that warps one image into the other. First, I created an ORB object and computed the affine transform between the two images (post-affine). Then I decided to try skimage.transform.PolynomialTransform. However, when I try to compute the transform, the returned NumPy array has either NaN values or 0 values, even though the original image had a non-zero float value at that location (post-polynomial). What am I doing wrong? Code is included below; images are in the following link. https://drive.google.com/drive/folders/1mWxUvLFLK5-rYCrxs3-uGKFxKq2wXDjS?usp=sharing Thanks in advance!
Note: I know that the question Image warping with scikit-image and transform.PolynomialTransform is similar, but in my opinion the two aren't duplicates. Although that user's problem is with the same function, the pixels in their transformed images have values, whereas by and large mine don't.
import cv2
import numpy as np
from ImageConversion import ImageConversion # self-written, irrelevant
import matplotlib
matplotlib.use('macosX')
import matplotlib.pyplot as plt
from scipy.ndimage import uniform_filter
from skimage.draw import circle_perimeter
from skimage.transform import PolynomialTransform, warp

def affine_transform(self):
    channel1_u8 = self.channel1.astype('uint8')  # necessary for detectAndCompute
    channel2_u8 = self.channel2.astype('uint8')
    orb = cv2.ORB_create(100)
    #kp1, des1 = orb.detectAndCompute(channel1_32, None)
    #kp2, des2 = orb.detectAndCompute(channel2_32, None)
    kp1, des1 = orb.detectAndCompute(channel1_u8, None)
    kp2, des2 = orb.detectAndCompute(channel2_u8, None)
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = matcher.match(des1, des2, None)
    matches = sorted(matches, key=lambda x: x.distance)
    points1 = np.zeros((len(matches), 2), dtype=np.float32)
    points2 = np.zeros((len(matches), 2), dtype=np.float32)
    for i, match in enumerate(matches):
        points1[i, :] = kp1[match.queryIdx].pt  # index of descriptor in query descriptors, i.e. in channel 1, which is the image we wish to map to channel 2
        points2[i, :] = kp2[match.trainIdx].pt
    mat_coeff, inliers = cv2.estimateAffine2D(points1, points2)  # estimateAffine2D returns both matrix coefficients and inliers
    print(mat_coeff)
    rows, cols = channel1_u8.shape
    #dst = cv2.warpAffine(channel1_u8, mat_coeff, (cols, rows))
    dst = cv2.warpAffine(self.channel1, mat_coeff, (cols, rows))
    return mat_coeff, dst

# later, using the dst returned above:
tform = PolynomialTransform()
tform.estimate(self.channel2, dst, order=3)
warped_1 = warp(dst, tform, mode='constant')
I found the error. I was trying to feed PolynomialTransform.estimate the entire image, rather than the identified key points in the image.
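For concreteness, a minimal sketch of what the corrected call might look like, assuming points1 and points2 are the matched (N, 2) keypoint coordinate arrays built in affine_transform above (this is my reading, not code from the original post):
from skimage.transform import PolynomialTransform, warp

# estimate the polynomial mapping from point correspondences, not from whole images
tform = PolynomialTransform()
tform.estimate(points1, points2, order=3)  # both arguments are (N, 2) arrays of (x, y) coordinates

# warp treats the transform it is given as the inverse map (output coords -> input coords),
# so the estimation direction may need to be swapped depending on which channel is being warped
warped_1 = warp(dst, tform, mode='constant')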

Image augmentation in Pytorch

I would like to augment images alternately.
I have the following PyTorch transform code.
import torchvision.transforms as tt
from torchvision.datasets import ImageFolder
from PIL import Image

# Data transform (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_tfms = tt.Compose([tt.RandomCrop(32, padding=4, padding_mode='reflect'),
                         tt.RandomHorizontalFlip(),
                         tt.RandomAffine(degrees=(10, 30),
                                         translate=(0.1, 0.3),
                                         scale=(0.7, 1.3),
                                         shear=0.1,
                                         resample=Image.BICUBIC),
                         tt.ToTensor(),
                         tt.Normalize(*stats)])
When I create the dataset as follows and run training, all images are augmented.
train_ds = ImageFolder('content/train', train_tfms)
But I want it to alternate: the first image is trained on as the original image, but the next image is augmented.
How can I do that?
From a single dataset you can create two datasets, one with augmentation and the other without, and then concatenate them. The Subset class keeps the order of the indices you pass it, so each half stays internally ordered.
import torch
from torch.utils.data import Subset, ConcatDataset

# the "no augmentation" dataset still needs ToTensor so both datasets yield tensors
train_ds_no_aug = ImageFolder('content/train', tt.ToTensor())
train_ds_aug = ImageFolder('content/train', train_tfms)
# even indices -> original images, odd indices -> augmented images, so the two sets do not overlap
no_aug_idx = torch.arange(0, len(train_ds_no_aug), 2)
aug_idx = torch.arange(1, len(train_ds_no_aug), 2)
train_ds_no_aug = Subset(train_ds_no_aug, no_aug_idx)
train_ds_aug = Subset(train_ds_aug, aug_idx)
# ConcatDataset (not ChainDataset, which is for iterable-style datasets) joins the two map-style datasets
train_ds = ConcatDataset([train_ds_no_aug, train_ds_aug])
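As a usage sketch (my own addition, with hypothetical batch size), the concatenated dataset is then fed to a DataLoader as usual; with shuffle=True the original and augmented samples get mixed within batches:
from torch.utils.data import DataLoader

# hypothetical loader settings; shuffle=True mixes original and augmented samples across batches
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=2)
for images, labels in train_dl:
    pass  # training step goes here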

How to get a specific sample from pytorch DataLoader?

In Pytorch, is there any way of loading a specific single sample using the torch.utils.data.DataLoader class? I'd like to do some testing with it.
The tutorial uses
trainloader = torch.utils.data.DataLoader(...)
images, labels = next(iter(trainloader))
to fetch a random batch of samples. Is there a way, using DataLoader, to get a specific sample?
Cheers
1. Turn off shuffling in the DataLoader.
2. Use batch_size to work out which batch the desired sample falls into.
3. Iterate to that batch.
Code
import torch
import numpy as np
import itertools

X = np.arange(100)
batch_size = 2
dataloader = torch.utils.data.DataLoader(X, batch_size=batch_size, shuffle=False)
sample_at = 5
k = int(np.floor(sample_at / batch_size))
my_sample = next(itertools.islice(dataloader, k, None))
print(my_sample)
Output:
tensor([4, 5])
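The returned batch still has batch_size elements; to pull out only the requested sample, index within that batch (a small addition to the answer above):
single_sample = my_sample[sample_at % batch_size]  # position of the sample inside its batch
print(single_sample)  # tensor(5)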
If you want to get a specific single sample from your dataset, you should check the Subset class (https://pytorch.org/docs/stable/data.html#torch.utils.data.Subset).
Something like this:
from torch.utils.data import DataLoader

indices = [0, 1, 2]  # select your indices here as a list
subset = torch.utils.data.Subset(train_set, indices)
trainloader = DataLoader(subset, batch_size=16, shuffle=False)  # set shuffle to False
for image, label in trainloader:
    print(image.size(), '\t', label.size())
    print(image[0], '\t', label[0])  # index the specific sample
Here is a useful link if you want to learn more about the PyTorch data loading utility:
https://pytorch.org/docs/stable/data.html
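As a side note (not part of either answer above), a map-style dataset also supports plain indexing, so if you do not need batching you can fetch one sample directly:
# assumes train_set is a map-style dataset such as torchvision's CIFAR10 or ImageFolder
image, label = train_set[5]  # fetch the sample at index 5 without a DataLoader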

How to deform/scale a 3 dimensional numpy array in one dimension?

I would like to deform/scale a three dimensional numpy array in one dimension. I will visualize my problem in 2D:
I have the original image, which is a 2D numpy array:
Then I want to deform/scale it by some factor in dimension 0, the horizontal dimension:
For PIL images, there are a lot of solutions, for example in pytorch, but what if I have a numpy array of shapes (w, h, d) = (288, 288, 468)? I would like to upsample the width with a factor of 1.04, for example, to (299, 288, 468). Each cell contains a normalized number between 0 and 1.
I am not sure whether I am simply not searching online with the correct vocabulary, so correcting my question would also help. Or tell me the mathematical background of this problem, and then I can write the code on my own.
Thank you!
You can repeat the array along the specific axis a number of times equal to ceil(factor) where factor > 1 and then evenly space indices on the stretched dimension to select int(factor * old_length) elements. This does not perform any kind of interpolation but just repeats some of the elements:
import math
import cv2
import numpy as np

# scipy.ndimage.imread has been removed from recent SciPy versions, so cv2 is used for I/O here
img = cv2.imread('/tmp/example.png', cv2.IMREAD_GRAYSCALE)
print(img.shape)  # (512, 512)
axis = 1
factor = 1.25
stretched = np.repeat(img, math.ceil(factor), axis=axis)
print(stretched.shape)  # (512, 1024)
indices = np.linspace(0, stretched.shape[axis] - 1, int(img.shape[axis] * factor))
indices = np.rint(indices).astype(int)
result = np.take(stretched, indices, axis=axis)
print(result.shape)  # (512, 640)
cv2.imwrite('/tmp/stretched.png', result)
This is the result (left is original example.png and right is stretched.png):
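To see what the repeat-then-select step does numerically, here is a tiny illustration of my own (not from the answer) stretching a 4-element row by a factor of 1.25:
import math
import numpy as np

row = np.array([[10, 20, 30, 40]])
factor = 1.25
stretched = np.repeat(row, math.ceil(factor), axis=1)  # [[10 10 20 20 30 30 40 40]]
idx = np.rint(np.linspace(0, stretched.shape[1] - 1, int(row.shape[1] * factor))).astype(int)
print(np.take(stretched, idx, axis=1))  # [[10 20 30 30 40]] -- one element repeats, nothing is interpolated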
Looks like it is as easy as using torch.nn.functional.interpolate from PyTorch and choosing 'trilinear' as the interpolation mode:
import torch

PET = torch.tensor(data)
print("Old shape = {}".format(PET.shape))
scale_factor_x = 1.4

# Scaling. 'trilinear' expects a 5D (N, C, D, H, W) tensor, hence the two unsqueeze/squeeze calls.
PET = torch.nn.functional.interpolate(PET.unsqueeze(0).unsqueeze(0),
                                      scale_factor=(scale_factor_x, 1, 1),
                                      mode='trilinear').squeeze().squeeze()
print("New shape = {}".format(PET.shape))
output:
>>> Old shape = torch.Size([288, 288, 468])
>>> New shape = torch.Size([403, 288, 468])
I verified the results by looking at the data, but I can't show them here due to data privacy. Sorry!
This is an example of linear up-sampling a 3D image with scipy.interpolate; hope it helps.
(I worked quite a lot with np.meshgrid here; if you are not familiar with it, I recently explained it here.)
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.interpolate import RegularGridInterpolator
# should be 1.3.0
print(scipy.__version__)
# =============================================================================
# producing a test image "image3D"
# =============================================================================
def some_function(x, y, z):
    # output is a 3D Gaussian with some periodic modification
    # it's only for testing, so this part is not important
    out = np.sin(2*np.pi*x)*np.cos(np.pi*y)*np.cos(4*np.pi*z)*np.exp(-(x**2+y**2+z**2))
    return out
# define a grid to evaluate the function on.
# the dimension of the 3D-Image will be (20,20,20)
N = 20
x = np.linspace(-1,1,N)
y = np.linspace(-1,1,N)
z = np.linspace(-1,1,N)
xx, yy, zz = np.meshgrid(x,y,z,indexing ='ij')
image3D = some_function(xx,yy,zz)
# =============================================================================
# plot the test image "image3D"
# you will see 5 images that correspond to the slicing of the
# z-axis, similar to your example picture:
# https://sites.google.com/site/linhvtlam2/fl7_ctslices.jpg
# =============================================================================
def plot_slices(image_3d):
    f, loax = plt.subplots(1, 5, figsize=(15, 5))
    loax = loax.flatten()
    for ii, i in enumerate([8, 9, 10, 11, 12]):
        loax[ii].imshow(image_3d[:, :, i], vmin=image_3d.min(), vmax=image_3d.max())
    plt.show()
plot_slices(image3D)
# =============================================================================
# interpolate the image
# =============================================================================
interpolation_function = RegularGridInterpolator((x, y, z), image3D, method = 'linear')
# =============================================================================
# evaluate at new grid
# =============================================================================
# create the new grid that you want
x_new = np.linspace(-1,1,30)
y_new = np.linspace(-1,1,40)
z_new = np.linspace(-1,1,N)
xx_new, yy_new, zz_new = np.meshgrid(x_new,y_new,z_new,indexing ='ij')
# change the order of the points to match the input shape of the interpolation
# function. That's a bit messy, but I couldn't figure out a way around it.
evaluation_points = np.rollaxis(np.array([xx_new,yy_new,zz_new]),0,4)
interpolated = interpolation_function(evaluation_points)
plot_slices(interpolated)
The original (20,20,20) dimensional 3D Image:
And the upsampled (30,40,20) dimensional 3D Image:
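As a small aside of my own (not part of the answer), the "messy" rollaxis step can also be written with np.stack, which places the coordinate triplets on the last axis directly:
import numpy as np

# equivalent to np.rollaxis(np.array([xx_new, yy_new, zz_new]), 0, 4):
# stacks the three coordinate grids along a new last axis, giving shape (30, 40, 20, 3)
evaluation_points = np.stack([xx_new, yy_new, zz_new], axis=-1)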

Get gradient value necessary to break an image

I've been experimenting with adversarial images and I read up on the fast gradient sign method from the following link https://arxiv.org/pdf/1412.6572.pdf...
The instructions explain that the necessary gradient can be calculated using backpropagation...
I've been successful at generating adversarial images but I have failed at attempting to extract the gradient necessary to create an adversarial image. I will demonstrate what I mean.
Let us assume that I have already trained my algorithm using logistic regression. I restore the model and I extract the number I wish to change into an adversarial image. In this case it is the number 2...
# construct model
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)
...
...
# assign the images of number 2 to the variable
sess.run(tf.assign(x, labels_of_2))
# setup softmax
sess.run(pred)
# placeholder for target label
fake_label = tf.placeholder(tf.int32, shape=[1])
# setup the fake loss
fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=fake_label)
# minimize fake loss using gradient descent,
# calculating the derivatives of the weight of the fake image will give the direction of weights necessary to change the prediction
adversarial_step = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate).minimize(fake_loss, var_list=[x])
# continue calculating the derivative until the prediction changes for all 10 images
for i in range(FLAGS.training_epochs):
    # the fake label tells the training algorithm to use the weights calculated for number 6
    sess.run(adversarial_step, feed_dict={fake_label: np.array([6])})
    sess.run(pred)
This is my approach, and it works perfectly. It takes my image of number 2 and changes it only slightly so that when I run the following...
x_in = np.expand_dims(x[0], axis=0)
classification = sess.run(tf.argmax(pred, 1))
print(classification)
it will predict the number 2 as a number 6.
The issue is, I need to extract the gradient necessary to trick the neural network into thinking number 2 is 6. I need to use this gradient to create the nematode mentioned above.
I am not sure how I can extract the gradient value. I tried looking at tf.gradients but I was unable to figure out how to produce an adversarial image using this function. I implemented the following after the fake_loss variable above...
gradients = tf.gradients(fake_loss, x)
for i in range(FLAGS.training_epochs):
    # calculate the gradient with respect to the fake label (number 6)
    gradient_value = sess.run(gradients, feed_dict={fake_label: np.array([6])})
    # update the image of number 2
    gradient_update = x + 0.007 * gradient_value[0]
    sess.run(tf.assign(x, gradient_update))
    sess.run(pred)
Unfortunately the prediction did not change in the way I wanted, and moreover this logic resulted in a rather blurry image.
I would appreciate an explanation as to what I need to do in order to calculate and extract the gradient that will trick the neural network, so that if I were to take this gradient and apply it to my image as a nematode, it will result in a different prediction.
Why not let the Tensorflow optimizer add the gradients to your image? You can still evaluate the nematode to get the resulting gradients that were added.
I created a bit of sample code to demonstrate this with a panda image. It uses the VGG16 neural network to transform your own panda image into a "goldfish" image. Every 100 iterations it saves the image as PDF so you can print it losslessly to check if your image is still a goldfish.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipyd
from libs import vgg16 # Download here! https://github.com/pkmital/CADL/tree/master/session-4/libs
pandaimage = plt.imread('panda.jpg')
pandaimage = vgg16.preprocess(pandaimage)
plt.imshow(pandaimage)
img_4d = np.array([pandaimage])
g = tf.get_default_graph()
input_placeholder = tf.Variable(img_4d,trainable=False)
to_add_image = tf.Variable(tf.random_normal([224,224,3], mean=0.0, stddev=0.1, dtype=tf.float32))
combined_images_not_clamped = input_placeholder+to_add_image
filledmax = tf.fill(tf.shape(combined_images_not_clamped), 1.0)
filledmin = tf.fill(tf.shape(combined_images_not_clamped), 0.0)
greater_than_one = tf.greater(combined_images_not_clamped, filledmax)
combined_images_with_max = tf.where(greater_than_one, filledmax, combined_images_not_clamped)
lower_than_zero =tf.less(combined_images_with_max, filledmin)
combined_images = tf.where(lower_than_zero, filledmin, combined_images_with_max)
net = vgg16.get_vgg_model()
tf.import_graph_def(net['graph_def'], name='vgg')
names = [op.name for op in g.get_operations()]
style_layer = 'prob:0'
the_prediction = tf.import_graph_def(
net['graph_def'],
name='vgg',
input_map={'images:0': combined_images},return_elements=[style_layer])
goldfish_expected_np = np.zeros(1000)
goldfish_expected_np[1]=1.0
goldfish_expected_tf = tf.Variable(goldfish_expected_np,dtype=tf.float32,trainable=False)
loss = tf.reduce_sum(tf.square(the_prediction[0]-goldfish_expected_tf))
optimizer = tf.train.AdamOptimizer().minimize(loss)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
def show_many_images(*images):
    fig = plt.figure()
    for i in range(len(images)):
        print(images[i].shape)
        subplot_number = 100 + 10*len(images) + (i + 1)
        plt.subplot(subplot_number)
        plt.imshow(images[i])
    plt.show()

for i in range(1000):
    _, loss_val = sess.run([optimizer, loss])
    if i % 100 == 1:
        print("Loss at iteration %d: %f" % (i, loss_val))
        _, loss_val, adversarial_image, pred, nematode = sess.run([optimizer, loss, combined_images, the_prediction, to_add_image])
        res = np.squeeze(pred)
        average = np.mean(res, 0)
        res = res / np.sum(average)
        plt.imshow(adversarial_image[0])
        plt.show()
        print([(res[idx], net['labels'][idx]) for idx in res.argsort()[-5:][::-1]])
        show_many_images(img_4d[0], nematode, adversarial_image[0])
        plt.imsave('adversarial_goldfish.pdf', adversarial_image[0], format='pdf')  # save for printing
Let me know if this helps you!
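If you specifically want the single-step fast gradient sign update from the paper rather than an optimizer loop, here is a minimal TF1-style sketch of my own (reusing x, fake_loss, and fake_label from the question; not verified against that exact model):
epsilon = 0.007

# gradient of the fake-label loss with respect to the image variable
grad = tf.gradients(fake_loss, x)[0]

# the perturbation ("nematode") is the signed gradient scaled by epsilon; the minus sign descends
# the fake-label loss so the prediction moves toward 6 (the paper's untargeted attack instead
# adds the signed gradient of the true-label loss)
perturbation = -epsilon * tf.sign(grad)
fgsm_step = tf.assign(x, x + perturbation)

nematode_value, _ = sess.run([perturbation, fgsm_step], feed_dict={fake_label: np.array([6])})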
