Calculating the Hessian vector product of a Flax NN output w.r.t. the inputs - jax

I am trying to get the second derivative of the output w.r.t. the input of a neural network built using Flax. The network is structured as follows:
import numpy as np
import jax
import jax.numpy as jnp
from jax import grad, jit, vmap
from typing import Sequence
import flax.linen as nn
import optax

class MLP(nn.Module):
    features: Sequence[int]

    @nn.compact
    def __call__(self, x):
        for feat in self.features[:-1]:
            x = nn.tanh(nn.Dense(feat)(x))
        x = nn.Dense(self.features[-1])(x)
        return x

model = MLP([20, 20, 20, 20, 20, 1])
batch = jnp.ones((32, 3))  # dummy input to initialize the NN
params = model.init(jax.random.PRNGKey(0), batch)
X = jnp.ones((32, 3))
output = model.apply(params, X)
I can get the first derivative by using vmap over grad:
@jit
def u_function(params, X):
    u = model.apply(params, X)
    return jnp.squeeze(u)

grad_fn = vmap(grad(u_function, argnums=1), in_axes=(None, 0), out_axes=0)
u_X = grad_fn(params, X)
However, when I try to do this again to obtain the second derivative:
u_X_func = vmap(grad(u_function, argnums=1), in_axes=(None, 0), out_axes=0)
u_XX_func = vmap(grad(u_X_func, argnums=1), in_axes=(None, 0), out_axes=0)(params, X)
I get the following error:
/usr/local/lib/python3.7/dist-packages/flax/linen/linear.py in __call__(self, inputs)
    186     kernel = self.param('kernel',
    187                         self.kernel_init,
--> 188                         (jnp.shape(inputs)[-1], self.features),
    189                         self.param_dtype)
    190     if self.use_bias:

IndexError: tuple index out of range
I tried using the hvp definition from the autodiff cookbook, but with params being an input to the function I wasn't sure how to proceed.
Any help on this would be much appreciated.

The issue is that your u_function maps a length-3 vector to a scalar. Its first derivative is a length-3 vector, but its second derivative is a 3x3 Hessian matrix, which you cannot compute via jax.grad because grad is only designed for scalar-output functions. Fortunately, JAX provides the jax.hessian transform to compute these general second derivatives:
from jax import hessian

u_XX = vmap(hessian(u_function, argnums=1), in_axes=(None, 0), out_axes=0)(params, X)
print(u_XX.shape)
# (32, 3, 3)
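Since the title asks for a Hessian-vector product specifically, here is a minimal sketch (my addition, reusing u_function, params, and X from above) of the forward-over-reverse hvp pattern from the autodiff cookbook, adapted so that params rides along as a non-differentiated argument; it avoids materializing the full 3x3 Hessian:
from jax import grad, jvp

def hvp(params, x, v):
    # Differentiate grad_x u(params, x) in the direction v: this equals H(x) @ v.
    return jvp(lambda xi: grad(u_function, argnums=1)(params, xi), (x,), (v,))[1]

V = jnp.ones((32, 3))  # one (hypothetical) tangent vector per input row
u_XX_v = vmap(hvp, in_axes=(None, 0, 0))(params, X, V)
print(u_XX_v.shape)
# (32, 3)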

Related

How to move feature maps across images using slicing?

I am trying to implement the online algorithm of this paper, which is on video classification. This work moves 1/8 of the channel feature maps from each image into the next image after each convolution operation. The image of the operation has been attached here -
While trying to implement the same, I have succeeded in extracting the first 1/8 of the channel feature maps, but I don't know how to add them to the succeeding image. My code is attached below:
import cv2
import gym
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F

N = 1    # batch size
T = 5    # time steps: there are 5 frames in the video
C = 3    # RGB channels
H = 144  # height
W = 144  # width

foo = torch.randn(N * T, C, H, W)
print("Shape of foo = ", foo.shape)
# torch.Size([5, 3, 144, 144])

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        print("Shape of x = ", x.shape)
        # torch.Size([5, 8, 140, 140])
        shape_extract = x[:, :1, :, :]
        print("Shape of extract = ", shape_extract.shape)
        # torch.Size([5, 1, 140, 140])
        # 1/8 of the channels have been extracted above. But how do I
        # transfer these channel features to the next image?
        return x

net = Net()
output = net(foo)
Since your whole sequence is inside the batch, you can shift the feature maps by rolling the elements with torch.roll:
>>> rolled = x.roll(shifts=1, dims=1)
Going from this layer layout on axis=1:
[x_0, x_1, x_2, x_3, ..., x_7]
to this one:
[x_7, x_0, x_1, x_2, ..., x_6]
Then replacing the first element by x_0:
>>> rolled[:, 0] = x[:, 0]
Resulting in this layout:
[x_0, x_0, x_1, x_2, ..., x_6]
Then you can input tensor rolled into the next layer.
You can implement a custom layer to wrap this logic:
class ShiftLayer(nn.Module):
    def forward(self, x):
        out = x.roll(1, 1)
        out[:, 0] = x[:, 0]
        return out
Then use it inside your model:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        ...
        self.shift = ShiftLayer()

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.shift(x)
        x = F.relu(self.conv2(x))
        return x
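As a quick sanity check (my addition, using a toy tensor in place of the real feature maps), the shift keeps channel 0 and moves every other channel's features one slot forward:
import torch

x = torch.arange(8.0).view(1, 8, 1, 1)  # stand-in for a (N*T, C, H, W) feature map
shift = ShiftLayer()
print(x.flatten().tolist())         # [0.0, 1.0, 2.0, ..., 7.0]
print(shift(x).flatten().tolist())  # [0.0, 0.0, 1.0, ..., 6.0]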

Pytorch: convolve a sample with a specific filter

Given a batch of samples, I would like to convolve each of them with a different filter. I have implemented the idea with Keras, and the code works:
import keras.backend as K

def single_conv(tupl):
    inp, kernel = tupl
    outputs = K.conv1d(inp, kernel, padding='same')
    return outputs

# inputs and filters are given in some way
res = K.squeeze(K.map_fn(single_conv, (inputs, filters), dtype=K.floatx()), axis=1)
Is there any way to do this with PyTorch?
You can try this:
import torch
import torch.nn as nn

conv2d = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)
inp = torch.ones((1, 3, 5, 5))
conv2d.weight = nn.Parameter(torch.ones((3, 3, 3, 3)))  # you can set anything you want
model = nn.Sequential(conv2d)
res = model(inp)
print(res.shape)
# print(res)
You can convolve it with whatever filter you want.
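If the goal is a different filter per sample, as in the K.map_fn version, one way to sketch it in PyTorch (my addition, with hypothetical shapes) is to fold the batch into the channel dimension and use a grouped convolution, so each sample is convolved only with its own filter bank:
import torch
import torch.nn.functional as F

B, C_in, C_out, L, K = 4, 3, 5, 16, 3      # hypothetical sizes
inputs = torch.randn(B, C_in, L)           # one signal per sample
filters = torch.randn(B, C_out, C_in, K)   # one filter bank per sample

x = inputs.reshape(1, B * C_in, L)         # batch folded into channels
w = filters.reshape(B * C_out, C_in, K)
out = F.conv1d(x, w, groups=B, padding=K // 2)  # 'same' padding for odd K
out = out.reshape(B, C_out, L)
print(out.shape)
# torch.Size([4, 5, 16])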

How to solve the RuntimeError when using torch.utils.tensorboard to add a graph

I am trying to use TensorBoard to visualize my PyTorch model and have encountered a problem. The input tensor's shape is (-1, 1, 20, 15) and the output tensor's shape is (-1, 6). My model combines a list of 5 convolutional networks.
packages:
python: 3.7.6
pytorch: 1.4.0
tensorboard: 2.1.0
The pytorch model is as below:
import torch
from torch import nn
from torch.nn import functional as F

class MyModel(nn.Module):
    """example"""
    def __init__(self, nchunks=[2, 5, 3, 2, 3], resp_size=6):
        super().__init__()
        self.nchunks = nchunks
        self.conv = [nn.Conv2d(1, 2, (2, x)) for x in nchunks]
        self.pool = nn.Sequential(
            nn.AdaptiveMaxPool1d(output_size=10), nn.Flatten(start_dim=1)
        )
        self.bn = nn.BatchNorm1d(100)
        self.fc1 = nn.Linear(100, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, resp_size)

    def forward(self, x):
        xi = torch.split(x, self.nchunks, dim=3)
        xi = [f(subx.float()).view(-1, 2, 19) for f, subx in zip(self.conv, xi)]
        xi = [self.pool(subx) for subx in xi]
        xi = torch.cat(xi, dim=1)
        xi = self.bn(xi)
        xi = F.relu(self.fc1(xi))
        xi = F.relu(self.fc2(xi))
        xi = self.fc3(xi)
        return xi
Here is the code for the tensorboard summary writer:
from torch.utils.tensorboard import SummaryWriter
x = torch.rand((5,1,20,15))
model = MyModel()
writer = SummaryWriter('logs')
writer.add_graph(model, x)
The following error is returned:
RuntimeError: Cannot insert a Tensor that requires grad as a constant. Consider making it a parameter or input, or detaching the gradient
Tensor:
(1,1,.,.) =
-0.2108 -0.4986
-0.4009 -0.1910
(2,1,.,.) =
0.2383 -0.4147
0.2642 0.0456
[ torch.FloatTensor{2,1,2,2} ]
I guess the model has some issues, but I am not sure what is happening.
This similar GitHub issue does not relate to my problem, because I am not using multiple GPUs.
I solved the problem by replacing
[nn.Conv2d(1, 2, (2, x)) for x in nchunks]
with
nn.ModuleList([nn.Conv2d(1, 2, (2, x)) for x in nchunks])
A plain Python list does not register its contents as submodules, so the convolutions' weights are not registered parameters; when add_graph traces the model, those weights appear as constants that require grad, which is exactly what the error complains about.
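A minimal sketch (my addition, with a hypothetical toy module) of why the registration matters: parameters held in a plain list are invisible to the module, while nn.ModuleList exposes them:
import torch.nn as nn

class Plain(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(2, 2) for _ in range(3)]  # plain list: NOT registered

class Registered(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList(nn.Linear(2, 2) for _ in range(3))  # registered

print(len(list(Plain().parameters())))       # 0
print(len(list(Registered().parameters())))  # 6 (weight + bias per layer)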

How to get the real batch_size when the shape shows None in Keras

When implementing a custom layer in Keras, I need to know the real value of batch_size. My shape is (?, 20).
Questions:
1. What is the best way to change (?, 20) to (batch_size, 20)?
I have looked into this, but it does not fit my problem.
I can pass the batch_size to this layer. In that case, I need to reshape (?, 20) to (batch_size, 20); how can I do that?
2. Is that the best way to do it, or is there any built-in function that can get the real batch_size while building and running the model?
This is my layer:
from scipy.stats import entropy
from keras.engine import Layer
import keras.backend as K
import numpy as np

class measure(Layer):
    def __init__(self, beta, **kwargs):
        self.beta = beta
        self.uses_learning_phase = True
        self.supports_masking = True
        super(measure, self).__init__(**kwargs)

    def call(self, x):
        return K.in_train_phase(self.rev_entropy(x, self.beta), x)

    def get_config(self):
        config = {'beta': self.beta}
        base_config = super(measure, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def rev_entropy(self, x, beta):
        entropy_p_t_w = np.apply_along_axis(entropy, 1, x)
        con = (beta / (1 + entropy_p_t_w)) ** 1.5
        new_f_w_t = x * (con.reshape(con.shape[0], 1))
        norm_const = 1e-30 + np.sum(new_f_w_t, axis=0)
        for t in range(norm_const.shape[0]):
            new_f_w_t[:, t] /= norm_const[t]
        return new_f_w_t
And here is where I call this layer:
encoded = measure(beta=0.08)(encoded)
I am also using fit_generator, in case that helps:
autoencoder.fit_generator(train_gen, steps_per_epoch=num_train_steps, epochs=NUM_EPOCHS,
                          validation_data=test_gen, validation_steps=num_test_steps,
                          callbacks=[checkpoint])
The dimension of the x passed to the layer is (?, 20), and that's why I cannot do my calculation.
Thanks:)
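As a hedged pointer (my addition, not from the original thread), the Keras backend exposes the dynamic batch size as a tensor via K.shape even while the static shape still shows None, which may be what question 2 is after:
import keras.backend as K
from keras.engine import Layer

class BatchSizeDemo(Layer):
    def call(self, x):
        n = K.shape(x)[0]      # dynamic batch size: a scalar tensor, known at run time
        print(K.int_shape(x))  # static shape: (None, 20) at graph-build time
        return x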

Pytorch: How to compute IoU (Jaccard Index) for semantic segmentation

Can someone provide a toy example of how to compute IoU (intersection over union) for semantic segmentation in pytorch?
As of 2021, there's no need to implement your own IoU, as torchmetrics comes equipped with it - here's the link.
It is named torchmetrics.JaccardIndex (previously torchmetrics.IoU) and calculates what you want.
It works with PyTorch and PyTorch Lightning, also with distributed training.
From the documentation:
torchmetrics.JaccardIndex(num_classes, ignore_index=None, absent_score=0.0, threshold=0.5, multilabel=False, reduction='elementwise_mean', compute_on_step=None, **kwargs)
Computes Intersection over union, or Jaccard index calculation:
J(A,B) = \frac{|A\cap B|}{|A\cup B|}
Where: A and B are both tensors of the same size, containing integer class values. They may be subject to conversion from input data (see description below). Note that it is different from box IoU.
Works with binary, multiclass and multi-label data. Accepts probabilities from a model output or integer class values in prediction. Works with multi-dimensional preds and target.
Forward accepts
preds (float or long tensor): (N, ...) or (N, C, ...) where C is the number of classes
target (long tensor): (N, ...)
If preds and target are the same shape and preds is a float tensor, we use the self.threshold argument to convert into integer labels. This is the case for binary and multi-label probabilities.
If preds has an extra dimension, as in the case of multi-class scores, we perform an argmax on dim=1.
Official example:
>>> from torchmetrics import JaccardIndex
>>> target = torch.randint(0, 2, (10, 25, 25))
>>> pred = torch.tensor(target)
>>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15]
>>> jaccard = JaccardIndex(num_classes=2)
>>> jaccard(pred, target)
tensor(0.9660)
I found this somewhere and adapted it for me. I'll post the link if I can find it again. Sorry in case this was a duplicate.
The key function here is the function called iou. The wrapping function evaluate_performance is not universal, but it shows that one needs to iterate over all results before computing IoU.
import torch
import numpy as np
import pandas as pd  # for filelist reading
import myPytorchDatasetClass  # custom dataset class, inherited from torch.utils.data.dataset

def iou(pred, target, n_classes=12):
    ious = []
    pred = pred.view(-1)
    target = target.view(-1)
    # Ignore IoU for background class ("0")
    for cls in range(1, n_classes):  # this goes from 1:n_classes-1 -> class "0" is ignored
        pred_inds = pred == cls
        target_inds = target == cls
        intersection = (pred_inds[target_inds]).long().sum().item()  # cast to long to prevent overflows
        union = pred_inds.long().sum().item() + target_inds.long().sum().item() - intersection
        if union == 0:
            ious.append(float('nan'))  # if there is no ground truth, do not include in evaluation
        else:
            ious.append(float(intersection) / float(max(union, 1)))
    return np.array(ious)
def evaluate_performance(net):
    # Dataloader for test data
    batch_size = 1
    filelist_name_test = '/path/to/my/test/filelist.txt'
    data_root_test = '/path/to/my/data/'
    dset_test = myPytorchDatasetClass.CustomDataset(filelist_name_test, data_root_test)
    test_loader = torch.utils.data.DataLoader(dataset=dset_test,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              pin_memory=True)
    data_info = pd.read_csv(filelist_name_test, header=None)
    num_test_files = data_info.shape[0]
    sample_size = num_test_files

    # Containers for results
    preds = torch.zeros((sample_size, 60, 36, 60))
    gts = torch.zeros((sample_size, 60, 36, 60))

    dataiter = iter(test_loader)
    for i in range(sample_size):
        images, labels, filename = next(dataiter)
        images = images.cuda()
        gts[i:i+batch_size, :, :, :] = labels
        outputs = net(images)
        outputs = outputs.permute(0, 2, 3, 4, 1).contiguous()
        val, pred = torch.max(outputs, 4)
        preds[i:i+batch_size, :, :, :] = pred.cpu()
    acc = iou(preds, gts)
    return acc
Say your outputs are of shape [32, 256, 256] (32 is the minibatch size and 256x256 is the image's height and width), and the labels are of the same shape.
Then you can use sklearn's jaccard_score (called jaccard_similarity_score in older scikit-learn versions) after some reshaping; for multi-class labels, pass an average argument such as average='macro'.
If both are torch tensors, then:
lbl = labels.cpu().numpy().reshape(-1)
target = output.cpu().numpy().reshape(-1)
Now:
from sklearn.metrics import jaccard_score as jsc
print(jsc(target, lbl))
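For a dependency-free toy example (my addition, with hypothetical tensors) of binary IoU in plain PyTorch:
import torch

def binary_iou(pred: torch.Tensor, target: torch.Tensor) -> float:
    pred, target = pred.bool(), target.bool()
    intersection = (pred & target).sum().item()
    union = (pred | target).sum().item()
    return float('nan') if union == 0 else intersection / union

pred = torch.randint(0, 2, (32, 256, 256))
target = torch.randint(0, 2, (32, 256, 256))
print(binary_iou(pred, target))  # roughly 0.33 for independent random {0,1} masks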
