Torchscripting a module with _ConvNd in forward - pytorch

I am using PyTorch 1.4 and need to export a model with convolutions inside a loop in forward:
# Minimal reproduction from the question: forward() builds a fresh Conv1d
# (kernel size 2*i+3) on each of its five loop iterations.
class MyCell(torch.nn.Module):
def __init__(self):
super(MyCell, self).__init__()
def forward(self, x):
for i in range(5):
# The layer is constructed inside forward(); the traceback quoted in the
# question is raised while TorchScript compiles this Conv1d(...) call.
conv = torch.nn.Conv1d(1, 1, 2*i+3)
# NOTE(review): torch.nn spells the activation class `ReLU`; `Relu` looks
# like a typo -- confirm against the asker's real code.
x = torch.nn.Relu()(conv(x))
return x
# Scripting the module is the call that fails.
torch.jit.script(MyCell())
This gives the following error:
RuntimeError:
Arguments for call are not valid.
The following variants are available:
_single(float[1] x) -> (float[]):
Expected a value of type 'List[float]' for argument 'x' but instead found type 'Tensor'.
_single(int[1] x) -> (int[]):
Expected a value of type 'List[int]' for argument 'x' but instead found type 'Tensor'.
The original call is:
File "***/torch/nn/modules/conv.py", line 187
padding=0, dilation=1, groups=1,
bias=True, padding_mode='zeros'):
kernel_size = _single(kernel_size)
~~~~~~~ <--- HERE
stride = _single(stride)
padding = _single(padding)
'Conv1d.__init__' is being compiled since it was called from 'Conv1d'
File "***", line ***
def forward(self, x):
for _ in range(5):
conv = torch.nn.Conv1d(1, 1, 2*i+3)
~~~~~~~~~~~~~~~ <--- HERE
x = torch.nn.Relu()(conv(x))
return x
'Conv1d' is being compiled since it was called from 'MyCell.forward'
File "***", line ***
def forward(self, x, h):
for _ in range(5):
conv = torch.nn.Conv1d(1, 1, 2*i+3)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
x = torch.nn.Relu()(conv(x))
return x
I have also tried pre-defining the conv's then putting them in a list inside __init__, but such a type is not allowed by TorchScript:
# Second attempt from the question: the five Conv1d layers are created once
# in __init__ and kept in a plain Python list.
class MyCell(torch.nn.Module):
def __init__(self):
super(MyCell, self).__init__()
# A built-in list, not an nn.ModuleList; the error quoted in the question
# reports that this 'list' could not be converted to a TorchScript type.
self.conv = [torch.nn.Conv1d(1, 1, 2*i+3) for i in range(5)]
def forward(self, x):
for i in range(len(self.conv)):
# NOTE(review): torch.nn spells the activation class `ReLU`; `Relu` looks
# like a typo -- confirm against the asker's real code.
x = torch.nn.Relu()(self.conv[i](x))
return x
# Scripting the module is the call that fails.
torch.jit.script(MyCell())
This instead gives:
RuntimeError:
Module 'MyCell' has no attribute 'conv' (This attribute exists on the Python module, but we failed to convert Python type: 'list' to a TorchScript type.):
File "***", line ***
def forward(self, x):
for i in range(len(self.conv)):
~~~~~~~~~ <--- HERE
x = torch.nn.Relu()(self.conv[i](x))
return x
So how to export this module? Background: I am exporting Mixed-scale Dense Networks (source) to TorchScript; while nn.Sequential may work for this simplified case, practically I need to convolve with all the historical convolution outputs in each iteration, which is more than chaining the layers.

As an alternative to the suggestion by [https://stackoverflow.com/users/6210807/kharshit], you can define the network in a functional way:
class MyCell(torch.nn.Module):
    """Five chained 1-D convolutions, each followed by ReLU, in functional style.

    Instead of holding ``Conv1d`` sub-modules, the cell keeps one weight tensor
    per step (kernel sizes 3, 5, 7, 9, 11; one input and one output channel)
    and applies them with ``torch.nn.functional.conv1d``. No bias and no
    padding are used, so the last axis of the input shrinks by
    ``kernel_size - 1`` at every step (30 in total).

    The weights live in a plain Python list, so they are not registered as
    module parameters: they do not show up in ``parameters()`` or
    ``state_dict()``. Set ``requires_grad`` on them and hand them to the
    optimizer explicitly if they should be trained.
    """

    def __init__(self):
        super(MyCell, self).__init__()
        self.w = []
        for i in range(5):
            # torch.Tensor(1, 1, k) would only allocate memory and leave its
            # contents undefined, so create the tensor and fill it explicitly.
            weight = torch.empty(1, 1, 2 * i + 3)
            torch.nn.init.xavier_uniform_(weight)
            self.w.append(weight)

    def forward(self, x):
        """Apply the five convolution + ReLU steps in order and return the result.

        ``x`` is passed straight to ``conv1d`` with single-input-channel
        weights, so it needs exactly one channel.
        """
        for i in range(len(self.w)):
            x = torch.nn.functional.conv1d(x, self.w[i])
            x = torch.nn.functional.relu(x)
        return x

You can use nn.ModuleList() in the following way.
Also note that nn.ModuleList currently cannot be subscripted inside a scripted forward, probably because of the bug described in issue #16123; iterate over it directly instead, as in the workaround below.
class MyCell(nn.Module):
    """Five chained ``Conv1d`` layers, each followed by a shared ReLU.

    The convolutions (kernel sizes 3, 5, 7, 9, 11; one input and one output
    channel) are held in an ``nn.ModuleList`` so that they are registered as
    sub-modules of the cell.
    """

    def __init__(self):
        super(MyCell, self).__init__()
        self.conv = nn.ModuleList([nn.Conv1d(1, 1, 2 * i + 3) for i in range(5)])
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through every convolution in order, applying ReLU after each."""
        # Iterate over the ModuleList rather than indexing it.
        for mod in self.conv:
            x = self.relu(mod(x))
        return x
>>> torch.jit.script(MyCell())
RecursiveScriptModule(
original_name=MyCell
(conv): RecursiveScriptModule(
original_name=ModuleList
(0): RecursiveScriptModule(original_name=Conv1d)
(1): RecursiveScriptModule(original_name=Conv1d)
(2): RecursiveScriptModule(original_name=Conv1d)
(3): RecursiveScriptModule(original_name=Conv1d)
(4): RecursiveScriptModule(original_name=Conv1d)
)
(relu): RecursiveScriptModule(original_name=ReLU)
)

Related

Custom layer from keras to pytorch

Coming from TensorFlow background, I am trying to convert a snippet of code of the custom layer from Keras to PyTorch.
The custom layer in Keras looks like this:
# Keras layer from the question: learns one weight row per class and uses it
# to compute attention weights that are applied to `inputs`.
class Attention_module(tf.keras.layers.Layer):
def __init__(self, class_num):
# NOTE(review): class_num is forwarded positionally to the base Layer
# constructor -- confirm that is intended.
super(Attention_module,self).__init__(class_num)
self.class_num = class_num
# Placeholder; the weight itself is created in build().
self.Ws = None
def build(self, input_shape):
# The weight width is read from index 2 of the input shape.
embedding_length = int(input_shape[2])
self.Ws = self.add_weight(shape=(self.class_num, embedding_length),
initializer=tf.keras.initializers.get('glorot_uniform'), trainable=True)
super(Attention_module, self).build(input_shape)
def call(self, inputs):
# Swap the last two axes so Ws can be multiplied against the inputs.
sentence_trans = tf.transpose(inputs, [0, 2, 1])
at = tf.matmul(self.Ws, sentence_trans)
at = tf.math.tanh(at)
# Max-subtracted exp followed by normalisation: a softmax over the last axis.
at = K.exp(at - K.max(at, axis=-1, keepdims=True))
at = at / K.sum(at, axis=-1, keepdims=True)
# Combine the attention weights with the original inputs.
v = K.batch_dot(at, inputs)
return v
I want to implement the same in the torch; I have already done the forward pass block but am confused about how to do the embedding and weight initialization the same as the above layer in PyTorch?
# The asker's unfinished PyTorch port of the Keras layer; __init__ has no body
# yet, so self.Ws is never created in this snippet.
class Attention_module(torch.nn.Module):
def __init__(self, class_num):
# how to initialize weight with same as above keras layer?
def forward(self, inputs):
# Swap the last two axes, mirroring tf.transpose(inputs, [0, 2, 1]).
sentence_trans = inputs.permute(0, 2, 1)
# NOTE(review): torch.mm is documented for 2-D matrices, while permute()
# above is given three axes -- torch.matmul is presumably what is needed.
at = torch.mm(self.Ws, sentence_trans)
# NOTE(review): torch.nn.Tanh is a module class, so this call does not apply
# tanh to `at`; torch.tanh(at) is presumably intended -- confirm.
at = torch.nn.Tanh(at)
# Max-subtracted exp followed by normalisation: a softmax over the last axis.
at = torch.exp(at - torch.max(torch.Tensor(at), dim=-1, keepdims=True).values)
at = at / torch.sum(at, dim = -1, keepdims=True)
# Batched matrix product of the attention weights with the inputs.
v = torch.einsum('ijk,ikl->ijl', at, inputs)
return v
Thank you!
class Attention_module(torch.nn.Module):
    """PyTorch port of the Keras attention layer with Glorot-initialised weights.

    Args:
        class_num: number of weight rows (one per class).
        input_shape: shape of the expected input; index 2 supplies the
            embedding length, exactly as in the Keras ``build`` method.

    The weight matrix is stored as ``self.Ws.weight`` (an ``nn.Embedding`` is
    used only as a container for a ``(class_num, embedding_length)``
    parameter).
    """

    def __init__(self, class_num, input_shape):
        super().__init__()
        self.class_num = class_num
        embedding_length = int(input_shape[2])
        self.Ws = torch.nn.Embedding(num_embeddings=class_num,
                                     embedding_dim=embedding_length)  # Embedding layer
        torch.nn.init.xavier_uniform_(self.Ws.weight)  # Glorot initialization

    def forward(self, inputs):
        """Return the attention-weighted combination of ``inputs``.

        Mirrors the Keras ``call``: scores are ``tanh(Ws @ inputs^T)``, they
        are normalised with a softmax over the last axis, and the result is
        multiplied back onto ``inputs``. For a 3-D input whose last axis has
        the embedding length, the output's last two axes are
        ``(class_num, embedding_length)``.
        """
        sentence_trans = inputs.permute(0, 2, 1)
        # matmul broadcasts the 2-D weight over the batch axis; torch.mm
        # only accepts 2-D operands.
        at = torch.tanh(torch.matmul(self.Ws.weight, sentence_trans))
        # Same result as the manual exp(x - max) / sum(...) in the Keras code.
        at = torch.softmax(at, dim=-1)
        return torch.bmm(at, inputs)
Here's the reference for layer initialization methods. Xavier init is another name for Glorot init.
The _ at the end of torch.nn.init.xavier_uniform_ is a pytorch convention that signifies an inplace operation.
You can also use torch.nn.init at runtime. It doesn't have to be within __init__(). Like:
att = Attention_module(class_num, input_shape)
torch.nn.init.xavier_uniform_(att.Ws.weight)
or :
for param in att.parameters():
torch.nn.init.xavier_uniform_(param)

Learnable scalar weight in PyTorch

I have two neural networks running in parallel. Each produces a feature map of the same size, say Nx1. I now want a weighted average of these embeddings, i.e. w1 * embed1 + w2 * embed2. I have tried the approaches from two other posts (links 1 and 2), but the weights are not updating. Any help would be appreciated. Here is how I am trying to do it:
# Question code: combines two embeddings as weight1 * a + weight2 * b.
class LinearWeightedAvg(nn.Module):
# n_inputs is accepted but not used anywhere in this class.
def __init__(self, n_inputs):
super(LinearWeightedAvg, self).__init__()
# The two scalars are stored as plain Variable attributes rather than
# nn.Parameter objects; the question reports that they are not updating.
self.weight1 = Variable(torch.randn(1), requires_grad=True).cuda()
self.weight2 = Variable(torch.randn(1), requires_grad=True).cuda()
def forward(self, inp_embed):
# inp_embed is indexed at 0 and 1: a pair of embeddings.
return self.weight1 * inp_embed[0] + self.weight2 * inp_embed[1]
# Question code: one embedding branch that applies a single layer to its input.
class EmbedBranch(nn.Module):
def __init__(self, feat_dim, embedding_dim):
super(EmbedBranch, self).__init__()
# NOTE(review): this binds a local name only, and `fc_layer` is not defined
# in the snippet; forward() reads self.fc_layer1 -- the snippet was
# presumably abridged, confirm against the real code.
fc_layer1 = fc_layer
def forward(self, x):
x = self.fc_layer1(x)
return x
# Question code: two EmbedBranch instances whose outputs are merged by
# LinearWeightedAvg.
class EmbeddingNetwork(nn.Module):
def __init__(self, args, N):
super(EmbeddingNetwork, self).__init__()
# Assigned but not used again in this snippet.
embedding_dim = N
self.embed1 = EmbedBranch(N, N)
self.embed2 = EmbedBranch(N, N)
# NOTE(review): `metric_dim` is not defined anywhere in the snippet.
self.comb_branch = LinearWeightedAvg(metric_dim)
self.args = args
if args.cuda:
self.cuda()
def forward(self, emb1, emb2):
# NOTE(review): __init__ names the branches embed1/embed2, but forward()
# calls text_branch/image_branch -- presumably left over from renaming.
embeds1 = self.text_branch(emb1)
embeds2 = self.image_branch(emb2)
combined = self.comb_branch([embeds1, embeds2])
return combined
def train_forward(self, embed1, embed2):
combined = self(embed1, embed2)
# NOTE(review): from here on the lines refer to `model`, `loss_func`,
# `labels`, `running_loss` and `optimizer`, none of which are defined in the
# snippet; this looks like a training-loop excerpt fused onto the method
# body -- confirm.
embeds = model.train_forward(embed1, embed2)
loss = loss_func(embeds, labels)
running_loss.update(loss.data.item())
optimizer.zero_grad()
loss.backward()
Also I want the weight to be within 0-1 range.
Thanks,
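You should use self.weightx = torch.nn.Parameter(your_initial_tensor) to register a tensor as a learnable parameter of the model; a plain Variable attribute is not returned by model.parameters(), so the optimizer never updates it. To keep the effective weight in the 0-1 range, one option is to pass the parameter through torch.sigmoid inside forward.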

In Pytorch, when transferring to GPU, I get an error "is on CPU, but expected to be on GPU"

Error example: "Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU". I was stuck on the tutorial for classification:
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
Note: The code is for regression.
Code is below:
# Question code: a regression MLP with n_hidden_layer ReLU-activated hidden
# layers followed by a single-output linear layer.
class Net(nn.Module):
def __init__(self, num_features, size_hidden_layer, n_hidden_layer):
super(Net, self).__init__()
self.size_hidden_layer = size_hidden_layer
self.n_hidden_layer = n_hidden_layer
# The hidden layers are kept in a built-in list, not in an nn.Module
# container; this is the part the answer replaces with nn.Sequential.
self.hidden_layers = list()
self.hidden_layers.append(nn.Linear(num_features, size_hidden_layer))
for _ in range(n_hidden_layer-1):
self.hidden_layers.append(nn.Linear(size_hidden_layer, size_hidden_layer))
self.last_layer = nn.Linear(size_hidden_layer, 1)
def forward(self, x):
# Apply each hidden layer followed by ReLU, then the output layer.
for i in range(self.n_hidden_layer):
x = torch.relu(self.hidden_layers[i](x))
return self.last_layer(x)
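What the tutorial section does not mention is that layers have to be registered with the module (for example by wrapping them in nn.Sequential or nn.ModuleList) in order for their parameters to be moved to the GPU; layers stored in a plain Python list are not moved. For example, look at __init__ below, where the hidden layers are wrapped in nn.Sequential.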
class Net(nn.Module):
    """Regression MLP: ReLU-activated hidden layers, then one linear output unit.

    Args:
        num_features: width of the input.
        size_hidden_layer: width of every hidden layer.
        n_hidden_layer: number of hidden layers.

    The hidden layers are wrapped in ``nn.Sequential`` so that they are
    registered as sub-modules; this makes their parameters visible to
    ``parameters()`` and lets ``.to(device)`` / ``.cuda()`` move them.
    """

    def __init__(self, num_features, size_hidden_layer, n_hidden_layer):
        super(Net, self).__init__()
        self.size_hidden_layer = size_hidden_layer
        self.n_hidden_layer = n_hidden_layer
        # The first hidden layer maps the input width to the hidden width;
        # the remaining ones keep the hidden width.
        hidden_layers = [nn.Linear(num_features, size_hidden_layer)]
        hidden_layers.extend(
            nn.Linear(size_hidden_layer, size_hidden_layer)
            for _ in range(n_hidden_layer - 1)
        )
        self.hidden_layers = nn.Sequential(*hidden_layers)
        self.last_layer = nn.Linear(size_hidden_layer, 1)

    def forward(self, x):
        """Apply every hidden layer followed by ReLU, then the output layer."""
        # ReLU is interleaved by hand, so the Sequential is used purely as a
        # registered container and iterated rather than called. Iterating the
        # container (instead of range(n_hidden_layer)) also keeps the
        # always-created first layer in use when n_hidden_layer is 0.
        for layer in self.hidden_layers:
            x = torch.relu(layer(x))
        return self.last_layer(x)

Using Captum with Pytorch Lightning?

So I tried to use Captum with PyTorch Lightning. I am having issues when passing the Module to Captum, since it seems to do weird reshaping of the tensors.
For example in the below minimal example, the lightning code works easy and well.
But when I use IntegratedGradients with n_steps >= 1 I get an issue.
The code of the LightningModule is not that important, I would say; I am more interested in the line of code at the very bottom.
Does anyone know how to work around this?
from captum.attr import IntegratedGradients
from torch import nn, optim, rand, sum as tsum, reshape, device
import torch.nn.functional as F
from pytorch_lightning import seed_everything, LightningModule, Trainer
from torch.utils.data import DataLoader, Dataset
SAMPLE_DIM = 3
# Synthetic dataset from the question: `samples` random rows of SAMPLE_DIM
# values, each labelled by whether the row sum is positive.
class CustomDataset(Dataset):
def __init__(self, samples=42):
# rand() * 2 - 1 rescales the random values; the tensor is created on the GPU.
self.dataset = rand(samples, SAMPLE_DIM).cuda().float() * 2 - 1
def __getitem__(self, index):
# Returns (row, label) where label is 1.0 if the row sums to more than 0.
return (self.dataset[index], (tsum(self.dataset[index]) > 0).cuda().float())
def __len__(self):
# Number of rows in the dataset tensor.
return self.dataset.size()[0]
# LightningModule from the question: Linear -> Linear -> Sigmoid, trained
# with binary cross-entropy on CustomDataset.
class OurModel(LightningModule):
def __init__(self):
super(OurModel, self).__init__()
# Network layers
self.linear = nn.Linear(SAMPLE_DIM, 2048)
self.linear2 = nn.Linear(2048, 1)
self.output = nn.Sigmoid()
# Hyper-parameters, that we will auto-tune using lightning!
self.lr = 0.001
self.batch_size = 512
def forward(self, x):
x = self.linear(x)
x = self.linear2(x)
output = self.output(x)
# The result is flattened to a single dimension before being returned.
return reshape(output, (-1,))
def configure_optimizers(self):
# Adam over all module parameters with the learning rate set in __init__.
return optim.Adam(self.parameters(), lr=self.lr)
def train_dataloader(self):
# 1000 synthetic samples, shuffled, in batches of self.batch_size.
loader = DataLoader(CustomDataset(samples=1000), batch_size=self.batch_size, shuffle=True)
return loader
def training_step(self, batch, batch_nb):
# batch is an (inputs, labels) pair; batch_nb is not used.
x, y = batch
loss = F.binary_cross_entropy(self(x), y)
return {'loss': loss, 'log': {'train_loss': loss}}
# Script entry point from the question: train briefly, then run Captum's
# IntegratedGradients on a single sample.
if __name__ == '__main__':
seed_everything(42)
# Rebinds the imported name `device` to a device instance.
device = device("cuda")
model = OurModel().to(device)
trainer = Trainer(max_epochs=2, min_epochs=1, auto_lr_find=False,
progress_bar_refresh_rate=10)
trainer.fit(model)
# ok Now the Problem
# One dataset row (the first element of the (row, label) pair), with
# gradients enabled.
test_input = CustomDataset(samples=1).__getitem__(0)[0].requires_grad_()
ig = IntegratedGradients(model)
# NOTE(review): forward() flattens its output to one dimension, yet target=1
# is passed here -- possibly related to the reported shape problem; confirm.
attr, delta = ig.attribute(test_input, target=1, return_convergence_delta=True)
The solution was to wrap the forward function. Make sure that the shape going into model.forward() is correct!
# Solution is this wrapper function
def modified_f(in_vec):
    """Reshape Captum's input for the model and return a column of outputs.

    The tensor handed to the forward function does not arrive in the
    ``(rows, SAMPLE_DIM)`` layout the model expects, so it is reshaped before
    the call and the model's result is reshaped to ``(rows, 1)`` afterwards.
    Relies on the module-level ``model`` and ``SAMPLE_DIM``.
    """
    # Shape here is wrong
    print("IN:", in_vec.size())
    # Let reshape infer the row count: this avoids float division plus int()
    # truncation and also accepts inputs that already have two axes.
    x = torch.reshape(in_vec, (-1, SAMPLE_DIM))
    print("x:", x.size())
    res = model.forward(x)
    print("res:", res.size())
    res = torch.reshape(res, (res.size()[0], 1))
    print("res2:", res.size())
    return res
ig = IntegratedGradients(modified_f)
attr, delta = ig.attribute(test_input, return_convergence_delta=True, n_steps=STEP_AMOUNT)

optimizer got an empty parameter list (skorch)

So, I am used to PyTorch and have now decided to give skorch a shot.
Here they define the network as
class ClassifierModule(nn.Module):
    """Small classifier: 20 inputs -> ``num_units`` -> 10 -> 2 class probabilities.

    Args:
        num_units: width of the first dense layer.
        nonlin: activation applied after the first dense layer.
        dropout: dropout probability applied after that activation.
    """

    def __init__(
            self,
            num_units=10,
            nonlin=F.relu,
            dropout=0.5,
    ):
        super(ClassifierModule, self).__init__()
        self.num_units = num_units
        self.nonlin = nonlin

        self.dense0 = nn.Linear(20, num_units)
        # The raw rate is not kept separately: self.dropout is the Dropout
        # module, which is what the attribute ended up holding originally too.
        self.dropout = nn.Dropout(dropout)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        """Return softmax probabilities over the last axis; extra kwargs are ignored."""
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        X = F.softmax(self.output(X), dim=-1)
        return X
I prefer inputting lists of neurons in each layer i.e num_units=[30,15,5,2] would have 2 hidden layers with 15 and 5 neurons. Furthermore we have 30 features and 2 classes, thus re-writing it to something like this
# Question code: the asker's list-driven rewrite of ClassifierModule, which
# fails with "optimizer got an empty parameter list".
class Net(nn.Module):
def __init__(
self,
# NOTE(review): the parameter is called num_units, but the body reads
# `layer_units`, which is not defined in the snippet -- presumably a rename
# left half-done.
num_units=[30,15,5,2],
nonlin=[F.relu,F.relu,F.relu],
dropout=[0.5,0.5,0.5],
):
super(Net, self).__init__()
self.layer_units = layer_units
self.nonlin = nonlin #Activation function
self.dropout = dropout #Drop-out rates in each layer
# The layers are kept in a built-in list; the answer replaces this line with
# an nn.ModuleList.
self.layers = [nn.Linear(i,p) for i,p in zip(layer_units,layer_units[1:])] #Dense layers
def forward(self, X, **kwargs):
print("Forwards")
# NOTE(review): self.dropout holds the values passed in (floats by default),
# yet `drop` is called like a function below; the zip over self.layers[:-1]
# also leaves the final layer unused -- confirm both against the real code.
for layer,func,drop in zip(self.layers[:-1],self.nonlin,self.dropout):
print(layer,func,drop)
X=drop(func(layer(X)))
X = F.softmax(X, dim=-1)
return X
should do the trick. The problem is that when calling
net = NeuralNetClassifier(Net,max_epochs=20,lr=0.1,device="cuda")
net.fit(X,y)
I get the error "ValueError: optimizer got an empty parameter list". I have narrowed it down to the removal of self.output = nn.Linear(10, 2): without it the net does not even enter forward, i.e. it seems as if output were some kind of "trigger" variable. Is it really the case that the network needs a variable called output (holding a layer) at the end, and that we are not free to choose the variable names ourselves?
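PyTorch only collects parameters from attributes that are themselves nn.Module instances (or nn.Parameter objects); layers hidden inside a plain Python list are not found, so the optimizer receives no parameters. The variable name output has nothing to do with it. Changing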
self.layers = [nn.Linear(i,p) for i,p in zip(layer_units,layer_units[1:])]
to
self.layers = nn.ModuleList([nn.Linear(i,p) for i,p in zip(layer_units,layer_units[1:])])
should work fine

Resources