This model works in PyTorch; however, after exporting it from PyTorch to ONNX format, ONNX Runtime crashes with a 'Trilu NOT_IMPLEMENTED' error when loading it. (I do not have this issue with my other models that use torch.tril().)
How do I make this model run in the Onnxruntime?
This is a visualisation of the Onnx graph of the Model.
The Model in PyTorch
class MyModel(nn.Module):
    """Toy attention block that adds a boolean lower-triangular mask to random scores.

    The comparison result is handed to ``torch.tril`` while it is still a
    boolean tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, item_seq):
        """Return random attention scores plus ``tril(item_seq < 100)``."""
        # Boolean mask: True wherever the item id is below 100.
        is_valid = torch.lt(item_seq, 100)
        causal_mask = torch.tril(is_valid)
        # Random operands are drawn in a fixed order: query first, then key.
        queries = torch.rand((1, 2, 2, 32))
        keys = torch.rand((1, 2, 32, 2))
        return torch.matmul(queries, keys) + causal_mask
# Instantiate the model and switch it to evaluation mode.
model = MyModel()
model.eval()
# Example input: a (1, 2) tensor of int64 ones.
x_train = torch.ones([1, 2], dtype=torch.long)
# demonstrate that eager works
print(model.forward(x_train))
# Export the model to an ONNX file, naming the graph input 'x' and the output 'output'.
bigmodel_onnx_filename = 'mymodel.onnx'
torch.onnx.export(
    model,
    x_train,
    bigmodel_onnx_filename,
    input_names=['x'],
    output_names=['output'],
)
# Load the exported file with the onnx package (the returned model is not used).
onnx.load(bigmodel_onnx_filename)
# Onnxruntime crashes when loading in the model
ort_sess = ort.InferenceSession(bigmodel_onnx_filename, providers=['CPUExecutionProvider'])
# Feed the same example input under the exported input name 'x'.
key = {'x': x_train.numpy()}
print(ort_sess.run(None, key))
This results in the following error for ort.InferenceSession():
NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for Trilu(14) node with name '/net/Trilu'
How can I make this model run in the Onnxruntime?
[github: code to reproduce the error and the model.onnx file]
(https://github.com/bkersbergen/pytorch_onnx_runtime_error/blob/main/main.py)
I'm using python 3.9, these are the project requirements
torch==1.13.1
jupyter==1.0.0
onnxruntime==1.13.1
onnx==1.13.0
Torch nightly version 2.0.0.dev20230205 gave the same error
I then decided to implement my own tril function.
class MyModel(nn.Module):
    """Variant of the toy attention block that builds its triangular mask by hand."""

    def __init__(self):
        super().__init__()

    def forward(self, item_seq):
        """Return random attention scores plus the hand-built lower-triangular mask."""
        is_valid = item_seq < 100
        causal_mask = self.my_tril(is_valid)
        # Random operands are drawn in a fixed order: query first, then key.
        queries = torch.rand((1, 2, 2, 32))
        keys = torch.rand((1, 2, 32, 2))
        return torch.matmul(queries, keys) + causal_mask

    def my_tril(self, x):
        """Zero every entry of ``x`` whose column index exceeds its row index.

        The square mask is sized from the last dimension of ``x``.
        """
        size = x.size(-1)
        positions = torch.arange(size)
        column_index = positions.expand(size, size)  # entry [i, j] holds j
        row_index = positions.unsqueeze(-1)  # entry [i, 0] holds i
        keep = torch.le(column_index, row_index)
        # Fill the positions that are NOT kept with 0.
        return x.masked_fill(keep == 0, 0)
but then I get a Where(9) node with name '/Where_1' NOT_IMPLEMENTED error. (?!)
The boolean output of torch.lt() as input for torch.tril() works in PyTorch's eager and JIT modes. However, it breaks ONNX Runtime with the "Trilu NOT_IMPLEMENTED" error.
I was able to work around it by casting the torch.tril() input to float():
class MyModel(nn.Module):
    """Toy attention block whose mask is cast to float before ``torch.tril``."""

    def __init__(self):
        super().__init__()

    def forward(self, item_seq):
        """Return random attention scores plus ``tril((item_seq < 100).float())``."""
        # Cast the boolean comparison to float *before* taking the lower triangle.
        valid_as_float = (item_seq < 100).float()
        causal_mask = torch.tril(valid_as_float)
        # Random operands are drawn in a fixed order: query first, then key.
        queries = torch.rand((1, 2, 2, 32))
        keys = torch.rand((1, 2, 32, 2))
        return torch.matmul(queries, keys) + causal_mask
Based on this experience, my hypothesis is that the TRILU NOT_IMPLEMENTED error is only applicable when having BOOLEAN Tensors as input. The Onnxruntime then throws this generic TRILU NOT_IMPLEMENTED error making me believe that Onnx has no TRILU support at all, which is clearly not the case.
Related
Here is a simple example that results in an in-place operation error.
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torch import optim
torch.autograd.set_detect_anomaly(True)
class Loss(nn.Module):
    """Dummy loss that ignores the target and returns one element of the prediction."""

    def __init__(self):
        super().__init__()

    def forward(self, x, target):
        """Return the scalar at index ``[0, 0, 0, 0]`` of ``x``; ``target`` is unused."""
        origin = (0, 0, 0, 0)
        return x[origin]
def block(in_channels, features, name):
    """Create a bias-free convolution with kernel size 3 and padding 1.

    ``features`` becomes the number of output channels. ``name`` is accepted
    but not used.
    """
    conv_options = {"kernel_size": 3, "padding": 1, "bias": False}
    return nn.Conv2d(in_channels, features, **conv_options)
class SharedNetwork(nn.Module):
    """Feature extractor made of a single 3-to-1 channel layer built by ``block``."""

    def __init__(self):
        super().__init__()
        self.shared_layer = block(3, 1, "wow")

    def forward(self, x):
        """Apply the shared layer to ``x`` and return the result."""
        return self.shared_layer(x)
class Network1(nn.Module):
    """Head consisting of one 1-to-1 channel layer built by ``block``."""

    def __init__(self):
        super().__init__()
        self.conv = block(1, 1, "wow-1")

    def forward(self, x):
        """Return the layer output without any activation."""
        out = self.conv(x)
        return out
class Network2(nn.Module):
    """Head consisting of one 1-to-1 channel layer built by ``block`` plus a sigmoid."""

    def __init__(self):
        super().__init__()
        self.conv = block(1, 1, "wow-2")

    def forward(self, x):
        """Return the sigmoid of the layer output."""
        logits = self.conv(x)
        return torch.sigmoid(logits)
# Build the three networks.
shared_net = SharedNetwork()
net_1 = Network1()
segmentor = Network2()
# Both optimizers are handed shared_net's parameters: `optimizer` also gets the
# segmentor's parameters, `optimizer_conf` gets shared_net's only.
optimizer = optim.Adam(list(shared_net.parameters()) + list(segmentor.parameters()), lr=1e-6)
optimizer_conf = optim.Adam(list(shared_net.parameters()), lr=1e-6)
loss_fn = Loss()
# 2. Run a forward pass
fake_data = torch.randint(0,255,(1, 3, 256, 256))/255
target_data_1 = torch.randint(0,255,(1, 3, 256, 256))/255
target_data_2 = torch.randint(0,255,(1, 3, 256, 256))/255
optimizer.zero_grad()
optimizer_conf.zero_grad()
# `features` is computed once and reused for both heads below.
features = shared_net(fake_data)
segmented = segmentor(features)
s_loss = loss_fn(segmented, target_data_2)
# retain_graph=True because a second backward pass through `features` follows.
s_loss.backward(retain_graph=True)
# This step runs BEFORE the second backward pass below.
optimizer.step()
out_1 = net_1(features)
loss = loss_fn(out_1, target_data_1)
loss.backward(retain_graph=False)
optimizer_conf.step()
Error message:
UserWarning: Error detected in ConvolutionBackward0. No forward pass information available. Enable detect anomaly during forward pass for more information. (Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:97.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1, 3, 3, 3]] is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
I was able to solve the problem by changing the order of running the step function of optimizers.
# Clear the gradients held by both optimizers.
optimizer_conf.zero_grad()
optimizer.zero_grad()
# `features` is computed once and reused for both heads below.
features = shared_net(fake_data)
segmented = segmentor(features)
s_loss = loss_fn(segmented, target_data_2)
# retain_graph=True because a second backward pass through `features` follows.
s_loss.backward(retain_graph=True)
out_1 = net_1(features)
loss = loss_fn(out_1, target_data_1)
loss.backward(retain_graph=False)
# Both optimizer steps now run only after both backward passes have finished.
optimizer_conf.step()
optimizer.step()
The following questions, however, remain:
How does the step method cause an inplace operation in convolution?
Why does moving the steps to the end of the file resolve this error?
NOTE: The loss function is used for simplicity, using dice-loss also results in the same error!
Before answering the question, I have to mention that having multiple optimizers for one set of parameters seems to be an anti-pattern and is better avoided.
How does the step method cause an inplace operation in convolution?
A: The step method updates the weights using their gradients, so it does something like the following:
param.weight += param.grad
which can be interpreted as an in place operation
Why does moving the steps to the end of the file resolve this error?
A: By moving the step calls after the second backward call, the in-place weight update no longer runs before that backward pass. As a result, the weights saved for the gradient computation are still unmodified when the second backward pass needs them, and no error is raised.
To sum up, it's best to have only one optimizer for one set of parameters. The previous example could be coded in the following way:
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torch import optim
torch.autograd.set_detect_anomaly(True)
class Loss(nn.Module):
    """Dummy loss that ignores the target and returns one element of the prediction."""

    def __init__(self):
        super().__init__()

    def forward(self, x, target):
        """Return the scalar at index ``[0, 0, 0, 0]`` of ``x``; ``target`` is unused."""
        origin = (0, 0, 0, 0)
        return x[origin]
def block(in_channels, features, name):
    """Create a bias-free convolution with a (3, 3) kernel and padding 1.

    ``features`` becomes the number of output channels. ``name`` is accepted
    but not used.
    """
    conv_options = {"kernel_size": (3, 3), "padding": 1, "bias": False}
    return nn.Conv2d(in_channels, features, **conv_options)
class SharedNetwork(nn.Module):
    """Feature extractor made of a single 3-to-1 channel layer built by ``block``."""

    def __init__(self):
        super().__init__()
        self.shared_layer = block(3, 1, "wow")

    def forward(self, x):
        """Apply the shared layer to ``x`` and return the result."""
        return self.shared_layer(x)
class Network1(nn.Module):
    """Head consisting of one 1-to-1 channel layer built by ``block``."""

    def __init__(self):
        super().__init__()
        self.conv = block(1, 1, "wow-1")

    def forward(self, x):
        """Return the layer output without any activation."""
        out = self.conv(x)
        return out
class Network2(nn.Module):
    """Head consisting of one 1-to-1 channel layer built by ``block`` plus a sigmoid."""

    def __init__(self):
        super().__init__()
        self.conv = block(1, 1, "wow-2")

    def forward(self, x):
        """Return the sigmoid of the layer output."""
        logits = self.conv(x)
        return torch.sigmoid(logits)
# Fix the seed so the randomly initialised weights and inputs are reproducible.
torch.manual_seed(0)
shared_net = SharedNetwork()
net_1 = Network1()
net_2 = Network2()
# One optimizer per network, so no parameter is owned by two optimizers.
shared_optimizer = optim.Adam(list(shared_net.parameters()), lr=1e-6)
net_1_optimizer = optim.Adam(list(net_1.parameters()), lr=1e-6)
# Must use net_2 here: `segmentor` is not defined in this snippet.
net_2_optimizer = optim.Adam(list(net_2.parameters()), lr=1e-6)
loss_fn = Loss()
# 2. Run a forward pass
fake_data = torch.randint(0,255,(1, 3, 256, 256))/255
target_data_1 = torch.randint(0,255,(1, 3, 256, 256))/255
target_data_2 = torch.randint(0,255,(1, 3, 256, 256))/255
# First loss: only net_2's optimizer steps after this backward pass.
net_2_optimizer.zero_grad()
features = shared_net(fake_data)
net_2_out = net_2(features)
s_loss = loss_fn(net_2_out, target_data_2)
# retain_graph=True because `features` is backpropagated through again below.
s_loss.backward(retain_graph=True)
net_2_optimizer.step()
# Second loss: clear the gradients of net_1 and shared_net (this also discards
# what the first backward pass accumulated in shared_net), then step both.
net_1_optimizer.zero_grad()
shared_optimizer.zero_grad()
out_1 = net_1(features)
loss = loss_fn(out_1, target_data_1)
loss.backward(retain_graph=False)
net_1_optimizer.step()
shared_optimizer.step()
Note: If you want to have two different learning rates for different losses applied to one set of parameters, you can multiply each loss by a weight that reflects its importance. For example, you can multiply loss_1 by 0.1 and loss_2 by 0.5. Or, you can use backward hooks as mentioned in this comment:
backward-hook
I'm trying to convert the pytorch model to onnx. First I have a demo, but different numeric inputs result in different onnx models.
code:
import torch
from torch import nn
from openvino.runtime import Core
class TestNet(nn.Module):
    """Return the minimum over dim 1, or a zero tensor when it is below a threshold."""

    def __init__(self, x):
        super().__init__()
        self.x = x  # threshold that forward() compares the minimum against

    def forward(self, x: torch.Tensor):
        """Take the minimum of ``x`` over dim 1 (keeping the dim).

        Returns ``torch.zeros((1, 1))`` when that minimum is below ``self.x``,
        otherwise the minimum itself.
        """
        smallest = x.min(dim=1, keepdim=True)[0]
        # Python-level branch on a tensor value.
        return torch.zeros((1, 1)) if smallest < self.x else smallest
# All-zero example input: its minimum (0) is below the threshold 1 passed to
# TestNet, so forward() takes the `return torch.zeros((1, 1))` branch for it.
dummy_input = torch.zeros((1, 5))
model = TestNet(1)
onnx_path = "./test_net.onnx"
# Export to ONNX (opset 11, constant folding disabled) with input 'x' and output 'y'.
torch.onnx.export(
    model,
    (dummy_input,),
    onnx_path,
    opset_version=11,
    do_constant_folding=False,
    input_names=["x"],
    output_names=["y"],
)
# Read the exported file with OpenVINO and compile it for the CPU device.
ie = Core()
model_onnx = ie.read_model(model=onnx_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
output_layer_onnx = compiled_model_onnx.output(0)
# Run the compiled model on an all-10 input and on an all-zero input.
res_onnx1 = compiled_model_onnx([torch.zeros((1, 5)) + 10])[output_layer_onnx]
res_onnx2 = compiled_model_onnx([torch.zeros((1, 5))])[output_layer_onnx]
print(res_onnx1, res_onnx2)
It should have worked, but the exported ONNX model has no input node. The model is as follows:
<Model: 'torch-jit-export'
inputs[
]
outputs[
<ConstOutput: names[y] shape{1,1} type: f32>
]>
But if I use dummy_input = torch.zeros((1, 5)) + 1, the ONNX model has an input node, and the output is as follows:
[[10.]] [[0.]]
The code is the same, but dummy_input is different. I don't know why. By the way, torch.where is not what I need.
I made model that takes grayscale image as input, and returns single value as output.
Backbone of this model is basically resnet18, but as it takes different input/output, I modified it.
# Build a ResNet-18 with pretrained=True (`models` is presumably torchvision.models — confirm).
model = models.resnet18(pretrained=True)
# Replace the first convolution with one that takes a single input channel.
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# Append a linear layer that maps 1000 features to a single output value.
model = nn.Sequential(model,nn.Linear(1000,1))
This model worked pretty well on test dataset, so I decided to make activation heatmap using Grad CAM.
class model(nn.Module):
    """Grad-CAM style wrapper that records the gradient of its feature maps.

    ``forward`` registers a hook on the output of ``self.features`` so that the
    gradient flowing into it during backward is stored on the instance.
    """

    def __init__(self):
        super().__init__()
        self.model = torch.load('model.pth')
        self.features = nn.Sequential( "STACKED ARCHITECTURES")
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512, out_features=1000, bias=True),
            nn.Linear(in_features=1000, out_features=1, bias=True),
        )
        self.gradient = None

    def activations_hook(self, grad):
        """Store ``grad`` so it can be read back via ``get_gradient``."""
        self.gradient = grad

    def get_gradient(self):
        """Return the most recently stored gradient (``None`` until the hook fires)."""
        return self.gradient

    def get_activations(self, x):
        """Return the output of the feature extractor for ``x``."""
        return self.features(x)

    def forward(self, x):
        """Run features -> hook registration -> pooling -> flatten -> classifier."""
        feature_maps = self.features(x)
        # Capture the gradient with respect to the feature maps during backward.
        feature_maps.register_hook(self.activations_hook)
        pooled = self.avgpool(feature_maps)
        flattened = pooled.view((1, -1))  # a single row
        return self.classifier(flattened)
Then, when I try to run it with the code below, I always get a TypeError:
# Rebinds the name `model` from the class to an instance of it.
model = model()
_ = model.eval()
# Take the first batch from grad_loader; its second element is discarded.
image, _ = next(iter(grad_loader))
pred = model(image)
TypeError: forward() missing 1 required positional argument: 'x'
Can anyone give any advice to solve this?
p.s Dataloader(grad_loader) worked fine while I was training my model, so I think dataloader is probably not the problem
I am trying to use tensorboard to visualize my pytorch model and encounter a problem. The input tensor's shape is (-1, 1, 20, 15) and the output tensor's shape is (-1, 6). My model combines a list of 5 convolutional networks.
packages:
python: 3.7.6
pytorch: 1.4.0
tensorboard: 2.1.0
The pytorch model is as below:
import torch
from torch import nn
from torch.nn import functional as F
class MyModel(nn.Module):
    """Example network: one small convolution per column chunk, then a 3-layer MLP."""

    def __init__(self, nchunks=[2, 5, 3, 2, 3], resp_size=6):
        super().__init__()
        self.nchunks = nchunks
        # One convolution per chunk width, held in a plain Python list.
        self.conv = [nn.Conv2d(1, 2, (2, x)) for x in nchunks]
        self.pool = nn.Sequential(
            nn.AdaptiveMaxPool1d(output_size=10),
            nn.Flatten(start_dim=1),
        )
        self.bn = nn.BatchNorm1d(100)
        self.fc1 = nn.Linear(100, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, resp_size)

    def forward(self, x):
        """Split ``x`` along dim 3, convolve and pool each chunk, then classify."""
        chunks = torch.split(x, self.nchunks, dim=3)
        pooled = []
        for conv, chunk in zip(self.conv, chunks):
            # Each convolution output is reshaped to (-1, 2, 19) before pooling.
            features = conv(chunk.float()).view(-1, 2, 19)
            pooled.append(self.pool(features))
        hidden = self.bn(torch.cat(pooled, dim=1))
        hidden = F.relu(self.fc1(hidden))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
Here is the code for the tensorboard summary writer:
from torch.utils.tensorboard import SummaryWriter
# Random example batch of shape (5, 1, 20, 15).
x = torch.rand((5,1,20,15))
model = MyModel()
# Write the model graph for the example input into the 'logs' directory.
writer = SummaryWriter('logs')
writer.add_graph(model, x)
Such an error is returned:
RuntimeError: Cannot insert a Tensor that requires grad as a constant. Consider making it a parameter or input, or detaching the gradient
Tensor:
(1,1,.,.) =
-0.2108 -0.4986
-0.4009 -0.1910
(2,1,.,.) =
0.2383 -0.4147
0.2642 0.0456
[ torch.FloatTensor{2,1,2,2} ]
I guess the model has some issues, but I am not sure what happens.
This similar github issue does not relate to my problem because I am not using multi GPUs.
I solved the problem by replacing
[nn.Conv2d(1, 2, (2, x)) for x in nchunks]
with
nn.ModuleList([nn.Conv2d(1, 2, (2, x)) for x in nchunks])
I'm trying to convert a PyTorch VAE to onnx, but I'm getting: torch.onnx.symbolic.normal does not exist
The problem appears to originate from a reparametrize() function:
def reparametrize(self, mu, logvar):
    """Return ``eps * std + mu`` with ``std = exp(0.5 * logvar)``.

    ``eps`` is drawn with ``torch.normal`` from mean 0 and standard deviation 1
    and has the shape of ``std``. It is moved to CUDA when ``self.have_cuda``
    is truthy.
    """
    std = logvar.mul(0.5).exp_()
    noise_shape = std.size()
    eps = torch.normal(torch.zeros(noise_shape), torch.ones(noise_shape))
    if self.have_cuda:
        eps = eps.cuda()
    return eps.mul(std).add_(mu)
I also tried:
eps = torch.cuda.FloatTensor(std.size()).normal_()
which produced the error:
Schema not found for node. File a bug report.
Node: %173 : Float(1, 20) = aten::normal(%169, %170, %171, %172), scope: VAE
Input types:Float(1, 20), float, float, Generator
and
eps = torch.randn(std.size()).cuda()
which produced the error:
builtins.TypeError: i_(): incompatible function arguments. The following argument types are supported:
1. (self: torch._C.Node, arg0: str, arg1: int) -> torch._C.Node
Invoked with: %137 : Tensor = onnx::RandomNormal(), scope: VAE, 'shape', 133 defined in (%133 : int[] = prim::ListConstruct(%128, %132), scope: VAE) (occurred when translating randn)
I am using cuda.
Any thoughts appreciated. Perhaps I need to approach the z/latent differently for onnx?
NOTE: Stepping through, I can see that it's finding RandomNormal() for torch.randn(), which should be correct. But I don't really have access to the arguments at that point, so how can I fix it?
In short, the code below may work (at least in my environment, it ran without errors).
It seems that .size() operator might return variable, not constant, so it causes error for onnx compilation. (I got the same error when changed to use .size())
import torch
import torch.utils.data
from torch import nn
from torch.nn import functional as F
IN_DIMS = 28 * 28
BATCH_SIZE = 10
FEATURE_DIM = 20


class VAE(nn.Module):
    """Fully connected variational auto-encoder over flattened 28x28 inputs.

    The latent noise is sampled with a constant shape
    ``(BATCH_SIZE, FEATURE_DIM)`` rather than one read from a tensor's
    ``.size()``.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(IN_DIMS, 400)
        self.fc21 = nn.Linear(400, FEATURE_DIM)  # produces mu
        self.fc22 = nn.Linear(400, FEATURE_DIM)  # produces logvar
        self.fc3 = nn.Linear(FEATURE_DIM, 400)
        self.fc4 = nn.Linear(400, IN_DIMS)

    def encode(self, x):
        """Return ``(mu, logvar)`` for the input batch ``x``."""
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        """Return ``eps * std + mu`` with ``std = exp(0.5 * logvar)`` and ``eps ~ N(0, 1)``."""
        std = torch.exp(0.5 * logvar)
        # Constant shape, and the noise is created on whatever device `std`
        # lives on, so the module also runs on CPU or a non-default GPU.
        eps = torch.randn(BATCH_SIZE, FEATURE_DIM, device=std.device)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        """Map latent codes ``z`` back to input space; outputs pass through a sigmoid."""
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Encode ``x``, sample a latent code, and return its reconstruction."""
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        recon_x = self.decode(z)
        return recon_x
# Move the model to the GPU and export it to "vae.onnx", using a random CUDA
# batch of shape (BATCH_SIZE, IN_DIMS) as the example input.
model = VAE().cuda()
dummy_input = torch.randn(BATCH_SIZE, IN_DIMS, device='cuda')
torch.onnx.export(model, dummy_input, "vae.onnx", verbose=True)