PyTorch VAE fails conversion to onnx - pytorch

I'm trying to convert a PyTorch VAE to onnx, but I'm getting: torch.onnx.symbolic.normal does not exist
The problem appears to originate from a reparametrize() function:
def reparametrize(self, mu, logvar):
std = logvar.mul(0.5).exp_()
if self.have_cuda:
eps = torch.normal(torch.zeros(std.size()),torch.ones(std.size())).cuda()
else:
eps = torch.normal(torch.zeros(std.size()),torch.ones(std.size()))
return eps.mul(std).add_(mu)
I also tried:
eps = torch.cuda.FloatTensor(std.size()).normal_()
which produced the error:
Schema not found for node. File a bug report.
Node: %173 : Float(1, 20) = aten::normal(%169, %170, %171, %172), scope: VAE
Input types:Float(1, 20), float, float, Generator
and
eps = torch.randn(std.size()).cuda()
which produced the error:
builtins.TypeError: i_(): incompatible function arguments. The following argument types are supported:
1. (self: torch._C.Node, arg0: str, arg1: int) -> torch._C.Node
Invoked with: %137 : Tensor = onnx::RandomNormal(), scope: VAE, 'shape', 133 defined in (%133 : int[] = prim::ListConstruct(%128, %132), scope: VAE) (occurred when translating randn)
I am using cuda.
Any thoughts appreciated. Perhaps I need to approach the z/latent differently for onnx?
NOTE: Stepping through, I can see that it's finding RandomNormal() for torch.randn(), which should be correct. But I don't really have access to the arguments at that point, so how can I fix it?

In very short, the code bellow may work. (at least in my environment, it worked w/o errors).
It seems that .size() operator might return variable, not constant, so it causes error for onnx compilation. (I got the same error when changed to use .size())
import torch
import torch.utils.data
from torch import nn
from torch.nn import functional as F
IN_DIMS = 28 * 28
BATCH_SIZE = 10
FEATURE_DIM = 20
class VAE(nn.Module):
def __init__(self):
super(VAE, self).__init__()
self.fc1 = nn.Linear(784, 400)
self.fc21 = nn.Linear(400, FEATURE_DIM)
self.fc22 = nn.Linear(400, FEATURE_DIM)
self.fc3 = nn.Linear(FEATURE_DIM, 400)
self.fc4 = nn.Linear(400, 784)
def encode(self, x):
h1 = F.relu(self.fc1(x))
return self.fc21(h1), self.fc22(h1)
def reparameterize(self, mu, logvar):
std = torch.exp(0.5*logvar)
eps = torch.randn(BATCH_SIZE, FEATURE_DIM, device='cuda')
return eps.mul(std).add_(mu)
def decode(self, z):
h3 = F.relu(self.fc3(z))
return torch.sigmoid(self.fc4(h3))
def forward(self, x):
mu, logvar = self.encode(x)
z = self.reparameterize(mu, logvar)
recon_x = self.decode(z)
return recon_x
model = VAE().cuda()
dummy_input = torch.randn(BATCH_SIZE, IN_DIMS, device='cuda')
torch.onnx.export(model, dummy_input, "vae.onnx", verbose=True)

Related

Onnx RuntimeError NOT_IMPLEMENTED Trilu

This model works in PyTorch however, after exporting it with PyTorch to Onnx format, the onnx runtime crashes with a 'Trilu NOT_IMPLEMENTED error' when loading it in. (I do not have this issue for my other models that use torch.tril() )
How do I make this model run in the Onnxruntime?
This is a visualisation of the Onnx graph of the Model.
The Model in PyTorch
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
def forward(self, item_seq):
attention_mask = item_seq < 100
tril_mask = torch.tril(attention_mask)
query_layer = torch.rand((1, 2, 2, 32))
key_layer = torch.rand((1, 2, 32, 2))
attention_scores = torch.matmul(query_layer, key_layer)
return attention_scores + tril_mask
model = MyModel()
model.eval()
x_train = torch.ones([1, 2], dtype=torch.long)
# demonstrate that eager works
print(model.forward(x_train))
bigmodel_onnx_filename = 'mymodel.onnx'
torch.onnx.export(
model,
x_train,
bigmodel_onnx_filename,
input_names=['x'],
output_names=['output'],
)
onnx.load(bigmodel_onnx_filename)
# Onnxruntime crashes when loading in the model
ort_sess = ort.InferenceSession(bigmodel_onnx_filename, providers=['CPUExecutionProvider'])
key = {'x': x_train.numpy()}
print(ort_sess.run(None, key))
This results in the following error for ort.InferenceSession():
NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for Trilu(14) node with name '/net/Trilu'
How can I make this model run in the Onnxruntime?
[github: code to reproduce the error and the model.onnx file]
(https://github.com/bkersbergen/pytorch_onnx_runtime_error/blob/main/main.py)
I'm using python 3.9, these are the project requirements
torch==1.13.1
jupyter==1.0.0
onnxruntime==1.13.1
onnx==1.13.0
Torch nightly version 2.0.0.dev20230205 gave the same error
I then decided to implement my own tril function.
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
def forward(self, item_seq):
attention_mask = item_seq < 100
tril_mask = self.my_tril(attention_mask)
query_layer = torch.rand((1, 2, 2, 32))
key_layer = torch.rand((1, 2, 32, 2))
attention_scores = torch.matmul(query_layer, key_layer)
return attention_scores + tril_mask
def my_tril(self, x):
l = x.size(-1)
arange = torch.arange(l)
mask = arange.expand(l, l)
arange = arange.unsqueeze(-1)
mask = torch.le(mask, arange)
return x.masked_fill(mask == 0, 0)
but then I get a Where(9) node with name '/Where_1' NOT_IMPLEMENTED error. (?!)
The boolean output of torch.lt() as input for torch.tril() works with PyTorch's Eager and LIT mode. However it breaks the Onnx runtime with the "TRILU not implemented error".
I was able to work around it by casting the torch.tril() input to float():
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
def forward(self, item_seq):
attention_mask = torch.lt(item_seq, 100).float()
tril_mask = torch.tril(attention_mask)
query_layer = torch.rand((1, 2, 2, 32))
key_layer = torch.rand((1, 2, 32, 2))
attention_scores = torch.matmul(query_layer, key_layer)
return attention_scores + tril_mask
Based on this experience, my hypothesis is that the TRILU NOT_IMPLEMENTED error is only applicable when having BOOLEAN Tensors as input. The Onnxruntime then throws this generic TRILU NOT_IMPLEMENTED error making me believe that Onnx has no TRILU support at all, which is clearly not the case.

How to rewrite the torch.nn.LayerNorm function?

I need to rewrite the layer normalization with torch without parameters to adjust different data size.
I have checked the API document of nn.LayerNorm
and made some implementations with torch and numpy. In my test results, there is a few difference with torch and totally equal with numpy. Comparing with nn.LayerNorm with elementwise_affine =True, the torch implementation doesn't perform so well, and the numpy implementation perform very poor. Is there any problem? Can't I directly use them like nn.LayerNorm ?
With torch
class Layer_norm(nn.Module):
def __init__(self, eps=1e-6):
super(Layer_norm, self).__init__()
self.eps = eps
def forward(self, x):
mean = torch.mean(x, dim=(1, 2, 3), keepdim=True)
var = torch.var(x, dim=(1, 2, 3), keepdim=True)
std = torch.sqrt(var + self.eps)
return (x - mean[:,None,None,None]) / std[:,None,None,None]
With numpy
class Layer_norm(nn.Module):
def __init__(self, eps=1e-5):
super(Layer_norm, self).__init__()
self.eps = eps
def forward(self, x):
mean = np.mean(x.cpu().numpy(), axis=(1,2,3))
var = np.var(x.cpu().numpy(), axis=(1,2,3))
div = np.sqrt(var+1e-5)
out = (x.cpu().numpy()-mean[:,None,None,None])/div[:,None,None,None]
return torch.from_numpy(out).float.cuda()

torch to onnx, but missing input?

I'm trying to convert the pytorch model to onnx. First I have a demo, but different numeric inputs result in different onnx models.
code:
import torch
from torch import nn
from openvino.runtime import Core
class TestNet(nn.Module):
def __init__(self, x):
super(TestNet, self).__init__()
self.x = x
def forward(self, x: torch.Tensor):
y, _ = x.min(dim=1, keepdim=True)
if y < self.x:
return torch.zeros((1, 1))
return y
dummy_input = torch.zeros((1, 5))
model = TestNet(1)
onnx_path = "./test_net.onnx"
torch.onnx.export(
model,
(dummy_input,),
onnx_path,
opset_version=11,
do_constant_folding=False,
input_names=["x"],
output_names=["y"],
)
ie = Core()
model_onnx = ie.read_model(model=onnx_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
output_layer_onnx = compiled_model_onnx.output(0)
res_onnx1 = compiled_model_onnx([torch.zeros((1, 5)) + 10])[output_layer_onnx]
res_onnx2 = compiled_model_onnx([torch.zeros((1, 5))])[output_layer_onnx]
print(res_onnx1, res_onnx2)
It could have worked, nut without the input node, the onnx is as fllows:
<Model: 'torch-jit-export'
inputs[
]
outputs[
<ConstOutput: names[y] shape{1,1} type: f32>
]>
but, if I use dummy_input = torch.zeros((1, 5)) + 1, onnx have input node, output is as fllows:
[[10.]] [[0.]]
The code is the same, but dummy_input is different. I don't know why. By the way, 1torch.where is not what I. need.

how to get the real shape of batch_size which is none in keras

When implementing a custom layer in Keras, I need to know the real size of batch_size. my shape is (?,20).
questions:
1. What is the best way to change (?,20) to (batch_size,20).
I have looked into this but it can not adjust to my problem.
I can pass the batch_size to this layer. In that case, I need to reshape (?,20) to (batch_size,20), how can I do that?
2. Is it the best way to that, or is there any builtin function that can get the real batch_size while building and running the model?
This is my layer:
from scipy.stats import entropy
from keras.engine import Layer
import keras.backend as K
import numpy as np
class measure(Layer):
def __init__(self, beta, **kwargs):
self.beta = beta
self.uses_learning_phase = True
self.supports_masking = True
super(measure, self).__init__(**kwargs)
def call(self, x):
return K.in_train_phase(self.rev_entropy(x, self.beta), x)
def get_config(self):
config = {'beta': self.beta}
base_config = super(measure, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def rev_entropy(self, x, beta):
entropy_p_t_w = np.apply_along_axis(entropy, 1, x)
con = (beta / (1 + entropy_p_t_w)) ** 1.5
new_f_w_t = x * (con.reshape(con.shape[0], 1))
norm_const = 1e-30 + np.sum(new_f_w_t, axis=0)
for t in range(norm_const.shape[0]):
new_f_w_t[:, t] /= norm_const[t]
return new_f_w_t
And here is where I call this layer:
encoded = measure(beta=0.08)(encoded)
I am also using fit_generator if it can help at all:
autoencoder.fit_generator(train_gen, steps_per_epoch=num_train_steps, epochs=NUM_EPOCHS,
validation_data=test_gen, validation_steps=num_test_steps, callbacks=[checkpoint])
The dimension of the x passed to the layer is (?,20) and that's why I can not do my calculation.
Thanks:)

Keras Custom Layer Error (Operation IsVariableInitialized has been marked as not fetchable)

I'm trying to create a custom Keras layer on a toy dataset, and am having issues. At a high level, I want to create an "Input Gate" layer, which would have trainable weights to turn each column of input on or off. So I'm starting with just trying to multiply the inputs by a sigmoid'd version of the learned weights. My code is as follows:
### This is my custom layer
class InputGate(Layer):
def __init__(self, **kwargs):
super(InputGate, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel = self.add_weight(name='input_gate',
shape=input_shape[1:],
initializer='random_uniform',
trainable=True)
super(InputGate, self).build(input_shape) # Be sure to call this somewhere!
def call(self, inputs):
gate_amount = K.sigmoid(self.kernel)
return inputs * gate_amount
def get_config(self):
config = {}
base_config = super(InputGate, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape):
return input_shape
def create_linear_model(x, y, num_noise_vars = 0, reg_strength=0):
new_x = get_x_with_noise(x, num_noise_vars=num_noise_vars)
model = Sequential([
InputGate(input_shape=(1+num_noise_vars,)),
Dense(1, kernel_regularizer=l2(reg_strength))
])
model.compile(optimizer="rmsprop", loss="mse")
model.optimizer.lr = 0.001
return {"model": model, "new_x": new_x}
def get_x_with_noise(x, num_noise_vars):
noise_vars = []
for noise_var in range(num_noise_vars):
noise_vars.append(np.random.random(len(x)))
noise_vars.append(x)
x_with_noise = noise_vars
new_x = np.array(list(zip(*x_with_noise)))
return new_x
x = np.random.random(500)
y = (x * 3) + 10
num_noise_vars = 5
info = create_linear_model(x, y, num_noise_vars=num_noise_vars)
model = info["model"]
new_x = info["new_x"]
results = model.fit(new_x, y, epochs=num_epochs, verbose=0)
And then I get the following error:
ValueError: Operation 'input_gate_14/IsVariableInitialized' has been marked as not fetchable.
This layer is mostly taken from the docs(https://keras.io/layers/writing-your-own-keras-layers/). I'm using Keras 2.0.9, with Tensorflow backend on a CPU (Macbook Air).
This layer seems as simple as can be, and googling the error leads me to discussions that don't seem relevant. Anyone have ideas of what's causing this?
Any help is much appreciated! Thanks!

Resources