PyTorch error in multiplying matrices in a neural network

I was trying to build a neural network in PyTorch, but I ran into the error below. I'm still new to this topic, so I am not sure how to go about solving it.
Code:
class ANN_Model(nn.Module):
    def __init__(self, input_features=8, hidden1=8, hidden2=200, hidden3=200, hidden4=300, hidden5=300, hidden6=400, hidden7=400, hidden8=300, hidden9=300, out_features=2):
        super().__init__()
        self.f_connected1 = nn.Linear(input_features, hidden1)
        self.f_connected2 = nn.Linear(hidden1, hidden2)
        self.f_connected2 = nn.Linear(hidden2, hidden3)
        self.f_connected2 = nn.Linear(hidden3, hidden4)
        self.f_connected2 = nn.Linear(hidden4, hidden5)
        self.f_connected2 = nn.Linear(hidden5, hidden6)
        self.f_connected2 = nn.Linear(hidden6, hidden7)
        self.f_connected2 = nn.Linear(hidden7, hidden8)
        self.f_connected2 = nn.Linear(hidden8, hidden9)
        self.out = nn.Linear(hidden9, out_features)

    def forward(self, x):
        x = F.relu(self.f_connected1(x))
        x = F.relu(self.f_connected2(x))
        x = F.relu(self.f_connected3(x))
        x = F.relu(self.f_connected4(x))
        x = F.relu(self.f_connected5(x))
        x = F.relu(self.f_connected6(x))
        x = F.relu(self.f_connected7(x))
        x = F.relu(self.f_connected8(x))
        x = F.relu(self.f_connected9(x))
        x = self.out(x)
        return x

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 500
final_losses = []

for i in range(epochs):
    i = i + 1
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred, y_train)
    final_losses.append(loss.item())
    if i % 10 == 1:
        print("Epoch number: {} and the loss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Input In [13], in <cell line: 3>()
3 for i in range(epochs):
4 i = i + 1
----> 5 y_pred = model.forward(X_train)
6 loss=loss_function(y_pred, y_train)
7 final_losses.append(loss.item())
Input In [8], in ANN_Model.forward(self, x)
14 def forward(self,x):
15 x=F.relu(self.f_connected1(x))
---> 16 x=F.relu(self.f_connected2(x))
17 x=F.relu(self.f_connected3(x))
18 x=F.relu(self.f_connected4(x))
File ~/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~/miniconda3/lib/python3.9/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (691x8 and 300x300)

I found it: in your model's constructor __init__, every layer after the first is assigned to the same attribute, self.f_connected2, so each assignment overwrites the previous one. The layer that survives is nn.Linear(hidden8, hidden9), i.e. nn.Linear(300, 300), which expects an input of shape (batch_size, 300), while f_connected1 outputs (batch_size, 8); that is the (691x8 and 300x300) mismatch. Give each layer its own name (f_connected2 through f_connected9) so the attributes match the calls in forward.
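A minimal sketch of a corrected constructor, with each layer given its own name so that it lines up with the forward method in the question (the layer sizes are the defaults from the original code):

def __init__(self, input_features=8, hidden1=8, hidden2=200, hidden3=200, hidden4=300,
             hidden5=300, hidden6=400, hidden7=400, hidden8=300, hidden9=300, out_features=2):
    super().__init__()
    self.f_connected1 = nn.Linear(input_features, hidden1)
    self.f_connected2 = nn.Linear(hidden1, hidden2)
    self.f_connected3 = nn.Linear(hidden2, hidden3)
    self.f_connected4 = nn.Linear(hidden3, hidden4)
    self.f_connected5 = nn.Linear(hidden4, hidden5)
    self.f_connected6 = nn.Linear(hidden5, hidden6)
    self.f_connected7 = nn.Linear(hidden6, hidden7)
    self.f_connected8 = nn.Linear(hidden7, hidden8)
    self.f_connected9 = nn.Linear(hidden8, hidden9)
    self.out = nn.Linear(hidden9, out_features)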

Related

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 512x22400)

I'm building an MLP model for a project and ran into the following error. I'm new to DL and PyTorch, and I know the error is in the hidden layer, but I'm a bit confused about the input and output features of the hidden layer and why it is trying to multiply by a 1x1 matrix. Any help would be appreciated.
Traceback:
RuntimeError Traceback (most recent call last)
Input In [92], in <cell line: 22>()
22 for epoch in range(1, num_epochs+1):
23 print(f"\nEpoch: {epoch}/{num_epochs}")
---> 25 train(model, device, train_loader, optimizer, criterion, epoch)
26 test(model, device, test_loader, criterion, mode = "Test")
Input In [90], in train(model, device, train_loader, optimizer, criterion, epoch)
13 optimizer.zero_grad()
15 # pass the batch to the model and assign the output to variable named y_pred
---> 16 y_pred = model(batch_idx)
18 # calculate the loss (use CrossEntropyLoss in pytorch)
19 loss = criterion(y_pred, target)
File ~\AppData\Local\Programs\Python\Python310\lib\site-
packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or
_global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
Input In [89], in MLP.forward(self, x)
23 print(x.size())
24 print(x.size())
---> 25 x = self.relu(self.hidden(x))
26 print(x.size())
27 x = self.relu(self.classifier(x))
File ~\AppData\Local\Programs\Python\Python310\lib\site-
packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or
_global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~\AppData\Local\Programs\Python\Python310\lib\site-
packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 512x22400)
Below is my code:
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, num_features, num_classes, num_hidden):
        super(MLP, self).__init__()
        # define a linear layer with output channels as 32
        self.hidden = nn.Linear(num_hidden, num_features*70*32)
        self.relu = torch.nn.ReLU()
        # define a linear layer with output features corresponding to the number of classes
        self.classifier = nn.Linear(num_features*70*32, 5)

    def forward(self, x):
        x = torch.tensor(x).unsqueeze(dim=0)
        x = self.relu(self.hidden(x))
        out = self.relu(self.classifier(x))
        return out

num_hidden = 512
num_features = 10
classes = [0, 1, 2, 3, 4]
num_classes = len(classes)
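For reference, nn.Linear(in_features, out_features) expects the last dimension of its input to equal in_features; in the error message, mat1 is the input and mat2 is the layer's transposed weight, so the hidden layer here is receiving a 1x1 tensor instead of a batch of 512-dimensional features. Below is a minimal, self-contained illustration with shapes chosen to mirror the traceback (they are not taken from the project's data):

import torch
import torch.nn as nn

layer = nn.Linear(512, 22400)      # weight has shape (22400, 512)
ok = layer(torch.randn(4, 512))    # works: last input dim matches in_features
print(ok.shape)                    # torch.Size([4, 22400])

bad = torch.randn(1, 1)            # a 1x1 tensor, like the one in the traceback
# layer(bad)  # would raise: mat1 and mat2 shapes cannot be multiplied (1x1 and 512x22400)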

Get contrastive_logits_per_image with the FLAVA model using the Hugging Face library

I have used the example code for the FLAVA model from this link:
https://huggingface.co/docs/transformers/model_doc/flava#transformers.FlavaModel.forward.example
But I am getting the following error:
'FlavaModelOutput' object has no attribute 'contrastive_logits_per_image'
I tried using the FlavaForPreTraining model instead, so the updated code was:
from PIL import Image
import requests
from transformers import FlavaProcessor, FlavaForPreTraining

model = FlavaForPreTraining.from_pretrained("facebook/flava-full")
processor = FlavaProcessor.from_pretrained("facebook/flava-full")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = processor(text=["a photo of a cat"], images=image, return_tensors="pt", padding=True, return_codebook_pixels=True)
inputs.update(
    {
        "input_ids_masked": inputs.input_ids,
    }
)

outputs = model(**inputs)
logits_per_image = outputs.contrastive_logits_per_image  # this is the image-text similarity score
probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities
but I'm still getting this error:
/usr/local/lib/python3.7/dist-packages/transformers/modeling_utils.py:714: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.
"The `device` argument is deprecated and will be removed in v5 of Transformers.", FutureWarning
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-44-bdb428b8184a> in <module>()
----> 1 outputs = model(**inputs)
2 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/flava/modeling_flava.py in forward(self, input_ids, input_ids_masked, pixel_values, codebook_pixel_values, attention_mask, token_type_ids, bool_masked_pos, position_ids, image_attention_mask, skip_unmasked_multimodal_encoder, mlm_labels, mim_labels, itm_labels, output_attentions, output_hidden_states, return_dict, return_loss)
1968 if mim_labels is not None:
1969 mim_labels = self._resize_to_2d(mim_labels)
-> 1970 bool_masked_pos = self._resize_to_2d(bool_masked_pos)
1971 mim_labels[bool_masked_pos.ne(True)] = self.ce_ignore_index
1972
/usr/local/lib/python3.7/dist-packages/transformers/models/flava/modeling_flava.py in _resize_to_2d(self, x)
1765
1766 def _resize_to_2d(self, x: torch.Tensor):
-> 1767 if x.dim() > 2:
1768 x = x.view(x.size(0), -1)
1769 return x
AttributeError: 'NoneType' object has no attribute 'dim'
Can anyone provide suggestions about what's going wrong?
FLAVA's author here.
Can you please add the following arguments to your processor call:
return_codebook_pixels=True, return_image_mask=True
Here is an example Colab if you want to see how to call the FLAVA model: https://colab.research.google.com/drive/1c3l4r4cEA5oXfq9uXhrJibddwRkcBxzP?usp=sharing#scrollTo=xtkrSjfhCdv-
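Applied to the snippet in the question, the processor call would then look roughly like this (only the two extra keyword arguments are added; everything else stays as posted):

inputs = processor(
    text=["a photo of a cat"],
    images=image,
    return_tensors="pt",
    padding=True,
    return_codebook_pixels=True,
    return_image_mask=True,  # supplies the image mask (the bool_masked_pos that was None in the traceback)
)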

PyTorch: Expected all tensors on same device

I have my model and inputs moved to the same device, but I still get the runtime error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)
Here is my code.
First, my model implementation:
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, n_hiddens, n_feature=2, n_output=1):
        super().__init__()
        self.hiddens = []
        n_hidden_in = n_feature
        for n_hidden in n_hiddens:
            self.hiddens.append(torch.nn.Linear(n_hidden_in, n_hidden))  # hidden layer
            n_hidden_in = n_hidden
        self.predict = torch.nn.Linear(n_hidden, n_output)  # output layer

    def forward(self, x):
        for hidden in self.hiddens:
            x = F.relu(hidden(x))  # activation function for hidden layer
        x = self.predict(x)  # linear output
        return x
Then I define my dataloaders. Here, X and y are numpy arrays:
from torch.utils.data import TensorDataset, DataLoader

# Split training/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.from_numpy(X_train)
y_train_tensor = torch.from_numpy(y_train)
X_test_tensor = torch.from_numpy(X_test)
y_test_tensor = torch.from_numpy(y_test)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)  # create your dataset
train_dataloader = DataLoader(train_dataset, batch_size=1000)  # create your dataloader

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)  # create your dataset
test_dataloader = DataLoader(test_dataset, batch_size=1000)  # create your dataloader
Here I train my model. The error occurs at the line "outputs = regressor(inputs)".
NUM_EPOCHS = 2000
BATCH_SIZE = 1000

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Device used : {device}")

# 1 hidden layer
total_num_nodes = 256
regressor = Net(n_hiddens=[total_num_nodes]).to(device)
optimizer = torch.optim.SGD(regressor.parameters(), lr=0.2, momentum=0.1, nesterov=True)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss

for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        inputs, values = data
        inputs = inputs.float().to(device)
        values = values.float().to(device)

        optimizer.zero_grad()  # clear gradients for next train

        print(f"Input device is : cuda:{inputs.get_device()}")
        print(f"Target value device is : cuda:{values.get_device()}")
        print(f"Is model on cuda ? : {next(regressor.parameters()).is_cuda}")

        outputs = regressor(inputs)  # <-- This is where I have the error
        loss = loss_func(outputs, values)
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients
Here are the outputs of my print statements:
Device used : cuda:0
Input device is : cuda:0
Target value device is : cuda:0
Is model on cuda ? :True
This should mean that my model and my tensors are all on the same device, so why do I still get this error?
The error log is :
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-6-5234b830bebc> in <module>()
24 print(f"Target value device is : cuda:{values.get_device()}")
25 print(f"Is model on cuda ? : {next(regressor.parameters()).is_cuda}")
---> 26 outputs = regressor(inputs)
27 loss = loss_func(outputs, values)
28 loss.backward() # backpropagation, compute gradients
4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-4-56c54b30b771> in forward(self, x)
16 def forward(self, x):
17 for hidden in self.hiddens :
---> 18 x = F.relu(hidden(x)) # activation function for hidden layer
19 x = self.predict(x) # linear output
20 return x
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
101
102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
104
105 def extra_repr(self) -> str:
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1846 if has_torch_function_variadic(input, weight, bias):
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
1850
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_addmm)
Thank you very much
TL;DR: use nn.ModuleList instead of a plain Python list to store the hidden layers in Net.
All your hidden layers are stored in a plain Python list, self.hiddens, in Net. When you move your model to the GPU with .to(device), PyTorch has no way to tell that the elements of this list should also be moved to the same device.
However, if you make self.hiddens an nn.ModuleList, PyTorch knows to treat all elements of this special list as nn.Modules and recursively moves them to the same device as Net.
See these answers 1, 2, 3 for more details.
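A minimal sketch of the constructor with that change applied; the rest of Net stays exactly as in the question:

class Net(torch.nn.Module):
    def __init__(self, n_hiddens, n_feature=2, n_output=1):
        super().__init__()
        self.hiddens = torch.nn.ModuleList()  # registered submodules, so .to(device) moves them too
        n_hidden_in = n_feature
        for n_hidden in n_hiddens:
            self.hiddens.append(torch.nn.Linear(n_hidden_in, n_hidden))  # hidden layer
            n_hidden_in = n_hidden
        self.predict = torch.nn.Linear(n_hidden, n_output)  # output layer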

Problem with Graph Neural Network in PyTorch Geometric

I'm trying to understand what is wrong with the following GNN model implemented with PyTorch Geometric:
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = SAGEConv(dataset.num_features,
                             dataset.num_classes,
                             aggr="max")  # max, mean, add ...

    def forward():
        x = self.conv(data.x, data.edge_index)
        return F.log_softmax(x, dim=1)
but I get the following error when trying to run a training loop:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-34-f3ee8050af6a> in <module>
1 best_val_acc = test_acc = 0
2 for epoch in range(1,100):
----> 3 train()
4 _, val_acc, tmp_test_acc = test()
5 if val_acc > best_val_acc:
<ipython-input-14-64df4e2a24f9> in train()
2 model.train()
3 optimizer.zero_grad()
----> 4 F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
5 optimizer.step()
6
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
TypeError: forward() takes 0 positional arguments but 1 was given
I'm adding more details, as requested, on how I call the model:
def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
The function torch.nn.Module.forward must take at least one argument: self. In your case it needs two: self and your input data.
def forward(self, data):  # <-
    x = self.conv(data.x, data.edge_index)
    return F.log_softmax(x, dim=1)
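Since forward now takes the data object, the call sites need to pass it as well; for example, train() would become something like the following (assuming data is the same PyTorch Geometric Data object used elsewhere in the question):

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)  # this invokes forward(self, data)
    F.nll_loss(out[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()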

Ragged Tensors of non-text data as input for LSTM

I am learning about ragged tensors, with applications to particle tracking. I have the following minimal example which reproduces the error I keep experiencing.
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Input, TimeDistributed
from tensorflow.keras.models import Sequential

n = 10
data_n = 32
batch_size = 8
window_length = 8
splits = [n] * data_n

#### Create a ragged tensor with shape (32, None, 8)
t0 = tf.zeros([data_n * n, window_length])
t1 = tf.RaggedTensor.from_row_lengths(t0, splits)
max_seq = t1.bounding_shape()[-1]

#### Define Model
def create_model(batch_size, window_length, max_seq):
    lstm_model = Sequential([
        Input(
            batch_shape=[batch_size, None, window_length],
            batch_size=batch_size,
            dtype=tf.float32,
            ragged=True
        ),
        LSTM(
            max_seq,
            return_sequences=True,
            input_shape=(window_length, None)
        ),
        TimeDistributed(Dense(units=1))
    ])
    return lstm_model

lstm_model = create_model(batch_size=batch_size, window_length=window_length, max_seq=max_seq)
lstm_model(t1[0:8])
When I execute the above code I get the following error:
---------------------------------------------------------------------------
_FallbackException Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name)
1889 try:
-> 1890 _result = pywrap_tfe.TFE_Py_FastPathExecute(
1891 _ctx._context_handle, tld.device_name, "CudnnRNNV3", name,
_FallbackException: Expecting float value for attr dropout, got int
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-19-7609e2877e20> in <module>
1 lstm_model = create_model(batch_size=batch_size, window_length=window_length, max_seq=max_seq)
----> 2 lstm_model(t1[0:8])
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py in call(self, inputs, training, mask)
275 if not self.built:
276 self._init_graph_network(self.inputs, self.outputs, name=self.name)
--> 277 return super(Sequential, self).call(inputs, training=training, mask=mask)
278
279 outputs = inputs # handle the corner case where self.layers is empty
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py in call(self, inputs, training, mask)
715 ' implement a `call` method.')
716
--> 717 return self._run_internal_graph(
718 inputs, training=training, mask=mask,
719 convert_kwargs_to_constants=base_layer_utils.call_context().saving)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
886
887 # Compute outputs.
--> 888 output_tensors = layer(computed_tensors, **kwargs)
889
890 # Update tensor_dict.
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
652
653 if initial_state is None and constants is None:
--> 654 return super(RNN, self).__call__(inputs, **kwargs)
655
656 # If any of `initial_state` or `constants` are specified and are Keras
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py in call(self, inputs, mask, training, initial_state)
1178 # GPU implementation when GPU is available.
1179 if can_use_gpu:
-> 1180 last_output, outputs, new_h, new_c, runtime = gpu_lstm(
1181 **gpu_lstm_kwargs)
1182 else:
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py in gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, time_major, go_backwards, sequence_lengths)
1404 inputs = array_ops.reverse_sequence_v2(
1405 inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
-> 1406 outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
1407 inputs,
1408 input_h=init_h,
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name)
1899 except _core._FallbackException:
1900 try:
-> 1901 return cudnn_rnnv3_eager_fallback(
1902 input, input_h, input_c, params, sequence_lengths,
1903 rnn_mode=rnn_mode, input_mode=input_mode, direction=direction,
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3_eager_fallback(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name, ctx)
1999 "direction", direction, "dropout", dropout, "seed", seed, "seed2", seed2,
2000 "num_proj", num_proj, "is_training", is_training, "time_major", time_major)
-> 2001 _result = _execute.execute(b"CudnnRNNV3", 5, inputs=_inputs_flat,
2002 attrs=_attrs, ctx=ctx, name=name)
2003 if _execute.must_record_gradient():
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
InvalidArgumentError: Invalid input_h shape: [1,8,8] [10,8,8] [Op:CudnnRNNV3]
The 10 refers to the number of units in the LSTM layer, which is equal to the bounding shape of t1. The two 8's refer to batch_size and window_length. I thought the 1 referred to the output shape, but that is not the case: it does not change when I add more units to the Dense layer; the number remains the same.
When working with tf.RaggedTensor and sequences of variable size, you want to set batch_size = 1 and ensure that the sequence length passed to the LSTM is None.
This is because, even though tf.RaggedTensor is a great way to store variable-sized sequences of numpy arrays, the LSTM still expects every sequence within a batch to have the same length; you can, however, have variable-sized sequences across batches.
Making those changes should fix the issue you are facing.
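A sketch of create_model adjusted along those lines (untested; it keeps the layer names from the question, leaves the time dimension as None, and uses a batch size of 1):

def create_model(window_length, max_seq):
    lstm_model = Sequential([
        Input(
            shape=[None, window_length],  # variable-length time dimension
            batch_size=1,                 # one variable-length sequence per batch
            dtype=tf.float32,
            ragged=True
        ),
        LSTM(max_seq, return_sequences=True),  # no fixed input_shape
        TimeDistributed(Dense(units=1))
    ])
    return lstm_model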
This is a bug I encountered as well. It is specific to the combination of the Keras implementation of RNN / LSTM, the TensorFlow version, running on GPU, and using a RaggedTensor.
The erroneous source code compares the shape derived from the ragged tensor for the RNN cell's input hidden state with the shape of the hidden state itself; the ragged dimension shows up as the 1 in the error message.
I was able to replicate the same error using your code and TensorFlow 2.2.0.
Changing the code to remove the ragged tensor, running on CPU, or using a different TensorFlow version allowed me to get the code to work.
