PyTorch RuntimeError : Gradients are not CUDA tensors - pytorch

I am getting the following error while doing seq to seq on characters and feeding to LSTM, and decoding to words using attention. The forward propagation is fine but while computing loss.backward() I am getting the following error.
RuntimeError: Gradients aren't CUDA tensors
My train() function is as followed.
def train(input_batch, input_batch_length, target_batch, target_batch_length, batch_size):
# Zero gradients of both optimizers
encoderchar_optimizer.zero_grad()
encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()
encoder_input = Variable(torch.FloatTensor(len(input_batch), batch_size, 500))
for ix , w in enumerate(input_batch):
w = w.contiguous().view(15, batch_size)
reshaped_input_length = [x[ix] for x in input_batch_length] # [15 ,.. 30 times] * 128
if USE_CUDA:
w = w.cuda()
#reshaped_input_length = Variable(torch.LongTensor(reshaped_input_length)).cuda()
hidden_all , output = encoderchar(w, reshaped_input_length)
encoder_input[ix] = output.transpose(0,1).contiguous().view(batch_size, -1)
if USE_CUDA:
encoder_input = encoder_input.cuda()
temporary_target_batch_length = [15] * batch_size
encoder_hidden_all, encoder_output = encoder(encoder_input, target_batch_length)
decoder_input = Variable(torch.LongTensor([SOS_token] * batch_size))
decoder_hidden = encoder_output
max_target_length = max(temporary_target_batch_length)
all_decoder_outputs = Variable(torch.zeros(max_target_length, batch_size, decoder.output_size))
# Move new Variables to CUDA
if USE_CUDA:
decoder_input = decoder_input.cuda()
all_decoder_outputs = all_decoder_outputs.cuda()
target_batch = target_batch.cuda()
# Run through decoder one time step at a time
for t in range(max_target_length):
decoder_output, decoder_hidden, decoder_attn = decoder(
decoder_input, decoder_hidden, encoder_hidden_all
)
all_decoder_outputs[t] = decoder_output
decoder_input = target_batch[t] # Next input is current target
if USE_CUDA:
decoder_input = decoder_input.cuda()
# Loss calculation and backpropagation
loss = masked_cross_entropy(
all_decoder_outputs.transpose(0, 1).contiguous(), # -> batch x seq
target_batch.transpose(0, 1).contiguous(), # -> batch x seq
target_batch_length
)
loss.backward()
# Clip gradient norms
ecc = torch.nn.utils.clip_grad_norm(encoderchar.parameters(), clip)
ec = torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
dc = torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
# Update parameters with optimizers
encoderchar_optimizer.step()
encoder_optimizer.step()
decoder_optimizer.step()
return loss.data[0], ec, dc
Full Stack Trace is here.
RuntimeError Traceback (most recent call last)
<ipython-input-10-9778e12ded02> in <module>()
11 data_target_batch_index= Variable(torch.LongTensor(data_target_batch_index)).transpose(0,1)
12 # Send the data for training
---> 13 loss, ar1, ar2 = train(data_input_batch_index, data_input_batch_length, data_target_batch_index, data_target_batch_length, batch_size)
14
15 # Keep track of loss
<ipython-input-8-9c71c385f8cd> in train(input_batch, input_batch_length, target_batch, target_batch_length, batch_size)
54 target_batch_length
55 )
---> 56 loss.backward()
57
58 # Clip gradient norms
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_variables)
144 'or with gradient w.r.t. the variable')
145 gradient = self.data.new().resize_as_(self.data).fill_(1)
--> 146 self._execution_engine.run_backward((self,), (gradient,), retain_variables)
147
148 def register_hook(self, hook):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/autograd/function.py in _do_backward(self, gradients, retain_variables)
207 def _do_backward(self, gradients, retain_variables):
208 self.retain_variables = retain_variables
--> 209 result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
210 if not retain_variables:
211 del self._nested_output
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/autograd/function.py in backward(self, *gradients)
215 def backward(self, *gradients):
216 nested_gradients = _unflatten(gradients, self._nested_output)
--> 217 result = self.backward_extended(*nested_gradients)
218 return tuple(_iter_None_tensors(result))
219
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/nn/_functions/rnn.py in backward_extended(self, grad_output, grad_hy)
314 grad_hy,
315 grad_input,
--> 316 grad_hx)
317
318 if any(self.needs_input_grad[1:]):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx)
371 hidden_size, dcy.size()))
372 if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
--> 373 raise RuntimeError('Gradients aren\'t CUDA tensors')
374
375 check_error(cudnn.lib.cudnnRNNBackwardData(
RuntimeError: Gradients aren't CUDA tensors
any suggestions about why I am doing wrong?

Make sure that all the objects that inherit nn.Module also call their .cuda(). Make sure to call before you pass any tensor to them. (essentially before training)
For example, (and I am guessing your encoder and decoder are such objects), do this right before you call train().
encoder = encoder.cuda()
decoder = decoder.cuda()
This ensures that all of the model's parameters are initialized in cuda memory.
Edit
In general, whenever you have this kind of error,
RuntimeError: Gradients aren't CUDA tensors
somewhere, (from your model creation, to defining inputs, to finally supplying the outputs to the loss function) you missed specifying a Variable object to be in GPU memory. You will have go through every step in your model, verifying all Variable objects to be in GPU memory.
Additionally, you dont have to call .cuda() on the outputs. Given that the inputs are in gpu's memory, all operations also takes place in gpu's memory, and so are your outputs.

Related

Convert pytorch float Sigmoid results to labels

I am trying to make segmentation model using Pytorch and implement custom IoULoss:
def IoULoss(inputs, targets, smooth=1e-6):
inputs = (inputs.view(inputs.size(0), -1) > 0.5)
targets = targets.view(targets.size(0), -1)
intersection = (inputs & targets).float().sum(1)
union = (inputs | targets).float().sum(1)
IoU = (intersection + smooth) / (union + smooth)
return 1 - IoU.mean()
But when I train model, I am getting error:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
Is there any good way to cast my predictions to labels?
Full error traceback:
RuntimeError Traceback (most recent call last)
<ipython-input-53-3bfc1b43c8ba> in <module>()
----> 1 my_train(model, 30, torch.optim.Adam(model.parameters(), lr=0.01), IoULoss, train_loader)
2 frames
<ipython-input-41-ebe9c66b1806> in my_train(clf, epochs, optimizer, criterion, train_data, test_data)
22 epoch_loss += loss.item()
23
---> 24 loss.backward()
25 optimizer.step()
26
/usr/local/lib/python3.7/dist-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
253 create_graph=create_graph,
254 inputs=inputs)
--> 255 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
256
257 def register_hook(self, hook):
/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
147 Variable._execution_engine.run_backward(
148 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 149 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
150
151
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
Model inference:
def my_train(clf, epochs, optimizer, criterion, train_data, test_data=None):
cur_min_loss = 10e8
train_losses = []
for epoch_step in range(epochs):
epoch_loss = 0.0
for i, batch in enumerate(train_data):
X, y = batch
optimizer.zero_grad()
prediction = clf(X)
loss = criterion(prediction, y)
epoch_loss += loss.item()
loss.backward()
optimizer.step()
del prediction
del X
del y
torch.cuda.empty_cache()
train_losses.append(epoch_loss / (i + 1))
Criterion is IoULoss; clf final activation is Sigmoid; optimizer Adam, train_data - custom dataset inherited from PyTorch Dataset
The first expression in your loss function:
inputs.view(inputs.size(0), -1) > 0.5
is not a differentiable operator, hence the gradient cannot propagate through that operation.

How to fix 'Expected object of scalar type Float but got scalar type Double for argument #4 'mat1''?

I am trying to build an lstm model. My model code is below.
My input has 4 features, Sequence length of 5 and batch size of 32.
class RNN(nn.Module):
def __init__(self, feature_dim, output_size, hidden_dim, n_layers, dropout=0.5):
"""
Initialize the PyTorch RNN Module
:param feature_dim: The number of input dimensions of the neural network
:param output_size: The number of output dimensions of the neural network
:param hidden_dim: The size of the hidden layer outputs
:param dropout: dropout to add in between LSTM/GRU layers
"""
super(RNN, self).__init__()
# set class variables
self.output_size = output_size
self.n_layers = n_layers
self.hidden_dim = hidden_dim
# define model layers
self.lstm = nn.LSTM(feature_dim, hidden_dim, n_layers, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_size)
self.dropout = nn.Dropout(dropout)
def forward(self, nn_input, hidden):
"""
Forward propagation of the neural network
:param nn_input: The input to the neural network
:param hidden: The hidden state
:return: Two Tensors, the output of the neural network and the latest hidden state
"""
# Get Batch Size
batch_size = nn_input.size(0)
# Pass through LSTM layer
lstm_out, hidden = self.lstm(nn_input, hidden)
# Stack up LSTM outputs
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
# Add dropout and pass through fully connected layer
x = self.dropout(lstm_out)
x = self.fc(lstm_out)
# reshape to be batch_size first
output = x.view(batch_size, -1, self.output_size)
# get last batch of labels
out = output[:, -1]
# return one batch of output word scores and the hidden state
return out, hidden
def init_hidden(self, batch_size):
'''
Initialize the hidden state of an LSTM/GRU
:param batch_size: The batch_size of the hidden state
:return: hidden state of dims (n_layers, batch_size, hidden_dim)
'''
# Implement function
# initialize state with zero weights, and move to GPU if available
weight = next(self.parameters()).data
if is_gpu_available:
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
else:
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
return hidden
When I train, I got the error
RuntimeError Traceback (most recent call last)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in <module>
3
4 # training the model
----> 5 trained_rnn = train_rnn(rnn, batch_size, optimizer, num_epochs, show_every_n_batches)
6
7 # saving the trained model
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in train_rnn(rnn, batch_size, optimizer, n_epochs, show_every_n_batches)
18
19 # forward, back prop
---> 20 loss, hidden = forward_back_prop(rnn, optimizer, inputs, labels, hidden)
21 # record loss
22 batch_losses.append(loss)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward_back_prop(rnn, optimizer, inp, target, hidden)
22
23 # get the output from the model
---> 24 output, h = rnn(inp, h)
25
26 # calculate the loss and perform backprop
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward(self, nn_input, hidden)
36
37 # Pass through LSTM layer
---> 38 lstm_out, hidden = self.lstm(nn_input, hidden)
39 # Stack up LSTM outputs
40 lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
557 return self.forward_packed(input, hx)
558 else:
--> 559 return self.forward_tensor(input, hx)
560
561
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
537 unsorted_indices = None
538
--> 539 output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
540
541 return output, self.permute_hidden(hidden, unsorted_indices)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_impl(self, input, hx, batch_sizes, max_batch_size, sorted_indices)
520 if batch_sizes is None:
521 result = _VF.lstm(input, hx, self._get_flat_weights(), self.bias, self.num_layers,
--> 522 self.dropout, self.training, self.bidirectional, self.batch_first)
523 else:
524 result = _VF.lstm(input, batch_sizes, hx, self._get_flat_weights(), self.bias,
RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #4 'mat1'
I am not able to figure the cause of this error. How to fix it? Please help.
Also, is it the correct way of implementing the LSTM or is there a better way to achieve the same?
torch.nn.LSTM does not need any initialization, as it's initialized to zeros by default (see documentation).
Furthermore, torch.nn.Module already has predefined cuda() method, so one can move module to GPU simply, hence you can safely delete init_hidden(self, batch_size).
You have this error because your input is of type torch.Double, while modules by default use torch.Float (as it's accurate enough, faster and smaller than torch.Double).
You can cast your input Tensors by calling .float(), in your case it could look like that:
def forward(self, nn_input, hidden):
nn_input = nn_input.float()
... # rest of your code
Finally, there is no need for hidden argument if it's always zeroes, you can simply use:
lstm_out, hidden = self.lstm(nn_input) # no hidden here
as hidden is zeroes by default as well.

Gradient disappearing after first epoch in manual linear regression

I'm new to Pytorch and I've been working through the tutorials and playing around with toy examples. I wanted to just make a super simple model to get a better handle on autograd, but I'm running into issues.
I'm trying to train a linear regression model but I keep running into the following error,
----------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-80-ba5ca34a3a54> in <module>()
9 loss = torch.dot(delta, delta)
10
---> 11 loss.backward()
12 with torch.no_grad():
13 w, b = w - learning_rate*w.grad.data, b - learning_rate*b.grad.data
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
87 Variable._execution_engine.run_backward(
88 tensors, grad_tensors, retain_graph, create_graph,
---> 89 allow_unreachable=True) # allow_unreachable flag
90
91
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
And for reference, the code is here,
# dataset for training
X = torch.randn(100, 3)
y = -3*X[:,0] + 2.2*X[:,1] + 0.002*X[:,2] + 1
w = torch.randn(3, requires_grad=True, dtype=torch.float) # model weights
b = torch.randn(1, requires_grad=True, dtype=torch.float) # model bias
num_epochs = 10
learning_rate = 1e-4
for i in range(num_epochs):
y_hat = torch.mv(X, w) + b
delta = y_hat - y
loss = torch.dot(delta, delta)
loss.backward()
with torch.no_grad():
w, b = w - learning_rate*w.grad, b - learning_rate*b.grad
The issue seems to be that after the first epoch the gradient attribute is set to None, but I'm a little confused why this would be the case.
If I try to zero the gradient after updating the weights, then I get a similar error.
The answer lies in locally disabling gradient computation. As you can see in the first example, computations carried out with the torch.no_grad() context manager result in tensors for which requires_grad == False. Since you create "fresh" w and b instead of updating them in place, these tensors lose the requires_grad property after the first iteration and you get the error on 2nd iteration. A simple fix is to reenable gradients
with torch.no_grad():
w, b = w - learning_rate*w.grad, b - learning_rate*b.grad
w.requires_grad_(True)
b.requires_grad_(True)
If you look up the source of optimizers in pytorch optim module, such as SGD, you will see that they use the in-place operators such as add_. You can rewrite your loop in this manner
with torch.no_grad():
w.sub_(learning_rate*w.grad)
b.sub_(learning_rate*b.grad)
which will not touch the requires_grad flag, since the tensors keep their "identity" - just change values. In this case, you will need to remember to call w.zero_grad() and b.zero_grad() in each iteration or the gradient values will keep additively growing.

Converting spinning up policy gradient to pytorch

I'm trying to learn deep reinforcement learning through OpenAI spinning up. To do this, I want to rewrite some of their code using pytorch instead of tensorflow.
Currently I'm trying to convert the code for basic policy gradient (link with explanations) and this is my code so far:
import torch
import torch.nn as nn
from torch.nn.functional import log_softmax
from torch.distributions import Categorical
import torch.optim as optim
import numpy as np
import gym
from gym.spaces import Discrete, Box
class Policy(nn.Module):
def __init__(self, sizes, activation=nn.Tanh(), output_activation=None):
# Build a feedforward neural network.
super(Policy, self).__init__()
self.layers=nn.ModuleList([nn.Linear(sizes[i],sizes[i+1]) for i in
range(len(sizes)-1)])
self.activation=activation
self.output_activation=output_activation
self.returns=[] # for R(tau) weighting in policy gradient
self.rewards=[] # list for rewards accrued throughout ep
self.logits=[] # for measuring episode logits
def forward(self,x):
for layer in self.layers[:-1]:
x=self.activation(layer(x))
x=self.layers[-1](x)
if not self.output_activation==None:
x=self.output_activation(self.layers[-1](x))
return x
# make action selection op (outputs int actions, sampled from policy)
def select_action(logits):
return Categorical(logits=logits).sample()
# make loss function whose gradient, for the right data, is policy gradient
def loss(action_logits,tau_rets):
return torch.sum(torch.dot(log_softmax(action_logits),tau_rets))
def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2,
epochs=50, batch_size=5000, render=False):
# make environment, check spaces, get obs / act dims
env = gym.make(env_name)
assert isinstance(env.observation_space, Box), \
"This example only works for envs with continuous state spaces."
assert isinstance(env.action_space, Discrete), \
"This example only works for envs with discrete action spaces."
obs_dim = env.observation_space.shape[0]
n_acts = env.action_space.n
# make core of policy network
policy = Policy(sizes=[obs_dim]+hidden_sizes+[n_acts])
# make train op
train_op = optim.Adam(policy.parameters(), lr=lr)
# for training policy
def train_one_epoch():
# make some empty lists for logging.
batch_returns = [] # for measuring episode returns
batch_lens = [] # for measuring episode lengths
# reset episode-specific variables
obs = torch.from_numpy(env.reset()).type(torch.FloatTensor) # first obs comes from starting distribution
done = False # signal from environment that episode is over
num_obs=0 # to measure the number of observations
# render first episode of each epoch
finished_rendering_this_epoch = False
# collect experience by acting in the environment with current policy
while True:
# rendering
if (not finished_rendering_this_epoch) and render:
env.render()
# act in the environment
act_logit=policy.forward(obs)
act = select_action(act_logit)
tmp, reward, done, _ = env.step(act.numpy())
obs=torch.from_numpy(tmp).type(torch.FloatTensor)
num_obs+=1
# save logit, reward
policy.rewards.append(reward)
policy.logits.append(act_logit[act].item())
if done:
# if episode is over, record info about episode
ep_ret, ep_len = sum(policy.rewards), len(policy.rewards)
batch_returns.append(ep_ret)
batch_lens.append(ep_len)
# the weight for each logprob(a|s) is R(tau)
policy.returns+= [ep_ret] * ep_len
# reset episode-specific variables
tmp, done, policy.rewards = env.reset(), False, []
obs=torch.from_numpy(tmp).type(torch.FloatTensor)
# won't render again this epoch
finished_rendering_this_epoch = True
# end experience loop if we have enough of it
if num_obs > batch_size:
break
# take a single policy gradient update step
print (len(policy.returns),len(policy.rewards),len(policy.logits))
batch_loss = loss(torch.tensor(policy.logits),torch.tensor(policy.returns))
batch_loss.backward()
return batch_loss, batch_returns, batch_lens
# training loop
for i in range(epochs):
batch_loss, batch_rets, batch_lens = train_one_epoch()
print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'%
(i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
When I run train(), I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-163-2da0ffaf5447> in <module>()
----> 1 train()
<ipython-input-162-560e772be08b> in train(env_name, hidden_sizes, lr, epochs,
batch_size, render)
114 # training loop
115 for i in range(epochs):
--> 116 batch_loss, batch_rets, batch_lens = train_one_epoch()
117 print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'%
118 (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
<ipython-input-162-560e772be08b> in train_one_epoch()
109 print (len(policy.returns),len(policy.rewards),len(policy.logits))
110 batch_loss = loss(torch.tensor(policy.logits),torch.tensor(policy.returns))
--> 111 batch_loss.backward()
112 return batch_loss, batch_returns, batch_lens
113
~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient,
retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
88 Variable._execution_engine.run_backward(
89 tensors, grad_tensors, retain_graph, create_graph,
---> 90 allow_unreachable=True) # allow_unreachable flag
91
92
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I don't understand why this happens since my code is similar to other rl pytorch code such as this.

Input Dimensions Tensorflow v1.8 ConvLSTMCell

ConvLSTMCell Official Docs
GitHub _conv where the error occurs
Issue
I'm experimenting with the ConvLSTMCell in tensorflow r1.8. The error I'm continuing to generate occurs in the __call__ method of ConvLSTMCell. The _conv method is invoked and the error is raised.
ValueError: Conv Linear Expects 3D, 4D, 5D
The error is raised from the unstacked inputs. unstacked (in this example) has dimensions of [BATCH_SIZE, N_INPUTS] = [2,5]. I am using tf.unstack to generate the required sequence that the ConvLSTMCell requires.
Why use tf.unstack?
If the input array is not unstacked, the TypeError below is raised.
TypeError: inputs must be a sequence
Question
What am I missing on the formatting? I've read through related issues but have not found anything that has guided me into a working implementation.
Are the placeholder dimensions correct?
Should I be unstacking or is there a better way?
Am I providing the proper input dimension into the ConvLSTMCell?
Code
# Parameters
TIME_STEPS = 28
N_INPUT = 5
N_HIDDEN = 128
LEARNING_RATE = 0.001
NUM_UNITS = 28
CHANNEL = 1
tf.reset_default_graph()
# Input placeholders
x = tf.placeholder(tf.float32, [BATCH_SIZE, TIME_STEPS, N_INPUT])
y = tf.placeholder(tf.float32, [None, 1])
# Format input as a sequence for LSTM Input
unstacked = tf.unstack(x, TIME_STEPS, 1) # shape=(timesteps, batch, inputs)
# Convolutional LSTM Layer
lstm_layer = tf.contrib.rnn.ConvLSTMCell(
conv_ndims=1,
input_shape=[BATCH_SIZE, N_INPUT],
output_channels=5,
kernel_shape=[7,5]
)
# Error is generated when the lstm_layer is invoked
outputs, _ = tf.contrib.rnn.static_rnn(
lstm_layer,
unstacked,
dtype=tf.float32)
Error Message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-83-3568a097e4ea> in <module>()
10 lstm_layer,
11 unstacked,
---> 12 dtype=tf.float32)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in static_rnn(cell, inputs, initial_state, dtype, sequence_length, scope)
1322 state_size=cell.state_size)
1323 else:
-> 1324 (output, state) = call_cell()
1325
1326 outputs.append(output)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in <lambda>()
1309 varscope.reuse_variables()
1310 # pylint: disable=cell-var-from-loop
-> 1311 call_cell = lambda: cell(input_, state)
1312 # pylint: enable=cell-var-from-loop
1313 if sequence_length is not None:
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
230 setattr(self, scope_attrname, scope)
231 with scope:
--> 232 return super(RNNCell, self).__call__(inputs, state)
233
234 def _rnn_get_variable(self, getter, *args, **kwargs):
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
715
716 if not in_deferred_mode:
--> 717 outputs = self.call(inputs, *args, **kwargs)
718 if outputs is None:
719 raise ValueError('A layer\'s `call` method should return a Tensor '
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py in call(self, inputs, state, scope)
2110 cell, hidden = state
2111 new_hidden = _conv([inputs, hidden], self._kernel_shape,
-> 2112 4 * self._output_channels, self._use_bias)
2113 gates = array_ops.split(
2114 value=new_hidden, num_or_size_splits=4, axis=self._conv_ndims + 1)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py in _conv(args, filter_size, num_features, bias, bias_start)
2184 if len(shape) not in [3, 4, 5]:
2185 raise ValueError("Conv Linear expects 3D, 4D "
-> 2186 "or 5D arguments: %s" % str(shapes))
2187 if len(shape) != len(shapes[0]):
2188 raise ValueError("Conv Linear expects all args "
ValueError: Conv Linear expects 3D, 4D or 5D arguments: [[2, 5], [2, 2, 5]]
Here's an example with a couple tweaks, which at least passes static shape checking:
import tensorflow as tf
# Parameters
TIME_STEPS = 28
N_INPUT = 5
N_HIDDEN = 128
LEARNING_RATE = 0.001
NUM_UNITS = 28
CHANNEL = 1
BATCH_SIZE = 16
# Input placeholders
x = tf.placeholder(tf.float32, [BATCH_SIZE, TIME_STEPS, N_INPUT])
y = tf.placeholder(tf.float32, [None, 1])
# Format input as a sequence for LSTM Input
unstacked = tf.unstack(x[..., None], TIME_STEPS, 1) # shape=(timesteps, batch, inputs)
# Convolutional LSTM Layer
lstm_layer = tf.contrib.rnn.ConvLSTMCell(
conv_ndims=1,
input_shape=[N_INPUT, 1],
output_channels=5,
kernel_shape=[7]
)
# Error is generated when the lstm_layer is invoked
outputs, _ = tf.contrib.rnn.static_rnn(
lstm_layer,
unstacked,
dtype=tf.float32)
Notes:
input_shape does not include the batch dimension (see docstring)
The input needs a channels dimension. Fine for it to be one in the input (that's what I've done).
Not sure what more than one dimension on kernel_shape would mean for a 1-D convolution.

Resources