Tensorflow 2.1 TPU v2 reduce memory usage with bfloat16 - python-3.x

I have an issue with the TPU v2 regarding memory usage.
I would like to experiment with a large model, but unfortunately it does not fit in memory. I would like to use bfloat16 to save memory, but I run into an error when I build the model:
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is
    # set: this is always the case on Kaggle.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', resolver.master())
except ValueError:
    resolver = None

if resolver:
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
tf.keras.mixed_precision.experimental.set_policy(policy)

with strategy.scope():
    model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"), num_classes=5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
    model.compile(optimizer=optimizer, loss=['mse'])
InvalidArgumentError                      Traceback (most recent call last)
in ()
3 with strategy.scope():
4
----> 5 model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"),
num_classes=5)
6 optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
7 optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
13 frames
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_utils.py
in from_pretrained(cls, pretrained_model_name_or_path, *model_args,
**kwargs)
399 return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)
400
--> 401 model(model.dummy_inputs, training=False) # build the network with dummy inputs
402
403 assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in call(self, inputs, **kwargs)
222
223 """
--> 224 outputs = self.roberta(inputs, **kwargs)
225 return outputs
226
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, attention_mask, token_type_ids, position_ids,
head_mask, inputs_embeds, training)
567 # head_mask = tf.constant([0] * self.num_hidden_layers)
568
--> 569 embedding_output = self.embeddings([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
570 encoder_outputs = self.encoder([embedding_output, extended_attention_mask, head_mask], training=training)
571
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, mode, training)
146 """
147 if mode == "embedding":
--> 148 return self._embedding(inputs, training=training)
149 elif mode == "linear":
150 return self._linear(inputs)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in _embedding(self, inputs, training)
79 position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
80
---> 81 return super()._embedding([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
82
83
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in _embedding(self, inputs, training)
173
174 embeddings = inputs_embeds + position_embeddings + token_type_embeddings
--> 175 embeddings = self.LayerNorm(embeddings)
176 embeddings = self.dropout(embeddings, training=training)
177 return embeddings
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
962 # Eager execution on data tensors.
963 with backend.name_scope(self._name_scope()):
--> 964 self._maybe_build(inputs)
965 cast_inputs = self._maybe_cast_inputs(inputs)
966 with base_layer_utils.autocast_context_manager(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in _maybe_build(self, inputs)
   2406         self._dtype_policy = policy.Policy(dtype)
   2407     input_shapes = None
-> 2408     if all(hasattr(x, 'shape') for x in input_list):
   2409       input_shapes = nest.map_structure(lambda x: x.shape, inputs)
   2410     # Only call `build` if the user has manually overridden the build method.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in <genexpr>(.0)
   2406         self._dtype_policy = policy.Policy(dtype)
   2407     input_shapes = None
-> 2408     if all(hasattr(x, 'shape') for x in input_list):
   2409       input_shapes = nest.map_structure(lambda x: x.shape, inputs)
   2410     # Only call `build` if the user has manually overridden the build method.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py
in shape(self)
   1065         self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple())
   1066       except core._NotOkStatusException as e:
-> 1067         six.raise_from(core._status_to_exception(e.code, e.message), None)
   1068
   1069     return self._tensor_shape

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a bfloat16 tensor but is a float tensor
I suppose I have to cast something in the model? How can I do that?
I am using TensorFlow 2.1 and a TPU v2.
I have seen the thread below, but it seems to be for TensorFlow 1.x, as its code does not work for me:
Memory reduction Tensorflow TPU v2/v3 bfloat16

I think the problem is that you are trying to load a pre-trained model trained with full-precision floats into a bfloat16 model. I don't think that will work; you would have to train from scratch.
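For what it's worth, the dtype clash reported at the bottom of the traceback is easy to reproduce on its own. This is only a minimal sketch of the mismatch (not a fix), using plain eager TensorFlow:

import tensorflow as tf

a = tf.constant([1.0, 2.0], dtype=tf.bfloat16)  # e.g. activations under the mixed_bfloat16 policy
b = tf.constant([1.0, 2.0], dtype=tf.float32)   # e.g. float32 weights restored from the checkpoint
try:
    tf.add(a, b)  # AddV2 requires both operands to have the same dtype
except tf.errors.InvalidArgumentError as e:
    print(e)  # cannot compute AddV2 as input #1(zero-based) was expected to be a bfloat16 tensor but is a float tensor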

Related

layers compatibility between attention layer and CONV1D in keras

I am building a model in a BiLSTM + attention + Conv1D fashion (I want to use multiple Conv1D layers with different kernel sizes). I am facing a layer-incompatibility issue between the attention layer and the Conv1D layer. I have tried the Reshape function, but it is not working. My model is as follows:
sequence_input = Input(shape=(maxlen,), dtype="int32")
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True), name="bi_lstm_0")(embedded_sequences)

# Getting our LSTM outputs
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(
    LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True), name="bi_lstm_1")(lstm)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
context_vector, attention_weights = Attention(10)(lstm, state_h)

x = Reshape((maxlen, output_dim, 1))(context_vector)

kernel_sizes = [1, 2, 3, 4, 5]
convs = []
for kernel_size in range(len(kernel_sizes)):
    conv = Conv1D(128, kernel_size, activation='relu')(x)
    convs.append(conv)
avg_pool = GlobalAveragePooling1D()(convs)
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])

output = Dense(50, activation="sigmoid")(conc)
model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())
my code gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
13 context_vector, attention_weights = Attention(10)(lstm, state_h)
14
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
16
17
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
951 return self._functional_construction_call(inputs, args, kwargs,
--> 952 input_list)
953
954 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1089 # Check input assumptions set after layer building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
820 return nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args, kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
861 # TODO(kaftan): do we maybe_build here, or have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs, outputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
555 # Set the static shape for the result since it might lost during array_ops
556 # reshape, eg, some `None` dim in the result could be inferred.
--> 557 result.set_shape(self.compute_output_shape(inputs.shape))
558 return result
559
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
546 output_shape = [input_shape[0]]
547 output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548 self.target_shape)
549 return tensor_shape.TensorShape(output_shape)
550
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
534 output_shape[unknown] = original // known
535 elif original != known:
--> 536 raise ValueError(msg)
537 return output_shape
538
ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]
kindly help me
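As a side note (with made-up shapes, not the asker's model): the ValueError simply states Reshape's contract that the number of elements per sample must be preserved, and 2500 * 100 * 1 = 250000 elements cannot come from a 256-element vector:

import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Reshape

x = Input(shape=(256,))            # per-sample size 256, like the context vector in the error
ok = Reshape((16, 16))(x)          # fine: 16 * 16 == 256
bad = Reshape((2500, 100, 1))(x)   # ValueError: total size of new array must be unchanged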

A simple linear regression model with a DenseVariational layer in Tensorflow-Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically the DenseVariational layer, but it fails for some reason. How can I correct the code?
x_train = np.linspace(-1, 1, 100)[:, np.newaxis]
y_train = x_train + 0.3 * np.random.randn(100)[:, np.newaxis]

def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = Sequential([
        tfpl.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n)))
    ])

def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    posterior_model = Sequential([
        # The parameters of the model are declared as trainable Variables. The size of the
        # VariableLayer is the number of parameters needed to build the MultivariateNormalTriL
        # object for an event of n dimensions, as returned by tfpl.MultivariateNormalTriL.params_size(n).
        tfpl.VariableLayer(tfpl.MultivariateNormalTriL.params_size(n), dtype=dtype),
        # The posterior function returns to the Variational layer that calls it a
        # MultivariateNormalTriL object with as many dimensions as the parameters of the
        # Variational Dense layer: each parameter is generated by a distinct Normal, shifted
        # and scaled by a mu and sigma learned from the data, independently of all the other
        # weights. The output of the VariableLayer above is its input.
        tfpl.MultivariateNormalTriL(n)
    ])
    return posterior_model

model = Sequential([
    tfpl.DenseVariational(
        input_shape=(1,),               # the input is of dimensionality 1, a series
        units=1,                        # a linear regression is a Dense layer with a single unit
        make_prior_fn=prior,            # function returning the prior distribution on the weights
        make_posterior_fn=posterior,    # function returning the variational approximation of the posterior on the weights
        kl_weight=1 / x_train.shape[0], # Tensorflow scales the mini-batch likelihood loss to be an unbiased estimator
                                        # of the true loss but not the KL divergence loss, so we scale it here
        kl_use_exact=True               # use the closed-form KL where Tensorflow has one registered; setting False
                                        # instead approximates the KL divergence by sampling
    )
])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
18
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
21
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
143
144 #property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
208
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1118
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
120
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
124
TypeError: 'NoneType' object is not callable
Did you forget to return the model?
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = tf.keras.Sequential([
        tfp.layers.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n)))
    ])
    return prior_model
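With that return in place, the DenseVariational model from the question should build. A quick sanity check, only a sketch assuming the tfpl/tfd aliases and the x_train/y_train arrays defined in the question:

model = tf.keras.Sequential([
    tfpl.DenseVariational(units=1, input_shape=(1,),
                          make_prior_fn=prior, make_posterior_fn=posterior,
                          kl_weight=1 / x_train.shape[0], kl_use_exact=True)
])
model.compile(optimizer='adam', loss='mse')
model.fit(x_train, y_train, epochs=5, verbose=0)   # no longer raises "'NoneType' object is not callable"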

ValueError: Target size (torch.Size([26])) must be the same as input size (torch.Size([66]))

ValueError Traceback (most recent call last)
<ipython-input-28-10509ec63b58> in <module>
11 start_time = time.time()
12
---> 13 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
14 valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
15
<ipython-input-25-ecea5e6d8ce8> in train(model, iterator, optimizer, criterion)
12 predictions = model(batch.t).squeeze(1)
13
---> 14 loss = criterion(predictions, batch.l)
15
16 acc = binary_accuracy(predictions, batch.l)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
~\anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
599 self.weight,
600 pos_weight=self.pos_weight,
--> 601 reduction=self.reduction)
602
603
~\anaconda3\lib\site-packages\torch\nn\functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
2122
2123 if not (target.size() == input.size()):
-> 2124 raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
2125
2126 return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
ValueError: Target size (torch.Size([26])) must be the same as input size (torch.Size([66]))
I'm training a CNN on my own Indonesian dataset to do sentiment analysis. This is my code. I used criterion = nn.BCEWithLogitsLoss() and optimizer = optim.RMSprop. I do not understand where I need to make changes to correct the mistake.
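As a side note (with made-up tensors, not the asker's data): the check at the bottom of the traceback enforces that nn.BCEWithLogitsLoss receives a target with exactly the same shape as its logits input:

import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()
logits = torch.randn(66)                      # what the model returned (the loss "input")
labels = torch.randint(0, 2, (26,)).float()   # a batch of labels with a different length
# criterion(logits, labels)                   # ValueError: Target size (torch.Size([26])) must be the same as input size (torch.Size([66]))
labels = torch.randint(0, 2, (66,)).float()   # shapes match
loss = criterion(logits, labels)              # computes normally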

Non-Stateful LSTM Issues with Keras

Good day,
I am trying to create an LSTM model (stateful or non-stateful) but am running into several issues.
I am attempting to add a layer using:
model = Sequential()
...
model.add(LSTM(c['num_rnn_unit'],
               activation=c['rnn_activation'],
               dropout=c['dropout_rnn_input'],
               recurrent_dropout=c['dropout_rnn_recurrent'],
               return_sequences=True,
               stateful=False,
               #batch_input_shape=(c['batch_size'], c['num_steps'], c['input_dim'])
               ))
where:
'num_rnn_unit': np.random.choice([16, 32, 64, 128, 256, 512, 1024])
'rnn_activation': np.random.choice(['tanh', 'sigmoid'])
'dropout_rnn_input': 0
'batch_size': np.random.choice([64, 128])
'num_steps':np.random.choice([5, 10, 15])
'input_dim': 64
I experimented with "stateful=True" and used the commented-out "batch_input_shape", but this caused additional errors, which others have run into as well without a workable solution.
So I stuck with trying to make "stateful=False" work, but it yields the error below.
Any thoughts on why this error is coming up? Thanks in advance!
Here is the traceback:
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\sequential.py in add(self, layer)
180 self.inputs = network.get_source_inputs(self.outputs[0])
181 elif self.outputs:
--> 182 output_tensor = layer(self.outputs[0])
183 if isinstance(output_tensor, list):
184 raise TypeError('All layers in a Sequential model '
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
539
540 if initial_state is None and constants is None:
--> 541 return super(RNN, self).__call__(inputs, **kwargs)
542
543 # If any of `initial_state` or `constants` are specified and are Keras
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\backend\tensorflow_backend.py in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state)
1689 mask=mask,
1690 training=training,
-> 1691 initial_state=initial_state)
1692
1693 #property
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state, constants)
635 mask = mask[0]
636
--> 637 if len(initial_state) != len(self.states):
638 raise ValueError('Layer has ' + str(len(self.states)) +
639 ' states but was passed ' +
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in states(self)
436 num_states = 1
437 else:
--> 438 num_states = len(self.cell.state_size)
439 return [None for _ in range(num_states)]
440 return self._states
TypeError: object of type 'numpy.int32' has no len()
Could the issue be the first layer's "input_shape", used when batch_normalization is True?
if c['batch_normalization']:
    model.add(BatchNormalization(input_shape=(c['num_steps'], c['input_dim'])))
model.add(TimeDistributed(Dropout(c['dropout_input']),
                          input_shape=(c['num_steps'], c['input_dim'])))

Tensorflow Keras - Error while stacking LSTM layers

I have the following sequence of layers. Adding additional LSTMs into the mix yields the following error, which I cannot really understand.
I'm using Python 3.7.3 on Linux (Ubuntu x64), GCC 7.4.0, tensorflow-gpu=='2.0.0'.
print(x_train_uni.shape)  # (299980, 20, 1)

simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(16),
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
which yields:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-ba40f416ca84> in <module>
6 tf.keras.layers.LSTM(16),
7 tf.keras.layers.LSTM(8),
----> 8 tf.keras.layers.Dense(1, activation='tanh')
9 ])
10
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in __init__(self, layers, name)
112 tf_utils.assert_no_legacy_layers(layers)
113 for layer in layers:
--> 114 self.add(layer)
115
116 #property
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
194 # If the model is being built continuously on top of an input layer:
195 # refresh its output.
--> 196 output_tensor = layer(self.outputs[0])
197 if len(nest.flatten(output_tensor)) != 1:
198 raise TypeError('All layers in a Sequential model '
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
621
622 if initial_state is None and constants is None:
--> 623 return super(RNN, self).__call__(inputs, **kwargs)
624
625 # If any of `initial_state` or `constants` are specified and are Keras
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
810 # are casted, not before.
811 input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812 self.name)
813 graph = backend.get_graph()
814 with graph.as_default(), backend.name_scope(self._name_scope()):
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
175 'expected ndim=' + str(spec.ndim) + ', found ndim=' +
176 str(ndim) + '. Full shape received: ' +
--> 177 str(x.shape.as_list()))
178 if spec.max_ndim is not None:
179 ndim = x.shape.ndims
ValueError: Input 0 of layer lstm_19 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 128]
If, however, I change the model like so, it actually works:
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
    # tf.keras.layers.LSTM(64),
    # tf.keras.layers.LSTM(32),
    # tf.keras.layers.Dropout(0.25),
    # tf.keras.layers.LSTM(16),
    # tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
What is it that I'm missing? Why can't two or more LSTM layers be stacked one on top of another?
An LSTM layer requires a sequence (a 3-D tensor of shape (batch, timesteps, features)) as input. However, by default a Keras LSTM returns only its final output, a 2-D tensor.
Hence the second LSTM in the proposed architecture is fed a 2-D tensor instead of the required sequence.
The solution is to use the return_sequences=True flag (see the LSTM arguments in the docs):
import tensorflow as tf

x_train_uni = tf.zeros((100, 20, 1))

simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:], return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
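A quick way to confirm the fix is to inspect the output shapes; roughly (layer names will differ per run), every LSTM except the last now emits a 3-D tensor that the next LSTM can consume:

simple_lstm_model.summary()
# lstm    -> (None, 20, 128)
# lstm_1  -> (None, 20, 64)
# lstm_2  -> (None, 20, 32)
# dropout -> (None, 20, 32)
# lstm_3  -> (None, 20, 16)
# lstm_4  -> (None, 8)
# dense   -> (None, 1)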

Resources