Non-Stateful LSTM Issues with Keras

Good day,
I am trying to create an LSTM model (stateful or non-stateful) but am running into several issues.
I am attempting to add a layer using:
model = Sequential()
...
model.add(LSTM(c['num_rnn_unit'],
               activation=c['rnn_activation'],
               dropout=c['dropout_rnn_input'],
               recurrent_dropout=c['dropout_rnn_recurrent'],
               return_sequences=True,
               stateful=False,
               # batch_input_shape=(c['batch_size'], c['num_steps'], c['input_dim'])
               ))
where:
'num_rnn_unit': np.random.choice([16, 32, 64, 128, 256, 512, 1024]),
'rnn_activation': np.random.choice(['tanh', 'sigmoid']),
'dropout_rnn_input': 0,
'batch_size': np.random.choice([64, 128]),
'num_steps': np.random.choice([5, 10, 15]),
'input_dim': 64
I experimented with stateful=True and the commented-out batch_input_shape, but that caused additional errors (which others have reported as well, with no workable solution that I could find).
So I stuck with trying to make stateful=False work, but it yields the error below.
Any thoughts on why this error is coming up? Thanks in advance!
Here is the traceback:
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\sequential.py in add(self, layer)
180 self.inputs = network.get_source_inputs(self.outputs[0])
181 elif self.outputs:
--> 182 output_tensor = layer(self.outputs[0])
183 if isinstance(output_tensor, list):
184 raise TypeError('All layers in a Sequential model '
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
539
540 if initial_state is None and constants is None:
--> 541 return super(RNN, self).__call__(inputs, **kwargs)
542
543 # If any of `initial_state` or `constants` are specified and are Keras
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\backend\tensorflow_backend.py in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state)
1689 mask=mask,
1690 training=training,
--> 1691 initial_state=initial_state)
1692
1693 @property
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in call(self, inputs, mask, training, initial_state, constants)
635 mask = mask[0]
636
--> 637 if len(initial_state) != len(self.states):
638 raise ValueError('Layer has ' + str(len(self.states)) +
639 ' states but was passed ' +
~\AppData\Local\Continuum\anaconda3\envs\env_py3\lib\site-packages\keras\layers\recurrent.py in states(self)
436 num_states = 1
437 else:
--> 438 num_states = len(self.cell.state_size)
439 return [None for _ in range(num_states)]
440 return self._states
TypeError: object of type 'numpy.int32' has no len()
Could this be related to the first layer's input_shape? With batch_normalization=True, the model starts with:
if c['batch_normalization']:
    model.add(BatchNormalization(input_shape=(c['num_steps'], c['input_dim'])))
model.add(TimeDistributed(Dropout(c['dropout_input']),
                          input_shape=(c['num_steps'], c['input_dim'])))
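A hedged note on the traceback above: the error ends in TypeError: object of type 'numpy.int32' has no len(), and every value drawn with np.random.choice ('num_rnn_unit', 'batch_size', 'num_steps') is a numpy.int32 rather than a plain Python int, so a numpy scalar may be reaching Keras where it expects a built-in int. This is only an assumption, but casting the sampled hyperparameters is cheap to try; a minimal sketch:
import numpy as np

# Sketch only: cast everything drawn with np.random.choice to built-in Python
# types, so the LSTM never receives a numpy.int32 (the type named in the TypeError).
c = {
    'num_rnn_unit': int(np.random.choice([16, 32, 64, 128, 256, 512, 1024])),
    'rnn_activation': str(np.random.choice(['tanh', 'sigmoid'])),
    'dropout_rnn_input': 0,
    'batch_size': int(np.random.choice([64, 128])),
    'num_steps': int(np.random.choice([5, 10, 15])),
    'input_dim': 64,
}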

Related

layers compatibility between attention layer and CONV1D in keras

I am building a model in BiLSTM-attention-Conv1D fashion (I want to use multiple Conv1D layers with different kernel sizes), and I am facing a layer-incompatibility issue between the attention layer and the Conv1D layer. I have tried the Reshape function but it is not working. Following is my code.
My model is as follows:
sequence_input = Input(shape=(maxlen,), dtype="int32")
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True), name="bi_lstm_0")(embedded_sequences)
# Getting our LSTM outputs
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(
    LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True), name="bi_lstm_1")(lstm)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
context_vector, attention_weights = Attention(10)(lstm, state_h)
x = Reshape((maxlen, output_dim, 1))(context_vector)

kernel_sizes = [1, 2, 3, 4, 5]
convs = []
for kernel_size in range(len(kernel_sizes)):
    conv = Conv1D(128, kernel_size, activation='relu')(x)
    convs.append(conv)
avg_pool = GlobalAveragePooling1D()(convs)
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])
output = Dense(50, activation="sigmoid")(conc)

model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())
My code gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
13 context_vector, attention_weights = Attention(10)(lstm, state_h)
14
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
16
17
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
951 return self._functional_construction_call(inputs, args, kwargs,
--> 952 input_list)
953
954 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1089 # Check input assumptions set after layer building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
820 return nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args, kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
861 # TODO(kaftan): do we maybe_build here, or have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs, outputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
555 # Set the static shape for the result since it might lost during array_ops
556 # reshape, eg, some `None` dim in the result could be inferred.
--> 557 result.set_shape(self.compute_output_shape(inputs.shape))
558 return result
559
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
546 output_shape = [input_shape[0]]
547 output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548 self.target_shape)
549 return tensor_shape.TensorShape(output_shape)
550
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
534 output_shape[unknown] = original // known
535 elif original != known:
--> 536 raise ValueError(msg)
537 return output_shape
538
ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]
Kindly help me.
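A hedged sketch of one way around this (it assumes the maxlen, RNN_CELL_SIZE and custom Attention layer from the question, and is not a drop-in fix): the context vector produced by the attention layer is 2D with 256 features per sample, so it cannot be reshaped into (maxlen, output_dim, 1) without changing the total element count, which is exactly what the ValueError reports. A common pattern is to run the Conv1D branches over the 3D sequence output of the BiLSTM instead, pool each branch, and concatenate the pooled features with the 2D context vector:
# Sketch only: Conv1D branches over the BiLSTM sequence output `lstm`
# (batch x maxlen x 2*RNN_CELL_SIZE), pooled per branch, then concatenated
# with the 2D attention context vector. Uses the layers already imported above.
kernel_sizes = [1, 2, 3, 4, 5]
pooled = []
for kernel_size in kernel_sizes:  # iterate over the sizes themselves, not range()
    conv = Conv1D(128, kernel_size, padding='same', activation='relu')(lstm)
    pooled.append(GlobalAveragePooling1D()(conv))
    pooled.append(GlobalMaxPooling1D()(conv))

conc = concatenate(pooled + [context_vector])  # all tensors here are 2D
output = Dense(50, activation="sigmoid")(conc)
model = keras.Model(inputs=sequence_input, outputs=output)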

how to call a pre-trained decoder model in custom loss function in keras?

I trained an auto-encoder model earlier and saved the decoder model. Next, I am training a new model (labeled 'netA'), and I want to use the decoder model inside a custom loss function. I tried it but got an error. Here are my code and the error information:
def custom_loss(y_true, y_pred):
    a = decoder(y_pred)
    b = decoder(y_true)
    c = K.mean(K.square(a - b))
    return c

input_feature = 409
output_feature = 256

model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(input_feature,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(output_feature, activation='sigmoid'))
model.summary()

model.compile(optimizer=Adam(lr=1e-4), loss=custom_loss, metrics=['mse'])
history = model.fit(x_train_pca_scale, y_train_scale_coding, epochs=200, batch_size=32, verbose=2,
                    validation_data=(x_test_pca_scale, y_test_scale_coding))
The error is:
AssertionError Traceback (most recent call last)
in
23
24 model.summary()
---> 25 model.compile(optimizer = Adam(lr = 1e-4),loss=custom_loss, metrics = ['mse'])
26 #checkpointer = ModelCheckpoint(filepath='/home/lidan/3DFacePrediction/gene.face.autoencoder/gene.face.min.val_loss.hd5', monitor='val_loss',verbose=1,mode='min',save_best_only=True)
27
~/software/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py
in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/training.py
in compile(self, optimizer, loss, metrics, loss_weights,
sample_weight_mode, weighted_metrics, target_tensors, **kwargs)
227 # loss_weight_2 * output_2_loss_fn(...) +
228 # layer losses.
--> 229 self.total_loss = self._prepare_total_loss(masks)
230
231 # Functions for train, test and predict will
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/training.py
in _prepare_total_loss(self, masks)
690
691 output_loss = loss_fn(
--> 692 y_true, y_pred, sample_weight=sample_weight)
693
694 if len(self.outputs) > 1:
~/software/anaconda3/lib/python3.7/site-packages/keras/losses.py in
call(self, y_true, y_pred, sample_weight)
69 scope_name = 'lambda' if self.name == '' else self.name
70 with K.name_scope(scope_name):
---> 71 losses = self.call(y_true, y_pred)
72 return losses_utils.compute_weighted_loss(
73 losses, sample_weight, reduction=self.reduction)
~/software/anaconda3/lib/python3.7/site-packages/keras/losses.py in
call(self, y_true, y_pred)
130 Loss values per sample.
131 """
--> 132 return self.fn(y_true, y_pred, **self._fn_kwargs)
133
134 def get_config(self):
in custom_loss(y_true, y_pred)
3 def custom_loss(y_true,y_pred):
4 a = decoder(y_pred)
----> 5 b = decoder(y_true)
6 c = K.mean(K.square(a-b))
7 return c
~/software/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py
in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/base_layer.py
in call(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/network.py
in call(self, inputs, mask)
581 return self._output_tensor_cache[cache_key]
582 else:
--> 583 output_tensors, _, _ = self.run_internal_graph(inputs, masks)
584 return output_tensors
585
~/software/anaconda3/lib/python3.7/site-packages/keras/engine/network.py
in run_internal_graph(self, inputs, masks)
796 input_shapes = unpack_singleton(
797 [x._keras_shape for x in computed_tensors])
--> 798 shapes = to_list(layer.compute_output_shape(input_shapes))
799 uses_learning_phase = any(
800 [x._uses_learning_phase for x in computed_tensors])
~/software/anaconda3/lib/python3.7/site-packages/keras/layers/core.py
in compute_output_shape(self, input_shape)
915 def compute_output_shape(self, input_shape):
916 assert input_shape and len(input_shape) >= 2
--> 917 assert input_shape[-1]
918 output_shape = list(input_shape)
919 output_shape[-1] = self.units
AssertionError:
I am confused by the error information, because the decoder model works on y_pred but fails on y_true.
Could someone help me solve this, or suggest another way to use a saved decoder model in a loss function? Thank you very much!
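A hedged workaround sketch (an assumption, not a confirmed fix): the assertion that fails is assert input_shape[-1], i.e. the y_true placeholder reaches the decoder's Dense layer with an undefined last dimension at compile time, while y_pred has a known width of output_feature. Giving y_true an explicit static width before calling the decoder may get past the assertion; whether the reshaped tensor carries its shape through the decoder's shape inference depends on the Keras version:
from keras import backend as K

def custom_loss(y_true, y_pred):
    # Hypothetical workaround: force a known last dimension onto y_true
    # (output_feature = 256, as in the model above) before it reaches the decoder.
    y_true = K.reshape(y_true, (-1, output_feature))
    a = decoder(y_pred)
    b = decoder(y_true)
    return K.mean(K.square(a - b))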

Tensorflow 2.1 TPU v2 reduce memory usage with bfloat16

I have an issue with the TPU v2 regarding memory usage.
I would like to experiment with a large model, but unfortunately it does not fit in memory. I would like to use bfloat16 to save some memory, but I run into an issue when I build the model:
try:
    # TPU detection. No parameters necessary if the TPU_NAME environment variable
    # is set: this is always the case on Kaggle.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', resolver.master())
except ValueError:
    resolver = None

if resolver:
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)
else:
    # Default distribution strategy in TensorFlow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
tf.keras.mixed_precision.experimental.set_policy(policy)

with strategy.scope():
    model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"), num_classes=5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
    model.compile(optimizer=optimizer, loss=['mse'])
InvalidArgumentError Traceback (most recent call last)
in ()
3 with strategy.scope():
4
----> 5 model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"),
num_classes=5)
6 optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
7 optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
13 frames
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_utils.py
in from_pretrained(cls, pretrained_model_name_or_path, *model_args,
**kwargs)
399 return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)
400
--> 401 model(model.dummy_inputs, training=False) # build the network with dummy inputs
402
403 assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in call(self, inputs, **kwargs)
222
223 """
--> 224 outputs = self.roberta(inputs, **kwargs)
225 return outputs
226
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, attention_mask, token_type_ids, position_ids,
head_mask, inputs_embeds, training)
567 # head_mask = tf.constant([0] * self.num_hidden_layers)
568
--> 569 embedding_output = self.embeddings([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
570 encoder_outputs = self.encoder([embedding_output, extended_attention_mask, head_mask], training=training)
571
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, mode, training)
146 """
147 if mode == "embedding":
--> 148 return self._embedding(inputs, training=training)
149 elif mode == "linear":
150 return self._linear(inputs)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in _embedding(self, inputs, training)
79 position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
80
---> 81 return super()._embedding([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
82
83
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in _embedding(self, inputs, training)
173
174 embeddings = inputs_embeds + position_embeddings + token_type_embeddings
--> 175 embeddings = self.LayerNorm(embeddings)
176 embeddings = self.dropout(embeddings, training=training)
177 return embeddings
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in call(self, *args, **kwargs)
962 # Eager execution on data tensors.
963 with backend.name_scope(self._name_scope()):
--> 964 self._maybe_build(inputs)
965 cast_inputs = self._maybe_cast_inputs(inputs)
966 with base_layer_utils.autocast_context_manager(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in _maybe_build(self, inputs)
2406 self._dtype_policy = policy.Policy(dtype)
2407 input_shapes = None
-> 2408 if all(hasattr(x, 'shape') for x in input_list):
2409 input_shapes = nest.map_structure(lambda x: x.shape, inputs)
2410 # Only call build if the user has manually overridden the build method.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in <genexpr>(.0)
2406 self._dtype_policy = policy.Policy(dtype)
2407 input_shapes = None
-> 2408 if all(hasattr(x, 'shape') for x in input_list):
2409 input_shapes = nest.map_structure(lambda x: x.shape, inputs)
2410 # Only call build if the user has manually overridden the build method.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py
in shape(self)
1065 self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple())
1066 except core._NotOkStatusException as e:
-> 1067 six.raise_from(core._status_to_exception(e.code, e.message), None)
1068
1069 return self._tensor_shape
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a bfloat16 tensor but is a float tensor
I suppose I have to cast something in the model? How can I do that?
I am using TensorFlow 2.1 and TPU v2.
I have seen this thread, but it seems to be for TensorFlow 1.x, as the code does not work for me:
Memory reduction Tensorflow TPU v2/v3 bfloat16
I think the problem is that you are trying to load a pre-trained model that was trained with full-precision floats into a bfloat16 model. I don't think that will work; you would have to train from scratch.
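For illustration only, a minimal reproduction of the dtype mismatch itself (generic TensorFlow behaviour, not tied to the transformers internals in the traceback): adding a bfloat16 tensor to a float32 tensor raises exactly this InvalidArgumentError, and an explicit tf.cast on the float32 side resolves it. Whether the float32 tensor in the question comes from the pre-trained weights being created outside the bfloat16 policy is an assumption.
import tensorflow as tf

a = tf.ones((2, 2), dtype=tf.bfloat16)
b = tf.ones((2, 2), dtype=tf.float32)

# a + b  # raises InvalidArgumentError: cannot compute AddV2 ... expected to be a bfloat16 tensor but is a float tensor
c = a + tf.cast(b, tf.bfloat16)  # explicit cast makes the dtypes agree
print(c.dtype)  # <dtype: 'bfloat16'>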

using class weights with sklearn votingClassifier

I have an imbalanced dataset for a classification problem. My target variable is binary and has two categories.
I implemented Random Forest and Logistic Regression, passing class_weight as a parameter.
When I fit the data to the random forest and logistic regression separately, it works fine. But when I use a VotingClassifier from sklearn.ensemble to fit the random forest and logistic regression on the data, it gives the error Class label no_payment not present. I need to take an ensemble of 3 or more models. I have checked that this error is not caused by the naive_bayes model in the code.
My code:
rf_param = {'class_weight': {'no_payment': 1, 'payment': 3}, 'criterion': 'gini', 'max_depth': 5,
            'min_samples_leaf': 30, 'min_samples_split': 15, 'n_estimators': 100}
lr_param = {'C': 0.1, 'class_weight': {'no_payment': 1, 'payment': 3}, 'fit_intercept': False, 'penalty': 'l2'}

rf = ensemble.RandomForestClassifier(**rf_param)
lr = linear_model.LogisticRegression(**lr_param)
nb = naive_bayes.MultinomialNB(alpha=0.0, class_prior=None, fit_prior=False)

rf.fit(train_x, train_y)
lr.fit(train_x, train_y)
nb.fit(train_x, train_y)

model = ensemble.VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('nb', nb)],
                                  voting='hard', weights=[2, 2, 1])
model.fit(train_x, train_y)
predictions = model.predict(valid_x)
This code runs perfectly if I remove class_weight from the parameter lists.
Below is the complete error message.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-35-e05cd516f347> in <module>()
15 )
16
---> 17 model.fit(train_x, train_y)
18
19 predictions = model.predict(valid_x)
/home/.local/lib/python3.6/site-packages/sklearn/ensemble/_voting.py in fit(self, X, y, sample_weight)
220 transformed_y = self.le_.transform(y)
221
--> 222 return super().fit(X, transformed_y, sample_weight)
223
224 def predict(self, X):
/home/.local/lib/python3.6/site-packages/sklearn/ensemble/_voting.py in fit(self, X, y, sample_weight)
66 delayed(_parallel_fit_estimator)(clone(clf), X, y,
67 sample_weight=sample_weight)
---> 68 for clf in clfs if clf not in (None, 'drop')
69 )
70
/home/.local/lib/python3.6/site-packages/joblib/parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
/home/.local/lib/python3.6/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
833 return False
834 else:
--> 835 self._dispatch(tasks)
836 return True
837
/home/.local/lib/python3.6/site-packages/joblib/parallel.py in _dispatch(self, batch)
752 with self._lock:
753 job_idx = len(self._jobs)
--> 754 job = self._backend.apply_async(batch, callback=cb)
755 # A job can complete so quickly than its callback is
756 # called before we get here, causing self._jobs to
/home/.local/lib/python3.6/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
207 def apply_async(self, func, callback=None):
208 """Schedule a func to be run"""
--> 209 result = ImmediateResult(func)
210 if callback:
211 callback(result)
/home/.local/lib/python3.6/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
588 # Don't delay the application, to avoid keeping the input
589 # arguments in memory
--> 590 self.results = batch()
591
592 def get(self):
/home/.local/lib/python3.6/site-packages/joblib/parallel.py in __call__(self)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
/home/.local/lib/python3.6/site-packages/joblib/parallel.py in <listcomp>(.0)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
/home/.local/lib/python3.6/site-packages/sklearn/ensemble/_base.py in _parallel_fit_estimator(estimator, X, y, sample_weight)
34 raise
35 else:
---> 36 estimator.fit(X, y)
37 return estimator
38
/home/.local/lib/python3.6/site-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight)
319 self.n_outputs_ = y.shape[1]
320
--> 321 y, expanded_class_weight = self._validate_y_class_weight(y)
322
323 if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
/home/.local/lib/python3.6/site-packages/sklearn/ensemble/_forest.py in _validate_y_class_weight(self, y)
585 class_weight = self.class_weight
586 expanded_class_weight = compute_sample_weight(class_weight,
--> 587 y_original)
588
589 return y, expanded_class_weight
/home/.local/lib/python3.6/site-packages/sklearn/utils/class_weight.py in compute_sample_weight(class_weight, y, indices)
161 weight_k = compute_class_weight(class_weight_k,
162 classes_full,
--> 163 y_full)
164
165 weight_k = weight_k[np.searchsorted(classes_full, y_full)]
/home/.local/lib/python3.6/site-packages/sklearn/utils/class_weight.py in compute_class_weight(class_weight, classes, y)
63 i = np.searchsorted(classes, c)
64 if i >= len(classes) or classes[i] != c:
---> 65 raise ValueError("Class label {} not present.".format(c))
66 else:
67 weight[i] = class_weight[c]
ValueError: Class label no_payment not present.
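A hedged note grounded in the traceback above: VotingClassifier label-encodes y before fitting its sub-estimators (the transformed_y = self.le_.transform(y) line), so by the time RandomForestClassifier validates class_weight the targets are integers, and a dict keyed by the string labels 'no_payment'/'payment' no longer matches. Two possible workarounds, sketched under the assumption that these are the only two labels and that the internal LabelEncoder sorts them alphabetically ('no_payment' -> 0, 'payment' -> 1):
# Option 1: let scikit-learn derive the weights from the class frequencies.
rf_param = {'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 5,
            'min_samples_leaf': 30, 'min_samples_split': 15, 'n_estimators': 100}
lr_param = {'C': 0.1, 'class_weight': 'balanced', 'fit_intercept': False, 'penalty': 'l2'}

# Option 2: key the weights by the encoded integer labels that VotingClassifier
# passes down instead of the original strings.
rf_param = {'class_weight': {0: 1, 1: 3}, 'criterion': 'gini', 'max_depth': 5,
            'min_samples_leaf': 30, 'min_samples_split': 15, 'n_estimators': 100}
lr_param = {'C': 0.1, 'class_weight': {0: 1, 1: 3}, 'fit_intercept': False, 'penalty': 'l2'}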

Tensorflow Keras - Error while stacking LSTM layers

I have the following sequence of layers. Adding additional LSTM layers into the mix yields the following error, which I cannot really understand.
I'm using Python 3.7.3 on Linux Ubuntu x64, GCC 7.4.0, tensorflow-gpu 2.0.0.
print(x_train_uni.shape)  # (299980, 20, 1)

simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(16),
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
which yields:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-ba40f416ca84> in <module>
6 tf.keras.layers.LSTM(16),
7 tf.keras.layers.LSTM(8),
----> 8 tf.keras.layers.Dense(1, activation='tanh')
9 ])
10
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in __init__(self, layers, name)
112 tf_utils.assert_no_legacy_layers(layers)
113 for layer in layers:
--> 114 self.add(layer)
115
116 @property
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
194 # If the model is being built continuously on top of an input layer:
195 # refresh its output.
--> 196 output_tensor = layer(self.outputs[0])
197 if len(nest.flatten(output_tensor)) != 1:
198 raise TypeError('All layers in a Sequential model '
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
621
622 if initial_state is None and constants is None:
--> 623 return super(RNN, self).__call__(inputs, **kwargs)
624
625 # If any of `initial_state` or `constants` are specified and are Keras
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
810 # are casted, not before.
811 input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812 self.name)
813 graph = backend.get_graph()
814 with graph.as_default(), backend.name_scope(self._name_scope()):
~/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
175 'expected ndim=' + str(spec.ndim) + ', found ndim=' +
176 str(ndim) + '. Full shape received: ' +
--> 177 str(x.shape.as_list()))
178 if spec.max_ndim is not None:
179 ndim = x.shape.ndims
ValueError: Input 0 of layer lstm_19 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 128]
If, however, I change the model like so, it actually works:
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:]),
    # tf.keras.layers.LSTM(64),
    # tf.keras.layers.LSTM(32),
    # tf.keras.layers.Dropout(0.25),
    # tf.keras.layers.LSTM(16),
    # tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
What is it that I'm missing? Why can't two or more LSTM layers be stacked one on top of the other?
An LSTM layer requires a sequence as input. However, the default setting in Keras is to return only the final output vector rather than the full sequence.
Hence, the second LSTM in the proposed architecture is fed a 2D tensor instead of the required 3D sequence.
The solution is to use the return_sequences=True flag (see the LSTM arguments in the docs):
import tensorflow as tf

x_train_uni = tf.zeros((100, 20, 1))

simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=x_train_uni.shape[-2:], return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1, activation='tanh')
])
simple_lstm_model.compile(optimizer='adam', loss='mae')
