Square brackets in Keras model output shape

I've recently encountered this when looking at a model's summary.
I was wondering, what's the difference between [(None, 16)] and (None, 16)? Why does the Input layer have such an input shape?
Source: model.summary() can't print output shape while using subclass model

The issue is how you are defining the input_shape. In Python, (32) is not a one-element tuple; the parentheses are just grouping, so it evaluates to the plain integer 32. A one-element tuple needs a trailing comma, as you can see below -
input_shape0 = 32
input_shape1 = (32)
input_shape2 = (32,)
print(input_shape0, input_shape1, input_shape2)
32 32 (32,)
Since the Keras functional API's Input expects the shape as a tuple, you have to pass it in the form (n,) rather than n.
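For example, a quick check (a minimal sketch; exactly how a bare int is handled varies by TensorFlow version, but in the version shown in the traceback below it fails):
from tensorflow.keras import layers as klayers

inp = klayers.Input((16,))   # OK: shape is a one-element tuple, tensor shape becomes (None, 16)
# inp = klayers.Input(16)    # bare int: raises TypeError ('int' object is not iterable), as in the traceback below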
It's odd that you get square brackets at all, because when I run the exact same code I get an error.
TypeError Traceback (most recent call last)
<ipython-input-828-b564be68c80d> in <module>
33
34 if __name__ == '__main__':
---> 35 mlp = MLP((16))
36 mlp.summary()
<ipython-input-828-b564be68c80d> in __init__(self, input_shape, **kwargs)
6 super(MLP, self).__init__(**kwargs)
7 # Add input layer
----> 8 self.input_layer = klayers.Input(input_shape)
9
10 self.dense_1 = klayers.Dense(64, activation='relu')
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_layer.py in Input(shape, batch_size, name, dtype, sparse, tensor, **kwargs)
229 dtype=dtype,
230 sparse=sparse,
--> 231 input_tensor=tensor)
232 # Return tensor including `_keras_history`.
233 # Note that in this case train_output and test_output are the same pointer.
~/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_layer.py in __init__(self, input_shape, batch_size, dtype, input_tensor, sparse, name, **kwargs)
89 if input_tensor is None:
90 if input_shape is not None:
---> 91 batch_input_shape = (batch_size,) + tuple(input_shape)
92 else:
93 batch_input_shape = None
TypeError: 'int' object is not iterable
Therefore, the right way to do it (which should fix your model summary as well) is as below -
from tensorflow import keras
from tensorflow.keras import layers as klayers

class MLP(keras.Model):
    def __init__(self, input_shape=(32,), **kwargs):
        super(MLP, self).__init__(**kwargs)
        # Add input layer
        self.input_layer = klayers.Input(input_shape)
        self.dense_1 = klayers.Dense(64, activation='relu')
        self.dense_2 = klayers.Dense(10)
        # Get output layer with `call` method
        self.out = self.call(self.input_layer)
        # Reinitialize as a graph network with explicit inputs and outputs
        super(MLP, self).__init__(
            inputs=self.input_layer,
            outputs=self.out,
            **kwargs)

    def build(self):
        # Initialize the graph
        self._is_graph_network = True
        self._init_graph_network(
            inputs=self.input_layer,
            outputs=self.out)

    def call(self, inputs):
        x = self.dense_1(inputs)
        return self.dense_2(x)

if __name__ == '__main__':
    mlp = MLP((16,))
    mlp.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_19 (InputLayer) (None, 16) 0
_________________________________________________________________
dense_8 (Dense) (None, 64) 1088
_________________________________________________________________
dense_9 (Dense) (None, 10) 650
=================================================================
Total params: 1,738
Trainable params: 1,738
Non-trainable params: 0
_________________________________________________________________

Related

ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2 in a LSTM model

I have this code:
EMBEDDING_DIM = 100
MAXLEN = 16
TRUNCATING = 'post'
PADDING = 'post'
OOV_TOKEN = "<OOV>"
MAX_EXAMPLES = 160000
TRAINING_SPLIT = 0.9

# Initialize an empty numpy array with the appropriate size
EMBEDDINGS_MATRIX = np.zeros((VOCAB_SIZE+1, EMBEDDING_DIM))

# Iterate over all of the words in the vocabulary and, if the vector representation
# for a word exists within GloVe's representations, save it in the EMBEDDINGS_MATRIX array
for word, i in word_index.items():
    embedding_vector = GLOVE_EMBEDDINGS.get(word)
    if embedding_vector is not None:
        EMBEDDINGS_MATRIX[i] = embedding_vector

# Define the model
def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
    model = tf.keras.Sequential([
        # Set the Embedding layer when using pre-trained embeddings
        tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=maxlen, weights=[embeddings_matrix], trainable=False),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv1D(64, 6, activation='relu'),
        # tf.keras.layers.AveragePooling1D(pool_size=4),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model.summary()

model = create_model(VOCAB_SIZE, EMBEDDING_DIM, MAXLEN, EMBEDDINGS_MATRIX)

# Train the model
history = model.fit(train_pad_trunc_seq, train_labels, epochs=20, validation_data=(val_pad_trunc_seq, val_labels))
which gives me this error:
ValueError Traceback (most recent call last)
Input In [26], in <cell line: 2>()
1 # Create your untrained model
----> 2 model = create_model(VOCAB_SIZE, EMBEDDING_DIM, MAXLEN, EMBEDDINGS_MATRIX)
4 # Train the model and save the training history
5 history = model.fit(train_pad_trunc_seq, train_labels, epochs=20, validation_data=(val_pad_trunc_seq, val_labels))
Input In [25], in create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix)
4 def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
5
6 ### START CODE HERE
----> 8 model = tf.keras.Sequential([
9 # This is how you need to set the Embedding layer when using pre-trained embeddings
10 tf.keras.layers.Embedding(vocab_size+1, embedding_dim, input_length=maxlen, weights=[embeddings_matrix], trainable=False),
11 tf.keras.layers.Dropout(0.2),
12 tf.keras.layers.Conv1D(64, 6, activation='relu'),
13 # tf.keras.layers.AveragePooling1D(pool_size=4),
14 tf.keras.layers.GlobalAveragePooling1D(),
15 tf.keras.layers.LSTM(64),
16 tf.keras.layers.Dense(8, activation='relu'),
17 tf.keras.layers.Dense(1, activation='sigmoid')
18 ])
20 model.compile(loss='binary_crossentropy',
21 optimizer='adam',
22 metrics=['accuracy'])
24 ### END CODE HERE
File ~\.conda\envs\tf-gpu\lib\site-packages\tensorflow\python\training\tracking\base.py:629, in no_automatic_dependency_tracking.<locals>._method_wrapper(self, *args, **kwargs)
627 self._self_setattr_tracking = False # pylint: disable=protected-access
628 try:
--> 629 result = method(self, *args, **kwargs)
630 finally:
631 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
File ~\.conda\envs\tf-gpu\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\.conda\envs\tf-gpu\lib\site-packages\keras\engine\input_spec.py:214, in assert_input_compatibility(input_spec, inputs, layer_name)
212 ndim = shape.rank
213 if ndim != spec.ndim:
--> 214 raise ValueError(f'Input {input_index} of layer "{layer_name}" '
215 'is incompatible with the layer: '
216 f'expected ndim={spec.ndim}, found ndim={ndim}. '
217 f'Full shape received: {tuple(shape)}')
218 if spec.max_ndim is not None:
219 ndim = x.shape.rank
ValueError: Input 0 of layer "lstm_2" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 64)
So it seems the LSTM layer needs a 3-D input of shape (?,?,?) instead of (None, 64), but what should it be?
I have also tried changing GlobalAveragePooling1D() to AveragePooling1D(pool_size=4).
The summary prints, but then I get a different error:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_3 (Embedding) (None, 16, 100) 12829400
dropout_3 (Dropout) (None, 16, 100) 0
conv1d_3 (Conv1D) (None, 11, 64) 38464
average_pooling1d_1 (AveragePooling1D) (None, 2, 64) 0
lstm_3 (LSTM) (None, 64) 33024
dense_6 (Dense) (None, 8) 520
dense_7 (Dense) (None, 1) 9
=================================================================
Total params: 12,901,417
Trainable params: 72,017
Non-trainable params: 12,829,400
AttributeError Traceback (most recent call last)
Input In [24], in <cell line: 5>()
2 model = create_model(VOCAB_SIZE, EMBEDDING_DIM, MAXLEN, EMBEDDINGS_MATRIX)
4 # Train the model and save the training history
----> 5 history = model.fit(train_pad_trunc_seq, train_labels, epochs=20, validation_data=(val_pad_trunc_seq, val_labels))
AttributeError: 'NoneType' object has no attribute 'fit'
Please help?
I haven't used it before, but refer to https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D. GlobalAveragePooling1D averages over the entire steps axis, so it reduces the rank of its input by one: an n-dimensional input produces an (n-1)-dimensional output.
If the output shape after the Conv1D layer is (None, 11, 64), then the output of GlobalAveragePooling1D is (None, 64), which is 2-dimensional rather than the 3-dimensional tensor the LSTM expects, so the first attempt raises the error.
Things are different for AveragePooling1D: it does local average pooling over windows along the steps axis, so the output tensor has the same rank as the input.
https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling1D
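A quick way to see the difference (a minimal sketch with random data shaped like the Conv1D output in your summary):
import tensorflow as tf

x = tf.random.normal((2, 11, 64))  # (batch, steps, features), like the Conv1D output above
print(tf.keras.layers.GlobalAveragePooling1D()(x).shape)       # (2, 64): the steps axis is gone, rank drops to 2
print(tf.keras.layers.AveragePooling1D(pool_size=4)(x).shape)  # (2, 2, 64): still rank 3, so the LSTM accepts it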
For the second question, refer to https://www.tensorflow.org/api_docs/python/tf/keras/Model#summary. model.summary() just prints a string summary of the network and returns None, so your create_model function hands back None. You should return the model itself with return model, because it is the Model class that has the fit method.
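Putting both fixes together, a minimal sketch of create_model could look like this (keeping your layer sizes; using your AveragePooling1D variant is just one way to keep the LSTM input 3-D):
import tensorflow as tf

def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size + 1, embedding_dim, input_length=maxlen,
                                  weights=[embeddings_matrix], trainable=False),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv1D(64, 6, activation='relu'),
        # Local pooling keeps a 3-D (batch, steps, features) tensor for the LSTM
        tf.keras.layers.AveragePooling1D(pool_size=4),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    # Return the model itself; model.summary() only prints and returns None
    return model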

Bias only Layer in Keras

How could one build a layer in Keras which maps an input x to an output of the form x+b where b is a trainable weight of the same dimension? (Also the activation function here would be the identity).
You can always build a custom layer by extending the tf.keras.layers.Layer class; here is how I'd do it:
import tensorflow as tf
print('TensorFlow:', tf.__version__)

class BiasLayer(tf.keras.layers.Layer):
    def __init__(self, *args, **kwargs):
        super(BiasLayer, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        self.bias = self.add_weight('bias',
                                    shape=input_shape[1:],
                                    initializer='zeros',
                                    trainable=True)

    def call(self, x):
        return x + self.bias

input_layer = tf.keras.Input(shape=[5])
x = BiasLayer()(input_layer)
model = tf.keras.Model(inputs=[input_layer], outputs=[x])
model.summary()
TensorFlow: 2.1.0
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_7 (InputLayer) [(None, 5)] 0
_________________________________________________________________
bias_layer_3 (BiasLayer) (None, 5) 5
=================================================================
Total params: 5
Trainable params: 5
Non-trainable params: 0
_________________________________________________________________
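As a quick sanity check on the model above (a sketch with made-up data; the target is simply the input shifted by a constant, so the learned bias should move toward that constant):
import numpy as np

x_train = np.random.randn(256, 5).astype('float32')
y_train = x_train + 3.0   # target: every feature shifted by 3

model.compile(optimizer=tf.keras.optimizers.Adam(0.1), loss='mse')
model.fit(x_train, y_train, epochs=100, verbose=0)
print(model.layers[-1].bias.numpy())   # should end up close to [3. 3. 3. 3. 3.]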

Make a "non-fully connected" (singly connected?) neural network in keras

I don't know the name of what I'm looking for, but I want to make a layer in Keras where each input has its own independent weight and bias. E.g. if there were 10 inputs, there would be 10 weights and 10 biases, and each input would be multiplied by its weight and summed with its bias to get 10 outputs.
For example here is a simple Dense network:
from keras.layers import Input, Dense
from keras.models import Model
N = 10
input = Input((N,))
output = Dense(N)(input)
model = Model(input, output)
model.summary()
As you can see, this model has 110 parameters, because it is fully connected:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 10) 0
_________________________________________________________________
dense_2 (Dense) (None, 10) 110
=================================================================
Total params: 110
Trainable params: 110
Non-trainable params: 0
_________________________________________________________________
I want to replace output = Dense(N)(input) with something like output = SinglyConnected()(input), such that the model now has 20 parameters: 10 weights and 10 Biases.
Create a custom layer:
from keras.layers import Layer

class SingleConnected(Layer):

    #creator
    def __init__(self, **kwargs):
        super(SingleConnected, self).__init__(**kwargs)

    #creates weights
    def build(self, input_shape):
        weight_shape = (1,) * (len(input_shape) - 1)
        weight_shape = weight_shape + (input_shape[-1],)  # (....., input)
        self.kernel = self.add_weight(name='kernel',
                                      shape=weight_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=weight_shape,
                                    initializer='zeros',
                                    trainable=True)
        self.built = True

    #operation:
    def call(self, inputs):
        return (inputs * self.kernel) + self.bias

    #output shape
    def compute_output_shape(self, input_shape):
        return input_shape

    #for saving the model - only necessary if you have parameters in __init__
    def get_config(self):
        config = super(SingleConnected, self).get_config()
        return config
Use the layer:
model.add(SingleConnected())
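Used in place of the Dense layer from the question, it should report 20 parameters (10 kernel weights plus 10 biases). A sketch, assuming the SingleConnected class above is already defined:
from keras.layers import Input
from keras.models import Model

N = 10
inp = Input((N,))
out = SingleConnected()(inp)
model = Model(inp, out)
model.summary()   # Total params: 20 (kernel of shape (1, 10) + bias of shape (1, 10))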

Input Dimensions Tensorflow v1.8 ConvLSTMCell

ConvLSTMCell Official Docs
GitHub _conv where the error occurs
Issue
I'm experimenting with the ConvLSTMCell in tensorflow r1.8. The error I'm continuing to generate occurs in the __call__ method of ConvLSTMCell. The _conv method is invoked and the error is raised.
ValueError: Conv Linear Expects 3D, 4D, 5D
The error is raised from the unstacked inputs. Each element of unstacked (in this example) has dimensions [BATCH_SIZE, N_INPUT] = [2, 5]. I am using tf.unstack to generate the sequence input that the ConvLSTMCell requires.
Why use tf.unstack?
If the input array is not unstacked, the TypeError below is raised.
TypeError: inputs must be a sequence
Question
What am I missing on the formatting? I've read through related issues but have not found anything that has guided me into a working implementation.
Are the placeholder dimensions correct?
Should I be unstacking or is there a better way?
Am I providing the proper input dimension into the ConvLSTMCell?
Code
# Parameters
TIME_STEPS = 28
N_INPUT = 5
N_HIDDEN = 128
LEARNING_RATE = 0.001
NUM_UNITS = 28
CHANNEL = 1

tf.reset_default_graph()

# Input placeholders
x = tf.placeholder(tf.float32, [BATCH_SIZE, TIME_STEPS, N_INPUT])
y = tf.placeholder(tf.float32, [None, 1])

# Format input as a sequence for LSTM Input
unstacked = tf.unstack(x, TIME_STEPS, 1)  # shape=(timesteps, batch, inputs)

# Convolutional LSTM Layer
lstm_layer = tf.contrib.rnn.ConvLSTMCell(
    conv_ndims=1,
    input_shape=[BATCH_SIZE, N_INPUT],
    output_channels=5,
    kernel_shape=[7,5]
)

# Error is generated when the lstm_layer is invoked
outputs, _ = tf.contrib.rnn.static_rnn(
    lstm_layer,
    unstacked,
    dtype=tf.float32)
Error Message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-83-3568a097e4ea> in <module>()
10 lstm_layer,
11 unstacked,
---> 12 dtype=tf.float32)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in static_rnn(cell, inputs, initial_state, dtype, sequence_length, scope)
1322 state_size=cell.state_size)
1323 else:
-> 1324 (output, state) = call_cell()
1325
1326 outputs.append(output)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in <lambda>()
1309 varscope.reuse_variables()
1310 # pylint: disable=cell-var-from-loop
-> 1311 call_cell = lambda: cell(input_, state)
1312 # pylint: enable=cell-var-from-loop
1313 if sequence_length is not None:
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
230 setattr(self, scope_attrname, scope)
231 with scope:
--> 232 return super(RNNCell, self).__call__(inputs, state)
233
234 def _rnn_get_variable(self, getter, *args, **kwargs):
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
715
716 if not in_deferred_mode:
--> 717 outputs = self.call(inputs, *args, **kwargs)
718 if outputs is None:
719 raise ValueError('A layer\'s `call` method should return a Tensor '
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py in call(self, inputs, state, scope)
2110 cell, hidden = state
2111 new_hidden = _conv([inputs, hidden], self._kernel_shape,
-> 2112 4 * self._output_channels, self._use_bias)
2113 gates = array_ops.split(
2114 value=new_hidden, num_or_size_splits=4, axis=self._conv_ndims + 1)
~/miniconda3/envs/MultivariateTimeSeries/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/rnn_cell.py in _conv(args, filter_size, num_features, bias, bias_start)
2184 if len(shape) not in [3, 4, 5]:
2185 raise ValueError("Conv Linear expects 3D, 4D "
-> 2186 "or 5D arguments: %s" % str(shapes))
2187 if len(shape) != len(shapes[0]):
2188 raise ValueError("Conv Linear expects all args "
ValueError: Conv Linear expects 3D, 4D or 5D arguments: [[2, 5], [2, 2, 5]]
Here's an example with a couple of tweaks, which at least passes static shape checking:
import tensorflow as tf

# Parameters
TIME_STEPS = 28
N_INPUT = 5
N_HIDDEN = 128
LEARNING_RATE = 0.001
NUM_UNITS = 28
CHANNEL = 1
BATCH_SIZE = 16

# Input placeholders
x = tf.placeholder(tf.float32, [BATCH_SIZE, TIME_STEPS, N_INPUT])
y = tf.placeholder(tf.float32, [None, 1])

# Format input as a sequence for LSTM Input, adding a channels dimension
unstacked = tf.unstack(x[..., None], TIME_STEPS, 1)  # shape=(timesteps, batch, inputs, channels)

# Convolutional LSTM Layer
lstm_layer = tf.contrib.rnn.ConvLSTMCell(
    conv_ndims=1,
    input_shape=[N_INPUT, 1],
    output_channels=5,
    kernel_shape=[7]
)

# The lstm_layer can now be invoked without the shape error
outputs, _ = tf.contrib.rnn.static_rnn(
    lstm_layer,
    unstacked,
    dtype=tf.float32)
Notes:
input_shape does not include the batch dimension (see docstring)
The input needs a channels dimension. It's fine for that dimension to be 1 in the input (that's what I've done here).
Not sure what more than one dimension on kernel_shape would mean for a 1-D convolution.
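As a quick check of the shapes in the graph above (with conv_ndims=1, the cell's internal _conv expects each time step to be a 3-D tensor of (batch, width, channels), matching the "Conv Linear expects 3D, 4D or 5D" message):
# Each element of `unstacked` is one time step of shape (batch, width, channels)
print(unstacked[0].shape)   # (16, 5, 1)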

Keras: Dense vs. Embedding - ValueError: Input 0 is incompatible with layer repeat_vector_9: expected ndim=2, found ndim=3

I have the following network which works fine:
left = Sequential()
left.add(Dense(EMBED_DIM,input_shape=(ENCODE_DIM,)))
left.add(RepeatVector(look_back))
However, I need to replace the Dense layer with the Embedding layer:
left = Sequential()
left.add(Embedding(ENCODE_DIM, EMBED_DIM, input_length=1))
left.add(RepeatVector(look_back))
Then I get the following error when I use the Embedding layer:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-119-5a5f11c97e39> in <module>()
29 left.add(Embedding(ENCODE_DIM, EMBED_DIM, input_length=1))
---> 30 left.add(RepeatVector(look_back))
31
32 leftOutput = left.output
/usr/local/lib/python3.4/dist-packages/keras/models.py in add(self, layer)
467 output_shapes=[self.outputs[0]._keras_shape])
468 else:
--> 469 output_tensor = layer(self.outputs[0])
470 if isinstance(output_tensor, list):
471 raise TypeError('All layers in a Sequential model '
/usr/local/lib/python3.4/dist-packages/keras/engine/topology.py in __call__(self, inputs, **kwargs)
550 # Raise exceptions in case the input is not compatible
551 # with the input_spec specified in the layer constructor.
--> 552 self.assert_input_compatibility(inputs)
553
554 # Collect input shapes to build layer.
/usr/local/lib/python3.4/dist-packages/keras/engine/topology.py in assert_input_compatibility(self, inputs)
449 self.name + ': expected ndim=' +
450 str(spec.ndim) + ', found ndim=' +
--> 451 str(K.ndim(x)))
452 if spec.max_ndim is not None:
453 ndim = K.ndim(x)
ValueError: Input 0 is incompatible with layer repeat_vector_9: expected ndim=2, found ndim=3
What additional changes do I need when replacing the Dense layer with an Embedding layer? Thanks!
The output shape of the Dense layer is (None, EMBED_DIM). However, the output shape of the Embedding layer is (None, input_length, EMBED_DIM). With input_length=1, it'll be (None, 1, EMBED_DIM). You can add a Flatten layer after the Embedding layer to remove axis 1.
You can print out the output shape to debug your model. For example,
EMBED_DIM = 128
left = Sequential()
left.add(Dense(EMBED_DIM, input_shape=(ENCODE_DIM,)))
print(left.output_shape)
(None, 128)
left = Sequential()
left.add(Embedding(ENCODE_DIM, EMBED_DIM, input_length=1))
print(left.output_shape)
(None, 1, 128)
left.add(Flatten())
print(left.output_shape)
(None, 128)
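So, for your case, the Embedding version only needs the extra Flatten before RepeatVector. A sketch using the names from your snippet:
from keras.layers import Embedding, Flatten, RepeatVector
from keras.models import Sequential

left = Sequential()
left.add(Embedding(ENCODE_DIM, EMBED_DIM, input_length=1))
left.add(Flatten())                 # (None, 1, EMBED_DIM) -> (None, EMBED_DIM)
left.add(RepeatVector(look_back))   # (None, look_back, EMBED_DIM), same as with the Dense layer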
