Layer compatibility between Attention layer and Conv1D in Keras

I am building a model in a BiLSTM-attention-Conv1D fashion (I want to use multiple Conv1D layers with different kernel sizes), and I am facing a layer-incompatibility issue between the attention layer and the Conv1D layer. I have tried the Reshape layer but it is not working. My model is as follows:
sequence_input = Input(shape=(maxlen,), dtype="int32")
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)

lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True), name="bi_lstm_0")(embedded_sequences)

# Getting our LSTM outputs
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(
    LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True),
    name="bi_lstm_1")(lstm)

state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])

context_vector, attention_weights = Attention(10)(lstm, state_h)

x = Reshape((maxlen, output_dim, 1))(context_vector)

kernel_sizes = [1, 2, 3, 4, 5]
convs = []
for kernel_size in kernel_sizes:  # iterate over the kernel sizes themselves, not their indices
    conv = Conv1D(128, kernel_size, activation='relu')(x)
    convs.append(conv)

avg_pool = GlobalAveragePooling1D()(convs)
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])

output = Dense(50, activation="sigmoid")(conc)
model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())
My code gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
13 context_vector, attention_weights = Attention(10)(lstm, state_h)
14
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
16
17
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
951 return self._functional_construction_call(inputs, args, kwargs,
--> 952 input_list)
953
954 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1089 # Check input assumptions set after layer building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
820 return nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args, kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
861 # TODO(kaftan): do we maybe_build here, or have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs, outputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
555 # Set the static shape for the result since it might lost during array_ops
556 # reshape, eg, some `None` dim in the result could be inferred.
--> 557 result.set_shape(self.compute_output_shape(inputs.shape))
558 return result
559
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
546 output_shape = [input_shape[0]]
547 output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548 self.target_shape)
549 return tensor_shape.TensorShape(output_shape)
550
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
534 output_shape[unknown] = original // known
535 elif original != known:
--> 536 raise ValueError(msg)
537 return output_shape
538
ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]
Kindly help me.
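For context, the ValueError comes from Reshape's basic constraint: it can only rearrange a sample's existing elements, never change their count (here 256 values cannot become 2500 × 100 × 1). A minimal, self-contained sketch of that constraint with toy shapes, not a fix for this particular model:
import tensorflow as tf
v = tf.keras.Input(shape=(256,))              # e.g. a (batch, 256) context vector
# tf.keras.layers.Reshape((2500, 100, 1))(v)  # ValueError: 256 != 2500 * 100 * 1
x = tf.keras.layers.Reshape((16, 16, 1))(v)   # fine: 256 == 16 * 16 * 1
print(x.shape)                                # (None, 16, 16, 1)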

Related

MLP with TensorFlow Input Shape not Matching Output Shape

I'm trying to train a multilayer perceptron based on the Iris dataset using TensorFlow in Pycharm with Jupyter Notebook. Every time I run my code it fails on the model.fit() function and gives the following error.
ValueError: Shapes (None, 1) and (None, 3) are incompatible.
I've tried playing around with different values for the hyperparameters but there's obviously something I'm not getting. Any help/pointers that anyone could provide would be much appreciated.
Here's my data setup and preprocessing:
import pandas as pd
import numpy as np
# read iris data into pandas dataframe
iris = pd.read_csv("data/IRIS.csv", header=0)
# apply label to index column
iris.index.name = "id"
# create copy of iris dataframe in which to store normalised values and keep original dataframe for comparison later on
iris_unnormalized = iris
iris_normalized = iris.copy()
# isolate columns with numerical values
iris_num = iris.select_dtypes(include=[np.number])
# find max value in each column
col_maxes = iris_num.max()
# find overall max value among all columns
iris_num_max = col_maxes.max()
# divide all numerical values by overall max value in order to normalize data to a value between 0 and 1
iris_num_norm = iris_num / iris_num_max
# reassign normalised values back to their corresponding columns
iris_normalized[iris_num_norm.columns] = iris_num_norm
# specify seed for reproducibility
np.random.seed(1671)
training = iris_normalized.sample(frac = 0.8)
test = iris_normalized.drop(training.index)
# initialize the training input and output list
# same for testing set
X_train = []
Y_train = []
X_test = []
Y_test = []
# loop through the dataframe and separate inputs and outputs for training and testing
for index, row in training.iterrows():
    X_train.append([row['sepal length cm'], row['sepal width cm'], row['petal length cm'], row['petal width cm']])
    Y_train.append([row['species']])

for index, row in test.iterrows():
    X_test.append([row['sepal length cm'], row['sepal width cm'], row['petal length cm'], row['petal width cm']])
    Y_test.append([row['species']])
X_train = np.array(X_train).astype('float32')
Y_train = np.array(Y_train)
X_test = np.array(X_test).astype('float32')
Y_test = np.array(Y_test)
print(X_train.shape, "training samples") # Output: (120, 4) training samples
print(X_test.shape, "test samples") # Output: (30, 4) test samples
Here's where I try to create the neural network:
import tensorflow as tf
from tensorflow import keras
NB_CLASSES = 3 # number of iris varieties
N_HIDDEN = 128
BATCH_SIZE = 10
VERBOSE = 1
VALIDATION_SPLIT = 0.2 # how much of training set to hold for validation
EPOCHS = 200
model = tf.keras.models.Sequential([
    keras.layers.Dense(N_HIDDEN, input_shape=(10, 4,), batch_size=BATCH_SIZE, name="dense_layer1", activation="relu"),
    keras.layers.Dense(N_HIDDEN, input_shape=(4,), batch_size=BATCH_SIZE, name="dense_layer2", activation="relu"),
    keras.layers.Dense(NB_CLASSES, input_shape=(4,), batch_size=BATCH_SIZE, name="dense_layer3", activation="softmax"),
])
model.summary()
################### model summary output: #####################
Layer (type)                 Output Shape              Param #
=================================================================
dense_layer1 (Dense)         (10, 10, 128)             640
_________________________________________________________________
dense_layer2 (Dense)         (10, 10, 128)             16512
_________________________________________________________________
dense_layer3 (Dense)         (10, 10, 3)               387
=================================================================
Total params: 17,539
Trainable params: 17,539
Non-trainable params: 0
# compiling the model
model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

# train the model
model.fit(X_train, Y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)
The data setup and normalization work fine, but when I create the network and call model.fit() I get the error below:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_8888/2571387568.py in <module>
38
39 #train the model
---> 40 model.fit(X_train, Y_train,
41 batch_size=BATCH_SIZE,
42 epochs = EPOCHS,
\venv\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
758 self._concrete_stateful_fn = (
--> 759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
760 *args, **kwds))
761
\venv\lib\site-packages\tensorflow\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
3068
\venv\lib\site-packages\tensorflow\python\eager\function.py in _maybe_define_function(self, args, kwargs)
3461
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3465
\venv\lib\site-packages\tensorflow\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3296 arg_names = base_arg_names + missing_arg_names
3297 graph_function = ConcreteFunction(
-> 3298 func_graph_module.func_graph_from_py_func(
3299 self._name,
3300 self._python_function,
\venv\lib\site-packages\tensorflow\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1005 _, original_func = tf_decorator.unwrap(python_func)
1006
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1008
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
\venv\lib\site-packages\tensorflow\python\eager\def_function.py in wrapped_fn(*args, **kwds)
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
670
\venv\lib\site-packages\tensorflow\python\framework\func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
\venv\lib\site-packages\keras\engine\training.py:853 train_function *
return step_function(self, iterator)
\venv\lib\site-packages\keras\engine\training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
\venv\lib\site-packages\keras\engine\training.py:835 run_step **
outputs = model.train_step(data)
\venv\lib\site-packages\keras\engine\training.py:788 train_step
loss = self.compiled_loss(
\venv\lib\site-packages\keras\engine\compile_utils.py:201 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
\venv\lib\site-packages\keras\losses.py:141 __call__
losses = call_fn(y_true, y_pred)
\venv\lib\site-packages\keras\losses.py:245 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
\venv\lib\site-packages\keras\losses.py:1665 categorical_crossentropy
return backend.categorical_crossentropy(
\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
return target(*args, **kwargs)
\venv\lib\site-packages\keras\backend.py:4839 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
\venv\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 3) are incompatible
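The shape assertion that fails here can be reproduced in isolation. A minimal sketch with toy tensors standing in for the model's labels and softmax output (not the Iris pipeline itself):
import tensorflow as tf
y_true = tf.constant([[0.0]])              # shape (1, 1) - a single label column, like Y_train here
y_pred = tf.constant([[0.2, 0.3, 0.5]])    # shape (1, 3) - a softmax over 3 classes
# tf.keras.losses.categorical_crossentropy(y_true, y_pred)  # ValueError: Shapes (1, 1) and (1, 3) are incompatible
y_onehot = tf.constant([[1.0, 0.0, 0.0]])  # one-hot labels line up with the (..., 3) output
print(tf.keras.losses.categorical_crossentropy(y_onehot, y_pred))  # ~[1.609]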

BERT model showing TypeError: Layer input_spec must be an instance of InputSpec. Got: InputSpec(shape=(None, 55, 768), ndim=3)

I am trying to use a pretrained BERT model for intent classification. Here is my code in a Jupyter notebook.
class DataPreparation:
    text_column = "text"
    label_column = "intent"

    def __init__(self, train, test, tokenizer: FullTokenizer, classes, max_seq_len=192):
        self.tokenizer = tokenizer
        self.max_seq_len = 0
        self.classes = classes
        ((self.train_x, self.train_y), (self.test_x, self.test_y)) = map(self.prepare_data, [train, test])
        print("max seq_len", self.max_seq_len)
        self.max_seq_len = min(self.max_seq_len, max_seq_len)
        self.train_x, self.test_x = map(self.data_padding, [self.train_x, self.test_x])

    def prepare_data(self, df):
        x, y = [], []
        for _, row in tqdm(df.iterrows()):
            text, label = row[DataPreparation.text_column], row[DataPreparation.label_column]
            tokens = self.tokenizer.tokenize(text)
            tokens = ["[CLS]"] + tokens + ["[SEP]"]
            token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
            self.max_seq_len = max(self.max_seq_len, len(token_ids))
            x.append(token_ids)
            y.append(self.classes.index(label))
        return np.array(x), np.array(y)

    def data_padding(self, ids):
        x = []
        for input_ids in ids:
            input_ids = input_ids[:min(len(input_ids), self.max_seq_len - 2)]
            input_ids = input_ids + [0] * (self.max_seq_len - len(input_ids))
            x.append(np.array(input_ids))
        return np.array(x)
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))

def model_defination(max_seq_len, bert_ckpt_file):
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = None
        bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(classes), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))
    load_stock_weights(bert, bert_ckpt_file)
    return model

classes = train.intent.unique().tolist()
data = DataPreparation(train, test, tokenizer, classes, max_seq_len=128)
data.train_x.shape
data.train_y[0]

model = model_defination(data.max_seq_len, bert_ckpt_file)
Now when I try to call the function, I get an error. The parameter values are max_seq_len = 55 and bert_ckpt_file = the BERT checkpoint file. When I create the model I get the error below:
TypeError Traceback (most recent call last)
<ipython-input-17-af3e534b3882> in <module>
----> 1 model = model_defination(data.max_seq_len, bert_ckpt_file)
<ipython-input-16-a83a622dafe3> in model_defination(max_seq_len, bert_ckpt_file)
9 input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',name="input_ids")
10 #input_spec = tf.keras.layers.InputSpec(ndim=3)
---> 11 bert_output = bert(input_ids)
12
13 print("bert shape", bert_output.shape)
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
974 # >> model = tf.keras.Model(inputs, outputs)
975 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
--> 976 return self._functional_construction_call(inputs, args, kwargs,
977 input_list)
978
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1112 layer=self, inputs=inputs, build_graph=True, training=training_value):
1113 # Check input assumptions set after layer building, e.g. input shape.
-> 1114 outputs = self._keras_tensor_symbolic_call(
1115 inputs, input_masks, args, kwargs)
1116
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
846 return tf.nest.map_structure(keras_tensor.KerasTensor, output_signature)
847 else:
--> 848 return self._infer_output_signature(inputs, args, kwargs, input_masks)
849
850 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
886 self._maybe_build(inputs)
887 inputs = self._maybe_cast_inputs(inputs)
--> 888 outputs = call_fn(inputs, *args, **kwargs)
889
890 self._handle_activity_regularization(inputs, outputs)
~\Anaconda3\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
693 except Exception as e: # pylint:disable=broad-except
694 if hasattr(e, 'ag_error_metadata'):
--> 695 raise e.ag_error_metadata.to_exception(e)
696 else:
697 raise
TypeError: in user code:
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\bert\model.py:80 call *
output = self.encoders_layer(embedding_output, mask=mask, training=training)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:1030 __call__ **
self._maybe_build(inputs)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:2659 _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\bert\transformer.py:209 build
self.input_spec = keras.layers.InputSpec(shape=input_shape)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:2777 __setattr__
super(tf.__internal__.tracking.AutoTrackable, self).__setattr__(name, value) # pylint: disable=bad-super-call
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\tensorflow\python\training\tracking\base.py:530 _method_wrapper
result = method(self, *args, **kwargs)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:1296 input_spec
raise TypeError('Layer input_spec must be an instance of InputSpec. '
TypeError: Layer input_spec must be an instance of InputSpec. Got: InputSpec(shape=(None, 55, 768), ndim=3)
I have solved the error. It was due to the shape of my training data: I had added the index as a column in the training data. After resetting the index in the training and test data it works.
The code below solved the error:
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)
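For reference, drop=True discards the old index instead of re-inserting it as a column. A small pandas sketch with a hypothetical toy frame:
import pandas as pd
df = pd.DataFrame({"text": ["hi there", "bye now"], "intent": ["greet", "farewell"]}, index=[7, 3])
df = df.reset_index(drop=True)   # drop=True: throw the old index away rather than adding an 'index' column
print(df.index.tolist())         # [0, 1]
print(df.columns.tolist())       # ['text', 'intent'] - no stray 'index' column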

A simple linear regression model with a DenseVariational layer in TensorFlow Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically the DenseVariational layer, but it fails for some reason. How can I correct the code?
x_train = np.linspace(-1, 1, 100)[:, np.newaxis]
y_train = x_train + 0.3 * np.random.randn(100)[:, np.newaxis]

def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = Sequential([
        tfpl.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n))
        )
    ])

def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    posterior_model = Sequential([
        # The parameters of the model are declared as trainable Variables. The shape of the
        # VariableLayer is the number of parameters needed to build a MultivariateNormalTriL
        # object with event size n, as returned by tfpl.MultivariateNormalTriL.params_size(n).
        tfpl.VariableLayer(tfpl.MultivariateNormalTriL.params_size(n), dtype=dtype),
        # The posterior function returns, to the variational layer that calls it, a
        # MultivariateNormalTriL object with as many dimensions as the DenseVariational layer
        # has parameters: each parameter is generated by a distinct Normal, shifted and scaled
        # by a mu and sigma learned from the data, independently of all the other weights.
        # The output of the VariableLayer becomes the input to this MultivariateNormalTriL object.
        tfpl.MultivariateNormalTriL(n)
    ])
    return posterior_model

model = Sequential([
    tfpl.DenseVariational(
        input_shape=(1,),                # the input has dimensionality 1, a series
        units=1,                         # a linear regression is represented by a Dense layer with a single unit
        make_prior_fn=prior,             # the function defined above that returns the prior distribution on the weights
        make_posterior_fn=posterior,     # the function that returns the variational approximation of the posterior on the weights
        kl_weight=1 / x_train.shape[0],  # TensorFlow scales the mini-batch likelihood loss to an unbiased estimator of
                                         # the true loss, but not the KL divergence loss, so we instruct it to do the
                                         # necessary scaling here
        kl_use_exact=True                # True requires a closed-form KL divergence in the library; setting False
                                         # instead approximates the KL divergence by sampling
    )
])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
18
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
21
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
143
144 #property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
208
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1118
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
120
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
124
TypeError: 'NoneType' object is not callable
Did you forget to return? Without a return statement, prior returns None, and DenseVariational later calls that object as self._prior(inputs), which is exactly the 'NoneType' object is not callable error in the traceback.
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = tf.keras.Sequential([
        tfp.layers.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=tf.zeros(n), scale_diag=tf.ones(n))
        )
    ])
    return prior_model
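A minimal usage sketch, assuming the corrected prior above together with the posterior and model definitions from the question (and with tf, tfp, tfd and tfpl imported as in the post):
model.compile(loss='mse', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.005))
model.fit(x_train, y_train, epochs=500, verbose=0)
# DenseVariational samples its weights on every call, so repeated predictions differ slightly.
print(model.predict(x_train[:3]))
print(model.predict(x_train[:3]))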

TensorFlow 2.1 TPU v2: reduce memory usage with bfloat16

I have an issue with the TPU v2 regarding memory usage.
I would like to run some experiments with a large model, but unfortunately it does not fit in memory. I would like to use bfloat16 to save memory, but I run into an issue when I call the model:
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is
    # set: this is always the case on Kaggle.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', resolver.master())
except ValueError:
    resolver = None

if resolver:
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
tf.keras.mixed_precision.experimental.set_policy(policy)

with strategy.scope():
    model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"), num_classes=5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')
    model.compile(optimizer=optimizer, loss=['mse'])
InvalidArgumentError                      Traceback (most recent call last)
in <module>()
      3 with strategy.scope():
      4
----> 5     model = CustomModel(TFXLMRobertaModel.from_pretrained("jplu/tf-xlm-roberta-large"), num_classes=5)
      6     optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
      7     optimizer = tf.mixed_precision.LossScaleOptimizer(optimizer, loss_scale='dynamic')

13 frames
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_utils.py
in from_pretrained(cls, pretrained_model_name_or_path, *model_args,
**kwargs)
399 return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)
400
--> 401 model(model.dummy_inputs, training=False) # build the network with dummy inputs
402
403 assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in call(self, inputs, **kwargs)
222
223 """
--> 224 outputs = self.roberta(inputs, **kwargs)
225 return outputs
226
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, attention_mask, token_type_ids, position_ids,
head_mask, inputs_embeds, training)
567 # head_mask = tf.constant([0] * self.num_hidden_layers)
568
--> 569 embedding_output = self.embeddings([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
570 encoder_outputs = self.encoder([embedding_output, extended_attention_mask, head_mask], training=training)
571
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in __call__(self, *args, **kwargs)
966 with base_layer_utils.autocast_context_manager(
967 self._compute_dtype):
--> 968 outputs = self.call(cast_inputs, *args, **kwargs)
969 self._handle_activity_regularization(inputs, outputs)
970 self._set_mask_metadata(inputs, outputs, input_masks)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in call(self, inputs, mode, training)
146 """
147 if mode == "embedding":
--> 148 return self._embedding(inputs, training=training)
149 elif mode == "linear":
150 return self._linear(inputs)
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_roberta.py
in _embedding(self, inputs, training)
79 position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
80
---> 81 return super()._embedding([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
82
83
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py
in _embedding(self, inputs, training)
173
174 embeddings = inputs_embeds + position_embeddings + token_type_embeddings
--> 175 embeddings = self.LayerNorm(embeddings)
176 embeddings = self.dropout(embeddings, training=training)
177 return embeddings
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py
in __call__(self, *args, **kwargs)
962 # Eager execution on data tensors.
963 with backend.name_scope(self._name_scope()):
--> 964 self._maybe_build(inputs)
965 cast_inputs = self._maybe_cast_inputs(inputs)
966 with base_layer_utils.autocast_context_manager(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
   2406       self._dtype_policy = policy.Policy(dtype)
   2407     input_shapes = None
-> 2408     if all(hasattr(x, 'shape') for x in input_list):
   2409       input_shapes = nest.map_structure(lambda x: x.shape, inputs)
   2410     # Only call `build` if the user has manually overridden the build method.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in <genexpr>(.0)
   2406       self._dtype_policy = policy.Policy(dtype)
   2407     input_shapes = None
-> 2408     if all(hasattr(x, 'shape') for x in input_list):
   2409       input_shapes = nest.map_structure(lambda x: x.shape, inputs)
   2410     # Only call `build` if the user has manually overridden the build method.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in shape(self)
   1065         self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple())
   1066       except core._NotOkStatusException as e:
-> 1067         six.raise_from(core._status_to_exception(e.code, e.message), None)
   1068
   1069     return self._tensor_shape

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a bfloat16 tensor but is a float tensor
I suppose I have to cast something in the model? How can I do that? I am using TensorFlow 2.1 and a TPU v2.
I have seen this thread, but it seems to be for TensorFlow 1.x, as the code does not work for me:
Memory reduction Tensorflow TPU v2/v3 bfloat16
I think the problem is that you are trying to load a pretrained model trained with full floats into a bfloat16 model. I don't think that will work; you would have to train from scratch.
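For reference, the AddV2 error in the traceback is the generic message TensorFlow raises when one operand of an addition is bfloat16 and the other is float32. A minimal sketch with toy tensors (not the transformers code):
import tensorflow as tf
a = tf.constant([1.0, 2.0])      # float32
b = tf.cast(a, tf.bfloat16)      # bfloat16
# tf.add(b, a)                   # InvalidArgumentError: cannot compute AddV2 as input #1 ... expected to be a bfloat16 tensor but is a float tensor
c = b + tf.cast(a, tf.bfloat16)  # fine once both operands share a dtype
print(c.dtype)                   # <dtype: 'bfloat16'>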

Problem in grid-searching an LSTM network - batch_size issue

I wrote code to apply grid search to an LSTM network built with Keras. Everything seems to work fine, but I have a problem with passing the batch_size.
I tried to change the format of batch_size but, as I understand it, it must be a tuple.
# LSTM ok
from Methods.LSTM_1HL import LSTM_method

Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train, Y_test)

def create_model(optimizer, hl1_nodes, input_shape):
    # creation of the NN - Electric Load
    # LSTM layers followed by other LSTM layers must have the parameter "return_sequences" set to True
    model = Sequential()
    model.add(LSTM(units=hl1_nodes, input_shape=input_shape, return_sequences=False))
    model.add(Dense(1, activation="linear"))  # output layer
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
    model.summary()
    return model

def LSTM_method(X_train, X_test, Y_train, Y_test):
    # normalize X and Y data
    mmsx = MinMaxScaler()
    mmsy = MinMaxScaler()
    X_train = mmsx.fit_transform(X_train)
    X_test = mmsx.transform(X_test)
    Y_train = mmsy.fit_transform(Y_train)
    Y_test = mmsy.transform(Y_test)

    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])

    # NN for Electric Load
    # LSTM input shape
    time_steps = 1  # number of time-steps you are feeding a sequence (?)
    inputs_numb = X_train.shape[1]  # number of inputs
    input_shape = (time_steps, inputs_numb)

    model = KerasRegressor(build_fn=create_model, verbose=1)

    # GridSearch code
    start = time()
    optimizers = ['rmsprop', 'adam']
    epochs = np.array([100, 500, 1000])
    hl1_nodes = np.array([1, 10, 50])
    btcsz = np.array([1, X_train.shape[0]])
    param_grid = dict(optimizer=optimizers, hl1_nodes=hl1_nodes, input_shape=input_shape,
                      nb_epoch=epochs, batch_size=btcsz)
    scoring = make_scorer(accuracy_score)  # in order to use a metric as a scorer
    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring)
    grid_result = grid.fit(X_train, Y_train)

    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    for params, mean_score, scores in grid_result.grid_scores_:
        print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
    print("total time:", time() - start)

    # Predictions - Electric Load
    Yhat_train = grid_result.predict(X_train, verbose=0)
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    Yhat_test = grid_result.predict(X_test, verbose=0)

    # Denormalization - Electric Load
    Yhat_train = mmsy.inverse_transform(Yhat_train)
    Yhat_test = mmsy.inverse_transform(Yhat_test)
    Y_train = mmsy.inverse_transform(Y_train)
    Y_test = mmsy.inverse_transform(Y_test)

    return Yhat_train, Yhat_test
Below is the error I get:
TypeError Traceback (most recent call last)
in <module>
10 #from Methods.LSTM_1HL import create_model
11
---> 12 Yhat_train_LSTM, Yhat_test_LSTM = LSTM_method(X_train, X_test, Y_train, Y_test)
c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in LSTM_method(X_train, X_test, Y_train, Y_test)
62 scoring = make_scorer(accuracy_score) #in order to use a metric as a scorer
63 grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring = scoring)
---> 64 grid_result = grid.fit(X_train, Y_train)
65
66 print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1189 def _run_search(self, evaluate_candidates):
1190 """Search all candidates in param_grid"""
-> 1191 evaluate_candidates(ParameterGrid(self.param_grid))
1192
1193
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\.conda\envs\PierEnv\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
526 estimator.fit(X_train, **fit_params)
527 else:
--> 528 estimator.fit(X_train, y_train, **fit_params)
529
530 except Exception as e:
~\.conda\envs\PierEnv\lib\site-packages\keras\wrappers\scikit_learn.py in fit(self, x, y, **kwargs)
139 **self.filter_sk_params(self.build_fn.__call__))
140 else:
--> 141 self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
142
143 loss_name = self.model.loss
c:\Users\ER180124\Code\LoadForecasting\Methods\LSTM_1HL.py in create_model(optimizer, hl1_nodes, input_shape)
19 # LSTM layers followed by other LSTM layer must have the parameter "return_sequences" set at True
20 model = Sequential()
---> 21 model.add(LSTM(units = hl1_nodes , input_shape=input_shape, return_sequences=False))
22 model.add(Dense(1, activation="linear")) # output layer
23 model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])
~\.conda\envs\PierEnv\lib\site-packages\keras\legacy\interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, units, activation, recurrent_activation, use_bias, kernel_initializer, recurrent_initializer, bias_initializer, unit_forget_bias, kernel_regularizer, recurrent_regularizer, bias_regularizer, activity_regularizer, kernel_constraint, recurrent_constraint, bias_constraint, dropout, recurrent_dropout, implementation, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
2183 stateful=stateful,
2184 unroll=unroll,
-> 2185 **kwargs)
2186 self.activity_regularizer = regularizers.get(activity_regularizer)
2187
~\.conda\envs\PierEnv\lib\site-packages\keras\layers\recurrent.py in __init__(self, cell, return_sequences, return_state, go_backwards, stateful, unroll, **kwargs)
406 '(tuple of integers, '
407 'one integer per RNN state).')
--> 408 super(RNN, self).__init__(**kwargs)
409 self.cell = cell
410 self.return_sequences = return_sequences
~\.conda\envs\PierEnv\lib\site-packages\keras\engine\base_layer.py in __init__(self, **kwargs)
145 batch_size = None
146 batch_input_shape = (
--> 147 batch_size,) + tuple(kwargs['input_shape'])
148 self.batch_input_shape = batch_input_shape
149
TypeError: 'int' object is not iterable
I do not understand why, in the last part of the error message, I get batch_size = None while I define a batch size that is a tuple.
Well, I think I see your problem.
When you run a grid search, the parameter grid is built from your parameter dictionary, essentially as a cross product of the candidate values. Your dictionary has input_shape=(time_steps, inputs_numb), which is a sequence of two integers, so each candidate configuration gets either time_steps or inputs_numb as its input shape. In the final frame of the stack trace that single integer ends up in (batch_size,) + tuple(kwargs['input_shape']), and tuple() of a plain int fails with 'int' object is not iterable. (The batch_size = None you see there is just the default Keras uses when it builds batch_input_shape from input_shape in the layer constructor; it is unrelated to the batch_size values in your grid.) Instead, you want the configuration space to contain only one possible input_shape: the whole tuple. The expansion is illustrated in the short sketch after the fix below.
What you should do is convert this line
input_shape=(time_steps, inputs_numb)
to this:
input_shape=[(time_steps, inputs_numb)]
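The expansion can be seen directly with scikit-learn's ParameterGrid; a minimal sketch with toy values standing in for the post's grid:
from sklearn.model_selection import ParameterGrid
# A bare tuple is treated as two separate candidate values for input_shape ...
print(list(ParameterGrid({'input_shape': (1, 5)})))
# [{'input_shape': 1}, {'input_shape': 5}]
# ... while wrapping it in a list yields a single candidate: the whole tuple.
print(list(ParameterGrid({'input_shape': [(1, 5)]})))
# [{'input_shape': (1, 5)}]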
