Custom noise layer in Keras DQN model - keras

I'm trying to implement Noisy Networks in my DDDQN model as a replacement for epsilon-greedy exploration, as described in this paper: https://arxiv.org/pdf/1706.10295.pdf
But I'm not sure how to implement my own noisy layer in Keras. So far I have tried using Lambda layers for two hidden layers as well as for my output layers:
def create_model(self):
    input_node = tf.keras.Input(shape=(STACK_SIZE, ENVIROMENT_OBSERVATION_SPACE))
    input_layer = input_node
    # define state value function
    out = GRU(64, return_sequences=True, stateful=False, activation='tanh')(input_layer)
    out = Dropout(0.2)(out)
    out = GRU(32, return_sequences=False, stateful=False, activation='tanh')(out)
    out = Dropout(0.2)(out)
    out = Lambda(self.noisy_dense(12, out))
    out = Activation('relu')(out)
    out = Lambda(self.noisy_dense(8, out))
    out = Activation('relu')(out)
    state_value = Lambda(self.noisy_dense(1, out))
    state_value = Lambda(lambda s: K.expand_dims(s[:, 0], axis=-1), output_shape=(ACTION_SPACE,))(state_value)
    # define action advantage
    action_advantage = Lambda(self.noisy_dense(ACTION_SPACE, out))
    action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True), output_shape=(ACTION_SPACE,))(action_advantage)
    # merge by adding
    Q = tf.keras.layers.add([state_value, action_advantage])
    # define model
    model = tf.keras.Model(inputs=input_node, outputs=Q)
    # model compile settings
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # compile model
    model.compile(
        loss='categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    print(model.summary())
    return model
def noisy_dense(self, units, input):
    w_shape = [units, input.shape[1]]
    mu_w = tf.Variable(initial_value=tf.random.truncated_normal(shape=w_shape))
    sigma_w = tf.Variable(initial_value=tf.constant(0.017, shape=w_shape))
    epsilon_w = tf.random.uniform(shape=w_shape)
    b_shape = [units]
    mu_b = tf.Variable(initial_value=tf.random.truncated_normal(shape=b_shape))
    sigma_b = tf.Variable(initial_value=tf.constant(0.017, shape=b_shape))
    epsilon_b = tf.random.uniform(shape=b_shape)
    w = tf.add(mu_w, tf.multiply(sigma_w, epsilon_w))
    b = tf.add(mu_b, tf.multiply(sigma_b, epsilon_b))
    return tf.matmul(input, tf.transpose(w)) + b
But when I run this code, I get an error in my first Lambda layer: "TypeError: Unsupported callable".
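For what it's worth, Lambda expects a callable, but self.noisy_dense(12, out) already returns a tensor, and the tf.Variables created inside noisy_dense would not be tracked as trainable weights of the model anyway. Below is a minimal sketch of the same idea written as a Layer subclass instead of a Lambda (assuming independent Gaussian noise with sigma initialised to 0.017, as in the code above; the class name NoisyDense is mine, not from the paper):

import tensorflow as tf

class NoisyDense(tf.keras.layers.Layer):
    """Sketch of an independent-Gaussian noisy dense layer (not the factorised variant)."""

    def __init__(self, units, sigma_init=0.017, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.sigma_init = sigma_init

    def build(self, input_shape):
        self.in_dim = int(input_shape[-1])
        self.mu_w = self.add_weight(name='mu_w', shape=(self.in_dim, self.units),
                                    initializer=tf.keras.initializers.TruncatedNormal())
        self.sigma_w = self.add_weight(name='sigma_w', shape=(self.in_dim, self.units),
                                       initializer=tf.keras.initializers.Constant(self.sigma_init))
        self.mu_b = self.add_weight(name='mu_b', shape=(self.units,),
                                    initializer=tf.keras.initializers.TruncatedNormal())
        self.sigma_b = self.add_weight(name='sigma_b', shape=(self.units,),
                                       initializer=tf.keras.initializers.Constant(self.sigma_init))

    def call(self, inputs):
        # Fresh noise is drawn on every forward pass; mu and sigma are the learned parameters.
        epsilon_w = tf.random.normal((self.in_dim, self.units))
        epsilon_b = tf.random.normal((self.units,))
        w = self.mu_w + self.sigma_w * epsilon_w
        b = self.mu_b + self.sigma_b * epsilon_b
        return tf.matmul(inputs, w) + b

With a layer like this, the Lambda calls could be replaced by, for example, out = NoisyDense(12)(out) followed by Activation('relu').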

Related

Warning with Model Checkpoint at the end of program (Ubuntu 22.04/Python 3.10.6/ Tensorflow 2.10.0)

I'm running a Python 3.10.6 program where I train and test a Tensorflow neural network model using Tensorflow 2.10.0 on Ubuntu 22.04 in the terminal. Though the program runs fine, at the end I get this warning:
Exception ignored in: <function _CheckpointRestoreCoordinatorDeleter.__del__ at 0x7fd3f2d53520>
Traceback (most recent call last):
  File "/home/m/.local/lib/python3.10/site-packages/tensorflow/python/checkpoint/checkpoint.py", line 193, in __del__
TypeError: 'NoneType' object is not callable
My code is as follows (much of it is taken from this tutorial: https://www.tensorflow.org/text/tutorials/classify_text_with_bert):
tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)

from keras.layers.core import Dense, Dropout, Activation
from tensorflow.keras import optimizers, models, layers, callbacks

def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = Dropout(0.1)(net)
    net = Dense(256, activation='relu', name='hidden_')(net)
    net = Dense(128, activation='relu', name='hidden_l')(net)
    net = Dense(64, activation='relu', name='hidden_la')(net)
    net = Dense(64, activation='relu', name='hidden_lay')(net)
    net = Dense(16, activation='relu', name='hidden_laye')(net)
    net = Dense(3, activation='softmax', name='output')(net)
    return tf.keras.Model(text_input, net)

def load_callbacks(patience_num, filename):
    return [
        callbacks.EarlyStopping(
            monitor='val_loss',
            patience=patience_num
        ),
        callbacks.ModelCheckpoint(
            filepath=f'{filename}.h5',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
    ]

loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
metrics = tf.metrics.CategoricalAccuracy()
optimizer = optimizers.RMSprop(learning_rate=0.001)
epochs = 2
batch_size = round(train_x.shape[0]/10)
batch_size = 30
model = build_classifier_model()
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
#ckpt = tf.train.Checkpoint(step = tf.Variable(0), iterator = iter(tf.data.Dataset.range(20)))
#manager = tf.train.CheckpointManager(ckpt, '/tmp/chkpt', max_to_keep = 3)
print("Training:")
history = model.fit(x=train_x, y=train_y, epochs=epochs, validation_data=(val_x, val_y), batch_size=batch_size, callbacks=load_callbacks(28, 'model'), verbose=2)
#print(train_x[0])
#print(model.predict(train_x[0]))
print("Testing:")
model.evaluate(test_x, test_y)
I know that this may be a duplicate question, but the solutions given for similar posts do not work here.
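One workaround that is often suggested for shutdown-time warnings like this (a hedged sketch, it may not apply here) is to release the model and clear the Keras session explicitly before the program exits:

# Hypothetical workaround sketch: free Keras/TF objects before interpreter shutdown,
# so the checkpoint deleter is not invoked after module globals have been torn down.
model.evaluate(test_x, test_y)
del model, history
tf.keras.backend.clear_session()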

Converting TensorFlow Keras model API to model subclassing

For a simple TF2 Object detection CNN architecture defined using Keras's functional API as follows:
input_ = Input(shape = (144, 144, 3), name = 'image')
# name - An optional name string for the Input layer. Should be unique in
# a model (do not reuse the same name twice). It will be autogenerated if it isn't provided.
# Here 'image' is the Python3 dict's key used to map the data to one of the layer in the model.
x = input_
# Define a conv block-
x = Conv2D(filters = 64, kernel_size = 3, activation = 'relu')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size = 2)(x)
x = Flatten()(x) # flatten the last pooling layer's output volume
x = Dense(256, activation='relu')(x)
# We are using a data generator which yields dictionaries. Using 'name' argument makes it
# possible to map the correct data generator's output to the appropriate layer
class_out = Dense(units = 9, activation = 'softmax', name = 'class_out')(x) # classification output
box_out = Dense(units = 2, activation = 'linear', name = 'box_out')(x) # regression output
# Define the CNN model-
model = tf.keras.models.Model(input_, [class_out, box_out]) # since we have 2 outputs, we use a list
I am attempting to define it using Model sub-classing as:
class OD(Model):
    def __init__(self):
        super(OD, self).__init__()
        self.conv1 = Conv2D(filters=64, kernel_size=3, activation=None)
        self.bn = BatchNormalization()
        self.pool = MaxPool2D(pool_size=2)
        self.flatten = Flatten()
        self.dense = Dense(256, activation=None)
        self.class_out = Dense(units=9, activation=None, name='class_out')
        self.box_out = Dense(units=2, activation='linear', name='box_out')

    def call(self, x):
        x = tf.nn.relu(self.bn(self.conv1(x)))
        x = self.pool(x)
        x = self.flatten(x)
        x = tf.nn.relu(self.dense(x))
        x = [tf.nn.softmax(self.class_out(x)), self.box_out(x)]
        return x
A batch of training data is obtained as:
example, label = next(data_generator(batch_size = 32))
example.keys()
# dict_keys(['image'])
image = example['image']
image.shape
# (32, 144, 144, 3)
label.keys()
# dict_keys(['class_out', 'box_out'])
label['class_out'].shape, label['box_out'].shape
# ((32, 9), (32, 2))
Is my Model sub-classing architecture equivalent to Keras's functional API?
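One quick way to check (a sketch, assuming the layer imports and the functional `model` defined above) is to run both versions on the same dummy batch and compare output shapes and parameter counts:

import numpy as np

# Dummy batch matching the generator's 'image' shape: (batch, 144, 144, 3).
dummy_images = np.random.rand(32, 144, 144, 3).astype('float32')

subclassed_model = OD()
class_pred, box_pred = subclassed_model(dummy_images)
print(class_pred.shape, box_pred.shape)      # expected: (32, 9) and (32, 2)

func_class, func_box = model(dummy_images)
print(func_class.shape, func_box.shape)      # expected: (32, 9) and (32, 2)

subclassed_model.summary()                   # compare parameter counts with model.summary()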

How to fix 'No gradients provided for any variable' error when using ctc_loss in Tensorflow

I am trying to implement Baidu's Deep Speech 2 model in TensorFlow 2.0.0alpha0. I am having trouble optimizing the TensorFlow ctc_loss using a tf.GradientTape() object to calculate the gradients.
I am currently passing a tensor of shape (batch_size, max_step, feats) to my model and then passing the computed logits to the loss function. I have also tried passing a sparse tensor, but this does not work either.
Here is the code for creating my model:
import tensorflow as tf

class DeepSpeech2(tf.keras.Model):
    def __init__(self, vocab_size, conv_filters=[11], conv_kernel_sizes=[1280], conv_strides=[2],
                 recur_sizes=[100], rnn_type='gru', bidirect_rnn=False, batch_norm=True,
                 learning_rate=1e-3, name='DeepSpeech2'):
        super(DeepSpeech2, self).__init__()
        self._vocab_size = vocab_size
        self._conv_filters = conv_filters
        self._conv_kernel_sizes = conv_kernel_sizes
        self._conv_strides = conv_strides
        self._recur_sizes = recur_sizes
        self._rnn_type = rnn_type
        self._bidirect_rnn = bidirect_rnn
        self._batch_norm = batch_norm
        self._learning_rate = learning_rate
        self._name = name
        self._conv_batch_norm = None
        with tf.name_scope(self._name):
            self._convolution = [tf.keras.layers.Conv1D(filters=conv_filters[i],
                kernel_size=conv_kernel_sizes[i], strides=conv_strides[i],
                padding='valid', activation='relu',
                name='conv1d_{}'.format(i)) for i in range(len(self._conv_filters))]
            if self._batch_norm:
                self._conv_batch_norm = tf.keras.layers.BatchNormalization(name='bn_conv_1d')
            if self._rnn_type == 'gru':
                rnn_init = tf.keras.layers.GRU
            elif self._rnn_type == 'lstm':
                rnn_init = tf.keras.layers.LSTM
            else:
                raise Exception("Invalid rnn_type: '{}' (must be 'lstm' or 'gru')"
                                .format(self._rnn_type))
            self._rnn = []
            for i, r in enumerate(self._recur_sizes):
                layer = rnn_init(r, activation='relu', return_sequences=True,
                                 name='{}_{}'.format(self._rnn_type, i))
                if self._bidirect_rnn:
                    layer = tf.keras.layers.Bidirectional(layer)
                self._rnn.append(layer)
                if self._batch_norm:
                    self._rnn.append(tf.keras.layers.BatchNormalization())
            self._fc = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(
                self._vocab_size, name='fc', activation='linear'))
            self._optimizer = tf.keras.optimizers.Adam(lr=self._learning_rate)

    def __call__(self, specs):
        with tf.name_scope(self._name):
            feats = specs
            for layer in self._convolution:
                feats = layer(feats)
            if self._conv_batch_norm:
                feats = self._conv_batch_norm(feats)
            rnn_outputs = feats
            for layer in self._rnn:
                rnn_outputs = layer(rnn_outputs)
            outputs = self._fc(rnn_outputs)
            return tf.transpose(outputs, (1, 0, 2))

    #tf.function
    def train_step(self, specs, spec_lengths, labels, label_lengths):
        with tf.GradientTape() as tape:
            logits = self.__call__(specs)
            loss = tf.nn.ctc_loss(labels=labels, logits=logits,
                                  label_length=label_lengths, logit_length=spec_lengths)
            cost = tf.reduce_sum(loss)
            decoded, neg_sum_logits = tf.nn.ctc_greedy_decoder(logits, label_lengths)
        gradients = tape.gradient(cost, self.trainable_variables)
        self._optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return (decoded[0].indices, decoded[0].values, decoded[0].dense_shape), cost
I am currently getting the following error
ValueError: No gradients provided for any variable: ['DeepSpeech2/conv1d_0/kernel:0', 'DeepSpeech2/conv1d_0/bias:0', 'DeepSpeech2/bn_conv_1d/gamma:0', 'DeepSpeech2/bn_conv_1d/beta:0', 'DeepSpeech2/gru_0/kernel:0', 'DeepSpeech2/gru_0/recurrent_kernel:0', 'DeepSpeech2/gru_0/bias:0', 'DeepSpeech2/batch_normalization_v2/gamma:0', 'DeepSpeech2/batch_normalization_v2/beta:0', 'DeepSpeech2/time_distributed/kernel:0', 'DeepSpeech2/time_distributed/bias:0'].
The error occurs at the line where the gradients are applied to the optimizer. When I print out my gradients variable, it is just a list of None values.
From what I understand, this error indicates that there is no path from the variables to the loss in the graph, but I'm not sure why I am getting this. Any help would be greatly appreciated!
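One way to narrow this down (a debugging sketch, not a fix) is to check whether gradients flow for a trivial loss on the same forward pass; if they do, the break is somewhere in the ctc_loss inputs rather than in the model itself:

# Hypothetical debugging step, assuming `model = DeepSpeech2(...)` and a batch `specs`.
with tf.GradientTape() as tape:
    logits = model(specs)
    trivial_cost = tf.reduce_sum(logits)

grads = tape.gradient(trivial_cost, model.trainable_variables)
print([g is None for g in grads])  # all False -> the model is differentiable end to end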

How to feed Bert embeddings to LSTM

I am working on a BERT + MLP model for a text classification problem. Essentially, I am trying to replace the MLP model with a basic LSTM model.
Is it possible to create an LSTM that consumes the BERT embeddings directly? Or is it better to create an LSTM with an Embedding layer?
More specifically, I am having a hard time creating the embedding matrix needed to build an Embedding layer from the BERT embeddings.
def get_bert_embeddings(dataset='gap_corrected_train',
                        dataset_path=TRAIN_PATH,
                        bert_path=BERT_UNCASED_LARGE_PATH,
                        bert_layers=BERT_LAYERS):
    """Get BERT embeddings for all files in dataset_path and specified BERT layers and write them to file."""
    df = None
    for file in os.listdir(dataset_path):
        if df is None:
            df = pd.read_csv(dataset_path+'/'+file, sep='\t')
        else:
            next_df = pd.read_csv(dataset_path+'/'+file, sep='\t')
            df = pd.concat([df, next_df], axis=0)
    df.reset_index(inplace=True, drop=True)
    for i, layer in enumerate(bert_layers):
        embeddings_file = INTERIM_PATH + 'emb_bert' + str(layer) + '_' + dataset + '.h5'
        if not os.path.exists(embeddings_file):
            print('Embeddings file: ', embeddings_file)
            print('Extracting BERT Layer {0} embeddings for {1}...'.format(layer, dataset))
            print("Started at ", time.ctime())
            emb = get_bert_token_embeddings(df, bert_path, layer)
            emb.to_hdf(embeddings_file, 'table')
            print("Finished at ", time.ctime())

def build_mlp_model(input_shape):
    input_layer = layers.Input(input_shape)
    input_features = layers.Input((len(FEATURES),))
    x = layers.Concatenate(axis=1, name="concate_layer")([input_layer, input_features])
    x = layers.Dense(HIDDEN_SIZE, name='dense1')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(DROPOUT, seed=RANDOM)(x)
    x = layers.Dense(HIDDEN_SIZE//2, name='dense2')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(DROPOUT//2, seed=RANDOM)(x)
    x = layers.Dense(HIDDEN_SIZE//4, name='dense3')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(DROPOUT//2, seed=RANDOM)(x)
    output_layer = layers.Dense(3, name='output', kernel_regularizer=regularizers.l2(LAMBDA))(x)
    output_layer = layers.Activation('softmax')(output_layer)
    model = models.Model(input=[input_layer, input_features], output=output_layer, name="mlp")
    return model
You can create a model that uses an Embedding layer first, followed by an LSTM and then a Dense layer.
For example:
deep_inputs = Input(shape=(length_of_your_data,))
embedding_layer = Embedding(vocab_size, output_dim = 3000, trainable=True)(deep_inputs)
LSTM_Layer_1 = LSTM(512)(embedding_layer)
dense_layer_1 = Dense(number_of_classes, activation='softmax')(LSTM_Layer_1)
model_AdGroups = Model(inputs=deep_inputs, outputs=dense_layer_1)
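Alternatively, if the BERT token embeddings are precomputed (as in get_bert_embeddings above), they can be fed to an LSTM directly, with no Embedding layer at all. A rough sketch, assuming each sample is a (seq_len, emb_dim) matrix of BERT vectors (seq_len and emb_dim are placeholders for your actual dimensions):

import tensorflow as tf
from tensorflow.keras import layers, models

seq_len, emb_dim = 64, 1024   # placeholder dimensions

bert_input = layers.Input(shape=(seq_len, emb_dim), name='bert_embeddings')
x = layers.LSTM(512)(bert_input)
x = layers.Dropout(0.2)(x)
output = layers.Dense(3, activation='softmax')(x)

model = models.Model(inputs=bert_input, outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])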

ValueError: Error when checking input: expected embedding_1_input to have shape (4,) but got array with shape (1,)

I was working on seq2seq translation and got stuck here:-
def createModel(engVocab, frVocab, size, englishMaxlength, frenchMaxLength):
    model = Sequential()
    model.add(Embedding(input_dim=engVocab, output_dim=size, input_length=englishMaxlength, mask_zero=True))
    model.add(LSTM(units=size))
    model.add(RepeatVector(frenchMaxLength))
    model.add(LSTM(units=size, return_sequences=True))
    model.add(TimeDistributed(Dense(frenchVocabsize, activation='softmax')))
    return model

def DataGenerator(trainingDataEnglish, trainingDataFrench):
    while True:
        l = len(trainingDataFrench)
        for i in range(l):
            yield (trainingDataEnglish[i], trainingDataFrench[i])
I created my test and training data as follows:-
def encodeSequences(trainingData, tokenizer, maxlength):
    encoder = tokenizer.texts_to_sequences(trainingData)
    encoder = pad_sequences(encoder, maxlen=maxlength, padding='pre')
    return encoder

def encodeOutput(testData, vocabSize):
    y = []
    for sequence in testData:
        Seq = to_categorical(sequence, num_classes=vocabSize)
        y.append(Seq)
    y = np.array(y)
    return y
samples = 7000
trainingSize = 6000
trainEng = english[:trainingSize] #array of strings
trainFr = french[:trainingSize] #array of strings
testEng = english[trainingSize:samples] #array of strings
testFr = french[trainingSize:samples] #array of strings
englishTokenizer = createTokenizer(trainEng)
frenchTokenizer = createTokenizer(trainFr)
englishVocabSize = len(englishTokenizer.word_index) + 1
The use of encodeSequences and encodeOutput is as follows:-
trainX = encodeSequences(trainEng, englishTokenizer, englishMaxlength)
trainY = encodeSequences(trainFr, frenchTokenizer, frenchMaxLength)
trainY = encodeOutput(trainY, frenchVocabsize)
testX = encodeSequences(testEng, englishTokenizer, englishMaxlength)
testY = encodeSequences(testFr, frenchTokenizer, frenchMaxLength)
testY = encodeOutput(testY, frenchVocabsize)
And finally :-
model = createModel(engVocab = englishVocabSize, frVocab = frenchVocabsize, size = 256, englishMaxlength = englishMaxlength, frenchMaxLength = frenchMaxLength)
print(model.summary())
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')
steps = len(trainX)
generator = DataGenerator(trainX, trainY)
model.fit_generator(generator, epochs = epochs, steps_per_epoch = steps, validation_data = (testX, testY))
model.save('Model.h5')
And I get the following error:-
ValueError: Error when checking input: expected embedding_1_input to have shape (4,) but got array with shape (1,)
How do I fix this?
Where did I go wrong?
Please help.
Thanks in advance.
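For what it's worth, one likely culprit (a hedged guess) is that DataGenerator yields single un-batched samples, so Keras interprets the sequence dimension as the batch dimension. A sketch of a generator that yields batches of one instead:

import numpy as np

def DataGenerator(trainingDataEnglish, trainingDataFrench):
    while True:
        for i in range(len(trainingDataFrench)):
            x = np.expand_dims(trainingDataEnglish[i], axis=0)  # (1, englishMaxlength)
            y = np.expand_dims(trainingDataFrench[i], axis=0)   # (1, frenchMaxLength, frenchVocabsize)
            yield (x, y)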
