I am using Keras Tuner with RandomSearch() to tune the hyperparameters of my regression model. While I can tune with "relu" and "selu", I am unable to do the same for Leaky ReLU. I understand that "relu" and "selu" work because string aliases are available for them, whereas no string alias exists for Leaky ReLU. I tried passing a callable LeakyReLU object (see my example below), but it doesn't seem to work. Can you please advise me how to do that? I have the same issue with Parametric Leaky ReLU.
Thank you in advance!
def build_model(hp):
    model = Sequential()
    model.add(
        Dense(
            units = 18,
            kernel_initializer = 'normal',
            activation = 'relu',
            input_shape = (18, )
        )
    )
    for i in range(hp.Int( name = "num_layers", min_value = 1, max_value = 5)):
        model.add(
            Dense(
                units = hp.Int(
                    name = "units_" + str(i),
                    min_value = 18,
                    max_value = 180,
                    step = 18),
                kernel_initializer = 'normal',
                activation = hp.Choice(
                    name = 'dense_activation',
                    values = ['relu', 'selu', LeakyReLU(alpha=0.01)],
                    default = 'relu'
                )
            )
        )
    model.add( Dense( units = 1 ) )
    model.compile(
        optimizer = tf.keras.optimizers.Adam(
            hp.Choice(
                name = "learning_rate", values = [1e-2, 1e-3, 1e-4]
            )
        ),
        loss = 'mse'
    )
    return model
As a work-around, you can add another activation function to the tf.keras.activations module by modifying its source file, activations.py.
Here's the code for tf.keras.activations.relu, which you'll find in activations.py:
@keras_export('keras.activations.relu')
@dispatch.add_dispatch_support
def relu(x, alpha=0., max_value=None, threshold=0):
  """Applies the rectified linear unit activation function.

  With default values, this returns the standard ReLU activation:
  `max(x, 0)`, the element-wise maximum of 0 and the input tensor.

  Modifying default parameters allows you to use non-zero thresholds,
  change the max value of the activation,
  and to use a non-zero multiple of the input for values below the threshold.

  For example:

  >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32)
  >>> tf.keras.activations.relu(foo).numpy()
  array([ 0.,  0.,  0.,  5., 10.], dtype=float32)
  >>> tf.keras.activations.relu(foo, alpha=0.5).numpy()
  array([-5. , -2.5,  0. ,  5. , 10. ], dtype=float32)
  >>> tf.keras.activations.relu(foo, max_value=5).numpy()
  array([0., 0., 0., 5., 5.], dtype=float32)
  >>> tf.keras.activations.relu(foo, threshold=5).numpy()
  array([-0., -0.,  0.,  0., 10.], dtype=float32)

  Arguments:
      x: Input `tensor` or `variable`.
      alpha: A `float` that governs the slope for values lower than the
        threshold.
      max_value: A `float` that sets the saturation threshold (the largest value
        the function will return).
      threshold: A `float` giving the threshold value of the activation function
        below which values will be damped or set to zero.

  Returns:
      A `Tensor` representing the input tensor,
      transformed by the relu activation function.
      Tensor will be of the same shape and dtype of input `x`.
  """
  return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
Copy this code and paste it just below. Change @keras_export('keras.activations.relu') to @keras_export('keras.activations.leaky_relu'), rename the function to leaky_relu (so it doesn't shadow the original relu), and change the default value of alpha to 0.2, like this:
@keras_export('keras.activations.leaky_relu')
@dispatch.add_dispatch_support
def leaky_relu(x, alpha=0.2, max_value=None, threshold=0):
  """Applies the leaky rectified linear unit activation function.

  Same implementation as `relu` above; the only difference is the non-zero
  default `alpha`, so values below the threshold are scaled by 0.2 instead
  of being zeroed.
  """
  return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
You can then use the string alias 'leaky_relu' (exposed as tf.keras.activations.leaky_relu).
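For instance, a minimal sketch (assuming the edited activations.py above is in place) of how the new alias could appear in the tuner's search space:

activation = hp.Choice(
    name = 'dense_activation',
    values = ['relu', 'selu', 'leaky_relu'],
    default = 'relu'
)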
# Custom activation function
from keras.layers import Activation, LeakyReLU
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

## Register leaky-relu so we can use it as a string
get_custom_objects().update({'leaky-relu': Activation(LeakyReLU(alpha=0.2))})

## Main activation functions available to use
activation_functions = ['sigmoid', 'relu', 'elu', 'leaky-relu', 'selu', 'gelu', 'swish']
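Once registered, 'leaky-relu' can be used wherever an activation string is accepted, including in the hp.Choice values list. A quick check (a sketch, assuming the registration above has run):

from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(18, activation='leaky-relu', input_shape=(18,)))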
I'm working on a classification problem. The number of classes is 5. My ground-truth vector has shape (3) instead of a single value: it lists the acceptable classes, while the predicted vector has shape (1x5) and holds the softmax scores for all the classes.
For example:
predicted_vector = tensor([0.0669, 0.1336, 0.3400, 0.3392, 0.1203])
ground_truth = tensor([3,2,5])
For the above illustration, a typical argmax operation would declare class 3 as the predicted class (0.34), but I want the model to be rewarded even if the argmax class is any of 3, 2, or 5.
Which loss function is recommended for such a use case?
As jodag pointed out in the comments you can try to treat it as a multi-label classification problem.
So [[0, 1, 2], [0, 2, 4], [3, 3, 3]] will be transformed into:
tensor([[1., 1., 1., 0., 0.],
[1., 0., 1., 0., 1.],
[0., 0., 0., 1., 0.]])
Here is an example of how this can be implemented:
import torch
from torch.nn import BCELoss

predicted_vector = torch.rand((3, 5))
ground_truth = torch.LongTensor([[0, 1, 2], [0, 2, 4], [3, 3, 3]])

# Build multi-hot targets: put a 1 at every acceptable class index for each sample
labels_onehot = torch.zeros_like(predicted_vector)
labels_onehot.scatter_(1, ground_truth, 1)

loss_fn = BCELoss()
loss = loss_fn(predicted_vector, labels_onehot)
You can also assign different weights to different labels, as sketched below.
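For example, a sketch with arbitrary per-class weights; the weight tensor is broadcast against the element-wise (batch, num_classes) loss, so under-represented labels can be up-weighted:

import torch
from torch.nn import BCELoss

# Up-weight label 1 and down-weight label 4 (weights chosen arbitrarily for illustration)
class_weights = torch.tensor([1.0, 2.0, 1.0, 1.0, 0.5])
loss_fn = BCELoss(weight=class_weights)
loss = loss_fn(torch.rand(3, 5), torch.zeros(3, 5))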
For this problem, a given sample is in exactly one class (say, class 3), but for training purposes, predicting class 2 or 5 is still okay so the model isn't penalised that heavily.
This is a typical single-label, multi-class problem, but with probabilistic ("soft") labels, so CrossEntropyLoss should be used (applied to the raw logits, without a softmax()).
In this example, the (soft) target might be a probability of 0.7 for class 3, a probability of 0.2 for class 2, and a probability of 0.1 for class 5 (and zero for everything else).
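A minimal sketch, assuming PyTorch 1.10 or later (where CrossEntropyLoss accepts class probabilities as targets) and 0-based class indices, so the classes 3, 2 and 5 above become indices 2, 1 and 4:

import torch
from torch.nn import CrossEntropyLoss

logits = torch.randn(1, 5)                               # raw model outputs, no softmax applied
soft_target = torch.tensor([[0.0, 0.2, 0.7, 0.0, 0.1]])  # 0.2 for class 2, 0.7 for class 3, 0.1 for class 5
loss = CrossEntropyLoss()(logits, soft_target)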
I am having a hard time understanding the following scenario. I have an output probability of 0.0 for each class, so shouldn't metrics such as F1 score, accuracy and recall all be zero? However, I get the following:
import torch, torchmetrics
preds = torch.tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
target = torch.tensor([[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
print("F1: ", torchmetrics.functional.f1_score(preds, target))
print("Accuracy: ", torchmetrics.functional.accuracy(preds, target))
print("Recall: ", torchmetrics.functional.recall(preds, target))
print("Precision: ", torchmetrics.functional.precision(preds, target))
Output:
F1: tensor(0.)
Accuracy: tensor(0.6667)
Recall: tensor(0.)
Precision: tensor(0.)
Why is accuracy 0.6667? I would expect all outputs to be 0.0.
Your preds is an array of probabilities for a multi-label classification problem.
To make it simpler, I will assume an example like this:
preds = torch.tensor([[0., 0., 0.]]) # multi-labels [label1, label2, label3]
target = torch.tensor([[1, 0, 0]])
The true negatives are 2, since the classifier predicts absence for label2 and label3, and label2 and label3 are indeed absent in the target.
The true positives are 0, since the classifier does not predict the presence of any label that is present in the target.
The false negatives are 1, since the classifier predicts absence for label1 while label1 is present in the target.
The false positives are 0, since the classifier does not predict the presence of any label that is absent from the target.
Accuracy = (TP + TN) / (TP + TN + FP + FN) = (0 + 2) / (0 + 2 + 0 + 1) = 2/3 = 0.6667.
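The same counts can be reproduced by hand, which makes the 0.6667 easy to verify (a small sketch; torchmetrics thresholds the probabilities at 0.5 by default):

import torch

preds = torch.tensor([[0., 0., 0.]])
target = torch.tensor([[1, 0, 0]])

pred_labels = (preds >= 0.5).int()                 # hard predictions after thresholding

tp = ((pred_labels == 1) & (target == 1)).sum()    # 0
tn = ((pred_labels == 0) & (target == 0)).sum()    # 2
fp = ((pred_labels == 1) & (target == 0)).sum()    # 0
fn = ((pred_labels == 0) & (target == 1)).sum()    # 1

accuracy = (tp + tn) / (tp + tn + fp + fn)         # 2 / 3 = 0.6667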
You can read more about the different metrics and how they are calculated in the torchmetrics documentation.
I am working on an image classification problem, using a Keras MobileNetV3 pre-trained on ImageNet with 90% of the layers frozen and the remaining 10% made trainable, while applying a dropout of 0.2. I was wondering how the dropout is handled in the backend.
MobileNetV3Small(input_shape=(IMG_HEIGHT, IMG_WIDTH, DEPTH),
alpha=1.0,
minimalistic=False,
include_top=False,
weights='imagenet',
input_tensor=None,
pooling='max',
dropout_rate=0.2)
If the layer is called with parameter training=False, like when you predict, nothing will happen. Let's start with some input:
import tensorflow as tf
rate = 0.4
dropout = tf.keras.layers.Dropout(rate)
x = tf.cast(tf.reshape(tf.range(1, 10), (3, 3)), tf.float32)
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 2., 3.],
[4., 5., 6.],
[7., 8., 9.]], dtype=float32)>
Now, let's call the dropout model while training:
dropout(x, training=True)
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[ 0. , 3.3333333, 0. ],
[ 6.6666665, 8.333333 , 0. ],
[11.666666 , 13.333333 , 15. ]], dtype=float32)>
As you can see, all the remaining values are multiplied by 1/(1-p), here 1/(1-0.4) ≈ 1.67. Now let's call the network with training=False:
dropout(x, training=False)
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 2., 3.],
[4., 5., 6.],
[7., 8., 9.]], dtype=float32)>
Nothing happens.
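The same holds at the model level: predict() runs the forward pass with training=False, so any Dropout layer is a no-op at inference time unless you explicitly pass training=True. A small sketch:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dropout(0.2)])
x = tf.ones((1, 4))

print(model.predict(x))         # [[1. 1. 1. 1.]] -- dropout inactive
print(model(x, training=True))  # some entries zeroed, the rest scaled by 1 / (1 - 0.2)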
I am trying to get classification probabilities out of my trained Keras model but when I use the model.predict (or model.predict_proba) method, all I get is an array of this form:
array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)
So basically I get a one-hot encoded float array. The "1" is mostly in the right place, so the training seems to have worked fine. But why can't I get the probabilities out? See the code below for the architecture used.
First I read in the data:
mnist_train = pd.read_csv('data/mnist_train.csv')
mnist_test = pd.read_csv('data/mnist_test.csv')
mnist_train_images = mnist_train.iloc[:, 1:].values
mnist_train_labels = mnist_train.iloc[:, :1].values
mnist_test_images = mnist_test.iloc[:, 1:].values
mnist_test_labels = mnist_test.iloc[:, :1].values
mnist_train_images = mnist_train_images.astype('float32')
mnist_test_images = mnist_test_images.astype('float32')
mnist_train_images /= 255
mnist_test_images /= 255
mnist_train_labels = keras.utils.to_categorical(mnist_train_labels, 10)
mnist_test_labels = keras.utils.to_categorical(mnist_test_labels, 10)
mnist_train_images = mnist_train_images.reshape(60000,28,28,1)
mnist_test_images = mnist_test_images.reshape(10000,28,28,1)
Then I build my model and train:
num_classes = mnist_test_labels.shape[1]
model = Sequential()
model.add(Conv2D(64, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last", padding="same"))
model.add(Conv2D(64, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', data_format="channels_last", padding="same"))
model.add(Conv2D(128, (3, 3), activation='relu', data_format="channels_last", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(mnist_train_images, mnist_train_labels, validation_data=(mnist_test_images, mnist_test_labels), epochs=20, batch_size=256, verbose=2)
scores = model.evaluate(mnist_test_images, mnist_test_labels, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
model.save('mnist-weights.model')
model.save_weights("mnist-model.h5")
model_json = model.to_json()
with open("mnist-model.json", "w") as json_file:
json_file.write(model_json)
But when I then load the model in another application and try to predict probabilities like this, I get the behaviour described above. What am I doing wrong?
json_file = open('alphabet_keras/mnist_model.json', 'r')
model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights("alphabet_keras/mnist_model.h5")
letter = cv2.cvtColor(someImg, cv2.COLOR_BGR2GRAY)
letter = fitSquare(letter,28,2) # proprietary function, doesn't matter
letter_expanded = np.expand_dims(letter, axis=0)
letter_expanded = np.expand_dims(letter_expanded, axis=3)
model.predict_proba(letter_expanded)#[0]
The output is as follows:
array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)
I expect something like:
array([[0.1, 0.34, 0.2, 0.8, 0.1, 0.62, 0.67, 1.0, 0.31, 0.59]], dtype=float32)
There are no error messages of any kind. Please help :)
Your expected output is not correct. For classification, the output of a neural network is a probability distribution over the labels, which means the probabilities are between 0 and 1 and sum to 1.0; the values you show sum to more than 1.0.
As for your specific problem, it looks like the probabilities are saturated. This is caused by the fact that you are not normalizing the pixel values by dividing by 255 at prediction time, which you do for the training and testing sets; this inconsistency saturates the output neurons.
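A minimal sketch of the suggested fix, assuming letter holds raw 0-255 pixel values as in the question:

letter = letter.astype('float32') / 255.0                  # same scaling as the training data
letter_expanded = np.expand_dims(letter, axis=0)
letter_expanded = np.expand_dims(letter_expanded, axis=3)   # -> shape (1, 28, 28, 1)
probs = model.predict(letter_expanded)[0]
print(probs, probs.sum())                                   # ten values in [0, 1] that sum to ~1.0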
Sorry for the title, but I couldn't come up with a better description here.
I am trying to apply batches for training to a model which should have 13 fully connected output layers. Each output layer has only two nodes (but is fully connected as stated).
Building the model's output looks like this:
outputs = list()
for i in range(num_labels):
    out_y = Dense(2, activation='softmax', name='out_{:d}'.format(i))(convolution_layer)
    outputs.append(out_y)

self.model = Model(input=inputs, output=outputs)
However, I can't manage to feed this model. I've tried to go with a [batch_size, 13, 1, 2] sized output array:
y = np.zeros((batch_size, 13, 1, 2))
But for a batch of size 2 I get:
ValueError: The model expects 13 input arrays, but only received one array. Found: array with shape (2, 13, 1, 2)
I've tried several other things, but it's simply not clear to me what the input for the model should look like.
How can I train this model?
I have also tried to pass a list of lists of numpy arrays:
where the first level represents the samples in the batch (here 2) and the second level is each sample's list of 13 numpy arrays. Yet I am getting:
ValueError: Error when checking model target: you are passing a list as input to your model, but the model expects a list of 13 Numpy arrays instead. The list you passed was: [[array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 1., 0.]), array([
As suggested, I also tried to return a list() of numpy arrays of size [13,2]:
Where the error becomes:
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 13 arrays but instead got the following list of 2 arrays: [array([[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 1., 0.],
[ ...
The code
Below you can find the current code which generates one sample in sample_generator and a full batch in batch_generator (which uses sample_generator).
Please note: the code now shows how I generate a list() of [13, 2] ndarrays, where the number of such ndarrays in that list is defined by batch_size.

def batch_generator(w2v, file_path, meta_info, batch_size, sample_generator_fn, embedding_size):
    try:
        x = np.zeros((batch_size, meta_info.max_sequence_length, embedding_size, 1))
        y = list()  # np.zeros((batch_size, 13, 1, 2))
        file = open(file_path)
        while True:
            x[:] = 0.0
            # y[:] = 0.0
            for batch in range(batch_size):
                sentence_info_json = file.readline()
                if sentence_info_json == '':
                    file.seek(0)
                    sentence_info_json = file.readline()
                sample = sample_generator_fn(w2v, sentence_info_json, meta_info)
                if not sample:
                    continue
                sentence_embedding = sample[0]
                final_length = len(sentence_embedding)
                x[batch, :final_length, :, 0] = sentence_embedding
                y.append(sample[1])
            shuffled = np.asarray(range(batch_size))
            np.random.shuffle(shuffled)
            x = x[shuffled]
            # y = y[shuffled]
            y = [y[i] for i in shuffled]
            yield x, y
    except Exception as e:
        print('Error in generator.')
        print(e)
        raise e
def sample_generator(w2v, sentence_info_json, meta_info):
    if not sentence_info_json:
        print('???')
    sentence_info = json.loads(sentence_info_json)
    tokens = [token['word'] for token in sentence_info['corenlp']['tokens']]
    sentence = Sentence(tokens=tokens)
    sentence_embedding = w2v.get_word_vectors(sentence.tokens.tolist())
    sentence_embedding = np.asarray([word_vector for word_vector in sentence_embedding if word_vector is not None])
    final_length = len(sentence_embedding)
    if final_length == 0:
        return None
    y = np.zeros((2, len(meta_info.category_dict)))
    y[1, :] = 1.
    # y_list = []
    y_tar = np.zeros((len(meta_info.category_dict), 2))
    for i in range(len(meta_info.category_dict)):
        y_tar[i][1] = 1.0
        # y_list.append(np.asarray([0.0, 1.0]))
    for opinion in sentence_info['opinions']:
        index = meta_info.category_dict[opinion['category']]
        y_tar[index][0] = 1.0
        y_tar[index][1] = 0.0
        # y_list[index][0] = 1.0
        # y_list[index][1] = 0.0
    return sentence_embedding, y_tar
As requested, the call to fit_generator()
cnn.model.fit_generator(generator=batch_generator(word2vec,
train_file, train_meta_info,
num_batches, sample_generator,
embedding_size),
samples_per_epoch=2000,
nb_epoch=2,
# validation_data=batch_generator(test_file_path, train_meta_info),
# nb_val_samples=100,
verbose=True)
Your output should be a list as specified in the error. Each element of the list should be a numpy array of size [batch_size, nb_outputs]. So a list of 13 elements of size [batch_size,2] in your case.
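A small sketch (variable names assumed) of how the per-sample (13, 2) targets produced by sample_generator could be rearranged into that list before yielding:

import numpy as np

batch_size, num_labels = 2, 13
y = [np.zeros((num_labels, 2)) for _ in range(batch_size)]   # stand-in for the list built in batch_generator

y_batch = np.stack(y)                                        # shape (batch_size, 13, 2)
y_for_keras = [y_batch[:, i, :] for i in range(num_labels)]  # list of 13 arrays, each (batch_size, 2)

In batch_generator, you would then yield x, y_for_keras instead of the raw list of per-sample arrays.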