Related
I am using Keras Tuner with RandomSearch() to tune the hyperparameters of my regression model. While I can tune over "relu" and "selu", I am unable to do the same for Leaky ReLU. I understand that "relu" and "selu" work as strings because string aliases are available for them, whereas no string alias is available for Leaky ReLU. I tried passing a callable Leaky ReLU object (see my example below), but it doesn't seem to work. Can you please advise me how to do that? I have the same issue with Parametric Leaky ReLU.
Thank you in advance!
def build_model(hp):
    """Build and compile the Sequential regression model tuned by Keras Tuner.

    Arguments:
        hp: hyperparameter container; its `Int` and `Choice` methods are
            used to draw the number of hidden layers, the width of each
            hidden layer, the hidden activation and the learning rate.

    Returns:
        The compiled model (Adam optimizer, 'mse' loss).
    """
    model = Sequential()

    # Fixed first layer: 18 inputs -> 18 units.
    first_layer = Dense(
        units=18,
        kernel_initializer='normal',
        activation='relu',
        input_shape=(18, ),
    )
    model.add(first_layer)

    # Tunable stack of hidden layers.
    depth = hp.Int(name="num_layers", min_value=1, max_value=5)
    for layer_idx in range(depth):
        width = hp.Int(
            name="units_" + str(layer_idx),
            min_value=18,
            max_value=180,
            step=18,
        )
        # The LeakyReLU instance listed next to the string aliases is the
        # value the question reports as not working.
        hidden_activation = hp.Choice(
            name='dense_activation',
            values=['relu', 'selu', LeakyReLU(alpha=0.01)],
            default='relu',
        )
        model.add(
            Dense(
                units=width,
                kernel_initializer='normal',
                activation=hidden_activation,
            )
        )

    # Single-unit output layer.
    model.add(Dense(units=1))

    step_size = hp.Choice(name="learning_rate", values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=tf.keras.optimizers.Adam(step_size), loss='mse')
    return model
As a workaround, you can add another activation function to the tf.keras.activations module by modifying its source file (activations.py).
Here's the code for tf.keras.activations.relu which you'll see in activations.py,
#keras_export('keras.activations.relu')
#dispatch.add_dispatch_support
def relu(x, alpha=0., max_value=None, threshold=0):
    """Applies the rectified linear unit activation function.

    With default values, this returns the standard ReLU activation:
    `max(x, 0)`, the element-wise maximum of 0 and the input tensor.

    Modifying default parameters allows you to use non-zero thresholds,
    change the max value of the activation,
    and to use a non-zero multiple of the input for values below the threshold.

    For example:

    >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32)
    >>> tf.keras.activations.relu(foo).numpy()
    array([ 0., 0., 0., 5., 10.], dtype=float32)
    >>> tf.keras.activations.relu(foo, alpha=0.5).numpy()
    array([-5. , -2.5, 0. , 5. , 10. ], dtype=float32)
    >>> tf.keras.activations.relu(foo, max_value=5).numpy()
    array([0., 0., 0., 5., 5.], dtype=float32)
    >>> tf.keras.activations.relu(foo, threshold=5).numpy()
    array([-0., -0., 0., 0., 10.], dtype=float32)

    Arguments:
        x: Input `tensor` or `variable`.
        alpha: A `float` that governs the slope for values lower than the
            threshold.
        max_value: A `float` that sets the saturation threshold (the largest value
            the function will return).
        threshold: A `float` giving the threshold value of the activation function
            below which values will be damped or set to zero.

    Returns:
        A `Tensor` representing the input tensor,
        transformed by the relu activation function.
        Tensor will be of the same shape and dtype of input `x`.
    """
    return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
Copy this code and paste it just below. Change #keras_export('keras.activations.relu') to #keras_export( 'keras.activations.leaky_relu' ) and also change the value of alpha to 0.2, like,
#keras_export('keras.activations.leaky_relu')
#dispatch.add_dispatch_support
def leaky_relu(x, alpha=0.2, max_value=None, threshold=0):
    """Applies the leaky rectified linear unit activation function.

    This is a copy of `relu` whose `alpha` defaults to 0.2 instead of 0, so
    values below the threshold are scaled by `alpha` rather than zeroed.
    It is defined under its own name so that pasting it below `relu` adds a
    new function instead of rebinding (and thereby changing) `relu` itself.

    Arguments:
        x: Input `tensor` or `variable`.
        alpha: A `float` that governs the slope for values lower than the
            threshold. Defaults to 0.2.
        max_value: A `float` that sets the saturation threshold (the largest value
            the function will return).
        threshold: A `float` giving the threshold value of the activation function
            below which values will be damped or set to zero.

    Returns:
        A `Tensor` representing the input tensor,
        transformed by the leaky relu activation function.
        Tensor will be of the same shape and dtype of input `x`.
    """
    # Same backend call as `relu`; only the default slope differs.
    return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
You can use the String alias keras.activations.leaky_relu.
# Custom activation function
from keras.layers import Activation, LeakyReLU  # LeakyReLU is used below and must be imported
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

## Add leaky-relu so we can use it as a string
get_custom_objects().update({'leaky-relu': Activation(LeakyReLU(alpha=0.2))})

## Main activation functions available to use
activation_functions = ['sigmoid', 'relu', 'elu', 'leaky-relu', 'selu', 'gelu', "swish"]
I am trying to get classification probabilities out of my trained Keras model but when I use the model.predict (or model.predict_proba) method, all I get is an array of this form:
array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)
So basically I get a one hot encoded float array. The "1" is mostly in the right place so the training seems to have worked fine. But why can't I get the probabilities out? See code for architecture used.
First I read in the data:
mnist_train = pd.read_csv('data/mnist_train.csv')
mnist_test = pd.read_csv('data/mnist_test.csv')
mnist_train_images = mnist_train.iloc[:, 1:].values
mnist_train_labels = mnist_train.iloc[:, :1].values
mnist_test_images = mnist_test.iloc[:, 1:].values
mnist_test_labels = mnist_test.iloc[:, :1].values
mnist_train_images = mnist_train_images.astype('float32')
mnist_test_images = mnist_test_images.astype('float32')
mnist_train_images /= 255
mnist_test_images /= 255
mnist_train_labels = keras.utils.to_categorical(mnist_train_labels, 10)
mnist_test_labels = keras.utils.to_categorical(mnist_test_labels, 10)
mnist_train_images = mnist_train_images.reshape(60000,28,28,1)
mnist_test_images = mnist_test_images.reshape(10000,28,28,1)
Then I build my model and train:
num_classes = mnist_test_labels.shape[1]
model = Sequential()
model.add(Conv2D(64, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last", padding="same"))
model.add(Conv2D(64, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', data_format="channels_last", padding="same"))
model.add(Conv2D(128, (3, 3), activation='relu', data_format="channels_last", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(mnist_train_images, mnist_train_labels, validation_data=(mnist_test_images, mnist_test_labels), epochs=20, batch_size=256, verbose=2)
scores = model.evaluate(mnist_test_images, mnist_test_labels, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
model.save('mnist-weights.model')
model.save_weights("mnist-model.h5")
model_json = model.to_json()
with open("mnist-model.json", "w") as json_file:
json_file.write(model_json)
But when I then load the model in another application and try to predict probabilities like this, the described error occurs. What am I doing wrong?
json_file = open('alphabet_keras/mnist_model.json', 'r')
model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights("alphabet_keras/mnist_model.h5")
letter = cv2.cvtColor(someImg, cv2.COLOR_BGR2GRAY)
letter = fitSquare(letter,28,2) # proprietary function, doesn't matter
letter_expanded = np.expand_dims(letter, axis=0)
letter_expanded = np.expand_dims(letter_expanded, axis=3)
model.predict_proba(letter_expanded)#[0]
The output is as follows:
array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)
I expect something like:
array([[0.1, 0.34, 0.2, 0.8, 0.1, 0.62, 0.67, 1.0, 0.31, 0.59]], dtype=float32)
There are not error messages of any kind. Please help :)
Your expected output is not correct, for classification the output of a neural network is a probability distribution over the labels, which means that the probabilities are between 0 and 1, and that they sum to 1.0. The values you show sum to more than 1.0.
As for your specific problem, it looks like the probabilities are saturated. This is caused by the fact that you are not normalizing the pixel values by dividing by 255 before predicting, which you do for the training and testing sets; this inconsistency saturates the output neurons.
I have encountered a very persistent problem in a more complex Keras program but have boiled it down to this: The answer must be very simple but I can't find it.
When I run this code:
def __init__(self):
    """Create and compile a one-layer linear model with 4 inputs and 4 outputs."""
    net = Sequential()
    net.add(Dense(4, input_shape=(4,), activation='linear'))
    net.compile(optimizer='adam', loss='mse')
    self.model = net
def run(self):
    """Print one hard-coded sample and its target, then fit the model on them."""
    # Both are plain 1-D lists, i.e. shape (4,), exactly as in the question.
    x = [1.] * 4
    y = [0.] * 4
    print('x:', x, 'x shape:', np.shape(x))
    print('y:', y, 'y shape:', np.shape(y))
    self.model.fit(x, y, batch_size=1, epochs=1, verbose=2)
The print statements show both x and y to be of shape (4,) but the fit line generates:
ValueError: Error when checking input: expected dense_1_input to have
shape (4,) but got array with shape (1,)
I've tried reshaping x to (1,4) but it didn't help. I'm stumped.
Data should be 2D.
Make your x and y data 2D, e.g. x = [[1., 1., 1., 1.]], which is 1x4 data.
Here 1 is the number of samples and 4 is the dimension you defined as the input_shape.
Also convert it to a numpy array with x = np.array(x), because Keras's fit method requires numpy arrays. The documentation at https://keras.io/models/model/ describes x as "Numpy array of training data".
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
class A:
    """Minimal example: a 4-in / 4-out linear model fitted on one 2-D sample."""

    def __init__(self):
        """Create and compile the single-layer model."""
        self.model = Sequential([Dense(4, input_shape=(4,), activation='linear')])
        self.model.compile(optimizer='adam', loss='mse')

    def run(self):
        """Print the sample and target, convert them to arrays and fit once."""
        # Nested lists give a (1, 4) shape: one sample with four features.
        x = [[1.] * 4]
        y = [[0.] * 4]
        print('x:', x, 'x shape:', np.shape(x))
        print('y:', y, 'y shape:', np.shape(y))
        # The lists are converted to numpy arrays before being passed to fit.
        features, targets = np.array(x), np.array(y)
        self.model.fit(features, targets, batch_size=1, epochs=1, verbose=2)
a = A()
a.run()
The x and y arrays you pass are not the right shape. If you want an input tensor of shape (4,) for your model, then you have to prepare a tensor of shape (n, 4), where n is the number of examples you are providing.
import tensorflow as tf
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, Dense
class Mymodel(tf.keras.Model):
    """Wrapper holding a 4-in / 4-out linear model fitted on one (1, 4) sample."""

    def __init__(self):
        """Initialise the base class, then create and compile the inner model."""
        super().__init__()
        self.model = Sequential()
        self.model.add(Dense(4, input_shape=(4,), activation='linear'))
        self.model.compile(optimizer='adam', loss='mse')

    def run(self):
        """Print a (1, 4) input of ones and target of zeros, then fit once."""
        sample_shape = (1, 4)  # one example, four features
        features = np.ones(sample_shape)
        print('x:', features, 'x shape:', np.shape(features))
        targets = np.zeros(sample_shape)
        print('y:', targets, 'y shape:', np.shape(targets))
        self.model.fit(features, targets, batch_size=1, epochs=1, verbose=2)
model = Mymodel()
model.run()
I found some weird behavior when using the add_loss function in a Keras model, compared to passing loss='mse' (or similar) to the compile function, while I was coding exercises for an auto-encoder.
The model that uses loss in the compile function requires y_train and y_test (if I use validation_data) as usual, but the one that uses add_loss(mse(inputs, outputs)) raises an error if I provide y data (y_train / y_test).
Here is the simple code I have made to train mnist, flattened to be 784 dim from 28 x 28.
the common body of the sample code is as below:
encode_dim = 32
inputs = Input(shape=(784,))
encoded = Dense(128, activation='relu')(inputs)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(encode_dim, activation='relu')(encoded)
encoder = Model(inputs=inputs, outputs=encoded)
print(encoder.summary())
encoded_inputs = Input(shape=(encode_dim, ))
decoded = Dense(64, activation='relu')(encoded_inputs)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(784, activation='sigmoid')(decoded)
decoder = Model(inputs=encoded_inputs, outputs=decoded)
print(decoder.summary())
outputs = decoder(encoder(inputs))
ae = Model(inputs, outputs)
print(ae.summary())
1. Providing the loss by name in the compile function, as below,
ae.compile(optimizer='adam', loss='mse')
ae.fit(x_train, y_train, epochs=1, batch_size=256)
would work,
2. but with the case below:
ae_loss = mse(inputs, outputs)
ae.add_loss(ae_loss )
ae.compile(optimizer='adam')
ae.fit(x_train, y_train, epochs=1, batch_size=256)
will give the error like below:
ValueError Traceback (most recent call
last) in
1 deep_vaestyle_model2 = AutoEncoderTester(DeepModelVAEStyle())
2 deep_vaestyle_model2.train(x_train=x_train_flat, y_train=x_train_flat, x_test=x_test_flat, y_test=x_test_flat,
----> 3 epochs=1, batch_size=1024, verbose=1)
4 deep_vaestyle_model2.test(x_test=x_test_flat)
in train(self, x_train, y_train,
x_test, y_test, epochs, batch_size, verbose)
16 histogram_freq=0,
17 write_graph=True,
---> 18 write_grads=True,
19 # batch_size=batch_size,
20 # write_images=True
d:\igs_projects\realtime_eeg_analyzer\venv\lib\site-packages\keras\engine\training.py
in fit(self, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
**kwargs)
950 sample_weight=sample_weight,
951 class_weight=class_weight,
--> 952 batch_size=batch_size)
953 # Prepare validation data.
954 do_validation = False
d:\igs_projects\realtime_eeg_analyzer\venv\lib\site-packages\keras\engine\training.py
in _standardize_user_data(self, x, y, sample_weight, class_weight,
check_array_lengths, batch_size)
787 feed_output_shapes,
788 check_batch_axis=False, # Don't enforce the batch size.
--> 789 exception_prefix='target')
790
791 # Generate sample-wise weight values given the sample_weight and
d:\igs_projects\realtime_eeg_analyzer\venv\lib\site-packages\keras\engine\training_utils.py
in standardize_input_data(data, names, shapes, check_batch_axis,
exception_prefix)
61 raise ValueError('Error when checking model ' +
62 exception_prefix + ': '
---> 63 'expected no data, but got:', data)
64 return []
65 if data is None:
ValueError: ('Error when checking model target: expected no data, but
got:', array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]))
but if I give y data(y_train, and y_test) as None, then it runs as I expected.
I tried to find out why keras consider those two are different but I couldn't. Can someone explain????
Sorry for the title, but I couldn't come up with a better description here.
I am trying to apply batches for training on a model which should have 13 fully connected output layers. Each output layer has only two nodes (but are fully connected as stated).
Building the model's output looks like this:
outputs = list()
for i in range(num_labels):
out_y = Dense(2, activation='softmax', name='out_{:d}'.format(i))(convolution_layer)
outputs.append(out_y)
self.model = Model(input=inputs, output=outputs)
However, I can't manage to feed this model. I've tried to go with a [batch_size, 13, 1, 2] sized output array:
y = np.zeros((batch_size, 13, 1, 2))
But for a batch of size 2 I get:
ValueError: The model expects 13 input arrays, but only received one array. Found: array with shape (2, 13, 1, 2)
I've tried several other things, but it's simply not clear to me what the input for the model should look like.
How can I train this model?
I have also tried to pass a list of lists of numpy arrays:
where the first level of the batch represent the sample (here 2) and the second level is the sample with the list of 13 numpy arrays. Yet I am getting:
ValueError: Error when checking model target: you are passing a list as input to your model, but the model expects a list of 13 Numpy arrays instead. The list you passed was: [[array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 1., 0.]), array([
As suggested, I also tried to return a list() of numpy arrays of size [13,2]:
Where the error becomes:
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 13 arrays but instead got the following list of 2 arrays: [array([[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 1., 0.],
[ ...
The code
Below you can find the current code which generates one sample in sample_generator and a full batch in batch_generator (which uses sample_generator).
def batch_generator(w2v, file_path, meta_info, batch_size, sample_generator_fn, embedding_size):
    """Endlessly yield shuffled (x, y) batches read from a line-per-sample file.

    Please note: the code shows how a list() of per-sample target arrays
    (e.g. the [13,2] ndarrays from the question) is generated, whereas the
    number of such ndarrays in that list is defined by batch_size.

    Arguments:
        w2v: passed through unchanged to `sample_generator_fn`.
        file_path: path of the text file; one sample description per line.
            When the end is reached, reading restarts from the beginning.
        meta_info: object providing `max_sequence_length`; also passed through
            to `sample_generator_fn`.
        batch_size: number of lines read per batch.
        sample_generator_fn: callable `(w2v, line, meta_info)` returning
            `(sentence_embedding, target)` or a falsy value for an unusable line.
        embedding_size: size of the embedding axis of `x`.

    Yields:
        `(x, y)` where `x` has shape
        `(n, meta_info.max_sequence_length, embedding_size, 1)` and `y` is a
        list of `n` targets in the same (shuffled) order. `n` equals
        `batch_size` unless lines were skipped; a batch without any usable
        sample is not yielded.

    Raises:
        ValueError: if the file is empty or a full pass over it produced no
            usable sample. Any exception is printed and re-raised.
    """
    try:
        # The context manager closes the file when the generator is closed.
        with open(file_path) as file:
            # Rewinds seen since the last usable sample; guards against
            # spinning forever over a file that has nothing usable in it.
            barren_rewinds = 0
            while True:
                # Fresh buffers per batch: previously yielded arrays must not
                # be overwritten, and targets must not pile up across batches.
                x = np.zeros((batch_size, meta_info.max_sequence_length, embedding_size, 1))
                y = []
                for _ in range(batch_size):
                    sentence_info_json = file.readline()
                    if sentence_info_json == '':
                        file.seek(0)
                        barren_rewinds += 1
                        sentence_info_json = file.readline()
                        if sentence_info_json == '' or barren_rewinds > 1:
                            raise ValueError('No usable samples in ' + str(file_path))
                    sample = sample_generator_fn(w2v, sentence_info_json, meta_info)
                    if not sample:
                        continue
                    barren_rewinds = 0
                    sentence_embedding, target = sample[0], sample[1]
                    # Row index follows the number of accepted samples so that
                    # x rows and y entries stay aligned when lines are skipped.
                    row = len(y)
                    x[row, :len(sentence_embedding), :, 0] = sentence_embedding
                    y.append(target)
                if not y:
                    continue
                shuffled = np.random.permutation(len(y))
                yield x[shuffled], [y[i] for i in shuffled]
    except Exception as e:
        print('Error in generator.')
        print(e)
        raise
def sample_generator(w2v, sentence_info_json, meta_info):
    """Convert one JSON sentence record into an (embedding, target) pair.

    Arguments:
        w2v: object whose `get_word_vectors(tokens)` returns one vector per
            token, or `None` for tokens without a vector (those are dropped).
        sentence_info_json: JSON text holding `['corenlp']['tokens']` (each
            with a `'word'`) and `['opinions']` (each with a `'category'`).
        meta_info: object whose `category_dict` maps a category to its row
            index in the target.

    Returns:
        `(sentence_embedding, y_tar)`, where `y_tar` has one row per category:
        `[1., 0.]` for categories named in the opinions and `[0., 1.]` for
        all others. Returns `None` when the input is empty or no token has a
        word vector.
    """
    if not sentence_info_json:
        print('???')
        # Nothing to parse: report it as unusable instead of letting
        # json.loads fail on empty input.
        return None
    sentence_info = json.loads(sentence_info_json)
    tokens = [token['word'] for token in sentence_info['corenlp']['tokens']]
    sentence = Sentence(tokens=tokens)
    sentence_embedding = w2v.get_word_vectors(sentence.tokens.tolist())
    sentence_embedding = np.asarray([word_vector for word_vector in sentence_embedding if word_vector is not None])
    if len(sentence_embedding) == 0:
        return None
    # Start with every category marked as absent: [0., 1.].
    y_tar = np.zeros((len(meta_info.category_dict), 2))
    y_tar[:, 1] = 1.0
    for opinion in sentence_info['opinions']:
        index = meta_info.category_dict[opinion['category']]
        # Flip the row of a mentioned category to [1., 0.].
        y_tar[index][0] = 1.0
        y_tar[index][1] = 0.0
    return sentence_embedding, y_tar
As requested, the call to fit_generator()
cnn.model.fit_generator(generator=batch_generator(word2vec,
train_file, train_meta_info,
num_batches, sample_generator,
embedding_size),
samples_per_epoch=2000,
nb_epoch=2,
# validation_data=batch_generator(test_file_path, train_meta_info),
# nb_val_samples=100,
verbose=True)
Your output should be a list as specified in the error. Each element of the list should be a numpy array of size [batch_size, nb_outputs]. So a list of 13 elements of size [batch_size,2] in your case.