Related
I want to try to test some hyperparameters, thats i want to use the GridSearchCV, because it seems like thats the way to do it.
But i also want to use the validation split. To use Callsbacks like EarlyStopping or/and ReduceLROnPlateau. So my question is:
How do i implement GridSearchCV + validation_split correctly that none of the data in validation split is using for training and the whole training set is used to train my model?
Afaik GridSearchCV split again my remaining train data (which is 1-validation_split) and split it again? I get kinda high accuracy and im thinking that i dont split the data correctly
model = KerasClassifier(build_fn=create_model,verbose=2, validation_split=0.1)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform',
#'normal',
'uniform',
'he_normal',
#'lecun_normal',
#'he_uniform'
]
epochs = [3] #5,8,10,30
batches = [64] #32,64
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)
You can use your self-defined validation data by passing an extra argument to the grid.fit() function that is validation_data=(X_test, Y_test). The documentation, states that grid.fit() function accepts all valid arguments that can be passed to the actual model.fit() function of the default Keras model. Therefore, you can pass the validation data through the grid.fit() function. You may also pass the callback functions there.
I am adding a working code below (applied on MNIST-digit dataset). Notice how I added the validation data on grid.fit() and removed the 'validation_split':
import tensorflow as tf
import numpy as np
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)
X_train = np.expand_dims(X_train, 3)
X_test = np.expand_dims(X_test, 3)
def create_model(optimizer, init):
model = tf.keras.Sequential([
tf.keras.layers.Convolution2D(32, 3, input_shape=(28, 28, 1),
activation='relu', kernel_initializer=init),
tf.keras.layers.Convolution2D(32, 3, activation='relu',
kernel_initializer=init),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(12, activation='relu',
kernel_initializer=init),
tf.keras.layers.Dense(10, activation='softmax',
kernel_initializer=init),
])
model.compile(loss='categorical_crossentropy',
optimizer=optimizer, metrics=['accuracy'])
return model
model = KerasClassifier(build_fn=create_model, verbose=2,)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform',
#'normal',
'uniform',
'he_normal',
#'lecun_normal',
#'he_uniform'
]
epochs = [4,]
batches = [32, 64]
param_grid = dict(optimizer=optimizers, nb_epoch=epochs,
batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))
Hope this helps. Thanks.
Currently I'm trying to implement a multi-layer autoencoder using Keras, working on the Mnist dataset (handwritten digits). My code is looking like this:
from keras.layers import Input, Dense, initializers
from keras.models import Model
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras import optimizers, losses
from keras import backend as K
import tensorflow as tf
from keras.callbacks import TensorBoard
from keras.layers import Dropout
from keras.models import Sequential
from keras import models
from keras import layers
import keras
from keras.optimizers import Adam
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
low_dim = 32
def autoencoder(epochs):
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
model = Sequential()
#First autoencoder
model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim=num_features, name='hidden'))
model.add(Dropout(0.2))
model.add(Dense(num_features, activation='sigmoid', input_dim = 400, name = 'output'))
#Second autoencoder
model.add(Dense(100, activation='relu', kernel_initializer=w, input_dim=num_features, name='hidden2'))
model.add(Dropout(0.2))
model.add(Dense(num_features, activation = 'sigmoid', input_dim = 100, name='output2'))
#Third autoencoder
model.add(Dense(50, activation='relu', kernel_initializer=w, input_dim=num_features, name='hidden3'))
model.add(Dropout(0.2))
model.add(Dense(num_features, activation='sigmoid', input_dim=10, name='output3'))
model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(d.X_train, d.X_train,
epochs=epochs,
batch_size=64,
shuffle=True,
validation_data=(d.X_test, d.X_test))
model.test_on_batch(d.X_test, d.X_test)
print(history.history.keys())
plt.plot(history.history['acc'])
print(history.history['acc'])
plt.show()
return model
def finding_index():
elements, index = np.unique(d.Y_test, return_index = True)
return elements, index
def plotting():
ae = autoencoder(2)
elements, index = finding_index()
y_proba = ae.predict(d.X_test)
plt.figure(figsize=(20, 4))
#size = 20
for i in range(len(index)):
ax = plt.subplot(2, len(index), i + 1)
plt.imshow(d.X_test[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax = plt.subplot(2, len(index), i + 1 + len(index))
plt.imshow(y_proba[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
plotting()
I have two questions, is it supposed to be like this when you stack autoencoders or should I let one layer reduce dimensions to let's say 400 and then the next to a 100 and so on, or the way I have done it? The second one is, can you different optimizers (in my case Adam) for different layers? I would like to use SGD (stochastic gradient descent) for the last layer. Thanks in advance!
You should not do it the way you've done it, but the way you described it in the question. Also you should go down first and then up again (e.g 400, 100, 50, 25, 10, 25, 50, 100, 400) in granular steps.
For the second question is the answer that it depends. You could train the model with Adam first and then freeze all but the last layer to train this further with SGD. But you can't tell Keras to use different classifiers for different layers.
I'm predicting image classes using Keras. It works in Google Cloud ML (GCML), but for efficiency need change it to pass base64 strings instead of json array. Related Documentation
I can easily run python code to decode a base64 string into json array, but when using GCML I don't have the opportunity to run a preprocessing step (unless maybe use a Lambda layer in Keras, but I don't think that is the correct approach).
Another answer suggested adding tf.placeholder with type of tf.string, which makes sense, but how to incorporate that into the Keras model?
Here is complete code for training the model and saving the exported model for GCML...
import os
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
from tensorflow.python.platform import gfile
IMAGE_HEIGHT = 138
IMAGE_WIDTH = 106
NUM_CLASSES = 329
def preprocess(filename):
# decode the image file starting from the filename
# end up with pixel values that are in the -1, 1 range
image_contents = tf.read_file(filename)
image = tf.image.decode_png(image_contents, channels=1)
image = tf.image.convert_image_dtype(image, dtype=tf.float32) # 0-1
image = tf.expand_dims(image, 0) # resize_bilinear needs batches
image = tf.image.resize_bilinear(image, [IMAGE_HEIGHT, IMAGE_WIDTH], align_corners=False)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0) # -1 to 1
image = tf.squeeze(image,[0])
return image
filelist = gfile.ListDirectory("images")
sess = tf.Session()
with sess.as_default():
x = np.array([np.array( preprocess(os.path.join("images", filename)).eval() ) for filename in filelist])
input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 1) # 1, because preprocessing made grayscale
# in our case the labels come from part of the filename
y = np.array([int(filename[filename.index('_')+1:-4]) for filename in filelist])
# convert class labels to numbers
y = keras.utils.to_categorical(y, NUM_CLASSES)
########## TODO: something here? ##########
image = K.placeholder(shape=(), dtype=tf.string)
decoded = tf.image.decode_jpeg(image, channels=3)
# scores = build_model(decoded)
model = Sequential()
# model.add(decoded)
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(
x,
y,
batch_size=64,
epochs=20,
verbose=1,
validation_split=0.2,
shuffle=False
)
predict_signature = tf.saved_model.signature_def_utils.build_signature_def(
inputs={'input_bytes':tf.saved_model.utils.build_tensor_info(model.input)},
########## TODO: something here? ##########
# inputs={'input': image }, # input name must have "_bytes" suffix to use base64.
outputs={'formId': tf.saved_model.utils.build_tensor_info(model.output)},
method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)
builder = tf.saved_model.builder.SavedModelBuilder("exported_model")
builder.add_meta_graph_and_variables(
sess=K.get_session(),
tags=[tf.saved_model.tag_constants.SERVING],
signature_def_map={
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: predict_signature
},
legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op')
)
builder.save()
This is related to my previous question.
Update:
The heart of the question is how to incorporate the placeholder that calls decode into the Keras model. In other words, after creating the placeholder that decodes the base64 string to a tensor, how to incorporate that into what Keras runs? I assume it needs to be a layer.
image = K.placeholder(shape=(), dtype=tf.string)
decoded = tf.image.decode_jpeg(image, channels=3)
model = Sequential()
# Something like this, but this fails because it is a tensor, not a Keras layer. Possibly this is where a Lambda layer comes in?
model.add(decoded)
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
...
Update 2:
Trying to use a lambda layer to accomplish this...
import keras
from keras.models import Sequential
from keras.layers import Lambda
from keras import backend as K
import tensorflow as tf
image = K.placeholder(shape=(), dtype=tf.string)
model = Sequential()
model.add(Lambda(lambda image: tf.image.decode_jpeg(image, channels=3), input_shape=() ))
Gives the error: TypeError: Input 'contents' of 'DecodeJpeg' Op has type float32 that does not match expected type of string.
first of all I use tf.keras but this should not be a big problem.
So here is an example of how you can read a base64 decoded jpeg:
def preprocess_and_decode(img_str, new_shape=[299,299]):
img = tf.io.decode_base64(img_str)
img = tf.image.decode_jpeg(img, channels=3)
img = tf.image.resize_images(img, new_shape, method=tf.image.ResizeMethod.BILINEAR, align_corners=False)
# if you need to squeeze your input range to [0,1] or [-1,1] do it here
return img
InputLayer = Input(shape = (1,),dtype="string")
OutputLayer = Lambda(lambda img : tf.map_fn(lambda im : preprocess_and_decode(im[0]), img, dtype="float32"))(InputLayer)
base64_model = tf.keras.Model(InputLayer,OutputLayer)
The code above creates a model that takes a jpeg of any size, resizes it to 299x299 and returns as 299x299x3 tensor. This model can be exported directly to saved_model and used for Cloud ML Engine serving. It is a little bit stupid, since the only thing it does is the convertion of base64 to tensor.
If you need to redirect the output of this model to the input of an existing trained and compiled model (e.g inception_v3) you have to do the following:
base64_input = base64_model.input
final_output = inception_v3(base64_model.output)
new_model = tf.keras.Model(base64_input,final_output)
This new_model can be saved. It takes base64 jpeg and returns classes identified by the inception_v3 part.
Another answer suggested adding tf.placeholder with type of tf.string, which makes sense, but how to incorporate that into the Keras model?
In Keras you can access your selected Backend (in this case Tensorflow) by doing:
from keras import backend as K
This you already seem to import on your code. That will enable you to access some native methods and resources available on the backend of your choice. It is the case that Keras backend includes a method for creating placeholders, among other utilities. Regarding placeholders, we can see what the Keras docs indicates about them:
placeholder
keras.backend.placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None)
Instantiates a placeholder tensor and returns it.
It also gives some example on its use:
>>> from keras import backend as K
>>> input_ph = K.placeholder(shape=(2, 4, 5))
>>> input_ph._keras_shape
(2, 4, 5)
>>> input_ph
<tf.Tensor 'Placeholder_4:0' shape=(2, 4, 5) dtype=float32>
As you can see, this is returning a Tensorflow tensor, with shape (2,4,5) and of dtype float. If you had another backend while doing the example you would get another tensor object (a Theano one surely). You can therefore use this placeholder() to adapt the solution you got on your previous question.
In conclusion, you can use your backend imported as K (or whatever you want) to do calls on the methods and objects available on the backend of your choice, by doing K.foo.bar() on the desired method. I suggest you give a read to what the Keras Backend to explore more things that can be useful for you on future situations.
Update: As per your edit. Yes, this placeholder should be a layer in your model. Specifically, it should be the Input Layer of your model, as it holds your decoded image (as Keras needs it that way) to classify.
I am running keras over tensorflow, trying to implement a multi-dimensional LSTM network to predict a linear continuous target variable , a single value for each example(return_sequences = False).
My sequence length is 10 and number of features (dim) is 11.
This is what I run:
import pprint, pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
# Input sequence
wholeSequence = [[0,0,0,0,0,0,0,0,0,2,1],
[0,0,0,0,0,0,0,0,2,1,0],
[0,0,0,0,0,0,0,2,1,0,0],
[0,0,0,0,0,0,2,1,0,0,0],
[0,0,0,0,0,2,1,0,0,0,0],
[0,0,0,0,2,1,0,0,0,0,0],
[0,0,0,2,1,0,0,0,0,0,0],
[0,0,2,1,0,0,0,0,0,0,0],
[0,2,1,0,0,0,0,0,0,0,0],
[2,1,0,0,0,0,0,0,0,0,0]]
# Preprocess Data:
wholeSequence = np.array(wholeSequence, dtype=float) # Convert to NP array.
data = wholeSequence
target = np.array([20])
# Reshape training data for Keras LSTM model
data = data.reshape(1, 10, 11)
target = target.reshape(1, 1, 1)
# Build Model
model = Sequential()
model.add(LSTM(11, input_shape=(10, 11), unroll=True, return_sequences=False))
model.add(Dense(11))
model.add(Activation('linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(data, target, nb_epoch=1, batch_size=1, verbose=2)
and get the error ValueError: Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (1, 1, 1)
Not sure what should the activation layer should get (shape wise)
Any help appreciated
thanks
If you just want to have a single linear output neuron, you can simply use a dense layer with one hidden unit and supply the activation there. Your output then can be a single vector without the reshape- I adjusted your given example code to make it work:
wholeSequence = np.array(wholeSequence, dtype=float) # Convert to NP array.
data = wholeSequence
target = np.array([20])
# Reshape training data for Keras LSTM model
data = data.reshape(1, 10, 11)
# Build Model
model = Sequential()
model.add(LSTM(11, input_shape=(10, 11), unroll=True, return_sequences=False))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(data, target, nb_epoch=1, batch_size=1, verbose=2)
I have a sequential model that I built in Keras.
I try to figure out how to change the shape of the input. In the following example
model = Sequential()
model.add(Dense(32, input_shape=(500,)))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
let's say that I want to build a new model with different input shape, conceptual this should looks like this:
model1 = model
model1.layers[0] = Dense(32, input_shape=(250,))
is there a way to modify the model input shape?
Somewhat related, so hopefully someone will find this useful: If you have an existing model where the input is a placeholder that looks like (None, None, None, 3) for example, you can load the model, replace the first layer with a concretely shaped input. Transformation of this kind is very useful when for example you want to use your model in iOS CoreML (In my case the input of the model was a MLMultiArray instead of CVPixelBuffer, and the model compilation failed)
from keras.models import load_model
from keras import backend as K
from keras.engine import InputLayer
import coremltools
model = load_model('your_model.h5')
# Create a new input layer to replace the (None,None,None,3) input layer :
input_layer = InputLayer(input_shape=(272, 480, 3), name="input_1")
# Save and convert :
model.layers[0] = input_layer
model.save("reshaped_model.h5")
coreml_model = coremltools.converters.keras.convert('reshaped_model.h5')
coreml_model.save('MyPredictor.mlmodel')
Think about what changing the input shape in that situation would mean.
Your first model
model.add(Dense(32, input_shape=(500,)))
Has a dense layer that really is a 500x32 matrix.
If you changed your input to 250 elements, your layers's matrix and input dimension would mismatch.
If, however, what you were trying to achieve was to reuse your last layer's trained parameters from your first 500 element input model, you could get those weights by get_weights. Then you could rebuild a new model and set values at the new model with set_weights.
model1 = Sequential()
model1.add(Dense(32, input_shape=(250,)))
model1.add(Dense(10, activation='softmax'))
model1.layers[1].set_weights(model1.layers[1].get_weights())
Keep in mind that model1 first layer (aka model1.layers[0]) would still be untrained
Here is another solution without defining each layer of the model from scratch. The key for me was to use "_layers" instead of "layers". The latter only seems to return a copy.
import keras
import numpy as np
def get_model():
old_input_shape = (20, 20, 3)
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(9, (3, 3), padding="same", input_shape=old_input_shape))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(1, activation="sigmoid"))
model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(lr=0.0001), metrics=['acc'], )
model.summary()
return model
def change_model(model, new_input_shape=(None, 40, 40, 3)):
# replace input shape of first layer
model._layers[1].batch_input_shape = new_input_shape
# feel free to modify additional parameters of other layers, for example...
model._layers[2].pool_size = (8, 8)
model._layers[2].strides = (8, 8)
# rebuild model architecture by exporting and importing via json
new_model = keras.models.model_from_json(model.to_json())
new_model.summary()
# copy weights from old model to new one
for layer in new_model.layers:
try:
layer.set_weights(model.get_layer(name=layer.name).get_weights())
except:
print("Could not transfer weights for layer {}".format(layer.name))
# test new model on a random input image
X = np.random.rand(10, 40, 40, 3)
y_pred = new_model.predict(X)
print(y_pred)
return new_model
if __name__ == '__main__':
model = get_model()
new_model = change_model(model)