I have a problem with grid search in Keras: every loop runs with the same epochs = 25 and never changes to 35. Why doesn't it move on?
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 3000, kernel_initializer = 'uniform', activation = 'relu', input_dim = pca_dimensions))
    classifier.add(Dense(units = 3000, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier)
parameters = {'batch_size': [1000],
              'epochs': [25, 35, 45],
              'optimizer': ['adam']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10)
grid_results = grid_search.fit(X_train, y_train)
print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
I built my CNN using the following code:
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Conv2D, Lambda, Activation, AveragePooling2D, Dense, LeakyReLU
from keras.callbacks import Callback, TensorBoard

def arbitrary_functionality(tensor):
    return tf.abs(tensor)

def my_filter(shape, dtype=None):
    # Fixed 5x5 kernel used to initialize the first convolution
    f = np.array([
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-2]], [[8]], [[-12]], [[8]], [[-2]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]]])
    assert f.shape == shape
    return K.variable(f, dtype='float32')
input_layer = Input(shape=(256, 256, 1))
conv = Conv2D(1, [5, 5], kernel_initializer=my_filter, input_shape=(256, 256, 1), trainable=True, padding='same')(input_layer)
conv = Conv2D(8, (5, 5), padding='same', strides=1, use_bias=False)(conv)
lambda_layer = Lambda(arbitrary_functionality)(conv)
output_layer = Activation(activation='tanh')(lambda_layer)
output_layer = AveragePooling2D(pool_size=(5, 5), strides=2)(output_layer)
hidden = Dense(256)(output_layer)
hidden = LeakyReLU(alpha=0.2)(hidden)
output = Dense(2, activation='softmax')(hidden)
model = Model(inputs=input_layer, outputs=output)
# Callback for loss logging per epoch
class LossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
history = LossHistory()
tensorboard = TensorBoard(log_dir='E:/logs/trail', histogram_freq=0, write_graph=True, write_images=False)
adam = keras.optimizers.Adam(lr=lrate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=decay)
model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy', 'mse'])
batch_si = 64
fitted_model = model.fit(X_train, y_train, batch_size=batch_si, callbacks=[tensorboard], epochs=epochs, verbose=1, validation_split=0.2, shuffle=True)
# Save Model
model.save('E:/models/trail.h5', overwrite = True)
model.save_weights('E:/models/weights_trail.hdf5', overwrite=True)
# Evaluate the model
scores = model.evaluate(X_test, y_test, batch_size=batch_si, verbose=1)
print("Model Accuracy: {:5.2f}%".format(100*scores[1]))
# Load and Evaluate the Model
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'tf': tf})
new_model.load_weights('E:/models/weights_trail.hdf5')
new_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', 'mse'])
scores = new_model.evaluate(X_test, y_test, verbose=1)
print("Accuracy After Model Reloaded: {:5.2f}%".format(100*scores[1]))
The problem is that I can evaluate the output successfully before saving and reloading the model, but when I reload the trained model file and try to evaluate it, I get the following error:
ValueError: Unknown initializer: my_filter
You have to register the custom function name when loading (see https://www.tensorflow.org/guide/keras/save_and_serialize#custom_objects):
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'my_filter': my_filter, 'tf': tf})
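Alternatively (a sketch, assuming TF 2.x), you can register the function once with tf.keras.utils.register_keras_serializable, so load_model can resolve it without a custom_objects entry:

import tensorflow as tf

@tf.keras.utils.register_keras_serializable(package='custom')
def my_filter(shape, dtype=None):
    ...  # same body as the my_filter defined above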
I have created a CNN that does binary classification on images. The CNN is shown below:
def neural_network():
    classifier = Sequential()
    # Adding a first convolutional layer
    classifier.add(Convolution2D(48, 3, input_shape = (320, 320, 3), activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Adding a second convolutional layer
    classifier.add(Convolution2D(48, 3, activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Flattening
    classifier.add(Flatten())
    # Fully connected
    classifier.add(Dense(256, activation = 'relu'))
    # Fully connected
    classifier.add(Dense(1, activation = 'sigmoid'))
    # Compiling the CNN
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    classifier.summary()

    train_datagen = ImageDataGenerator(rescale = 1./255,
                                       horizontal_flip = True,
                                       vertical_flip = True,
                                       brightness_range = [0.5, 1.5])
    test_datagen = ImageDataGenerator(rescale = 1./255)
    training_set = train_datagen.flow_from_directory('/content/drive/My Drive/data_sep/train',
                                                     target_size = (320, 320),
                                                     batch_size = 32,
                                                     class_mode = 'binary')
    test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                                target_size = (320, 320),
                                                batch_size = 32,
                                                class_mode = 'binary')
    es = EarlyStopping(
        monitor = "val_accuracy",
        patience = 15,
        mode = "max",
        baseline = None,
        restore_best_weights = True,
    )
    filepath = "/content/drive/My Drive/data_sep/weightsbestval.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', verbose = 1, save_best_only = True, mode = 'max')
    callbacks_list = [checkpoint]
    history = classifier.fit(training_set,
                             epochs = 50,
                             validation_data = test_set,
                             callbacks = callbacks_list)
    best_score = max(history.history['val_accuracy'])
    return best_score
The images in the folders are organized in the following way:
-train
    -healthy
    -patient
-validation
    -healthy
    -patient
Is there a way to calculate the metrics precision, recall, sensitivity, and specificity, or at least the true positives, true negatives, false positives, and false negatives, from this code?
from sklearn.metrics import classification_report
from sklearn import metrics
import numpy as np

# Rebuild the validation iterator with shuffle = False so that the order of
# the predictions matches test_set.classes
test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                            target_size = (320, 320),
                                            batch_size = 32,
                                            class_mode = 'binary',
                                            shuffle = False)
predictions = model.predict_generator(
    test_set,
    steps = np.math.ceil(test_set.samples / test_set.batch_size),
)
# The model ends in a single sigmoid unit, so threshold the probabilities;
# np.argmax over a one-column array would always return 0
predicted_classes = (predictions > 0.5).astype(int).ravel()
true_classes = test_set.classes
class_labels = list(test_set.class_indices.keys())
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
accuracy = metrics.accuracy_score(true_classes, predicted_classes)
& if you do print(report) ,it will print everything
And if your whole data files are not divisible by your batch size, then use
test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                            target_size = (320, 320),
                                            batch_size = 1,
                                            class_mode = 'binary')
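To get the raw counts the question asks for, here is a minimal sketch building on true_classes and predicted_classes from above (sensitivity is just recall for the positive class):

from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(true_classes, predicted_classes).ravel()
sensitivity = tp / (tp + fn)   # = recall of the positive class
specificity = tn / (tn + fp)
precision   = tp / (tp + fp)
print(tp, tn, fp, fn)
print(sensitivity, specificity, precision)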
I have created a CNN to do binary classification in keras with the following code:
def neural_network():
    classifier = Sequential()
    # Adding a first convolutional layer
    classifier.add(Convolution2D(48, 3, input_shape = (320, 320, 3), activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Adding a second convolutional layer
    classifier.add(Convolution2D(48, 3, activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Flattening
    classifier.add(Flatten())
    # Fully connected
    classifier.add(Dense(256, activation = 'relu'))
    # Fully connected
    classifier.add(Dense(256, activation = 'sigmoid'))
    # Fully connected
    classifier.add(Dense(1, activation = 'sigmoid'))
    # Compiling the CNN
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    classifier.summary()

    train_datagen = ImageDataGenerator(rescale = 1./255,
                                       shear_range = 0.2,
                                       horizontal_flip = True,
                                       vertical_flip = True,
                                       brightness_range = [0.5, 1.5])
    test_datagen = ImageDataGenerator(rescale = 1./255)
    training_set = train_datagen.flow_from_directory('/content/drive/My Drive/data_sep/train',
                                                     target_size = (320, 320),
                                                     batch_size = 32,
                                                     class_mode = 'binary')
    test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                                target_size = (320, 320),
                                                batch_size = 32,
                                                class_mode = 'binary')
    es = EarlyStopping(
        monitor = "val_accuracy",
        mode = "max",
        patience = 15,
        baseline = None,
        restore_best_weights = True,
    )
    filepath = "/content/drive/My Drive/data_sep/weightsbestval.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', verbose = 1, save_best_only = True, mode = 'max')
    callbacks_list = [checkpoint]
    history = classifier.fit(training_set,
                             epochs = 10,
                             validation_data = test_set,
                             callbacks = [es])
    best_score = max(history.history['val_accuracy'])

    from sklearn.metrics import classification_report
    predictions = (classifier.predict(test_set) > 0.5).astype("int32")
    newlist = predictions.tolist()
    finallist = []
    for number in newlist:
        finallist.append(number[0])
    predicted_classes = np.asarray(finallist)
    true_classes = test_set.classes
    class_labels = list(test_set.class_indices.keys())
    report = classification_report(true_classes, predicted_classes, target_names=class_labels)
    accuracy = metrics.accuracy_score(true_classes, predicted_classes)
    print(true_classes)
    print(predicted_classes)
    print(class_labels)
    correct = 0
    for i in range(len(true_classes)):
        if true_classes[i] == predicted_classes[i]:
            correct = correct + 1
    print(correct)
    print((correct * 1.0) / (len(true_classes) * 1.0))
    print(report)
    return best_score
When I run the model, model.fit() reports a validation accuracy of 81.90%, but the accuracy computed after model.predict() is 40%. I have added a callback that restores the best weights, so what could be the problem here?
What fixed it for me was creating another ImageDataGenerator for prediction, with shuffle = False so the generator yields files in the same order as its classes attribute:

test2_datagen = ImageDataGenerator(rescale = 1./255)
test2_set = test2_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                              target_size = (320, 320),
                                              batch_size = 32,
                                              class_mode = 'binary',
                                              shuffle = False)

I then used test2_set for the prediction. I am posting this answer in case anyone has the same problem. Note the lowercase shuffle parameter: writing Shuffle = False will make this code fail.
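As a quick check (a sketch, reusing the classifier from the question), the accuracy recomputed from predict() on the unshuffled generator should now agree with what fit() reported:

# With shuffle = False, predictions line up with test2_set.classes
preds = (classifier.predict(test2_set) > 0.5).astype("int32").ravel()
print((preds == test2_set.classes).mean())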
Since you are saving the best model in this line:
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
please load this model in your code and then predict:
from keras.models import load_model
loaded_model = load_model('data_sep/weightsbestval.hdf5')
Then
loaded_model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
score = loaded_model.evaluate(X_test, Y_test, verbose=0)
print ("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
I am using KerasClassifier to create a NN. Usually, after calling .fit(), the object still has type wrappers.scikit_learn.KerasClassifier, and further functions such as cross_val_score and GridSearchCV work perfectly. I am now trying to fit my code into a format I have been given for a project, which has a predefined class to hold the NN. When the KerasClassifier wrapper is assigned to a property of that class, the return from .fit() is instead of type engine.sequential.Sequential, meaning the further functions will not operate.
I would expect the .fit() function to return an item of type KerasClassifier.
The code below is passed preprocessed training data.
class Module4_Model:
    def __init__(self):
        self.my_model = None

    def init_classifier(self):
        self.my_model = KerasClassifier(build_fn = self.build_classifier,
                                        optimizer = 'adam',
                                        n_units = 7,
                                        batch_size = 32,
                                        epochs = 100)
        return self.my_model

    def build_classifier(self, optimizer, n_units):
        self.my_model = Sequential()
        self.my_model.add(Dense(units = n_units, kernel_initializer = 'uniform', activation = 'relu', input_dim = 15))
        self.my_model.add(Dense(units = n_units, kernel_initializer = 'uniform', activation = 'relu'))
        self.my_model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
        self.my_model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
        return self.my_model

    def train_model(self, X_train, y_train):
        history = self.my_model.fit(X_train, y_train, validation_split = 0.1)
        print(type(self.my_model))
        return history

my_model = Module4_Model()
my_model.init_classifier()
history = my_model.train_model(x_train_processed, y_train_processed)
The same code moved outside of a class works as expected.
The problem is that you are using the same variable (self.my_model) inside both init_classifier and build_classifier, which is not necessary at all. When the KerasClassifier instance is created, it receives self.build_classifier, which is called each time a new underlying model is needed (inside KerasClassifier), and each call overwrites self.my_model with the Sequential model it builds.
A simple solution is to do this:
def build_classifier(self, optimizer, n_units):
    model = Sequential()
    model.add(Dense(units = n_units, kernel_initializer = 'uniform', activation = 'relu', input_dim = 15))
    model.add(Dense(units = n_units, kernel_initializer = 'uniform', activation = 'relu'))
    model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model
Just do not use the same variable for two purposes and it should be fine.
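With that change, the wrapper keeps its type after .fit(), so the scikit-learn utilities work again. A quick sketch (variable names as in the question):

from sklearn.model_selection import cross_val_score

my_model = Module4_Model()
clf = my_model.init_classifier()
scores = cross_val_score(clf, x_train_processed, y_train_processed, cv = 3)
print(type(clf), scores.mean())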
I am trying to do a grid search using scikit-learn's GridSearchCV and KerasClassifier for a multilabel classification problem with 346 labels. I am trying to evaluate the models based on a custom metric, but I always run into the following error. My training data size is 5334.
ValueError: operands could not be broadcast together with shapes (5334,346) (5334,)
def my_custom_scorer(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
custom_scorer = make_scorer(my_custom_scorer, greater_is_better=True)
def create_model(learn_rate = 0.01):
    # create model
    model = Sequential()
    model.add(Dense(100, activation = 'sigmoid', input_dim = 4))
    model.add(Dense(346, activation = 'sigmoid'))
    # Compile model
    optimizer = SGD(lr = learn_rate, momentum = 0)
    model.compile(optimizer = optimizer,
                  loss = custom_loss(weights),
                  metrics = ['accuracy'])
    return model
model = KerasClassifier(build_fn=create_model,verbose=0)
learn_rate = [0.001, 0.01, 0.1]
batch_size = [10]
epochs = [1]
param_grid = dict(epochs = epochs, batch_size = batch_size, learn_rate = learn_rate)
grid = GridSearchCV(estimator = model,
                    param_grid = param_grid, n_jobs = 1, scoring = custom_scorer)
grid_result = grid.fit(X_train, Y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))