Model.fit() Validation Accuracy different than Model.predict() - keras

I have created a CNN to do binary classification in keras with the following code:
def neural_network():
    classifier = Sequential()
    # Adding a first convolutional layer
    classifier.add(Convolution2D(48, 3, input_shape = (320, 320, 3), activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Adding a second convolutional layer
    classifier.add(Convolution2D(48, 3, activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Flattening
    classifier.add(Flatten())
    # Fully connected
    classifier.add(Dense(256, activation = 'relu'))
    # Fully connected
    classifier.add(Dense(256, activation = 'sigmoid'))
    # Fully connected
    classifier.add(Dense(1, activation = 'sigmoid'))
    # Compiling the CNN
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    classifier.summary()

    train_datagen = ImageDataGenerator(rescale = 1./255,
                                       shear_range = 0.2,
                                       horizontal_flip = True,
                                       vertical_flip = True,
                                       brightness_range = [0.5, 1.5])
    test_datagen = ImageDataGenerator(rescale = 1./255)
    training_set = train_datagen.flow_from_directory('/content/drive/My Drive/data_sep/train',
                                                     target_size = (320, 320),
                                                     batch_size = 32,
                                                     class_mode = 'binary')
    test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                                target_size = (320, 320),
                                                batch_size = 32,
                                                class_mode = 'binary')
    es = EarlyStopping(
        monitor = "val_accuracy",
        mode = "max",
        patience = 15,  # value missing in the original snippet; 15 matches the same callback later in this post
        baseline = None,
        restore_best_weights = True,
    )
    filepath = "/content/drive/My Drive/data_sep/weightsbestval.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', verbose = 1, save_best_only = True, mode = 'max')
    callbacks_list = [checkpoint]
    history = classifier.fit(training_set,
                             epochs = 10,
                             validation_data = test_set,
                             callbacks = [es]
                             )
    best_score = max(history.history['val_accuracy'])

    from sklearn.metrics import classification_report
    from sklearn import metrics
    predictions = (classifier.predict(test_set) > 0.5).astype("int32")
    newlist = predictions.tolist()
    finallist = []
    for number in newlist:
        finallist.append(number[0])
    predicted_classes = np.asarray(finallist)
    true_classes = test_set.classes
    class_labels = list(test_set.class_indices.keys())
    report = classification_report(true_classes, predicted_classes, target_names = class_labels)
    accuracy = metrics.accuracy_score(true_classes, predicted_classes)
    print(true_classes)
    print(predicted_classes)
    print(class_labels)
    correct = 0
    for i in range(len(true_classes)):
        if (true_classes[i] == predicted_classes[i]):
            correct = correct + 1
    print(correct)
    print((correct * 1.0) / (len(true_classes) * 1.0))
    print(report)
    return best_score
When I run the model, model.fit() reports a validation accuracy of 81.90%, but the accuracy I compute from model.predict() on the same validation set afterwards is only 40%. I have added a callback that restores the best weights, so what could be the problem here?

What fixed it for me was that I created another ImageDataGenerator for the prediction step:
test2_datagen = ImageDataGenerator(rescale = 1./255)
test2_set = test2_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                              target_size = (320, 320),
                                              batch_size = 32,
                                              class_mode = 'binary',
                                              Shuffle = False)
But as you can see, I set Shuffle = False. I am posting this answer in case anyone has the same problem. I then used test2_set for the prediction.

test2_set = test2_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                              target_size = (320, 320),
                                              batch_size = 32,
                                              class_mode = 'binary',
                                              shuffle = False)
Note the lowercase shuffle parameter; with the capitalized Shuffle above, this code will fail.
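For completeness, here is a minimal sketch of the full prediction-and-report flow with the non-shuffled generator (my sketch; it assumes the trained classifier, the imports, and the paths from the question):

from sklearn.metrics import classification_report

# Non-shuffled generator so predictions line up with test2_set.classes
test2_datagen = ImageDataGenerator(rescale = 1./255)
test2_set = test2_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                              target_size = (320, 320),
                                              batch_size = 32,
                                              class_mode = 'binary',
                                              shuffle = False)

# Sigmoid output -> threshold at 0.5 to get hard class labels
predictions = (classifier.predict(test2_set) > 0.5).astype("int32").ravel()

true_classes = test2_set.classes                     # ordered to match the non-shuffled predictions
class_labels = list(test2_set.class_indices.keys())
print(classification_report(true_classes, predictions, target_names = class_labels))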

Since you are saving the best model in this line:
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
please load this model in your code, and then predict:
from keras.models import load_model
loaded_model = load_model('data_sep/weightsbestval.hdf5')
Then
loaded_model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
score = loaded_model.evaluate(X_test, Y_test, verbose=0)
print ("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

Related

UnimplementedError: Graph execution error in CNN model

What's the problem with this?
I'm confused right now. I need to train my model, and I have read the documentation, but I'm getting this kind of error. At first the model trained successfully and I saved it, but the accuracy was too low, so I reconstructed my model and now get this error. I only changed img_width and img_height.
classification_no = 6

# Train and Test dataset
train_dataset_path = "Datasets/train/"
test_dataset_path = "Datasets/test/"
img_width, img_height = 100, 100
size = 20

def detection_model():
    detection_model = Sequential(
        [
            # module 1
            tf.keras.layers.Conv2D(32, kernel_size=(3, 3), input_shape=(img_width, img_height, 1)),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.Conv2D(64, kernel_size=(3, 3), padding='same'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
            tf.keras.layers.Dropout(0.25),
            tf.keras.layers.Flatten(),
            # Dense
            tf.keras.layers.Dense(64),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            # Dropout
            tf.keras.layers.Dropout(0.5),
            # Dense
            tf.keras.layers.Dense(64),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
            # output
            tf.keras.layers.Dense(6, activation='softmax'),
        ]
    )
    detection_model.compile(loss='categorical_crossentropy',
                            optimizer=Adam(
                                learning_rate=0.0001,
                                beta_1=0.9,
                                beta_2=0.999,
                                decay=1e-6),
                            metrics=['accuracy'])
    detection_model.summary()
    return detection_model

train = ImageDataGenerator(rescale=1./255)
validation = ImageDataGenerator(rescale=1./255)

def get_model_train():
    data_train = train.flow_from_directory(
        train_dataset_path,
        target_size=(img_width, img_height),
        batch_size=16,
        color_mode='rgba',
        class_mode='categorical')
    return data_train

def get_model_test():
    data_test = validation.flow_from_directory(
        test_dataset_path,
        target_size=(img_width, img_height),
        batch_size=16,
        color_mode='rgba',
        class_mode='categorical')
    return data_test

model = detection_model()
fit_model = model.fit(
    get_model_train(),
    steps_per_epoch=2006 // 16,
    epochs=size,
    validation_data=get_model_test(),
    validation_steps=2006 // 16,
)

ValueError: Unknown initializer: my_filter

I built my CNN using the following code:
def arbitrary_functionality(tensor):
    return tf.abs(tensor)

def my_filter(shape, dtype=None):
    f = np.array([
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-2]], [[8]], [[-12]], [[8]], [[-2]]],
        [[[2]], [[-6]], [[8]], [[-6]], [[2]]],
        [[[-1]], [[2]], [[-2]], [[2]], [[-1]]]])
    assert f.shape == shape
    return K.variable(f, dtype='float32')

input_layer = Input(shape=(256, 256, 1))
conv = Conv2D(1, [5, 5], kernel_initializer=my_filter, input_shape=(256, 256, 1), trainable=True, padding='same')(input_layer)
conv = Conv2D(8, (5, 5), padding='same', strides=1, use_bias=False)(conv)
lambda_layer = Lambda(arbitrary_functionality)(conv)
output_layer = Activation(activation='tanh')(lambda_layer)
output_layer = AveragePooling2D(pool_size=(5, 5), strides=2)(output_layer)
hidden = Dense(256)(output_layer)
hidden = LeakyReLU(alpha=0.2)(hidden)
output = Dense(2, activation='softmax')(hidden)
model = Model(inputs=input_layer, outputs=output)

# Callback for loss logging per epoch
class LossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

history = LossHistory()
tensorboard = TensorBoard(log_dir='E:/logs/trail', histogram_freq=0, write_graph=True, write_images=False)

adam = keras.optimizers.Adam(lr=lrate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=decay)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', 'mse'])

batch_si = 64
fitted_model = model.fit(X_train, y_train, batch_size=batch_si, callbacks=[tensorboard], epochs=epochs, verbose=1, validation_split=0.2, shuffle=True)

# Save Model
model.save('E:/models/trail.h5', overwrite=True)
model.save_weights('E:/models/weights_trail.hdf5', overwrite=True)

# Evaluate the model
scores = model.evaluate(X_test, y_test, batch_size=batch_si, verbose=1)
print("Model Accuracy: {:5.2f}%".format(100 * scores[1]))

# Load and Evaluate the Model
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'tf': tf})
new_model.load_weights('E:/models/trail.hdf5')
new_model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', 'mse'])
scores = new_model.evaluate(X_test, y_test, verbose=1)
print("Accuracy After Model Reloaded: {:5.2f}%".format(100 * scores[1]))
Now the problem is: I can evaluate my output successfully before saving and reloading the model. But when I reload the trained model file and try to evaluate the output, I get the following error:
ValueError: Unknown initializer: my_filter
You have to register the custom function name when loading the model (see: https://www.tensorflow.org/guide/keras/save_and_serialize#custom_objects):
new_model = tf.keras.models.load_model('E:/models/trail.h5', custom_objects={'my_filter': my_filter, 'tf': tf})
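As a sketch of an alternative (not part of the original answer), the custom objects can also be registered once via a custom-object scope, so they do not have to be repeated on every load_model call:

import tensorflow as tf

# 'my_filter' is the custom initializer defined in the question;
# 'tf' is kept because the saved Lambda layer references tf.abs.
with tf.keras.utils.custom_object_scope({'my_filter': my_filter, 'tf': tf}):
    new_model = tf.keras.models.load_model('E:/models/trail.h5')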

Getting Precision, Recall, Sensitivity and Specificity in a Keras CNN

I have created a CNN that does binary classification on images. The CNN is seen below:
def neural_network():
    classifier = Sequential()
    # Adding a first convolutional layer
    classifier.add(Convolution2D(48, 3, input_shape = (320, 320, 3), activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Adding a second convolutional layer
    classifier.add(Convolution2D(48, 3, activation = 'relu'))
    classifier.add(MaxPooling2D())
    # Flattening
    classifier.add(Flatten())
    # Fully connected
    classifier.add(Dense(256, activation = 'relu'))
    # Fully connected
    classifier.add(Dense(1, activation = 'sigmoid'))
    # Compiling the CNN
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    classifier.summary()

    train_datagen = ImageDataGenerator(rescale = 1./255,
                                       horizontal_flip = True,
                                       vertical_flip = True,
                                       brightness_range = [0.5, 1.5])
    test_datagen = ImageDataGenerator(rescale = 1./255)
    training_set = train_datagen.flow_from_directory('/content/drive/My Drive/data_sep/train',
                                                     target_size = (320, 320),
                                                     batch_size = 32,
                                                     class_mode = 'binary')
    test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                                target_size = (320, 320),
                                                batch_size = 32,
                                                class_mode = 'binary')
    es = EarlyStopping(
        monitor = "val_accuracy",
        patience = 15,
        mode = "max",
        baseline = None,
        restore_best_weights = True,
    )
    filepath = "/content/drive/My Drive/data_sep/weightsbestval.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', verbose = 1, save_best_only = True, mode = 'max')
    callbacks_list = [checkpoint]
    history = classifier.fit(training_set,
                             epochs = 50,
                             validation_data = test_set,
                             callbacks = callbacks_list
                             )
    best_score = max(history.history['val_accuracy'])
    return best_score
The images in the folders are organized in the following way:
-train
    -healthy
    -patient
-validation
    -healthy
    -patient
Is there a way to calculate the metrics precision, recall, sensitivity and specificity, or at least the true positives, true negatives, false positives and false negatives, from this code?
from sklearn.metrics import classification_report
from sklearn import metrics
import numpy as np

test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                            target_size = (320, 320),
                                            batch_size = 32,
                                            class_mode = 'binary',
                                            shuffle = False)  # keep order aligned with test_set.classes
predictions = model.predict_generator(
    test_set,
    steps = np.math.ceil(test_set.samples / test_set.batch_size),
)
# Single sigmoid output: threshold the probabilities instead of argmax
predicted_classes = (predictions > 0.5).astype(int).ravel()
true_classes = test_set.classes
class_labels = list(test_set.class_indices.keys())
report = classification_report(true_classes, predicted_classes, target_names = class_labels)
accuracy = metrics.accuracy_score(true_classes, predicted_classes)
And if you do print(report), it will print everything.
And if your total number of files is not divisible by your batch size, then use:
test_set = test_datagen.flow_from_directory('/content/drive/My Drive/data_sep/validate',
                                            target_size = (320, 320),
                                            batch_size = 1,
                                            class_mode = 'binary')
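To get the remaining numbers the question asks for, one possible follow-up (my sketch, assuming the true_classes and predicted_classes arrays built above) derives sensitivity and specificity from the confusion matrix:

from sklearn.metrics import confusion_matrix

# For binary labels, ravel() yields tn, fp, fn, tp in that order
tn, fp, fn, tp = confusion_matrix(true_classes, predicted_classes).ravel()

precision   = tp / (tp + fp)   # fraction of predicted positives that are correct
recall      = tp / (tp + fn)   # a.k.a. sensitivity / true positive rate
sensitivity = recall
specificity = tn / (tn + fp)   # true negative rate

print("TP=%d TN=%d FP=%d FN=%d" % (tp, tn, fp, fn))
print("precision=%.3f recall/sensitivity=%.3f specificity=%.3f" % (precision, recall, specificity))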

Why does my Gridsearch on Keras NN - Model just loop?

I have a problem with grid search in Keras: it loops every time with the same epochs = 25 and never changes to 35.
def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 3000, kernel_initializer = 'uniform', activation = 'relu', input_dim = pca_dimensions))
    classifier.add(Dense(units = 3000, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn = build_classifier)
parameters = {'batch_size': [1000],
              'epochs': [25, 35, 45],
              'optimizer': ['adam']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10)
grid_results = grid_search.fit(X_train, y_train)

print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

keras multiple input model doesn't work

I have built a model to distinguish cats from dogs using Kaggle's cats_vs_dogs data set. I have tried two ways to do it. For the first one, I used three existing models (ResNet50, Xception, InceptionV3) to extract features: I put the training data through these models' convolutional bases, predicted and concatenated the results, then used them for a standalone densely connected classifier. The result was pretty good; after five epochs of training, val_acc reached 99.58%. Then I wanted to use data augmentation and fine-tuning, so I extended those three models by adding layers on top and ran the whole thing end-to-end on the input data. The strange thing is that the second way gets good results in training but lousy ones in validation, and val_acc is always constant (0.5). I am very confused: how can these two ways give such different results?
Here is my code:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

res_net_input = Input((224, 224, 3), name='res_net')
res_net_base_model = ResNet50(input_tensor=res_net_input, weights='imagenet', include_top=False)
for layer in res_net_base_model.layers:
    layer.trainable = False

xception_input = Input((299, 299, 3), name='xception')
xception_base_model = Xception(input_tensor=xception_input, weights='imagenet', include_top=False)
for layer in xception_base_model.layers:
    layer.trainable = False

inception_input = Input((299, 299, 3), name='inception')
inception_base_model = InceptionV3(input_tensor=inception_input, weights='imagenet', include_top=False)
for layer in inception_base_model.layers:
    layer.trainable = False

res_result = GlobalAveragePooling2D()(res_net_base_model.output)
xcp_result = GlobalAveragePooling2D()(xception_base_model.output)
icp_result = GlobalAveragePooling2D()(inception_base_model.output)

concatenated = concatenate([res_result, xcp_result, icp_result], axis=1)
x = Dropout(0.5)(concatenated)
x = Dense(1, activation='sigmoid')(x)
model = Model([res_net_base_model.input, xception_base_model.input, inception_base_model.input], x)

model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])

train_imgen = ImageDataGenerator(rescale = 1./255,
                                 shear_range = 0.2,
                                 zoom_range = 0.2,
                                 rotation_range = 5.,
                                 horizontal_flip = True)
validation_imgen = ImageDataGenerator(rescale = 1./255)

def generate_generator_multiple(generator, dir1, batch_size, img_size1, img_size2, img_size3):
    genX1 = generator.flow_from_directory(dir1,
                                          target_size = (img_size1[0], img_size1[1]),
                                          class_mode = 'binary',
                                          batch_size = batch_size,
                                          shuffle = False,
                                          )
    genX2 = generator.flow_from_directory(dir1,
                                          target_size = (img_size2[0], img_size2[1]),
                                          class_mode = 'binary',
                                          batch_size = batch_size,
                                          shuffle = False,
                                          seed = 7)
    genX3 = generator.flow_from_directory(dir1,
                                          target_size = (img_size3[0], img_size3[1]),
                                          class_mode = 'binary',
                                          batch_size = batch_size,
                                          shuffle = False,
                                          seed = 7)
    while True:
        X1i = genX1.next()
        X2i = genX2.next()
        X3i = genX3.next()
        yield [X1i[0], X2i[0], X3i[0]], X1i[1]

train_generator = generate_generator_multiple(train_imgen, '/output/keras/dog_vs_cat_full/train', 100, (224, 224), (299, 299), (299, 299))
validation_generator = generate_generator_multiple(validation_imgen, '/output/keras/dog_vs_cat_full/validation', 100, (224, 224), (299, 299), (299, 299))

history = model.fit_generator(train_generator,
                              steps_per_epoch = 200,
                              epochs = 5,
                              validation_data = validation_generator,
                              validation_steps = 50,
                              shuffle = False)
