Keras predicts only one class from 3 - keras

I'm new to keras. I've adopted my model from one of the examples and it worked surprisinly well.
Suddenly my model started to predict always only one class from many. I think, the only change I made to the model is adding more samples.
Now I'm lost and have no idea, how to find out the cause.
Could someone tell me where to start and what to check?
Just in case, here is the code. The application learns from vew sorted images to sort remaining unsorted.
The images look like this, 800 per class in training set and 300 per class in test set. Validation and test are the same sets.
import numpy as np
from keras.models import model_from_json
from keras.preprocessing import image
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from shutil import copyfile
# dimensions of the images.
img_width, img_height = 64, 64
input_dirs = [
#list of dirs to sort with model,
]
out_directory = 'dir_of_auto_sorted_images'
train_base = 'dir_with_training_images'
train_dir = 'train'
val_dir = 'test'
test_dir = 'test'
nb_train_samples = 800*5 #800 training images per class, flipped V/H and zoomed in generator
nb_validation_samples = 1000 #just a number
nb_test_samples = 300*5
epochs = 100
batch_size = 32
#Read class names from training dir. In test in val they should be the same
tmp = os.path.join(train_base, train_dir)
classes = os.listdir(tmp)
classes.sort() #Classes in prediction seem to be sorted by dir_name
for c in classes: #Create output for predicted, where to copy unsorted from model
try:
os.makedirs(os.path.join(out_directory,c))
except:
pass
#############################
# Is right got gray-scale images? Shouldn't be 1 instead of 3
if K.image_data_format() == 'channels_first':
input_shape = (1, img_width, img_height)
else:
input_shape = (img_width, img_height, 1)
# Define model as proposed in keras tutorials
model = Sequential()
model.add(Conv2D(64, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(len(classes), activation='softmax'))
model.add(Activation('sigmoid'))
CreateModel = 1
if CreateModel:
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
test_datagen = ImageDataGenerator(
zoom_range=0.2,
horizontal_flip=True,
vertical_flip=True)
train_datagen = ImageDataGenerator(
zoom_range=0.2,
horizontal_flip=True,
vertical_flip=True)
train_generator = train_datagen.flow_from_directory(
os.path.join(train_base, train_dir),
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
color_mode='grayscale',
#save_to_dir=out_directory+"\\gendebug"
)
validation_generator = train_datagen.flow_from_directory(
os.path.join(train_base, val_dir),
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical',
color_mode='grayscale'
)
test_generator = test_datagen.flow_from_directory(
os.path.join(train_base, test_dir)
, target_size=(img_width, img_height)
, batch_size=batch_size
, class_mode='categorical',
color_mode='grayscale'
# ,save_to_dir=out_dir
)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
# Save model architecture
model_json = model.to_json()
json_file = open(train_base+"/keras_model.json", "w")
json_file.write(model_json)
json_file.close()
# Save model weights
model.save_weights(train_base+"/keras_model.h5")
print("Finished saving")
score = model.evaluate_generator(test_generator, nb_test_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
score = model.predict_generator(train_generator, nb_test_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print('Score', score)
#############################
json_file = open(train_base+"/keras_model.json", "r")
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights(train_base+"/keras_model.h5")
loaded_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
def get_class(prediction):
m=max(prediction)
if m < 0.6:
return -1
for i in range(0, len(prediction)):
if prediction[i] == m:
return i
return -1
right = 0
wrong = 0
for input_directory in input_dirs:
print(input_directory)
cnt=0
for filename in os.listdir(input_directory):
os.sys.stdout.write('.')
os.sys.stdout.flush()
fn = os.path.join(input_directory, filename)
img = image.load_img(fn, target_size=(64, 64), color_mode='grayscale')
x = image.img_to_array(img)
x = x.astype('float32')
x /= 255
x = np.expand_dims(x, axis=0)
prediction = loaded_model.predict(x)[0]
c = get_class(prediction)
if c >= 0 and c < len(prediction):
predicted = classes[c]
print(fn, prediction)
copyfile(fn, os.path.join(out_directory,predicted, filename))
cnt += 1
if cnt > 100:
break

Related

Why is there no improvement in a categorical data time series model?

I built a simple categorical time series model to predict the next number of a random sequence, but the accuracy hardly moved even I trained it for 10000 epochs. The validation loss started to take off after a few hundred epochs. Could anyone make suggestions for improvement? Here's the model:
import os
import sys
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
DEVICE = 'CPU'
if DEVICE == 'CPU':
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
else:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
print(tf.test.gpu_device_name())
TOTAL_CATALOG=4
POSSIBLE_OUTCOME_COL=4
LOOK_BACK_WINDOW=1
TRAINING_DATA_RATIO=0.8
TRAINING_EPOCHS=10000
sys.path.insert(0, '/DataScience/MyModules')
from m6data import getDrawData, series_to_supervised, Split_data, get_all_categories
def get_all_categories_local(last_combination):
all_category = np.arange(1, last_combination+1)
return all_category.reshape(1,all_category.shape[0])
All_categories=get_all_categories_local(TOTAL_CATALOG)
data_sequence = [1,1,2,4,2,3,1,2,3,3,4,1,2,3,4,2,2,3,1,3]
raw_df = pd.DataFrame(data_sequence, columns=['NE'])
values = raw_df.values
# 05-Apr-2022: One-Hot Encoding
oh_encoder = OneHotEncoder(categories=All_categories, sparse=False)
encoded_input = oh_encoder.fit_transform(values)
FEATURES = encoded_input.shape[1]
POSSIBLE_OUTCOME_COL = FEATURES
draw_reframe = series_to_supervised(encoded_input, LOOK_BACK_WINDOW,1)
train, test = Split_data(draw_reframe, TRAINING_DATA_RATIO)
# Total input = all possible One-Hot Encoding outcome * number of look-back samples.
ALL_INPUT = POSSIBLE_OUTCOME_COL * LOOK_BACK_WINDOW
# split into input and outputs
train_X, train_y = train.iloc[:,:ALL_INPUT], train.iloc[:,ALL_INPUT:]
test_X, test_y = test.iloc[:,:ALL_INPUT], test.iloc[:,ALL_INPUT:]
train_X = train_X.values.reshape((train_X.shape[0], LOOK_BACK_WINDOW , FEATURES))
test_X = test_X.values.reshape((test_X.shape[0], LOOK_BACK_WINDOW, FEATURES))
print(train_X.shape, train_y.shape)
print(test_X.shape, test_y.shape)
def create_model():
model = Sequential()
model.add(LSTM(10,
return_sequences=False,
input_shape=(train_X.shape[1], train_X.shape[2]),
activation='relu'
)
)
#model.add(LSTM(20))
model.add(Dense(units=train_y.shape[1], activation='softmax'))
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.00005),
loss = 'categorical_crossentropy',
metrics=['accuracy'])
return model
model=create_model()
history = model.fit(
train_X, train_y,
epochs=TRAINING_EPOCHS,
batch_size=8,
validation_data=(test_X, test_y),
verbose=1,
)
Here are the plots of accuracies and losses (red=training, blue=validation).
Accuracies
Losses
Thank you in advance for any suggestions.
Update (13-Jun-2022)
I changed my model to the following
def create_model():
model = Sequential()
model.add(LSTM(50,
return_sequences=True,
input_shape=(train_X.shape[1], train_X.shape[2]),
activation='relu'
)
)
model.add(LSTM(units=1000, kernel_regularizer=regularizers.l1(0.05), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=1000, kernel_regularizer=regularizers.l1(0.05), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=1000, kernel_regularizer=regularizers.l1(0.05), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=1000, kernel_regularizer=regularizers.l1(0.05), activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(Dense(1000))
model.add(Dense(units=train_y.shape[1], activation='softmax'))
model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, nesterov=True),
#tf.keras.optimizers.Adam(learning_rate=0.001),
loss = 'categorical_crossentropy',
metrics=['accuracy'])
return model
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=20,min_lr=1e-10)
early_stop = EarlyStopping(monitor='loss', patience=100)
history = model.fit(
train_X, train_y,
epochs=TRAINING_EPOCHS,
batch_size=16,
validation_split=0.1,
validation_data=(test_X, test_y),
verbose=1,
shuffle=False,
callbacks=([reduce_lr], [early_stop])
Accuracy was bouncing around and Val_accuracy was zero all the way. The loss and val_loss were almost the same and dropping together.
Can anyone advise what I can do in this scenario?

Unexpected outputs from model.predict when using rescaling

First off: I'm aware of this post, but it doesn't provide an answer.
I am building my model like this:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator # for data augmentation
import pandas as pd # to save .csv files
from time import perf_counter # to track runtime
from keras.metrics import TrueNegatives, TruePositives, FalseNegatives, FalsePositives
def build_model(dimension):
model = Sequential()
model.add(Conv2D(32, (11,11), activation='relu',
input_shape=(dimension, dimension, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # to prepare for dropout
model.add(Dropout(0.2)) # to prevent overfitting
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy',
TruePositives(),
TrueNegatives(),
FalsePositives(),
FalseNegatives()
]
)
return model
def train_model(epoch, batch_size, run, subrun):
dimension = 200
model = build_model(dimension)
train_datagen = ImageDataGenerator(validation_split=0.2,
# samplewise_std_normalization=True,
rotation_range=40,
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
width_shift_range=0.2,
height_shift_range=0.2,
fill_mode='nearest'
)
training_set = train_datagen.flow_from_directory('train6',
target_size=(dimension, dimension),
color_mode='rgb', # default
class_mode='binary',
batch_size=batch_size,
save_to_dir=None,
interpolation='nearest',
subset='training')
validation_set = train_datagen.flow_from_directory('train6',
target_size=(dimension, dimension),
color_mode='rgb', # default
class_mode='binary',
batch_size=batch_size,
save_to_dir=None,
# if 'str', saves augmented images for visualisation
interpolation='nearest',
subset='validation')
start_time = perf_counter() # start counting
history = model.fit_generator(training_set,
epochs=epoch,
steps_per_epoch=training_set.samples // batch_size,
validation_data=validation_set,
validation_steps=validation_set.samples // batch_size,
verbose=2)
stop_time = perf_counter() # stop counting
# saving trained model & history file
model.save_weights('models/cat_dog_classifier_{0}_{1}.h5'.format(run, subrun)) # save model weights
hist_pd = pd.DataFrame(history.history) # making panda file of history.history
hist_csv_file = 'histories/history_{0}_{1}.csv'.format(run, subrun) # defining name for csv file
with open(hist_csv_file, mode='w') as f: # saving the pd file as csv
hist_pd.to_csv(f)
return stop_time - start_time
And I use the following code to get the probabilities:
from build_model import build_model
from keras.preprocessing import image
import numpy as np
run = 'A28'
subrun = 1
dimension = 200
# build model
model = build_model(dimension)
model.load_weights('models/cat_dog_classifier_{0}_{1}.h5'.format(run, subrun))
# Get test image ready
amount_of_images = 10
predictions = np.zeros((amount_of_images, 2))
labels = np.zeros(amount_of_images)
for i in range(amount_of_images):
image_name = 1 + i # choose what image to start from
test_image = image.load_img('test1/{}.jpg'.format(image_name), target_size=(dimension, dimension))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis=0)
label = model.predict_classes(test_image, batch_size=1)
labels[i] = label
prediction = model.predict(test_image, batch_size=1)
print(prediction)
print(labels)
When I train my model without using the rescaling or the normalisation, the predictions are probabilities as expected. However, when I use either, it returns only 0s and 1s (the same labels as predict_classes). I have tried to run the dummy code provided in the link above, and it worked as expected; which makes sense I suppose, as the script also runs fine when I haven't used rescaling. However, I would really like to be able to use rescaling. Does anyone have any idea what is going wrong?

Use hyperopt (or hyperas) with Keras and ImageDataGenerator "flow_from_directory"

I'm making a code to train a CNN (using Keras) with an hyperparameters search using Hyperas. To train the model, I'm using flow_from_directory function of ImageDataGenerator.
I read a lot of posts and documentation I found on internet but my code doesn't work. I don't understand why.
Hereunder my code :
'''
# Installation d'hyperas
!pip install hyperas
# Accès aux fichiers de gDrive
from google.colab import drive
drive.mount('/content/gdrive')
# Copie de la class data_gen.py sur la racine de gColab
!cp '/content/gdrive/My Drive/Deep-
learning/Projets/CNN_dogs&cats/CNN_cats&dogs_2600_hyperopt_gColab.ipynb' '/content/'
# Importation des librairies
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import matplotlib.pyplot as plt
import numpy as np
from keras.optimizers import SGD
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
# Variables
img_width, img_height = 150, 150
nb_train_samples = 1855
nb_validation_samples = 745
nb_test_samples = 2750
epochs=20
batch_size=16
test_data_dir = '/content/gdrive/My Drive/Deep-learning/Projets/CNN_dogs&cats/data/PetImages/test'
def data():
train_data_dir = '/content/gdrive/My Drive/Deep-learning/Projets/CNN_dogs&cats/data/PetImages_2600/train'
validation_data_dir = '/content/gdrive/My Drive/Deep-
learning/Projets/CNN_dogs&cats/data/PetImages_2600/validation'
# Instanciation des générateurs d'images train
train_datagen = ImageDataGenerator(rescale=1. / 255)
validation_datagen = ImageDataGenerator(rescale=1. / 255)
# Instanciation des générateurs
train_generator = train_datagen.flow_from_directory(train_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='binary')
validation_generator = validation_datagen.flow_from_directory(validation_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='binary')
return train_generator, validation_generator
def model(train_generator, validation_generator):
# Vérification du format des images
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
# Construction du modèle
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout({{choice([0.1, 0.2, 0.3, 0.4, 0.5])}}))
model.add(Dense(1))
model.add(Activation({{choice(['relu', 'sigmoid'])}}))
model.compile(loss='binary_crossentropy', optimizer=SGD(lr={{uniform([0, 1])}}), metrics=['accuracy'])
# Lancement de la phase d'apprentissage sur la base de train
model.fit_generator(train_generator, steps_per_epoch=nb_train_samples // batch_size, epochs=epochs, validation_data=validation_generator, validation_steps=nb_validation_samples // batch_size)
score, acc = model.evaluate_generator(generator=validation_generator, steps=nb_validation_samples // batch_size)
return {'loss': -acc, 'status': STATUS_OK, 'model': model}
if __name__ == '__main__':
train_generator, validation_generator = data()
best_run, best_model = optim.minimize(model=model, data=data, algo=tpe.suggest, max_evals=10, notebook_name='CNN_cats&dogs_2600_hyperopt_gColab', trials=Trials())
print('Evaluation of best performing model:')
print(best_model.evaluate(validation_generator))
'''
On the line :
best_run, best_model = optim.minimize(model=model, data=data, algo=tpe.suggest, max_evals=10, notebook_name='CNN_cats&dogs_2600_hyperopt_gColab', trials=Trials())
I have this error message :
/usr/local/lib/python3.6/dist-packages/hyperas/optim.py in retrieve_data_string(data, verbose)
219 data_string = inspect.getsource(data)
220 first_line = data_string.split("\n")[0]
---> 221 indent_length = len(determine_indent(data_string))
222 data_string = data_string.replace(first_line, "")
223 r = re.compile(r'^\s*return.*'
TypeError: object of type 'NoneType' has no len()

Why does my model predict the same label?

I am training a small network and the training seems to go fine, the val loss decreases, I reach validation accuracy around 80, and it actually stops training once there is no more improvement (patience=10). It trained for 40 epochs. However, it keeps predicting only one class for every test image! I tried to initialize the conv layers randomly, I added regularizers, I switched from Adam to SGD, I added clipvalue, I added dropouts. I also switched to softmax (I have only two labels but I saw some recommendation on using softmax and Dense layer with 2 neurons). Some or one of these helped with the overfitting, but nothing worked for the prediction problem. The data is balanced, though it is a small dataset, so it doesn't make sense that it reaches 80% if it predicts the same labels for evaluation set as well.
What is wrong with my model and how can I fix it? Any comments are welcome.
#Import some packages to use
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.initializers import RandomNormal
os.environ["CUDA_VISIBLE_DEVICES"]="0"
epochs = 200
callbacks = []
#schedule = None
decay = 0.0
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224,224), class_mode=None, batch_size=1, shuffle='False', seed=42)
#We will use a batch size of 32. Note: batch size should be a factor of 2.***4,8,16,32,64...***
#batch_size = 4
#from keras import layers
from keras import models
from keras import optimizers
#from keras.layers import Dropout
#from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
model = models.Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05), input_shape=(224, 224, 3)))
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2',kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block2_pool'))
model.add(Dropout(0.2))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv3', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block3_pool'))
model.add(Dropout(0.2))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv1'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv2'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv3'))
#model.add(layers.MaxPooling2D((2, 2), name='block4_pool'))
model.add(Flatten())
model.add(Dense(256, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu', kernel_initializer='he_uniform'))
model.add(Dropout(0.5))
model.add(Dense(2, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='softmax'))
#Lets see our model
model.summary()
#We'll use the RMSprop optimizer with a learning rate of 0.0001
#We'll use binary_crossentropy loss because its a binary classification
#model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD(lr=1e-5, momentum=0.9), metrics=['acc'])
model.compile(loss='categorical_crossentropy',
#optimizer=optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=decay),
optimizer=optimizers.SGD(lr= 0.0001, clipvalue = 0.5, decay=1e-6, momentum=0.9, nesterov=True),
metrics=['accuracy'])
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
#Save the model
model.save_weights('/home/d/Desktop/s/categorical_weights.h5')
model.save('/home/d/Desktop/s/categorical_model_keras.h5')
#lets plot the train and val curve
#get the details form the history object
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
model.evaluate_generator(generator=val_generator, steps=val_generator.n // val_generator.batch_size)
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
test_generator.reset()
pred=model.predict_generator(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)
predicted_class_indices=np.argmax(pred,axis=1)
labels = (train_generator.class_indices)
np.save('/home/d/Desktop/s/classes', labels)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
"Predictions":predictions})
results.to_csv("categorical_results.csv",index=False)
One of the problems that could lead to such behavior is imbalanced dataset. Your model found out that if it predicts the dominant class each time, it would get a good results.
There are many ways to tackle an imbalance dataset. Here is a good tutorial.
One of the easiest yet powerful solution is to apply higher penalty to your loss if it wrongly predicted the smaller class. This can be implemented in keras by setting the parameter class_weight in the fitor fit_generator function.
It can be a dictionary of example:
class_weight = {0: 0.75, 1: 0.25} # does not necessarily add to up 1.
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
class_weight= class_weight, # this is the important part
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
Adding to Coderji's answer, it might also prove advantageous to counter class imbalance using stratified k-fold cross-validation, with k = 5 being common practice. This basically splits your data set up into k splits like regular cross-validation, but also stratifies these splits. In the case of class imbalance, each of these splits contain over-/undersampled classes compensating for their lower/higher occurence within the data set.
As of yet Keras does not have it's own way to use stratified k-fold cross-validation. Instead it's advised to use sklearn's StratifiedKFold. This article gives a detailed overview how to achieve this in Keras,
with the gist of it being:
from sklearn.model_selection import StratifiedKFold# Instantiate the cross validator
skf = StratifiedKFold(n_splits=kfold_splits, shuffle=True)# Loop through the indices the split() method returns
for index, (train_indices, val_indices) in enumerate(skf.split(X, y)):
print "Training on fold " + str(index+1) + "/10..." # Generate batches from indices
xtrain, xval = X[train_indices], X[val_indices]
ytrain, yval = y[train_indices], y[val_indices] # Clear model, and create it
model = None
model = create_model()
# Debug message I guess
# print "Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may be a while..."
history = train_model(model, xtrain, ytrain, xval, yval)
accuracy_history = history.history['acc']
val_accuracy_history = history.history['val_acc']
print "Last training accuracy: " + str(accuracy_history[-1]) + ", last validation accuracy: " + str(val_accuracy_history[-1])
create_model() returns a compiled Keras model
train_model() returns last history object of its last model.fit() operation

Keras fit_generator() not working due to shape error

I am running MNIST prediction using Keras, with tensorflow backend.
I have code that runs with batches , using Keras fit() as
(X_train, y_train), (X_test, y_test) = mnist.load_data()
N1 = X_train.shape[0]
N2 = X_test.shape[0]
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
# reshape N1 samples to num_pixels
x_train = X_train.reshape(N1, num_pixels).astype('float32') # shape is now (60000,784)
x_test = X_test.reshape(N2, num_pixels).astype('float32') # shape is now (10000,784)
x_train = x_train / 255
x_test = x_test / 255
y_train = np_utils.to_categorical(y_train) #(60000,10)
y_test = np_utils.to_categorical(y_test) # (10000,10):
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
max_batches = 2 * len(x_train) / batch_size # 2*60000/200
# reshape to be [samples][width][height][ channel] for ImageDataGenerator
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
datagen = ImageDataGenerator(rescale= 1./255)
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
for e in range(epochs):
batches = 0
for x_batch, y_batch in train_gen:
# x_batch is of size [batch_sz,w,h,ch]: resize to [bth_sz,pixel_sz]: (200,28,28,1)-> (200,784)
# for model.fit
x_batch = np.reshape(x_batch, [-1, num_pixels])
model.fit(x_batch, y_batch,validation_split=0.15,verbose=0)
batches += 1
print("Epoch %d/%d, Batch %d/%d" % (e+1, epochs, batches, max_batches))
if batches >= max_batches:
break
scores = model.evaluate(x_test, y_test, verbose=0)
However, when I try to implement similar code using fit_generator(), I get an error.
the code is as below:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# separate data into train and validation
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, shuffle= True)
# number of training samples
N1 = X_train.shape[0] # training size
N2 = X_test.shape[0] # test size
N3 = X_valid.shape[0] # valid size
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
y_train = np_utils.to_categorical(y_train)
y_valid = np_utils.to_categorical(y_valid)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
steps_per_epoch_tr = int(N1/ batch_size) # 51000/200
steps_per_epoch_val = int(N3/batch_size)
# reshape to be [samples][width][height][ channel] for ImageData Gnerator->datagen.flow
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
x_v = X_valid.reshape(N3, w, h, 1).astype('float32')
# define data preparation
datagen = ImageDataGenerator(rescale=1./255) # scales x_t/x_v
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
valid_gen = datagen.flow(x_v,y_valid, batch_size=batch_size)
model.fit_generator(train_gen,steps_per_epoch = steps_per_epoch_tr,validation_data = valid_gen,
validation_steps = steps_per_epoch_val,epochs=epochs)
This gives an error:
This is due to expected image dimension error, but I am not sure where/how to fix this. any help is greatly appreciated.
Thanks
sedy
In the model.fit() case, this line flattened the input before feeding it for training.
x_batch = np.reshape(x_batch, [-1, num_pixels])
But in the generator case, there is nothing to flatten the input before feeding it to the Dense layer. The Dense layer cannot process 2D input (28 x 28). Adding, a Flatten() layer to the model should do the trick as shown below.
def baseline_model():
# create model
model = Sequential()
model.add(Flatten(input_shape=(28,28,1)))
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model

Resources