CIFAR10 example : Keras - python-3.x

I am a total beginner and trying to implement image classifier using CIFAR 10 data set using Keras, i used the following code here, i learnt how it works and
I tried this small snippet of code for learning to implement CIFAR 10 but its not working, its not giving any error but the process doesn't start at all. I don't know what am i missing here.
'''
#Train a simple deep CNN on the CIFAR10 small images dataset.
It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
(it's still underfitting at that point, though).
'''
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os
batch_size = 32
num_classes = 10
epochs = 100
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'
# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
if not data_augmentation:
print('Not using data augmentation.')
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(x_test, y_test),
shuffle=True)
else:
print('Using real-time data augmentation.')
# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
featurewise_center=False, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
zca_whitening=False, # apply ZCA whitening
zca_epsilon=1e-06, # epsilon for ZCA whitening
rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180)
# randomly shift images horizontally (fraction of total width)
width_shift_range=0.1,
# randomly shift images vertically (fraction of total height)
height_shift_range=0.1,
shear_range=0., # set range for random shear
zoom_range=0., # set range for random zoom
channel_shift_range=0., # set range for random channel shifts
# set mode for filling points outside the input boundaries
fill_mode='nearest',
cval=0., # value used for fill_mode = "constant"
horizontal_flip=True, # randomly flip images
vertical_flip=False, # randomly flip images
# set rescaling factor (applied before any other transformation)
rescale=None,
# set function that will be applied on each input
preprocessing_function=None,
# image data format, either "channels_first" or "channels_last"
data_format=None,
# fraction of images reserved for validation (strictly between 0 and 1)
validation_split=0.0)
# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)
# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(x_train, y_train,
batch_size=batch_size),
epochs=epochs,
validation_data=(x_test, y_test),
workers=4)
# Save model and weights
if not os.path.isdir(save_dir):
os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
Response on Python Ide 3.6.8 :
Python 3.6.8 (tags/v3.6.8:3c6b436a57, Dec 24 2018, 00:16:47) [MSC v.1916 64
bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
>>>
======================== RESTART: D:\TrainTheModel.py ========================
Using TensorFlow backend.
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
Using real-time data augmentation.
Epoch 1/100
=============================== RESTART: Shell ===============================
>>>

I fixed your errors. If you using TensorFlow as backend, better use Keras from TensorFlow libraries.
# Train a simple deep CNN on the CIFAR10 small images dataset.
# It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
# (it's still underfitting at that point, though).
from __future__ import print_function
from tensorflow.keras.utils import to_categorical
from tensorflow.python.keras import optimizers
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
import os
batch_size = 32
num_classes = 10
epochs = 100
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'
# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Convert class vectors to binary class matrices.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
# initiate RMSprop optimizer
opt = optimizers.rmsprop(lr=0.0001, decay=1e-6)
# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
if not data_augmentation:
print('Not using data augmentation.')
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(x_test, y_test),
shuffle=True)
else:
print('Using real-time data augmentation.')
# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
featurewise_center=False, # set input mean to 0 over the dataset
samplewise_center=False, # set each sample mean to 0
featurewise_std_normalization=False, # divide inputs by std of the dataset
samplewise_std_normalization=False, # divide each input by its std
zca_whitening=False, # apply ZCA whitening
zca_epsilon=1e-06, # epsilon for ZCA whitening
rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180)
# randomly shift images horizontally (fraction of total width)
width_shift_range=0.1,
# randomly shift images vertically (fraction of total height)
height_shift_range=0.1,
shear_range=0., # set range for random shear
zoom_range=0., # set range for random zoom
channel_shift_range=0., # set range for random channel shifts
# set mode for filling points outside the input boundaries
fill_mode='nearest',
cval=0., # value used for fill_mode = "constant"
horizontal_flip=True, # randomly flip images
vertical_flip=False, # randomly flip images
# set rescaling factor (applied before any other transformation)
rescale=None,
# set function that will be applied on each input
preprocessing_function=None,
# image data format, either "channels_first" or "channels_last"
data_format=None,
# fraction of images reserved for validation (strictly between 0 and 1)
validation_split=0.0)
# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)
# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(x_train, y_train,
batch_size=batch_size),
epochs=epochs,
validation_data=(x_test, y_test),
workers=4)
# Save model and weights
if not os.path.isdir(save_dir):
os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
Response (For test I use only 2 epochs):
x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples
Using real-time data augmentation.
Epoch 1/2
2019-02-09 16:26:13.219359: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties:
name: GeForce GTX 750 major: 5 minor: 0 memoryClockRate(GHz): 1.137
pciBusID: 0000:04:00.0
totalMemory: 1.95GiB freeMemory: 1.32GiB
2019-02-09 16:26:13.219405: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0
2019-02-09 16:26:13.550797: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2019-02-09 16:26:13.550848: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0
2019-02-09 16:26:13.550865: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N
2019-02-09 16:26:13.551055: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1059 MB memory) -> physical GPU (device: 0, name: GeForce GTX 750, pci bus id: 0000:04:00.0, compute capability: 5.0)
1563/1563 [==============================] - 44s 28ms/step - loss: 1.8483 - acc: 0.3220 - val_loss: 1.5551 - val_acc: 0.4414
Epoch 2/2
1563/1563 [==============================] - 42s 27ms/step - loss: 1.5651 - acc: 0.4263 - val_loss: 1.3814 - val_acc: 0.5065
Saved trained model at /home/mid/Documents/saved_models/keras_cifar10_trained_model.h5
10000/10000 [==============================] - 2s 189us/step
Test loss: 1.3814242065429687
Test accuracy: 0.5065

Related

How to save checkpoints as filenames with every epoch and then load the weights from the latest saved one in Tensorflow 2?

When I run the following code, I am getting folders created named cp_1, cp_2 while I want to save checkpoint files with every epoch.
Then I want to use the latest saved checkpoint file to load the weights for my model instance with model.load_weights(tf.train.latest_checkpoint('model_checkpoints_5000'))
how can I do it please?
import os
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
# Use the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
# Use smaller subset -- speeds things up
x_train = x_train[:10000]
y_train = y_train[:10000]
x_test = x_test[:1000]
y_test = y_test[:1000]
# define a function that creates a new instance of a simple CNN.
def create_model():
model = Sequential([
Conv2D(filters=16, input_shape=(32, 32, 3), kernel_size=(3, 3),
activation='relu', name='conv_1'),
Conv2D(filters=8, kernel_size=(3, 3), activation='relu', name='conv_2'),
MaxPooling2D(pool_size=(4, 4), name='pool_1'),
Flatten(name='flatten'),
Dense(units=32, activation='relu', name='dense_1'),
Dense(units=10, activation='softmax', name='dense_2')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
checkpoint_5000_path = './model_checkpoints_5000/cp_{epoch:02d}'
checkpoint_5000 = ModelCheckpoint(filepath = checkpoint_5000_path,
save_weights= True,
save_freq = 'epoch',
verbose = 1)
model = create_model()
model.fit(x = x_train,
y = y_train,
epochs = 3,
validation_data = (x_test, y_test),
batch_size = 10,
callbacks = [checkpoint_5000])
My output is the following.
Epoch 00001: saving model to ./model_checkpoints_5000\cp_01
INFO:tensorflow:Assets written to: ./model_checkpoints_5000\cp_01\assets
Epoch 2/3
1000/1000 [==============================] - 3s 3ms/step - loss: 1.4493 - accuracy: 0.4744 - val_loss: 1.4664 - val_accuracy: 0.4770
I have tried adding .h5 to
'./model_checkpoints_5000/cp_{epoch:02d}.h5'.
however, then if I try
tf.train.latest_checkpoint('model_checkpoints_5000'), I get None?
while I should be getting the file name cp_03.h5?
You need to use below code after training the model:
checkpoint_dir = os.path.dirname(checkpoint_5000_path)
os.listdir(checkpoint_dir)
Output:
['cp_01',
'cp_00.h5',
'cp_03',
'cp_00.data-00000-of-00001',
'cp_00.index',
'cp_03.h5',
'cp_02',
'cp_01.h5',
'cp_02.h5',
'checkpoint']
Please check this link for more details.

keras cnn training accuacy and loss do not change and val loss and accuracy remains 0

i am working on face expression classification problem, i downloaded some images from internet and some i clicked by myself of expression happy (100 pics), sad(100 pics), neutral(50 pics) and distracted(50) pics, as the data is very less i am using data augmentation and training a CNN from the example in keras documentation, also as i am working on kaggle notebook i am not saving the augemted data i am converting to array and appending with the original data.
you can try the code yourself at avidavi
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers.core import Activation, Dropout, Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Model
from keras.optimizers import SGD
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import np_utils
#loading from csv
train = pd.read_csv(r'/kaggle/input/traindata/train.csv')
#each row is of 9217 features in which first 9216(96*96) are image converted to array
#and last column is label
#labels = {1:'happy', 2:'sad', 3:'neutral', 4:'distracted'}
#below i am defining data generator
datagen = ImageDataGenerator(
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.0,
zoom_range=0.1,
horizontal_flip=True,
vertical_flip=False,
fill_mode='nearest')
#below converting array for suitable shapes for keras, as these are gray scale images so 1 channel
train_array = train.values
print(train_array.shape)
y_train_array= train_array[:,-1]
X_train_array = train_array[:,:-1].reshape(train_array.shape[0],96,96,1)
# del train_array
print(X_train_array.shape, y_train_array.shape)
#below creating 20 more images for each image and creating a new array
X_test_arr = []
y_test_arr = []
for i in range(X_train_array.shape[0]):
j=0
for batch in datagen.flow(x=X_train_array[i].reshape(1,96,96,1), y=y_train_array[i].reshape(1,), batch_size=1):
j+=1
X_test_arr.append(batch[0].reshape(96,96))
y_test_arr.append(batch[1])
if j == 20:
break
#even though i am creating 20 or 50 augmented images or normalizing them or converting to int
# output accuracy does not change
#creating arrays from above list
y_aug_arr = np.array(y_test_arr)
X_aug_arr = np.array(X_test_arr)
#appending augmented data to original data
X_train_array = np.r_[X_train_array, X_aug_arr]
y_train_array = np.r_[y_train_array, y_aug_arr]
#reshaping for keras input
X_train_array = X_train_array.reshape(X_train_array.shape[0], X_train_array.shape[1], X_train_array.shape[2], 1)
#creating model, i have used adam, rmsprop accuracy does not change while using SGD accuracy drops to almost 0
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(96, 96, 1)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.summary()
#fitting model
batch_size = 32
epochs = 2
history = model.fit(X_train_array, y_train_array.reshape(-1), batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1)
#output is same no matter what i do
Epoch 1/2
200/200 [==============================] - 34s 169ms/step - loss: 0.0000e+00 - accuracy: 0.3957 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/2
200/200 [==============================] - 34s 168ms/step - loss: 0.0000e+00 - accuracy: 0.3957 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00

Why does my model predict the same label?

I am training a small network and the training seems to go fine, the val loss decreases, I reach validation accuracy around 80, and it actually stops training once there is no more improvement (patience=10). It trained for 40 epochs. However, it keeps predicting only one class for every test image! I tried to initialize the conv layers randomly, I added regularizers, I switched from Adam to SGD, I added clipvalue, I added dropouts. I also switched to softmax (I have only two labels but I saw some recommendation on using softmax and Dense layer with 2 neurons). Some or one of these helped with the overfitting, but nothing worked for the prediction problem. The data is balanced, though it is a small dataset, so it doesn't make sense that it reaches 80% if it predicts the same labels for evaluation set as well.
What is wrong with my model and how can I fix it? Any comments are welcome.
#Import some packages to use
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.initializers import RandomNormal
os.environ["CUDA_VISIBLE_DEVICES"]="0"
epochs = 200
callbacks = []
#schedule = None
decay = 0.0
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224,224),class_mode='categorical', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224,224), class_mode=None, batch_size=1, shuffle='False', seed=42)
#We will use a batch size of 32. Note: batch size should be a factor of 2.***4,8,16,32,64...***
#batch_size = 4
#from keras import layers
from keras import models
from keras import optimizers
#from keras.layers import Dropout
#from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
model = models.Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05), input_shape=(224, 224, 3)))
model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2',kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block2_pool'))
model.add(Dropout(0.2))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv1', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv2', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(Conv2D(256, (3, 3), activation='relu', name='block3_conv3', kernel_initializer=RandomNormal(
mean=0.0, stddev=0.05), bias_initializer=RandomNormal(mean=0.0, stddev=0.05)))
model.add(MaxPooling2D((2, 2), name='block3_pool'))
model.add(Dropout(0.2))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv1'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv2'))
#model.add(layers.Conv2D(512, (3, 3), activation='relu', name='block4_conv3'))
#model.add(layers.MaxPooling2D((2, 2), name='block4_pool'))
model.add(Flatten())
model.add(Dense(256, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu', kernel_initializer='he_uniform'))
model.add(Dropout(0.5))
model.add(Dense(2, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='softmax'))
#Lets see our model
model.summary()
#We'll use the RMSprop optimizer with a learning rate of 0.0001
#We'll use binary_crossentropy loss because its a binary classification
#model.compile(loss='binary_crossentropy', optimizer=optimizers.SGD(lr=1e-5, momentum=0.9), metrics=['acc'])
model.compile(loss='categorical_crossentropy',
#optimizer=optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=decay),
optimizer=optimizers.SGD(lr= 0.0001, clipvalue = 0.5, decay=1e-6, momentum=0.9, nesterov=True),
metrics=['accuracy'])
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
#Save the model
model.save_weights('/home/d/Desktop/s/categorical_weights.h5')
model.save('/home/d/Desktop/s/categorical_model_keras.h5')
#lets plot the train and val curve
#get the details form the history object
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
model.evaluate_generator(generator=val_generator, steps=val_generator.n // val_generator.batch_size)
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
test_generator.reset()
pred=model.predict_generator(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)
predicted_class_indices=np.argmax(pred,axis=1)
labels = (train_generator.class_indices)
np.save('/home/d/Desktop/s/classes', labels)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
"Predictions":predictions})
results.to_csv("categorical_results.csv",index=False)
One of the problems that could lead to such behavior is imbalanced dataset. Your model found out that if it predicts the dominant class each time, it would get a good results.
There are many ways to tackle an imbalance dataset. Here is a good tutorial.
One of the easiest yet powerful solution is to apply higher penalty to your loss if it wrongly predicted the smaller class. This can be implemented in keras by setting the parameter class_weight in the fitor fit_generator function.
It can be a dictionary of example:
class_weight = {0: 0.75, 1: 0.25} # does not necessarily add to up 1.
history = model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
class_weight= class_weight, # this is the important part
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size,
callbacks=[earlyStopping, mcp_save]) #, reduce_lr_loss])
Adding to Coderji's answer, it might also prove advantageous to counter class imbalance using stratified k-fold cross-validation, with k = 5 being common practice. This basically splits your data set up into k splits like regular cross-validation, but also stratifies these splits. In the case of class imbalance, each of these splits contain over-/undersampled classes compensating for their lower/higher occurence within the data set.
As of yet Keras does not have it's own way to use stratified k-fold cross-validation. Instead it's advised to use sklearn's StratifiedKFold. This article gives a detailed overview how to achieve this in Keras,
with the gist of it being:
from sklearn.model_selection import StratifiedKFold# Instantiate the cross validator
skf = StratifiedKFold(n_splits=kfold_splits, shuffle=True)# Loop through the indices the split() method returns
for index, (train_indices, val_indices) in enumerate(skf.split(X, y)):
print "Training on fold " + str(index+1) + "/10..." # Generate batches from indices
xtrain, xval = X[train_indices], X[val_indices]
ytrain, yval = y[train_indices], y[val_indices] # Clear model, and create it
model = None
model = create_model()
# Debug message I guess
# print "Training new iteration on " + str(xtrain.shape[0]) + " training samples, " + str(xval.shape[0]) + " validation samples, this may be a while..."
history = train_model(model, xtrain, ytrain, xval, yval)
accuracy_history = history.history['acc']
val_accuracy_history = history.history['val_acc']
print "Last training accuracy: " + str(accuracy_history[-1]) + ", last validation accuracy: " + str(val_accuracy_history[-1])
create_model() returns a compiled Keras model
train_model() returns last history object of its last model.fit() operation

Keras VGG16 pretrained model accuracy does not increase

Hi guys i have a problem on VGG16 on Keras.
I am trying to make accuracy higher but did not work.
I only have 46 data training, 12 classes, and 26 data validation.
Currently, the highest accuracy that I can get is 0.18.
I try to change the batch size into 2 but the result is worst than i expected.
I don't think I should set the data training sample should be higher than my actual data.
What should I do to increase the accuracy?
This is my actual code:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input, Flatten, Dense, Dropout
from keras.models import Model, Sequential
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
# dimensions of our images.
from keras.preprocessing.image import ImageDataGenerator
img_width, img_height = 224, 224
train_data_dir = 'database/train'
validation_data_dir = 'database/validation'
nb_train_samples = 46
nb_validation_samples = 26
epochs = 50
batch_size = 4
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
#Get back the convolutional part of a VGG network trained on ImageNet
vgg_conv = VGG16(weights='imagenet', include_top=True)
vgg_conv.summary()
print('VGG Pretrained Model loaded.')
#Add a layer where input is the output of the second last layer
x = Dense(12, activation='softmax', name='predictions')(vgg_conv.layers[-2].output)
model = Model(input=vgg_conv.input, output=x)
#In the summary, weights and layers from VGG part will be hidden, but they will be fit during the training
model.summary()
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
rescale=1. / 224,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 224)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical')
# compile model
# model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-4), metrics=['accuracy'])
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Train the model
history = model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples / batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples / batch_size)
# Save the model
model.save('vgg16_pretrained_5.h5')
# Check Performance
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Since you have 12 classes and only 46 observations, it roughly becomes 2 observations per class (This is just a guess without even looking at the data set). With this little data, the NN model can not even understand the pattern of the data and eventually will fail to generalize. So you need at least for than 2k observations for better results.

Getting wrong prediction for cnn (Dogs Vs Cat ) Keras

I have programmed convolutional neural network in keras to predict whether the image is of a cat or a dog.
I got an accuracy around 80%.
I tried checking the prediction of my code for many images as it always gave result of the one class out of the classes (dogs and cat) to be predicted.
I will walk you through my code:
The first part of my code is Data Preparation:
#Labels
Y = np.concatenate([np.array([[1,0]]*len(cats)),
np.array([[0,1]]*len(dogs))])
print(Y)
#Features
import scipy.misc
from keras.models import model_from_json
X=[]
for name in cats:
converted_image = scipy.misc.imresize(scipy.misc.imread(name), (64, 64))
X.append(converted_image)
for name in dogs:
converted_image = scipy.misc.imresize(scipy.misc.imread(name), (64, 64))
X.append(converted_image)
X= np.array(X)
print(len(X))
print(X.shape)
Now I have divided the data into training and testing
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.10,random_state=10)
X_train=X_train.astype("float32")
X_test = X_test.astype("float32")
print("Shape Of the Training Features are :",X_train.shape)
print("Shape Of Testing Features are :",X_test.shape)
print("Number of Training Samples : ",X_train.shape[0])
print("Number of Testing Samples :",X_test.shape[0])
Now, I will show my complete CNN architecture below:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation,Lambda,Flatten
from keras.layers import Conv2D,MaxPooling2D
def layer(input_shape = (64,64,3)):
model=Sequential()
model.add(Lambda(lambda x: x/127.5 - 1.,input_shape=input_shape, output_shape=input_shape))
model.add(Conv2D(32, (3, 3), activation='relu', name='conv1', input_shape=input_shape, padding="valid"))
model.add(Conv2D(64,(3,3),activation='relu',name='conv2',padding='valid'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(128,(3,3),activation='relu',name='conv3',padding='valid'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
#model.add(Dense(25088,activation='relu'))
#model.add(Dense(12000,activation='relu'))
#model.add(Dense(6000,activation='relu'))
model.add(Dense(500,activation='relu'))
model.add(Dense(2,activation='softmax'))
model.summary()
return model
model = layer()
model.compile(loss='mse',optimizer='adadelta',metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=128, epochs=5, verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
My accuracy and score from the output is as follows:
Test score: 0.1459069953918457
Test accuracy: 0.7876
-Saving and loading The Model Code
#saving
model_json = model.to_json()
open('cd.json','w').write(model_json)
#Save Weights
model.save_weights('cd_weights.h5',overwrite=True)
#loading
model_architecture = 'cd.json'
model_weights = 'cd_weights.h5'
model = model_from_json(open(model_architecture).read())
model.load_weights(model_weights)
Now prediction part
img_names = ['d1.jpg','c1.jpg']
imgs = [np.transpose(scipy.misc.imresize(scipy.misc.imread(img_name), (64, 64)),
(1, 0, 2)).astype('float32')
for img_name in img_names]
imgs = np.array(imgs) / 255
# train
optim = SGD()
model.compile(loss='categorical_crossentropy', optimizer=optim,
metrics=['accuracy'])
predictions = model.predict_classes(imgs)
print(predictions)
Output : [0 0]
The images are taken from training set c1-cat, d1-dog for every image it outputs zero as predicted.
I think there is some problem with model.predict_classes() function.
You can use the following code to perform the same thing as model.predict_classes() can do.
This will give us the prediction in terms of probability.
predictions = model.predict(imgs)
ex:
1) [0.7865448 0.21910465] says there is 78% chance of the picture being the first class (cat) and 21% of chance being the second class(dog)
2) [0.7856076 0.22000787] says there is 78% chance of the picture being the first class (cat) and 21% of chance being the second class(dog)
In order to convert this probability to classes we use np.argmax function.
print(np.argmax(predictions ))
Your output will be 0 indication first class(cat)
or 1 indication second class(dog)

Resources