OOM during prediction with a batchSize that works during training - Keras

I'm confused by a Keras behavior I'm seeing. I'm fine-tuning the ResNet50 model.
from keras import applications
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from regressionGenerator import regressionGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard
from keras.regularizers import l2
batchSize = 32
numObservations = 948
iterationsPerEpoch = numObservations // batchSize
targetSize = (1024, 1024)  # Keep aspect ratio, approximately 0.12 of original image size
preprocessFcn = applications.resnet50.preprocess_input
base_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(targetSize[0], targetSize[1], 3), pooling=None)
x = base_model.output
x = Flatten()(x)
x = Dense(2048,activation='relu',kernel_regularizer=l2(0.0001))(x)
x = Dropout(0.5)(x)
predictions = Dense(5)(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=SGD(lr=0.00001, momentum=0.9), loss='mean_squared_error')
train_generator = regressionGenerator(batchSize,targetSize,preprocessFcn)
# Train for a few epochs to initialize top of network
model.fit_generator(
    train_generator,
    steps_per_epoch=iterationsPerEpoch,
    epochs=15)
This model trains and fits in memory. When I predict, also from a generator, I cannot use the same batchSize of 32, or I receive an OOM error. I have to lower the batchSize to 8 to fit in memory on the same GPU.
import numpy as np
from keras.models import load_model
from predictionGenerator import predictionGenerator
from keras import applications
model = load_model('')
preprocessFcn = applications.resnet50.preprocess_input
batchSize = 8
targetSize = (1024,1024)
test_generator,idxList,numSteps = predictionGenerator(batchSize, targetSize, preprocessFcn)
Ypred = model.predict_generator(test_generator, numSteps)
Ypred = np.around(Ypred)
Ypred = np.clip(Ypred, 0, np.inf) # Clip negative counts
Ypred = Ypred[idxList, :] # Sort according to order of observations in generator
numObservations = len(idxList)
fileindices = np.reshape(np.arange(numObservations),(numObservations,1))
Ypred = np.hstack((fileindices,Ypred))
How is it possible that I can use a batchSize of 32 and train, but have to lower the batchSize by a factor of 4 during prediction?
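To get a feel for where the memory goes (a rough sketch of one component only, not an answer): sum the per-batch activation sizes over the layers. These numbers ignore weights, gradients, optimizer state, and cuDNN workspace buffers, any of which can dominate, so treat them as estimates.
import numpy as np

def activation_megabytes(model, batch_size):
    # Per-batch activation memory in MB, assuming float32 (4 bytes).
    total_floats = 0
    for layer in model.layers:
        shapes = layer.output_shape
        if not isinstance(shapes, list):  # normalize single-output layers
            shapes = [shapes]
        for s in shapes:
            total_floats += batch_size * np.prod([d for d in s[1:] if d])
    return total_floats * 4 / 1024 ** 2

print(activation_megabytes(model, 32))  # training batch size
print(activation_megabytes(model, 8))   # prediction batch size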

Related

Model not training when using batch normalization with the Keras functional API

I'm going through some tutorials using the Keras functional API in TensorFlow 2, and I'm having some trouble including BatchNormalization layers when using the functional API.
Using roughly the same code:
- This network trains with the sequential API and batch normalization
- This network trains with the functional API, but with the batch normalization layers commented out
- This network does not train using the functional API with batch normalization layers
Am I missing a step somewhere? Do I need to set training=True or training=False somewhere in the code?
Working Sequential Code:
#subclassed layers in keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer):  # inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)):  # needs both __init__ and call; initialize the layer
        super(CNNBlock, self).__init__()  # initialize the superclass layers.Layer
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, padding='same',
                                  kernel_initializer='he_normal', bias_initializer='zeros')  # the conv portion of this block; strides is passed through to the convolution
        self.bn = layers.BatchNormalization()  # initialize batch normalization in this block

    def call(self, input_tensor, training=False):  # what happens when this block is called; the training bool switches training/evaluation behavior
        # call method (forward method in pytorch)
        # take the input tensor and run it through the layers initialized in __init__
        x = self.conv(input_tensor)  # run the convolution
        x = self.bn(x, training=training)  # batch norm
        x = tf.nn.relu(x)  # activation function for this layer
        return x
class CNNBlock_init(layers.Layer):  # inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, input_size, kernel_size=3):  # needs both __init__ and call; initialize the layer
        super(CNNBlock_init, self).__init__()  # make sure the class name passed to super matches
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size,  # the first layer needs an input shape to build properly
                                  padding='same')  # the conv portion of this block
        self.bn = layers.BatchNormalization()  # initialize batch normalization in this block

    def call(self, input_tensor, training=False):  # the training bool switches training/evaluation behavior
        # call method (forward method in pytorch)
        x = self.conv(input_tensor)  # input_shape belongs in the constructor, not in the call
        x = self.bn(x, training=training)  # batch norm
        x = tf.nn.relu(x)  # activation function for this layer
        return x
#build model with this
model = keras.Sequential(
    [
        CNNBlock(64, kernel_size=4, strides=(2,2)),
        Dropout(0.2),
        CNNBlock(64, kernel_size=2, strides=(2,2)),
        Dropout(0.2),
        CNNBlock(32),
        Dropout(0.2),
        CNNBlock(32),
        MaxPooling2D(pool_size=(2,2), strides=2),
        Dropout(0.2),
        Flatten(),
        Dense(64, activation='relu',  # dense layer to combine features
              kernel_initializer='he_normal',
              bias_initializer='zeros'),
        Dropout(0.2),
        Dense(10, activation='softmax',  # softmax for classification
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros')
    ])
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#model.build(input_shape=(32,32,3))
#model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True)  # verbose=1 shows the time for each epoch
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Working Functional Code:
# same convolutional structure but with the keras functional API
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
import numpy as np
import logging
tf.get_logger().setLevel(logging.ERROR)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import matplotlib.pyplot as plt
%matplotlib inline
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar_dataset.load_data()
EPOCHS = 128
BATCH_SIZE = 128
#standardize dataset
mean = np.mean(train_images)
stdev = np.std(train_images)
train_images = (train_images - mean)/stdev
test_images = (test_images - mean)/stdev
#change labels to one-hot
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Keras model subclassing: build your own layers
#CNN -> batch norm -> Relu
#create a class for this kind of block
class CNNBlock(layers.Layer):  # inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, kernel_size=3, strides=(1,1)):  # needs both __init__ and call; initialize the layer
        super(CNNBlock, self).__init__()  # initialize the superclass layers.Layer
        self.conv = layers.Conv2D(out_channels, kernel_size, strides=strides, padding='same',
                                  kernel_initializer='he_normal', bias_initializer='zeros')  # the conv portion of this block; strides is passed through to the convolution
        #self.bn = layers.BatchNormalization()  # initialize batch normalization in this block

    def call(self, input_tensor, training=False):  # the training bool switches training/evaluation behavior
        # call method (forward method in pytorch)
        x = self.conv(input_tensor)  # run the convolution
        #x = self.bn(x, training=training)  # batch norm
        x = tf.nn.relu(x)  # activation function for this layer
        return x
class CNNBlock_init(layers.Layer):  # inherits from layers.Layer - keeps track of what we need for backpropagation
    def __init__(self, out_channels, input_size, kernel_size=3):  # needs both __init__ and call; initialize the layer
        super(CNNBlock_init, self).__init__()  # make sure the class name passed to super matches
        self.input_size = input_size
        self.conv = layers.Conv2D(out_channels, kernel_size,
                                  input_shape=input_size,  # the first layer needs an input shape to build properly
                                  padding='same')  # the conv portion of this block
        #self.bn = layers.BatchNormalization()  # initialize batch normalization in this block

    def call(self, input_tensor, training=False):  # the training bool switches training/evaluation behavior
        # call method (forward method in pytorch)
        x = self.conv(input_tensor)  # input_shape belongs in the constructor, not in the call
        #x = self.bn(x, training=training)  # batch norm
        x = tf.nn.relu(x)  # activation function for this layer
        return x
# Build the model with the Keras functional API
input_shape = (32,32,3)
chanDim = -1
#define model with first inputs
inputs = Input(shape=input_shape)
#functional API passing layers through
x = CNNBlock(64,kernel_size=4,strides=(2,2))(inputs)
x = Dropout(0.2)(x)
x = CNNBlock(64,kernel_size=2,strides=(2,2))(x)
x = Dropout(0.2)(x)
x = CNNBlock(64)(x)
x = MaxPooling2D(pool_size=(2,2), strides=2)(x)
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(64, activation='relu',  # dense layer to combine features
          kernel_initializer='he_normal',
          bias_initializer='zeros')(x)
x = Dropout(0.2)(x)
y = Dense(10, activation='softmax',  # softmax for classification
          kernel_initializer='glorot_uniform',
          bias_initializer='zeros')(x)
#initialize model with inputs and outputs
model = Model(inputs, y, name='convnet_func')
#compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
#train model
history = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    verbose=1, shuffle=True)  # verbose=1 shows the time for each epoch
#evaluate model
import matplotlib.pyplot as plt
%matplotlib inline
def plot_error(history):
    history_dict_vals = history.history
    history_x = history.epoch
    plt.plot(history_x, history_dict_vals['accuracy'], 'r-', label='training accuracy')
    plt.plot(history_x, history_dict_vals['val_accuracy'], 'g-', label='test accuracy')
    plt.axis([0, len(history_x), 0.0, 1])
    plt.xlabel('training epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
    print(f"Final test accuracy = {history_dict_vals['val_accuracy'][-1]}")
plot_error(history)
Unfortunately, the model does not train when I uncomment the batch normalization layers.
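A minimal sketch for narrowing this down (my assumption about a debugging path, not a confirmed diagnosis): a single custom block containing BatchNormalization, driven through the functional API on random data. Keras forwards the training flag automatically during fit() and predict() as long as call exposes a training argument, so if this toy model trains, the functional API plus BatchNormalization combination itself is probably not the culprit.
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, Input

class BNBlock(layers.Layer):
    def __init__(self, filters):
        super().__init__()
        self.conv = layers.Conv2D(filters, 3, padding='same')
        self.bn = layers.BatchNormalization()

    def call(self, x, training=False):
        # `training` is supplied by Keras when the model is fit/evaluated.
        return tf.nn.relu(self.bn(self.conv(x), training=training))

inputs = Input(shape=(32, 32, 3))
x = BNBlock(16)(inputs)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.fit(np.random.rand(64, 32, 32, 3),
          np.random.randint(0, 10, size=64), epochs=1)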

Keras TimeseriesGenerator: error when checking input

When I try to use the TimeseriesGenerator class, my Keras LSTM starts training for a few moments but then raises a ValueError. What's wrong? I wonder how it can start training and then hit an error.
My similar implementation without this generator runs smoothly, but the quality of the predictions is awful (and I'm not sure that this generator, once successfully implemented, would make a difference).
See the code below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
data = pd.read_excel('example.xlsx',usecols=['wave','wind','current','X','Y','RZ'])
data = data.apply(lambda x: (x - np.mean(x)) / np.std(x))
n_cutoff = 200
X = np.array(data.loc[n_cutoff:,['wave','wind']])
Y = np.array(data.loc[n_cutoff:,['RZ']])
X = X.reshape(len(X),2)
X = np.append(X, [[0]*np.size(X, axis=1)], axis=0)
Y = Y.reshape(len(Y),1)
Y = np.insert(Y, 0, 0)
n_lag = 3
n_batch = 15
n = int(0.75*len(X))
generator = TimeseriesGenerator(X, Y, length=n_lag, batch_size=n_batch)
inputs = Input(shape=(n_lag,2))
hidden1 = LSTM(units=100,
               activation='softmax',
               recurrent_activation='linear',
               dropout=0.5,
               recurrent_dropout=0.5,
               return_sequences=True)(inputs)
hidden2 = LSTM(units=30,
               activation='softmax',
               recurrent_activation='linear',
               dropout=0.5,
               recurrent_dropout=0.5,
               return_sequences=False)(hidden1)
outputs = Dense(units=1,
                activation='linear')(hidden2)
model = Model(inputs=inputs, outputs=outputs)
optimizer = Nadam(learning_rate=1e-2, beta_1=0.95, beta_2=0.9, epsilon=1e-7)
model.compile(loss='mean_squared_error', optimizer=optimizer)
history = model.fit(generator,
                    verbose=1,
                    steps_per_epoch=int(n / n_batch),
                    epochs=1,
                    shuffle=False,
                    callbacks=[EarlyStopping(monitor='loss', min_delta=0, patience=20, verbose=1, mode='auto'),
                               ReduceLROnPlateau(monitor='loss', factor=0.5, patience=10, verbose=1, mode='auto', cooldown=1),
                               TerminateOnNaN()])
Y_hat = model.predict(X[n:])
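Not a fix, but a way to see what the generator actually yields and how many batches it contains (the shapes depend on the X and Y arrays built above). A ValueError that appears only after training starts often means steps_per_epoch asks for more batches than the generator holds, or that a final, shorter batch has a shape the model rejects; printing the batches makes either case visible.
print('batches available:', len(generator))
for i in range(len(generator)):
    xb, yb = generator[i]
    print(i, xb.shape, yb.shape)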

Transfer learning ResNet model does not learn

I trained a ResNet-50 model to classify images from 6 classes (my own dataset) and saved it. But the model did not learn properly, and its predictions are incorrect. What could be the reason for this poor learning?
Here are my code and the output plots, using Keras with the TensorFlow backend. How can I solve this?
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Define some constants needed throughout the script
N_CLASSES = 6
EPOCHS = 20
PATIENCE = 5
TRAIN_PATH= '/Train/'
VALID_PATH = '/Test/'
MODEL_CHECK_WEIGHT_NAME = 'resnet_monki_v1_chk.h5'
# Define the model: freeze the pre-trained ResNet weights and add a few layers on top of it for our custom dataset
K.set_learning_phase(0)
model = ResNet50(input_shape=(224,224,3),include_top=False, weights='imagenet', pooling='avg')
K.set_learning_phase(1)
x = model.output
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(N_CLASSES, activation='softmax', name='custom_output')(x)
custom_resnet = Model(inputs=model.input, outputs = output)
for layer in model.layers:
    layer.trainable = False
custom_resnet.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
custom_resnet.summary()
# 4. Load dataset to be used
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
traingen = datagen.flow_from_directory(TRAIN_PATH, target_size=(224,224), batch_size=32, class_mode='categorical')
validgen = datagen.flow_from_directory(VALID_PATH, target_size=(224,224), batch_size=32, class_mode='categorical', shuffle=False)
# 5. Train the model; ModelCheckpoint saves the best model based on validation accuracy
es_callback = EarlyStopping(monitor='val_acc', patience=PATIENCE, mode='max')
mc_callback = ModelCheckpoint(filepath=MODEL_CHECK_WEIGHT_NAME, monitor='val_acc', save_best_only=True, mode='max')
train_history = custom_resnet.fit_generator(traingen, steps_per_epoch=len(traingen), epochs=EPOCHS,
                                            validation_data=validgen, validation_steps=len(validgen),
                                            verbose=2, callbacks=[es_callback, mc_callback])
custom_resnet.save('custom_resnet.h5')
Here are the plots; I had to post them as links because the site does not let me embed pictures yet.
[training and validation accuracy/loss plots]
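The sklearn.metrics imports in this script are never used; below is a sketch (untested against this dataset) of how they could be applied to the non-shuffled validation generator to see where the predictions go wrong. It assumes validgen and custom_resnet as defined above.
# validgen was created with shuffle=False, so prediction order matches validgen.classes
validgen.reset()
probs = custom_resnet.predict_generator(validgen, steps=len(validgen))
y_pred = np.argmax(probs, axis=1)
y_true = validgen.classes
print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))
print('accuracy:', accuracy_score(y_true, y_pred))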

MNIST and transfer learning with VGG16 in Keras - low validation accuracy

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512-node relu dense layer and 0.5 dropout before a softmax layer of 10 (for digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative if someone could glance through it and see if anything jumps out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
    layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(inputs=model.input, outputs=predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
                                                    directory=None,
                                                    x_col='train_samples',
                                                    y_col='train_labels',
                                                    has_ext=True,
                                                    target_size=(img_height, img_width),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    color_mode='rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
                                                        directory=None,
                                                        x_col='test_samples',
                                                        y_col='test_labels',
                                                        has_ext=True,
                                                        target_size=(img_height, img_width),
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        color_mode='rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
    print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
                          steps_per_epoch=int(samples_per_epoch / batch_size),
                          epochs=epochs,
                          validation_data=validation_generator,
                          validation_steps=int(nb_validation_samples / batch_size),
                          callbacks=[chk, logger])
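One mismatch visible in the script (an observation, not a guaranteed fix): preprocess_input is imported from keras.applications.vgg16 but never used, so the generators feed raw 0-255 pixels into a network whose ImageNet weights expect VGG-style preprocessed inputs. Wiring it in would look like:
# Apply the preprocessing the pretrained VGG16 weights expect
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)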
Have you tried explicitly defining the classes of the images? Like so:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.
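Spelled out for the generators in the question (a sketch, not a tested fix). Note that classes must match the type of the values in y_col: if the dataframe stores the labels as strings, pass strings; if as integers, pass integers.
classes = [str(c) for c in range(10)]  # assumes the labels are stored as strings '0'..'9'
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train, directory=None,
    x_col='train_samples', y_col='train_labels',
    classes=classes, has_ext=True,
    target_size=(img_height, img_width), batch_size=batch_size,
    class_mode='categorical', color_mode='rgb')
validation_generator = test_datagen.flow_from_dataframe(
    dataframe=val, directory=None,
    x_col='test_samples', y_col='test_labels',
    classes=classes, has_ext=True,
    target_size=(img_height, img_width), batch_size=batch_size,
    class_mode='categorical', color_mode='rgb')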

Adding a layer stops learning in Keras

Code
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential,Model
from keras.layers import LeakyReLU,Dropout, Flatten, Dense,Input
from keras import applications
from keras.preprocessing import image
from keras import backend as K
from keras import regularizers
from keras.optimizers import adam
K.set_image_dim_ordering('tf')
input_tensor = Input(shape=(150,150,3))
img_width, img_height = 150,150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'Cats and Dogs Dataset/train'
validation_data_dir = 'Cats and Dogs Dataset/validation'
nb_train_samples = 20000
nb_validation_samples = 5000
epochs = 50
batch_size = 128
base_model=applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
i = 0
for layer in base_model.layers:
    layer.trainable = False
    i += 1
base_model.output
top_model=Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(1024,activation="relu"))
top_model.add(Dropout(0.5))
top_model.add(Dense(10, activation="relu"))  # Layer with issue
top_model.add(Dropout(0.8))
top_model.add(Dense(2, activation='softmax'))
model = Model(inputs=base_model.input,outputs=top_model(base_model.output))
model.summary()
datagen = ImageDataGenerator(rescale=1. / 255)
train_data = datagen.flow_from_directory(train_data_dir,target_size=(img_width, img_height),batch_size=batch_size,classes=[ 'cats','dogs'])#,class_mode="binary",shuffle=True)
validation_data = datagen.flow_from_directory(validation_data_dir,target_size=(img_width, img_height), batch_size=batch_size,classes=['cats','dogs'])#,class_mode="binary",shuffle=True)
adm=adam(lr=0.02)
model.compile(optimizer=adm,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_data, steps_per_epoch=nb_train_samples//batch_size, epochs=epochs,validation_data=validation_data, shuffle=True,verbose=1)
I have implemented an image classifier on the cats-and-dogs dataset (https://www.kaggle.com/c/dogs-vs-cats/data) using Keras (transfer learning with the Inception network). The code runs without errors, but the accuracy is stuck at 50% for both the validation set and the training set from the first epoch, and the loss isn't decreasing. I am using Atom with Hydrogen.
The issue goes away when I remove the marked layer; I can't seem to understand why this is happening.
What I have tried to fix this:
- different batch sizes - 4, 16, 64, 256
- changing the optimizer - tried adam, rmsprop, and sgd with modified learning rates
- different activations for the layer - relu, sigmoid, and leakyrelu
- changing the dropout - the issue vanishes when dropout is 0.9 (i.e. making the layer useless; this works for the obvious reason, but it also points out that there is something I am missing)
- changing the final activation to sigmoid
Can someone please tell me what I am missing, because I can't think of any reason why adding a layer stops learning.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential,Model
from keras.layers import LeakyReLU,Dropout, Flatten, Dense,Input
from keras import applications
from keras.preprocessing import image
from keras import backend as K
from keras import regularizers
from keras.optimizers import adam
K.set_image_dim_ordering('tf')
input_tensor = Input(shape=(150,150,3))
img_width, img_height = 150,150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'Cats and Dogs Dataset/train'
validation_data_dir = 'Cats and Dogs Dataset/validation'
nb_train_samples = 20000
nb_validation_samples = 5000
epochs = 50
batch_size = 64
base_model=applications.inception_v3.InceptionV3(include_top=False, weights='imagenet', input_tensor=input_tensor, pooling=None)
i = 0
for layer in base_model.layers:
    layer.trainable = False
    i += 1
base_model.output
top_model=Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(512, activation="relu"))  # decrease in units
top_model.add(Dropout(0.4))  # change the dropout
top_model.add(Dense(128, activation="relu"))  # increase in units
top_model.add(Dropout(0.2))  # decrease in dropout
top_model.add(Dense(2, activation='softmax'))
model = Model(inputs=base_model.input,outputs=top_model(base_model.output))
model.summary()
datagen = ImageDataGenerator(rescale=1. / 255)
train_data = datagen.flow_from_directory(train_data_dir,target_size=(img_width, img_height),batch_size=batch_size,classes=[ 'cats','dogs'])#,class_mode="binary",shuffle=True)
validation_data = datagen.flow_from_directory(validation_data_dir,target_size=(img_width, img_height), batch_size=batch_size,classes=['cats','dogs'])#,class_mode="binary",shuffle=True)
adm=adam(lr=0.02)
model.compile(optimizer=adm,loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_data, steps_per_epoch=nb_train_samples//batch_size, epochs=epochs,validation_data=validation_data, shuffle=True,verbose=1)
I have reduced the number of units in the first dense layer while increasing the number of units in the second dense layer, and also decreased the dropout rates. Run this code and let me know. One more thing: the more complex the network is, the higher the chance of over-fitting. A high dropout value might result in no learning for that layer. Try to keep your network simple.
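A small illustration of why Dropout(0.8) directly after a 10-unit layer is so destructive (an aside, not a claim about this exact model): on average only two of the ten activations survive each training step, leaving very little signal for the final softmax.
import numpy as np

rng = np.random.default_rng(0)
units, rate = 10, 0.8
# Simulate 1000 training steps of dropout masks over 10 units.
surviving = (rng.random((1000, units)) > rate).sum(axis=1)
print('mean surviving units per step:', surviving.mean())  # about 2.0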
