MNIST and transfer learning with VGG16 in Keras- low validation accuracy - python-3.x

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512 node relu dense layer and 0.5 drop-out before a softmax layer of 10. (For digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative is someone could glance through it and see if anything jumped out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(input = model.input, output = predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
directory=None,
x_col='train_samples',
y_col='train_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
directory=None,
x_col='test_samples',
y_col='test_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
steps_per_epoch = int(samples_per_epoch/batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = int(nb_validation_samples/batch_size),
callbacks = [chk, logger])

Have you tried explicitly defining the classes of the images? as such:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.

Related

How do I know what the output of model.predict() correspond to?

I am trying to make a CNN that classifies cats and dogs and I am using flow_from_directory() to prepare my data for the model.
from keras import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.optimizers import *
import keras
import numpy as np
import os
img_size = 250 # number of pixels for width and height
#Random Seed
np.random.seed(123456789)
training_path = os.getcwd() + "/cats and dogs images/train"
testing_path = os.getcwd() + "/cats and dogs images/test"
#Defines the Model
model = Sequential([
Conv2D(filters=128, kernel_size=(3,3), activation="relu", padding="same", input_shape=(img_size,img_size,3)),
MaxPool2D(pool_size=(2,2), strides=2),
Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same"),
Flatten(),
Dense(32, activation="relu"),
Dense(2, activation="softmax")
])
#Scales the pixel values to between 0 to 1
datagen = ImageDataGenerator(rescale=1.0/255.0)
Batch_size = 10
#Prepares Training Data
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Compiles the model
#model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=['accuracy'])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
#model.compile(loss="mse", optimizer="sgd", metrics=[keras.metrics.MeanSquaredError()])
#Checkpoint
filepath = os.getcwd() + "/trained_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min', save_freq=1)
#Fitting the model to the dataset (Training the Model)
model.fit(x = training_dataset, steps_per_epoch = 400,
validation_data=testing_dataset, validation_steps=100,
epochs = 10, callbacks=[checkpoint], verbose = 1)
# evaluate model on training dataset
_,acc = model.evaluate_generator(training_dataset, steps=len(training_dataset), verbose=0)
print("Accuracy on training dataset:")
print('> %.3f' % (acc * 100.0))
#evaluate model on testing dataset
_,acc = model.evaluate_generator(testing_dataset, steps=len(testing_dataset), verbose=0)
print("Accuracy on testing dataset:")
print('> %.3f' % (acc * 100.0))
I want to know how the output of model.predict() is going to correspond to the labels cats and dogs and which one of the two numbers in the output is a cat and which is a dog?
Here's my code for loading the model and giving a prediction:
from keras.models import Sequential
from keras_preprocessing.image import *
from keras.layers import *
import tensorflow as tf
import numpy as np
from keras.layers.experimental.preprocessing import Rescaling
import os
import cv2
from keras.models import *
img_size = 250
#Load weights into new model
filepath = os.getcwd() + "/trained_model.h5"
model = load_model(filepath)
print("Loaded model from disk")
#Scales the pixel values to between 0 to 1
#datagen = ImageDataGenerator(rescale=1.0/255.0)
#Prepares Testing Data
testing_dataset = cv2.imread(os.getcwd() + "/cats and dogs images/single test sample/507.png")
#img = datagen.flow_from_directory(testing_dataset, target_size=(img_size,img_size))
img = cv2.resize(testing_dataset, (img_size,img_size))
newimg = np.asarray(img)
pixels = newimg.astype('float32')
pixels /= 255.0
print(pixels.shape)
pixels = np.expand_dims(pixels, axis=0)
print(pixels.shape)
prediction = model.predict(pixels)
print(prediction)
And here is the output from the prediction code above:
Loaded model from disk
(250, 250, 3)
(1, 250, 250, 3)
[[5.4904184e-27 1.0000000e+00]]
As you can see, the prediction gave an array of two numbers, but which one corresponds to the dog label and which to the cat label? By the way, the model isn't fully trained so I am just testing out the code to see if it works.
The model output depends on how you loaded the data and specified how the classes are going to be ordered/labelled in this code you provided:
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
You specified during the loading of the data that the classes are going to be ordered Cat then Dog in classes argument.
Therefor the output is going to be ordered as two probabilities (summing to 1)
The first probability refers to by how % that the input image is cat and the second probability refers to by how % that the input image is dog.
You use this line:
output_class = np.argmax(prediction, axis=1)
This line will compare the elements of the list and outputs which index of the elements of the list is the greatest (In our case the list containing the two probabilities) in the form of [1] (or [0, 1] depending on the shape of the output) this means that the said image is a dog, since the 2nd element in the output list is 1 if it were [0] (or [1, 0] depending on the shape of the output) then that means that the output class of the input image is cat.

Calculate gradient of validation error w.r.t inputs using Keras/Tensorflow or autograd

I need to calculate the gradient of the validation error w.r.t inputs x. I'm trying to see how much the validation error changes when I perturb one of the training samples.
The validation error (E) explicitly depends on the model weights (W).
The model weights explicitly depend on the inputs (x and y).
Therefore, the validation error implicitly depends on the inputs.
I'm trying to calculate the gradient of E w.r.t x directly.
An alternative approach would be to calculate the gradient of E w.r.t W (can easily be calculated) and the gradient of W w.r.t x (cannot do at the moment), which would allow the gradient of E w.r.t x to be calculated.
I have attached a toy example. Thanks in advance!
import numpy as np
import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from autograd import grad
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()
# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5
# Flatten the images.
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))
# Build the model.
model = Sequential([
Dense(64, activation='relu', input_shape=(784,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
# Train the model.
model.fit(
train_images,
to_categorical(train_labels),
epochs=5,
batch_size=32,
)
model.save_weights('model.h5')
# Load the model's saved weights.
# model.load_weights('model.h5')
calculate_mse = tf.keras.losses.MeanSquaredError()
test_x = test_images[:5]
test_y = to_categorical(test_labels)[:5]
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
train_y = tf.convert_to_tensor(train_y, np.float32)
train_x = tf.convert_to_tensor(train_x, np.float64)
with tf.GradientTape() as tape:
tape.watch(train_x)
model.fit(train_x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
de_dx = tape.gradient(mse, train_x)
print(de_dx)
# approach 2 - does not run
def calculate_validation_mse(x):
model.fit(x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
return mse
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
validation_gradient = grad(calculate_validation_mse)
de_dx = validation_gradient(train_x)
print(de_dx)
Here's how you can do this. Derivation is as below.
Few things to note,
I have reduced the feature size from 784 to 256 as I was running out of memory in colab (line marked in the code) . Might have to do some mem profiling to find out why
Only computed grads for the first layer. Easily extendable to other layers
Disclaimer: this derivation is correct to best of my knowledge. Please do some research and verify that it is the case. You will run into memory issues for larger inputs and layer sizes.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
f = 256
model = Sequential([
Dense(64, activation='relu', input_shape=(f,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
w = model.weights[0]
# Inputs and labels
x_tr = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_tr = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_tr_onehot = tf.keras.utils.to_categorical(y_tr, num_classes=10).astype('float32')
x_v = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_v = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_v_onehot = tf.keras.utils.to_categorical(y_v, num_classes=10).astype('float32')
# In the context of GradientTape
with tf.GradientTape() as tape1:
with tf.GradientTape() as tape2:
y_tr_pred = model(x_tr)
tr_loss = tf.keras.losses.MeanSquaredError()(y_tr_onehot, y_tr_pred)
tmp_g = tape2.gradient(tr_loss, w)
print(tmp_g.shape)
# d(dE_tr/d(theta))/dx
# Warning this step consumes lot of memory for large layers
lr = 0.001
grads_1 = -lr * tape1.jacobian(tmp_g, x_tr)
with tf.GradientTape() as tape3:
y_v_pred = model(x_v)
v_loss = tf.keras.losses.MeanSquaredError()(y_v_onehot, y_v_pred)
# dE_val/d(theta)
grads_2 = tape3.gradient(v_loss, w)[tf.newaxis, :]
# Just crunching the dimension to get the final desired shape of (1,256)
grad = tf.matmul(tf.reshape(grads_2,[1, -1]), tf.reshape(tf.transpose(grads_1,[2,1,0,3]),[1, -1, 256]))

why is different between model.evaluate() and computed loss by myself based on model.predict()?

I am running neural network by keras. There is my code:
import numpy as np
from keras import Model
from keras.models import Sequential
from keras.layers import Dense
from keras import backend as K
def mean_squared_error(y_true, y_pred):
return K.mean(K.square(y_pred - y_true),axis=-1)
np.random.seed(1)
Train_X = np.random.randint(low=0,high=100,size = (50,5))
Train_Y = np.matmul(Train_X,np.arange(10).reshape(5,2))+np.random.randint(low=0,high=10,size=(50,2))
Test_X = np.random.randint(low=0,high=100,size = (10,5))
Test_Y = np.matmul(Test_X,np.arange(10).reshape(5,2))+np.random.randint(low=0,high=10,size=(10,2))
model = Sequential()
model.add(Dense(4,activation = 'relu'))
model.add(Dense(2,activation='relu'))
model.add(Dense(2,activation='relu'))
model.add(Dense(2))
model.compile(loss=mean_squared_error, optimizer='adam', metrics=['mae'])
history = model.fit(Train_X, Train_Y, epochs=100, batch_size=5,validation_data = (Test_X, Test_Y))
loss1 = model.evaluate(Test_X,Test_Y)
loss2 = history.history['val_loss'][99]
y_pred = model.predict(Test_X)
y_true = Test_Y
loss3 = np.mean(np.square(y_pred-y_true))
I find that loss1 is the same as loss2 but is different with loss3. So i feel so confused. Could someone tell me why?
This is possibly due to different dtypes for Test_Y and y_pred. Keras tries to automatically take care of dtype mismatches for you, so it is possible that Test_Y is a float64 and y_pred is a float32. If that is indeed the case, try converting one of their dtypes for the loss3 calculation and see if the values match.
y_pred = model.predict(Test_X)
y_true = Test_Y.astype(np.float32)
loss3 = np.mean(np.square(y_pred-y_true))

How to extract features from an image for training a CNN model

I am working on a project to classify waste as plastics and non plastics using only.images to train them.However i still dont know what features does the model take into account while classifyimg them.I am using CNN,however the accuracy of prediction is still not up to the mark.
The reason why i went to CNN because there is no specific feature to distinguish plastics from others.Is there any other way to approach this problem?
For eg If i train the images of cats,my Neural Network learns what is a cat however i do not explicitly give features is the same case valid here too?
Suppose you want to extract the Features from the Pre-Trained Convolutional Neural Network, VGGNet, VGG16.
Code to reuse the Convolutional Base is:
from keras.applications import VGG16
conv_base = VGG16(weights='imagenet',
include_top=False,
input_shape=(150, 150, 3)) # This is the Size of your Image
The final feature map has shape (4, 4, 512). That’s the feature on top of which you’ll stick a densely connected classifier.
There are 2 ways to extract Features:
FAST FEATURE EXTRACTION WITHOUT DATA AUGMENTATION: Running the convolutional base over your dataset, recording its output to a
Numpy array on disk, and then using this data as input to a standalone, densely
connected classifier similar to those you saw in part 1 of this book. This solution is fast and cheap to run, because it only requires running the convolutional base once for every input image, and the convolutional base is by far the most expensive part of the pipeline. But for the same reason, this technique won’t allow you to use data augmentation.
Code for extracting Features using this method is shown below:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
base_dir = '/Users/fchollet/Downloads/cats_and_dogs_small'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 20
def extract_features(directory, sample_count):
features = np.zeros(shape=(sample_count, 4, 4, 512))
labels = np.zeros(shape=(sample_count))
generator = datagen.flow_from_directory(directory, target_size=(150, 150),
batch_size=batch_size, class_mode='binary')
i=0
for inputs_batch, labels_batch in generator:
features_batch = conv_base.predict(inputs_batch)
features[i * batch_size : (i + 1) * batch_size] = features_batch
labels[i * batch_size : (i + 1) * batch_size] = labels_batch
i += 1
if i * batch_size >= sample_count:
break
return features, labels
train_features, train_labels = extract_features(train_dir, 2000)
validation_features, validation_labels = extract_features(validation_dir,1000)
test_features, test_labels = extract_features(test_dir, 1000)
train_features = np.reshape(train_features, (2000, 4*4* 512))
validation_features = np.reshape(validation_features, (1000, 4*4* 512))
test_features = np.reshape(test_features, (1000, 4*4* 512))
from keras import models
from keras import layers
from keras import optimizers
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=4 * 4 * 512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
loss='binary_crossentropy', metrics=['acc'])
history = model.fit(train_features, train_labels, epochs=30,
batch_size=20, validation_data=(validation_features, validation_labels))
Training is very fast, because you only have to deal with two Dense
layers—an epoch takes less than one second even on CPU
FEATURE EXTRACTION WITH DATA AUGMENTATION: Extending the model you have (conv_base) by adding Dense layers on top, and running the whole thing end to end on the input data. This will allow you to use data augmentation, because every input image goes through the convolutional base every time it’s seen by the model. But for the same reason, this technique is far more expensive than the first
Code for the same is shown below:
from keras import models
from keras import layers
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
train_datagen = ImageDataGenerator(rescale=1./255,rotation_range=40,
width_shift_range=0.2,height_shift_range=0.2,shear_range=0.2,
zoom_range=0.2,horizontal_flip=True,fill_mode='nearest')
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(train_dir,target_size=(150, 150), batch_size=20, class_mode='binary')
validation_generator = test_datagen.flow_from_directory(
validation_dir,
target_size=(150, 150),
batch_size=20,
class_mode='binary')
model.compile(loss='binary_crossentropy',
optimizer=optimizers.RMSprop(lr=2e-5),
metrics=['acc'])
history = model.fit_generator(train_generator, steps_per_epoch=100, epochs=30, validation_data=validation_generator, validation_steps=50)
For more details, please refer Section 5.3.1 of the book, "Deep Learning with Python", authored by Father of Keras, "Francois Chollet"

OOM during prediction with batchSize that works during training

I'm confused by a Keras behavior I'm seeing. I'm finetuning the resnet50 model.
from keras import applications
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from regressionGenerator import regressionGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard
from keras.regularizers import l2
batchSize = 32
numObservations = 948
iterationsPerEpoch = numObservations // batchSize
targetSize = (1024,1024) # Keep aspect ratio, approximately .12 of
original image size
preprocessFcn = applications.resnet50.preprocess_input
base_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(targetSize[0],targetSize[1], 3),pooling='none')
x = base_model.output
x = Flatten()(x)
x = Dense(2048,activation='relu',kernel_regularizer=l2(0.0001))(x)
x = Dropout(0.5)(x)
predictions = Dense(5)(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
layer.trainable = False
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=SGD(lr=0.00001, momentum=0.9), loss='mean_squared_error')
train_generator = regressionGenerator(batchSize,targetSize,preprocessFcn)
# Train for a few epochs to initialize top of network
model.fit_generator(
train_generator,
steps_per_epoch=iterationsPerEpoch,
epochs=15)
This model trains, and fits in memory. When I predict, also from a generator, I cannot train with the same batchSize of 32, or I receive OOM. I have to lower the batchSize to 8 to fit in memory on the same GPU.
import numpy as np
from keras.models import load_model
from predictionGenerator import predictionGenerator
from keras import applications
model = load_model('')
preprocessFcn = applications.resnet50.preprocess_input
batchSize = 8
targetSize = (1024,1024)
test_generator,idxList,numSteps = predictionGenerator(batchSize, targetSize, preprocessFcn)
Ypred = model.predict_generator(test_generator, numSteps)
Ypred = np.around(Ypred)
Ypred = np.clip(Ypred, 0, np.inf) # Clip negative counts
Ypred = Ypred[idxList, :] # Sort according to order of observations in generator
numObservations = len(idxList)
fileindices = np.reshape(np.arange(numObservations),(numObservations,1))
Ypred = np.hstack((fileindices,Ypred))
How is it possible that I can use a batchSize of 32 and train, but have to lower the batchSize by a factor of 4 during prediction?

Resources