I was doing some classification with keras, when met this error:
InvalidArgumentError: Dimensions must be equal, but are 256 and 8 for 'dense_185/MatMul' (op: 'MatMul') with input shapes: [?,256], [8,300].
It surprised me because the dimension of the input to the dense is 1.
This is a sequential model with a few custom layers. I have no idea why 8 appears in the error of dense layer.
class Residual(Layer):
def __init__(self,input_shape,**kwargs):
super(Residual, self).__init__(**kwargs)
self.input_shapes = input_shape
def call(self, x):
print(np.shape(x)) # (?, 128, 8)
first_layer = Conv1D(256, 4, activation='relu', input_shape = self.input_shapes)(x)
print(np.shape(first_layer)) (?, 125, 256)
x = Conv1D(256, 4, activation='relu')(first_layer)
print(np.shape(x)) (?, 122, 256)
x = Conv1D(256, 4, activation='relu')(x)
print(np.shape(x)) (?, 119, 256)
x = ZeroPadding1D(padding=3)(x)
residual = Add()([x, first_layer])
x = Activation("relu")(residual)
return x
class Pooling(Layer):
def __init__(self,**kwargs):
super(Pooling, self).__init__(**kwargs)
def call(self, x):
first_layer = GlobalMaxPooling1D(data_format='channels_last')(x)
second_layer = GlobalAveragePooling1D(data_format='channels_last')(x)
pooling = Add()([first_layer, second_layer])
print(np.shape(pooling)) (?, 256)
return pooling
model = Sequential()
model.add(Residual(input_shape=(128,8)))
model.add(Pooling())
model.add(Dense(300, activation='relu'))
model.add(Dense(150, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
model.fit(np.array(dataset_data), dataset_target, epochs=1000, validation_split=0.1, verbose=1, batch_size=8)
Dimensions:
(1000, 128, 8) - input (1000 audio, 8 features, 128 seq_length)
(1000, 10) - target (1000 audio, 10 classes)
I think there are two edits required:
Add InputLayer as entrance for the data
Define compute_output_shape method at least for Pooling layer (link). If this method is not defined, Dense layer can't figure out what's input shape for it, I guess, and then fails.
Also there's minor editing - since model have InputLayer, you need no more input_shape kwarg in Residual layer.
class Residual(Layer):
def __init__(self, **kwargs): # remove input shape
super(Residual, self).__init__(**kwargs)
def call(self, x):
print(np.shape(x))
first_layer = Conv1D(256, 4, activation='relu')(x)
print(np.shape(first_layer))
x = Conv1D(256, 4, activation='relu')(first_layer)
print(np.shape(x))
x = Conv1D(256, 4, activation='relu')(x)
print(np.shape(x))
x = ZeroPadding1D(padding=3)(x)
residual = Add()([x, first_layer])
x = Activation("relu")(residual)
return x
class Pooling(Layer):
def __init__(self, **kwargs):
super(Pooling, self).__init__(**kwargs)
def call(self, x):
# !!! I build model without data_format argument - my version of keras
# doesn't support it !!!
first_layer = GlobalMaxPooling1D(data_format='channels_last')(x)
second_layer = GlobalAveragePooling1D(data_format='channels_last')(x)
pooling = Add()([first_layer, second_layer])
print(np.shape(pooling))
self.output_dim = int(np.shape(pooling)[-1]) # save output shape
return pooling
def compute_output_shape(self, input_shape):
# compute output shape here
return (input_shape[0], self.output_dim)
Initialize model:
model = Sequential()
model.add(InputLayer((128,8)))
model.add(Residual())
model.add(Pooling())
model.add(Dense(300, activation='relu'))
model.add(Dense(150, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
Out:
(?, 128, 8)
(?, 125, 256)
(?, 122, 256)
(?, 119, 256)
(?, 256)
Summary of the model (don't know why Residual and Pooling don't show params the have. I guess some additional method required for this classes to count internal params):
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
residual_10 (Residual) (None, 128, 8) 0
_________________________________________________________________
pooling_8 (Pooling) (None, 256) 0
_________________________________________________________________
dense_15 (Dense) (None, 300) 77100
_________________________________________________________________
dense_16 (Dense) (None, 150) 45150
_________________________________________________________________
dense_17 (Dense) (None, 10) 1510
=================================================================
Total params: 123,760
Trainable params: 123,760
Non-trainable params: 0
_________________________________________________________________
Create fake data and check training process:
dataset_data = np.random.randn(1000, 128, 8)
dataset_target = np.zeros((1000, 10))
dataset_target[:, 0] = 1
model.fit(np.array(dataset_data), dataset_target, epochs=1000,
validation_split=0.1, verbose=1, batch_size=8)
Train on 900 samples, validate on 100 samples
Epoch 1/1000
900/900 [==============================] - 2s 2ms/step - loss: 0.0235 - acc: 0.9911 - val_loss: 9.4426e-05 - val_acc: 1.0000
Epoch 2/1000
900/900 [==============================] - 1s 1ms/step - loss: 4.2552e-05 - acc: 1.0000 - val_loss: 1.7458e-05 - val_acc: 1.0000
Epoch 3/1000
900/900 [==============================] - 1s 1ms/step - loss: 1.1342e-05 - acc: 1.0000 - val_loss: 7.3141e-06 - val_acc: 1.0000
... and so on
Looks like it works.
Related
I am trying to design a model for binary image classification, this is my first classifier and I am following an online tutorial but the model always predicts class 0
My dataset contains 3620 and 3651 images of each class respectively, I don't suppose the problem is due to an imbalanced dataset as the model is predicting only the class with lower number of sample in the dataset.
My code
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
img_hieght, img_width = 150,150
train_data_dir = 'dataset/train'
#validation_data_dir = 'dataset/validation'
nb_train_samples = 3000
#nb_validation_samples = 500
epochs = 10
batch_size = 16
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_hieght)
else:
input_shape = (img_width, img_hieght, 3)
model = Sequential()
model.add(Conv2D(32,(3,3), input_shape = input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32,(3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64,(3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer = 'rmsprop', metrics = ['accuracy'])
train_datagen = ImageDataGenerator(
rescale = 1. /255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size = (img_width,img_hieght),
batch_size = batch_size,
class_mode = 'binary')
model.fit_generator(train_generator,
steps_per_epoch = nb_train_samples//batch_size,
epochs = epochs)
model.save('classifier.h5')
I have tried checking the model summary as well, but couldn't detect anything notable
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_1 (Conv2D) (None, 148, 148, 32) 896
_________________________________________________________________
activation_1 (Activation) (None, 148, 148, 32) 0
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 74, 32) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 72, 72, 32) 9248
_________________________________________________________________
activation_2 (Activation) (None, 72, 72, 32) 0
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 36, 32) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 34, 34, 64) 18496
_________________________________________________________________
activation_3 (Activation) (None, 34, 34, 64) 0
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 17, 64) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 18496) 0
_________________________________________________________________
dense_1 (Dense) (None, 64) 1183808
_________________________________________________________________
activation_4 (Activation) (None, 64) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 64) 0
_________________________________________________________________
dense_2 (Dense) (None, 1) 65
_________________________________________________________________
activation_5 (Activation) (None, 1) 0
=================================================================
Total params: 1,212,513
Trainable params: 1,212,513
Non-trainable params: 0
_________________________________________________________________
None
I have not used validation dataset, I am using only training data and testing the model manually using:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
batch_size = 16
path = 'dataset/test'
imgen = ImageDataGenerator(rescale=1/255.)
testGene = imgen.flow_from_directory(directory=path,
target_size=(150, 150,),
shuffle=False,
class_mode='binary',
batch_size=batch_size,
save_to_dir=None
)
model = tf.keras.models.load_model("classifier.h5")
pred = model.predict_generator(testGene, steps=testGene.n/batch_size)
print(pred)
Here are the accuracy and loss values per epochs:
Epoch 1/10
187/187 [==============================] - 62s 330ms/step - loss: 0.5881 - accuracy: 0.7182
Epoch 2/10
187/187 [==============================] - 99s 529ms/step - loss: 0.4102 - accuracy: 0.8249
Epoch 3/10
187/187 [==============================] - 137s 733ms/step - loss: 0.3266 - accuracy: 0.8646
Epoch 4/10
187/187 [==============================] - 159s 851ms/step - loss: 0.3139 - accuracy: 0.8620
Epoch 5/10
187/187 [==============================] - 112s 597ms/step - loss: 0.2871 - accuracy: 0.8873
Epoch 6/10
187/187 [==============================] - 60s 323ms/step - loss: 0.2799 - accuracy: 0.8847
Epoch 7/10
187/187 [==============================] - 66s 352ms/step - loss: 0.2696 - accuracy: 0.8870
Epoch 8/10
187/187 [==============================] - 57s 303ms/step - loss: 0.2440 - accuracy: 0.8947
Epoch 9/10
187/187 [==============================] - 56s 299ms/step - loss: 0.2478 - accuracy: 0.8994
Epoch 10/10
187/187 [==============================] - 53s 285ms/step - loss: 0.2448 - accuracy: 0.9047
You use only 3000 samples per epoch (see line nb_train_samples = 3000), while having 3620 and 3651 images for the each class. Given that model gets 90% accuracy and predicts only zeros, I suppose that you pass only class-zero images to the network during training. Consider increasing nb_train_samples.
I am writing to build a model to predict handwritten characters using the dataset given here (https://www.kaggle.com/sachinpatel21/az-handwritten-alphabets-in-csv-format)
EDIT: ( after making the changes suggested in the comments )
Error I get now : ValueError: Error when checking input: expected conv2d_4_input to have shape (28, 28, 1) but got array with shape (249542, 784, 1)
Find below the code for the CNN :
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras import backend as K
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
seed = 785
np.random.seed(seed)
dataset = np.loadtxt('../input/A_Z Handwritten Data/A_Z Handwritten Data.csv', delimiter=',')
print(dataset.shape) # (372451, 785)
X = dataset[:,1:785]
Y = dataset[:,0]
(X_train, X_test, Y_train, Y_test) = train_test_split(X, Y, test_size=0.33, random_state=seed)
X_train = X_train / 255
X_test = X_test / 255
X_train = X_train.reshape((-1, X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((-1, X_test.shape[0], X_test.shape[1], 1))
print(X_train.shape) # (1, 249542, 784, 1)
Y_train = np_utils.to_categorical(Y_train)
Y_test = np_utils.to_categorical(Y_test)
print(Y_test.shape) # (122909, 26)
num_classes = Y_test.shape[1] # 26
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print("DONE")
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=256, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test,Y_test, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
model.save('weights.model')
So the problem is that your data isn't structured properly. Look at the solution below:
Read the data with pandas:
data = pd.read_csv('/users/vpolimenov/Downloads/A_Z Handwritten Data.csv')
data.shape
# shape: (372450, 785)
Get your X and y:
data.rename(columns={'0':'label'}, inplace=True)
X = data.drop('label',axis = 1)
y = data['label']
Split and scale:
X_train, X_test, y_train, y_test = train_test_split(X,y)
standard_scaler = MinMaxScaler()
standard_scaler.fit(X_train)
X_train = standard_scaler.transform(X_train)
X_test = standard_scaler.transform(X_test)
Here is the magic:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
X_train.shape
# (279337, 28, 28, 1)
Here is your model:
num_classes = y_test.shape[1] # 26
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print("DONE")
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=256, verbose=2) # WHERE I GET THE ERROR
Summary of your model:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_25 (Conv2D) (None, 24, 24, 32) 832
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 12, 12, 32) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 12, 12, 32) 0
_________________________________________________________________
flatten_25 (Flatten) (None, 4608) 0
_________________________________________________________________
dense_42 (Dense) (None, 128) 589952
_________________________________________________________________
dense_43 (Dense) (None, 26) 3354
=================================================================
Total params: 594,138
Trainable params: 594,138
Non-trainable params: 0
I've stopped it after the second epoch, but you can see it working:
Train on 279337 samples, validate on 93113 samples
Epoch 1/10
- 80s - loss: 0.2478 - acc: 0.9308 - val_loss: 0.1021 - val_acc: 0.9720
Epoch 2/10
- 273s - loss: 0.0890 - acc: 0.9751 - val_loss: 0.0716 - val_acc: 0.9803
Epoch 3/10
Note:
It takes so long to fit due to the huge number of parameters in your network. You can try to reduce those and get a much faster/efficient network.
How do you know when you've successfully frozen a layer in Keras? Below is a snippet of my model where I am trying to freeze the entire DenseNet121 layer; however, I'm unsure if that is actually occurring since the outputs to the console don't indicate what's happening.
I've tried two methods (1) densenet.trainable = False and (2) model.layers[0].trainable = False.
Furthermore, if I load the model again and add model.layers[0].trainable = True, will this unfreeze the layer?
densenet = DenseNet121(
weights='/{}'.format(WEIGHTS_FILE_NAME),
include_top=False,
input_shape=(IMG_SIZE, IMG_SIZE, 3)
)
model = Sequential()
model.add(densenet)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(NUM_CLASSES, activation='sigmoid'))
model.summary()
# This is how I freeze my layers, I decided to do it twice because I wasn't sure if it was working
densenet.trainable = False
model.layers[0].trainable = False
history = model.fit_generator(
datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
steps_per_epoch=len(x_train) / BATCH_SIZE,
epochs=NUM_EPOCHS,
validation_data=(x_test, y_test),
callbacks=callbacks_list,
max_queue_size=2
)
Below is the output of model.summary(), which I would expect to indicate if a layer has been successfully frozen or not.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
densenet121 (Model) (None, 8, 8, 1024) 7037504
_________________________________________________________________
global_average_pooling2d_3 ( (None, 1024) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 1024) 0
_________________________________________________________________
dense_2 (Dense) (None, 5) 5125
=================================================================
Total params: 7,042,629
Trainable params: 5,125
Non-trainable params: 7,037,504
_________________________________________________________________
Epoch 1/100
354/353 [==============================] - 203s 573ms/step - loss: 0.4374 - acc: 0.8098 - val_loss: 0.3785 - val_acc: 0.8290
val_kappa: 0.0440
Epoch 2/100
354/353 [==============================] - 199s 561ms/step - loss: 0.3738 - acc: 0.8457 - val_loss: 0.3575 - val_acc: 0.8310
val_kappa: 0.0463
Epoch 3/100
however, I'm unsure if that is actually occurring since the outputs to
the console don't indicate what's happening.
It does, as can be seen from the number of trainable parameters. As expected, only the parameters(5125) of the last Dense layer are trainable.
Total params: 7,042,629
Trainable params: 5,125
Non-trainable params: 7,037,504
You can find whether a layer is frozen by looking at it's config:
>>> model.get_layer("dense_2").get_config()
{'name': 'dense_2',
'trainable': True,
...
If trainable is True, it is unfrozen.
I have 10000 images 5000 diseased Medical images and 5000 healthy images,
I used vgg16 and modified last layers as follows
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 224, 224, 3) 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 224, 224, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 224, 224, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 112, 112, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 112, 112, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 112, 112, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 56, 56, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 56, 56, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 28, 28, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 14, 14, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 7, 7, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 25088) 0
_________________________________________________________________
fc1 (Dense) (None, 256) 6422784
_________________________________________________________________
fc2 (Dense) (None, 128) 32896
_________________________________________________________________
output (Dense) (None, 2) 258
=================================================================
Total params: 21,170,626
Trainable params: 6,455,938
Non-trainable params: 14,714,688
My code is as follows
import numpy as np
import os
import time
from vgg16 import VGG16
from keras.preprocessing import image
from imagenet_utils import preprocess_input, decode_predictions
from keras.layers import Dense, Activation, Flatten
from keras.layers import merge, Input
from keras.models import Model
from keras.utils import np_utils
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
# Loading the training data
PATH = '/mount'
# Define data path
data_path = PATH
data_dir_list = os.listdir(data_path)
img_data_list=[]
y=0;
for dataset in data_dir_list:
img_list=os.listdir(data_path+'/'+ dataset)
print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
for img in img_list:
img_path = data_path + '/'+ dataset + '/'+ img
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
x = x/255
y=y+1
print('Input image shape:', x.shape)
print(y)
img_data_list.append(x)
from keras.optimizers import SGD
sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
img_data = np.array(img_data_list)
#img_data = img_data.astype('float32')
print (img_data.shape)
img_data=np.rollaxis(img_data,1,0)
print (img_data.shape)
img_data=img_data[0]
print (img_data.shape)
# Define the number of classes
num_classes = 2
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:5001]=0
labels[5001:]=1
names = ['YES','NO']
# convert class labels to on-hot encoding
Y = np_utils.to_categorical(labels, num_classes)
#Shuffle the dataset
x,y = shuffle(img_data,Y, random_state=2)
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
image_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=image_input, include_top=True,weights='imagenet')
model.summary()
last_layer = model.get_layer('block5_pool').output
x= Flatten(name='flatten')(last_layer)
x = Dense(256, activation='relu', name='fc1')(x)
x = Dense(128, activation='relu', name='fc2')(x)
out = Dense(num_classes, activation='softmax', name='output')(x)
custom_vgg_model2 = Model(image_input, out)
custom_vgg_model2.summary()
# freeze all the layers except the dense layers
for layer in custom_vgg_model2.layers[:-3]:
layer.trainable = False
custom_vgg_model2.summary()
custom_vgg_model2.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
t=time.time()
# t = now()
hist = custom_vgg_model2.fit(X_train, y_train, batch_size=128, epochs=50, verbose=1, validation_data=(X_test, y_test))
print('Training time: %s' % (t - time.time()))
(loss, accuracy) = custom_vgg_model2.evaluate(X_test, y_test, batch_size=10, verbose=1)
print("[INFO] loss={:.4f}, accuracy: {:.4f}%".format(loss,accuracy * 100))
model.save("vgg_10000.h5")
The result i'm posting first 5 and last 5 epochs
Epoch 1/50
8000/8000 [==============================] - 154s - loss: 0.6960 - acc: 0.5354 - val_loss: 0.6777 - val_acc: 0.5745
Epoch 2/50
8000/8000 [==============================] - 134s - loss: 0.6684 - acc: 0.5899 - val_loss: 0.6866 - val_acc: 0.5490
Epoch 3/50
8000/8000 [==============================] - 134s - loss: 0.6608 - acc: 0.6040 - val_loss: 0.6625 - val_acc: 0.5925
Epoch 4/50
8000/8000 [==============================] - 134s - loss: 0.6518 - acc: 0.6115 - val_loss: 0.6668 - val_acc: 0.5810
Epoch 5/50
8000/8000 [==============================] - 134s - loss: 0.6440 - acc: 0.6280 - val_loss: 0.6990 - val_acc: 0.5580
last 5
Epoch 25/50
8000/8000 [==============================] - 134s - loss: 0.5944 - acc: 0.6720 - val_loss: 0.6271 - val_acc: 0.6485
Epoch 26/50
8000/8000 [==============================] - 134s - loss: 0.5989 - acc: 0.6699 - val_loss: 0.6483 - val_acc: 0.6135
Epoch 27/50
8000/8000 [==============================] - 134s - loss: 0.5950 - acc: 0.6789 - val_loss: 0.7130 - val_acc: 0.5785
Epoch 28/50
8000/8000 [==============================] - 134s - loss: 0.5853 - acc: 0.6838 - val_loss: 0.6263 - val_acc: 0.6395
The results are not that great I tweked around using 128 and 128 nodes in last two layers using adam optimizer etc still results are not that convincing. Any help is much appriciated.
You can try the following:
Perform a stratified train_test_split
train_test_split(x, y, stratify=y, test_size=0.2, random_state=2)
Look into your data, and see if there are outliers in the images.
Use adam optimizer: from keras.optimizers import Adam instead of SGD
Try other seeds wherever applicable, i.e instead of random_state=2, use something else:
X_train, X_test, y_train, y_test = train_test_split(
x, y, test_size=0.2, random_state=382938)
Try with include_top=False:
model = VGG16(input_tensor=image_input, include_top=False,weights='imagenet')
Use (train, validation, test) sets or (cross-validation, holdout) sets to get a more reliable performance metric.
Since upgrading to Keras 2 I'm seeing nan loss when trying to fine tune ResNet50. Loss and accuracy look ok if I use a single convolutional layer (commented out below) instead of resnet. Am I missing something that changed with Keras 2?
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Dense, Input, Conv2D, Activation, Flatten
from keras.layers.pooling import MaxPooling2D
from keras.models import Model
from keras.optimizers import SGD
import numpy as np
inp = Input(batch_shape=(32, 224, 224, 3), name='input_image')
### resnet
modelres = ResNet50(weights="imagenet", include_top=False, input_tensor=inp)
x = modelres.output
x = Flatten()(x)
### single convolutional layer
#x = Conv2D(32, (3,3))(inp)
#x = Activation('relu')(x)
#x = MaxPooling2D(pool_size=(3,3))(x)
#x = Flatten()(x)
#x = Dense(units=32)(x)
predictions = Dense(units=2, kernel_initializer="he_normal", activation="softmax")(x)
model = Model(inputs=inp, outputs=predictions)
model.compile(SGD(lr=.001, momentum=0.9), "categorical_crossentropy", metrics=["accuracy"])
# generate images of all ones with the same label
def gen():
while True:
x_data = np.ones((32,224,224,3)).astype('float32')
y_data = np.zeros((32,2)).astype('float32')
y_data[:,1]=1.0
yield x_data, y_data
model.fit_generator(gen(), 10, validation_data=gen(), validation_steps=1)
The beginning and end of model.summary() looks like:
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_image (InputLayer) (32, 224, 224, 3) 0
____________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D) (32, 230, 230, 3) 0
____________________________________________________________________________________________________
conv1 (Conv2D) (32, 112, 112, 64) 9472
...
avg_pool (AveragePooling2D) (32, 1, 1, 2048) 0
____________________________________________________________________________________________________
flatten_1 (Flatten) (32, 2048) 0
____________________________________________________________________________________________________
dense_1 (Dense) (32, 2) 4098
====================================================================================================
Training output is:
Epoch 1/1
10/10 [==============================] - 30s - loss: nan - acc: 0.0000e+00 - val_loss: nan - val_acc: 0.0000e+00
Everything works fine when I switch the backend to tensorflow instead of theano. Looks like something about the theano implementation broke in keras 2.