keras model does not learn anything after setting pretrained weights by layer - keras

I am training a smaller VGG like model, and I set the pretrained weights of VGG16 to the first conv layers which are identical. My model acts pretty strange though, and does not learn anything at all - the loss stays the same, the accuracy stays the same. What is wrong and how can I fix it?
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras import layers
from keras import models
from keras import optimizers
from keras.layers import Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
# Small VGG-style feature extractor: the first two convolutional blocks of
# VGG16. Layer names mirror keras.applications.VGG16 so that pretrained
# weights can be matched to these layers.
#
# VGG16 uses padding='same' on every convolution. The Keras default
# ('valid') trims 2 px from each feature map per conv, which makes these
# blocks differ from the network the pretrained filters were trained in.
base_model = models.Sequential()
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                             name='block1_conv1', input_shape=(224, 224, 3)))
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                             name='block1_conv2'))
base_model.add(layers.MaxPooling2D((2, 2), name='block1_pool'))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                             name='block2_conv1'))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                             name='block2_conv2'))
base_model.add(layers.MaxPooling2D((2, 2), name='block2_pool'))
# Print the architecture of the convolutional part before the head is added.
base_model.summary()
"""
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
_________________________________________________________________
block1_conv1 (Conv2D) (None, 256, 256, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 256, 256, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 128, 128, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 128, 128, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 128, 128, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 64, 64, 128) 0
=================================================================
Total params: 260,160.0
Trainable params: 260,160.0
Non-trainable params: 0.0
"""
# Classification head: flatten the conv features, one hidden layer, then a
# single sigmoid unit because there are just two classes.
for head_layer in (
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(1, activation='sigmoid')):
    base_model.add(head_layer)

# Training configuration.
epochs = 50
callbacks = []
decay = 0.0

# Compile with SGD + Nesterov momentum and a very small learning rate.
sgd = optimizers.SGD(lr=1e-4, decay=1e-6, momentum=0.9, nesterov=True)
base_model.compile(optimizer=sgd,
                   loss='binary_crossentropy',
                   metrics=['accuracy'])
# Build VGG16 with ImageNet weights and without its fully connected top,
# for the same 224x224 RGB input size as base_model.
vgg = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
print('Model loaded.')
"""
['block1_conv1',
'block1_conv2',
'block1_pool',
'block2_conv1',
'block2_conv2',
'block2_pool',
'block3_conv1',
'block3_conv2',
'block3_conv3',
'block3_conv4',
'block3_pool',
'block4_conv1',
'block4_conv2',
'block4_conv3',
'block4_conv4',
'block4_pool',
'block5_conv1',
'block5_conv2',
'block5_conv3',
'block5_conv4',
'block5_pool',
'dense_1',
'dense_2',
'dense_3',
'dropout_1',
'global_average_pooling2d_1',
'input_1']
"""
# Copy the pretrained kernels and biases of the first two VGG16 conv blocks
# into the identically named layers of base_model. Looking layers up by name
# (instead of by position in base_model.layers) keeps the mapping correct if
# layers are later inserted or removed. Every other layer of base_model keeps
# its random initialisation.
for layer_name in ('block1_conv1', 'block1_conv2',
                   'block2_conv1', 'block2_conv2'):
    pretrained = vgg.get_layer(layer_name).get_weights()
    base_model.get_layer(layer_name).set_weights(pretrained)
# NOTE(review): this is set after Keras has been imported and the model has
# been built; confirm the backend has not already picked its devices by now.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'

# Training data: scale pixels to [0, 1] and apply random augmentation.
# NOTE(review): the ImageNet VGG16 weights were presumably trained on inputs
# prepared differently from a plain 1/255 rescale - confirm the preprocessing.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
# Validation and test data are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# `shuffle` must be a real boolean: any non-empty string, including 'False',
# is truthy and would turn shuffling ON.
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(
    train_dir, target_size=(224, 224), class_mode='binary',
    batch_size=16, shuffle=True, seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(
    eval_dir, target_size=(224, 224), class_mode='binary',
    batch_size=16, shuffle=True, seed=42)
# load and iterate test dataset; unshuffled so that predictions stay in the
# same order as the files on disk
test_generator = test_datagen.flow_from_directory(
    test_dir, target_size=(224, 224), class_mode=None,
    batch_size=1, shuffle=False, seed=42)
# Training: run `epochs` passes; each pass draws as many full batches as the
# respective generator holds (sample count // batch size).
steps_per_epoch = train_generator.n // train_generator.batch_size
validation_steps = val_generator.n // val_generator.batch_size
history = base_model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=validation_steps)
#Save the model
#base_model.save_weights('/home/d/Desktop/s/base_model_weights.h5')
#base_model.save('/home/d/Desktop/s/base_model_keras.h5')
# Plot the training and validation curves recorded in the History object.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# One figure per metric: (title, training series, label, validation series, label).
curves = (
    ('Training and Validation accurarcy',
     acc, 'Training accuracy', val_acc, 'Validation accuracy'),
    ('Training and Validation loss',
     loss, 'Training loss', val_loss, 'Validation loss'),
)
for position, (title, train_series, train_label, val_series, val_label) in enumerate(curves):
    if position > 0:
        # The first curve pair goes on the implicit figure; later ones get a new one.
        plt.figure()
    plt.plot(epochs, train_series, 'b', label=train_label)
    plt.plot(epochs, val_series, 'r', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
and the training just goes on forever like this (no changes from epoch to epoch either):
2625/4002 [==================>...........] - ETA: 3:49 - loss: 7.9723 - acc: 0.5053
2626/4002 [==================>...........] - ETA: 3:49 - loss: 7.9720 - acc: 0.5053
2627/4002 [==================>...........] - ETA: 3:49 - loss: 7.9735 - acc: 0.5052
2628/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2629/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2630/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2631/4002 [==================>...........] - ETA: 3:48 - loss: 7.9725 - acc: 0.5052
2632/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2633/4002 [==================>...........] - ETA: 3:48 - loss: 7.9733 - acc: 0.5052
2634/4002 [==================>...........] - ETA: 3:47 - loss: 7.9722 - acc: 0.5053
2635/4002 [==================>...........] - ETA: 3:47 - loss: 7.9730 - acc: 0.5052
2636/4002 [==================>...........] - ETA: 3:47 - loss: 7.9719 - acc: 0.5053
2637/4002 [==================>...........] - ETA: 3:47 - loss: 7.9727 - acc: 0.5052
2638/4002 [==================>...........] - ETA: 3:47 - loss: 7.9731 - acc: 0.5052
2639/4002 [==================>...........] - ETA: 3:47 - loss: 7.9732 - acc: 0.5052

Related

Why does my Keras Sequential neural network get very low accuracy (below 0.0011)?

I am building a hashtag recommendation model for twitter media posts, which takes tweet text as input and does 300-dimensional word embedding on it and classifies it among 198 hashtags as classes. When I run my model I get lower than 0.0011 accuracy which does not change later! What is wrong in my model?
import pickle
import numpy as np
from keras import initializers, regularizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import LSTM, Activation, Dense, Dropout, Embedding
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, load_model
package = "2018_pickle"

# Load the preprocessed artefacts.
# NOTE: pickle.load can execute arbitrary code - only use it on files you
# produced yourself.
with open(path1, "rb") as f:
    maxLen, l_h2i, l_w2i = pickle.load(f)
with open(path2, "rb") as f:
    X_train, X_test, X_train_indices, X_test_indices = pickle.load(f)
with open(path3, "rb") as f:
    Y_train, Y_test, Y_train_oh, Y_test_oh = pickle.load(f)
with open(path4, "rb") as f:
    emd_matrix = pickle.load(f)

if __name__ == "__main__":
    modelname = "model_1"
    train = False
    vocab_size = len(emd_matrix)
    emd_dim = emd_matrix.shape[1]

    if train:
        model = Sequential()
        # Frozen embedding layer initialised from the precomputed matrix.
        model.add(
            Embedding(
                vocab_size,
                emd_dim,
                weights=[emd_matrix],
                input_length=maxLen,
                trainable=False,
            )
        )
        # Three stacked LSTMs; only the last one collapses the sequence.
        for keep_sequence in (True, True, False):
            model.add(
                LSTM(
                    256,
                    return_sequences=keep_sequence,
                    activation="relu",
                    kernel_regularizer=regularizers.l2(0.01),
                    kernel_initializer=initializers.glorot_normal(seed=None),
                )
            )
        # One softmax output per hashtag class.
        model.add(Dense(198, activation="softmax"))
        model.compile(
            optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
        )

        # Keep the weights with the lowest training loss; halve the learning
        # rate when the validation loss stops improving.
        checkpoint = ModelCheckpoint(
            filepath, monitor="loss", verbose=1, save_best_only=True, mode="min"
        )
        reduce_lr = ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=2, min_lr=0.000001
        )
        training_callbacks = [checkpoint, reduce_lr]
        history = model.fit(
            X_train_indices,
            Y_train_oh,
            batch_size=2048,
            epochs=5,
            validation_split=0.1,
            shuffle=True,
            callbacks=training_callbacks,
        )
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_10 (Embedding) (None, 54, 300) 22592100
_________________________________________________________________
lstm_18 (LSTM) (None, 54, 256) 570368
_________________________________________________________________
lstm_19 (LSTM) (None, 54, 256) 525312
_________________________________________________________________
lstm_20 (LSTM) (None, 256) 525312
_________________________________________________________________
dense_7 (Dense) (None, 198) 50886
=================================================================
Total params: 24,263,978
Trainable params: 1,671,878
Non-trainable params: 22,592,100
_________________________________________________________________
None
Train on 177278 samples, validate on 19698 samples
Epoch 1/5
177278/177278 [==============================] - 70s 396us/step - loss: 3.3672 - acc: 8.7433e-04 - val_loss: 0.3103 - val_acc: 0.0000e+00
Epoch 00001: loss improved from inf to 3.36719, saving model to ./checkpoints/model_1/lstm-01-3.367-0.001-0.310-0.000.hdf5
Epoch 2/5
177278/177278 [==============================] - 66s 371us/step - loss: 0.1950 - acc: 2.4820e-04 - val_loss: 0.1616 - val_acc: 0.0016
Epoch 00002: loss improved from 3.36719 to 0.19496, saving model to ./checkpoints/model_1/lstm-02-0.195-0.000-0.162-0.002.hdf5
Epoch 3/5
177278/177278 [==============================] - 66s 370us/step - loss: 0.1583 - acc: 0.0011 - val_loss: 0.1570 - val_acc: 0.0016
Epoch 00003: loss improved from 0.19496 to 0.15826, saving model to ./checkpoints/model_1/lstm-03-0.158-0.001-0.157-0.002.hdf5
Epoch 4/5
177278/177278 [==============================] - 65s 369us/step - loss: 0.1566 - acc: 0.0011 - val_loss: 0.1573 - val_acc: 0.0016
Epoch 00004: loss improved from 0.15826 to 0.15660, saving model to ./checkpoints/model_1/lstm-04-0.157-0.001-0.157-0.002.hdf5
Epoch 5/5
177278/177278 [==============================] - 66s 374us/step - loss: 0.1561 - acc: 0.0011 - val_loss: 0.1607 - val_acc: 0.0016
Epoch 00005: loss improved from 0.15660 to 0.15610, saving model to ./checkpoints/model_1/lstm-05-0.156-0.001-0.161-0.002.hdf5

ValueError: Error when checking target: expected conv2d_37 to have shape (57, 57, 16) but got array with shape (120, 120, 3)

My training array has shape (264, 120, 120, 3).
I am trying to pass a NumPy array of images as input.
# Two stacked convolutions with no flatten/dense head, so the model output is
# a feature map; the targets passed to fit() must have that same shape.
model = Sequential([
    Conv2D(8, (3, 3), activation='relu', strides=2,
           input_shape=(image_height, image_width, channels)),
    Conv2D(16, (3, 3), activation='relu'),
])
model.summary()
model.compile(loss='mse', optimizer='rmsprop')
model.fit(X_train, y_train, batch_size=1, epochs=1, verbose=1)
below is the error message
________________________________________________________________
Layer (type) Output Shape Param
=================================================================
conv2d_36 (Conv2D) (None, 59, 59, 8) 224
_________________________________________________________________
conv2d_37 (Conv2D) (None, 57, 57, 16) 1168
=================================================================
Total params: 1,392
Trainable params: 1,392
Non-trainable params: 0
ValueError: Error when checking target: expected conv2d_37 to have shape (57, 57, 16) but got array with shape (120, 120, 3)
This error is caused by a mismatch between the shape of the model's output and the shape of the training targets.
Please refer to the sample code below:
#Import Dependencies
import keras
from keras.models import Model, Sequential
from keras.layers import Conv2D, Flatten, Dense
# Model building: the Flatten + Dense head reduces the conv feature map to a
# vector of 10 values per sample, matching the shape of the labels below.
model = Sequential([
    Conv2D(8, (3, 3), activation='relu', strides=2, input_shape=(28, 28, 1)),
    Conv2D(16, (3, 3), activation='relu'),
    Flatten(),
    Dense(10, activation='softmax'),
])
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['mse'])

# Generate dummy data: 100 random 28x28x1 inputs and 100 random 0/1 label
# rows of width 10.
import numpy as np
data = np.random.random((100, 28, 28, 1))
labels = np.random.randint(2, size=(100, 10))

# Train for 5 epochs in batches of 32 samples.
model.fit(data, labels, batch_size=32, epochs=5)
Output:
Epoch 1/5
100/100 [==============================] - 0s 1ms/step - loss: 1.2342 - mse: 0.4195
Epoch 2/5
100/100 [==============================] - 0s 234us/step - loss: 1.2183 - mse: 0.4167
Epoch 3/5
100/100 [==============================] - 0s 222us/step - loss: 1.2104 - mse: 0.4151
Epoch 4/5
100/100 [==============================] - 0s 255us/step - loss: 1.2019 - mse: 0.4131
Epoch 5/5
100/100 [==============================] - 0s 239us/step - loss: 1.1938 - mse: 0.4120

Neural Network - My network seems to train well, but the classification report is completely random

I'm training a model to classify 3 types of vehicles. Everything seems to go well until I try to predict anything with my model. The prediction results are completely random.
I'm using 15000 images of 3 classes (5000 each) to train and 6000 images of 3 classes (2000 each) to validate with some Data Augmentation.
I'm using Keras with Tensorflow-GPU backend, making classification report and confusion matrix with use of scikit-learn.
I have no idea why, while I'm training the model, the loss is low and the accuracy is high for both training and validation, yet the confusion matrix is completely random.
Image Data Generators
img_width, img_height = 96, 96
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 15000
nb_validation_samples = 6000
epochs = 200
batch_size = 64

# Training images: rescale to [0, 1] plus random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True
)
# Validation images: same 1/255 scaling as the training images (the original
# 1/256 fed the network a slightly different input range), no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True)
# shuffle=False makes the generator yield images in the same order as
# validation_generator.classes. With shuffling on, predictions taken from the
# generator cannot be compared against .classes, so a confusion matrix built
# that way looks random even for a well-trained model.
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)
Building model
# Two conv/pool stages followed by two dropout-regularised dense layers and
# a 3-way softmax output.
model = Sequential([
    Conv2D(32, (5, 5), input_shape=image_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (5, 5)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(100),
    Activation('relu'),
    Dropout(0.5),
    Dense(100),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])
adam = optimizers.adam(lr=0.0016643)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
Model fitting
# Stop once the validation loss has gone 5 consecutive epochs without improving.
es = keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
history = model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[es],
    use_multiprocessing=False,
    workers=6)
Testing my model
# Restart the iterator so that prediction begins at the first validation
# image rather than wherever the last validation pass during training stopped.
validation_generator.reset()
# Ceiling division: flooring would skip the final partial batch and leave
# fewer predictions than there are labels in validation_generator.classes.
prediction_steps = (nb_validation_samples + batch_size - 1) // batch_size
Y_pred = model.predict_generator(validation_generator, steps=prediction_steps)
y_pred = np.argmax(Y_pred, axis=1)

# NOTE: validation_generator.classes is in directory order, so this
# comparison is only meaningful if the generator was created with
# shuffle=False.
y_true = validation_generator.classes
print('Confusion Matrix')
cm = confusion_matrix(y_true, y_pred)
print(cm)
print('Classification Report')
# NOTE(review): assumes this order matches validation_generator.class_indices
# - confirm against the directory names.
target_names = ['Car', 'Bus', 'Truck']
print(classification_report(y_true, y_pred, target_names=target_names))
The output I get looks something like this:
Output
Epoch 1/200
234/234 [==============================] - 35s 149ms/step - loss: 0.9103 - acc: 0.5645 - val_loss: 0.6354 - val_acc: 0.7419
Epoch 2/200
234/234 [==============================] - 30s 130ms/step - loss: 0.6804 - acc: 0.7181 - val_loss: 0.4679 - val_acc: 0.8117
Epoch 3/200
234/234 [==============================] - 30s 129ms/step - loss: 0.6027 - acc: 0.7573 - val_loss: 0.4401 - val_acc: 0.8238
.
.
.
Epoch 37/200
234/234 [==============================] - 30s 128ms/step - loss: 0.2667 - acc: 0.9018 - val_loss: 0.2095 - val_acc: 0.9276
Epoch 38/200
234/234 [==============================] - 30s 129ms/step - loss: 0.2711 - acc: 0.9037 - val_loss: 0.1995 - val_acc: 0.9353
##Here it breaks with an EarlyStopping
Confusion Matrix
[[659 680 661]
[684 636 680]
[657 658 685]]
Classification Report
precision recall f1-score support
Car 0.33 0.33 0.33 2000
Bus 0.32 0.32 0.32 2000
Truck 0.34 0.34 0.34 2000
micro avg 0.33 0.33 0.33 6000
macro avg 0.33 0.33 0.33 6000
weighted avg 0.33 0.33 0.33 6000
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

Conv1D and batch_size questions

I face two problems when I implement 1D convnet for multi-channel sequential data.
(224 samples x 300 time sequential x 19 channels)
1) I set batch_size to 7, but the progress counter jumps in steps several times that size:
not 7, 14, 21, 28, but 7, 56, 105, 147... What is wrong with my setup?
2) When I look at the accuracy records, the model seems to learn NOTHING.
Is it impossible to implement a classifier for multi-channel sequential data with Conv1D?
If it is possible, can you give me some advice based on my code?
#result
x_train shape: (224, 300, 19)
224 train samples
28 test samples
Train on 224 samples, validate on 28 samples
Epoch 1/50
7/224 [..............................] - ETA: 68s - loss: 0.6945 - acc: 0.5714
56/224 [======>.......................] - ETA: 6s - loss: 0.6993 - acc: 0.4464
105/224 [=============>................] - ETA: 2s - loss: 0.6979 - acc: 0.4381
147/224 [==================>...........] - ETA: 1s - loss: 0.6968 - acc: 0.4422
189/224 [========================>.....] - ETA: 0s - loss: 0.6953 - acc: 0.4444
224/224 [==============================] - 2s - loss: 0.6953 - acc: 0.4420 - val_loss: 0.6956 - val_acc: 0.5000
Epoch 2/50
7/224 [..............................] - ETA: 0s - loss: 0.6759 - acc: 0.5714
63/224 [=======>......................] - ETA: 0s - loss: 0.6924 - acc: 0.5556
133/224 [================>.............] - ETA: 0s - loss: 0.6905 - acc: 0.5338
203/224 [==========================>...] - ETA: 0s - loss: 0.6903 - acc: 0.5567
224/224 [==============================] - 0s - loss: 0.6923 - acc: 0.5357 - val_loss: 0.6968 - val_acc: 0.5000
# code
from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
import numpy as np
batch_size = 7
num_classes = 2
epochs = 50

# input data dimensions : 300 sequential x 19 channels
eeg_rows, num_ch = 300, 19

x_train = np.load('eeg_train.npy')
y_train = np.load('label_train.npy')
x_test = np.load('eeg_test.npy')
y_test = np.load('label_test.npy')
x_valid = np.load('eeg_valid.npy')
y_valid = np.load('label_valid.npy')

# Bring every split to (samples, time steps, channels).
x_train = x_train.reshape(x_train.shape[0], eeg_rows, num_ch)
x_test = x_test.reshape(x_test.shape[0], eeg_rows, num_ch)
x_valid = x_valid.reshape(x_valid.shape[0], eeg_rows, num_ch)
input_shape = (eeg_rows, num_ch)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Convert the validation inputs themselves. The original assigned
# x_test.astype(...) here, which replaced the validation inputs with the test
# inputs while y_valid still held the validation labels.
x_valid = x_valid.astype('float32')

# Scale all splits by the same constant.
x_train /= 100
x_test /= 100
x_valid /= 100

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Model: two same-padded Conv1D layers, one pooling stage, then a small
# dense classifier with a 2-way softmax output.
conv = Sequential([
    Conv1D(32, 3, input_shape=input_shape, activation='relu', padding='same'),
    Conv1D(32, 3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2, strides=2, padding='same'),
    Dropout(0.2),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])
conv.compile(optimizer=keras.optimizers.Adam(),
             loss='categorical_crossentropy',
             metrics=['accuracy'])

# Train, monitoring the validation split after every epoch.
conv.fit(x_train, y_train,
         validation_data=(x_valid, y_valid),
         batch_size=batch_size,
         epochs=epochs,
         verbose=1)

# Final loss/accuracy are measured on the validation split.
score = conv.evaluate(x_valid, y_valid, verbose=0)
print(conv.summary())
print(conv.input_shape)
print(conv.output_shape)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

ValueError: Can't handle mix of multilabel-indicator and binary Issue with GridSearchCV and KerasClassifier

I have this CNN code for the MNIST data that divides the dataset into training set and test set for only 2's and 7's. On running it the code it gives about 98% Accuracy on the test set.
So, to increase the accuracy I tried using KerasClassifier from keras.wrappers.scikit_learn. I was hoping to find the optimal parameters by using the classifier with GridSearchCV, but when I run the code the first iteration goes fine and then an error is thrown before the next iteration.
Here is the code:
# This is the normal CNN model without GridSearch
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy as np
batch_size = 128
num_classes = 2
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Only keep the digits 2 and 7; the label becomes 1 for a 7 and 0 for a 2.
train_picks = (y_train == 2) | (y_train == 7)
test_picks = (y_test == 2) | (y_test == 7)
x_train, x_test = x_train[train_picks], x_test[test_picks]
y_train = (y_train[train_picks] == 7).astype(int)
y_test = (y_test[test_picks] == 7).astype(int)

# Add the single grey-scale channel on whichever axis the backend expects.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Scale pixel values to [0, 1] as float32.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# Small CNN: two conv layers, one pooling stage, a 16-unit dense layer and a
# 2-way softmax output.
model = Sequential([
    Conv2D(4, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(8, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])
model.compile(optimizer=keras.optimizers.Adadelta(),
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])

# Train, using the test split as validation data.
model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Improving the accuracy using GridSearch
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
def build_model(optimizer):
    """Build and compile the two-class CNN for the given optimizer.

    Args:
        optimizer: optimizer handed straight to ``model.compile``.

    Returns:
        The compiled Sequential model.
    """
    # batch_size and epochs are not parameters of this function, so the
    # names printed here resolve to the module-level variables.
    print(optimizer,batch_size,epochs)
    layer_stack = [
        Conv2D(4, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Conv2D(8, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(16, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer=optimizer,
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])
    return model
model = KerasClassifier(build_fn=build_model)
parameters = {'batch_size': [128, 256],
              'epochs': [10, 20],
              'optimizer': ['rmsprop']}
grid_search = GridSearchCV(estimator=model,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10)
# y_train is one-hot encoded at this point, while KerasClassifier.predict
# returns a 1-D array of class labels. scoring='accuracy' compares the two
# with sklearn's accuracy_score, which raises "Can't handle mix of
# multilabel-indicator and binary". Passing integer class labels keeps both
# sides 1-D; the wrapper converts them back to categorical form for the
# categorical_crossentropy loss when fitting.
grid_search = grid_search.fit(x_train, np.argmax(y_train, axis=1))
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
This is the Output of the code:
rmsprop 128 12
Epoch 1/10
11000/11000 [==============================] - 3s - loss: 0.1654 - acc: 0.9476
Epoch 2/10
11000/11000 [==============================] - 3s - loss: 0.0699 - acc: 0.9786
Epoch 3/10
11000/11000 [==============================] - 2s - loss: 0.0557 - acc: 0.9839
Epoch 4/10
11000/11000 [==============================] - 2s - loss: 0.0510 - acc: 0.9839
Epoch 5/10
11000/11000 [==============================] - 2s - loss: 0.0471 - acc: 0.9853
Epoch 6/10
11000/11000 [==============================] - 2s - loss: 0.0417 - acc: 0.9875
Epoch 7/10
11000/11000 [==============================] - 2s - loss: 0.0399 - acc: 0.9870
Epoch 8/10
11000/11000 [==============================] - 2s - loss: 0.0365 - acc: 0.9885
Epoch 9/10
11000/11000 [==============================] - 2s - loss: 0.0342 - acc: 0.9899
Epoch 10/10
11000/11000 [==============================] - 2s - loss: 0.0321 - acc: 0.9903
768/1223 [=================>............] - ETA: 0sTraceback (most recent call last):
File "<ipython-input-4-975b20661114>", line 30, in <module>
grid_search = grid_search.fit(x_train, y_train)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 945, in fit
return self._fit(X, y, groups, ParameterGrid(self.param_grid))
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py", line 564, in _fit
for parameters in parameter_iterable
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 326, in __init__
self.results = batch()
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 260, in _fit_and_score
test_score = _score(estimator, X_test, y_test, scorer)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 288, in _score
score = scorer(estimator, X_test, y_test)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/metrics/scorer.py", line 98, in __call__
**self._kwargs)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 172, in accuracy_score
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/home/thakkar_/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 82, in _check_targets
"".format(type_true, type_pred))
ValueError: Can't handle mix of multilabel-indicator and binary
Please help!
The error seems to be in the way you are passing the dictionary parameters.
An example from here:
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
# Function to create model, required for KerasClassifier
def create_model(learn_rate=0.01, momentum=0):
    """Build and compile a one-hidden-layer binary classifier.

    Args:
        learn_rate: learning rate passed to the SGD optimizer.
        momentum: momentum passed to the SGD optimizer.

    Returns:
        The compiled Sequential model.
    """
    # 8 input features -> 12 ReLU units -> 1 sigmoid output.
    model = Sequential([
        Dense(12, input_dim=8, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    sgd = SGD(lr=learn_rate, momentum=momentum)
    model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])
    return model
# Seed NumPy's random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Load the dataset; the first 8 columns are the inputs (X) and column 8 is
# the output (Y).
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
X, Y = dataset[:, 0:8], dataset[:, 8]

# Wrap the model factory so scikit-learn can drive it.
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

# Grid of values tried for create_model's two keyword arguments.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = {'learn_rate': learn_rate, 'momentum': momentum}
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

# Summarize results: the best combination first, then every combination.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
GridSearchCV basically takes the elements of the dictionary that match the estimator's input parameters and trains with them. You are passing the complete dictionary, but batch_size and epochs aren't parameters of the function...
# Improving the accuracy using GridSearch
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
def build_model(optimizer = 'adam'):
    """Build and compile the two-class CNN.

    Args:
        optimizer: optimizer handed straight to ``model.compile``;
            defaults to 'adam'.

    Returns:
        The compiled Sequential model.
    """
    layer_stack = [
        Conv2D(4, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Conv2D(8, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(16, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer=optimizer,
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])
    return model
model = KerasClassifier(build_fn=build_model)
parameters = {'batch_size': [128, 256],
              'epochs': [10, 20],
              'optimizer': ['rmsprop']}
grid_search = GridSearchCV(estimator=model,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=10)
# y_train is one-hot encoded at this point, while KerasClassifier.predict
# returns a 1-D array of class labels. scoring='accuracy' compares the two
# with sklearn's accuracy_score, which raises "Can't handle mix of
# multilabel-indicator and binary". Passing integer class labels keeps both
# sides 1-D; the wrapper converts them back to categorical form for the
# categorical_crossentropy loss when fitting.
grid_search = grid_search.fit(x_train, np.argmax(y_train, axis=1))
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
Maybe something like this would work.. have not tested it.

Resources