Grid search and KerasClassifier using class weights - scikit-learn

I am trying to conduct grid search using scikit-learn RandomizedSearchCV function together with Keras KerasClassifier wrapper for my unbalanced multi-class classification problem. However, when I try to give class_weight as an input, the fit method gives me the following error:
RuntimeError: Cannot clone object <keras.wrappers.scikit_learn.KerasClassifier object at 0x000002AA3C676710>, as the constructor either does not set or modifies parameter class_weight
Below are the functions that I use to build the KerasClassifier and the script for RandomizedSearchCV:
build_fn:
import keras as k
def build_keras_model(loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'], optimiser = 'adam',
learning_rate = 0.001, n_neurons = 30, n_layers = 1, n_classes = 3,
l1_reg = 0.001, l2_reg = 0.001, batch_norm = False, dropout = None,
input_shape = (8,)):
model = k.models.Sequential()
model.add(k.layers.Dense(n_neurons,
input_shape = input_shape,
kernel_regularizer = k.regularizers.l1_l2(l1 = l1_reg, l2 = l2_reg),
activation = 'relu'))
if batch_norm is True:
model.add(k.layers.BatchNormalization())
if dropout is not None:
model.add(k.layers.Dropout(dropout))
i = 1
while i < n_layers:
model.add(k.layers.Dense(n_neurons,
kernel_regularizer = k.regularizers.l1_l2(l1 = l1_reg, l2 = l2_reg),
activation = 'relu'))
if batch_norm is True:
model.add(k.layers.BatchNormalization())
if dropout is not None:
model.add(k.layers.Dropout(dropout))
i += 1
del i
model.add(k.layers.Dense(n_classes, activation = 'softmax'))
if optimiser == 'adam':
koptimiser = k.optimizers.Adam(lr = learning_rate)
elif optimiser == 'adamax':
koptimiser = k.optimizers.Adamax(lr = learning_rate)
elif optimiser == 'nadam':
koptimiser = k.optimizers.Nadam(lr = learning_rate)
else:
print('Unknown optimiser type')
model.compile(optimizer = koptimiser, loss = loss, metrics = metrics)
model.summary()
return model
Script:
import scipy as sp
from sklearn.utils.class_weight import compute_class_weight
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
parameters = {
'optimiser': ['adam', 'adamax', 'nadam'],
'learning_rate': sp.stats.uniform(0.0005, 0.0015),
'epochs': sp.stats.randint(500, 1501),
'n_neurons': sp.stats.randint(20, 61),
'n_layers': sp.stats.randint(1, 3),
'n_classes': [3],
'batch_size': sp.stats.randint(1, 11),
'l1_reg': sp.stats.reciprocal(1e-3, 1e1),
'l2_reg': sp.stats.reciprocal(1e-3, 1e1),
'batch_norm': [False],
'dropout': [None],
'metrics': [['accuracy']],
'loss': ['sparse_categorical_crossentropy'],
'input_shape': [(training_features.shape[1],)]
}
class_weights = compute_class_weight('balanced', np.unique(training_targets),
training_targets[target_label[0]])
class_weights = dict(enumerate(class_weights))
keras_model = KerasClassifier(build_fn = build_keras_model, verbose = 0, class_weight = class_weights)
clf = RandomizedSearchCV(keras_model, parameters, n_iter = 1, scoring = 'f1_micro',
n_jobs = 1, cv = 5, random_state = random_state)
clf.fit(training_features, training_targets.values[:, 0])
model = clf.best_estimator_

To pass class_weights in this scenario with KerasClassifier, the class_weights should be passed in the fit method and then will be forwarded to the keras model.
grid_result = clf.fit(training_features, training_targets.values[:, 0], class_weight=class_weights)
In older versions it was neccecary to pass them with the clf__ prefix:
grid_result = clf.fit(training_features, training_targets.values[:, 0], clf__class_weight=class_weights)

When using a KerasClassifier, to use class weights, even for GridSearch, use fit_params functionality to add multiple parameters as the build_fn calls the model function and does not accept arguments.
`
classifier = KerasClassifier(build_fn = build_classifier, epochs=20, batch_size = 128)
accuracies = cross_val_score(estimator=classifier, X = X_train, y = y_train, cv = 3,
n_jobs = -1, verbose=0,
fit_params = {'callbacks': [EarlyStopping()],
class_weight:class_weights})
`

Related

Multi-input/Multi-output : Wrong output dimension when using KerasClassifier and GridSearchCV

I have built a multi-input (100 features) multi-ouput (100 predictions) ANN model using keras and tensorflow. I have been able to train my model and reach a quite satisfying accuracy on the test set using the following code :
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
def my_loss_fn(y_true, y_pred) :
d = K.sum(K.abs(y_true), axis = -1)
n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true), axis = -1)
return 1 - n/d
def my_metric_fn(y_true, y_pred) :
d = K.sum(K.abs(y_true))
n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true))
return n/d
def accuracy(y_true, y_pred) :
#print(y_true.shape, y_true)
#print(y_pred.shape, y_true)
acc = np.zeros([1, len(y_true)])
for day in range(len(y_pred)) :
d = 0
n = 0
for i in range(len(y_pred[0])) :
d = d + abs(y_true[day, i])
if np.sign(y_pred[day, i])*np.sign(y_true[day, i]) > 0 :
n = n + abs(y_true[day, i])
else :
n = n + 0
acc[0, day] = n/d
return np.mean(acc, axis = -1)[0]
#Model
classifier = Sequential()
classifier.add(Dense(units = 50, input_shape = (100, ), activation = "tanh"))
classifier.add(Dropout(0.2))
classifier.add(Dense(units=100, activation = 'tanh'))
classifier.compile(optimizer = 'rmsprop', loss = my_loss_fn, metrics = ['accuracy', my_metric_fn])
#Training
callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, patience = 20, verbose = 0, mode = 'min')
nb_epochs = 250
history = classifier.fit(X_train, y_train, epochs = nb_epochs, batch_size = 31, callbacks = [callback], verbose = True, validation_split = 0., validation_data = (X_test, y_test), use_multiprocessing = True)
#Prediction
y_pred_train = classifier.predict(X_train)
y_pred_test = classifier.predict(X_test)
acc_test = accuracy(y_test, y_pred_test)
acc_train = accuracy(y_train, y_pred_train)
I am trying to improve the performance of my model by tuning the hyperparameters so I used KerasClassifier() and GridSearchCV(). The following code illustrates my approach for the gridsearch.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from tensorflow import autograph
#Building a function to create the classifier
def build_classifier(nb_layers, nb_nodes, optimizer, dropout, activation_fn):
classifier=Sequential()
classifier.add(Dense(units = nb_nodes, input_shape = (100, ), activation = activation_fn))
for i in range(nb_layers-1) :
classifier.add(Dense(units = nb_nodes, activation = activation_fn, kernel_initializer = "uniform"))
classifier.add(Dropout(dropout))
classifier.add(Dense(units = 100, activation = 'tanh'))
classifier.compile(optimizer=optimizer, loss = tf.autograph.experimental.do_not_convert(my_loss_fn), metrics= ['accuracy', tf.autograph.experimental.do_not_convert(my_metric_fn)])
return classifier
#Creating a scorer to feed to the GridSearchCV()
my_scorer = make_scorer(accuracy, greater_is_better = True)
classifier=KerasClassifier(build_fn=build_classifier)
parameters={'batch_size':[13, 31],'epochs':[100, 150], 'optimizer':['adam', 'rmsprop'], 'dropout' : [0.2, 0.1], 'nb_layers' : [2, 3], 'nb_nodes' : [45, 50, 110, 115], 'activation_fn' : ['relu', 'tanh']}
grid_search=GridSearchCV(estimator=classifier, scoring = my_scorer, param_grid=parameters, cv=5, verbose = 1)
grid_search=grid_search.fit(X_train_, y_train_raw)
When I fit my GridSearchCV() object I get the following error at the end of the first combination of hyperparameters (when the scoring is computed) :
TypeError: object of type 'numpy.int32' has no len()
I investigated by adding print commandes inside my accuracy() function
#print(y_true.shape, y_true)
#print(y_pred.shape, y_pred)
to print both the shape and the array y_true and y_pred given as inputs for my accuracy() function used as the scoring in the GridSearchCV() object.
I found out that y_true.shape == (555, 100) but y_pred.shape == (555,). The value 555 corresponds to the number of lines of the fifth validation set because cv = 5.
However, I do not understand why the prediction of the gridsearch is not a multi-output prediction even though the number of nodes of the last layer of the classifier is (100,).
This was a regression problem so I used KerasRegressor() instead and it solved the issue. I guess that for a multi-output classification problem, KerasClassifier() expect the output to be a 2D hot encoded array.

the loss didn't drop in tf with keras cifar10 datesets

I create a vgg like model in tensorflow and use cifar10 in keras to train it, but the loss didn't drop, can u find what's the problem?
cifar10 datesets
from keras.datasets import cifar100, cifar10
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
def load_cifar(num_class=100):
if num_class == 100:
nb_classes = 100
(trainX, trainY), (testX, testY) = cifar100.load_data()
else:
nb_classes = 10
(trainX, trainY), (testX, testY) = cifar10.load_data()
trainX = trainX.astype('float32')
testX = testX.astype('float32')
trainX /= 255.
testX /= 255.
Y_train = np_utils.to_categorical(trainY, nb_classes)
Y_test = np_utils.to_categorical(testY, nb_classes)
generator = ImageDataGenerator(rotation_range=15,
width_shift_range=5. / 32,
height_shift_range=5. / 32,
horizontal_flip=True if num_class ==10 else False)
generator.fit(trainX, seed=0)
return trainX, Y_train, testX, Y_test, generator
model and train
import sys
import os
sys.path.append(os.path.abspath('../'))
print(sys.path)
import tensorflow as tf
from Input import cifar
BatchSize = 2
def gen_gen(batch_size=BatchSize):
trainX, Y_train, testX, Y_test, generator = cifar.load_cifar(10)
gen = generator.flow(trainX, Y_train, batch_size=batch_size)
return gen
class vgg16TF():
def __init__(self, ih=32, iw=32, batch_size=BatchSize):
self.ih = ih
self.iw = iw
self.batch_size = batch_size
def unit(self, x, conv_nums, filters, name=None):
for i in range(1, conv_nums+1):
x = tf.layers.conv2d(inputs=x, filters=filters, kernel_size=(5, 5), padding='same',
use_bias=True, activation=tf.nn.relu, name=name+'_conv'+str(i))
x = tf.layers.max_pooling2d(x, (3, 3), strides=(2, 2), name=name+'_pool')
return x
def net(self):
input_x = tf.placeholder(shape=(self.batch_size, self.ih, self.iw, 3), dtype=tf.float32)
x = self.unit(input_x, 2, 64, name='blook1')
x = self.unit(x, 2, 64, name='blook2')
# x = self.unit(x, 3, 256, name='blook3')
# x = self.unit(x, 3, 512, name='blook4')
# x = self.unit(x, 3, 512, name='blook5')
x = tf.layers.flatten(x, name='flatten')
x = tf.layers.dense(x, 384, activation=tf.nn.relu, name='fc1', use_bias=True)
x = tf.layers.dense(x, 192, activation=tf.nn.relu, name='fc2', use_bias=True)
y = tf.layers.dense(x, 10, name='prediction', use_bias=True)
print(y)
return input_x, y
def loss(self, labels, logits):
labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=tf.arg_max(labels,1), logits=logits
)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'))
def train(self):
input_x, y_ = self.net()
input_y = tf.placeholder(shape=(self.batch_size, 10), dtype=tf.float64)
loss = self.loss(input_y, y_)
optimizer = tf.train.AdamOptimizer().minimize(loss=loss)
# correct_pred = tf.equal(tf.arg_max(y_, 1), tf.arg_max(input_y, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
with tf.Session() as sess:
tf.global_variables_initializer().run()
gen = gen_gen()
for i in range(10000):
train_x, train_y = gen.next()
# print(train_x.shape)
loss_val, _ = sess.run([loss, optimizer], feed_dict={input_x: train_x, input_y: train_y})
if i % 10 == 0:
print(loss_val)
some loss of train step:
2.2985106
2.2944324
2.3120923
2.306837
2.304546
2.2818785
2.3069105
2.3087378
2.3094501
2.2966876
2.3119392
2.2941442
2.2990022
2.2830834
2.3137615
I don't see any obvious errors in the code, but I can share my experience that the Adam optimizer does not always work well with VGG-like networks due to the large number of parameters, and the symptom is that the loss does not decrease. In that case you should use plain SGD with an appropriate learning rate and a schedule.

Actually printing values from tensor object

I'm currently trying to implement a basic autoencoder using Keras, and I have come to the stage where I would want the output from the second hidden layer. I think that I'm able to get the right object, the problem is that I get it as a tensor object, the code I've been trying to run is the following:
from keras.layers import Input, Dense, initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Activation
import tensorflow as tf
import time
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
input = []
for j in range(len(list_of_nodes)):
input.append(list_of_nodes[j])
for i in range(len(list_of_nodes)-2):
input.append(list_of_nodes[-2-i])
output = input[::-1]
return input, output
input,output = generate_hidden_nodes(list_of_nodes)
def autoencoder(epochs):
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
model = Sequential()
input, output = generate_hidden_nodes(list_of_nodes)
for j in range(len(input)):
if j == (len(input)-1):
model.add(Dense(output[j], activation='sigmoid', kernel_initializer=w, input_dim=input[j], name=names[j]))
#model.add(Dropout(0.45))
else:
model.add(Dense(output[j], activation='relu', kernel_initializer=w, input_dim=input[j],
name = names[j]))
#model.add(Dropout(0.45))
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
history = model.fit(d.X_train, d.X_train,
epochs=epochs,
batch_size=50,
shuffle=True,
validation_split = 0.2)
#validation_data=(d.X_test, d.X_test))
#print(history.history.keys())
#plt.plot(history.history['val_acc'])
#print(history.history['val_acc'])
plt.show()
return model
def cv():
accuracy = 0
size = 5
epochs = 20
variance = 0
storage = np.zeros((size, epochs))
for j in range(size):
ae = autoencoder(epochs)
#print(ae.history.history['val_acc'])
storage[j] = ae.history.history['val_acc']
for i in range(size):
accuracy += storage[i][-1]
mean = accuracy/size
for k in range(size):
variance += ((storage[k][-1] - mean)**2)
variance = variance/size
return mean, variance
#mean, variance = cv()
#print(mean)
#print(variance)
#time.sleep(10)
def finding_index():
elements, index = np.unique(d.Y_test, return_index=True)
return elements, index
def plotting():
ae = autoencoder(20)
elements, index = finding_index()
y_proba = ae.predict(d.X_test)
plt.figure(figsize=(20, 4))
# size = 20
for i in range(len(index)):
ax = plt.subplot(2, len(index), i + 1)
plt.imshow(d.X_test[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax = plt.subplot(2, len(index), i + 1 + len(index))
plt.imshow(y_proba[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
def plotting_weights(epochs):
ae = autoencoder(epochs)
output_layer = ae.get_layer('hidden2')
weights = output_layer.get_weights()[0]
print(weights.shape)
size = 20
plt.figure(figsize=(20, 4))
for j in range(3):
plt.gray()
plt.imshow(weights[j].reshape(12, 12))
plt.show()
def get_output():
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
new_model = Sequential()
new_model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim = 784))
new_model.add(Dense(144, activation='sigmoid', kernel_initializer=w, input_dim = 400))
#new_model.add(Dense(784, activation='sigmoid', kernel_initializer=w, input_dim = 144))
new_model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
history = new_model.fit(d.X_train, d.X_train,
epochs=20,
batch_size=50,
shuffle=True,
validation_split=0.2)
y = new_model.predict(d.X_test)
elements, index = finding_index()
#return y.shape
def get_output2():
ae = autoencoder(5)
a =ae.layers[1].output()
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init_op) # execute init_op
# print the random values that we sample
print(a)
get_output2()
I've tried to just print(a) as well, but as I said, that returns me a tensor object. Can someone provide me some information how I can actually print those value? Thanks in advance!
Simplest:
import keras.backend as K
print(K.eval(ae.layers[1].output()))
This is equivalent to:
with tf.Session() as sess:
print(sess.run(a))
I find it more readable to simply use the keras.backend interface.

KerasClassifier for use with build_fn which takes arguments

I am attempting to wrap my keras models in scikit learn GridSearchCV and Pipeline structures for hyperparameter tuning.
It works absolutely fine when the build_fn function takes 0 arguments for use in KerasClassifier. However it fails whenever I use a function which takes arguments.
Example code below
def prepare_classifier(x, y):
shape_of_input = x.shape
shape_of_target = y.shape
classifier = Sequential()
## number of neurons = 30
## kernel_initializer determines how the weights are initialized
## activation is the activation function at this particular hidden layer
## input_shape is the number of features in a single row.. in this case it is shape_of_input[1]
## shape_of_input[0] is the total number of such rows
classifier.add(Dense(units = 30, activation = 'relu', kernel_initializer = 'uniform', input_dim = shape_of_input[1]))
classifier.add(Dense(units = 30, activation = 'relu', kernel_initializer = 'uniform'))
## we are predicting 10 digits for each row of x.
## in total there are shape_of_input[0] rows in total
classifier.add(Dense(10, activation = 'softmax'))
## categorical_crossentropy is the loss function for multi output loss function
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
return classifier
def fit(classifier, x_train, y_train, epoch_size, batch_size = 10):
pipeline = Pipeline([
('keras_classifier', classifier)
])
param_grid = {
'keras_classifier__batch_size' : [10,20,30,50],
'keras_classifier__epochs' : [100, 200, 300],
'keras_classifier__x' : [x_train],
'keras_classifier__y' : [y_train],
}
grid = GridSearchCV(estimator = pipeline, param_grid = param_grid, n_jobs = -1)
grid.fit(x_train, y_train)
print("Best parameters are : ", grid.best_params_, '\n grid best score :', grid.best_score_)
classifier = KerasClassifier(build_fn = prepare_classifier, x = x_train[0:100], y = y_train )
fit(classifier, x_train[:100], y_train, epoch_size )
This is for some x, and some y data (p.s. I have used mnist data)
The error I get is :
RuntimeError: Cannot clone object , as the constructor either does not set or modifies parameter x
However if my prepare_classifier function takes no arguments code works absolutely fine.
What am I doing incorrectly?
solved it. essentially the below line was the issue
classifier = KerasClassifier(build_fn = prepare_classifier, x = x_train[0:100], y = y_train )
needed to be changed to
classifier = KerasClassifier(build_fn = prepare_classifier)
and the parameters for the prepare_classifier needs to be sent using param_grid

keras multiple input model doesn't work

i have bulit a model to distinguish cats from dogs using kaggle's cats_vs_dogs data set. I have tried two ways to do it. For the first one, I used three existing models(ResNet50, Xception InceptionV3)to extract features, i put the traing data through these models's convolutional base,predict and concatenate the results, then use them for a standalone densely-connected classifier.The result is pretty good, after five epoches training, val_acc became 99.58%. Then i want to use data augmentation and fine-tuing, so i extended the those three models by adding layers on top, and running he whole thing end-to-end on the input data. The strange thing is the second way got good result in the traing but lousy one in the validation, and the val_acc is always a constant(0.5). i feel very confused, how come these two ways have such different results.
here is my code
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
res_net_input = Input((224, 224, 3), name='res_net')
res_net_base_model = ResNet50(input_tensor=res_net_input, weights='imagenet', include_top=False)
for layers in res_net_base_model.layers:
layers.trainable = False
xception_input = Input((299, 299, 3), name='xception')
xception_base_model = Xception(input_tensor=xception_input, weights='imagenet', include_top=False)
for layers in xception_base_model.layers:
layers.trainable = False
inception_input = Input((299, 299, 3), name='inception')
inception_base_model = InceptionV3(input_tensor=inception_input, weights='imagenet', include_top=False)
for layers in inception_base_model.layers:
layers.trainable = False
res_result = GlobalAveragePooling2D()(res_net_base_model.output)
xcp_result = GlobalAveragePooling2D()(xception_base_model.output)
icp_result = GlobalAveragePooling2D()(inception_base_model.output)
concatenated = concatenate([res_result, xcp_result, icp_result], axis=1)
x = Dropout(0.5)(concatenated)
x = Dense(1, activation='sigmoid')(x)
model = Model([res_net_base_model.input, xception_base_model.input, inception_base_model.input], x)
model.compile(optimizer='adadelta',
loss='binary_crossentropy',
metrics=['accuracy'])
train_imgen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
rotation_range=5.,
horizontal_flip = True)
validation_imgen = ImageDataGenerator(rescale = 1./255)
def generate_generator_multiple(generator,dir1, batch_size, img_size1, img_size2, img_size3):
genX1 = generator.flow_from_directory(dir1,
target_size = (img_size1[0],img_size1[1]),
class_mode = 'binary',
batch_size = batch_size,
shuffle=False,
)
genX2 = generator.flow_from_directory(dir1,
target_size = (img_size2[0],img_size2[1]),
class_mode = 'binary',
batch_size = batch_size,
shuffle=False,
seed=7)
genX3 = generator.flow_from_directory(dir1,
target_size = (img_size3[0],img_size3[1]),
class_mode = 'binary',
batch_size = batch_size,
shuffle=False,
seed=7)
while True:
X1i = genX1.next()
X2i = genX2.next()
X3i = genX3.next()
yield [X1i[0], X2i[0],X3i[0]], X1i[1]
tain_generator = generate_generator_multiple(train_imgen , '/output/keras/dog_vs_cat_full/train', 100, (224,224), (299, 299), (299, 299))
validation_generator = generate_generator_multiple(validation_imgen,'/output/keras/dog_vs_cat_full/validation', 100, (224,224), (299, 299), (299, 299))
history=model.fit_generator(tain_generator,
steps_per_epoch=200,
epochs = 5,
validation_data = validation_generator,
validation_steps = 50,
shuffle=False)

Resources