Grid-Search Voting Classifier containing a Keras model - python-3.x

I am trying to train a VotingClassifier containing a Keras model using GridSearchCV.
Here is the code:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.ensemble import VotingClassifier
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
# pretend data
X, y = datasets.make_classification(n_samples=100, n_features=20)
scaler = StandardScaler()
# create model
def create_model():
    model = Sequential()
    model.add(Dense(20, kernel_initializer='uniform', activation='relu', input_shape=(20,)))
    model.add(Dense(30, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(10, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
keras_model = KerasClassifier(build_fn=create_model)
keras_model._estimator_type = "classifier"
eclf = VotingClassifier(
    estimators=[('svc', SVC(probability=True)), ('keras_model', keras_model)],
    voting='soft')
# Test - fit the voting classifier without grid search
eclf.fit(X, y)
print('The VotingClassifier can be fit outside of gridsearch\n')
# parameters to grid search
params = [{'svc__C': [0.01, 0.1]}]
grid = GridSearchCV(eclf, params, cv=2, scoring='accuracy', verbose=1)
grid.fit(X, y)
I get the following error:
ValueError: The estimator KerasClassifier should be a classifier.
When I train the VotingClassifier outside of GridSearchCV, no error occurs; however, when I train it within GridSearchCV, I get the error message. This other question, VotingClassifier with pipelines as estimators, has the same error (without using grid search) and was fixed by a line asserting that the Keras model is a classifier, which I have also included:
keras_model._estimator_type = "classifier"
This did not fix the problem here.
Any suggestions?
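A hedged workaround (my own sketch, not from the original thread): GridSearchCV clones every estimator with sklearn.base.clone, which rebuilds the wrapper from its constructor parameters, so an instance attribute such as _estimator_type set after construction does not survive the clone. Defining it as a class attribute on a small subclass should persist through cloning; the name MyKerasClassifier below is illustrative:
class MyKerasClassifier(KerasClassifier):
    # a class attribute survives sklearn.base.clone, unlike an instance attribute
    _estimator_type = 'classifier'

keras_model = MyKerasClassifier(build_fn=create_model)
eclf = VotingClassifier(
    estimators=[('svc', SVC(probability=True)), ('keras_model', keras_model)],
    voting='soft')
grid = GridSearchCV(eclf, params, cv=2, scoring='accuracy', verbose=1)
grid.fit(X, y)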

Related

How can I get the history of the KerasRegressor?

I want to get the KerasRegressor history, but every time I get "(...) object has no attribute 'History'".
# Regression Example With Boston Dataset: Standardized and Wider
import numpy as np
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
#from keras.wrappers.scikit_learn import KerasRegressor
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import keras.backend as K
# load dataset
dataframe = read_csv("Data 1398-2.csv")
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:10]
Y = dataset[:,10]
############
from sklearn import preprocessing
from sklearn.metrics import r2_score
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.25)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.55)
##################
# define wider model
def wider_model():
    # create model
    model = Sequential()
    model.add(Dense(40, input_dim=10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(20, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', metrics=['mae'], optimizer='adam')
    #history = model.fit(X, Y, epochs=10, batch_size=len(X), verbose=1)
    return model
# evaluate model with standardized dataset
from keras.callbacks import History
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(model=wider_model, epochs=100, batch_size=2, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=5)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))
import matplotlib.pyplot as plt
#plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])
#plt.title('Model loss')
#plt.ylabel('Loss')
#plt.xlabel('Epoch')
#plt.legend(['Train', 'Val'], loc='upper right')
#plt.show()
The Keras model is at index 1 in your case, but you can also search for it. To get the history object:
pipeline.steps[1][1].model.history.history
If you are sure that the Keras model is always the last estimator, you can also use:
pipeline._final_estimator.model.history.history
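A hedged note on the above: those attribute paths match the old keras.wrappers.scikit_learn wrapper, where .model holds the fitted Keras model. With scikeras, which this code actually imports, the fitted wrapper exposes model_ and history_ instead, and cross_val_score only fits clones, leaving pipeline itself unfitted. Something like the following may be what you need:
pipeline.fit(X_train, Y_train)
history = pipeline.steps[1][1].history_  # dict of per-epoch metrics, e.g. history['loss']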

Keras LSTM to Pytorch

I am using the following code to apply a sequential LSTM to time-series data with one value. It works fine with Keras. I am wondering how I could do the same using PyTorch?
import tensorflow
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Embedding, LSTM
from tensorflow.keras.optimizers import RMSprop, Adam, Nadam
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import TensorBoard
# training_dataset.shape = (303, 24, 1)
time_steps = 24
metric = 'mean_absolute_error'
model = Sequential()
model.add(LSTM(units=32, activation='tanh', input_shape=(time_steps, 1), return_sequences=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[metric])
print(model.summary())
batch_size=32
epochs=20
model.fit(x=training_dataset, y=training_dataset,
          batch_size=batch_size, epochs=epochs,
          verbose=1, validation_data=(training_dataset, training_dataset),
          callbacks=[TensorBoard(log_dir='../logs/{0}'.format(tensorlog))])
testing_pred = model.predict(x=testing_dataset)
You can check the PyTorch documentation for that: https://pytorch.org/docs/master/generated/torch.nn.LSTM.html
A minimal version looks like the following. Note that nn.LSTM does not work inside nn.Sequential, because its forward returns a tuple, so a small Module is needed:
import torch, torch.nn as nn
from torch.optim import Adam

class LSTMModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=32, batch_first=True)
        self.linear = nn.Linear(32, 1)
    def forward(self, x):
        out, _ = self.lstm(x)  # nn.LSTM returns (output, (h_n, c_n))
        return torch.sigmoid(self.linear(out))

model = LSTMModel()
opt = Adam(model.parameters())
loss_func = nn.L1Loss()  # mean absolute error, matching the Keras loss
for x, y in dataloader:  # batches shaped (batch, time_steps, 1)
    opt.zero_grad()
    loss = loss_func(model(x), y)
    loss.backward()
    opt.step()
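For completeness, a hedged sketch of building the dataloader used above from the (303, 24, 1) training array mentioned in the question (input and target are the same array, as in the Keras fit call):
from torch.utils.data import TensorDataset, DataLoader
data = torch.as_tensor(training_dataset, dtype=torch.float32)  # shape (303, 24, 1)
dataloader = DataLoader(TensorDataset(data, data), batch_size=32, shuffle=True)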

cross_val_score's n_jobs = -1 argument not working in python 3.6

I was trying to improve the accuracy of and evaluate my artificial neural network, but I encountered an issue: n_jobs = -1 in cross_val_score was not working.
I am using TensorFlow on my CPU, and my error was:
BrokenProcessPool: A task has failed to un-serialize. Please ensure
that the arguments of the function are all picklable.
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10, n_jobs=1)
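A hedged suggestion, since no answer is shown here: BrokenProcessPool with n_jobs = -1 usually means the worker processes failed to unpickle the task. With the 'spawn' start method (the default on Windows), each worker re-imports the script, so guarding the entry point is a common first fix; falling back to n_jobs = 1 avoids the problem entirely:
# assumption: the code lives in a script that is run directly
if __name__ == '__main__':
    accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train,
                                 cv=10, n_jobs=-1)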

How can I get the score I wanted by using KerasRegressor and sklearn pipeline?

I want to insert a Keras model into a scikit-learn pipeline, but when I use pipeline.score, I am confused. Here is the code:
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

model = KerasRegressor(build_fn=build_model, epochs=90, batch_size=1, verbose=0)
pipe_network = Pipeline([('scl', StandardScaler()), ('clf', model)])
pipe_network.fit(train_data, train_targets)
The model score is:
pipe_network.score(test_data, test_targets)
>>> -12.813292971994802
What is this score? I want to get a result like the output of the evaluate function. How can I do that?
stdsc = StandardScaler()
train_data_std = stdsc.fit_transform(train_data)
test_data_std = stdsc.transform(test_data)
network = build_model()
network.fit(train_data_std, train_targets, epochs=90, batch_size=1, verbose=0)
network.evaluate(test_data_std, test_targets)
>>> [12.681396334779029, 2.479423579047708]
Thank you for your attention.
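A hedged explanation, since no answer is shown: the old keras.wrappers.scikit_learn.KerasRegressor defines score as the negated loss from model.evaluate, so -12.81 is simply minus the MSE (from a separate fit, hence the small difference from your manual run). To get an evaluate-style number through the pipeline:
mse = -pipe_network.score(test_data, test_targets)  # comparable to network.evaluate(...)[0]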

MLP classifier for multi-class

I am a newbie with Keras.
I am trying to follow the Keras tutorial on a multilayer perceptron (MLP) for multi-class softmax classification, using my own data set.
My data has 3 classes and only one feature, but I don't understand why the accuracy always stays around 0.3 and the model predicts all training data as the first class. The confusion matrix then looks like this:
(image: confusion matrix)
Here is the code:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
import pandas as pd
import numpy as np
# Importing the dataset
dataset = pd.read_csv('StatusAll.csv')
X = dataset.iloc[:, 1:].values
y = dataset.iloc[:, 0:1].values
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(64, activation='tanh', input_dim=1))
model.add(Dropout(0.5))
model.add(Dense(64, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    epochs=100,
                    batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128)
print('Test score:', score[0])
print('Test accuracy:', score[1])
from sklearn import metrics
prediction = model.predict(x_test)
prediction = np.around(prediction)
y_test_non_category = [ np.argmax(t) for t in y_test ]
y_predict_non_category = [ np.argmax(t) for t in prediction ]
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(y_test_non_category, y_predict_non_category)
print (conf_mat)
I hope I can get some advice, thanks.
The x_train example:
(image: x_train sample)
y_train before being converted to categorical:
(image: y_train sample)
Your final Dense layer has 4 outputs; it seems like you are classifying 4 classes instead of 3.
model.add(Dense(3, activation='softmax')) # number of classes is 3
It would be helpful to see sample data from x_train and y_train to make sure the pre-processing is correct. Because you have only 1 feature, an MLP might be overkill; a decision tree would be simpler, unless you want to experiment with MLPs.
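One hedged addition to the answer above: if the labels in y are 1, 2 and 3 rather than 0, 1 and 2, to_categorical pads the encoding to 4 columns (index 0 unused), which is likely why the original code needed Dense(4). Shifting the labels to be 0-based lets a 3-unit softmax match:
# assumption: the labels are 1, 2 and 3; shift to 0-based before one-hot encoding
y_train = to_categorical(y_train - 1)  # now exactly 3 columns, matching Dense(3)
y_test = to_categorical(y_test - 1)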
