How to include normalization of features in Keras regression model? - python-3.x

I have data for a regression task.
The independent features (X_train) are scaled with a StandardScaler.
I built a Keras Sequential model with hidden layers and compiled it,
then fitted it with model.fit(X_train_scaled, y_train)
and saved the model to a .hdf5 file.
How do I include the scaling step inside the saved model,
so that the same scaling parameters are applied to unseen test data?
# imported all the libraries for training and evaluating the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

def build_model():
    model = keras.Sequential([
        # input width taken from the scaled training data
        # (the original used len(train_dataset.keys()), but train_dataset was undefined)
        layers.Dense(64, activation=tf.nn.relu, input_shape=[X_train_scaled.shape[1]]),
        layers.Dense(64, activation=tf.nn.relu),
        layers.Dense(1)
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mean_absolute_error', 'mean_squared_error'])
    return model

model = build_model()
EPOCHS = 1000
history = model.fit(X_train_scaled, y_train, epochs=EPOCHS,
                    validation_split=0.2, verbose=0)
loss, mae, mse = model.evaluate(X_test_scaled, y_test, verbose=0)
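
One way to bake the scaling into the model itself, so it travels with the saved file, is a Keras preprocessing layer. A minimal sketch, assuming TensorFlow 2.6+ where tf.keras.layers.Normalization is available (it standardizes features with the mean and variance learned from the training data, just like StandardScaler):

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Learn the per-feature mean/variance from the *raw* training features;
# these statistics become part of the model and are saved with it.
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(np.asarray(X_train, dtype='float32'))

model = tf.keras.Sequential([
    normalizer,  # scaling now happens inside the model
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mean_absolute_error', 'mean_squared_error'])
model.fit(X_train, y_train, epochs=1000, validation_split=0.2, verbose=0)

# SavedModel format keeps the adapted statistics alongside the weights.
model.save('regression_model_with_scaling')

# Later: load and feed *raw* features; no external scaler is needed.
restored = tf.keras.models.load_model('regression_model_with_scaling')
preds = restored.predict(X_test)

Note the model is fit on the raw X_train here, not X_train_scaled, since the Normalization layer does the scaling on the fly.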

Related

How to make a prediction on an RNN without training it every time [duplicate]

This question already has answers at: How to save/restore a model after training?
I am new to neural networks and have successfully trained an RNN, but training takes a while. It is not feasible for me to retrain every time I want to make a prediction. So the question is: how do I make the trained model persistent, so that the RNN does not have to be retrained every time I make a prediction?
This is the code I am using...
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.data_utils import pad_sequences

class Predict:
    TrainingData = None
    XScale = None

    def __init__(self, PredData, TrainingData):
        PreparedData = self.PrepareData(PredData, TrainingData)
        # set the training data
        self.Train(PreparedData)
        # split all of the training data into their respective variables
        X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(
            self.XScale, self.Y, test_size=0.6)
        X_val, X_test, Y_val, Y_test = train_test_split(
            X_val_and_test, Y_val_and_test, test_size=0.6)
        # define the model
        self.DefineModel(X_train, Y_train, X_val, Y_val)
        # do the prediction (stored on self, since __init__ cannot return a value)
        self.Prediction = self.predict(PredData)

    def PrepareData(self, PredData, TrainingData):
        LNumber = 0
        for I in TrainingData:
            if len(I) > LNumber:
                LNumber = len(I)
        PadData = pad_sequences(TrainingData, maxlen=LNumber,
                                padding='post', truncating='post')
        return PadData

    def Train(self, TrainData):
        min_max_scaler = preprocessing.MinMaxScaler()
        self.X = TrainData[0:10]
        self.Y = TrainData[-10:]
        self.XScale = min_max_scaler.fit_transform(self.X)

    def DefineModel(self, X_train, Y_train, X_val, Y_val):  # 'T_train' was a typo
        self.model = Sequential([
            Dense(32, activation='relu', input_shape=(10,)),
            Dense(32, activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        self.model.compile(optimizer='sgd',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
        self.model.fit(X_train, Y_train,
                       batch_size=32, epochs=100,
                       validation_data=(X_val, Y_val))

    def predict(self, PredData):
        return self.model.predict(PredData)  # was 'Self.Prediction = model.predict(...)'
As suggested in the comments, you can use SavedModel to save your entire architecture + weights. I suggest having a look at the Save and load Keras models guide.
Basically you just need to save like this:
self.model.save('path/to/location')
And to restore later on:
from tensorflow import keras
model = keras.models.load_model('path/to/location')
If your training is quite long, you can also save checkpoints of the best model so far, using the tf.keras.callbacks.ModelCheckpoint callback. You create the callback and add it to your fit:
checkpoint_filepath = '/tmp/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)
# Model weights are saved at the end of every epoch, if it's the best seen so far.
model.fit(X_train, Y_train, batch_size=32, epochs=100,
          validation_data=(X_val, Y_val), callbacks=[model_checkpoint_callback])
If your training crashes or gets interrupted, you can reload the best weights like this:
# The model weights (that are considered the best) are loaded into the model.
model.load_weights(checkpoint_filepath)
Note that if you are restarting your script you will have to re-create the model object yourself: load_weights does not re-create the architecture for you the way load_model does. In your case it is just a matter of first doing:
self.model = Sequential([
    Dense(32, activation='relu', input_shape=(10,)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])
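and then restoring the saved weights into it with self.model.load_weights(checkpoint_filepath).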

Keras NN model isn't compatible with sklearn's VotingClassifier

I'm trying to ensemble my models and use a VotingClassifier from sklearn to get an accuracy score. Right now, my Keras model (NN) doesn't fit into the ensemble. I've tried sklearn's NN and KerasClassifier; basically, I've run out of options. Here's my code:
def multiLayerPerceptionModel(nb_epochs, hidden_1, hidden_2, learn_rate,
                              batch_size, num_input, num_classes, path_log,
                              X_Train, X_Test, Y_Train, Y_Test):
    model_RN = keras.Sequential([
        keras.layers.Dense(hidden_1, activation=tf.nn.relu),
        keras.layers.Dense(hidden_2, activation=tf.nn.relu),
        keras.layers.Dense(num_classes, activation='softmax'),
        BatchNormalization(),
    ])
    model_RN.add(layers.Dense(num_classes, activation='softmax'))
    model_RN.compile(optimizer=SGD(learn_rate),
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
    model_RN.fit(X_Train, Y_Train, validation_data=(X_Test, Y_Test),  # '=' was missing
                 epochs=nb_epochs, batch_size=batch_size,
                 callbacks=[tensorboard])
    model_RN.add(Flatten())  # note: this adds a layer after training has finished
TypeError: Cannot clone object '<tensorflow.python.keras.engine.sequential.Sequential object at 0x11778d400>' (type <class 'tensorflow.python.keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.
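
The error means VotingClassifier only accepts objects that implement the scikit-learn estimator interface (get_params, fit, predict); a raw keras.Sequential does not, so sklearn cannot clone it. The usual fix is to wrap a model-builder function in a KerasClassifier. A minimal sketch, assuming the scikeras package is installed (the older, now-deprecated keras.wrappers.scikit_learn.KerasClassifier works similarly), with LogisticRegression standing in as an arbitrary second estimator:

import tensorflow as tf
from scikeras.wrappers import KerasClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

def build_nn(hidden_1=64, hidden_2=32, num_classes=3, learn_rate=0.01):
    # same kind of architecture as above, returned from a builder function
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_1, activation='relu'),
        tf.keras.layers.Dense(hidden_2, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.SGD(learn_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# The wrapper exposes get_params/fit/predict, so sklearn can clone it.
nn_clf = KerasClassifier(model=build_nn, epochs=50, batch_size=32, verbose=0)

ensemble = VotingClassifier(
    estimators=[('nn', nn_clf), ('lr', LogisticRegression(max_iter=1000))],
    voting='hard')
ensemble.fit(X_Train, Y_Train)
print(ensemble.score(X_Test, Y_Test))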

Get confusion matrix from a Keras model

I have the following NN model using Keras:
import numpy as np
from keras import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split  # missing import added

path = 'pima-indians-diabetes.data.csv'
dataset = np.loadtxt(path, delimiter=",")
X = dataset[:, 0:8]
Y = dataset[:, 8]
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

model = Sequential()
model.add(Dense(16, input_dim=8, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test))
Is it possible to extract the confusion matrix from this model? How?
You can use scikit-learn:
import sklearn.metrics  # missing import added

# round the sigmoid outputs to 0/1 before comparing with the true labels
y_pred = model.predict(X_test)
conf_matrix = sklearn.metrics.confusion_matrix(y_test, np.rint(y_pred))
It can also be done using TensorFlow (which is almost Keras =)).
You start by making predictions on your test set with your trained model, thresholding the sigmoid outputs into hard 0/1 labels (tf.math.confusion_matrix expects class labels, not probabilities):
predictions = (model.predict(X_test) > 0.5).astype("int32")
Then you can import TensorFlow and use its confusion_matrix method as follows.
import tensorflow as tf
conf_matrix = tf.math.confusion_matrix(labels=y_test,
                                       predictions=predictions)
More information in the TensorFlow documentation.

Decoding Keras Multiclass Classifications

I am preparing input to feed into a Keras neural network for a multiclass problem as:
from sklearn.preprocessing import LabelEncoder  # imports added for completeness
from sklearn.model_selection import train_test_split
from keras.utils import np_utils

encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
# convert integers to dummy variables (i.e. one-hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.06, random_state=42)
After having trained the model, I try to run the following lines to obtain a prediction that reflects the original class names:
y_pred = model.predict_classes(X_test)
y_pred = encoder.inverse_transform(y_pred)
y_test = np.argmax(y_test, axis = 1)
y_test = encoder.inverse_transform(y_test)
However, I obtain a surprisingly low accuracy (0.36), as opposed to training and validation, which reach 0.98. Is this the right way of transforming classes back into the original labels?
I compute accuracies as:
# For training
history.history['acc']
# For testing
accuracy_score(y_test, y_pred)
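
For reference, a minimal sketch of the same decode path written against model.predict, which also works on newer TensorFlow versions where Sequential.predict_classes has been removed (assuming the encoder, model, and splits from above):

import numpy as np
from sklearn.metrics import accuracy_score

# argmax over the softmax outputs replaces predict_classes
probs = model.predict(X_test)
y_pred = encoder.inverse_transform(np.argmax(probs, axis=1))

# decode the one-hot ground truth the same way
y_true = encoder.inverse_transform(np.argmax(y_test, axis=1))

print(accuracy_score(y_true, y_pred))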

MLP classifier for multi-class

I am a newbie with Keras.
I am trying to follow the Keras tutorial on a multilayer perceptron (MLP) for multi-class softmax classification, using my own data set.
My data has 3 classes and only one feature, but I don't understand why the accuracy is always around 0.3 and the model predicts all training data as the first class. The confusion matrix looks like this:
(confusion matrix shown as an image in the original post)
Here is the code:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
import pandas as pd
import numpy as np

# Importing the dataset
dataset = pd.read_csv('StatusAll.csv')
X = dataset.iloc[:, 1:].values
y = dataset.iloc[:, 0:1].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# In the first layer, you must specify the expected input data shape:
# here, 1-dimensional vectors (one feature).
model.add(Dense(64, activation='tanh', input_dim=1))
model.add(Dropout(0.5))
model.add(Dense(64, activation='tanh'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    epochs=100,
                    batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128)
print('Test score:', score[0])
print('Test accuracy:', score[1])

from sklearn.metrics import confusion_matrix
prediction = model.predict(x_test)
# take argmax over the raw probabilities; rounding them first (as in the
# original np.around call) makes argmax fall back to class 0 on ties
y_test_non_category = [np.argmax(t) for t in y_test]
y_predict_non_category = [np.argmax(t) for t in prediction]
conf_mat = confusion_matrix(y_test_non_category, y_predict_non_category)
print(conf_mat)
I hope I can get some advice. Thanks!
(Samples of x_train, and of y_train before conversion to categorical, were shown as images in the original post.)
Your final Dense layer has 4 outputs; it seems like you are classifying 4 classes instead of 3.
model.add(Dense(3, activation='softmax'))  # number of classes is 3
It would be helpful to see sample data from x_train and y_train to make sure the pre-processing is correct. Because you have only 1 feature, an MLP might be overkill; a decision tree would be simpler, unless you want to experiment with MLPs.
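
A possible reason the model ended up with 4 outputs in the first place (an assumption, since the raw labels are not shown): if y contains the classes 1, 2, 3, then to_categorical pads in an unused column for class 0 and produces 4 columns. A minimal sketch of mapping the labels onto 0..2 first, so a 3-unit softmax output lines up with the targets (this would replace the to_categorical lines above):

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# map raw labels (e.g. 1, 2, 3) onto 0..2 so to_categorical yields 3 columns
encoder = LabelEncoder()
y_train_cat = to_categorical(encoder.fit_transform(y_train.ravel()))
y_test_cat = to_categorical(encoder.transform(y_test.ravel()))

# ...then build the model as above, but with:
# model.add(Dense(3, activation='softmax'))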
