cross_val_score's n_jobs = -1 argument not working in python 3.6 - python-3.x

I was trying to evaluate and improve the accuracy of my artificial neural network, but I ran into a problem: cross_val_score does not work with n_jobs = -1.
I am running TensorFlow on my CPU, and the error was:
BrokenProcessPool: A task has failed to un-serialize. Please ensure
that the arguments of the function are all picklable.
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
def build_classifier():
    """Build and compile the binary-classification network.

    The model is a Sequential stack of two hidden Dense layers with 6 ReLU
    units each (the first one takes 11 input features) followed by a single
    sigmoid output unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss, and accuracy as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    classifier = Sequential()
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu', input_dim=11))
    classifier.add(Dense(6, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier
# Wrap the model builder so scikit-learn can treat it as an estimator.
# `epochs` is the Keras 2 name for the number of training passes; the legacy
# `nb_epoch` spelling is deprecated and may be silently ignored by the wrapper.
classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)
# 10-fold cross-validation, run in a single process (n_jobs=1).
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=10, n_jobs=1)

Related

Get confusion matrix from a Keras model

I have the following NN model using Keras:
import numpy as np
from keras import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split  # was used below without being imported

# Load the CSV: columns 0-7 are used as features and column 8 as the target.
path = 'pima-indians-diabetes.data.csv'
dataset = np.loadtxt(path, delimiter=",")
X = dataset[:, 0:8]
Y = dataset[:, 8]

# Hold out 20% of the rows for validation/testing.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# 8 inputs -> 16 -> 32 ReLU units -> 1 sigmoid output.
model = Sequential()
model.add(Dense(16, input_dim=8, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test))
Kindly, is it possible to extract the confusion matrix? How?
You can use scikit-learn:
import sklearn.metrics  # the metrics submodule must be imported explicitly

# The model ends in a sigmoid unit, so predict() yields probabilities;
# np.rint rounds them to 0/1 class labels before building the matrix.
y_pred = model.predict(X_test)
confusion_matrix = sklearn.metrics.confusion_matrix(y_test, np.rint(y_pred))
It can be done using TensorFlow (which is almost Keras =)).
You start by making predictions on your test set with your trained model:
predictions = model.predict(X_test)  # X_test is the held-out split created by train_test_split
Then you can import TensorFlow and use its confusion_matrix method as follows.
import tensorflow as tf

# The model ends in a sigmoid unit, so `predictions` holds probabilities.
# confusion_matrix expects class indices, so drop the trailing axis and
# round to 0/1 first; otherwise the values are truncated when cast to int.
predicted_labels = tf.round(tf.squeeze(predictions, axis=-1))
conf_matrix = tf.math.confusion_matrix(labels=y_test,
                                       predictions=predicted_labels)
More information in the TensorFlow documentation.

How can I get the score I wanted by using KerasRegressor and sklearn pipeline?

I want to insert a Keras model into a scikit-learn pipeline, but I am confused by the value that pipeline.score returns. Here is the code:
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def build_model():
    """Build and compile the regression network.

    The model has two hidden Dense layers of 64 ReLU units and a single
    linear output unit. The input width is read from the module-level
    ``train_data`` array (``train_data.shape[1]``). It is compiled with the
    RMSprop optimizer, mean-squared-error loss, and mean absolute error as
    an additional metric.

    Returns:
        The compiled Sequential model.
    """
    model = models.Sequential()
    model.add(
        layers.Dense(
            64, activation='relu', input_shape=(train_data.shape[1], )))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
# Expose the Keras network as a scikit-learn regressor.
model = KerasRegressor(build_fn=build_model, epochs=90, batch_size=1, verbose=0)

# Standardise the features, then train the wrapped network on the result.
pipe_network = Pipeline(steps=[('scl', StandardScaler()), ('clf', model)])
pipe_network.fit(train_data, train_targets)
The model score is:
# NOTE(review): the sign and magnitude suggest this is the negated loss (MSE)
# of the wrapped regressor -- compare with the first value returned by
# evaluate(); confirm against the KerasRegressor.score documentation.
pipe_network.score(test_data, test_targets)
>>> -12.813292971994802
What does this score represent? I would like to get a result like the output of the evaluate function. How can I do that?
# The same preprocessing and training done by hand, without the Pipeline.
stdsc = StandardScaler()
# Fit the scaler on the training data, then apply it to both splits.
train_data_std = stdsc.fit_transform(train_data)
test_data_std = stdsc.transform(test_data)
network = build_model()
network.fit(train_data_std, train_targets, epochs=90, batch_size=1, verbose=0)
# build_model compiles with loss='mse' and metrics=['mae'], so the output
# below is presumably [mse, mae] -- confirm.
network.evaluate(test_data_std, test_targets)
>>> [12.681396334779029, 2.479423579047708]
Thank you for your attention.

ValueError: ('Some keys in session_kwargs are not supported at this time: %s', dict_keys(['class_mode'])

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.datasets import mnist
import numpy
# Two hidden layers of 500 tanh units with 50% dropout, then a 10-unit softmax.
model = Sequential()
model.add(Dense(500,input_shape=(784,))) # input is a flattened 28*28 = 784 pixel image
model.add(Activation('tanh')) # tanh activation
model.add(Dropout(0.5)) # 50% dropout
model.add(Dense(500)) # 500 units
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# NOTE(review): 'class_mode' is the key named in the reported ValueError
# ("Some keys in session_kwargs are not supported at this time"); it looks
# like compile() does not recognise this keyword and passes it on -- confirm.
model.compile(loss='categorical_crossentropy', optimizer=sgd, class_mode='categorical')
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten every image from (height, width) into a single vector.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])
# One-hot encode the digit labels by comparing each label against 0..9.
Y_train = (numpy.arange(10) == y_train[:, None]).astype(int)
Y_test = (numpy.arange(10) == y_test[:, None]).astype(int)
model.fit(X_train,Y_train,batch_size=200,epochs=50,shuffle=True,verbose=0,validation_split=0.3)
# NOTE: this result is discarded; the same evaluation is repeated below.
model.evaluate(X_test, Y_test, batch_size=200, verbose=0)
print("test set")
scores = model.evaluate(X_test,Y_test,batch_size=200,verbose=0)
print("")
# compile() is given no metrics, so `scores` is presumably a single loss value.
print("The test loss is %f" % scores)
result = model.predict(X_test,batch_size=200,verbose=0)
I found the post "Error when profiling keras models", which modifies the TensorFlow library.
So I checked the Keras library code from the link, but could not find anything like ['class_mode'] to modify in the Keras library. Next, I tried running the code after re-installing Keras, but even that didn't work.
I used Anaconda to install Keras; maybe I installed it incorrectly?
Can anyone suggest a solution for this?
Remove class_mode='categorical' from the model.compile call and the code runs.

Python- LSTM Based RNN error in input?

I am trying to build a deep learning network based on an LSTM RNN.
Here is what I tried:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
import numpy as np
# Load the comma-separated train/test files.
train = np.loadtxt("TrainDatasetFinal.txt", delimiter=",")
test = np.loadtxt("testDatasetFinal.txt", delimiter=",")
# Column 7 is used as the target (the 0/1 label in the sample rows).
y_train = train[:,7]
y_test = test[:,7]
# Column 6 (the spectrogram value) is the only input feature.
train_spec = train[:,6]
test_spec = test[:,6]
model = Sequential()
# NOTE(review): without return_sequences=True this layer presumably emits
# only its final output (a 2-D tensor), which would explain the reported
# "expected ndim=3, found ndim=2" error raised for the next LSTM layer -- confirm.
model.add(LSTM(32, input_shape=(1415684, 8)))
model.add(LSTM(64, input_dim=1, input_length=1415684, return_sequences=True))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
# NOTE(review): train_spec is a single-column slice, whereas input_shape above
# describes sequences of 1415684 steps with 8 features -- TODO confirm the
# intended input shape.
model.fit(train_spec, y_train, batch_size=2000, nb_epoch=11)
score = model.evaluate(test_spec, y_test, batch_size=2000)
but it gets me the following error
ValueError: Input 0 is incompatible with layer lstm_2: expected ndim=3, found ndim=2
Here is a sample from the dataset
(Patient Number, time in millisecond, accelerometer x-axis,y-axis, z-axis,magnitude, spectrogram,label (0 or 1))
1,15,70,39,-970,947321,596768455815000,0
1,31,70,39,-970,947321,612882670787000,0
1,46,60,49,-960,927601,602179976392000,0
1,62,60,49,-960,927601,808020878060000,0
1,78,50,39,-960,925621,726154800929000,0
In the dataset I am using only the spectrogram as the input feature and the label (0 or 1) as the output.
The total number of training samples is 1,415,684.

Replicate MLPClassifier() of sklearn in keras

I am new to keras. I was attempting an ML problem.
About the data:
It has 5 input features, 4 output classes and about 26000 records.
I had first attempted it using MLPClassifier() as follows:
# Baseline scikit-learn MLP: four hidden layers trained with SGD and an
# adaptive learning-rate schedule.
clf = MLPClassifier(
    hidden_layer_sizes=(200, 100, 100, 100),
    solver='sgd',
    learning_rate='adaptive',
    learning_rate_init=0.095,
    alpha=0.002,
    batch_size=300,
    max_iter=500,
    tol=1e-6,
    verbose=True,
)
clf.fit(train, y_train)
After testing, I usually got a LB score around 99.90. To gain more flexibility over the model, I decided to implement the same model in Keras to start with and then make changes in it in an attempt to increase the LB score. I came up with the following:
# Keras counterpart of the MLPClassifier: 200-100-100-100 ReLU hidden units
# and a 4-class softmax output. Uses the Keras 2 argument names
# (`kernel_initializer`, `epochs`) instead of the legacy `init` / `nb_epoch`.
model = Sequential()
model.add(Dense(200, input_dim=5, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
# Dropout has no counterpart in the MLPClassifier call.
model.add(Dropout(0.2))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(4, kernel_initializer='uniform', activation='softmax'))

# SGD with Nesterov momentum; the learning-rate decay is tied to the epoch count.
lrate = 0.095
decay = lrate/125
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
hist = model.fit(train, categorical_labels, epochs=125, batch_size=256, shuffle=True, verbose=2)
The model seems pretty similar to the MLPClassifier() model but the LB scores were pretty disappointing at around 97.
Can somebody please tell what exactly was wrong with this model? Or how can we replicate the MLPClassifier model in keras. I think regularisation might be one of the factors that went wrong here.
Edit 1: Loss curve:
Edit 2:
Here is the code:
# import libraries
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import MinMaxScaler, scale, StandardScaler, Normalizer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers
from keras.optimizers import SGD
from keras.utils import to_categorical

# load data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# generic preprocessing
# encode the camera position as an integer
mapping = {'Front':0, 'Right':1, 'Left':2, 'Rear':3}
train = train.replace({'DetectedCamera':mapping})
test = test.replace({'DetectedCamera':mapping})
# rename the target column
train.rename(columns = {'SignFacing (Target)': 'Target'}, inplace=True)
# encode the target; note this mapping orders the classes differently from
# the camera mapping above
mapping = {'Front':0, 'Left':1, 'Rear':2, 'Right':3}
train = train.replace({'Target':mapping})

# split data
y_train = train['Target']
test_id = test['Id']
train.drop(['Target','Id'], inplace=True, axis=1)
test.drop('Id',inplace=True,axis=1)
train_train, train_test, y_train_train, y_train_test = train_test_split(train, y_train)

# One-hot encode the integer targets for categorical_crossentropy. These two
# names are used by model.fit below but were never defined.
categorical_labels = to_categorical(y_train_train, num_classes=4)
categorical_labels_test = to_categorical(y_train_test, num_classes=4)

# standardise features using statistics from the training split only
scaler = StandardScaler()
scaler.fit(train_train)
train_train = scaler.transform(train_train)
train_test = scaler.transform(train_test)
test = scaler.transform(test)

# training and modelling
model = Sequential()
model.add(Dense(200, input_dim=5, kernel_initializer='uniform', activation = 'relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(4, kernel_initializer='uniform', activation='softmax'))
lrate = 0.095
decay = lrate/250
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# `epochs` is the Keras 2 name for the legacy `nb_epoch` argument.
hist = model.fit(train_train, categorical_labels, validation_data=(train_test, categorical_labels_test), epochs=100, batch_size=256, shuffle=True, verbose=2)
Edit 3: These are the files:
train.csv
test.csv
To get a bona fide scikit estimator you can use KerasClassifier from tensorflow.keras.wrappers.scikit_learn. For example:
from sklearn.datasets import make_classification
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# Synthetic 4-class data set with 5 features, 3 of them informative.
X, y = make_classification(
    n_samples=26000,
    n_features=5,
    n_informative=3,
    n_classes=4,
    random_state=0,
)
def build_fn(optimizer):
    """Build and compile the 4-class classification network.

    The model has hidden Dense layers of 200, 100, 100 and 100 ReLU units
    (5 input features) and a 4-unit softmax output, all initialised with
    ``he_normal``. It is compiled with categorical cross-entropy loss and
    reports precision, recall and AUC.

    Args:
        optimizer: Optimizer passed straight through to ``model.compile``.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(
        Dense(200, input_dim=5, kernel_initializer="he_normal", activation="relu")
    )
    model.add(Dense(100, kernel_initializer="he_normal", activation="relu"))
    model.add(Dense(100, kernel_initializer="he_normal", activation="relu"))
    model.add(Dense(100, kernel_initializer="he_normal", activation="relu"))
    model.add(Dense(4, kernel_initializer="he_normal", activation="softmax"))
    model.compile(
        loss="categorical_crossentropy",
        optimizer=optimizer,
        metrics=[
            keras.metrics.Precision(name="precision"),
            keras.metrics.Recall(name="recall"),
            keras.metrics.AUC(name="auc"),
        ],
    )
    return model
# Wrap the builder as a scikit-learn estimator; `optimizer` is handed to build_fn.
clf = KerasClassifier(
    build_fn=build_fn,
    optimizer="rmsprop",
    epochs=500,
    batch_size=300,
)
clf.fit(X, y)
clf.predict(X)

Resources