How to predict the class probability with an array - python-3.x

I have an array of data and I'm trying to predict the probability if it's 1 or 0
I have a data set with more than 3000 rows as features and output data is either 1 or 0.
I'm quite new with neural networks, so I found an example online but now I'm having difficulties how to predict with unknown data.
In my case I want to predict the probability of 1 for row variable.
Here's the code
df = pd.read_csv("data.csv")
X = df.iloc[:,10:]
Y = df['output']
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# larger model
def create_larger():
# create model
model = Sequential()
model.add(Dense(60, input_shape=(25,), activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(model=create_larger, epochs=50, batch_size=5, verbose=2)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
row = [[4, 0.558, 0.493, 0.954, 0.895, 0.683, 8.7, 26, 0.155, 8.3, 21.8, 0.21, 0.723, 0.548, 0.466, 0.979, 0.887, 0.464, 11.8, 25.5, 0.184, 7.5, 18, 0.217, 0.651]]
scaler = StandardScaler()
row = np.array(row)
scaled_row = pipeline.fit(row)
print(pipeline.predict(scaled_row))
If I run this code I get an error
ValueError: Expected array-like (array or non-string sequence), got None
So now I'm kinda lost what to change it.
Thanks.

Related

dealing with sparse data on TensorFlow 2.2.0

I'm trying to make a Face Recognition app using insightface, I wrote this code on Tensorflow 2.1.0 and Keras 2.3.1 and it worked well but due to some issues I have to migrate to TensorFlow 2.2.0 and Keras 2.4.3, I understand that my problem is my embeddings. they are sparse but in a meaningful way. How can I avoid changing the meaningfulness for my embeddings and avoid the spares data? From the error, I understand (Consider casting elements to a supported type.) that TensorFlow can't convert my np.array to tensor because it is sparse.
what I tried
these arent the commands but I wrote them so you would have a notion of what I tried. np.array(data["embeddings"]).todense(),csr_matrix(data["embeddings"]), tf.convert_to_tensor(data["embeddings"]) and also tried to follow along this but couldn't get to model.fit_generator work.
>>> print(type(embeddings))
<class 'numpy.ndarray'>
>>> print(embeddings.shape)
(49, 512)
>>> print(embeddings)
[[ 0.02751185 0.0143353 0.0324492 ... -0.00347222 0.0154978
-0.01304669]
[ 0.09154768 -0.04196533 0.01197386 ... -0.08363352 0.03335601
0.01748604]
[ 0.00182035 -0.00307933 0.00386595 ... -0.04442558 0.04434329
0.06080627]
...
[-0.01564891 -0.01510727 0.0345119 ... -0.01690779 -0.00816008
0.08056415]
[-0.00543963 -0.03811216 -0.01148985 ... -0.05366111 0.07108331
-0.00186215]
[ 0.00627459 -0.04221528 0.00426272 ... 0.02838095 0.02116473
0.00491964]]
This is my code:
class SoftMax():
def __init__(self, input_shape, num_classes):
self.input_shape = input_shape
self.num_classes = num_classes
def build(self):
#create model
model = Sequential()
#add model layers
model.add(Dense(1024, activation='relu', input_shape=self.input_shape))
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(self.num_classes, activation='softmax'))
# loss and optimizer
optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(loss=categorical_crossentropy,
optimizer=optimizer,
metrics=['accuracy'])
return model
def make_model(args, classifier=SoftMax):
# Load the face embeddings
data = pickle.loads(open(args.embeddings, "rb").read())
num_classes = len(np.unique(data["names"]))
ct = ColumnTransformer([('myŁ”Name', OneHotEncoder(), [0])])
labels = np.array(data["names"]).reshape(-1, 1)
labels = ct.fit_transform(labels)
embeddings = np.array(data["embeddings"])
# Initialize Softmax training model arguments
BATCH_SIZE = 32
EPOCHS = 32
input_shape = embeddings.shape[1]
# Build classifier
init_classifier = classifier(input_shape=(input_shape,), num_classes=num_classes)
model = init_classifier.build()
# Create KFold
cv = KFold(n_splits = 5, random_state = None, shuffle=True)
history = {'acc': [], 'val_acc': [], 'loss': [], 'val_loss': []}
# Train
for train_idx, valid_idx in cv.split(embeddings):
X_train, X_val, y_train, y_val = embeddings[train_idx], embeddings[valid_idx], labels[train_idx], labels[valid_idx]
his = model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1, validation_data=(X_val, y_val))
# write the face recognition model to output
model.save(args.mymodel)
f = open(args.le, "wb")
f.write(pickle.dumps(LabelEncoder()))
f.close()
Error
TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=Tensor("DeserializeSparse:0", shape=(None, 2), dtype=int64), values=Tensor("DeserializeSparse:1", shape=(None,), dtype=float32), dense_shape=Tensor("stack:0", shape=(2,), dtype=int64)). Consider casting elements to a supported type.

Predicting Future values with Keras LSTM

I have created an LSTM sales prediction model that works really well on the train and test sets. I would now like to predict beyond the dates in the entire dataset.
I have tried following this answer how to use the Keras model to forecast for future dates or events? but I really can't figure out how to adjust my code to do future predictions.
Also, I changed my code from
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
to
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 1:8]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 1:8]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
after trying the solution in Keras time series can I predict next 6 month in one time
Here is the code where training and modelling happens:
# changed to initial
for df in m:
train_set, test_set = m[df][0:-6].values, m[df][-6:].values
#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)
#build the LSTM Model
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
print('Fitting model for: {}'.format(df))
#fit our LSTM Model
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=500, batch_size=1, verbose=1, shuffle=False)
# model.save('lstm_model.h5')
print('Predictions for: {}'.format(df))
#check prediction
y_pred = model.predict(X_test,batch_size=1)
print('Inverse Transform for: {}'.format(df))
#inverse transformation to see actual sales
#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
print (np.concatenate([y_pred[index],X_test[index]],axis=1))
pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)
I would like the predictions to go beyond the data in the dataset.
UPDATE: I trained the model and took its predictions on the test set. Use these as input for another LSTM model to fit and predict for 12 months. It worked for me. Also changed my last Dense layer (above) to predict 1 point at a time instead of 7 as I had before.
Below is the code:
from numpy import array
for df in d:
if df in list_df:
# df_ADIDAS DYN PUL DEO 150 FCA5421
#KEEP
result_list = []
sales_dates = list(d["{}".format(df)][-7:].Month)
act_sales = list(d["{}".format(df)][-7:].Sale)
for index in range(0,len(pred_test_set_inverted)):
result_dict = {}
result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index]) #change to 0 ffrom act_sales[index]
result_dict['date'] = sales_dates[index] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW
result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
predictions = list(df_result['pred_value'])
forecasts = []
result_list
for i in range(len(result_list)):
forecasts.append(result_list[i]['pred_value'])
def split_sequence(sequence, n_steps):
X, y = list(), list()
for i in range(len(sequence)):
# find the end of this pattern
end_ix = i + n_steps
# check if we are beyond the sequence
if end_ix > len(sequence)-1:
break
# gather input and output parts of the pattern
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
X.append(seq_x)
y.append(seq_y)
return array(X), array(y)
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(forecasts, n_steps)
# summarize the data
# for i in range(len(X)):
# print(X[i], y[i])
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=200, verbose=0)
# demonstrate prediction
x_input = array(predictions[-4:])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
#print(yhat)
currentStep = yhat[:, -1:]
print('Twelve Month Prediction for {}'.format(df))
for i in range(12):
if i == 0:
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
else:
x0_input = np.append(x_input, [currentStep[i-1]])
x0_input = x0_input.reshape((1, n_steps+1, n_features))
x_input = x0_input[:,1:]
yhat = model.predict(x_input)
currentStep = np.append(currentStep, yhat[:,-1:])
print(yhat)
Your last Dense layer says that you are predicting 7 points at a time. Save those predictions and feed them to the model again to predict next 7. That makes it 14 predictions simultaneously. And so on. Or change the number of nodes and shape of y from 7 to corresponding number and train again.

KerasClassifier for use with build_fn which takes arguments

I am attempting to wrap my keras models in scikit learn GridSearchCV and Pipeline structures for hyperparameter tuning.
It works absolutely fine when the build_fn function takes 0 arguments for use in KerasClassifier. However it fails whenever I use a function which takes arguments.
Example code below
def prepare_classifier(x, y):
shape_of_input = x.shape
shape_of_target = y.shape
classifier = Sequential()
## number of neurons = 30
## kernel_initializer determines how the weights are initialized
## activation is the activation function at this particular hidden layer
## input_shape is the number of features in a single row.. in this case it is shape_of_input[1]
## shape_of_input[0] is the total number of such rows
classifier.add(Dense(units = 30, activation = 'relu', kernel_initializer = 'uniform', input_dim = shape_of_input[1]))
classifier.add(Dense(units = 30, activation = 'relu', kernel_initializer = 'uniform'))
## we are predicting 10 digits for each row of x.
## in total there are shape_of_input[0] rows in total
classifier.add(Dense(10, activation = 'softmax'))
## categorical_crossentropy is the loss function for multi output loss function
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
return classifier
def fit(classifier, x_train, y_train, epoch_size, batch_size = 10):
pipeline = Pipeline([
('keras_classifier', classifier)
])
param_grid = {
'keras_classifier__batch_size' : [10,20,30,50],
'keras_classifier__epochs' : [100, 200, 300],
'keras_classifier__x' : [x_train],
'keras_classifier__y' : [y_train],
}
grid = GridSearchCV(estimator = pipeline, param_grid = param_grid, n_jobs = -1)
grid.fit(x_train, y_train)
print("Best parameters are : ", grid.best_params_, '\n grid best score :', grid.best_score_)
classifier = KerasClassifier(build_fn = prepare_classifier, x = x_train[0:100], y = y_train )
fit(classifier, x_train[:100], y_train, epoch_size )
This is for some x, and some y data (p.s. I have used mnist data)
The error I get is :
RuntimeError: Cannot clone object , as the constructor either does not set or modifies parameter x
However if my prepare_classifier function takes no arguments code works absolutely fine.
What am I doing incorrectly?
solved it. essentially the below line was the issue
classifier = KerasClassifier(build_fn = prepare_classifier, x = x_train[0:100], y = y_train )
needed to be changed to
classifier = KerasClassifier(build_fn = prepare_classifier)
and the parameters for the prepare_classifier needs to be sent using param_grid

Keras ValueError: expected dense_1 to have shape (3,) but got array with shape (4,)

I've been trying to figure out how to use Keras when reading data from a .csv[1]. I have the following code:
dataframe = pd.read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
one_hot_y = keras.utils.to_categorical(encoded_Y)
X_train = X[:100]
X_test = X[50:]
Y_train = one_hot_y[:100]
Y_test = one_hot_y[50:]
print(X_train.shape)
# define baseline model
def baseline_model():
# create model
model = keras.models.Sequential()
model.add(keras.layers.Dense(8, input_shape=(4, ), activation='relu'))
model.add(keras.layers.Dense(3, activation='softmax'))
# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
history = model.fit(X_train, X_test, epochs=5)
test_loss, test_acc = model.evaluate(Y_train, Y_test)
print('Test accuracy:', test_acc)
However, when I run this, I get the error:
ValueError: Error when checking target: expected dense_1 to have shape (3,) but got array with shape (4,)
I find this odd as I was sure to set input_shape=(4, ). Any help with this would be appreciated.
[1] The CSV looks as follows:
5.1,3.5,1.4,0.2,Iris-setosa
...
You have only 3 output neurons, but the data you are using obviously has 4 classes, so you need to change this line:
model.add(keras.layers.Dense(3, activation='softmax'))
from 3 output classes to 4 output classes:
model.add(keras.layers.Dense(4, activation='softmax'))

Wrong dimensions when trying to use model.predict() from Keras

I think the code will speak for itself, but i trained a model, that i now wanna use to predict on some new input data. The new input data seems to be the wrong dimensions though. Below you can see the code and error messages for both the model and the predicting (attempted)
tokenizer = Tokenizer(num_words=10000)
df = pd.read_csv('/home/paperspace/Sentiment Analysis Dataset.csv', index_col = 0,
error_bad_lines = False)
y = list(df['Sentiment'])
tokenizer.fit_on_texts(list(df['SentimentText']))
X = tokenizer.texts_to_sequences(list(df['SentimentText']))
X = pad_sequences(X)
print("Done, fitting on texts.")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle = True)
model = Sequential()
#Creates the wordembeddings.
embedding_vector_dim = 32
model.add(Embedding(10000, embedding_vector_dim, input_length=X.shape[1]))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.summary()
model.fit(numpy.array(X_train), numpy.array(y_train),
batch_size=128,
epochs=1,
validation_data=(numpy.array(X_test), numpy.array(y_test)))
score, acc = model.evaluate(numpy.array(X_test),numpy.array(y_test),
batch_size=128)
model.save('./sentiment_seq.h5')
print('Test score:', score)
print('Test accuracy:', acc)
Now for the trying to predict and error message.
text = "this is actually a very bad movie."
tokenizer = Tokenizer()
tokenizer.fit_on_texts(list(text))
X = tokenizer.texts_to_sequences(list(text))
X = pad_sequences(X)
X_flat = np.array([X.flatten()])
model = load_model('sentiment_test.h5')
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print(model.predict(X, batch_size = 1, verbose = 1))
ValueError: Error when checking : expected embedding_1_input to have shape (None, 116) but got array with shape (1, 38)
So basically why am i getting this error, when preprocessing is the same when training and predicting, and how can i know what the expected input should be before seeing the error message?
If you're not working with a fixed input length, you should not define an input_length in the embedding layer.

Resources