InvalidArgumentError (see above for traceback): logits and labels must be broadcastable: logits_size=[183,2] labels_size=[20,2]

This is a learning-based RNA and disease prediction code using a CNN that I downloaded from GitHub. The output is an accuracy and an AUC value, but the result is very unstable (occasionally 0.3, occasionally 0.8).
I don't know what the reason is, but the division into training set and validation set in this code is done by a self-defined function, so I want to try 10-fold cross-validation. However, when I write the cross-validation code, the problem shown in the title appears.
This is the code that divides the training set and the validation set in the source code.
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(input_label)))
    input_data = [x[i] for i in shuffle_indices]
    input_label = [input_label[i] for i in shuffle_indices]
    dev_sample_index = -2 * int(dev_sample_percentage * float(len(input_label)))
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    x_train, x_dev, test_data = input_data[:dev_sample_index], input_data[dev_sample_index:test_sample_index], input_data[test_sample_index:]
    y_train, y_dev, test_label = input_label[:dev_sample_index], input_label[dev_sample_index:test_sample_index], input_label[test_sample_index:]
    return x_train, x_dev, test_data, y_train, y_dev, test_label
This is my modified code.
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    y = np.array(input_label)
    kf = KFold(n_splits=10)
    d = kf.split(x)
    for train_idx, test_idx in d:
        x_train = x[train_idx]
        x_dev = x[test_idx]
    l = kf.split(y)
    for train_idx, test_idx in l:
        y_train = y[train_idx]
        y_dev = y[test_idx]
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    test_data = input_data[test_sample_index:]
    test_lable = input_label[test_sample_index:]
    return x_train, x_dev, y_train, y_dev, test_data, test_lable
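For reference, a minimal sketch (my assumption about the intended usage, not code from the repository) of how KFold is usually applied: split once, and index x and y with the same fold indices so every sample stays paired with its label.

from sklearn.model_selection import KFold

def kfold_splits(x, y, n_splits=10):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=10)
    for train_idx, dev_idx in kf.split(x):
        # the same index arrays are applied to x and y, so features
        # and labels stay aligned within each fold
        yield x[train_idx], x[dev_idx], y[train_idx], y[dev_idx]

Note also that the modified get_data returns (x_train, x_dev, y_train, y_dev, test_data, test_lable) while main still unpacks (x_train, x_dev, test_data, y_train, y_dev, test_label), so main's y_train actually receives y_dev; feeding x_train together with that mis-unpacked label set would explain the logits_size=[183,2] versus labels_size=[20,2] mismatch.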
This is a screenshot of the error.
This is the complete code of this part.
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import argparse
import data_helpers as dh
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve
from sklearn import metrics
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
import sklearn.preprocessing as prep
from sklearn.metrics import average_precision_score
from sklearn.model_selection import KFold
def parse_args():
    parser = argparse.ArgumentParser(description="Run CNN.")
    # the input files:
    # disease-gene relationships and miRNA-gene relationships
    parser.add_argument('--input_disease_miRNA', nargs='?', default='..\..\data\CNN\disease-miro-1024-sigmoid.csv',
                        help='Input disease_gene_relationship file')
    parser.add_argument('--input_label', nargs='?', default='..\..\data\CNN\label.csv',
                        help='sample label')
    parser.add_argument('--batch_size', nargs='?', default=64,
                        help='number of samples in one batch')
    parser.add_argument('--training_epochs', nargs='?', default=1,
                        help='number of epochs in SGD')
    parser.add_argument('--display_step', nargs='?', default=10)
    parser.add_argument('--test_percentage', nargs='?', default=0.1,
                        help='percentage of test samples')
    parser.add_argument('--dev_percentage', nargs='?', default=0.1,
                        help='percentage of validation samples')
    parser.add_argument('--L2_norm', nargs='?', default=0.001,
                        help='L2 regularization strength')
    parser.add_argument('--keep_prob', nargs='?', default=0.5,
                        help='keep_prob when using dropout option')
    parser.add_argument('--optimizer', nargs='?', default=tf.train.AdamOptimizer,
                        help='optimizer for learning weights')
    parser.add_argument('--learning_rate', nargs='?', default=1e-3,
                        help='learning rate for the SGD')
    return parser.parse_args()
def standard_scale(X_train):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    return X_train

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    weights = tf.Variable(initial)
    return weights

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="VALID")

def max_pool_2(x, W):
    return tf.nn.max_pool(x, ksize=W, strides=[1, 10, 1, 1], padding="VALID")
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    y = np.array(input_label)
    kf = KFold(n_splits=10)
    d = kf.split(x)
    for train_idx, test_idx in d:
        x_train = x[train_idx]
        x_dev = x[test_idx]
    l = kf.split(y)
    for train_idx, test_idx in l:
        y_train = y[train_idx]
        y_dev = y[test_idx]
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    test_data = input_data[test_sample_index:]
    test_lable = input_label[test_sample_index:]
    return x_train, x_dev, y_train, y_dev, test_data, test_lable

    # # Randomly shuffle data
    # np.random.seed(10)
    # shuffle_indices = np.random.permutation(np.arange(len(input_label)))
    # input_data = [x[i] for i in shuffle_indices]
    # input_label = [input_label[i] for i in shuffle_indices]
    # dev_sample_index = -2 * int(dev_sample_percentage * float(len(input_label)))
    # test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    # x_train, x_dev, test_data = input_data[:dev_sample_index], input_data[dev_sample_index:test_sample_index], input_data[test_sample_index:]
    # y_train, y_dev, test_label = input_label[:dev_sample_index], input_label[dev_sample_index:test_sample_index], input_label[test_sample_index:]
    #
    # return x_train, x_dev, test_data, y_train, y_dev, test_label
def deepnn(x, keep_prob, args):
    with tf.name_scope('reshape'):
        x = tf.reshape(x, [-1, 1024, 1, 1])
    with tf.name_scope('conv_pool'):
        filter_shape = [4, 1, 1, 4]
        W_conv = weight_variable(filter_shape)
        b_conv = bias_variable([4])
        h_conv = tf.nn.relu(conv2d(x, W_conv) + b_conv)  # VALID conv, height 4: 1024 -> 1021
        h_pool = tf.nn.max_pool(h_conv, ksize=[1, 4, 1, 1], strides=[1, 4, 1, 1], padding="VALID")  # 1021 -> 255
        # filter_shape2 = [4, 1, 4, 4]
        # W_conv2 = weight_variable(filter_shape2)
        # b_conv2 = bias_variable([4])
        # h_conv2 = tf.nn.relu(conv2d(h_pool, W_conv2) + b_conv2)
        # h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 4, 1, 1], strides=[1, 4, 1, 1], padding="VALID")
    regula = tf.contrib.layers.l2_regularizer(args.L2_norm)
    h_input1 = tf.reshape(h_pool, [-1, 255 * 4])
    W_fc1 = weight_variable([255 * 4, 50])
    b_fc1 = bias_variable([50])
    h_input2 = tf.nn.relu(tf.matmul(h_input1, W_fc1) + b_fc1)
    h_keep = tf.nn.dropout(h_input2, keep_prob)
    W_fc2 = weight_variable([50, 2])
    b_fc2 = bias_variable([2])
    h_output = tf.matmul(h_keep, W_fc2) + b_fc2
    regularizer = regula(W_fc1) + regula(W_fc2)
    return h_output, regularizer
def main(args):
    with tf.device('/cpu:0'):
        x_train, x_dev, test_data, y_train, y_dev, test_label = get_data(args)
    input_data = tf.placeholder(tf.float32, [None, 1024])
    input_label = tf.placeholder(tf.float32, [None, 2])
    keep_prob = tf.placeholder(tf.float32)
    y_conv, losses = deepnn(input_data, keep_prob, args)
    y_res = tf.nn.softmax(y_conv)
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=input_label)
        cross_entropy = tf.reduce_mean(cross_entropy)
        los = cross_entropy + losses
    with tf.name_scope('optimizer'):
        optimizer = args.optimizer
        learning_rate = args.learning_rate
        train_step = optimizer(learning_rate).minimize(los)
        # optimizer = tf.train.MomentumOptimizer(learning_rate=0.02, momentum=)
        # train_step = optimizer.minimize(cross_entropy)
    with tf.name_scope('accuracy'):
        predictions = tf.argmax(y_conv, 1)
        correct_predictions = tf.equal(predictions, tf.argmax(input_label, 1))
        correct_predictions = tf.cast(correct_predictions, tf.float32)
        accuracy = tf.reduce_mean(correct_predictions)
    batch_size = args.batch_size
    num_epochs = args.training_epochs
    display_step = args.display_step
    k_p = args.keep_prob
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batches = dh.batch_iter(list(zip(x_train, y_train)), batch_size, num_epochs)
        for i, batch in enumerate(batches):
            x_batch, y_batch = zip(*batch)  # pull in the data batch by batch
            train_step.run(feed_dict={input_data: x_batch, input_label: y_batch, keep_prob: k_p})
            if i % display_step == 0:
                loss = sess.run(los, feed_dict={input_data: x_train, input_label: y_train, keep_prob: 1.0})
                # print('after training loss = %f' % loss)
        y_predict = sess.run(y_res, feed_dict={input_data: x_dev, input_label: y_dev, keep_prob: 1.0})[:, 1]
        loss = sess.run(los, feed_dict={input_data: x_dev, input_label: y_dev, keep_prob: 1.0})
        # print('test loss = %f' % loss)
        false_positive_rate1, true_positive_rate1, thresholds1 = roc_curve(np.array(y_dev)[:, 1], y_predict)
        roc_auc1 = auc(false_positive_rate1, true_positive_rate1)
        # print(roc_auc1)
        # print(accuracy.eval(feed_dict={input_data: x_train, input_label: y_train, keep_prob: 1.0}))
        print('accuracy=', accuracy.eval(feed_dict={input_data: test_data, input_label: test_label, keep_prob: 1.0}))
        y_predict = sess.run(y_res, feed_dict={input_data: test_data, input_label: test_label, keep_prob: 1.0})[:, 1]
        false_positive_rate1, true_positive_rate1, thresholds1 = roc_curve(np.array(test_label)[:, 1], y_predict)
        roc_auc1 = auc(false_positive_rate1, true_positive_rate1)
        print('roc_auc1=', roc_auc1)
        # plt.figure()
        # lw = 2
        # plt.title("ROC curve of %s (AUC = %.4f)")
        # plt.xlabel("False Positive Rate")
        # plt.ylabel("True Positive Rate")
        # plt.plot(false_positive_rate1, true_positive_rate1)  # use pylab to plot x and y
        # plt.show()  # show the plot on the screen
        #
        # plt.show()
        # np.savetxt("result_fp_tp_md_aver.txt", roc_curve(np.array(test_label)[:, 1], y_predict))
        # precision, recall, _ = precision_recall_curve(np.array(test_label)[:, 1], y_predict)
        #
        # average_precision = average_precision_score(np.array(test_label)[:, 1], y_predict)
        #
        # print('Average precision-recall score: {0:0.2f}'.format(average_precision))
        # y_predict[y_predict >= 0.5] = 1
        # y_predict[y_predict < 0.5] = 0
        # print(y_predict)
        # print(metrics.f1_score(np.array(test_label)[:, 1], y_predict))
        # np.savetxt("precision_aver.txt", precision)
        # np.savetxt("recall_aver.txt", recall)

if __name__ == '__main__':
    args = parse_args()
    main(args)
please help me!!! thanks a lot!!!

Related

How do I use a pt file in Pytorch to predict the label of a new data?

This is my training model run.py. My data is one-dimensional: each row is one sample with one category label.
import numpy as np  # linear algebra
import pandas as pd
import os

for dirname, _, filenames in os.walk('./kaggle'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import torch
from torch.utils.data import DataLoader
from torch import nn, optim
import sys
from tqdm import tqdm
import io
import torch.utils.model_zoo as model_zoo
import torch.onnx

def my_DataLoader(train_root, test_root, batch_size=100, val_split_factor=0.2):
    train_df = pd.read_csv(train_root, header=None)
    test_df = pd.read_csv(test_root, header=None)
    train_data = train_df.to_numpy()
    test_data = test_df.to_numpy()
    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data[:, :-1]).float(),
                                                   torch.from_numpy(train_data[:, -1]).long(),)
    test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :-1]).float(),
                                                  torch.from_numpy(test_data[:, -1]).long())
    train_len = train_data.shape[0]
    val_len = int(train_len * val_split_factor)
    train_len -= val_len
    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_len, val_len])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    return train_loader, val_loader, test_loader
class conv_net(nn.Module):
    def __init__(self, num_of_class):
        super(conv_net, self).__init__()
        self.model = nn.Sequential(
            # nn.Conv1d(1, 16, kernel_size=5, stride=1, padding=2),
            # nn.Conv1d(1, 16, kernel_size=1, stride=1),
            nn.Conv1d(1, 16, kernel_size=1, stride=1),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(16, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )
        # self.relu = nn.ReLU()
        self.linear = nn.Sequential(
            # nn.Linear(5120, 32),
            nn.Linear(5120, 32),
            nn.LeakyReLU(inplace=True),
            nn.Linear(32, num_of_class),
        )

    def forward(self, x):
        # org = x
        x = x.unsqueeze(1)  # [b, features] -> [b, 1, features] for Conv1d
        x = self.model(x)
        # x = self.relu(x)
        # print(x.shape)
        x = x.view(x.size(0), -1)
        # x [b, 2944]
        # print(x.shape)
        x = self.linear(x)
        return x
batch_size = 32
lr = 3e-3
epochs = 150
torch.manual_seed(1234)
# device = torch.device("cpu:0 cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))

def evalute(model, loader):
    model.eval()
    correct = 0
    total = len(loader.dataset)
    val_bar = tqdm(loader, file=sys.stdout)
    for x, y in val_bar:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            logits = model(x)
            pred = logits.argmax(dim=1)
        correct += torch.eq(pred, y).sum().float().item()
    return correct / total

def main():
    train_loader, val_loader, test_loader = my_DataLoader('./kaggle/train.csv',
                                                          './kaggle/test.csv',
                                                          batch_size=batch_size,
                                                          val_split_factor=0.2)
    model = conv_net(8).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criteon = nn.CrossEntropyLoss()
    # Print model's state_dict
    print(model)
    best_acc, best_epoch = 0, 0
    global_step = 0
    for epoch in range(epochs):
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, (x, y) in enumerate(train_bar):
            # x: [b, 187], y: [b]
            x, y = x.to(device), y.to(device)
            model.train()
            logits = model(x)
            loss = criteon(logits, y)
            optimizer.zero_grad()
            loss.backward()
            # for param in model.parameters():
            #     print(param.grad)
            optimizer.step()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
            global_step += 1
        if epoch % 1 == 0:  # You can change the validation frequency as you wish
            val_acc = evalute(model, val_loader)
            print('val_acc = ', val_acc)
            if val_acc > best_acc:
                best_epoch = epoch
                best_acc = val_acc
                # Export the model
                name_pt = 'best3.pt'
                torch.save(model.state_dict(), name_pt)
    print('best acc:', best_acc, 'best epoch:', best_epoch)
    model.load_state_dict(torch.load(name_pt))
    print('loaded from ckpt!')
    test_acc = evalute(model, test_loader)
    print('test acc:', test_acc)

if __name__ == '__main__':
    main()
Then I tried to make predictions, modifying the code with reference to other people's code:
import torch
from torchvision.transforms import transforms
import pandas as pd
from PIL import Image
from run import conv_net
from pathlib import Path

name_pt = 'best3.pt'
model = conv_net(8)
checkpoint = torch.load(name_pt)
model.load_state_dict(checkpoint)

testdata = './kaggle/onedata.csv'
test_df = pd.read_csv(testdata, header=None)
test_data = test_df.to_numpy()
csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())
output = model(csv)
prediction = int(torch.max(output.data, 1)[1].numpy())
print(prediction)
if (prediction == 0):
    print('other')
if (prediction == 1):
    print('100%PET')
if (prediction == 2):
    print('100% Cotton')
if (prediction == 3):
    print('100% Nylon')
if (prediction == 4):
    print('>70% PET')
if (prediction == 5):
    print('<70% PET')
if (prediction == 6):
    print('Spandex/PET Spandex<5%')
if (prediction == 7):
    print('Spandex/PET Spandex>5%')
Something went wrong:
File "C:\Users\54-0461100-01\Desktop\for_spec_train\run.py", line 70, in forward
    x = x.unsqueeze(1)
AttributeError: 'TensorDataset' object has no attribute 'unsqueeze'
Most of the existing questions are about images, not CSV files. Any help or suggestions are appreciated.
By the way, this is my data format: the LJ column holds the labels, and the train and test sets use the same format. (Screenshots of the training data and of the onedata file omitted.)
When calling output = model(csv) you are passing the model a 'TensorDataset' object as the input instead of a tensor. You can access the tensors in this object by indexing it. https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#TensorDataset
Additionally, you can avoid the TensorDataset object all together by replacing
csv = torch.utils.data.TensorDataset(torch.from_numpy(test_data[:, :]).float())
with
csv = torch.from_numpy(test_data[:, :]).float()
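For example, a minimal prediction sketch under the answer's assumption (a plain tensor as input; file and class names taken from the question):

import torch
import pandas as pd
from run import conv_net

model = conv_net(8)
model.load_state_dict(torch.load('best3.pt'))
model.eval()  # switch BatchNorm/Dropout layers to inference mode

test_data = pd.read_csv('./kaggle/onedata.csv', header=None).to_numpy()
inputs = torch.from_numpy(test_data).float()  # a plain tensor, not a TensorDataset
with torch.no_grad():
    output = model(inputs)
prediction = int(output.argmax(dim=1)[0])
print(prediction)

Calling model.eval() also matters here: BatchNorm1d in training mode cannot normalize a single-sample batch.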

Running model twice inside same script gives different results in Tensorflow

I have an LSTM and I am running it twice inside my script (getting predictions twice as well). The prediction I take from the 2nd iteration is different from the prediction I received from the 1st iteration. But when I run the model manually two times, starting the 2nd run after the first one finishes, it gives the same result.
Question: I want to get identical predictions in both the 1st and 2nd iterations. How can this be achieved? Shown below is my code.
import tensorflow as tf
import matplotlib as mplt
mplt.use('agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
import atexit
from time import time, strftime, localtime
from datetime import timedelta
np.random.seed(1)
tf.set_random_seed(1)
class RNNConfig():
    input_size = 1
    noTimesToRun = 2
    # fileNames = ['store2_1.csv', 'store85_1.csv', 'store259_1.csv', 'store519_1.csv', 'store725_1.csv',
    #              'store749_1.csv', 'store934_1.csv', 'store1019_1.csv']
    # column_min_max_all = [[[0, 11000], [1, 7]], [[0, 17000], [1, 7]], [[0, 23000], [1, 7]], [[0, 14000], [1, 7]],
    #                       [[0, 14000], [1, 7]], [[0, 15000], [1, 7]], [[0, 17000], [1, 7]], [[0, 25000], [1, 7]]]
    columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo', 'lagged_Open', 'lagged_promo', 'lagged_SchoolHoliday']
    # fileNames = ['store85_1', 'store519_1', 'store725_1', 'store749_1', 'store165_1', 'store925_1', 'store1089_1', 'store335_1']
    # column_min_max_all = [[[0, 17000], [1, 7]], [[0, 14000], [1, 7]], [[0, 14000], [1, 7]], [[0, 15000], [1, 7]], [[0, 9000], [1, 7]], [[0, 15000], [1, 7]], [[0, 21000], [1, 7]], [[0, 33000], [1, 7]]]
    fileNames = ['store85_1']
    column_min_max_all = [[[0, 17000], [1, 7]]]
    features = len(columns)
    num_steps = None
    lstm_size = None
    batch_size = None
    init_learning_rate = None
    learning_rate_decay = None
    init_epoch = None
    max_epoch = None
    hidden1_nodes = None
    hidden2_nodes = None
    dropout_rate = None
    hidden1_activation = None
    hidden2_activation = None
    lstm_activation = None
    fileName = None
    column_min_max = None
    # plotname = None
    writename = None
    RMSE = None
    MAE = None
    MAPE = None
    RMSPE = None

config = RNNConfig()
def secondsToStr(elapsed=None):
    if elapsed is None:
        return strftime("%Y-%m-%d %H:%M:%S", localtime())
    else:
        return str(timedelta(seconds=elapsed))

def log(s, elapsed=None):
    line = "=" * 40
    print(line)
    print(secondsToStr(), '-', s)
    if elapsed:
        print("Elapsed time:", elapsed)
    print(line)
    print()

def endlog():
    end = time()
    elapsed = end - start
    log("End Program", secondsToStr(elapsed))

def segmentation(data):
    seq = [price for tup in data[config.columns].values for price in tup]
    seq = np.array(seq)
    # split into items of features
    seq = [np.array(seq[i * config.features: (i + 1) * config.features])
           for i in range(len(seq) // config.features)]
    # split into groups of num_steps
    X = np.array([seq[i: i + config.num_steps] for i in range(len(seq) - config.num_steps)])
    y = np.array([seq[i + config.num_steps] for i in range(len(seq) - config.num_steps)])
    # get only sales value
    y = [[y[i][0]] for i in range(len(y))]
    y = np.asarray(y)
    print(y)
    return X, y

def scale(data):
    for i in range(len(config.column_min_max)):
        data[config.columns[i]] = (data[config.columns[i]] - config.column_min_max[i][0]) / ((config.column_min_max[i][1]) - (config.column_min_max[i][0]))
    return data

def rescle(test_pred):
    prediction = [(pred * (config.column_min_max[0][1] - config.column_min_max[0][0])) + config.column_min_max[0][0] for pred in test_pred]
    return prediction
def pre_process():
    store_data = pd.read_csv(config.fileName)
    store_data['lagged_Open'] = store_data['lagged_Open'].astype(int)
    store_data['lagged_promo'] = store_data['lagged_promo'].astype(int)
    store_data['lagged_SchoolHoliday'] = store_data['lagged_SchoolHoliday'].astype(int)
    #
    # store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
    # ---for segmenting original data --------------------------------
    # original_data = store_data.copy()
    ## train_size = int(len(store_data) * (1.0 - test_ratio))
    # test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    # train_size = int(len(store_data) - (test_len))
    #
    # train_data = store_data[:train_size]
    # test_data = store_data[(train_size - config.num_steps):]
    # original_test_data = test_data.copy()
    #
    # # -------------- processing train data---------------------------------------
    # scaled_train_data = scale(train_data)
    # train_X, train_y = segmentation(scaled_train_data)
    #
    # # -------------- processing test data---------------------------------------
    # scaled_test_data = scale(test_data)
    # test_X, test_y = segmentation(scaled_test_data)
    #
    # # ----segmenting original test data---------------------------------------------
    # nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
    validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
    test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    train_size = int(len(store_data) - (validation_len + test_len))
    train_data = store_data[:train_size]
    validation_data = store_data[(train_size - config.num_steps): validation_len + train_size]
    test_data = store_data[((validation_len + train_size) - config.num_steps):]
    original_val_data = validation_data.copy()
    original_test_data = test_data.copy()
    # -------------- processing train data ---------------------------------------
    scaled_train_data = scale(train_data)
    train_X, train_y = segmentation(scaled_train_data)
    # -------------- processing validation data ----------------------------------
    scaled_validation_data = scale(validation_data)
    val_X, val_y = segmentation(scaled_validation_data)
    # -------------- processing test data ----------------------------------------
    scaled_test_data = scale(test_data)
    test_X, test_y = segmentation(scaled_test_data)
    # ---- segmenting original validation data -----------------------------------
    nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
    # ---- segmenting original test data ------------------------------------------
    nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
    return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y
def generate_batches(train_X, train_y, batch_size):
    num_batches = int(len(train_X)) // batch_size
    if batch_size * num_batches < len(train_X):
        num_batches += 1
    batch_indices = range(num_batches)
    for j in batch_indices:
        batch_X = train_X[j * batch_size: (j + 1) * batch_size]
        batch_y = train_y[j * batch_size: (j + 1) * batch_size]
        assert set(map(len, batch_X)) == {config.num_steps}
        yield batch_X, batch_y

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    itemindex = np.where(y_true == 0)
    y_true = np.delete(y_true, itemindex)
    y_pred = np.delete(y_pred, itemindex)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def RMSPE(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    itemindex = np.where(y_true == 0)
    y_true = np.delete(y_true, itemindex)
    y_pred = np.delete(y_pred, itemindex)
    return np.sqrt(np.mean(np.square(((y_true - y_pred) / y_true)), axis=0))

# def plot(true_vals, pred_vals, name):
#     fig = plt.figure()
#     fig = plt.figure(dpi=100, figsize=(20, 7))
#     days = range(len(true_vals))
#     plt.plot(days, pred_vals, label='pred sales')
#     plt.plot(days, true_vals, label='truth sales')
#     plt.legend(loc='upper left', frameon=False)
#     plt.xlabel("day")
#     plt.ylabel("sales")
#     plt.grid(ls='--')
#     plt.savefig(name, format='png', bbox_inches='tight', transparent=False)
#     plt.close()

def write_results(true_vals, pred_vals, name):
    print("write method")
    # with open(name, "w") as f:
    #     writer = csv.writer(f)
    #     writer.writerows(zip(true_vals, pred_vals))
def train_test():
    train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
    # tf.set_random_seed(1)
    inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.features], name="inputs")
    targets = tf.placeholder(tf.float32, [None, config.input_size], name="targets")
    model_learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
    model_dropout_rate = tf.placeholder_with_default(0.0, shape=())
    global_step = tf.Variable(0, trainable=False)
    model_learning_rate = tf.train.exponential_decay(learning_rate=model_learning_rate, global_step=global_step,
                                                     decay_rate=config.learning_rate_decay,
                                                     decay_steps=config.init_epoch, staircase=False)
    cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True, activation=config.lstm_activation,
                                   use_peepholes=True)
    val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    val = tf.transpose(val1, [1, 0, 2])
    last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
    # hidden layers
    hidden1 = tf.layers.dense(last, units=config.hidden1_nodes, activation=config.hidden2_activation)
    hidden2 = tf.layers.dense(hidden1, units=config.hidden2_nodes, activation=config.hidden1_activation)
    dropout = tf.layers.dropout(hidden2, rate=model_dropout_rate, training=True, seed=1)
    weight = tf.Variable(tf.truncated_normal([config.hidden2_nodes, config.input_size]))
    bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))
    prediction = tf.nn.relu(tf.matmul(dropout, weight) + bias)
    loss = tf.losses.mean_squared_error(targets, prediction)
    optimizer = tf.train.AdamOptimizer(model_learning_rate)
    minimize = optimizer.minimize(loss, global_step=global_step)
    # -------------------- training ------------------------------------------------------
    tf.set_random_seed(1)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    iteration = 1
    for epoch_step in range(config.max_epoch):
        for batch_X, batch_y in generate_batches(train_X, train_y, config.batch_size):
            train_data_feed = {
                inputs: batch_X,
                targets: batch_y,
                model_learning_rate: config.init_learning_rate,
                model_dropout_rate: config.dropout_rate
            }
            train_loss, _, value = sess.run([loss, minimize, val1], train_data_feed)
            if iteration % 5 == 0:
                print("Epoch: {}/{}".format(epoch_step, config.max_epoch),
                      "Iteration: {}".format(iteration),
                      "Train loss: {:.6f}".format(train_loss))
            iteration += 1
    saver = tf.train.Saver()
    saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
    # -------------------- testing ------------------------------------------------------
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
    test_data_feed = {
        inputs: test_X,
    }
    test_pred = sess.run(prediction, test_data_feed)
    # rmsse = sess.run(correct_prediction, test_data_feed)
    pred_vals = rescle(test_pred)
    pred_vals = np.array(pred_vals)
    pred_vals = (np.round(pred_vals, 0)).astype(np.int32)
    pred_vals = pred_vals.flatten()
    pred_vals = pred_vals.tolist()
    nonescaled_y = nonescaled_test_y.flatten()
    nonescaled_y = nonescaled_y.tolist()
    # plot(nonescaled_y, pred_vals, config.plotname)
    # write_results(nonescaled_y, pred_vals, config.writename)
    meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
    rootMeanSquaredError = sqrt(meanSquaredError)
    print("RMSE:", rootMeanSquaredError)
    mae = mean_absolute_error(nonescaled_y, pred_vals)
    print("MAE:", mae)
    mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
    print("MAPE:", mape)
    rmse_val = RMSPE(nonescaled_y, pred_vals)
    print("RMSPE:", rmse_val)
    config.RMSE = rootMeanSquaredError
    config.MAE = mae
    config.MAPE = mape
    config.RMSPE = rmse_val
    # sess.close()
    # tf.reset_default_graph()
if __name__ == '__main__':
    start = time()
    for i in range(len(config.fileNames)):
        for j in range(config.noTimesToRun):
            config.fileName = '{}{}{}'.format('/home/suleka/Documents/sales_prediction/', config.fileNames[i], '.csv')
            # /home/suleka/Documents/sales_prediction/
            # '/home/wso2/suleka/salesPred/
            # config.plotname = '{}{}{}'.format('Sales_Prediction_testset_with_zero_bsl_plot_', config.fileNames[i], '.png')
            config.writename = '{}{}{}{}{}'.format('prediction_data/Sales_Prediction_testset_with_zero_bsl_results_', j, '_', config.fileNames[i], '.csv')
            write_file = '{}{}{}{}{}'.format('test_results/test__data_', j, '_', config.fileNames[i], '.csv')
            config.column_min_max = config.column_min_max_all[i]
            hyperparameters = pd.read_csv('allStores_test.csv', header=None, float_precision='round_trip')
            config.num_steps = hyperparameters.iloc[i:, 1].get_values()[0].astype(np.int32)
            config.lstm_size = hyperparameters.iloc[i:, 2].get_values()[0].astype(np.int32)
            config.hidden2_nodes = hyperparameters.iloc[i:, 3].get_values()[0].astype(np.int32)
            config.hidden2_activation = hyperparameters.iloc[i:, 4].get_values()[0]
            config.hidden1_activation = hyperparameters.iloc[i:, 5].get_values()[0]
            config.hidden1_nodes = hyperparameters.iloc[i:, 6].get_values()[0].astype(np.int32)
            config.lstm_activation = hyperparameters.iloc[i:, 7].get_values()[0]
            config.init_epoch = hyperparameters.iloc[i:, 8].get_values()[0].astype(np.int32)
            config.max_epoch = hyperparameters.iloc[i:, 9].get_values()[0].astype(np.int32)
            config.learning_rate_decay = hyperparameters.iloc[i:, 10].get_values()[0].astype(np.float32)
            config.dropout_rate = hyperparameters.iloc[i:, 11].get_values()[0].astype(np.float32)
            config.batch_size = hyperparameters.iloc[i:, 12].get_values()[0].astype(np.int32)
            config.init_learning_rate = hyperparameters.iloc[i:, 13].get_values()[0].astype(np.float32)
            config.hidden1_activation = eval(config.hidden1_activation)
            config.hidden2_activation = eval(config.hidden2_activation)
            config.lstm_activation = eval(config.lstm_activation)
            train_test()
            tf.reset_default_graph()
    atexit.register(endlog)
    log("Start Program")
Make sure that you always do the following in this order:

# Reset the default graph
tf.reset_default_graph()
# Set the random seed
tf.set_random_seed(seed)
# Build the graph
# ....
# After creating the cell make sure you initialize it
cell.build(inputs_shape)
# Initialize all variables in the graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Train the model
# ....
# Evaluate the model
# ....

In your code I see that on the first run you are resetting the graph after setting the random seed.
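Applied to the loop in the question, that ordering would look roughly like this (a sketch, not the full script):

for j in range(config.noTimesToRun):
    tf.reset_default_graph()  # reset first; resetting later would discard the seed
    tf.set_random_seed(1)     # seed the fresh default graph
    train_test()              # build, initialize and train inside the seeded graph

tf.set_random_seed applies to the current default graph, so calling tf.reset_default_graph afterwards replaces that graph, and the seed along with it.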

Input contains NaN, infinity or a value too large for dtype('float64') in Tensorflow

I am trying to train an LSTM, and my model has an exponential learning-rate decay and a dropout layer. In order to deactivate the dropout layer when testing and validating, I have put a placeholder for the dropout rate, given it a default value of 1.0, and I set it to 0.5 when training. The dropout_rate placeholder value is passed to tf.layers.dropout(). When I run this during validation I get the following error:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
Shown below is the stack trace:
Traceback (most recent call last):
  File "/home/suleka/Documents/sales_prediction/SalesPrediction_LSTM_mv.py", line 329, in <module>
    train_test()
  File "/home/suleka/Documents/sales_prediction/SalesPrediction_LSTM_mv.py", line 270, in train_test
    meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
  File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/metrics/regression.py", line 238, in mean_squared_error
    y_true, y_pred, multioutput)
  File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/metrics/regression.py", line 77, in _check_reg_targets
    y_pred = check_array(y_pred, ensure_2d=False)
  File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 453, in check_array
    _assert_all_finite(array)
  File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 44, in _assert_all_finite
    " or a value too large for %r." % X.dtype)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
When I put the dropout rate into tf.layers.dropout as a literal value, like:
dropout = tf.layers.dropout(last, rate=0.5, training=True)
the code works fine. I am not sure what is happening in the code.
Shown below is my complete code:
import tensorflow as tf
import matplotlib as mplt
mplt.use('agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
np.random.seed(1)
tf.set_random_seed(1)
class RNNConfig():
    input_size = 1
    num_steps = 7  # 5
    lstm_size = 64  # 16
    num_layers = 1
    keep_prob = 0.8
    batch_size = 16  # 64
    init_epoch = 15  # 5
    max_epoch = 20  # 100 or 50
    # test_ratio = 0.2
    fileName = 'store2_1.csv'
    graph = tf.Graph()
    column_min_max = [[0, 11000], [1, 7]]
    columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo']
    features = len(columns)
    hidden1_nodes = 64
    hidden2_nodes = 8

config = RNNConfig()
def segmentation(data):
    seq = [price for tup in data[config.columns].values for price in tup]
    seq = np.array(seq)
    # split into items of features
    seq = [np.array(seq[i * config.features: (i + 1) * config.features])
           for i in range(len(seq) // config.features)]
    # split into groups of num_steps
    X = np.array([seq[i: i + config.num_steps] for i in range(len(seq) - config.num_steps)])
    y = np.array([seq[i + config.num_steps] for i in range(len(seq) - config.num_steps)])
    # get only sales value
    y = [[y[i][0]] for i in range(len(y))]
    y = np.asarray(y)
    return X, y

def scale(data):
    for i in range(len(config.column_min_max)):
        data[config.columns[i]] = (data[config.columns[i]] - config.column_min_max[i][0]) / ((config.column_min_max[i][1]) - (config.column_min_max[i][0]))
    return data

def rescle(test_pred):
    prediction = [(pred * (config.column_min_max[0][1] - config.column_min_max[0][0])) + config.column_min_max[0][0] for pred in test_pred]
    return prediction
def pre_process():
    store_data = pd.read_csv(config.fileName)
    store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
    #
    # store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
    # ---for segmenting original data --------------------------------
    # original_data = store_data.copy()
    ## train_size = int(len(store_data) * (1.0 - test_ratio))
    validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
    test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    train_size = int(len(store_data) - (validation_len + test_len))
    train_data = store_data[:train_size]
    validation_data = store_data[(train_size - config.num_steps): validation_len + train_size]
    test_data = store_data[((validation_len + train_size) - config.num_steps):]
    original_val_data = validation_data.copy()
    original_test_data = test_data.copy()
    # -------------- processing train data ---------------------------------------
    scaled_train_data = scale(train_data)
    train_X, train_y = segmentation(scaled_train_data)
    # -------------- processing validation data ----------------------------------
    scaled_validation_data = scale(validation_data)
    val_X, val_y = segmentation(scaled_validation_data)
    # -------------- processing test data ----------------------------------------
    scaled_test_data = scale(test_data)
    test_X, test_y = segmentation(scaled_test_data)
    # ---- segmenting original validation data -----------------------------------
    nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
    # ---- segmenting original test data ------------------------------------------
    nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
    return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y
def generate_batches(train_X, train_y, batch_size):
    num_batches = int(len(train_X)) // batch_size
    if batch_size * num_batches < len(train_X):
        num_batches += 1
    batch_indices = range(num_batches)
    for j in batch_indices:
        batch_X = train_X[j * batch_size: (j + 1) * batch_size]
        batch_y = train_y[j * batch_size: (j + 1) * batch_size]
        assert set(map(len, batch_X)) == {config.num_steps}
        yield batch_X, batch_y

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    itemindex = np.where(y_true == 0)
    y_true = np.delete(y_true, itemindex)
    y_pred = np.delete(y_pred, itemindex)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def RMSPE(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.sqrt(np.mean(np.square(((y_true - y_pred) / y_pred)), axis=0))

def plot(true_vals, pred_vals, name):
    fig = plt.figure()
    fig = plt.figure(dpi=100, figsize=(20, 7))
    days = range(len(true_vals))
    plt.plot(days, pred_vals, label='pred sales')
    plt.plot(days, true_vals, label='truth sales')
    plt.legend(loc='upper left', frameon=False)
    plt.xlabel("day")
    plt.ylabel("sales")
    plt.grid(ls='--')
    plt.savefig(name, format='png', bbox_inches='tight', transparent=False)
    plt.close()

def write_results(true_vals, pred_vals, name):
    with open(name, "w") as f:
        writer = csv.writer(f)
        writer.writerows(zip(true_vals, pred_vals))
def train_test():
    train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
    # Add nodes to the graph
    with config.graph.as_default():
        tf.set_random_seed(1)
        learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
        inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.features], name="inputs")
        targets = tf.placeholder(tf.float32, [None, config.input_size], name="targets")
        global_step = tf.Variable(0, trainable=False)
        dropout_rate = tf.placeholder_with_default(1.0, shape=())
        learning_rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_rate=0.96, decay_steps=5, staircase=False)
        cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True, activation=tf.nn.relu)
        val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
        val = tf.transpose(val1, [1, 0, 2])
        last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
        # hidden layers
        last = tf.layers.dense(last, units=config.hidden1_nodes, activation=tf.nn.relu)
        last = tf.layers.dense(last, units=config.hidden2_nodes, activation=tf.nn.relu)
        weight = tf.Variable(tf.truncated_normal([config.hidden2_nodes, config.input_size]))
        bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))
        dropout = tf.layers.dropout(last, rate=dropout_rate, training=True)
        prediction = tf.matmul(dropout, weight) + bias
        loss = tf.losses.mean_squared_error(targets, prediction)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        minimize = optimizer.minimize(loss, global_step=global_step)
        # correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
    # -------------------- training ------------------------------------------------------
    with tf.Session(graph=config.graph) as sess:
        tf.set_random_seed(1)
        tf.global_variables_initializer().run()
        iteration = 1
        for epoch_step in range(config.max_epoch):
            for batch_X, batch_y in generate_batches(train_X, train_y, config.batch_size):
                train_data_feed = {
                    inputs: batch_X,
                    targets: batch_y,
                    learning_rate: 0.01,
                    dropout_rate: 0.5
                }
                train_loss, _, value, gs = sess.run([loss, minimize, val1, global_step], train_data_feed)
                if iteration % 5 == 0:
                    print("Epoch: {}/{}".format(epoch_step, config.max_epoch),
                          "Iteration: {}".format(iteration),
                          "Train loss: {:.6f}".format(train_loss))
                iteration += 1
        saver = tf.train.Saver()
        saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
    # -------------------- validation ------------------------------------------------------
    with tf.Session(graph=config.graph) as sess:
        tf.set_random_seed(1)
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
        test_data_feed = {
            inputs: val_X,
            dropout_rate: 1.0
        }
        test_pred = sess.run(prediction, test_data_feed)
        # rmsse = sess.run(correct_prediction, test_data_feed)
        pred_vals = rescle(test_pred)
        pred_vals = np.array(pred_vals)
        pred_vals = pred_vals.flatten()
        pred_vals = pred_vals.tolist()
        nonescaled_y = nonescaled_val_y.flatten()
        nonescaled_y = nonescaled_y.tolist()
        plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
        write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
        meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
        rootMeanSquaredError = sqrt(meanSquaredError)
        print("RMSE:", rootMeanSquaredError)
        mae = mean_absolute_error(nonescaled_y, pred_vals)
        print("MAE:", mae)
        mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
        print("MAPE:", mape)
        rmse_val = RMSPE(nonescaled_y, pred_vals)
        print("RMSPE:", rmse_val)
    # -------------------- testing ------------------------------------------------------
    with tf.Session(graph=config.graph) as sess:
        tf.set_random_seed(1)
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
        test_data_feed = {
            inputs: test_X,
            dropout_rate: 1.0
        }
        test_pred = sess.run(prediction, test_data_feed)
        # rmsse = sess.run(correct_prediction, test_data_feed)
        pred_vals = rescle(test_pred)
        pred_vals = np.array(pred_vals)
        pred_vals = (np.round(pred_vals, 0)).astype(np.int32)
        pred_vals = pred_vals.flatten()
        pred_vals = pred_vals.tolist()
        nonescaled_y = nonescaled_test_y.flatten()
        nonescaled_y = nonescaled_y.tolist()
        plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
        write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
        meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
        rootMeanSquaredError = sqrt(meanSquaredError)
        print("RMSE:", rootMeanSquaredError)
        mae = mean_absolute_error(nonescaled_y, pred_vals)
        print("MAE:", mae)
        mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
        print("MAPE:", mape)
        rmse_val = RMSPE(nonescaled_y, pred_vals)
        print("RMSPE:", rmse_val)
if __name__ == '__main__':
    train_test()
When using tf.layers.dropout, the rate argument tells how much of the data to drop; when you give it 1.0, all of the output is gone. Replace 1.0 with 0.0 and it should work.
TensorFlow documentation: https://www.tensorflow.org/api_docs/python/tf/layers/dropout
I am putting this here because, even though @Almog's answer was correct, it didn't have the explanation I wanted. So for anyone confused like me:
If you use tf.nn.dropout(), to deactivate the dropout layer you should set keep_prob=1.0, not keep_prob=0.0, as keep_prob means 'the probability that each element is kept'. So keeping it at 1.0 deactivates dropout.
If you are using tf.layers.dropout(), you should set rate=0.0, not rate=1.0, as rate here means 'the dropout rate (should be between 0 and 1); e.g. rate=0.1 would drop out 10% of input units'. So rate=0.0 means that none of the input units will be dropped.
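A quick way to see the difference between the two APIs (a sketch using the TF 1.x functions from the question):

import tensorflow as tf

x = tf.ones([1, 4])
off_nn = tf.nn.dropout(x, keep_prob=1.0)                    # keep everything -> dropout off
off_layers = tf.layers.dropout(x, rate=0.0, training=True)  # drop nothing  -> dropout off

with tf.Session() as sess:
    print(sess.run([off_nn, off_layers]))  # both return the input unchanged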

Not all points are within the bounds of the space error in Scikit-Optimize

I am attempting a hyper-parameter optimization task on an LSTM model (pure TensorFlow) using the scikit-optimize package. I am using the Bayesian optimization method with Gaussian processes (gp_minimize). The demo code provided for the function can be found through this link. When I try to run my code I keep getting the error below:
ValueError: Not all points are within the bounds of the space.
My complete code is shown below:
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.utils import use_named_args
import csv
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
import atexit
from time import time, strftime, localtime
from datetime import timedelta
input_size = 1
num_layers = 1
hidden1_activation = tf.nn.relu
hidden2_activation = tf.nn.relu
lstm_activation = tf.nn.relu
columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo']
features = len(columns)
fileName = None
column_min_max = None
# fileNames = ['store2_1.csv', 'store85_1.csv', 'store259_1.csv', 'store519_1.csv', 'store725_1.csv', 'store749_1.csv', 'store934_1.csv', 'store1019_1.csv']
# column_min_max_all = [[[0, 11000], [1, 7]], [[0, 17000], [1, 7]], [[0, 23000], [1, 7]], [[0, 14000], [1, 7]], [[0, 14000], [1, 7]], [[0, 15000], [1, 7]], [[0, 17000], [1, 7]], [[0, 25000], [1, 7]]]
fileNames = ['store2_1.csv']
column_min_max_all = [[[0, 11000], [1, 7]]]
num_steps = None
lstm_size = None
batch_size = None
init_learning_rate = 0.01
learning_rate_decay = None
init_epoch = None # 5
max_epoch = None # 100 or 50
hidden1_nodes = None
hidden2_nodes = None
dropout_rate= None
best_accuracy = 0.0
start = None
lstm_num_steps = Categorical(categories=[2,3,4,5,6,7,8,9,10,11,12,13,14], name ='lstm_num_steps')
size = Categorical(categories=[8,16,32,64,128], name ='size')
lstm_hidden1_nodes = Categorical(categories=[4,8,16,32,64], name= 'lstm_hidden1_nodes')
lstm_hidden2_nodes = Categorical(categories=[2,4,8,16,32],name= 'lstm_hidden2_nodes')
lstm_learning_rate_decay = Categorical(categories=[0.99,0.8,0.7], name='lstm_learning_rate_decay')
lstm_max_epoch = Categorical(categories=[60,50,100,120,150,200], name='lstm_max_epoch')
lstm_init_epoch = Categorical(categories=[5, 10, 15, 20],name='lstm_init_epoch')
lstm_batch_size = Categorical(categories=[5, 8, 16, 30, 31, 64] , name = 'lstm_batch_size')
lstm_dropout_rate = Categorical(categories=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] , name = 'lstm_dropout_rate')
dimensions = [lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size, lstm_dropout_rate]
default_parameters = [5,35,30,15,5,60,0.99,8,0.1]
# def log_dir_name(lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size):
#
# # The dir-name for the TensorBoard log-dir.
# s = "./19_logs/{1}_{2}_{3}_{4}_{5}_{6}_{7}_{8}_{9}/"
#
# # Insert all the hyper-parameters in the dir-name.
# log_dir = s.format(lstm_num_steps, size,lstm_hidden1_nodes, lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,lstm_learning_rate_decay,lstm_batch_size)
#
# return log_dir
def secondsToStr(elapsed=None):
    if elapsed is None:
        return strftime("%Y-%m-%d %H:%M:%S", localtime())
    else:
        return str(timedelta(seconds=elapsed))

def log(s, elapsed=None):
    line = "=" * 40
    print(line)
    print(secondsToStr(), '-', s)
    if elapsed:
        print("Elapsed time:", elapsed)
    print(line)
    print()

def endlog():
    end = time()
    elapsed = end - start
    log("End Program", secondsToStr(elapsed))
def generate_batches(train_X, train_y, batch_size):
    num_batches = int(len(train_X)) // batch_size
    if batch_size * num_batches < len(train_X):
        num_batches += 1
    batch_indices = range(num_batches)
    for j in batch_indices:
        batch_X = train_X[j * batch_size: (j + 1) * batch_size]
        batch_y = train_y[j * batch_size: (j + 1) * batch_size]
        # assert set(map(len, batch_X)) == {num_steps}
        yield batch_X, batch_y

def segmentation(data):
    seq = [price for tup in data[columns].values for price in tup]
    seq = np.array(seq)
    # split into items of features
    seq = [np.array(seq[i * features: (i + 1) * features])
           for i in range(len(seq) // features)]
    # split into groups of num_steps
    X = np.array([seq[i: i + num_steps] for i in range(len(seq) - num_steps)])
    y = np.array([seq[i + num_steps] for i in range(len(seq) - num_steps)])
    # get only sales value
    y = [[y[i][0]] for i in range(len(y))]
    y = np.asarray(y)
    return X, y

def scale(data):
    for i in range(len(column_min_max)):
        data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0]))
    return data

def rescle(test_pred):
    prediction = [(pred * (column_min_max[0][1] - column_min_max[0][0])) + column_min_max[0][0] for pred in test_pred]
    return prediction
def pre_process():
    store_data = pd.read_csv(fileName)
    # sftp://wso2#192.168.32.11/home/wso2/suleka/salesPred/store2_1.csv
    store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
    #
    # store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
    # ---for segmenting original data --------------------------------
    original_data = store_data.copy()
    ## train_size = int(len(store_data) * (1.0 - test_ratio))
    validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
    test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    train_size = int(len(store_data) - (validation_len + test_len))
    train_data = store_data[:train_size]
    validation_data = store_data[(train_size - num_steps): validation_len + train_size]
    test_data = store_data[((validation_len + train_size) - num_steps):]
    original_val_data = validation_data.copy()
    original_test_data = test_data.copy()
    # -------------- processing train data ---------------------------------------
    scaled_train_data = scale(train_data)
    train_X, train_y = segmentation(scaled_train_data)
    # -------------- processing validation data ----------------------------------
    scaled_validation_data = scale(validation_data)
    val_X, val_y = segmentation(scaled_validation_data)
    # -------------- processing test data ----------------------------------------
    scaled_test_data = scale(test_data)
    test_X, test_y = segmentation(scaled_test_data)
    # ---- segmenting original validation data -----------------------------------
    nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
    # ---- segmenting original test data ------------------------------------------
    nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
    return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y
def setupRNN(inputs):
    cell = tf.contrib.rnn.LSTMCell(lstm_size, state_is_tuple=True, activation=lstm_activation)
    val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    val = tf.transpose(val1, [1, 0, 2])
    last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
    # hidden layers
    hidden1 = tf.layers.dense(last, units=hidden1_nodes, activation=hidden2_activation)
    hidden2 = tf.layers.dense(hidden1, units=hidden2_nodes, activation=hidden1_activation)
    dropout = tf.layers.dropout(hidden2, rate=dropout_rate, training=True)
    weight = tf.Variable(tf.truncated_normal([hidden2_nodes, input_size]))
    bias = tf.Variable(tf.constant(0.1, shape=[input_size]))
    prediction = tf.matmul(dropout, weight) + bias
    return prediction
# saver = tf.train.Saver()
# saver.save(sess, "checkpoints_sales/sales_pred.ckpt")

@use_named_args(dimensions=dimensions)
def fitness(lstm_num_steps, size, lstm_hidden1_nodes, lstm_hidden2_nodes, lstm_init_epoch, lstm_max_epoch,
            lstm_learning_rate_decay, lstm_batch_size, lstm_dropout_rate):
    global num_steps, lstm_size, hidden2_nodes, hidden2_activation, hidden1_activation, hidden1_nodes, lstm_activation, init_epoch, max_epoch, learning_rate_decay, dropout_rate
    num_steps = lstm_num_steps
    lstm_size = size
    batch_size = lstm_batch_size
    learning_rate_decay = lstm_learning_rate_decay
    init_epoch = lstm_init_epoch
    max_epoch = lstm_max_epoch
    hidden1_nodes = lstm_hidden1_nodes
    hidden2_nodes = lstm_hidden2_nodes
    dropout_rate = lstm_dropout_rate
    # log_dir = log_dir_name(lstm_num_steps, size, lstm_hidden1_nodes, lstm_hidden2_nodes, lstm_learning_rate,
    #                        lstm_init_epoch, lstm_max_epoch, lstm_learning_rate_decay, lstm_batch_size)
    train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
    inputs = tf.placeholder(tf.float32, [None, num_steps, features], name="inputs")
    targets = tf.placeholder(tf.float32, [None, input_size], name="targets")
    learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
    prediction = setupRNN(inputs)
    with tf.name_scope('loss'):
        model_loss = tf.losses.mean_squared_error(targets, prediction)
    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(model_loss)
        train_step = train_step
    # with tf.name_scope('accuracy'):
    #     correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
    #     accuracy = correct_prediction
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    learning_rates_to_use = [
        init_learning_rate * (
            learning_rate_decay ** max(float(i + 1 - init_epoch), 0.0)
        ) for i in range(max_epoch)]
    for epoch_step in range(max_epoch):
        current_lr = learning_rates_to_use[epoch_step]
        for batch_X, batch_y in generate_batches(train_X, train_y, batch_size):
            train_data_feed = {
                inputs: batch_X,
                targets: batch_y,
                learning_rate: current_lr,
            }
            sess.run(train_step, train_data_feed)
    val_data_feed = {
        inputs: val_X,
        targets: val_y,
        learning_rate: 0.0,
    }
    pred = sess.run(prediction, val_data_feed)
    pred_vals = rescle(pred)
    pred_vals = np.array(pred_vals)
    pred_vals = pred_vals.flatten()
    pred_vals = pred_vals.tolist()
    nonescaled_y = nonescaled_val_y.flatten()
    nonescaled_y = nonescaled_y.tolist()
    val_accuracy = sqrt(mean_squared_error(nonescaled_y, pred_vals))
    global best_accuracy
    if val_accuracy < best_accuracy:
        # Save the new model to hard disk.
        saver = tf.train.Saver()
        saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
        with open("best_configs.csv", "a") as f:
            writer = csv.writer(f)
            writer.writerows(zip([fileName], [num_steps], [lstm_size], [hidden2_nodes], [hidden2_activation], [hidden1_activation], [hidden1_nodes], [lstm_size], [lstm_activation], [init_epoch], [max_epoch], [learning_rate_decay], [dropout_rate], [val_accuracy]))
        # Update the best validation accuracy.
        best_accuracy = val_accuracy
    # Clear the session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    # sess.clear_session()
    sess.close()
    tf.reset_default_graph()
    # NOTE: Scikit-optimize does minimization so it tries to
    # find a set of hyper-parameters with the LOWEST fitness-value.
    # Because we are interested in the HIGHEST classification
    # accuracy, we need to negate this number so it can be minimized.
    return val_accuracy
if __name__ == '__main__':
    start = time()
    for i in range(len(fileNames)):
        fileName = '{}{}'.format('home/suleka/Documents/sales_prediction/', fileNames[i])
        # /home/suleka/Documents/sales_prediction/
        column_min_max = column_min_max_all[i]
        # Bayesian optimization using Gaussian Processes.
        # acq_func -> https://arxiv.org/pdf/1807.02811.pdf
        search_result = gp_minimize(func=fitness,
                                    dimensions=dimensions,
                                    acq_func='EI',  # Expected Improvement.
                                    n_calls=40,
                                    x0=default_parameters)
    atexit.register(endlog)
    log("Start Program")
Shown below is the complete stack trace:
/home/wso2/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from float to np.floating is deprecated. In future, it will be treated as np.float64 == np.dtype(float).type.
  from ._conv import register_converters as _register_converters
auto_LSTM_skopt.py:138: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0]))
/home/wso2/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py:100: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Traceback (most recent call last):
  File "auto_LSTM_skopt.py", line 365, in <module>
    x0=default_parameters)
  File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/gp.py", line 228, in gp_minimize
    callback=callback, n_jobs=n_jobs)
  File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/base.py", line 240, in base_minimize
    result = optimizer.tell(x0, y0)
  File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/optimizer/optimizer.py", line 432, in tell
    check_x_in_space(x, self.space)
  File "/home/wso2/anaconda3/lib/python3.6/site-packages/skopt/utils.py", line 186, in check_x_in_space
    raise ValueError("Not all points are within the bounds of"
ValueError: Not all points are within the bounds of the space.
The issue is with your size dimension. All values in default_parameters must lie within the corresponding dimensions to be optimized; if not, skopt throws the "Not all points are within the bounds of the space" error.
You currently have: size = Categorical(categories=[8,16,32,64,128], name ='size')
In your default parameters: default_parameters = [5,35,30,15,5,60,0.99,8,0.1]
the second item (representing 'size') has the value 35, which is not among the size categories to search.
FIX 1.
Include 35 in size space:
size = Categorical(categories=[8,16,32,35,64,128], name ='size')
FIX 2.
Change 35 to '32' in default_parameters:
default_parameters = [5,32,30,15,5,60,0.99,8,0.1]
Use any of the fixes above and your code will run like a charm :)
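A simple way to catch this before calling gp_minimize (a hedged sketch, assuming every dimension is Categorical as in the question) is to validate the defaults against the search space:

for dim, value in zip(dimensions, default_parameters):
    if value not in dim.categories:
        print('default {!r} is outside dimension {}'.format(value, dim.name))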

Missing 1 required positional argument: 'test_y'

I am training an MLP on the MNIST dataset.
However, I get the error: missing 1 required positional argument: 'test_y'.
I can't handle this problem.
Please tell me how to deal with this error message.
This is the code of what I do.
def homework(train_X, train_y, test_X, test_y):
    epoch = 10000
    batch_size = 20
    learning_rate = 1e-3
    input_size = 784
    hidden_size = 100
    output_size = 10
    data_num = train_X.shape[0]
    np.random.seed(0)
    W1 = np.random.randn(input_size, hidden_size)
    b1 = np.zeros(hidden_size)
    W2 = np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)

    def softmax(x):
        x -= np.max(x, axis=1).reshape((-1, 1))
        return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))

    def cross_entropy(y, output):
        batch_size = y.shape[0]
        return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def forward(x):
        fwd = {}
        fwd['h1'] = sigmoid(np.dot(x, W1) + b1)
        fwd['prob'] = softmax(np.dot(fwd['h1'], W2) + b2)
        return fwd

    for n in range(epoch):
        loss_sum = 0
        for i in range(0, data_num, batch_size):
            x = train_X[i:i+batch_size]
            y = train_y[i:i+batch_size]
            fwd = forward(x)
            loss_sum += cross_entropy(y, fwd['prob'])
            grad = network.gradient(x, y)
            for key in ('W1', 'b1', 'W2', 'b2'):
                network.params[key] -= learning_rate * grad[key]
            loss = network.loss(x, y)
            train_loss_list.append(loss)
        if np.mod(n, 1000) == 0:
            pred_y = np.argmax(forward(test_X)['prob'], axis=1)
            accuracy = f1_score(test_y, pred_y, average='macro')
            print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
    pred_y = np.argmax(forward(test_X)['prob'], axis=1)
    return pred_y
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np

def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                               mnist.target.astype('int32'), random_state=42)
    mnist_X = mnist_X / 255.0
    return train_test_split(mnist_X, mnist_y,
                            test_size=0.2,
                            random_state=42)

def validate_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    # validate for small dataset
    train_X_mini = train_X[:100]
    train_y_mini = train_y[:100]
    test_X_mini = test_X[:100]
    test_y_mini = test_y[:100]
    pred_y = homework(train_X_mini, train_y_mini, test_X_mini)
    print(f1_score(test_y_mini, pred_y, average='macro'))

def score_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    pred_y = homework(train_X, train_y, test_X)
    print(f1_score(test_y, pred_y, average='macro'))

validate_homework()
# score_homework()
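The error comes from the calls, not from homework itself: homework is defined with four parameters (train_X, train_y, test_X, test_y), but validate_homework() and score_homework() pass only three arguments, so test_y is missing. A minimal fix (my suggestion, not part of the original post) is to pass the test labels through:

    pred_y = homework(train_X_mini, train_y_mini, test_X_mini, test_y_mini)

and likewise pred_y = homework(train_X, train_y, test_X, test_y) in score_homework().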
