Value Error in tensorflow - python-3.x

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/temp/data", one_hot=True)
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
# matrix = height * width
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
# defining the neural network
def neural_network_model(data):
hiddenLayer1 = {'weights': tf.Variable(tf.random_normal([784, n_nodes_hl1])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
hiddenLayer2 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
hiddenLayer3 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
outputLayer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'biases': tf.Variable(tf.random_normal([n_classes]))}
l1 = tf.add(tf.matmul(data, hiddenLayer1['weights']), hiddenLayer1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hiddenLayer2['weights']), hiddenLayer2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, hiddenLayer3['weights']), hiddenLayer3['biases'])
l3 = tf.nn.relu(l3)
output = tf.matmul(l3, outputLayer['weights']), outputLayer['biases']
return output
# training the network
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(prediction,tf.squeeze(y)))
#cost = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
#cost = tf.reduce_mean(cost) * 100
optimizer = tf.train.AdamOptimizer(0.003).minimize(cost)
# cycles feed forward + backprop
numberOfEpochs = 10
with tf.Session() as sess:
#dealing with training data
for epoch in range(numberOfEpochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples / batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_, c =[optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, ' completed out of ', numberOfEpochs, ' loss: ', epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
print('Accuracy: ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
I am new to Tensorflow and I am trying to train my model to read datasets. But every time I run the code, I get this error:
Traceback (most recent call last):
File "", line 87, in
File "", line 62, in train_neural_network
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(prediction,tf.squeeze(y)));
File "/home/phillipus/.local/lib/python3.6/site-packages/tensorflow/python/ops/", line 1935, in sparse_softmax_cross_entropy_with_logits
labels, logits)
File "/home/phillipus/.local/lib/python3.6/site-packages/tensorflow/python/ops/", line 1713, in _ensure_xent_args
"named arguments (labels=..., logits=..., ...)" % name)
ValueError: Only call sparse_softmax_cross_entropy_with_logits with named arguments (labels=..., logits=..., ...)
Looks like the problem is at the "cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(prediction,tf.squeeze(y)))" and the calling of the "train_neural_network(x)" function. I am new to Tensorflow so my troubleshooting isn't at its best, anyone to help me?

Maybe you could try using tf.nn.softmax_cross_entropy_with_logits rather than tf.nn.sparse_softmax_cross_entropy_with_logits inside the cost calculation.
However, if you want to continue to use tf.nn.sparse_softmax_cross_entropy_with_logits then this link might help: Tensorflow ValueError: Only call `sparse_softmax_cross_entropy_with_logits` with named arguments .
By the way, what are the versions of tensorflow and python you're using?
Try running this:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/temp/data", one_hot=True)
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
# matrix = height * width
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
# defining the neural network
def neural_network_model(data):
hiddenLayer1 = {'weights': tf.Variable(tf.random_normal([784,
'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
hiddenLayer2 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
hiddenLayer3 = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
outputLayer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'biases': tf.Variable(tf.random_normal([n_classes]))}
l1 = tf.add(tf.matmul(data, hiddenLayer1['weights']), hiddenLayer1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hiddenLayer2['weights']), hiddenLayer2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, hiddenLayer3['weights']), hiddenLayer3['biases'])
l3 = tf.nn.relu(l3)
output = tf.add(tf.matmul(l3, outputLayer['weights']),outputLayer['biases'])
return output
prediction = neural_network_model(x)
cost = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)
optimizer = tf.train.AdamOptimizer(0.003).minimize(cost)
# cycles feed forward + backprop
numberOfEpochs = 10
with tf.Session() as sess:
#dealing with training data
for epoch in range(numberOfEpochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples / batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_, c =[optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, ' completed out of ', numberOfEpochs, ' loss: ', epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
print('Accuracy: ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

Try this code
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/temp/data/", one_hot = True)
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
#height x width
x = tf.placeholder('float',[None, 784])
y = tf.placeholder('float')
def neural_network_model(data):
hidden_1_layer = {'weights':tf.Variable(tf.random_normal([784,n_nodes_hl1])),
hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1,n_nodes_hl2])),
hidden_3_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2,n_nodes_hl3])),
output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl3,n_classes])),
l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']),hidden_1_layer['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']),hidden_2_layer['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']),hidden_3_layer['biases'])
l3 = tf.nn.relu(l3)
output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
return output
def train_neural_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
with tf.Session() as sess:
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x,epoch_y = mnist.train.next_batch(batch_size)
_,epoch_c =[optimizer, cost], feed_dict = {x: epoch_x, y: epoch_y})
epoch_loss += epoch_c
print('Epoch', epoch, 'completed out of ', hm_epochs, 'loss: ', epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:', accuracy.eval({x:mnist.test.images, y: mnist.test.labels}))


Python: Cost keeps increasing in a neural network that uses TensorFlow

I am trying to create a neural network using TensorFlow but my cost keeps increasing.
This is my code so far:
class AI_core:
def __init__(self, nodes_in_each_layer):
self.data_in_placeholder = tf.placeholder("float", [None, nodes_in_each_layer[0]])
self.data_out_placeholder = tf.placeholder("float")
def init_neural_network(self, n_nodes_h):
#n_nodes_h contains the number of nodes for each layer
#n_nodes_h[0] = number of inputs
#n_nodes_h[-1] = number of outputs
self.layers = [None for i in range(len(n_nodes_h)-1)]
for i in range(1, len(n_nodes_h)):
self.layers[i-1] = {"weights":tf.Variable(tf.random_normal([n_nodes_h[i-1], n_nodes_h[i]])),
def neural_network_model(self, data):
for i in range(len(self.layers)):
data = tf.matmul(data, self.layers[i]["weights"]) + self.layers[i]["biases"]
if i != len(self.layers):
data = tf.nn.relu(data)
return data
def train_neural_network(self, data):
prediction = self.neural_network_model(self.data_in_placeholder)
cost = tf.reduce_mean(tf.square(self.data_out_placeholder-prediction))
optimiser = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(cost)
with tf.Session() as sess:
epoch_loss = 0
for _ in range(int(data.length)):
epoch_x, epoch_y = data.next_batch()
c =, feed_dict={self.data_in_placeholder: epoch_x, self.data_out_placeholder: epoch_y})
_ =, feed_dict={self.data_in_placeholder: epoch_x, self.data_out_placeholder: epoch_y})
epoch_loss += np.sum(c)
print("loss =", epoch_loss)
For now I am trying to get the network to approximate the math.sin function.
I have set the nodes_in_each_layer = [1, 5, 5, 5, 1] and batch_size = 3. This is the output:
loss = 0.8417138457298279
loss = 1.190976768732071
loss = 1.8150676786899567
loss = 2.433938592672348
loss = 3.092040628194809
loss = 3.478498786687851
loss = 3.7894928753376007
loss = 4.598285228013992
loss = 5.418278068304062
loss = 5.555390268564224
It looks like you keep adding the value of loss with those in previous iterations.
with tf.Session() as sess:
epoch_loss = 0
for _ in range(int(data.length)):
epoch_x, epoch_y = data.next_batch()
c =, feed_dict={self.data_in_placeholder: epoch_x, self.data_out_placeholder: epoch_y})
_ =, feed_dict={self.data_in_placeholder: epoch_x, self.data_out_placeholder: epoch_y})
epoch_loss += np.sum(c)
print("loss =", epoch_loss)

Tensorflow Neural Network: My model is giving an accuracy of 1.0 every time

Amateur problem but i cannot solve this issue on my own.
I was trying to make a neural network for churn modelling dataset on bank data
Every time i run this network i get an accuracy of 1.0 so i think there is something wrong and its not working.
Can anyone help me figure out what is wrong?
Also please explain how i can avoid problems like these in the future
The code is :
import pandas as pd
import numpy as np
data = pd.read_csv('D:\Churn_Modelling.csv')
X = data.iloc[:, 3:13].values
Y = data.iloc[:, 13].values
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
label_encoder_x_1 = LabelEncoder()
X[:, 1] = label_encoder_x_1.fit_transform(X[:, 1])
label_encoder_x_2 = LabelEncoder()
X[:, 2] = label_encoder_x_2.fit_transform(X[:, 2])
one_hot_encoder = OneHotEncoder(categorical_features = [1])
X = one_hot_encoder.fit_transform(X).toarray()
X = X[:, 1:]
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test =
train_test_split(X, Y, test_size = 0.2)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
import tensorflow as tf
epochs = 20
batch_size = 50
learning_rate = 0.003
n_output = 1
n_input = X_train.shape[1]
X_placeholder = tf.placeholder("float32", [None, n_input], name = "X")
Y_placeholder = tf.placeholder("float32", [None, 1], name = "y")
n_neurons_1 = 64
n_neurons_2 = 32
n_neurons_3 = 16
layer_1 = {'weights': tf.Variable
(tf.random_normal([n_input, n_neurons_1])),
'biases': tf.Variable(tf.random_normal([n_neurons_1]))
layer_2 = {'weights': tf.Variable
(tf.random_normal([n_neurons_1, n_neurons_2])),
'biases': tf.Variable(tf.random_normal([n_neurons_2]))
layer_3 = {'weights': tf.Variable
(tf.random_normal([n_neurons_2, n_neurons_3])),
'biases': tf.Variable(tf.random_normal([n_neurons_3]))
output_layer = {'weights': tf.Variable(
tf.random_normal([n_neurons_3, n_output])),
'biases': tf.Variable(tf.random_normal([n_output]))
l1 = tf.add(tf.matmul(X_placeholder,
layer_1['weights']), layer_1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, layer_2['weights']),
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, layer_3['weights']),
l3 = tf.nn.relu(l3)
output_layer = tf.matmul(l3,
output_layer['weights']) + output_layer['biases']
output_layer = tf.nn.sigmoid(output_layer)
cost = tf.reduce_mean(tf.reduce_sum(
tf.square(Y_placeholder - output_layer), reduction_indices = [1]))
optimizer = tf.train.AdamOptimizer().minimize(cost)
correct_prediction = tf.equal(tf.argmax(
Y_placeholder, 1), tf.argmax(output_layer, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def next_batch(size, x, y):
idx = np.arange(0, len(x))
idx = idx[:size]
x_shuffle = [x[ i] for i in idx]
y_shuffle = [y[ i] for i in idx]
return np.asarray(x_shuffle), np.asarray(y_shuffle)
with tf.Session() as sess:
total_batches = int(len(X_train) / batch_size)
for epoch in range(epochs):
avg_cost = 0
print('epoch: ', epoch)
for batch in range(total_batches):
x_batch_data, y_batch_data =
next_batch(batch_size, X_train, Y_train)
y_batch_data = y_batch_data.reshape((50, 1))
_, c =[optimizer, cost],
feed_dict = {X_placeholder: x_batch_data,
Y_placeholder: y_batch_data})
avg_cost += c / total_batches
print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost))
Y_test_temp = Y_test.reshape((2000, 1))
print('accuracy: ',,
feed_dict = {X_placeholder: X_test, Y_placeholder: Y_test_temp}))

Input contains NaN, infinity or a value too large for dtype('float64') in Tensorflow

I am trying to train a LSTM and in my model I have an exponential learning rate decay and a dropout layer. In order to deactivate the dropout layer when testing and validating, I have put a placeholder for the dropout rate and given it a default value of 1.0 and when training i am setting it to 0.5. The dropou_rate placeholder value is passed to the tf.layers.dropout(). When I run this during the validation I get the following error.
ValueError: Input contains NaN, infinity or a value too large for
shown below is the stack trace:
Traceback (most recent call last): File
line 329, in
train_test() File "/home/suleka/Documents/sales_prediction/",
line 270, in train_test
meanSquaredError = mean_squared_error(nonescaled_y, pred_vals) File
line 238, in mean_squared_error
y_true, y_pred, multioutput) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/metrics/",
line 77, in _check_reg_targets
y_pred = check_array(y_pred, ensure_2d=False) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/",
line 453, in check_array
_assert_all_finite(array) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/",
line 44, in _assert_all_finite
" or a value too large for %r." % X.dtype) ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
When I put the learning rate as a value in tf.layers.dropout like:
dropout = tf.layers.dropout(last, rate=0.5, training=True)
The code works fine. I am not sure what is happening in the code.
Shown below is my complete code:
import tensorflow as tf
import matplotlib as mplt
mplt.use('agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
class RNNConfig():
input_size = 1
num_steps = 7#5
lstm_size = 64 #16
num_layers = 1
keep_prob = 0.8
batch_size = 16 #64
init_epoch = 15 # 5
max_epoch = 20 # 100 or 50
# test_ratio = 0.2
fileName = 'store2_1.csv'
graph = tf.Graph()
column_min_max = [[0,11000], [1,7]]
columns = ['Sales', 'DayOfWeek','SchoolHoliday', 'Promo']
features = len(columns)
hidden1_nodes = 64
hidden2_nodes = 8
config = RNNConfig()
def segmentation(data):
seq = [price for tup in data[config.columns].values for price in tup]
seq = np.array(seq)
# split into items of features
seq = [np.array(seq[i * config.features: (i + 1) * config.features])
for i in range(len(seq) // config.features)]
# split into groups of num_steps
X = np.array([seq[i: i + config.num_steps] for i in range(len(seq) - config.num_steps)])
y = np.array([seq[i + config.num_steps] for i in range(len(seq) - config.num_steps)])
# get only sales value
y = [[y[i][0]] for i in range(len(y))]
y = np.asarray(y)
return X, y
def scale(data):
for i in range (len(config.column_min_max)):
data[config.columns[i]] = (data[config.columns[i]] - config.column_min_max[i][0]) / ((config.column_min_max[i][1]) - (config.column_min_max[i][0]))
return data
def rescle(test_pred):
prediction = [(pred * (config.column_min_max[0][1] - config.column_min_max[0][0])) + config.column_min_max[0][0] for pred in test_pred]
return prediction
def pre_process():
store_data = pd.read_csv(config.fileName)
store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
# store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
# ---for segmenting original data --------------------------------
# original_data = store_data.copy()
## train_size = int(len(store_data) * (1.0 - test_ratio))
validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
train_size = int(len(store_data) - (validation_len + test_len))
train_data = store_data[:train_size]
validation_data = store_data[(train_size - config.num_steps): validation_len + train_size]
test_data = store_data[((validation_len + train_size) - config.num_steps):]
original_val_data = validation_data.copy()
original_test_data = test_data.copy()
# -------------- processing train data---------------------------------------
scaled_train_data = scale(train_data)
train_X, train_y = segmentation(scaled_train_data)
# -------------- processing validation data---------------------------------------
scaled_validation_data = scale(validation_data)
val_X, val_y = segmentation(scaled_validation_data)
# -------------- processing test data---------------------------------------
scaled_test_data = scale(test_data)
test_X, test_y = segmentation(scaled_test_data)
# ----segmenting original validation data-----------------------------------------------
nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
# ----segmenting original test data---------------------------------------------
nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y
def generate_batches(train_X, train_y, batch_size):
num_batches = int(len(train_X)) // batch_size
if batch_size * num_batches < len(train_X):
num_batches += 1
batch_indices = range(num_batches)
for j in batch_indices:
batch_X = train_X[j * batch_size: (j + 1) * batch_size]
batch_y = train_y[j * batch_size: (j + 1) * batch_size]
assert set(map(len, batch_X)) == {config.num_steps}
yield batch_X, batch_y
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
itemindex = np.where(y_true == 0)
y_true = np.delete(y_true, itemindex)
y_pred = np.delete(y_pred, itemindex)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def RMSPE(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.sqrt(np.mean(np.square(((y_true - y_pred) / y_pred)), axis=0))
def plot(true_vals,pred_vals,name):
fig = plt.figure()
fig = plt.figure(dpi=100, figsize=(20, 7))
days = range(len(true_vals))
plt.plot(days, pred_vals, label='pred sales')
plt.plot(days, true_vals, label='truth sales')
plt.legend(loc='upper left', frameon=False)
plt.savefig(name, format='png', bbox_inches='tight', transparent=False)
def write_results(true_vals,pred_vals,name):
with open(name, "w") as f:
writer = csv.writer(f)
writer.writerows(zip(true_vals, pred_vals))
def train_test():
train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
# Add nodes to the graph
with config.graph.as_default():
learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.features], name="inputs")
targets = tf.placeholder(tf.float32, [None, config.input_size], name="targets")
global_step = tf.Variable(0, trainable=False)
dropout_rate = tf.placeholder_with_default(1.0, shape=())
learning_rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_rate=0.96, decay_steps=5, staircase=False)
cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True, activation=tf.nn.relu)
val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
val = tf.transpose(val1, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
# hidden layer
last = tf.layers.dense(last, units=config.hidden1_nodes, activation=tf.nn.relu)
last = tf.layers.dense(last, units=config.hidden2_nodes, activation=tf.nn.relu)
weight = tf.Variable(tf.truncated_normal([config.hidden2_nodes, config.input_size]))
bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))
dropout = tf.layers.dropout(last, rate=dropout_rate, training=True)
prediction = tf.matmul(dropout, weight) + bias
loss = tf.losses.mean_squared_error(targets,prediction)
optimizer = tf.train.AdamOptimizer(learning_rate)
minimize = optimizer.minimize(loss, global_step=global_step)
# correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
# --------------------training------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
iteration = 1
for epoch_step in range(config.max_epoch):
for batch_X, batch_y in generate_batches(train_X, train_y, config.batch_size):
train_data_feed = {
inputs: batch_X,
targets: batch_y,
learning_rate: 0.01,
dropout_rate: 0.5
train_loss, _, value,gs =[loss, minimize, val1,global_step], train_data_feed)
if iteration % 5 == 0:
print("Epoch: {}/{}".format(epoch_step, config.max_epoch),
"Iteration: {}".format(iteration),
"Train loss: {:.6f}".format(train_loss))
iteration += 1
saver = tf.train.Saver(), "checkpoints_sales/sales_pred.ckpt")
# --------------------validation------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
test_data_feed = {
inputs: val_X,
dropout_rate: 1.0
test_pred =, test_data_feed)
# rmsse =, test_data_feed)
pred_vals = rescle(test_pred)
pred_vals = np.array(pred_vals)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = nonescaled_val_y.flatten()
nonescaled_y = nonescaled_y.tolist()
plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
rootMeanSquaredError = sqrt(meanSquaredError)
print("RMSE:", rootMeanSquaredError)
mae = mean_absolute_error(nonescaled_y, pred_vals)
print("MAE:", mae)
mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
print("MAPE:", mape)
rmse_val = RMSPE(nonescaled_y, pred_vals)
print("RMSPE:", rmse_val)
# --------------------testing------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
test_data_feed = {
inputs: test_X,
dropout_rate: 1.0
test_pred =, test_data_feed)
# rmsse =, test_data_feed)
pred_vals = rescle(test_pred)
pred_vals = np.array(pred_vals)
pred_vals = (np.round(pred_vals, 0)).astype(np.int32)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = nonescaled_test_y.flatten()
nonescaled_y = nonescaled_y.tolist()
plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
rootMeanSquaredError = sqrt(meanSquaredError)
print("RMSE:", rootMeanSquaredError)
mae = mean_absolute_error(nonescaled_y, pred_vals)
print("MAE:", mae)
mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
print("MAPE:", mape)
rmse_val = RMSPE(nonescaled_y, pred_vals)
print("RMSPE:", rmse_val)
if __name__ == '__main__':
When using tf.layers.dropout the rate argument tells how much of the data to drop when you give 1.0 all the output is gone, replace 1.0 with 0.0 and it should work.
TensorFlow documentation:
I am putting this because even though #Almog's answer was correct it didn't have the explanation I wanted. So for anyone confused like me:
If you use:
to deactivate the dropout layer you should put
keep_prob= 1.0 not keep_prob=0.0
as keep_prob means 'The probability that each element is kept.' So keeping it as 1.0 makes sense to deactivate it.
If you are using
you should put:
rate=0.0 not rate=1.0
as rate here means 'The dropout rate (should be between 0 and 1). E.g. "rate=0.1" would drop out 10% of input units'. So if I put rate=0.0 it means that none of the input units will be dropped.

Missing 1 required positional argument: 'test_y'

I make MNIST dataset study by MLP.
However, there is an error of missing 1 required positional argument: 'test_y'
I can't handle this problem.
Please tell me how to deal with this error message.
This is a code of what I do.
def homework(train_X, train_y, test_X, test_y):
epoch = 10000
batch_size = 20
learning_rate = 1e-3
input_size = 784
hidden_size = 100
output_size = 10
data_num = train_X.shape[0]
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)
def softmax(x):
x -= np.max(x, axis=1).reshape((-1, 1))
return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))
def cross_entropy(y, output):
batch_size = y.shape[0]
return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def forward(x):
fwd = {}
fwd['h1'] = sigmoid(, W1) + b1)
fwd['prob'] = softmax(['h1'], W2) + b2)
return fwd
for n in range(epoch):
loss_sum = 0
for i in range(0, data_num, batch_size):
x = train_X[i:i+batch_size]
y = train_y[i:i+batch_size]
fwd = forward(x)
loss_sum += cross_entropy(y, fwd['prob'])
grad = network.gradient(x, y)
for key in ('W1', 'b1', 'W2', 'b2'):
network.params[key] -= learning_rate * grad[key]
loss = network.loss(x, y)
if np.mod(n, 1000) == 0:
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
accuracy = f1_score(test_y, pred_y, average='macro')
print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
return pred_y
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np
def load_mnist():
mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle('float32'),'int32'), random_state=42)
mnist_X = mnist_X / 255.0
return train_test_split(mnist_X, mnist_y,
def validate_homework():
train_X, test_X, train_y, test_y = load_mnist()
# validate for small dataset
train_X_mini = train_X[:100]
train_y_mini = train_y[:100]
test_X_mini = test_X[:100]
test_y_mini = test_y[:100]
pred_y = homework(train_X_mini, train_y_mini, test_X_mini)
print(f1_score(test_y_mini, pred_y, average='macro'))
def score_homework():
train_X, test_X, train_y, test_y = load_mnist()
pred_y = homework(train_X, train_y, test_X)
print(f1_score(test_y, pred_y, average='macro'))
# score_homework()

UnboundLocalError happened

I am using MNIST dataset to study MLP(multi layer perceptron) in python. While running the code given below I get the following error message:
UnboundLocalError: local variable 'x' referenced before assignment
Where should I insert "global x" or what should I do? Here is my code:
def homework(train_X, train_y, test_X, test_y):
epoch = 10000
batch_size = 20
learning_rate = 1e-3
input_size = 784
hidden_size = 100
output_size = 10
data_num = train_X.shape[0]
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)
for n in range(epoch):
loss_sum = 0
for i in range(0, data_num, batch_size):
x = train_X[i:i+batch_size]
y = train_y[i:i+batch_size]
fwd = forward(x)
loss_sum += cross_entropy(y, fwd['prob'])
grad = network.gradient(x, y)
for key in ('W1', 'b1', 'W2', 'b2'):
network.params[key] -= learning_rate * grad[key]
loss = network.loss(x, y)
if np.mod(n, 1000) == 0:
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
accuracy = f1_score(test_y, pred_y, average='macro')
print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
return pred_y
def softmax(x):
x -= np.max(x, axis=1).reshape((-1, 1))
return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))
def cross_entropy(y, output):
batch_size = y.shape[0]
return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def forward(x):
fwd = {}
fwd['h1'] = sigmoid(, W1) + b1)
fwd['prob'] = softmax(['h1'], W2) + b2)
return fwd
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np
def load_mnist():
mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle('float32'),'int32'), random_state=42)
mnist_X = mnist_X / 255.0
return train_test_split(mnist_X, mnist_y,
def validate_homework():
train_X, test_X, train_y, test_y = load_mnist()
train_X_mini = train_X[:100]
train_y_mini = train_y[:100]
test_X_mini = test_X[:100]
test_y_mini = test_y[:100]
pred_y = homework(train_X_mini, train_y_mini, test_X_mini, test_y_mini)
print(f1_score(test_y_mini, pred_y, average='macro'))
def score_homework():
train_X, test_X, train_y, test_y = load_mnist()
pred_y = homework(train_X, train_y, test_X, test_y)
print(f1_score(test_y, pred_y, average='macro'))
# score_homework()
