I have a simple TensorFlow model and its accuracy is 1. But when I try to predict some new inputs it always returns zero (0).
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# generate data
np.random.seed(10)
#inputs = np.random.uniform(low=1.2, high=1.5, size=[5000, 150]).astype('float32')
inputs = np.random.randint(low=50, high=500, size=[5000, 150])
label = np.random.uniform(low=1.3, high=1.4, size=[5000, 1])
# reverse_label = 1 - label
reverse_label = np.random.uniform(low=1.3, high=1.4, size=[5000, 1])
reverse_label1 = np.random.randint(low=80, high=140, size=[5000, 1])
#labels = np.append(label, reverse_label, 1)
#labels = np.append(labels, reverse_label1, 1)
labels = reverse_label1
print(inputs)
print(labels)
# parameters
learn_rate = 0.001
epochs = 100
n_input = 150
n_hidden = 15
n_output = 1
# set weights/biases
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_output])
b0 = tf.Variable(tf.truncated_normal([n_hidden], stddev=0.2, seed=0))
b1 = tf.Variable(tf.truncated_normal([n_output], stddev=0.2, seed=0))
w0 = tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.2, seed=0))
w1 = tf.Variable(tf.truncated_normal([n_hidden, n_output], stddev=0.2, seed=0))
# step function
def returnPred(x, w0, w1, b0, b1):
    z1 = tf.add(tf.matmul(x, w0), b0)
    a2 = tf.nn.relu(z1)
    z2 = tf.add(tf.matmul(a2, w1), b1)
    h = tf.nn.relu(z2)
    return h  # return the network output
y_ = returnPred(x, w0, w1, b0, b1) # predict operation
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=y_, labels=y))  # calculate loss between prediction and actual
model = tf.train.AdamOptimizer(learning_rate=learn_rate).minimize(
    loss)  # apply gradient descent based on loss
init = tf.global_variables_initializer()
tf.Session = sess
sess.run(init) # initialize graph
for step in range(0, epochs):
    sess.run([model, loss], feed_dict={x: inputs, y: labels})  # train model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: inputs, y: labels})) # print accuracy
inp = np.random.randint(low=50, high=500, size=[5, 150])
print(sess.run(tf.argmax(y_, 1), feed_dict={x: inp})) # predict some new inputs
Everything runs without errors, and my problem is with the last line of code. I also tried "y_" instead of "tf.argmax(y_, 1)", but that did not work either.
How can I fix that?
There are multiple mistakes in your code.
Starting with these lines of code:
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: inputs, y: labels})) # print accuracy
You are performing linear regression, but you are checking accuracy with a methodology that only applies to classification (logistic regression). If you want to see how your regression network is performing, print the loss and make sure it decreases after each epoch of training.
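For example, a minimal sketch of printing the loss each epoch, reusing the sess, model, loss, x, y, inputs and labels already defined in your code (only the printing is new):
# Train as before, but also fetch and print the loss value each epoch;
# it should decrease steadily if the network is learning.
for step in range(0, epochs):
    _, loss_value = sess.run([model, loss], feed_dict={x: inputs, y: labels})
    if step % 10 == 0:
        print("epoch %d: loss %.4f" % (step, loss_value))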
To see why that accuracy check is misleading, run the following code:
print(y_.get_shape()) # Outputs (?, 1)
There is only one output column, so both tf.argmax(y, 1) and tf.argmax(y_, 1) will always return [0, 0, ...]. As a result your accuracy will always be 1.0. Delete those three lines of code.
Next, to get the outputs, just run the following code:
print(sess.run(y_, feed_dict={x: inp}))
But since your data is random, don't expect a good set of outputs.
I'm pretty new to TensorFlow. Currently, I'm building a 3-layer network with 10 neurons in the hidden layer with ReLU, a mini-batch size of 8 for gradient descent, and an L2 regularisation weight decay parameter (beta) of 0.001. The TensorFlow version I'm using is 1.14 and I'm on Python 3.6.
The issue that boggles my mind is that my predicted values and testing errors are absolutely off the charts.
For example, I plotted out the test errors and the predicted vs target values for a sample size of 50, and this is what came out.
As you can see, both plots are way off, and I haven't had the slightest clue as to why.
Here's roughly how the dataset looks. The first column is discarded as it is just a counter value, and the last column is the target.
My code:
NUM_FEATURES = 7
num_neuron = 10
batch_size = 8
beta = 0.001
learning_rate = 0.001
epochs = 4000
seed = 10
np.random.seed(seed)
# read and divide data into test and train sets
total_dataset= np.genfromtxt('dataset_excel.csv', delimiter=',')
X_data, Y_data = total_dataset[1:, 1:8], total_dataset[1:, -1]
Y_data = Y_data.reshape(Y_data.shape[0], 1)
# shuffle input, ensure both are shuffled with the same order
shufflestate = np.random.get_state()
np.random.shuffle(X_data)
np.random.set_state(shufflestate)
np.random.shuffle(Y_data)
# 70% used for training, 30% used for testing
trainX = X_data[:280]
trainY = Y_data[:280]
testX = X_data[280:]
testY = Y_data[280:]
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0)
# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, 1])
# get 50 samples for plotting of predicted vs target values
limited50testX = testX[:50]
limited50testY = testY[:50]
# Hidden
with tf.name_scope('hidden'):
    weight1 = tf.Variable(tf.truncated_normal([NUM_FEATURES, num_neuron], stddev=1.0, name='weight1'))
    bias1 = tf.Variable(tf.zeros([num_neuron]), name='bias1')
    hidden = tf.nn.relu(tf.matmul(x, weight1) + bias1)
# output
with tf.name_scope('linear'):
    weight2 = tf.Variable(tf.truncated_normal([num_neuron, 1], stddev=1.0 / np.sqrt(float(num_neuron))), name='weight2')
    bias2 = tf.Variable(tf.zeros([1]), name='bias2')
    logits = tf.matmul(hidden, weight2) + bias2
ridgeLoss = tf.square(y_ - logits)
regularisation = tf.nn.l2_loss(weight1) + tf.nn.l2_loss(weight2)
loss = tf.reduce_mean(ridgeLoss + beta * regularisation)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)
error = tf.reduce_mean(tf.square(y_ - logits))
N = len(trainX)
idx = np.arange(N)
predicted=[]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_err = []
    test_err_ = []
    for i in range(epochs):
        for batchStart, batchEnd in zip(range(0, trainX.shape[0], batch_size), range(batch_size, trainX.shape[0], batch_size)):
            train_op.run(feed_dict={x: trainX[batchStart:batchEnd], y_: trainY[batchStart:batchEnd]})
        err = error.eval(feed_dict={x: trainX, y_: trainY})
        train_err.append(err)
        if i % 100 == 0:
            print('iter %d: train error %g' % (i, train_err[i]))
        test_err = error.eval(feed_dict={x: testX, y_: testY})
        test_err_.append(test_err)
    predicted = sess.run(logits, feed_dict={x: limited50testX})
    print("predicted values: ", predicted)
    print("size of predicted values is", len(predicted))
    print("targets: ", limited50testY)
    print("size of target values is", len(limited50testY))
#plot predictions vs targets
numberList=np.arange(0, 50, 1).tolist()
predplot = plt.figure(1)
plt.plot(numberList, predicted, label='Predictions')
plt.plot(numberList, limited50testY, label='Targets')
plt.xlabel('50 samples')
plt.ylabel('Value')
plt.legend(loc='lower right')
predplot.show()
# plot training error
trainplot = plt.figure(2)
plt.plot(range(epochs), train_err)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train Error')
trainplot.show()
#plot testing error
testplot = plt.figure(3)
plt.plot(range(epochs), test_err_)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Test Error')
testplot.show()
Not sure if that's it, but trainX is normalized whereas testX is not. You might want to apply the same normalization (using the training set's mean and standard deviation) to testX before predicting.
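For example, a minimal sketch of that fix, assuming you compute the training statistics before overwriting trainX (the variable names are taken from your code):
# Compute normalization statistics on the training set only,
# then apply the same transform to both the training and test inputs.
train_mean = np.mean(trainX, axis=0)
train_std = np.std(trainX, axis=0)
trainX = (trainX - train_mean) / train_std
testX = (testX - train_mean) / train_std
limited50testX = testX[:50]  # re-slice the plotting subset after normalizing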
I use the convolutional autoencoder method to train my model and then save it, but when I restore the model to reconstruct an image similar to the training images, the reconstruction result is very bad and the loss is large. I am not sure if I am doing something wrong when saving and reading the files.
Training the model and saving it:
#--------------------------------------------------------------------------
x = tf.placeholder(tf.float32, [None, dim], name = "X")
y = tf.placeholder(tf.float32, [None, dim], name = "Y")
keepprob = tf.placeholder(tf.float32, name = "K")
pred = cae(x, weights, biases, keepprob, imgsize)["out"]
cost = tf.reduce_sum(tf.square(cae(x, weights, biases, keepprob,imgsize)["out"] - tf.reshape(y, shape=[-1, imgsize, imgsize, 1])))
learning_rate = 0.01
optm = tf.train.AdamOptimizer(learning_rate).minimize(cost)
#--------------------------------------------------------------------------
sess = tf.Session()
save_model = os.path.join(PATH,'temp_saved_model')
saver = tf.train.Saver()
tf.add_to_collection("COST", cost)
tf.add_to_collection("PRED", pred)
sess.run(tf.global_variables_initializer())
mean_img = np.zeros((dim))
batch_size = 100
n_epochs = 1000
for epoch_i in range(n_epochs):
    for batch_i in range(ntrain // batch_size):
        trainbatch = np.array(train)
        trainbatch = np.array([img - mean_img for img in trainbatch])
        sess.run(optm, feed_dict={x: trainbatch, y: trainbatch, keepprob: 1.})
save_path = saver.save(sess, save_model)
print('Model saved in file: %s' % save_path)
sess.close()
Restoring the model and trying to reconstruct the image:
tf.reset_default_graph()
save_model = os.path.join(PATH + 'SaveModel/','temp_saved_model.meta')
imgsize = 64
dim = imgsize * imgsize
mean_img = np.zeros((dim))
with tf.Session() as sess:
    saver = tf.train.import_meta_graph(save_model)
    saver.restore(sess, tf.train.latest_checkpoint(PATH + 'SaveModel/'))
    cost = tf.get_collection("COST")[0]
    pred = tf.get_collection("PRED")[0]
    graph = tf.get_default_graph()
    x = graph.get_tensor_by_name("X:0")
    y = graph.get_tensor_by_name("Y:0")
    k = graph.get_tensor_by_name("K:0")
    for i in range(10):
        test_xs = np.array(data)
        test = load_image(test_xs, imgsize)
        test = np.array([img - mean_img for img in test])
        print("[%02d/%02d] cost: %.4f" % (i, 10, sess.run(cost, feed_dict={x: test, y: test, K: 1.})))
The loss value in the training process is 1.321..., but the reconstruction loss is 16545.10441... Is there something wrong in my code?
First, make sure that your restore and save functions are in different files.
There are a few problems that I have debugged so far:
The keep-probability placeholder is retrieved into the variable k after the restore, but your restored cost evaluation feeds K, which is not defined there (see the sketch after this list).
You are feeding the same images as inputs and labels (which doesn't make sense unless you are trying to learn an identity function).
You are calculating training cost before saving the model and validation/test cost after restoring the model.
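As a minimal sketch of the first point, inside the restore session the cost should be evaluated with the lowercase handle k fetched from the restored graph (x, y, k and cost here are the tensors you already retrieve in your restore code):
# Feed the restored keep-probability tensor `k`; the name `K` is undefined here.
reconstruction_cost = sess.run(cost, feed_dict={x: test, y: test, k: 1.})
print("[%02d/%02d] cost: %.4f" % (i, 10, reconstruction_cost))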
Your code in the saver:
recon = sess.run(pred, feed_dict={x: testbatch, keepprob: 1.})
fig, axs = plt.subplots(2, n_examples, figsize=(15, 4))
for example_i in range(5):
    axs[0][example_i].matshow(np.reshape(testbatch[example_i, :], (imgsize, imgsize)), cmap=plt.get_cmap('gray'))
    axs[1][example_i].matshow(np.reshape(np.reshape(recon[example_i, ...], (dim,)) + mean_img, (imgsize, imgsize)), cmap=plt.get_cmap('gray'))
plt.show()
Your code in the restore function:
recon = sess.run(pred, feed_dict={x: test, k: 1.})
cost = sess.run(cost, feed_dict={x: test, y: test, k: 1.})
if (i % 2) == 0:
    fig, axs = plt.subplots(2, n_examples, figsize=(15, 4))
    for example_i in range(n_examples):
        axs[0][example_i].matshow(np.reshape(test[example_i, :], (imgsize, imgsize)), cmap=plt.get_cmap('gray'))
        axs[1][example_i].matshow(np.reshape(np.reshape(recon[example_i, ...], (dim,)) + mean_img, (imgsize, imgsize)), cmap=plt.get_cmap('gray'))
    plt.show()
Also, nowhere in your code are you printing/plotting the cost; even in your restore module you are only plotting the recon variable.
If you are trying to test the autoencoder (encoder/decoder pair) to reconstruct the original image, your model is a bit too small (shallow). If that makes sense, try implementing a deeper one; if you are confused, check out this link: https://pgaleone.eu/neural-networks/deep-learning/2016/12/13/convolutional-autoencoders-in-tensorflow/
And in any case, feel free to add comments for further clarification.
I created a TensorFlow neural network that has 2 hidden layers with 10 units each, using ReLU activations and Xavier initialization for the weights. The output layer has 1 unit with a sigmoid activation, outputting a binary classification (0 or 1) of whether it believes a passenger on the Titanic survived, based on the input features.
(The only code omitted is the load_data function which populates the variables X_train, Y_train, X_test, Y_test used later in the program)
Parameters
# Hyperparams
learning_rate = 0.001
lay_dims = [10,10, 1]
# Other params
m = X_train.shape[1]
n_x = X_train.shape[0]
n_y = Y_train.shape[0]
Inputs
X = tf.placeholder(tf.float32, shape=[X_train.shape[0], None], name="X")
norm = tf.nn.l2_normalize(X, 0) # normalize inputs
Y = tf.placeholder(tf.float32, shape=[Y_train.shape[0], None], name="Y")
Initialize Weights & Biases
W1 = tf.get_variable("W1", [lay_dims[0],n_x], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", [lay_dims[0],1], initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2", [lay_dims[1],lay_dims[0]], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", [lay_dims[1],1], initializer=tf.zeros_initializer())
W3 = tf.get_variable("W3", [lay_dims[2],lay_dims[1]], initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.get_variable("b3", [lay_dims[2],1], initializer=tf.zeros_initializer())
Forward Prop
Z1 = tf.add(tf.matmul(W1,X), b1)
A1 = tf.nn.relu(Z1)
Z2 = tf.add(tf.matmul(W2,A1), b2)
A2 = tf.nn.relu(Z2)
Y_hat = tf.add(tf.matmul(W3,A2), b3)
BackProp
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.transpose(Y_hat), labels=tf.transpose(Y)))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
Session
# Initialize
init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Initialize
    sess.run(init)
    # Normalize Inputs
    sess.run(norm, feed_dict={X: X_train, Y: Y_train})
    # Forward/Backprop and update weights
    for i in range(10000):
        c, _ = sess.run([cost, optimizer], feed_dict={X: X_train, Y: Y_train})
        if i % 100 == 0:
            print(c)
    correct_prediction = tf.equal(tf.argmax(Y_hat), tf.argmax(Y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Training Set:", sess.run(accuracy, feed_dict={X: X_train, Y: Y_train}))
    print("Testing Set:", sess.run(accuracy, feed_dict={X: X_test, Y: Y_test}))
After running 10,000 epochs of training, the cost goes down each time, which shows that the learning rate is okay and that the cost function appears normal. However, after training, all of my Y_hat values (predictions on the training set) are 1 (predicting the passenger survived). So basically the prediction just outputs y=1 for every training example.
Also, when I run tf.argmax on Y_hat, the result is a matrix of all 0's. The same thing is happening when tf.argmax is applied to Y (ground truth labels) which is odd because Y consists of all the correct labels for the training examples.
Any help is greatly appreciated. Thanks.
I assume your Y_hat is a (1, m) matrix, where m is the number of training examples. Then tf.argmax(Y_hat) will give all zeros. According to the TensorFlow documentation, argmax
Returns the index with the largest value across axes of a tensor.
If you do not pass in an axis, it defaults to 0. Because axis 0 has only one entry here, the returned index is always 0.
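If you want per-example 0/1 predictions from the single sigmoid output, one common alternative (just a sketch using the Y_hat and Y tensors from your code, not something in the original post) is to threshold the sigmoid at 0.5 and compare element-wise:
# Threshold the sigmoid output at 0.5 to get 0/1 predictions,
# then compare element-wise with the labels instead of using argmax.
predictions = tf.cast(tf.sigmoid(Y_hat) > 0.5, tf.float32)
correct_prediction = tf.equal(predictions, Y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))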
I know I am late, but I would also point out that since your label matrix is of shape (n, 1), i.e., there is only 1 class to predict, cross entropy doesn't make sense here. In such cases you should use something different for calculating the cost (maybe a mean squared error or something similar).
I had a similar problem recently while I was working on my college project, and I found a workaround: I turned the binary output into 2 classes, such as present and absent, so if it's present it's [1, 0]. I know this is not the best way to do it, but it can be helpful when you need something working instantly.
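For example, a rough sketch of that conversion with NumPy (labels here is assumed to be an (n, 1) array of 0/1 values; the names are illustrative):
import numpy as np

# Hypothetical helper: map 1 -> [1, 0] ("present") and 0 -> [0, 1] ("absent").
flat = labels.reshape(-1).astype(int)
one_hot_labels = np.zeros((flat.shape[0], 2), dtype=np.float32)
one_hot_labels[flat == 1, 0] = 1.0
one_hot_labels[flat == 0, 1] = 1.0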
This follows on from this post (not mine): TensorFlow for binary classification
I had a similar issue and converted my data to use one-hot encoding. However, I'm still getting a cost of 0. Interestingly, the accuracy is correct (90%) when I feed my training data back into it.
Code below:
# Set parameters
learning_rate = 0.02
training_iteration = 2
batch_size = int(np.size(y_vals)/300)
display_step = 1
numOfFeatures = 20 # 784 if MNIST
numOfClasses = 2 #10 if MNIST dataset
# TF graph input
x = tf.placeholder("float", [None, numOfFeatures])
y = tf.placeholder("float", [None, numOfClasses])
# Create a model
# Set model weights to random numbers: https://www.tensorflow.org/api_docs/python/tf/random_normal
W = tf.Variable(tf.random_normal(shape=[numOfFeatures,1])) # Weight vector
b = tf.Variable(tf.random_normal(shape=[1,1])) # Constant
# Construct a linear model
model = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax
# Minimize error using cross entropy
# Cross entropy
cost_function = -tf.reduce_sum(y*tf.log(model))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for iteration in range(training_iteration):
        avg_cost = 0.
        total_batch = int(len(x_vals)/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs = x_vals[i*batch_size:(i*batch_size)+batch_size]
            batch_ys = y_vals_onehot[i*batch_size:(i*batch_size)+batch_size]
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys})/total_batch
        # Display logs per iteration step
        if iteration % display_step == 0:
            print("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Tuning completed!")
    # Evaluation function
    correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
    #correct_prediction = tf.equal(model, y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Test the model
    print("Accuracy:", accuracy.eval({x: x_vals_test, y: y_vals_test_onehot}))
Your output for cost is using:
"{:.9f}".format(avg_cost)
Therefore, maybe you can replace the 9 with a bigger number to see more decimal places.
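For example (only the format string changes; this is just an illustration):
# Print more decimal places so a very small but non-zero cost is visible.
print("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.15f}".format(avg_cost))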
OK, here is what I found in the end.
Replace:
b = tf.Variable(tf.random_normal(shape=[1,1]))
with:
b = tf.Variable(tf.zeros([1]))
I am implementing a logistic regression function. It is quite simple and works properly up until I get to the part where I want to calculate its accuracy. Here is my logistic regression...
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# tf Graph Input
x = tf.get_variable("input_image", shape=[100,784], dtype=tf.float32)
x_placeholder = tf.placeholder(tf.float32, shape=[100, 784])
assign_x_op = x.assign(x_placeholder).op
y = tf.placeholder(shape=[100,10], name='input_label', dtype=tf.float32) # 0-9 digits recognition => 10 classes
# set model weights
W = tf.get_variable("weights", shape=[784, 10], dtype=tf.float32, initializer=tf.random_normal_initializer())
b = tf.get_variable("biases", shape=[1, 10], dtype=tf.float32, initializer=tf.zeros_initializer())
# construct model
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits) # Softmax
# minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(cost)
# initializing the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# launch the graph
with tf.Session() as sess:
    sess.run(init)
    # training cycle
    for epoch in range(FLAGS.training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples/FLAGS.batch_size)
        # loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size)
            # Assign the contents of `batch_xs` to variable `x`.
            sess.run(assign_x_op, feed_dict={x_placeholder: batch_xs})
            _, c = sess.run([optimizer, cost], feed_dict={y: batch_ys})
            # compute average loss
            avg_cost += c / total_batch
        # display logs per epoch step
        if (epoch + 1) % FLAGS.display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in file: %s" % save_path)
    print("Optimization Finished!")
As you can see, it is a basic logistic regression function and it works perfectly.
It is important to note that batch_size is 100.
Now, after the code snippet above, I try the following...
# list of booleans to determine the correct predictions
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
print(correct_prediction.eval({x_placeholder:mnist.test.images, y:mnist.test.labels}))
# calculate total accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
However, the code fails on correct_prediction. I get the following error...
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (10000, 784) for Tensor 'Placeholder:0', which has shape '(100, 784)'
I believe I get this error because of the value I am trying to assign the placeholder for x. How can I fix this? Do I need to reshape the array?
In
x_placeholder = tf.placeholder(tf.float32, shape=[100, 784])
y = tf.placeholder(shape=[100,10], name='input_label', dtype=tf.float32) # 0-9
avoid fixing the first dimension as 100, since it prohibits you from using any other batch size (so if the number of images in mnist.test.images is different from 100, you'll get an error). Instead specify them as None:
x_placeholder = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(shape=[None,10], name='input_label', dtype=tf.float32) #
Then you can use any batch size.
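For example, a minimal sketch of the evaluation with None-shaped placeholders, assuming the model reads its input from x_placeholder directly rather than from the fixed-size variable x:
# With None-shaped placeholders, the whole 10000-image test set can be fed at once.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Accuracy:", accuracy.eval({x_placeholder: mnist.test.images,
                                  y: mnist.test.labels}))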