Tensorflow Logits and Labels must be broadcastable - python-3.x

I am very new to TensorFlow and cannot seem to get past this error. I have been troubleshooting it for two days now and still can't get it to work. Can anyone see an issue with the code? I am using Python 3 in a Jupyter Notebook. Thanks for the assistance.
Here is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("official/MNIST_data/", one_hot=True)
Extracting official/MNIST_data/train-images-idx3-ubyte.gz
Extracting official/MNIST_data/train-labels-idx1-ubyte.gz
Extracting official/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting official/MNIST_data/t10k-labels-idx1-ubyte.gz
type(mnist)
tensorflow.contrib.learn.python.learn.datasets.base.Datasets
mnist.train.num_examples
55000
mnist.test.num_examples
10000
Preparation for building CNN model: define supporting Functions
Initialize weights in Filter
def initialize_weights (filter_shape):
    """Create a trainable weight variable of shape ``filter_shape``.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(filter_shape, stddev=.1))
def initialize_bias(bias_shape):
    """Create a trainable bias variable of shape ``bias_shape`` filled with 0.1."""
    return tf.Variable(tf.constant(.1, shape=bias_shape))
def create_convolution_layer_and_compute_dot_product(inputs, filter_shape):
    """Convolve ``inputs`` with a freshly initialized filter.

    The filter variable is created with ``initialize_weights(filter_shape)``.
    The convolution uses stride 1 in every dimension and 'SAME' padding.
    No bias or activation is applied here.
    """
    kernel = initialize_weights(filter_shape)
    return tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME')
def create_relu_layer_and_compute_dotproduct_plus_b(inputs, filter_shape):
    """Add a new bias variable to ``inputs`` and apply ReLU.

    The bias has one entry per output channel, i.e. ``filter_shape[3]``.
    """
    out_channels = filter_shape[3]
    bias = initialize_bias([out_channels])
    return tf.nn.relu(inputs + bias)
def create_maxpool2by2_and_reduce_spatial_size(inputs):
    """Apply max-pooling with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(inputs, ksize=window, strides=step, padding='SAME')
def create_fully_conected_layer_and_compute_dotproduct_plus_bias(inputs, output_size):
    """Build a dense layer computing ``inputs @ W + b`` with no activation.

    The input width is read from the static shape of ``inputs`` (dimension 1),
    so that dimension must be known when the graph is built.
    """
    n_in = int(inputs.get_shape()[1])
    # Weights are created before the bias, as in the original definition.
    weights = initialize_weights([n_in, output_size])
    bias = initialize_bias([output_size])
    return tf.matmul(inputs, weights) + bias
Build the Convolutional Neural Network
# Graph inputs: flattened 784-value images and 10-way label rows.
x = tf.placeholder(tf.float32, shape = [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])
# Restore the 28x28 single-channel layout that conv2d operates on.
x_image = tf.reshape(x, [-1,28,28,1])
# First stage: 5x5 convolution (1 -> 32 channels), bias + ReLU, 2x2 max-pool.
conv_layer_1_outputs \
= create_convolution_layer_and_compute_dot_product(x_image, filter_shape=[5,5,1,32])
conv_relu_layer_1_outputs \
= create_relu_layer_and_compute_dotproduct_plus_b(conv_layer_1_outputs, filter_shape=[5,5,1,32])
pooling_layer_1_ouptuts = create_maxpool2by2_and_reduce_spatial_size(conv_relu_layer_1_outputs)
# Second stage: 5x5 convolution (32 -> 64 channels), bias + ReLU, 2x2 max-pool.
# NOTE(review): this convolution is fed conv_layer_1_outputs rather than
# pooling_layer_1_ouptuts, so the first ReLU and pooling stages are bypassed.
# With only one 2x2 pooling applied, each image yields 14*14*64 values, which
# is 4x the 7*7*64 assumed by the reshape below. A batch of 50 then becomes
# 200 rows, matching the reported logits_size=[200,10] vs labels_size=[50,10].
conv_layer_2_outputs \
= create_convolution_layer_and_compute_dot_product(conv_layer_1_outputs, filter_shape=[5,5,32,64])
conv_relu_layer_2_outputs \
= create_relu_layer_and_compute_dotproduct_plus_b(conv_layer_2_outputs, filter_shape=[5,5,32,64])
pooling_layer_2_outputs = create_maxpool2by2_and_reduce_spatial_size(conv_relu_layer_2_outputs)
# Flatten for the dense layers; 7*7*64 assumes two 2x2 poolings (28 -> 14 -> 7).
pooling_layer_2_outputs_flat=tf.reshape(pooling_layer_2_outputs, [-1,7*7*64])
# Dense layer with 1024 units.
fc_layer_1_outputs \
= create_fully_conected_layer_and_compute_dotproduct_plus_bias(pooling_layer_2_outputs_flat, output_size=1024)
fc_relu_layer_1_outputs = tf.nn.relu(fc_layer_1_outputs)
# Keep-probability for dropout, fed at run time.
hold_prob = tf.placeholder(tf.float32)
# NOTE(review): dropout is applied to fc_layer_1_outputs, so
# fc_relu_layer_1_outputs is never used - presumably the ReLU output was
# meant to be passed here; confirm.
fc_dropout_outputs = tf.nn.dropout(fc_layer_1_outputs, keep_prob=hold_prob)
# Output layer: 10 unscaled scores per row (softmax is applied inside the loss).
y_pred = create_fully_conected_layer_and_compute_dotproduct_plus_bias(fc_dropout_outputs, output_size=10)
# Mean softmax cross-entropy over the batch, minimized with Adam.
softmax_cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred)
cross_entropy_mean = tf.reduce_mean(softmax_cross_entropy_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=.001)
cnn_trainer = optimizer.minimize(cross_entropy_mean)
vars_initializer = tf.global_variables_initializer()
# Number of training iterations run by the session loop.
steps = 5000
Run tf.session to train and test deep learning CNN model
with tf.Session() as sess:
    sess.run(vars_initializer)

    # Build the accuracy ops once. Creating them inside the loop would add
    # duplicate nodes to the graph at every report.
    matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
    acc = tf.reduce_mean(tf.cast(matches, tf.float32))

    for i in range(steps):
        # One optimization step on a mini-batch of 50 examples, with dropout.
        batch_x, batch_y = mnist.train.next_batch(50)
        sess.run(cnn_trainer, feed_dict={x: batch_x, y_true: batch_y, hold_prob: .5})

        # Report test-set accuracy every 100 steps, with dropout disabled.
        if i % 100 == 0:
            # str.format fills the placeholder; the previous
            # print('ON STEP: {}', format(i)) printed the template literally.
            print('ON STEP: {}'.format(i))
            print('ACCURACY: ')
            test_accuracy = sess.run(
                acc,
                feed_dict={x: mnist.test.images, y_true: mnist.test.labels, hold_prob: 1.0})
            print(test_accuracy)
            print('\n')
Here is the exact error message:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[200,10] labels_size=[50,10]
[[node softmax_cross_entropy_with_logits_7 (defined at <ipython-input-162-3d06fe78186c>:1) = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](add_31, softmax_cross_entropy_with_logits_7/Reshape_1)]]

Posting this in case someone else is having similar issues.
The error should read "Dumb User" lol. I passed the wrong variable into the second layer.
pooling_layer_1_ouptuts = create_maxpool2by2_and_reduce_spatial_size(conv_relu_layer_1_outputs)
conv_layer_2_outputs \
= create_convolution_layer_and_compute_dot_product(conv_layer_1_outputs, filter_shape=[5,5,32,64])
should be:
pooling_layer_1_ouptuts = create_maxpool2by2_and_reduce_spatial_size(conv_relu_layer_1_outputs)
conv_layer_2_outputs \
= create_convolution_layer_and_compute_dot_product(pooling_layer_1_ouptuts , filter_shape=[5,5,32,64])

Related

I run this code and I get the following error. How do I fix this?

This is a code to predict stock price movements using TensorFlow and the ReLu activation function. I run the following code:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import pandas_datareader as web
dataset = web.DataReader('AAPL', data_source = 'yahoo', start = '1989-01-01', end = '2019-12-25')
import math
close_price = dataset.filter(['Close']).values
data_train_len = math.ceil(len(close_price) * .8)
sc = MinMaxScaler(feature_range = (0, 1))
sc_data = sc.fit_transform(close_price)
data_train = sc_data[0 : data_train_len, : ]
xtrain = []
ytrain = []
for i in range(60, len(data_train)):
xtrain.append(data_train[i - 60 : i, 0])
ytrain.append(data_train[i, 0])
xtrain, ytrain = np.array(xtrain), np.array(ytrain)
xtrain = np.reshape(xtrain, (xtrain.shape[0], xtrain.shape[1], 1))
print(xtrain.shape, ytrain.shape)
data_test = sc_data[data_train_len - 60 : , :]
xtest = []
ytest = close_price[data_train_len :, :]
for i in range(60, len(data_test)):
xtest.append(data_test[i - 60 : i, 0])
xtest = np.array(xtest)
xtest = np.reshape(xtest, (xtest.shape[0], xtest.shape[1], 1))
print(xtest.shape, ytest.shape)
# Width of one input row. xtrain was reshaped to (samples, 60, 1) above, so
# this is the 60-step look-back window length, not a count of stocks.
n_stocks = xtrain.shape[1]
# Model architecture parameters: units in each of the four hidden layers.
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
# Session
sesh = tf.InteractiveSession()
# Define two variables as placeholders
# NOTE(review): `a` is rank 2 ([None, n_stocks]) while xtrain/xtest are rank 3
# (samples, 60, 1); feeding them raises the "Cannot feed value of shape
# (256, 60, 1)" error quoted below.
a = tf.placeholder(dtype = tf.float32, shape = [None, n_stocks])
# NOTE(review): `b` is declared as [1, None], but ytrain is built as a 1-D
# array, so a batch of it has shape (batch,) - presumably it also needs
# reshaping to (1, batch) before feeding; confirm.
b = tf.placeholder(dtype = tf.float32, shape = [1, None])
# Initializers
sig = 1
weight_init = tf.variance_scaling_initializer(mode = "fan_avg", distribution = "uniform", scale =
sig)
bias_init = tf.zeros_initializer()
# Hidden weights
w_hid_1 = tf.Variable(weight_init([n_stocks, n_neurons_1]))
bias_hid_1 = tf.Variable(bias_init([n_neurons_1]))
w_hid_2 = tf.Variable(weight_init([n_neurons_1, n_neurons_2]))
bias_hid_2 = tf.Variable(bias_init([n_neurons_2]))
w_hid_3 = tf.Variable(weight_init([n_neurons_2, n_neurons_3]))
bias_hid_3 = tf.Variable(bias_init([n_neurons_3]))
w_hid_4 = tf.Variable(weight_init([n_neurons_3, n_neurons_4]))
bias_hid_4 = tf.Variable(bias_init([n_neurons_4]))
# Output weights
w_out = tf.Variable(weight_init([n_neurons_4, 1]))
bias_out = tf.Variable(bias_init([1]))
# Hidden layers
hid_1 = tf.nn.relu(tf.add(tf.matmul(a, w_hid_1), bias_hid_1))
hid_2 = tf.nn.relu(tf.add(tf.matmul(hid_1, w_hid_2), bias_hid_2))
hid_3 = tf.nn.relu(tf.add(tf.matmul(hid_2, w_hid_3), bias_hid_3))
hid_4 = tf.nn.relu(tf.add(tf.matmul(hid_3, w_hid_4), bias_hid_4))
# Transposed Output layer
out = tf.transpose(tf.add(tf.matmul(hid_4, w_out), bias_out))
# Cost function
mse = tf.reduce_mean(tf.squared_difference(out, b))
rmse = tf.sqrt(tf.reduce_mean(tf.squared_difference(out, b)))
opt1 = tf.train.AdamOptimizer().minimize(mse)
opt2 = tf.train.AdamOptimizer().minimize(rmse)
sesh.run(tf.global_variables_initializer())
# Setup plot
plt.ion()
fig = plt.figure()
ax1 = fig.add_subplot(111)
line1, = ax1.plot(ytest)
line2, = ax1.plot(ytest * 0.5)
plt.show()
# Fitting neural network
batch_size = 256
mse_train = []
rmse_train = []
mse_test = []
rmse_test = []
# Run tensorflow
epochs = 10
for epoch in range(epochs):
# Training data is shuffled
shuffle_ind = np.random.permutation(np.arange(len(ytrain)))
xtrain = xtrain[shuffle_ind]
ytrain = ytrain[shuffle_ind]
# Minibatch training
for i in range(0, len(ytrain) // batch_size):
start = i * batch_size
batch_x = xtrain[start : start + batch_size]
batch_y = ytrain[start : start + batch_size]
# Run optimizer with batch
sesh.run(opt1, feed_dict = {a : batch_x, b : batch_y})
sesh.run(opt2, feed_dict = {a : batch_x, b : batch_y})
I get the following error:
ValueError: Cannot feed value of shape (256, 60, 1) for Tensor 'Placeholder_30:0', which has shape '(?, 60)'
This error appears for both of the last two lines under 'Run Optimizer with Batch'. How do I fix this?
It seems you are trying to feed data whose shape doesn't match the placeholder (I think it is placeholder a). A simple fix is to change the placeholder to a = tf.placeholder(dtype = tf.float32, shape = [None, n_stocks, 1]), or to change the dimensions of xtrain and xtest (the lines where you call reshape) by dropping the last dimension with np.squeeze().

get InvalidArgumentError when using tf.image.resize_bilinear in Keras with multi-gpu environment

I use tf.image.resize_bilinear in a segmentation network. It seems this function is not supported by the multi-GPU model. The following code shows a simplified version of the situation (it can be run directly):
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1'
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.applications.mobilenet_v2 import preprocess_input
import tensorflow as tf
import numpy as np
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
set_session(sess)
batch = 4
num_classes = 2
size = 128
K.clear_session()
def _GetRandomImg():
    """Return a random image batch run through ``preprocess_input``.

    The raw batch has shape (batch, size, size, 3) with integer values in
    [0, 256).
    """
    raw = np.random.randint(low=0, high=256, size=(batch, size, size, 3))
    return preprocess_input(raw)
def _GetRandomLabel():
    """Return a random label batch of shape (batch, size, size, num_classes).

    Random integers in [0, num_classes) are exponentiated and normalized
    along the last axis, so every pixel's class scores sum to 1.
    """
    scores = np.exp(
        np.random.randint(low=0, high=num_classes,
                          size=(batch, size, size, num_classes)))
    return scores / np.sum(scores, axis=-1, keepdims=True)
def DataGen():
    """Yield an endless stream of random (image batch, label batch) pairs."""
    while True:
        # The image batch is generated before the label batch, as before.
        yield _GetRandomImg(), _GetRandomLabel()
from keras.layers import Input, Conv2D, Lambda
from keras import Model
def GetModel():
    """Build the minimal model: bilinear resize followed by a 3x3 convolution.

    Returns:
        A Keras ``Model`` mapping a (size, size, 3) image to a
        (size, size, num_classes) output.
    """
    inputs = Input(shape=(size, size, 3))

    def _resize(tensor):
        # Resize the tensor the layer is actually called with. The previous
        # lambda ignored its argument and resized the captured `inputs`
        # tensor, so re-applying the layer to another tensor (e.g. a per-GPU
        # slice of the batch) still produced output for the original input.
        return tf.image.resize_bilinear(tensor, (size, size), align_corners=True)

    x = Lambda(_resize, output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model
gen = DataGen()
with tf.device('/cpu:0'):
model = GetModel()
model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy', optimizer='sgd')
result = model.fit_generator(gen, epochs=2, verbose = 1, steps_per_epoch = 100)
It works fine in a single-GPU environment, but in a multi-GPU environment I get the following error:
InvalidArgumentError: Incompatible shapes: [3,128,128,2] vs. [6,128,128,2]
[[{{node loss/conv2d_1_loss/categorical_crossentropy/mul}}]]
[[{{node training/SGD/gradients/conv2d_1_1/concat_grad/Slice_1}}]]
The problem is solved. If a TensorFlow function is used in a custom Lambda layer, you need to call set_shape() explicitly on its output:
def MyResizeBilinear(x, height, width):
    """Bilinearly resize ``x`` to (height, width) and set its static shape.

    The height/width entries of the static shape are set to the requested
    values only when the corresponding input dimension was statically known;
    the batch and channel entries are passed to ``set_shape`` as ``None``.
    """
    static_shape = K.int_shape(x)
    target_size = tf.constant(np.array([height, width], dtype='int32'))
    resized = tf.image.resize_bilinear(x, target_size, align_corners=True)
    # Axis 1 is rows (height), axis 2 is columns (width).
    out_height = height if static_shape[1] is not None else None
    out_width = width if static_shape[2] is not None else None
    resized.set_shape((None, out_height, out_width, None))
    return resized

Error while running a convolutional network using my own data in Tensorflow

I'm a complete beginner in using TensorFlow and machine learning in general, so there are many concepts that I still don't understand well; sorry if my error is obvious. I'm trying to train my own convolutional network using my own images (optical microscopy photos) resized to 60x60, and I have only 2 labels to classify them (whether the sample is positive or not). Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
sess = tf.InteractiveSession()
# Load dataset in two lists (images and labels).
def load_data(data_dir):
    """Collect image file paths and integer labels from a directory tree.

    Every sub-directory of ``data_dir`` is treated as one class. Its name is
    parsed as an integer and used as the label for each JPEG file found
    directly inside it.

    Args:
        data_dir: Root directory holding one sub-directory per label.

    Returns:
        A tuple ``(images, labels)`` of two equal-length lists: the file
        paths and the integer label belonging to each path.

    Raises:
        ValueError: If a sub-directory containing JPEG files has a name that
            is not an integer.
        OSError: If a directory cannot be listed.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # resulting lists identical from run to run.
    for d in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, d)
        if not os.path.isdir(label_dir):
            continue
        # Match the extension case-insensitively so ".jpg" files are not
        # silently skipped next to ".JPG" ones.
        file_names = [os.path.join(label_dir, f)
                      for f in sorted(os.listdir(label_dir))
                      if f.lower().endswith(".jpg")]
        if not file_names:
            continue
        label = int(d)
        images.extend(file_names)
        labels.extend([label] * len(file_names))
    return images, labels
# Load training and testing datasets.
ROOT_PATH = "Proyecto"
train_data_dir = os.path.join(ROOT_PATH, "Imagenes_entrenamiento")
test_data_dir = os.path.join(ROOT_PATH, "Imagenes_test")
images_train, labels_train = load_data(train_data_dir)
images_test, labels_test = load_data(test_data_dir)
# Converting training data to tensors.
timages_train = ops.convert_to_tensor(images_train, dtype=dtypes.string)
tlabels_train = ops.convert_to_tensor(labels_train, dtype=dtypes.int32)
# Converting testing data to tensors.
timages_test = ops.convert_to_tensor(images_test, dtype=dtypes.string)
tlabels_test = ops.convert_to_tensor(labels_test, dtype=dtypes.int32)
# Creation of a training queue.
num_files_train = len(images_train)
filename_train_queue = tf.train.slice_input_producer([timages_train,
tlabels_train], num_epochs=None, shuffle=True, capacity=num_files_train)
# Creation of a testing queue.
num_files_test = len(images_test)
filename_test_queue = tf.train.slice_input_producer([timages_test,
tlabels_test], num_epochs=None, shuffle=True, capacity=num_files_test)
# Decoding and resizing train images
raw_image_train= tf.read_file(filename_train_queue[0])
decoded_image_train = tf.image.decode_jpeg(raw_image_train, channels=3)
decoded_image_train = tf.cast(decoded_image_train, tf.float32)
resized_train_image = tf.image.resize_images(decoded_image_train, [60, 60])
# Decoding and resizing test images
raw_image_test= tf.read_file(filename_test_queue[0])
decoded_image_test = tf.image.decode_jpeg(raw_image_test, channels=3)
decoded_image_test = tf.cast(decoded_image_test, tf.float32)
resized_test_image = tf.image.resize_images(decoded_image_test, [60, 60])
# Extracting training and testing labels.
label_train_queue = filename_train_queue[1]
label_test_queue = filename_test_queue[1]
# Training batch.
batch_size_train = 5
image_train_batch, label_train_batch = tf.train.batch([resized_train_image,
label_train_queue], batch_size_train)
# Testing batch.
batch_size_test = 2
image_test_batch, label_test_batch = tf.train.batch([resized_test_image,
label_test_queue], batch_size_test)
# General model
x = tf.placeholder(tf.float32, shape=[None, 60, 60, 3])
y_ = tf.placeholder(tf.int32, shape=[None])
keep_prob = tf.placeholder(tf.float32)
# Weights and biases
dense_w={
"w_conv1": tf.Variable(tf.truncated_normal([5,5,3,32],stddev=0.1),
name="w_conv1"),
"b_conv1": tf.Variable(tf.constant(0.1,shape=[32]), name="b_conv1"),
"w_conv2": tf.Variable(tf.truncated_normal([5,5,32,64],stddev=0.1),
name="w_conv2"),
"b_conv2": tf.Variable(tf.constant(0.1,shape=[64]), name="b_conv2"),
"w_fc1": tf.Variable(tf.truncated_normal([15*15*64,1024],stddev=0.1),
name="w_fc1"),
"b_fc1": tf.Variable(tf.constant(0.1,shape=[1024]), name="b_fc1"),
"w_fc2": tf.Variable(tf.truncated_normal([1024,2],stddev=0.1),
name="w_fc2"),
"b_fc2": tf.Variable(tf.constant(0.1,shape=[2]), name="b_fc2")
}
# CNN model
def dense_cnn_model(weights):
    """Build the two-convolution, two-dense-layer classifier graph.

    The network reads the module-level placeholders ``x`` (images) and
    ``keep_prob`` (dropout keep-probability).

    Args:
        weights: Dict with the keys ``w_conv1``, ``b_conv1``, ``w_conv2``,
            ``b_conv2``, ``w_fc1``, ``b_fc1``, ``w_fc2`` and ``b_fc2``.

    Returns:
        The unscaled class scores (logits) of the final dense layer. No
        softmax is applied: the loss this output is passed to,
        ``sparse_softmax_cross_entropy_with_logits``, applies softmax itself,
        and ``argmax`` over logits picks the same class as over
        probabilities.
    """
    def conv2d(x, W):
        # Stride-1 convolution that keeps the spatial size ('SAME' padding).
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        # 2x2 max-pooling with stride 2.
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    x_image = tf.reshape(x, [-1, 60, 60, 3])

    h_conv1 = tf.nn.relu(conv2d(x_image, weights["w_conv1"]) +
                         weights["b_conv1"])
    h_pool1 = max_pool_2x2(h_conv1)

    h_conv2 = tf.nn.relu(conv2d(h_pool1, weights["w_conv2"]) +
                         weights["b_conv2"])
    h_pool2 = max_pool_2x2(h_conv2)

    # Two 2x2 poolings take 60x60 down to 15x15; 64 is the channel count
    # the flatten assumes for the second convolution.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 15*15*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, weights["w_fc1"]) +
                       weights["b_fc1"])
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    y_conv = tf.matmul(h_fc1_drop, weights["w_fc2"]) + weights["b_fc2"]
    return y_conv
y_conv = dense_cnn_model(dense_w)
# y_ holds integer class ids of shape [batch] and is passed to the loss as
# is. The previous tf.squeeze(y_) would collapse a batch of one to a scalar.
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Compare the predicted class of each example with its label. The previous
# tf.argmax(y_,) reduced the label vector over the batch axis to a single
# index instead of giving one label per example. argmax returns int64, so it
# is cast to the int32 dtype of y_ before the comparison.
predicted_class = tf.cast(tf.argmax(y_conv, 1), tf.int32)
correct_prediction = tf.equal(predicted_class, y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init_op = tf.group(tf.local_variables_initializer(),
                   tf.global_variables_initializer())
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        ## Training:
        for i in range(50):
            # Fetch images and labels in ONE run call. Evaluating the two
            # tensors separately dequeues two different batches, pairing
            # images with labels that belong to other images.
            image_train_batch_eval, label_train_batch_eval = sess.run(
                [image_train_batch, label_train_batch])
            if i % 2 == 0:
                # Dropout is disabled (keep_prob 1.0) while measuring accuracy.
                train_accuracy = accuracy.eval(feed_dict={
                    x: image_train_batch_eval,
                    y_: label_train_batch_eval,
                    keep_prob: 1.0})
                print('Paso %d, Precisión de entrenamiento: %g' %
                      (i, train_accuracy))
            train_step.run(feed_dict={
                x: image_train_batch_eval,
                y_: label_train_batch_eval,
                keep_prob: 0.5})
        ## Testing
        image_test_batch_eval, label_test_batch_eval = sess.run(
            [image_test_batch, label_test_batch])
        print('Precisión de evaluación: %g' % accuracy.eval(feed_dict={
            x: image_test_batch_eval,
            y_: label_test_batch_eval,
            keep_prob: 1.0}))
    finally:
        # Always stop and join the queue-runner threads, even after an error.
        coord.request_stop()
        coord.join(threads)
EDIT:
The code is corrected.
You need to pass enqueue_many=True to tf.train.batch to indicate that you are enqueuing multiple examples at once, otherwise it will treat it as a single example with many features.

Why do I get a low accuracy in this neural network (tensorflow)?

I have made a convolutional neural network with TensorFlow; I've trained it and tested it (about 98% accuracy)... I saved the model with
saver = tf.train.Saver()
saver.save(sess, 'model.ckpt')
Then I restored it with the saver, but I always get an accuracy lower than 50%... Why?
Here's the code:
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle
import numpy as np
with open('X_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_train = u.load()
with open('X_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
X_test = u.load()
X_test = np.array(X_test).reshape(-1, 2500)
with open('y_train.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_train = u.load()
with open('y_test.pickle', 'rb') as y:
u = pickle._Unpickler(y)
u.encoding = 'latin1'
y_test = u.load()
n_classes = 3
batch_size = 100
x = tf.placeholder('float', [None, 2500])
y = tf.placeholder('float')
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def maxpool2d(x):
    """Max-pool ``x`` with a 2x2 window moved in steps of 2 ('SAME' padding)."""
    window = [1, 2, 2, 1]      # size of the pooling window
    movement = [1, 2, 2, 1]    # how far the window moves each step
    return tf.nn.max_pool(x, ksize=window, strides=movement, padding='SAME')
def convolutional_neural_network(x):
    """Build the CNN graph and return its unscaled class scores.

    ``x`` is reshaped to 50x50 single-channel images, passed through two
    5x5 convolution + ReLU + 2x2 max-pool stages (32 and 64 channels), a
    1024-unit ReLU dense layer and a final ``n_classes``-wide linear layer.
    Every call creates a new set of randomly initialized variables.
    """
    # The four weights are created first and the four biases second, exactly
    # as before. NOTE(review): these variables are unnamed, so checkpoint
    # restore presumably depends on this creation order - keep it stable.
    w_conv1 = tf.Variable(tf.random_normal([5,5,1,32]))
    w_conv2 = tf.Variable(tf.random_normal([5,5,32,64]))
    w_fc = tf.Variable(tf.random_normal([13*13*64,1024]))
    w_out = tf.Variable(tf.random_normal([1024, n_classes]))
    b_conv1 = tf.Variable(tf.random_normal([32]))
    b_conv2 = tf.Variable(tf.random_normal([64]))
    b_fc = tf.Variable(tf.random_normal([1024]))
    b_out = tf.Variable(tf.random_normal([n_classes]))

    images = tf.reshape(x, shape=[-1, 50, 50, 1])

    stage1 = maxpool2d(tf.nn.relu(conv2d(images, w_conv1) + b_conv1))
    stage2 = maxpool2d(tf.nn.relu(conv2d(stage1, w_conv2) + b_conv2))

    # Two 'SAME'-padded 2x2 poolings take 50 -> 25 -> 13 per spatial side.
    flat = tf.reshape(stage2, [-1, 13*13*64])
    hidden = tf.nn.relu(tf.matmul(flat, w_fc) + b_fc)

    return tf.matmul(hidden, w_out) + b_out
def use_neural_network(input_data):
    """Classify one flattened sample and print the accuracy on the test set.

    Relies on module-level names created by the script below: the open
    session ``sess``, the placeholders ``x`` and ``y``, and the
    ``predicted_class`` and ``accuracy`` ops built on the restored network.

    The previous version called ``convolutional_neural_network(x)`` again and
    then ran ``tf.global_variables_initializer()``. That created a second,
    untrained copy of the network and re-initialized every variable, so the
    weights restored from the checkpoint were discarded before predicting.

    Args:
        input_data: One sample as a flat sequence of 2500 values.

    Returns:
        An array holding the predicted class index for the sample.
    """
    result = sess.run(predicted_class, feed_dict={x: [input_data]})
    print('Accuracy:', sess.run(accuracy, feed_dict={x: X_test, y: y_test}))
    return result


with tf.Session() as sess:
    # Build the graph exactly once, before creating the Saver, so that the
    # Saver covers these variables and restore() fills them.
    c = convolutional_neural_network(x)
    predicted_class = tf.argmax(c, 1)
    correct = tf.equal(predicted_class, tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    saver = tf.train.Saver()
    # restore() assigns the saved values. No initializer may run afterwards,
    # or the restored weights are overwritten.
    saver.restore(sess, "model.ckpt")

    # Classify and display three training samples.
    for sample_index in (432, 1222, 2986):
        sample = X_train[sample_index].reshape(2500)
        res = use_neural_network(sample)
        if res == [0]:
            print('Go straight')
        elif res == [1]:
            print('Turn right')
        else:
            print('Turn left')
        img = sample.reshape(50,50)
        plt.imshow(img)
        plt.show()
The problem can't be overfitting, since I'm testing it with elements of the training dataset...
I'm quite sure that the problem is the saver, but I can't figure out how to solve it...
When you train a model using TensorFlow, make sure that you are using TensorFlow version 1.0 or above. Once you have trained a model with a recent version, three files will be created, named as follows:
modelname.data
This is a TensorBundle collection; it stores the values of all variables.
modelname.index
.index stores the list of variable names and shapes saved.
modelname.meta
this file describes the saved graph structure, includes GraphDef, SaverDef, and so on.
To reload/restore your model, use model.load(modelname); it not only loads your model, but the accuracy also won't fluctuate.
Note: Please use TFLearn. TFLearn introduces a high-level API that makes neural network building and training fast and easy. For more detail, visit http://tflearn.org/getting_started/
The simple and Generalized way of building and using CNN using tensorflow is as follows:
Construct Network :
Here you will create n convolution and max-pool layers and a fully connected layer, then apply whatever activation function you want and return your model object.
Train model :
fit your training data into your model using model.fit(X,Y)
Save Model :
Save your model using model.save(modelName)
Reload Model :
Reload your model using model.load(modelName)
This is the generic and simplified way to build and use CNN.
Hope it may help you :)

Tensorflow RNN stuck at 20% error

I created my first TensorFlow neural network, initially for generating sequences. It produced weird outputs, so I simplified it a lot to see if it can reach an error rate of 0% with just 5 inputs and 5 output classes. Somehow it does not seem to backpropagate at all, because it is stuck at a 20% error rate without moving. If anyone can point me to the mistake I made, thank you in advance :)
Cheers
import numpy as np
import tensorflow as tf
import sys
trainingInputs = [
[[0],[0],[0],[0]],
[[1],[0],[0],[0]],
[[0],[1],[0],[0]],
[[0],[0],[1],[0]],
[[0],[0],[0],[1]]]
trainingOutputs = [
[1,0,0,0],
[0,1,0,0],
[0,0,1,0],
[0,0,0,1],
[0,0,0,0]]
data = tf.placeholder(tf.float32, [None, len(trainingInputs[0]),1])
target = tf.placeholder(tf.float32, [None, len(trainingOutputs[0])])
num_hidden = 24
cell = tf.contrib.rnn.LSTMCell(num_hidden,state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.GradientDescentOptimizer(0.01)
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1
no_of_batches = int((len(trainingInputs)) / batch_size)
def trainNetwork():
    """Run 1000 passes over the training data in mini-batches.

    Each pass steps through ``no_of_batches`` consecutive slices of
    ``batch_size`` examples and runs one ``minimize`` step per slice.
    """
    epoch = 1000
    for _ in range(epoch):
        for start in range(0, no_of_batches * batch_size, batch_size):
            stop = start + batch_size
            inp = trainingInputs[start:stop]
            out = trainingOutputs[start:stop]
            sess.run(minimize, feed_dict={data: inp, target: out})
def generateOutput():
    """Evaluate ``error`` on the full training set and write it as a percentage."""
    feed = {data: trainingInputs, target: trainingOutputs}
    incorrect = sess.run(error, feed)
    line = 'error {:3.1f}%'.format(100 * incorrect) + "\n"
    sys.stdout.write(line)
    sys.stdout.flush()
for i in range(200):
trainNetwork()
generateOutput()
sess.close()

Resources