Is it possible to build a custom loss function of chamfer distance using Keras - keras

In my assignment about point clouds, I need to write a custom Keras loss function that computes the chamfer distance and apply it to an autoencoder, but I am finding the function hard to implement.
I have tried to write it in the following way. I hope someone can help me.
def chamfer_loss_func(y_true,y_pred):
    '''
    Calculate the chamfer distance,use euclidean metric
    :param y_true: ground-truth points, reshaped below to (batch_size, 2048, 3)
    :param y_pred: predicted points, reshaped below to (batch_size, 2048, 3)
    :return: sum over the batch of max(mean column-min, mean row-min) of the
             distance matrix, divided by batch_size
    '''
    batch_size = 32
    y_true = K.reshape(y_true,[batch_size,2048,3])
    y_pred = K.reshape(y_pred, [batch_size, 2048, 3])
    num_t = K.int_shape(y_true)[1]
    num_p = K.int_shape(y_pred)[1]
    dists_mat = K.zeros(shape=[num_t, num_p])
    sum = 0.0
    for bi in range(batch_size):
        for i in range(num_t):
            pnt_t = y_true[bi][i]
            for j in range(num_p):
                if (i <= j):
                    pnt_p = y_pred[bi][j]
                    # NOTE(review): K.sqrt of the signed difference is not a
                    # Euclidean distance, and K.eval / item assignment on a
                    # backend variable do not work inside a loss graph.
                    dists_mat[i][j] = K.eval(K.sum(K.sqrt(tf.subtract(pnt_t, pnt_p))))
                else:
                    # Lower triangle is mirrored from the upper triangle.
                    dists_mat[i][j] = dists_mat[j][i]
        dist_t_to_p = K.mean(K.min(dists_mat, axis=0))
        dist_p_to_t = K.mean(K.min(dists_mat, axis=1))
        sum += K.max([dist_p_to_t, dist_t_to_p])
    return sum / batch_size
And the network structure is listed here:
def get_model(num_pnts):
    """Build the point-cloud autoencoder.

    :param num_pnts: number of points per cloud
    :return: a Keras Model taking [points (num_pnts, 3), covariances
             (num_pnts, 9)] and producing a (num_pnts, 3) reconstruction
    """
    input_tensor = Input(shape=[num_pnts, 3])
    input_cov_tensor = Input(shape=[num_pnts, 9]) # add the local covariance matrix
    # the encoder network
    concat_1 = concatenate([input_tensor, input_cov_tensor]) # concatenate the two input tensor
    enc_1 = Dense(32,activation='relu')(concat_1)
    enc_2 = Dense(64,activation='relu')(enc_1)
    enc_3 = Dense(128,activation='relu')(enc_2)
    # pooling over all points collapses the cloud to a single feature vector
    encoded = MaxPooling1D(pool_size=num_pnts)(enc_3)
    # the decoder network
    # use 3 fully connected layers
    dec_1 = Dense(128,activation='relu')(encoded)
    dec_2 = Dense(128,activation='relu')(dec_1)
    dec_3 = Dense(num_pnts*3,activation='linear')(dec_2)
    decoded = Reshape(target_shape=[num_pnts,3])(dec_3)
    #the autoencoder
    autoencoder = Model(inputs=[input_tensor,input_cov_tensor],outputs=decoded)
    return autoencoder
And the place where uses the loss function:
model = get_model(2048)

Can you give me the link of chamfer distance you reference to?
K.sum(K.sqrt(tf.subtract(pnt_t, pnt_p))) looks strange to me. To calculate Euclidean distance, the sqrt should be replaced with square.
And it is not recommended to use for loop in tensorflow, so I have reimplemented it:
import numpy as np
import keras.backend as K
import tensorflow as tf
from keras.layers import Input, Dense, MaxPooling1D, Reshape, concatenate
from keras.models import Model
def dists_mat_calculater(pnts_t, pnts_p):
    """
    Build the squared-distance matrix used by the chamfer loss.

    pnts_t : from y_true[bi], shape: (num_t, 3)
    pnts_p : from y_pred[bi], shape: (num_p, 3)
    return dists_mat: shape: (num_t, num_p)
    """
    # Broadcasting (num_t, 1, 3) against (1, num_p, 3) gives every pairwise
    # difference without explicit tile/reshape.
    diff = tf.expand_dims(pnts_t, 1) - tf.expand_dims(pnts_p, 0)
    dists_mat = K.sum(K.square(diff), axis=-1)
    # Keep the upper triangle and mirror it into the lower one, matching the
    # `i <= j` logic of the loop-based versions.
    # NOTE(review): this requires num_t == num_p and discards the true
    # lower-triangle distances -- confirm that this is intended.
    dists_mat_upper = tf.matrix_band_part(dists_mat, 0, -1)
    dists_mat_symm = dists_mat_upper + tf.transpose(dists_mat_upper)
    # The diagonal was added twice above; restore the original values.
    dists_mat_symm = tf.matrix_set_diag(dists_mat_symm, tf.diag_part(dists_mat))
    return dists_mat_symm
def dist_calculator(pnts):
    """
    Chamfer-style distance for a single sample.

    pnts : tuple (pnts_t, pnts_p)
    pnts_t : from y_true[bi], shape: (num_t, 3)
    pnts_p : from y_pred[bi], shape: (num_p, 3)
    return dist: scalar tensor
    """
    pnts_t, pnts_p = pnts
    dists_mat = dists_mat_calculater(pnts_t, pnts_p)
    dist_t_to_p = K.mean(K.min(dists_mat, axis=0)) # mean of column minima
    dist_p_to_t = K.mean(K.min(dists_mat, axis=1)) # mean of row minima
    dist = K.max([dist_p_to_t, dist_t_to_p]) # the larger of the two directions
    return dist
def chamfer_loss_func_tf(y_true,y_pred):
    '''
    Calculate the chamfer distance,use euclidean metric
    :param y_true: ground-truth points, reshaped to (-1, num_pnts, 3)
    :param y_pred: predicted points, reshaped to (-1, num_pnts, 3)
    :return: mean over the batch of the per-sample distance
    '''
    # num_pnts is read from module scope.
    y_true = K.reshape(y_true,[-1, num_pnts, 3])
    y_pred = K.reshape(y_pred, [-1, num_pnts, 3])
    # map_fn applies dist_calculator to each (y_true[bi], y_pred[bi]) pair.
    return K.mean(tf.map_fn(dist_calculator, elems=(y_true, y_pred), dtype=tf.float32))
dists_mat_calculater calculates the distance matrix; the pairwise-calculation part is inspired by Mask R-CNN - overlaps_graph.
I also implement a pure python version for validation purpose:
def chamfer_loss_python(y_true,y_pred):
    '''
    Calculate the chamfer distance,use euclidean metric
    (pure NumPy reference for validating the TensorFlow version)
    :param y_true: ground-truth points, reshaped to (-1, num_pnts, 3)
    :param y_pred: predicted points, reshaped to (-1, num_pnts, 3)
    :return: mean over the batch of the per-sample distance
    '''
    # num_pnts is read from module scope.
    y_true = np.reshape(y_true,[-1,num_pnts,3])
    y_pred = np.reshape(y_pred, [-1,num_pnts, 3])
    batch_size = y_true.shape[0]
    num_t = y_true.shape[1]
    num_p = y_pred.shape[1]
    dists_mat = np.zeros((num_t, num_p))
    _sum = 0.0
    for bi in range(batch_size):
        for i in range(num_t):
            pnt_t = y_true[bi][i]
            for j in range(num_p):
                if (i <= j):
                    pnt_p = y_pred[bi][j]
                    dists_mat[i][j] = np.sum((pnt_t - pnt_p)**2)
                else:
                    # Mirror the upper triangle, as the TensorFlow version does.
                    dists_mat[i][j] = dists_mat[j][i]
        dist_t_to_p = np.mean(np.min(dists_mat, axis=0))
        dist_p_to_t = np.mean(np.min(dists_mat, axis=1))
        _sum += np.max([dist_p_to_t, dist_t_to_p])
    return _sum / batch_size
Following is the testing script:
# Compare the TensorFlow loss with the pure-Python reference on random data.
num_pnts = 8
Y_true = np.random.randn(32, num_pnts, 3).astype(np.float32)
Y_pred = np.random.randn(32, num_pnts, 3).astype(np.float32)
Y_true_ph = tf.placeholder(tf.float32, shape=(None, num_pnts, 3), name="Y_true_ph")
Y_pred_ph = tf.placeholder(tf.float32, shape=(None, num_pnts, 3), name="Y_pred_ph")
loss = chamfer_loss_func_tf(Y_true_ph, Y_pred_ph)
with tf.Session() as sess:
    loss = sess.run(loss, feed_dict={
        Y_true_ph: Y_true,
        Y_pred_ph: Y_pred})
loss_py = chamfer_loss_python(Y_true,Y_pred)


Multi-input/Multi-output : Wrong output dimension when using KerasClassifier and GridSearchCV

I have built a multi-input (100 features), multi-output (100 predictions) ANN model using Keras and TensorFlow. I have been able to train my model and reach quite satisfying accuracy on the test set using the following code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
def my_loss_fn(y_true, y_pred) :
    """Per-sample loss: 1 - (|y_true|-weighted soft sign agreement).

    tanh(100000 * y_true * y_pred) / 2 + 0.5 is close to 1 where y_true and
    y_pred share a sign and close to 0 where they differ.
    """
    d = K.sum(K.abs(y_true), axis = -1)
    n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true), axis = -1)
    return 1 - n/d
def my_metric_fn(y_true, y_pred) :
    """|y_true|-weighted soft sign agreement, summed over the whole batch."""
    d = K.sum(K.abs(y_true))
    n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true))
    return n/d
def accuracy(y_true, y_pred) :
    """Mean over rows of the |y_true|-weighted fraction of correct signs.

    y_true, y_pred : 2D arrays of identical shape (days, outputs).
    Raises TypeError if y_pred is 1D, because len(y_pred[0]) is then taken
    on a scalar.
    """
    #print(y_true.shape, y_true)
    #print(y_pred.shape, y_pred)
    acc = np.zeros([1, len(y_true)])
    for day in range(len(y_pred)) :
        d = 0
        n = 0
        for i in range(len(y_pred[0])) :
            weight = abs(y_true[day, i])
            d = d + weight
            # only outputs whose predicted sign matches the true sign count
            if np.sign(y_pred[day, i])*np.sign(y_true[day, i]) > 0 :
                n = n + weight
        acc[0, day] = n/d
    return np.mean(acc, axis = -1)[0]
# Baseline model: 100 inputs -> 50 tanh units -> 100 tanh outputs.
classifier = Sequential()
classifier.add(Dense(units = 50, input_shape = (100, ), activation = "tanh"))
classifier.add(Dense(units=100, activation = 'tanh'))
classifier.compile(optimizer = 'rmsprop', loss = my_loss_fn, metrics = ['accuracy', my_metric_fn])
callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, patience = 20, verbose = 0, mode = 'min')
nb_epochs = 250
history = classifier.fit(X_train, y_train, epochs = nb_epochs, batch_size = 31, callbacks = [callback], verbose = True, validation_split = 0., validation_data = (X_test, y_test), use_multiprocessing = True)
y_pred_train = classifier.predict(X_train)
y_pred_test = classifier.predict(X_test)
acc_test = accuracy(y_test, y_pred_test)
acc_train = accuracy(y_train, y_pred_train)
I am trying to improve the performance of my model by tuning the hyperparameters so I used KerasClassifier() and GridSearchCV(). The following code illustrates my approach for the gridsearch.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from tensorflow import autograph
#Building a function to create the classifier
def build_classifier(nb_layers, nb_nodes, optimizer, dropout, activation_fn):
    """Build and compile a fresh model for one hyper-parameter combination.

    nb_layers     : number of hidden layers
    nb_nodes      : units per hidden layer
    optimizer     : optimizer name passed to compile()
    dropout       : NOTE(review): accepted but not used by any layer
    activation_fn : activation of the hidden layers
    """
    # A new model per call; reusing a module-level model would keep stacking
    # layers across grid-search iterations.
    classifier = Sequential()
    classifier.add(Dense(units = nb_nodes, input_shape = (100, ), activation = activation_fn))
    for i in range(nb_layers-1) :
        classifier.add(Dense(units = nb_nodes, activation = activation_fn, kernel_initializer = "uniform"))
    classifier.add(Dense(units = 100, activation = 'tanh'))
    classifier.compile(optimizer=optimizer, loss = tf.autograph.experimental.do_not_convert(my_loss_fn), metrics= ['accuracy', tf.autograph.experimental.do_not_convert(my_metric_fn)])
    return classifier
#Creating a scorer to feed to the GridSearchCV()
my_scorer = make_scorer(accuracy, greater_is_better = True)
parameters={'batch_size':[13, 31],'epochs':[100, 150], 'optimizer':['adam', 'rmsprop'], 'dropout' : [0.2, 0.1], 'nb_layers' : [2, 3], 'nb_nodes' : [45, 50, 110, 115], 'activation_fn' : ['relu', 'tanh']}
# GridSearchCV needs a scikit-learn style estimator, so wrap the builder.
classifier = KerasClassifier(build_fn = build_classifier)
grid_search=GridSearchCV(estimator=classifier, scoring = my_scorer, param_grid=parameters, cv=5, verbose = 1)
# NOTE(review): the first argument was lost in extraction; X_train_raw is
# assumed by analogy with y_train_raw -- confirm.
grid_search.fit(X_train_raw, y_train_raw)
When I fit my GridSearchCV() object I get the following error at the end of the first combination of hyperparameters (when the scoring is computed) :
TypeError: object of type 'numpy.int32' has no len()
I investigated by adding print statements inside my accuracy() function
#print(y_true.shape, y_true)
#print(y_pred.shape, y_pred)
to print both the shape and the array y_true and y_pred given as inputs for my accuracy() function used as the scoring in the GridSearchCV() object.
I found out that y_true.shape == (555, 100) but y_pred.shape == (555,). The value 555 corresponds to the number of lines of the fifth validation set because cv = 5.
However, I do not understand why the prediction of the gridsearch is not a multi-output prediction even though the number of nodes of the last layer of the classifier is (100,).
This was a regression problem, so I used KerasRegressor() instead and that solved the issue. I guess that for a multi-output classification problem, KerasClassifier() expects the output to be a 2D one-hot encoded array.

Translating LSTM model from Keras to Pytorch

I am having a hard time translating a quite simple LSTM model from Keras to Pytorch. X (get it here) corresponds to 1152 samples of 90 timesteps, each timestep has only 1 dimension. y (here) is a single prediction at t = 91 for all 1152 samples.
In Keras:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
import numpy as np
import pandas as pd
X = pd.read_csv('X.csv', header = None).values
y = pd.read_csv('y.csv', header = None).values
# From Keras documentation []:
# Input shape 3D tensor with shape (batch_size, timesteps, input_dim).
X = np.reshape(X, (1152, 90, 1))
# Four stacked LSTM layers followed by a single linear output unit.
regressor = Sequential()
regressor.add(LSTM(units = 100, return_sequences = True, input_shape = (90, 1)))
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(LSTM(units = 50, return_sequences = True))
# the last LSTM returns only its final timestep
regressor.add(LSTM(units = 50))
regressor.add(Dense(units = 1, activation = 'linear'))
regressor.compile(optimizer = 'rmsprop', loss = 'mean_squared_error', metrics = ['mean_absolute_error'])
regressor.fit(X, y, epochs = 10, batch_size = 32)
... leads me to:
# Epoch 10/10
# 1152/1152 [==============================] - 33s 29ms/sample - loss: 0.0068 - mean_absolute_error: 0.0628
Then in Pytorch:
import torch
from torch import nn, optim
from sklearn.metrics import mean_absolute_error
X = pd.read_csv('X.csv', header = None).values
y = pd.read_csv('y.csv', header = None).values
X = torch.tensor(X, dtype = torch.float32)
y = torch.tensor(y, dtype = torch.float32)
# Pair each sample with its target and iterate in shuffled mini-batches.
dataset = torch.utils.data.TensorDataset(X, y)
loader = torch.utils.data.DataLoader(dataset, batch_size = 32, shuffle = True)
class regressor_LSTM(nn.Module):
    """Stacked LSTM regressor: 100 -> 50 -> (50, 50) hidden units, then linear.

    forward() takes X with one row per sample, shape (batch, seq_len) or
    (batch, seq_len, 1), and returns a tensor of shape (seq_len, batch, 1).
    """

    def __init__(self):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        self.lstm1 = nn.LSTM(input_size = 1, hidden_size = 100)
        self.lstm2 = nn.LSTM(100, 50)
        self.lstm3 = nn.LSTM(50, 50, dropout = 0.3, num_layers = 2)
        self.dropout = nn.Dropout(p = 0.3)
        self.linear = nn.Linear(in_features = 50, out_features = 1)

    def forward(self, X):
        # From the Pytorch documentation:
        # **input** of shape `(seq_len, batch, input_size)`
        # A plain view(seq_len, batch, 1) would only reinterpret memory and
        # mix values from different samples; the batch and time axes must be
        # swapped instead. This also works for any batch size.
        X = X.reshape(X.shape[0], -1).t().unsqueeze(-1).contiguous()
        # Hidden/cell states are discarded (stateless, as in the Keras model).
        X, _ = self.lstm1(X)
        X = self.dropout(X)
        X, _ = self.lstm2(X)
        X = self.dropout(X)
        X, _ = self.lstm3(X)
        X = self.dropout(X)
        X = self.linear(X)
        return X
regressor = regressor_LSTM()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(regressor.parameters())
for epoch in range(10):
    running_loss = 0.
    running_mae = 0.
    for i, data in enumerate(loader):
        inputs, labels = data
        # clear gradients left over from the previous step
        optimizer.zero_grad()
        outputs = regressor(inputs)
        # keep only the last timestep, shaped like the labels
        outputs = outputs[-1].view(*labels.shape)
        loss = criterion(outputs, labels)
        # without backward() + step() the weights are never updated
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        mae = mean_absolute_error(labels.detach().cpu().numpy().flatten(), outputs.detach().cpu().numpy().flatten())
        running_mae += mae
    print('EPOCH %3d: loss %.5f - MAE %.5f' % (epoch+1, running_loss/len(loader), running_mae/len(loader)))
... leads me to:
# EPOCH 10: loss 0.04220 - MAE 0.16762
You can notice that both loss and MAE are quite different (Pytorch's are much higher). If I use Pytorch's model to predict the values, they all return as a constant.
What am I doing wrong?
Oh I believe I made considerable progress. It seems that the way to represent y is different between Keras and Pytorch. In Keras, we should pass it as a single value representing one timestep in the future (or, at least, for the problem I am trying to solve). But in Pytorch, y must be X shifted one timestep to the future. It is like this:
time_series = [0, 1, 2, 3, 4, 5]
X = [0, 1, 2, 3, 4]
# Keras:
y = [5]
# Pytorch:
y = [1, 2, 3, 4, 5]
This way, Pytorch compares all values in the time slice when calculating loss. I believe Keras rearranges the data under the hood to conform to this approach, as the code works when fed the variables just like that. But in Pytorch, I was estimating loss based only on one value (the one I was trying to predict), not the whole series, therefore I believe it could not correctly capture the time dependency.
When taking this in consideration, I got to:
EPOCH 100: loss 0.00551 - MAE 0.058435
And, most importantly, comparing true and predicted values in a separate dataset got me to
The patterns were clearly captured by the model.

Actually printing values from tensor object

I'm currently trying to implement a basic autoencoder using Keras, and I have come to the stage where I would want the output from the second hidden layer. I think that I'm able to get the right object, the problem is that I get it as a tensor object, the code I've been trying to run is the following:
from keras.layers import Input, Dense, initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Activation
import tensorflow as tf
import time
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
    """Return the per-layer (input sizes, output sizes) of a symmetric autoencoder.

    list_of_nodes : encoder widths from the input layer down to the bottleneck,
                    e.g. [784, 400, 144, 10]
    returns       : (input, output); input is the encoder widths followed by
                    the mirrored inner widths, output is input reversed, e.g.
                    ([784, 400, 144, 10, 144, 400], [400, 144, 10, 144, 400, 784])
    """
    layer_inputs = []
    # encoder: every width in order
    for j in range(len(list_of_nodes)):
        layer_inputs.append(list_of_nodes[j])
    # decoder: walk back up from the layer just before the bottleneck,
    # stopping before the original input width
    for i in range(len(list_of_nodes)-2):
        layer_inputs.append(list_of_nodes[-2-i])
    layer_outputs = layer_inputs[::-1]
    return layer_inputs, layer_outputs
input,output = generate_hidden_nodes(list_of_nodes)
def autoencoder(epochs):
    """Build, compile and fit the dense autoencoder; return the fitted model.

    epochs : number of training epochs
    """
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    model = Sequential()
    layer_inputs, layer_outputs = generate_hidden_nodes(list_of_nodes)
    last = len(layer_inputs) - 1
    for j in range(len(layer_inputs)):
        # the final (reconstruction) layer uses sigmoid, all others relu
        if j == last:
            model.add(Dense(layer_outputs[j], activation='sigmoid', kernel_initializer=w, input_dim=layer_inputs[j], name=names[j]))
        else:
            model.add(Dense(layer_outputs[j], activation='relu', kernel_initializer=w, input_dim=layer_inputs[j],
                            name = names[j]))
    model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    # NOTE(review): part of this call was lost in extraction; epochs=epochs is
    # assumed because the parameter is otherwise unused -- confirm.
    history = model.fit(d.X_train, d.X_train,
                        epochs = epochs,
                        validation_split = 0.2)
                        #validation_data=(d.X_test, d.X_test))
    return model
def cv():
    """Train the autoencoder `size` times; return the mean and (population)
    variance of the final-epoch validation accuracy."""
    size = 5
    epochs = 20
    storage = np.zeros((size, epochs))
    for j in range(size):
        ae = autoencoder(epochs)
        storage[j] = ae.history.history['val_acc']
    # last column = validation accuracy after the final epoch of each run
    final_acc = storage[:, -1]
    mean = final_acc.mean()
    variance = final_acc.var()
    return mean, variance
#mean, variance = cv()
def finding_index():
    """Return the distinct test labels and the index of each one's first occurrence."""
    elements, index = np.unique(d.Y_test, return_index=True)
    return elements, index
def plotting():
    """Show one test image per label (top row) above its reconstruction (bottom row)."""
    ae = autoencoder(20)
    elements, index = finding_index()
    y_proba = ae.predict(d.X_test)
    plt.figure(figsize=(20, 4))
    n_cols = len(index)
    for i in range(n_cols):
        # original
        ax = plt.subplot(2, n_cols, i + 1)
        plt.imshow(d.X_test[index[i]].reshape(28, 28))
        # reconstruction
        ax = plt.subplot(2, n_cols, i + 1 + n_cols)
        plt.imshow(y_proba[index[i]].reshape(28, 28))
    plt.show()
def plotting_weights(epochs):
    """Plot the first three kernel rows of layer 'hidden2' as 12x12 images.

    epochs : number of epochs to train the autoencoder before plotting
    """
    ae = autoencoder(epochs)
    output_layer = ae.get_layer('hidden2')
    # get_weights()[0] is the kernel matrix; [1] would be the bias
    weights = output_layer.get_weights()[0]
    plt.figure(figsize=(20, 4))
    for j in range(3):
        # one subplot per row so later images do not overwrite earlier ones
        plt.subplot(1, 3, j + 1)
        plt.imshow(weights[j].reshape(12, 12))
    plt.show()
def get_output():
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
new_model = Sequential()
new_model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim = 784))
new_model.add(Dense(144, activation='sigmoid', kernel_initializer=w, input_dim = 400))
#new_model.add(Dense(784, activation='sigmoid', kernel_initializer=w, input_dim = 144))
new_model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
history =, d.X_train,
y = new_model.predict(d.X_test)
elements, index = finding_index()
#return y.shape
def get_output2():
    """Try to print the output tensor of the second layer of a trained autoencoder."""
    ae = autoencoder(5)
    # `output` is a property of a Keras layer, not a method
    a = ae.layers[1].output
    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)  # execute init_op
        # NOTE(review): `a` is symbolic and depends on the model input, so
        # evaluating it presumably needs a feed for that input -- confirm.
        print(sess.run(a))
I've tried to just print(a) as well, but as I said, that returns me a tensor object. Can someone provide me some information how I can actually print those value? Thanks in advance!
import keras.backend as K
print(K.eval(a))
This is equivalent to:
with tf.Session() as sess:
    print(sess.run(a))
I find it more readable to simply use the keras.backend interface.

Tensorflow CNN with custom images with RGB values

I have followed tutorials on creating a CNN with the MNIST dataset, and I understand most of it. I then tried to adapt it to my own custom images with RGB values, but I am having trouble with certain parts of the code, as I do not fully understand what happens and how to proceed. I know I have to change the number of channels to 3, but I do not know whether the rest of the helper functions are correct. I also do not understand how to train the network once everything is initialized, because of the line batch_x, batch_y = iterator.get_next()
I am not able to use feed_dict, and I do not know how to train without it. With the MNIST data it was possible to set the dropout, but how can I specify this now? And as far as I understand, I am not training on the real data right now?
Also, how can I calculate the result in the same way as with the MNIST data when I create and test the validation data?
The code looks like this:
import tensorflow as tf
import process_images as image_util
from tensorflow.contrib.data import Dataset, Iterator
# With MNIST
#from tensorflow.examples.tutorials.mnist import input_data
#mnist = input_data.read_data_sets("MNISt_data/", one_hot=True)
filenames_dummy, labels_dummy = image_util.run_it()
#The filenames_dummy and labels_dummy are two lists looking like this, respectively:
#["data/image_1.png", "data/image_2.png", ..., "data/image_n.png"]
# The values of the labels are 0-3, since I have 4 classes.
#[0, 1, ..., 3]
filenames = tf.constant(filenames_dummy)
labels = tf.constant(labels_dummy)
def _parse_function(filename, label):
    """Load one PNG file as a 3-channel image resized to 425x425; pass the label through."""
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    # The image size is 425x425.
    image_resized = tf.image.resize_images(image_decoded, [425,425])
    return image_resized, label
# (filename, label) pairs -> decoded images -> batches of 30, repeated forever
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.map(_parse_function)
dataset = dataset.batch(30)
dataset = dataset.repeat()
iterator = dataset.make_one_shot_iterator()
# Helper functions
# INIT weights
def init_weights(shape):
    """Return a weight Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    init_random_dist = tf.truncated_normal(shape, stddev=0.1)
    # Without this return the callers would receive None instead of a Variable.
    return tf.Variable(init_random_dist)
# INIT Bias
def init_bias(shape):
    """Return a bias Variable of `shape` filled with 0.1."""
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)
def conv2d(x, W):
    """Stride-1, SAME-padded 2D convolution of x with the filter bank W."""
    # x --> input tensor [batch, H, W, Channels]
    # W --> [filter H, filter W, Channels IN, Channels OUT]
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
# Pooling
def max_pooling_2by2(x):
    """2x2 max pooling with stride 2 and SAME padding."""
    # x --> [batch, h, w, c]
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
#Convolutional layer
def convolutional_layer(input_x, shape):
    """Convolution + bias + ReLU.

    shape : [filter H, filter W, Channels IN, Channels OUT]
    """
    W = init_weights(shape)
    # one bias per output channel
    b = init_bias([shape[3]])
    return tf.nn.relu(conv2d(input_x, W)+b)
def normal_full_layer(input_layer, size):
    """Fully connected layer (no activation) mapping input_layer to `size` units."""
    # the input width is read from the static shape of the incoming tensor
    input_size = int(input_layer.get_shape()[1])
    W = init_weights([input_size, size])
    b = init_bias([size])
    return tf.matmul(input_layer, W) + b
x = tf.placeholder(tf.float32, shape=[None, 180625])
y_true = tf.placeholder(tf.float32, shape=[None, 4])
# With MNIST
#x = tf.placeholder(tf.float32, shape=[None, 784])
#y_true = tf.placeholder(tf.float32, shape=[None, 10])
# Layers
x_image = tf.reshape(x, [-1, 425,425, 1])
# With MNIST
#x_image = tf.reshape(x, [-1, 28,28, 1])
convo_1 = convolutional_layer(x_image, shape=[5,5,1,32])
convo_1_pooling = max_pooling_2by2(convo_1)
convo_2 = convolutional_layer(convo_1_pooling, shape=[5,5,32, 64])
convo_2_pooling = max_pooling_2by2(convo_2)
convo_2_flat = tf.reshape(convo_2_pooling, [-1, 7*7*64])
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 1024))
# Dropout
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)
y_pred = normal_full_layer(full_one_dropout, 4)
# With MNIST
#y_pred = normal_full_layer(full_one_dropout, 10)
# LOSS function
# Mean softmax cross-entropy over the batch (y_pred holds raw logits).
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))
# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
steps = 5000
# Create the batch tensors once: calling get_next() inside the loop would add
# new ops to the graph on every step.
batch_x, batch_y = iterator.get_next()
with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        test1, test2 = sess.run([batch_x, batch_y])
        # With MNIST
        #sess.run(train, feed_dict={x:batch_x, y_true:batch_y, hold_prob:0.5})
        if i%100 == 0:
            print("ON STEP {}".format(i))
            print("Accuracy: ")
            matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
            accuracy = tf.reduce_mean(tf.cast(matches, tf.float32))
            # With MNIST
            #print(sess.run(accuracy, feed_dict={x:mnist.test.images, y_true:mnist.test.labels, hold_prob:1.0}))

Tensor flow, making predictions using a trained network

So I am training a network to classify images in tensor flow. After I trained the network I began work on trying to use it to classify other images. The goal is to import an image, feed it to the classifier and have it print the result. I am having some trouble getting that part off the ground though. Here is what I have so far. I found that having tf.argmax(y,1) gave an error. I found that changing it to 0 fixed that error. However I am not convinced that it is actually working. I tossed 2 images through the classifier and they both got the same class even though they are vastly different. Just need some perspective here. Is this valid? Or is there something wrong here that will always feed me the same class (in this case I got class 0 for both of the images I tried).
Is this even the right way to approach making predictions in tensor flow? This is just the culmination of my debugging, not sure if it is what should be done or not.
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
X_train,X_validation,y_train,y_validation=train_test_split(X_train,y_train, test_size=20,random_state=0)
X_train, y_train = shuffle(X_train, y_train)
def LeNet(x):
    """LeNet-5 style network; returns the logits for 43 classes.

    x : image batch of shape (batch, 32, 32, 3)
    """
    # Arguments used for tf.truncated_normal, randomly defines variables
    # for the weights and biases for each layer
    mu = 0
    sigma = 0.1
    # SOLUTION: Layer 1: Convolutional. Input = 32x32x3. Output = 28x28x6.
    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 3, 6), mean = mu, stddev = sigma))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
    # SOLUTION: Activation.
    conv1 = tf.nn.relu(conv1)
    # SOLUTION: Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    # SOLUTION: Layer 2: Convolutional. Output = 10x10x16.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
    # SOLUTION: Activation.
    conv2 = tf.nn.relu(conv2)
    # SOLUTION: Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    # SOLUTION: Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)
    # SOLUTION: Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc0, fc1_W) + fc1_b
    # SOLUTION: Activation.
    fc1 = tf.nn.relu(fc1)
    # SOLUTION: Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(fc1, fc2_W) + fc2_b
    # SOLUTION: Activation.
    fc2 = tf.nn.relu(fc2)
    # SOLUTION: Layer 5: Fully Connected. Input = 84. Output = 43.
    fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 43), mean = mu, stddev = sigma))
    fc3_b = tf.Variable(tf.zeros(43))
    logits = tf.matmul(fc2, fc3_W) + fc3_b
    return logits
import tensorflow as tf
# Placeholders: a batch of 32x32 RGB images and their integer class ids.
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, 43)
rate = 0.001
logits = LeNet(x)
# labels/logits must be passed by keyword; positional arguments are rejected
# by TensorFlow >= 1.0.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)
# a prediction is correct when the arg-max class matches the label
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()
def evaluate(X_data, y_data):
    """Return the accuracy over (X_data, y_data), evaluated in BATCH_SIZE chunks
    inside the current default session."""
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y})
        # weight by batch length so a short final batch is not over-counted
        total_accuracy += (accuracy * len(batch_x))
    return total_accuracy / num_examples
with tf.Session() as sess:
    # variables must be initialised before the first training step
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)
    for i in range(EPOCHS):
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
        validation_accuracy = evaluate(X_validation, y_validation)
        print("EPOCH {} ...".format(i+1))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()
    saver.save(sess, './lenet')
    print("Model saved")
import cv2
image=cv2.imread('File path')
image=cv2.resize(image,(32,32)) #classifier takes 32X32 images
with tf.Session() as sess:
    saver3 = tf.train.import_meta_graph('./lenet.meta')
    saver3.restore(sess, "./lenet")
    pred = tf.nn.softmax(logits)
    # NOTE(review): `image` has shape (32, 32, 3) while `x` expects a leading
    # batch axis, and class scores lie along axis 1 of `pred`; argmax over
    # axis 0 therefore does not select a class -- confirm and fix.
    predictions = sess.run(tf.argmax(pred,0), feed_dict={x: image})
    print (predictions)
So what had to happen here was first clear the kernel and outputs. Somewhere along the way my placeholders got muddled up and clearing the kernel fixed that right up. Then I had to realize what really had to get done here: I had to call up the softmax function on my new data.
Like this:
pred = tf.nn.softmax(logits)
classification = sess.run(pred, feed_dict={x: image_array})
