Keras: using validation data with fit_generator - keras

When I use fit_generator in Keras, the validation set is split into minibatches, and each minibatch appears to be evaluated while training is still in progress. I want the validation data to be used exactly once, at the end of each epoch. My code is currently:
def model_fit_generator(self):
#This does the actual training of the model
earlystop = EarlyStopping(monitor='val_acc', patience=5, verbose=2, mode='auto')
self.__model.fit_generator(generator=self.train_generator,
validation_data=self.valid_generator,
steps_per_epoch=self.s_per_e,
epochs=self.epochs,
validation_steps = self.v_per_e,
shuffle=False,
verbose=2,
callbacks=[earlystop])
model_filename = '_'.join([str(x) for x in now_list]) + '_model.h5'
self.__model.save(model_filename)
def model_evaluate(self):
self.model_fit_generator()
evaluation = self.__model.evaluate_generator(self.valid_generator, self.v_per_e, verbose=0)
return evaluation
How do I change this so that I have the validation data used once, at the end of each epoch, to decide whether early stopping is useful?
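To make the idea concrete, what I have in mind is something along these lines: a rough, untested sketch of a hypothetical callback (EpochEndValidation is my own name), not yet wired into EarlyStopping, just showing where I want the validation pass to happen:
from keras.callbacks import Callback

class EpochEndValidation(Callback):
    #Rough sketch: walk the validation Sequence exactly once, only at the end of each epoch
    def __init__(self, valid_generator, valid_steps):
        super(EpochEndValidation, self).__init__()
        self.valid_generator = valid_generator
        self.valid_steps = valid_steps

    def on_epoch_end(self, epoch, logs=None):
        #self.model is set by Keras; evaluate_generator runs the validation data once here
        val_loss = self.model.evaluate_generator(self.valid_generator, self.valid_steps)
        print('epoch', epoch, 'validation loss:', val_loss)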
EDIT: In response to a comment, here is a complete MWE showing that the validation data are being used at the same time as the training data. Note that this code will produce an error, but it also prints batch numbers to show that the validation and training sets are both being used. To run this code you would need 10 CSV files of data, which I can provide, but I'd rather just give you the output, which appears right after the code.
from __future__ import division
from __future__ import print_function
from pandas import concat
from pandas import DataFrame
import sys, keras, GPy, GPyOpt
import numpy as np
import pandas as pd
from keras import backend as K
from keras.models import Model
from keras.metrics import binary_crossentropy
from keras.layers import Dense, Input, LSTM, Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
class my_model():
def __init__(self, n_lags=2, hid_dim_1=5, epochs=2, batch_size=1):
self.n_lags = n_lags
self.hid_dim_1 = hid_dim_1
self.epochs = epochs
self.batch_size = batch_size
self.train_generator, self.s_per_e, self.valid_generator, self.v_per_e, self.n_vars = self.read_data()
self.__model = self.model()
def read_data(self):
n_vars = 2
num_sample_minibatches = 6
num_valid_minibatches = 4
sample_IDs = range(1, self.batch_size+num_sample_minibatches)
valid_IDs = range(num_sample_minibatches+1, max(sample_IDs)+num_valid_minibatches+1)
params = {'batch_size': self.batch_size, 'n_lags': self.n_lags, 'n_vars': n_vars}
train_generator = DataGenerator(sample_IDs, **params)
valid_generator = DataGenerator(valid_IDs, **params)
s_per_e = int(len(sample_IDs) - self.batch_size + 1) #e.g. if you have 1,2,3,4,5,6 then you can create 4 sequences of length 3 (batch_size)
v_per_e = int(len(valid_IDs) - self.batch_size + 1)
return train_generator, s_per_e, valid_generator, v_per_e, n_vars
def model(self):
#https://github.com/twairball/keras_lstm_vae/blob/master/lstm_vae/vae.py
a_input = Input(shape=(self.n_lags, self.n_vars,), name='a_input')
cond_on_this = Input(shape=(self.n_vars,), name="cond_on_this")
b_lstm = LSTM(self.hid_dim_1)(a_input)
outputs = Dense(self.hid_dim_1, activation='sigmoid')(b_lstm)
my_model1 = Model([a_input, cond_on_this], outputs)
my_model1.compile(optimizer=Adam(lr=0.001), loss=binary_crossentropy)
return my_model1
def my_model_fit_generator(self):
earlystop = EarlyStopping(monitor='val_acc', patience=5, verbose=2, mode='auto')
self.__model.fit_generator(generator=self.train_generator,
validation_data=self.valid_generator,
steps_per_epoch=self.s_per_e,
epochs=self.epochs,
validation_steps = self.v_per_e,
shuffle=False,
verbose=2,
callbacks=[earlystop])
def my_model_evaluate(self):
self.my_model_fit_generator()
evaluation = self.__model.evaluate_generator(self.valid_generator, self.v_per_e, verbose=0)
return evaluation
class DataGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, list_IDs, batch_size, n_lags, n_vars, shuffle=False):
'Initialization'
self.list_IDs = list_IDs
self.batch_size = batch_size
self.n_lags = n_lags
self.n_vars = n_vars
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
batches_per_epoch = int(np.floor(len(self.list_IDs) - self.batch_size + 1))
return batches_per_epoch
def __getitem__(self, index):
'Generate one batch of data'
#Here's my evidence that the validation minibatches are being used during training!
print('batch number: ', index+1, 'of: ', int(np.floor(len(self.list_IDs) - self.batch_size + 1)))
indexes = self.indexes[index:(index+self.batch_size)]
# Find list of IDs
list_IDs_temp = [self.list_IDs[k] for k in indexes]
# Generate data
data, cond_on_this = self.__data_generation(list_IDs_temp)
return [np.asarray(data), np.asarray(cond_on_this)], np.asarray(cond_on_this)
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.list_IDs))
if self.shuffle == True:
np.random.shuffle(self.indexes)
#From MachineLearningMastery
def series_to_supervised(self, data, n_out=1, dropnan=True):
n_vars = 1 if type(data) is list else data.shape[1]
df = DataFrame(data)
cols, names = list(), list()
#input sequence t-n, ..., t-1
for i in range(self.n_lags, 0, -1): #for i in 3 to 0 not including 0
cols.append(df.shift(i))
names += [('var%d(t-%d)' % (j+1, i)) for j in range (self.n_vars)]
#forecast sequence t, t+1, ..., t+n
for i in range(0, n_out):
cols.append(df.shift(-i))
if i==0:
names += [('var%d(t)' % (j+1)) for j in range(self.n_vars)]
else:
names += [('var%d(t+%d)' % (j+1, i)) for j in range(self.n_vars)]
agg = concat(cols, axis=1)
agg.columns = names
if dropnan:
agg.dropna(inplace=True)
return agg
def __data_generation(self, list_IDs_temp):
'Generates data containing batch_size samples'
data_np_array = np.empty((self.batch_size, self.n_vars), dtype=float)
for i, ID in enumerate(list_IDs_temp):
#Read in a data file corresponding to this ID; put it into the numpy array.
data_file = './pollution_' + str(i) + '.csv'
df_data = pd.read_csv(data_file, sep=",", header=0)
df_data.columns = ['date','pollution','dew','temp','press','wnd_dir','wnd_spd','snow','rain']
df_data_vals = df_data[['pollution', 'temp']] #this is shape (24, 2)
data_np_array[i,] = np.asarray(df_data_vals)
data_s2s = np.asarray(self.series_to_supervised(data_np_array))
data_data = data_s2s[:, :int(self.n_vars*self.n_lags)]
data_cond = data_s2s[:, int(self.n_vars*self.n_lags):]
data_data = data_data.reshape((data_data.shape[0], self.n_lags, self.n_vars))
return data_data, data_cond
def run_my_model(n_lags=2, hid_dim_1=5, epochs=2, batch_size=1):
_my_model = my_model(n_lags=n_lags, hid_dim_1=hid_dim_1, epochs=epochs, batch_size=batch_size)
mymodel_evaluation = _my_model.my_model_evaluate()
return mymodel_evaluation
#Bounds for hyperparameters
bounds = [{'name': 'hid_dim_1', 'type': 'discrete', 'domain': (5, 10)}]
#Bayesian Optimization
def f(x):
evaluation = run_my_model(hid_dim_1 = int(x[:,0]), epochs = 2, batch_size = 1)
print("binary crossentropy:\t{0}".format(evaluation[0]))
print(evaluation)
return evaluation
#Optimizer instance
opt_mymodel = GPyOpt.methods.BayesianOptimization(f=f, domain=bounds, initial_design_numdata=1)
#Run optimizer
opt_mymodel.run_optimization(max_iter=2)
opt_mymodel.x_opt
Relevant Output:
Using TensorFlow backend.
Epoch 1/2
batch number: 1 of: 4
batch number: 1 of: 6
batch number: 2 of: 4
batch number: 2 of: 6
batch number: 3 of: 4
batch number: 3 of: 6
batch number: 4batch number: 4 of: 4
of: 6
batch number: 5 of: 6
batch number: 6 of: 6
Traceback (most recent call last):
...Error after this...

Related

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` when using custom callback to plot conv layer feature maps

I'm trying to implement a custom callback to get the feature maps of each Conv2D layer in the network plotted in TensorBoard.
When I run the code in Example 1 I get the following error:
<ipython-input-44-b691dabedd05> in on_epoch_end(self, epoch, logs)
28
29 # 3) Build partial model
---> 30 partial_model = keras.Model(
31 inputs=self.model.model.input,
32 outputs=output_layers
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.base_layer.Layer object at 0x000002773C631CA0>
which seems as if it can't build the partial network, which is strange, because it succeeds when running it separately from the main thread.
Here is an example that illustrates the issue:
Example 1
import os
import io
import datetime as dt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt
'''
You can adjust the verbosity of the logs which are being printed by TensorFlow
by changing the value of TF_CPP_MIN_LOG_LEVEL:
0 = all messages are logged (default behavior)
1 = INFO messages are not printed
2 = INFO and WARNING messages are not printed
3 = INFO, WARNING, and ERROR messages are not printed
'''
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
DEBUG = False
class ConvModel(keras.Model):
def __init__(self, input_shape):
super().__init__()
self.input_image_shape = input_shape
self.model = keras.Sequential([
layers.Input(shape=input_shape),
layers.Conv2D(32, 3),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(64, 5),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(128, 3, kernel_regularizer=keras.regularizers.l2(0.01)),
layers.BatchNormalization(),
layers.ReLU(),
layers.Flatten(),
layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
layers.Dropout(0.5),
layers.Dense(10)
])
def call(self, inputs):
return self.model(inputs)
def find_sub_string(string: str, sub_string: str):
return True if string.find(sub_string) > -1 else False
def get_file_type(file_name: str):
file_type = None
if isinstance(file_name, str):
dot_idx = file_name.find('.')
if dot_idx > -1:
file_type = file_name[dot_idx + 1:]
return file_type
def get_image_from_figure(figure):
buffer = io.BytesIO()
plt.savefig(buffer, format='png')
plt.close(figure)
buffer.seek(0)
image = tf.image.decode_png(buffer.getvalue(), channels=4)
image = tf.expand_dims(image, 0)
return image
class ConvLayerVis(keras.callbacks.Callback):
def __init__(self, X, figure_configs: dict, log_dir: str, log_interval: int):
super().__init__()
self.X_test = X
n_dims = len(self.X_test.shape)
assert 2 < n_dims < 5, f'The number of dimensions of the test image should be greater than 2 and less than 5, but the current shape is {self.X_test.shape}'
# In case the image is not represented as a tensor - add a dimension to the left for the batch
if len(self.X_test.shape) < 4:
self.X_test = np.reshape(self.X_test, (1,) + self.X_test.shape)
self.file_writer = tf.summary.create_file_writer(log_dir)
self.figure_configs = figure_configs
self.log_interval = log_interval
def on_training_begin(self, logs=None):
pass
def on_epoch_end(self, epoch, logs=None):
# 1) Get the layers
if epoch % self.log_interval == 0:
# 1) Get the layers
output_layer_tuples = [(idx, layer) for idx, layer in enumerate(self.model.model.layers) if find_sub_string(layer.name, 'conv2d') or find_sub_string(layer.name, 'max_pooling2d')]
output_layers = [layer_tuple[1] for layer_tuple in output_layer_tuples]
# 2) Get the layer names
conv_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Conv 2D ') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'conv2d')]
max_pool_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Max Pooling 2D') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'max_pooling2d')]
layer_name_tuples = (conv_layer_name_tuples + max_pool_layer_name_tuples)
layer_name_tuples.sort(key=lambda x: x[0])
layer_names = [layer_name_tuple[1] for layer_name_tuple in layer_name_tuples]
# 3) Build partial model
partial_model = keras.Model(
inputs=model.model.input,
outputs=output_layers
)
# 4) Get the feature maps
feature_maps = partial_model.predict(self.X_test)
# 5) Plot
rows, cols = self.figure_configs.get('rows'), self.figure_configs.get('cols')
for feature_map, layer_name in zip(feature_maps, layer_names):
fig, ax = plt.subplots(rows, cols, figsize=self.figure_configs.get('figsize'))
for row in range(rows):
for col in range(cols):
ax[row][col].imshow(feature_map[0, :, :, row+col], cmap=self.figure_configs.get('cmap'))
fig.suptitle(f'{layer_name}')
with self.file_writer.as_default():
tf.summary.image(f'{layer_name} Feature Maps', get_image_from_figure(figure=fig), step=epoch)
if __name__ == '__main__':
print(tf.config.list_physical_devices('GPU'))
# Load the data
(X, y), (X_test, y_test) = cifar10.load_data()
X, X_test = X.astype(np.float32) / 255.0, X_test.astype(np.float32) / 255.0
n, w, h, c = X.shape[0], X.shape[1], X.shape[2], X.shape[3]
n_test, w_test, h_test, c_test = X_test.shape[0], X_test.shape[1], X_test.shape[2], X_test.shape[3]
print(f'''
Dataset Stats:
Number of train images: {n}
Dimensions:
> Train:
width = {w}, height = {h}, channels = {c}
> Test:
width = {w_test}, height = {h_test}, channels = {c_test}
''')
# Model with keras.Sequential
model = ConvModel(input_shape=(w, h, c))
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(learning_rate=3e-4), metrics=['accuracy'])
log_dir = f'./logs/{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
callbacks = [
keras.callbacks.TensorBoard(
log_dir=log_dir,
write_images=True
),
ConvLayerVis(
X=X[0],
figure_configs=dict(rows=5, cols=5, figsize=(35, 35), cmap='gray'),
log_dir=f'{log_dir}/train',
log_interval=3
)
]
model.fit(
X,
y,
batch_size=64,
epochs=15,
callbacks=callbacks
)
Thanks in advance for any help regarding this issue.
Just figured out the problem:
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]
The list comprehension should recover the output attribute of each layer, rather than the layer object itself.
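In other words, the relevant part of on_epoch_end becomes (a sketch of the fix described above):
# 3) Build partial model from the layers' output tensors (not the layer objects)
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]
partial_model = keras.Model(
    inputs=self.model.model.input,
    outputs=output_layers
)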

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 newsgroups dataset available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data. Then I turned the resulting sparse matrix into a numpy array before feeding it. After that, when coding the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
It gave an error saying that 'inputs_' should have 3 dimensions. So I used:
inputs_ = tf.expand_dims(inputs_, 0)
To expand the dimension. But when I do that i get the error:
ValueError: Input size (depth of inputs) must be accessible via shape
inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
def pre_process():
newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
vectorizer = TfidfVectorizer()
features = vectorizer.fit_transform(newsgroups_data.data)
lb = LabelBinarizer()
labels = np.reshape(newsgroups_data.target, [-1])
labels = lb.fit_transform(labels)
return features, labels
def get_batches(x, y, batch_size=1):
for ii in range(0, len(y), batch_size):
yield x[ii:ii + batch_size], y[ii:ii + batch_size]
def plot_error(errorplot, datapoint, numberOfWrongPreds):
errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
errorplot.autoscale(enable=True, axis='both', tight=None)
plt.draw()
def train_test():
features, labels = pre_process()
#Defining Hyperparameters
epochs = 1
lstm_layers = 1
batch_size = 1
lstm_size = 30
learning_rate = 0.003
print(lstm_size)
print(batch_size)
print(epochs)
#--------------placeholders-------------------------------------
# Create the graph object
graph = tf.Graph()
# Add nodes to the graph
with graph.as_default():
tf.set_random_seed(1)
inputs_ = tf.placeholder(tf.float32, [None,None], name = "inputs")
# labels_ = tf.placeholder(dtype= tf.int32)
labels_ = tf.placeholder(tf.int32, [None,None], name = "labels")
#getting dynamic batch size according to the input tensor size
# dynamic_batch_size = tf.shape(inputs_)[0]
#output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.
keep_prob = tf.placeholder(tf.float32, name = "keep_prob")
# Your basic LSTM cell
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
# Add dropout to the cell
drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
#Stack up multiple LSTM layers, for deep learning
cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
# Getting an initial state of all zeros
initial_state = cell.zero_state(batch_size, tf.float32)
inputs_ = tf.expand_dims(inputs_, 0)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
#hidden layer
hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
saver = tf.train.Saver()
# ----------------------------online training-----------------------------------------
with tf.Session(graph=graph) as sess:
tf.set_random_seed(1)
sess.run(tf.global_variables_initializer())
iteration = 1
state = sess.run(initial_state)
wrongPred = 0
errorplot, = plt.plot([], [])
for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
feed = {inputs_: x.toarray(),
labels_: y,
keep_prob: 0.5,
initial_state: state}
predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)
print("----------------------------------------------------------")
print("Iteration: {}".format(iteration))
print("Prediction: ", predictions)
print("Actual: ",y)
pred = np.array(predictions)
print(pred)
print(y)
if not ((pred==y).all()):
wrongPred += 1
if ii % 27 == 0:
plot_error(errorplot,ii,wrongPred)
loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
print("Train loss: {:.3f}".format(loss))
iteration += 1
saver.save(sess, "checkpoints/sentiment.ckpt")
errorRate = wrongPred/len(labels)
print("ERROR RATE: ", errorRate )
if __name__ == '__main__':
train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because you don't specify the input size (depth) or the number of inputs.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
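The reason this works, as far as I can tell, is that dynamic_rnn needs the last (depth) dimension to be statically known: expanding on axis 2 turns each (1, 134410) batch into shape (1, 134410, 1), i.e. 134410 timesteps of depth 1, whereas expanding on axis 0 leaves the depth as None. A quick shape check illustrates the difference:
import tensorflow as tf

inputs_ = tf.placeholder(tf.float32, [1, None], name="inputs")
print(tf.expand_dims(inputs_, 0).get_shape().as_list())  # [1, 1, None] -> depth still unknown
print(tf.expand_dims(inputs_, 2).get_shape().as_list())  # [1, None, 1] -> depth is 1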

Logistic regression 100% test accuracy: What am I doing wrong?

I am using TensorFlow on the MNIST handwritten digits. I keep getting 100% accuracy on the test set when using logistic regression (I am expecting much lower accuracy).
The following is the code I used. Can someone please point out what I am doing wrong?
I must be somehow overfitting the data, but I cannot figure out why the accuracy is this high. Also, when I tested the model on random handwritten digits, it doesn't always get the prediction right, so most likely the way I am calculating the accuracy is incorrect. I believe I am using the correct formulas here, but I am stumped. Any help will be much appreciated. Thanks.
import tensorflow as tf
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.5)
s = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_digits
cl = 2 # number of classes = 2
mnist = load_digits(cl)
X,y = mnist.data, mnist.target
# inputs and shareds
weights = tf.Variable(tf.zeros([64, cl])) # 8x8 image, 2 output classes
b = tf.Variable(tf.zeros([cl]))
input_X = tf.placeholder('float32', [None,64])
input_y = tf.placeholder('float32', [None,cl])
accuracy_train = tf.Variable(tf.zeros([1]))
accuracy_test = tf.Variable(tf.zeros([1]))
# create model to predict y
predicted_y = tf.nn.sigmoid(tf.matmul(input_X, weights) + b)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predicted_y, labels = input_y))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
def train_function(X, y):
weights, c = s.run([optimizer, loss], {input_X:X, input_y:y})
return weights, c
def predict_function(X):
predict = s.run(probs, {input_X:X})
return predict
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
y_train.shape = (y_train.shape[0],1)
y_test.shape = (y_test.shape[0],1)
# one-hot encoding
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)
y_train = enc.fit_transform(y_train).astype('int')
y_test = enc.fit_transform(y_test).astype('int')
from sklearn.metrics import roc_auc_score
batch_size =16
s.run(tf.global_variables_initializer())
for epoch in range(15):
avg_loss = 0
num_batches = int(X_train.shape[0]/batch_size)
for i in range(num_batches):
batch_x, batch_y = X_train[i:i+num_batches], y_train[i:i+num_batches]
_, c = train_function(batch_x, batch_y)
avg_loss += c/num_batches
print("\nloss at iter %i:%.4f" % (i, np.sum(c)))
print("train auc:",roc_auc_score(y_train, s.run(predicted_y, {input_X:X_train})))
print("test auc:",roc_auc_score(y_test, s.run(predicted_y, {input_X:X_test})))
print("Average loss = ", avg_loss)
plt.scatter(epoch,avg_loss, color='b', marker='x')
plt.xlabel('epoch')
plt.ylabel('Avg loss')
# Prediction
probs = tf.equal(tf.argmax(predicted_y,1), tf.argmax(input_y,1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(probs, tf.float32))
print("Training accuracy: ",s.run(accuracy, {input_X:X_train, input_y:y_train}))
print("Test accuracy: ", s.run(accuracy, {input_X:X_test, input_y:y_test}))
# Testing on a random image
fn = ["0.png", "1.jpg", "1_1.jpg", "One.png", "1_2.jpeg", "1-3.png"]
filename = np.random.choice(fn)
from PIL import Image
def resize_image(image):
img = Image.open(image)
img = img.resize((8,8))
arr = np.array(img)
#convert to gray scale
if len(arr.shape) > 2:
arr = np.mean(arr, 2)
#flatten
arr = arr.flatten()
return arr
if any("0" in s for s in filename):
test_label = 0
elif any("1" in s for s in filename):
test_label = 1
elif any("2" in s for s in filename):
test_label = 2
elif any("3" in s for s in filename):
test_label = 3
elif any("4" in s for s in filename):
test_label = 4
elif any("5" in s for s in filename):
test_label = 5
elif any("6" in s for s in filename):
test_label = 6
elif any("7" in s for s in filename):
test_label = 7
elif any("8" in s for s in filename):
test_label = 8
elif any("9" in s for s in filename):
test_label = 9
test_label
test_image = resize_image(filename)
new_predict = predict_function(test_image.reshape((1, 64)))
print("predicted label: ", s.run(tf.argmax(new_predict, 1))[0])

Evaluating CNN model for one image

I'm new to TensorFlow and still trying it out, so if any of you are able to help me I will really appreciate it.
So I've created a CNN model and trained it to classify a series of images into 2 categories, for example FLOWERS and OTHERS. I think I did a good job with that, but if you have any ideas on how I can improve this model, please let me know.
But my problem is: after I train this model, how can I use it to classify just one specific image? I don't want to use batches if possible. Could anyone give me some advice or examples about this, please?
My Code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import math
import numpy as np
from PIL import Image
import tensorflow as tf
import os
# Basic model parameters as external flags.
flags = tf.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 10000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 256, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 64, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 32, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', "ModelData/data", 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
'for unit testing.')
NUM_CLASSES = 2
IMAGE_SIZE = 200
CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNELS
# starter_learning_rate = 0.1
def inference(images, hidden1_units, hidden2_units):
# Hidden 1
with tf.name_scope('hidden1'):
weights = tf.Variable(
tf.truncated_normal([IMAGE_PIXELS, hidden1_units], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
name='weights')
biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
# Hidden 2
with tf.name_scope('hidden2'):
weights = tf.Variable(
tf.truncated_normal([hidden1_units, hidden2_units], stddev=1.0 / math.sqrt(float(hidden1_units))),
name='weights')
biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
# Linear
with tf.name_scope('softmax_linear'):
weights = tf.Variable(
tf.truncated_normal([hidden2_units, NUM_CLASSES], stddev=1.0 / math.sqrt(float(hidden2_units))),
name='weights')
biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
logits = tf.matmul(hidden2, weights) + biases
return logits
def cal_loss(logits, labels):
labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
def training(loss, learning_rate):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
def evaluation(logits, labels):
correct = tf.nn.in_top_k(logits, labels, 1)
return tf.reduce_sum(tf.cast(correct, tf.int32))
def placeholder_inputs(batch_size):
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
return images_placeholder, labels_placeholder
def fill_feed_dict(images_feed, labels_feed, images_pl, labels_pl):
feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
}
return feed_dict
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
# And run one epoch of eval.
true_count = 0 # Counts the number of correct predictions.
steps_per_epoch = 32 // FLAGS.batch_size
num_examples = steps_per_epoch * FLAGS.batch_size
for step in range(steps_per_epoch):
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
true_count += sess.run(eval_correct, feed_dict=feed_dict)
precision = true_count / num_examples
print(' Num examples: %d Num correct: %d Precision # 1: %0.04f' % (num_examples, true_count, precision))
# Get the sets of images and labels for training, validation, and
def init_training_data_set(dir):
train_images = []
train_labels = []
def GetFoldersList():
mylist = []
filelist = os.listdir(dir)
for name in filelist:
if os.path.isdir(os.path.join(dir, name)):
mylist.append(name)
return mylist
def ReadImagesFromFolder(folder):
fin_dir = os.path.join(dir, folder)
images_name = os.listdir(fin_dir)
images = []
for img_name in images_name:
img_location = os.path.join(dir, folder)
final_loc = os.path.join(img_location, img_name)
try:
hash_folder = int(folder.split("_")[0])
images.append((np.array(Image.open(final_loc).convert('RGB')), hash_folder))
except:
pass
return images
folders = GetFoldersList()
for folder in folders:
for imgs in ReadImagesFromFolder(folder):
train_images.append(imgs[0])
train_labels.append(imgs[1])
return train_images, train_labels
train_images, train_labels = init_training_data_set(os.path.join("FetchData", "Image"))
train_images = np.array(train_images)
train_images = train_images.reshape(len(train_images), IMAGE_PIXELS)
train_labels = np.array(train_labels)
def restore_model_last_version(saver, sess):
def get_biggest_index(folder):
import re
index_vals = []
for file in os.listdir(folder):
split_data = file.split(".")
extension = split_data[len(split_data) - 1]
if extension == "meta":
index = int(re.findall(r"\d+", file)[0])
index_vals.append(index)
index_vals.sort(reverse=True)
if index_vals:
return index_vals[0]
else:
return ""
real_path = os.path.abspath(os.path.split(FLAGS.train_dir)[0])
index = get_biggest_index(real_path)
isdir = os.path.isdir(real_path)
is_empty = True
if isdir:
if os.listdir(real_path):
is_empty = False
if not is_empty:
saver.restore(sess, FLAGS.train_dir + "-" + str(index))
def run_training():
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
# Generate placeholders for the images and labels.
images_placeholder, labels_placeholder = placeholder_inputs(len(train_images))
# Build a Graph that computes predictions from the inference model.
logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
# Add to the Graph the Ops for loss calculation.
loss = cal_loss(logits, labels_placeholder)
# Add to the Graph the Ops that calculate and apply gradients.
train_op = training(loss, FLAGS.learning_rate)
# Add the Op to compare the logits to the labels during evaluation.
eval_correct = evaluation(logits, labels_placeholder)
# Create a saver for writing training checkpoints.
saver = tf.train.Saver(save_relative_paths=True)
# Create a session for running Ops on the Graph.
# sess = tf.Session()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.9
# gpu_options = tf.GPUOptions(allow_growth=True)
# sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess = tf.Session(config=config)
# Run the Op to initialize the variables.
# init = train_op.g
init = tf.global_variables_initializer()
sess.run(init)
restore_model_last_version(saver, sess)
# And then after everything is built, start the training loop.
for step in range(FLAGS.max_steps):
start_time = time.time()
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
if (step) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
print("Current step is: " + str(step))
print("Current los value: " + str(loss_value))
print("Current duration: " + str(duration))
print("\n")
saver.save(sess, save_path=FLAGS.train_dir, global_step=step)
print('Training Data Eval:')
do_eval(sess, eval_correct, images_placeholder, labels_placeholder, train_images)
def main(_):
run_training()
if __name__ == '__main__':
tf.app.run()
So if anyone could help me with this and knows how I can run that evaluation for just one picture, please help me.
Thanks :)
Pretty much every operation in TensorFlow expects you to pass a batched input to make good use of the parallelization capabilities of modern GPUs.
Now, if you want to infer on a single image, you simply need to consider this image as a batch of size 1. Here is a quick code snippet:
# Load image
img = np.array(Image.open(your_path).convert('RGB'))
# Expand dimensions to simulate a batch of size 1
img = np.expand_dims(img, 0)
...
# Get prediction
pred = sess.run(tf.nn.softmax(logits), {images_placeholder: img})
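One caveat: in the training script above the placeholder is created with a fixed batch dimension and flattened pixels (placeholder_inputs(len(train_images)) gives shape (N, IMAGE_PIXELS)), so for a single image you would also resize and flatten it and use a placeholder whose batch dimension is None. A rough sketch under those assumptions, with checkpoint restoring omitted:
# Sketch: single-image inference, assuming the same 200x200x3 flattened input as training
images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
# (the weights would still need to be restored from a checkpoint via the saver)

img = np.array(Image.open(your_path).convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE)), dtype=np.float32)
img = img.reshape(1, IMAGE_PIXELS)  # batch of size 1, flattened like the training data
pred = sess.run(tf.nn.softmax(logits), {images_placeholder: img})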

Keras error Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (10, 5, 95)

I'm trying to create a simple RNN using keras but I'm getting this error.
Input is a stream of letters represented by binary classes. The shape is (10, 5, 95): a batch of 10 sequences, 5 letters at a time, one-hot encoded over 95 characters in total.
I'm guessing it has something to do with incorrect input fed back as input but I'm not sure how to handle it.
Traceback (most recent call last):
File "07_rnn.py", line 90, in <module>
model.fit(x, y, epochs=3, batch_size=BATCHSIZE)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/models.py", line 965, in fit
validation_steps=validation_steps)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1593, in fit
batch_size=batch_size)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1430, in _standardize_user_data
exception_prefix='target')
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 110, in _standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (10, 5, 95)
Code
import numpy as np
import glob
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
CHARMAP = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-=!##$%^&*()_+`~[]\{}|;':\",./<>?"
SEQLEN = 5
BATCHSIZE = 10
ALPHASIZE = len(CHARMAP)
INTERNALSIZE = 128
FILES = "shakespeare/*.txt"
LEARNING_RATE = 0.001
## Data related stuff
def char_to_value(char):
idx = CHARMAP.find(char)
if idx >= 0:
return idx
else:
return 0
def char_to_class_map(char):
value = char_to_value(char)
return to_categorical(value,ALPHASIZE)
def value_to_char(value):
return CHARMAP[value]
# iterate every single file
def get_file_data(pattern, index):
paths = glob.glob(pattern)
length = len(paths)
if index < length:
data = []
with open(paths[index], "r") as file:
for line in file:
line_values = [char_to_class_map(l) for l in line]
data = data + list(line_values)
return data
else:
return None
# get batch data in file
def build_line_data(file_data, seqlen, batch_index, batch_count):
length = len(file_data)
start = batch_index * batch_count
end = start+seqlen
x = []
y = []
while end+1 <= length and len(x) < batch_count:
x_line = file_data[start:end]
y_line = file_data[start+1:end+1]
x.append(x_line)
y.append(y_line)
start = start + 1
end = start + seqlen
x = np.array(x)
y = np.array(y)
return x,y
def create_model():
model = Sequential()
model.add(LSTM(INTERNALSIZE,input_shape=(SEQLEN, ALPHASIZE)))
model.add(Dense(ALPHASIZE))
model.add(Activation('softmax'))
#adam optimizer
optimizer = Adam(lr=LEARNING_RATE)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
return model
print('before create_model')
model = create_model()
print('after create_model')
for i in range(1):
print('before get file data')
file_data = get_file_data(FILES, i)
print('after get file data')
idx = 0
while True:
print('before build line data')
x,y = build_line_data(file_data, SEQLEN, idx ,BATCHSIZE)
print('after build line data')
print('before fit')
model.fit(x, y, epochs=3, batch_size=BATCHSIZE)
print('after fit')
idx = idx + 1
if 0 == len(x):
break
if idx > 10:
break
github link: https://github.com/djaney/ml-studies/blob/master/07_rnn.py
Edit:
return_sequences=True in LSTM fixed it.
What are you trying to predict? If it is a sequence-to-sequence model, then return_sequences=True is the right way to go.
The reason for the error is that your target is 3-dimensional (batch_size, sequence_length, features), while the LSTM layer only outputs (batch_size, features) for the last timestep of the sequence when return_sequences=False.
So depending on your application you either have to change the shape of your targets or set return_sequences=True, as you already did.
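For reference, a minimal sketch of what the corrected model might look like, assuming you want a 95-way prediction at every timestep so the output matches the (10, 5, 95) targets:
def create_model():
    model = Sequential()
    # return_sequences=True makes the LSTM emit one vector per timestep, so the
    # Dense/softmax layers are applied per timestep and the output shape becomes
    # (batch, SEQLEN, ALPHASIZE), matching the shape of the targets
    model.add(LSTM(INTERNALSIZE, input_shape=(SEQLEN, ALPHASIZE), return_sequences=True))
    model.add(Dense(ALPHASIZE))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=LEARNING_RATE))
    return model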
