How do I know what the output of model.predict() corresponds to? - keras

I am trying to make a CNN that classifies cats and dogs and I am using flow_from_directory() to prepare my data for the model.
from keras import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.callbacks import ModelCheckpoint
from keras.optimizers import *
import keras
import numpy as np
import os
img_size = 250 # number of pixels for width and height
#Random Seed
np.random.seed(123456789)
training_path = os.getcwd() + "/cats and dogs images/train"
testing_path = os.getcwd() + "/cats and dogs images/test"
#Defines the Model
model = Sequential([
Conv2D(filters=128, kernel_size=(3,3), activation="relu", padding="same", input_shape=(img_size,img_size,3)),
MaxPool2D(pool_size=(2,2), strides=2),
Conv2D(filters=64, kernel_size=(3,3), activation="relu", padding="same"),
Flatten(),
Dense(32, activation="relu"),
Dense(2, activation="softmax")
])
#Scales the pixel values to between 0 to 1
datagen = ImageDataGenerator(rescale=1.0/255.0)
Batch_size = 10
#Prepares Training Data
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Compiles the model
#model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=['accuracy'])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])
#model.compile(loss="mse", optimizer="sgd", metrics=[keras.metrics.MeanSquaredError()])
#Checkpoint
filepath = os.getcwd() + "/trained_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min', save_freq=1)
#Fitting the model to the dataset (Training the Model)
model.fit(x = training_dataset, steps_per_epoch = 400,
validation_data=testing_dataset, validation_steps=100,
epochs = 10, callbacks=[checkpoint], verbose = 1)
# evaluate model on training dataset
_,acc = model.evaluate_generator(training_dataset, steps=len(training_dataset), verbose=0)
print("Accuracy on training dataset:")
print('> %.3f' % (acc * 100.0))
#evaluate model on testing dataset
_,acc = model.evaluate_generator(testing_dataset, steps=len(testing_dataset), verbose=0)
print("Accuracy on testing dataset:")
print('> %.3f' % (acc * 100.0))
I want to know how the output of model.predict() corresponds to the labels cat and dog: which of the two numbers in the output is the cat probability and which is the dog probability?
Here's my code for loading the model and giving a prediction:
from keras.models import Sequential
from keras_preprocessing.image import *
from keras.layers import *
import tensorflow as tf
import numpy as np
from keras.layers.experimental.preprocessing import Rescaling
import os
import cv2
from keras.models import *
img_size = 250
#Load weights into new model
filepath = os.getcwd() + "/trained_model.h5"
model = load_model(filepath)
print("Loaded model from disk")
#Scales the pixel values to between 0 to 1
#datagen = ImageDataGenerator(rescale=1.0/255.0)
#Prepares Testing Data
testing_dataset = cv2.imread(os.getcwd() + "/cats and dogs images/single test sample/507.png")
#img = datagen.flow_from_directory(testing_dataset, target_size=(img_size,img_size))
img = cv2.resize(testing_dataset, (img_size,img_size))
newimg = np.asarray(img)
pixels = newimg.astype('float32')
pixels /= 255.0
print(pixels.shape)
pixels = np.expand_dims(pixels, axis=0)
print(pixels.shape)
prediction = model.predict(pixels)
print(prediction)
And here is the output from the prediction code above:
Loaded model from disk
(250, 250, 3)
(1, 250, 250, 3)
[[5.4904184e-27 1.0000000e+00]]
As you can see, the prediction gave an array of two numbers, but which one corresponds to the dog label and which to the cat label? By the way, the model isn't fully trained so I am just testing out the code to see if it works.

The order of the model's outputs depends on how you loaded the data and how you specified the class ordering in the code you provided:
training_dataset = datagen.flow_from_directory(directory = training_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
#Prepares Testing Data
testing_dataset = datagen.flow_from_directory(directory = testing_path,
target_size=(img_size,img_size),
classes = ["cat","dog"],
class_mode = "categorical",
batch_size = Batch_size)
You specified, when loading the data, that the classes are ordered cat then dog via the classes argument.
Therefore the output is ordered as two probabilities (summing to 1).
The first probability is how likely the input image is a cat, and the second is how likely it is a dog.
You can use this line:
output_class = np.argmax(prediction, axis=1)
It returns the index of the largest element of the prediction (in our case, the list containing the two probabilities). An output of [1] (or [0, 1], depending on the shape of the output) means the image is a dog, since the second element has the highest probability; [0] (or [1, 0]) means the predicted class is cat.
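A minimal sketch of turning that prediction into a label, hardcoding the mapping implied by classes=["cat","dog"] (you can confirm it by printing training_dataset.class_indices in the training script):
import numpy as np
# With classes=["cat", "dog"], flow_from_directory assigns cat -> index 0 and dog -> index 1
index_to_label = {0: "cat", 1: "dog"}
prediction = model.predict(pixels)                           # e.g. [[5.49e-27, 1.0]]
predicted_index = int(np.argmax(prediction, axis=1)[0])
print("Predicted class:", index_to_label[predicted_index])  # -> "dog" for the output above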

Related

Load model from Keras and investigate the details

I'm using Keras to fit a function, and I'm new to Keras.
With a very simple network, Keras can fit my function very well. I just want to know what that fitted function actually is and try to understand why it works so well, but the "predict" function hides the details.
Here is the code I use to create the network:
import numpy as np
import tensorflow as tf
from tensorflow import keras
LABEL_COLUMN = "shat"
BATCH_SIZE = 16
EPOCHS = 20
trainfilePath = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0train.csv"
testfilePath = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0test.csv"
with open(trainfilePath, encoding='utf-8') as txtContent:
    trainArray = np.loadtxt(txtContent, delimiter=",")
with open(testfilePath, encoding='utf-8') as txtContent:
    testArray = np.loadtxt(txtContent, delimiter=",")
trainSample = trainArray[:, 0:14]
trainLable = trainArray[:, 14]
testSample = testArray[:, 0:14]
testLable = testArray[:, 14]
model = keras.Sequential([
keras.layers.Dense(14, activation='relu', input_shape=[14]),
keras.layers.Dense(15, activation='relu'),
keras.layers.Dense(1)
])
optimizer = tf.keras.optimizers.RMSprop(0.001)
# optimizer = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
model.summary()
history = model.fit(trainSample, trainLable, epochs=EPOCHS, batch_size=BATCH_SIZE)
model.evaluate(testSample, testLable, verbose=1)
model.save("F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5")
What I understand is:
each layer is a weight matrix plus a bias vector, and it works as
out = max(0, weight * input + bias)
After some searching, I found I can read the .h5 file using
import h5py
import numpy as np
FILENAME = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5"
with h5py.File(FILENAME, 'r') as f:
    dense_1 = f['/model_weights/dense_1/dense_1']
    dense_1_bias = dense_1['bias:0'][:]
    dense_1_kernel = dense_1['kernel:0'][:]
    dense_2 = f['/model_weights/dense_2/dense_2']
    dense_2_bias = dense_2['bias:0'][:]
    dense_2_kernel = dense_2['kernel:0'][:]
# print("Weight matrix 1:\n")
# print(dense_1_kernel)
# print("Basis matrix 1:\n")
# print(dense_1_bias)
# print("Weight matrix 2:\n")
# print(dense_2_kernel)
# print("Basis matrix 2:\n")
# print(dense_2_bias)
def layer_output(v, kernel, bias):
    return np.dot(v, kernel) + bias
reluFunction = np.vectorize(lambda x: x if x >= 0.0 else 0.0)
testV = np.array([[-0.004090321213057993,
0.009615388501909157,
-0.24223693596921558,
0.015504079563927319,
-0.02659541428995062,
0.018512968977547152,
0.00836788544720289,
-0.10874776132746002,
-0.045863474556415526,
-0.010195799916571194,
0.09474219315939948,
0.03606698737846194,
-0.004560110004741025,
0.028042417959738858]])
output_1 = layer_output(testV, dense_1_kernel, dense_1_bias)
output_2 = reluFunction(output_1)
output_3 = layer_output(output_2, dense_2_kernel, dense_2_bias)
output_4 = reluFunction(output_3)
However, the result of output_4 is very different from what I get using
loaded_model = keras.models.load_model("F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5")
predicted = loaded_model(testV)
The "predicted" is very close to the ground truth while "output_4" is far away from the ground truth.
I get stuck here and don't know why and failed to find information about how to extract the function I want from the Keras model, I need your help!
Thanks!
model = keras.Sequential([
keras.layers.Dense(14, activation='relu', input_shape=[14]),
keras.layers.Dense(15, activation='relu'),
keras.layers.Dense(1)
])
In your model there are 3 Dense layers; the last Dense layer has weights and a bias too, but you didn't include it in your calculation. Note from your model definition that this last layer has no activation (it is linear), so its output should not be passed through ReLU.
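A sketch of the full manual forward pass, reusing the testV array and the layer_output/reluFunction helpers from the question, and assuming the third layer's weights are stored under /model_weights/dense_3/dense_3 in the same .h5 layout as the first two (check the exact group names with f['model_weights'].keys()):
with h5py.File(FILENAME, 'r') as f:
    dense_3 = f['/model_weights/dense_3/dense_3']   # assumed group name
    dense_3_bias = dense_3['bias:0'][:]
    dense_3_kernel = dense_3['kernel:0'][:]
output_1 = layer_output(testV, dense_1_kernel, dense_1_bias)          # Dense(14)
output_2 = reluFunction(output_1)                                     # relu
output_3 = layer_output(output_2, dense_2_kernel, dense_2_bias)       # Dense(15)
output_4 = reluFunction(output_3)                                     # relu
final_output = layer_output(output_4, dense_3_kernel, dense_3_bias)   # Dense(1), linear: no ReLU
print(final_output)
With the third layer included (and no ReLU after it), the manual result should closely match loaded_model(testV).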

CNN validation accuracy high, but bad at predictions?

I am trying to build a CNN to distinguish between 3 classes, which are genuine faces, printed faces, and replayed faces. I prepared the data as follows:
classes = ['Genuine', 'Printed', 'Replay']
base_dir = '/Dataset'
import os
import numpy as np
import glob
import shutil
for cl in classes:
    img_path = os.path.join(base_dir, cl)
    images = glob.glob(img_path + '/*.jpg')
    print("{}: {} Images".format(cl, len(images)))
    num_train = int(round(len(images)*0.8))
    train, val = images[:num_train], images[num_train:]
    for t in train:
        if not os.path.exists(os.path.join(base_dir, 'train', cl)):
            os.makedirs(os.path.join(base_dir, 'train', cl))
        shutil.move(t, os.path.join(base_dir, 'train', cl))
    for v in val:
        if not os.path.exists(os.path.join(base_dir, 'val', cl)):
            os.makedirs(os.path.join(base_dir, 'val', cl))
        shutil.move(v, os.path.join(base_dir, 'val', cl))
from tensorflow.keras.preprocessing.image import ImageDataGenerator
image_gen_train = ImageDataGenerator(
rescale=1./255,
rotation_range=45,
width_shift_range=.15,
height_shift_range=.15,
horizontal_flip=True,
zoom_range=0.5
)
batch_size = 32
IMG_SHAPE = 96
train_data_gen = image_gen_train.flow_from_directory(
batch_size=batch_size,
directory=train_dir,
shuffle=True,
target_size=(IMG_SHAPE,IMG_SHAPE),
class_mode='sparse'
)
I built a simple model like the following:
## Model
import tensorflow as tf
from keras import regularizers
from keras.layers.normalization import BatchNormalization
IMG_SHAPE = (96, 96, 3)
batch_size = 32
## Trainable classification head
aConv_layer = tf.keras.layers.Conv2D(576, (3, 3), padding="same",
activation="relu", input_shape= IMG_SHAPE)
aConv_layer = tf.keras.layers.Conv2D(144, (3, 3), padding="same",
activation="relu", input_shape= IMG_SHAPE)
gmaxPool_layer = tf.keras.layers.GlobalMaxPooling2D() #reduces input from 4D to 2D
maxPool_layer = tf.keras.layers.MaxPool2D(pool_size=(1, 1), strides=None,
padding='valid', data_format=None,
)
batNor_layer = tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99,
epsilon=0.001,
center=True, scale=True,
beta_initializer='zeros',
gamma_initializer='ones',
moving_mean_initializer='zeros',
moving_variance_initializer='ones',
beta_regularizer=None, gamma_regularizer=None,
beta_constraint=None, gamma_constraint=None)
flat_layer = tf.keras.layers.Flatten()
dense_layer = tf.keras.layers.Dense(9, activation='softmax',
kernel_regularizer=regularizers.l2(0.01))
prediction_layer = tf.keras.layers.Dense(3, activation='softmax')
model = tf.keras.Sequential([
#base_model,
tf.keras.layers.Conv2D(576, (3, 3), padding="same", activation="relu", input_shape= IMG_SHAPE),
tf.keras.layers.Dense(288, activation='softmax', kernel_regularizer=regularizers.l2(0.01)),
tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None),
tf.keras.layers.Conv2D(144, (3, 3), padding="same", activation="relu"),
tf.keras.layers.Dense(72, activation='softmax', kernel_regularizer=regularizers.l2(0.01)),
tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None),
#
batNor_layer,
gmaxPool_layer,
tf.keras.layers.Flatten(),
#tf.keras.layers.Dropout(0.5),
prediction_layer
])
learning_rate = 0.001
## Compiles the model
model.compile(optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
I trained the model and got high training and validation accuracy, which I would assume to be great results.
However, whenever I tried to predict an image with the following code, it would almost always get it wrong:
import numpy as np
from google.colab import files
from keras.preprocessing import image
uploaded = files.upload()
for fn in uploaded.keys():
    # predicting images
    path = fn
    img = image.load_img(path, target_size=(96, 96))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(fn)
    print('Genuine | Printout | Replay')
    print(np.argmax(classes))
How can the predictions be wrong when the validation accuracy is so high?
Here is the Codelab, if it helps.
Process the images for prediction in the same way that you processed your images for training. Specifically, rescale your images like you did with ImageDataGenerator.
import numpy as np
from google.colab import files
from keras.preprocessing import image
uploaded = files.upload()
for fn in uploaded.keys():
    # predicting images
    path = fn
    img = image.load_img(path, target_size=(96, 96))
    x = image.img_to_array(img)
    # Rescale image.
    x = x / 255.
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(fn)
    print('Genuine | Printout | Replay')
    print(np.argmax(classes))
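As a side note on the design choice: you can make this kind of mismatch impossible by baking the rescaling into the model itself with a Rescaling preprocessing layer, so the exact same scaling is applied at training and inference time (the rescale argument of ImageDataGenerator must then be dropped to avoid scaling twice). A rough sketch, assuming TensorFlow 2.x:
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
model = tf.keras.Sequential([
    Rescaling(1./255, input_shape=(96, 96, 3)),  # scales raw 0-255 pixels to 0-1 inside the model
    tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(3, activation="softmax"),
])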
Somehow, the Keras image generator works well when combined with fit() or fit_generator(), but fails miserably when combined with predict_generator() or predict(). In my experience, Keras predict() tends to fail when used for batch prediction this way.
Since I use the PlaidML Keras backend for an AMD processor, I would rather loop through all test images one by one and get the prediction for each image in each iteration.
import os
from PIL import Image
import keras
import numpy
# code for creating dan training model is not included
print("Prediction result:")
dir = "/path/to/test/images"
files = os.listdir(dir)
correct = 0
total = 0
#dictionary to label all animal category class.
classes = {
0:'This is Cat',
1:'This is Dog',
}
for file_name in files:
    total += 1
    image = Image.open(dir + "/" + file_name).convert('RGB')
    image = image.resize((100,100))
    image = numpy.expand_dims(image, axis=0)
    image = numpy.array(image)
    image = image/255
    pred = model.predict_classes([image])[0]
    animals_category = classes[pred]
    # compare in lower case so that "cat" matches "This is Cat"
    if ("cat" in file_name) and ("cat" in animals_category.lower()):
        print(correct,". ", file_name, animals_category)
        correct+=1
    elif ("dog" in file_name) and ("dog" in animals_category.lower()):
        print(correct,". ", file_name, animals_category)
        correct+=1
print("accuracy: ", (correct/total))

MNIST and transfer learning with VGG16 in Keras- low validation accuracy

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512 node relu dense layer and 0.5 drop-out before a softmax layer of 10. (For digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative if someone could glance through it and see if anything jumps out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(input = model.input, output = predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
directory=None,
x_col='train_samples',
y_col='train_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
directory=None,
x_col='test_samples',
y_col='test_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
    print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
steps_per_epoch = int(samples_per_epoch/batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = int(nb_validation_samples/batch_size),
callbacks = [chk, logger])
Have you tried explicitly defining the classes of the images? Like so:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.
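A quick way to check for that mismatch (a sketch, reusing the generators defined above) is to compare the two mappings directly:
print(train_generator.class_indices)
print(validation_generator.class_indices)
# The two dictionaries should be identical; if they differ, pass the same explicit
# classes=[...] list to both flow_from_dataframe() calls so the label -> index
# mapping agrees between training and validation.
assert train_generator.class_indices == validation_generator.class_indices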

Changing batch_size parameter in keras leads to broadcast error

I am running a simple encoder-decoder setup to train a representation for a one dimensional image. In this sample the input are lines with varying slopes and in the encoded layer we would expect something that resembles the slope. My setup is keras with a tensorflow backend. I am very new to this as well.
It all works fine, at least until I move away from steps_per_epoch to batch_size in the model.fit() method. Certain values of batch_size, such as 1, 2, 3, 8 and 16, do work; for others I get a ValueError. My initial guess was powers of two (2^n), but that did not hold.
The error I get for batch_size = 5 is:
ValueError: operands could not be broadcast together with shapes (5,50) (3,50) (5,50)
I am trying to understand what relation between batch_size and the training data is valid so that training always succeeds. I assumed that the training set would simply be divided into floor(N/batch_size) full batches and that the remainder would be processed as a smaller final batch.
My questions are:
What is the relation between size of data set and batch_size that are allowed.
What exactly is the keras/tensorflow trying to do such that the batch_size is important?
Thank you very much for the help.
The code to reproduce this is
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Conv1D, Concatenate
from keras.losses import mse
from keras.optimizers import Adam
INPUT_DIM = 50
INTER_DIM = 15
LATENT_DIM = 1
# Prepare Sample Data
one_line = np.linspace(1, 30, INPUT_DIM).reshape(1, INPUT_DIM)
test_array = np.repeat(one_line, 1000, axis=0)
slopes = np.linspace(0, 1, 1000).reshape(1000, 1)
data = test_array * slopes
# Train test split
train_mask = np.where(np.random.sample(1000) < 0.8, 1, 0).astype('bool')
x_train = data[train_mask].reshape(-1, INPUT_DIM, 1)
x_test = data[~train_mask].reshape(-1, INPUT_DIM, 1)
# Define Model
input = Input(shape=(INPUT_DIM, 1), name='input')
conv_layer_small = Conv1D(filters=1, kernel_size=[3], padding='same')(input)
conv_layer_medium = Conv1D(filters=1, kernel_size=[5], padding='same')(input)
merged_convs = Concatenate()(
[conv_layer_small, conv_layer_medium])
latent = Dense(LATENT_DIM, name='latent_layer',
activation='relu')(merged_convs)
encoder = Model(input, latent)
decoder_int = Dense(INTER_DIM, name='dec_int_layer', activation='relu')(latent)
output = Dense(INPUT_DIM, name='output', activation='linear')(decoder_int)
encoder_decoder = Model(input, output, name='encoder_decoder')
# Add Loss
reconstruction_loss = mse(input, output)
encoder_decoder.add_loss(reconstruction_loss)
encoder_decoder.compile(optimizer='adam')
if __name__ == '__main__':
    epochs = 100
    encoder_decoder.fit(
        x_train,
        epochs=epochs,
        batch_size=4,
        verbose=2
    )

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 newsgroups dataset available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data, then turned the resulting sparse matrix into a numpy array before feeding it. After that, when I wrote the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
It gave an error saying that 'inputs_' should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
to expand the dimensions. But when I do that I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape
inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)
    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)
    return features, labels
def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]
def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()
def train_test():
    features, labels = pre_process()
    #Defining Hyperparameters
    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003
    print(lstm_size)
    print(batch_size)
    print(epochs)
    #--------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.float32, [None,None], name = "inputs")
        # labels_ = tf.placeholder(dtype= tf.int32)
        labels_ = tf.placeholder(tf.int32, [None,None], name = "labels")
        #getting dynamic batch size according to the input tensor size
        # dynamic_batch_size = tf.shape(inputs_)[0]
        #output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name = "keep_prob")
        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        #Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)
        inputs_ = tf.expand_dims(inputs_, 0)
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
        #hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        saver = tf.train.Saver()
    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])
        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}
            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)
            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ",y)
            pred = np.array(predictions)
            print(pred)
            print(y)
            if not ((pred==y).all()):
                wrongPred += 1
            if ii % 27 == 0:
                plot_error(errorplot,ii,wrongPred)
            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
            print("Train loss: {:.3f}".format(loss))
            iteration += 1
        saver.save(sess, "checkpoints/sentiment.ckpt")
    errorRate = wrongPred/len(labels)
    print("ERROR RATE: ", errorRate )
if __name__ == '__main__':
    train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because you don't specify the size or the number of inputs.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
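For context, tf.nn.dynamic_rnn expects a 3-D input of shape [batch, time, depth], and the depth (last) dimension must be statically known. A small sketch of the shapes involved, assuming the (1, 134410) TF-IDF row from the question:
inputs_ = tf.placeholder(tf.float32, [1, None], name="inputs")  # static shape (1, ?)
inputs_withextradim = tf.expand_dims(inputs_, 2)                # static shape (1, ?, 1)
# batch = 1, time = number of TF-IDF features, depth = 1 (now known),
# so dynamic_rnn can infer the input size.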
