I need some help with a project I am working on for a data science course. In this project I classify the digits of the MNIST dataset in three ways (a rough sketch of the first two approaches follows the list below):
using the dissimilarity matrices induced by the 1-, 2-, and infinity-norm distances
using a BallTree
using a neural network.
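For context, here is a rough sketch of how the first two parts could look (not my exact code; it assumes scikit-learn and the x_train/x_test/y_train arrays loaded below, and in practice I work on a subset because the full dissimilarity matrices are large):

import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import BallTree

# Flatten the images into vectors for the distance computations
X_tr = x_train.reshape(len(x_train), -1)
X_te = x_test.reshape(len(x_test), -1)

# Dissimilarity matrices induced by the 1-, 2- and infinity-norm distances
D1 = pairwise_distances(X_te, X_tr, metric='minkowski', p=1)
D2 = pairwise_distances(X_te, X_tr, metric='minkowski', p=2)
Dinf = pairwise_distances(X_te, X_tr, metric='chebyshev')

# 1-nearest-neighbour prediction read off a dissimilarity matrix
pred_d2 = y_train[np.argmin(D2, axis=1)]

# BallTree-based nearest-neighbour classification
tree = BallTree(X_tr)
_, idx = tree.query(X_te, k=1)
pred_tree = y_train[idx[:, 0]]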
The first two parts are done, but I am getting an error in the neural network code that I can't solve. This is the code:
#Load the MNIST dataset
import numpy as np
from numpy import load

data = load('mnist.npz')
x_train = data['arr_0']
y_train = data['arr_1']
x_test = data['arr_2']
y_test = data['arr_3']
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
The output is
(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)
Then,
#Setting up the neural network and defining sigmoid function
#self.mtrx holds the neurons in each level
#self.weight, bias, grad hold weight, bias and gradient values between level L and L - 1
class NeuralNetwork:
    def __init__(self, rows, columns=0):
        self.mtrx = np.zeros((rows, 1))
        self.weight = np.random.random((rows, columns)) / columns ** .5
        self.bias = np.random.random((rows, 1)) * -1.0
        self.grad = np.zeros((rows, columns))

    def sigmoid(self):
        return 1 / (1 + np.exp(-self.mtrx))

    def sigmoid_derivative(self):
        return self.sigmoid() * (1.0 - self.sigmoid())
#Initializing neural network levels
lvl_input = NeuralNetwork(784)
lvl_one = NeuralNetwork(200, 784)
lvl_two = NeuralNetwork(200, 200)
lvl_output = NeuralNetwork(10, 200)
#Forward and backward propagation functions
def forward_prop():
    lvl_one.mtrx = lvl_one.weight.dot(lvl_input.mtrx) + lvl_one.bias
    lvl_two.mtrx = lvl_two.weight.dot(lvl_one.sigmoid()) + lvl_two.bias
    lvl_output.mtrx = lvl_output.weight.dot(lvl_two.sigmoid()) + lvl_output.bias

def back_prop(actual):
    val = np.zeros((10, 1))
    val[actual] = 1
    delta_3 = (lvl_output.sigmoid() - val) * lvl_output.sigmoid_derivative()
    delta_2 = np.dot(lvl_output.weight.transpose(), delta_3) * lvl_two.sigmoid_derivative()
    delta_1 = np.dot(lvl_two.weight.transpose(), delta_2) * lvl_one.sigmoid_derivative()
    lvl_output.grad = lvl_two.sigmoid().transpose() * delta_3
    lvl_two.grad = lvl_one.sigmoid().transpose() * delta_2
    lvl_one.grad = lvl_input.sigmoid().transpose() * delta_1
#Storing mnist data into np.array
def make_image(c):
    lvl_input.mtrx = x_train[c]
#Evaluating cost function
def cost(actual):
    val = np.zeros((10, 1))
    val[actual] = 1
    cost_val = (lvl_output.sigmoid() - val) ** 2
    return np.sum(cost_val)
#Subtracting gradients from weights and initializing learning rate
learning_rate = .01

def update():
    lvl_output.weight -= learning_rate * lvl_output.grad
    lvl_two.weight -= learning_rate * lvl_two.grad
    lvl_one.weight -= learning_rate * lvl_one.grad
And finally I train the neural network.
#Training neural network
#iter_1 equals number of batches
#iter_2 equals number of iterations in one batch
iter_1 = 50
iter_2 = 100
for batch_num in range(iter_1):
    update()
    counter = 0
    for batches in range(iter_2):
        make_image(counter)
        num = np.argmax(y_train[counter])
        counter += 1
        forward_prop()
        back_prop(num)
        print("actual: ", num, " guess: ", np.argmax(lvl_output.mtrx), " cost", cost(num))
I get the following error and I can't figure out what's wrong with my code. Can anybody help?
ValueError Traceback (most recent call last)
<ipython-input-12-8821054ddd29> in <module>
13 num = np.argmax(y_train[counter])
14 counter += 1
---> 15 forward_prop()
16 back_prop(num)
17 print("actual: ", num, " guess: ", np.argmax(lvl_output.mtrx), " cost", cost(num))
<ipython-input-6-e6875bcd1a03> in forward_prop()
2
3 def forward_prop():
----> 4 lvl_one.mtrx = lvl_one.weight.dot(lvl_input.mtrx) + lvl_one.bias
5 lvl_two.mtrx = lvl_two.weight.dot(lvl_one.sigmoid()) + lvl_two.bias
6 lvl_output.mtrx = lvl_output.weight.dot(lvl_two.sigmoid()) + lvl_output.bias
ValueError: shapes (200,784) and (28,28) not aligned: 784 (dim 1) != 28 (dim 0)
In your code:
def make_image(c):
    lvl_input.mtrx = x_train[c]
Although you initialize lvl_input.mtrx with shape (rows, 1), you later assign it data of shape (28, 28). Basically, a reshape() needs to be done on the training data before assigning it.
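A minimal sketch of the fix (assuming the image should simply be flattened; scaling the pixel values is optional but common):

def make_image(c):
    # Flatten the 28x28 image into a (784, 1) column vector so it matches
    # lvl_one.weight, which has shape (200, 784)
    lvl_input.mtrx = x_train[c].reshape(784, 1)
    # optionally also scale the pixels: lvl_input.mtrx = lvl_input.mtrx / 255.0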
I want to program the softmax function from scratch in Python using TensorFlow.
def sigmoid(p):
    return tf.cond(p >= 0, lambda: 1 / (1 + tf.exp(-p)), \
                   lambda: tf.exp(p) / (1 + tf.exp(p)))
While running this code chunk, I got this traceback:
InvalidArgumentError: Shape must be rank 0 but is rank 2 for 'cond/Switch' (op: 'Switch') with input shapes: [?,256], [?,256].
Here's my reproducible code:
# 1st hidden layer
W1 = tf.get_variable("W1", shape=(784, 256), dtype=tf.float32, initializer = tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", shape=(256), dtype=tf.float32, initializer = tf.zeros_initializer)
# 2nd hidden layer
W2 = tf.get_variable("W2", shape=(256, 10), dtype=tf.float32, initializer = tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", shape=(10), dtype=tf.float32, initializer = tf.zeros_initializer)
def sigmoid(z):
    """Numerically stable sigmoid function."""
    return tf.where(z >= 0, 1 / (1 + tf.exp(-z)), tf.exp(z) / (1 + tf.exp(z)))
### Compute predictions
logits = X @ W1 + b1
probas = sigmoid(logits)
y_pred = probas @ W2 + b2
def softmax(z): ## this approach provides numerical stability
    """Compute softmax values for each sets of scores in z."""
    e = tf.exp(z - tf.reduce_max(z))
    return e / tf.reduce_sum(e)
### Cross-Entropy loss
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(softmax(y_pred)), reduction_indices=[1]))
lr = 0.01
optimizer = tf.train.AdamOptimizer(lr)
step = optimizer.minimize(cost) #
This gives me a very bad score with BATCH_SIZE = 512 and EPOCHS = 55:
Test cost after 10 epochs: 6.3319
Test cost after 30 epochs: 6.2753
Test cost after 50 epochs: nan
OPTIMIZATION IS DONE!
Score = 0.098982
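What I think I need is a row-wise, log-space version along these lines (just a sketch, assuming TF >= 1.5 for the keepdims argument; I have not verified it):

def log_softmax(z):
    """Numerically stable log-softmax, computed row-wise."""
    z_shifted = z - tf.reduce_max(z, axis=1, keepdims=True)
    return z_shifted - tf.log(tf.reduce_sum(tf.exp(z_shifted), axis=1, keepdims=True))

# Cross-entropy without calling log() on values that can underflow to 0
cost = tf.reduce_mean(-tf.reduce_sum(y * log_softmax(y_pred), axis=1))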
When I use fit_generator in Keras, the validation set is split into minibatches, and each minibatch is evaluated as training progresses. I want the validation data to be used exactly once, at the end of each epoch. My code is currently:
def model_fit_generator(self):
    #This does the actual training of the model
    earlystop = EarlyStopping(monitor='val_acc', patience=5, verbose=2, mode='auto')
    self.__model.fit_generator(generator=self.train_generator,
                               validation_data=self.valid_generator,
                               steps_per_epoch=self.s_per_e,
                               epochs=self.epochs,
                               validation_steps=self.v_per_e,
                               shuffle=False,
                               verbose=2,
                               callbacks=[earlystop])
    model_filename = '_'.join([str(x) for x in now_list]) + '_model.h5'
    self.__model.save(model_filename)

def model_evaluate(self):
    self.model_fit_generator()
    evaluation = self.__model.evaluate_generator(self.valid_generator, self.v_per_e, verbose=0)
    return evaluation
How do I change this so that I have the validation data used once, at the end of each epoch, to decide whether early stopping is useful?
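To make the intent concrete, what I have in mind is roughly a callback like the following (just a sketch using evaluate_generator; the generator and step count come from my code above, and I have not tested it):

from keras.callbacks import Callback

class ValidateOncePerEpoch(Callback):
    'Runs the validation generator exactly once at the end of each epoch.'
    def __init__(self, valid_generator, validation_steps):
        super(ValidateOncePerEpoch, self).__init__()
        self.valid_generator = valid_generator
        self.validation_steps = validation_steps

    def on_epoch_end(self, epoch, logs=None):
        val_loss = self.model.evaluate_generator(self.valid_generator, self.validation_steps)
        print('Epoch %d validation loss: %s' % (epoch + 1, val_loss))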
EDIT: In response to a comment, here is a complete MWE showing that the validation data are being used at the same time as the training data. Note that this code will produce an error, but it also prints out batch numbers to show that both the validation and training sets are being used. To run this code, you will need 10 CSV files of data, which I can provide, but I'd rather just give you the output right after this code.
from __future__ import division
from __future__ import print_function
from pandas import concat
from pandas import DataFrame
import sys, keras, GPy, GPyOpt
import numpy as np
import pandas as pd
from keras import backend as K
from keras.models import Model
from keras.metrics import binary_crossentropy
from keras.layers import Dense, Input, LSTM, Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
class my_model():
    def __init__(self, n_lags=2, hid_dim_1=5, epochs=2, batch_size=1):
        self.n_lags = n_lags
        self.hid_dim_1 = hid_dim_1
        self.epochs = epochs
        self.batch_size = batch_size
        self.train_generator, self.s_per_e, self.valid_generator, self.v_per_e, self.n_vars = self.read_data()
        self.__model = self.model()

    def read_data(self):
        n_vars = 2
        num_sample_minibatches = 6
        num_valid_minibatches = 4
        sample_IDs = range(1, self.batch_size+num_sample_minibatches)
        valid_IDs = range(num_sample_minibatches+1, max(sample_IDs)+num_valid_minibatches+1)
        params = {'batch_size': self.batch_size, 'n_lags': self.n_lags, 'n_vars': n_vars}
        train_generator = DataGenerator(sample_IDs, **params)
        valid_generator = DataGenerator(valid_IDs, **params)
        s_per_e = int(len(sample_IDs) - self.batch_size + 1) #e.g. if you have 1,2,3,4,5,6 then you can create 4 sequences of length 3 (batch_size)
        v_per_e = int(len(valid_IDs) - self.batch_size + 1)
        return train_generator, s_per_e, valid_generator, v_per_e, n_vars
    def model(self):
        #https://github.com/twairball/keras_lstm_vae/blob/master/lstm_vae/vae.py
        a_input = Input(shape=(self.n_lags, self.n_vars,), name='a_input')
        cond_on_this = Input(shape=(self.n_vars,), name="cond_on_this")
        b_lstm = LSTM(self.hid_dim_1)(a_input)
        outputs = Dense(self.hid_dim_1, activation='sigmoid')(b_lstm)
        my_model1 = Model([a_input, cond_on_this], outputs)
        my_model1.compile(optimizer=Adam(lr=0.001), loss=binary_crossentropy)
        return my_model1

    def my_model_fit_generator(self):
        earlystop = EarlyStopping(monitor='val_acc', patience=5, verbose=2, mode='auto')
        self.__model.fit_generator(generator=self.train_generator,
                                   validation_data=self.valid_generator,
                                   steps_per_epoch=self.s_per_e,
                                   epochs=self.epochs,
                                   validation_steps=self.v_per_e,
                                   shuffle=False,
                                   verbose=2,
                                   callbacks=[earlystop])

    def my_model_evaluate(self):
        self.my_model_fit_generator()
        evaluation = self.__model.evaluate_generator(self.valid_generator, self.v_per_e, verbose=0)
        return evaluation
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, batch_size, n_lags, n_vars, shuffle=False):
        'Initialization'
        self.list_IDs = list_IDs
        self.batch_size = batch_size
        self.n_lags = n_lags
        self.n_vars = n_vars
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        batches_per_epoch = int(np.floor(len(self.list_IDs) - self.batch_size + 1))
        return batches_per_epoch

    def __getitem__(self, index):
        'Generate one batch of data'
        #Here's my evidence that the validation minibatches are being used during training!
        print('batch number: ', index+1, 'of: ', int(np.floor(len(self.list_IDs) - self.batch_size + 1)))
        indexes = self.indexes[index:(index+self.batch_size)]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        data, cond_on_this = self.__data_generation(list_IDs_temp)
        return [np.asarray(data), np.asarray(cond_on_this)], np.asarray(cond_on_this)

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
    #From MachineLearningMastery
    def series_to_supervised(self, data, n_out=1, dropnan=True):
        n_vars = 1 if type(data) is list else data.shape[1]
        df = DataFrame(data)
        cols, names = list(), list()
        #input sequence t-n, ..., t-1
        for i in range(self.n_lags, 0, -1): #for i in 3 to 0 not including 0
            cols.append(df.shift(i))
            names += [('var%d(t-%d)' % (j+1, i)) for j in range(self.n_vars)]
        #forecast sequence t, t+1, ..., t+n
        for i in range(0, n_out):
            cols.append(df.shift(-i))
            if i==0:
                names += [('var%d(t)' % (j+1)) for j in range(self.n_vars)]
            else:
                names += [('var%d(t+%d)' % (j+1, i)) for j in range(self.n_vars)]
        agg = concat(cols, axis=1)
        agg.columns = names
        if dropnan:
            agg.dropna(inplace=True)
        return agg
    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples'
        data_np_array = np.empty((self.batch_size, self.n_vars), dtype=float)
        for i, ID in enumerate(list_IDs_temp):
            #Read in a data file corresponding to this ID; put it into the numpy array.
            data_file = './pollution_' + str(i) + '.csv'
            df_data = pd.read_csv(data_file, sep=",", header=0)
            df_data.columns = ['date','pollution','dew','temp','press','wnd_dir','wnd_spd','snow','rain']
            df_data_vals = df_data[['pollution', 'temp']] #this is shape (24, 2)
            data_np_array[i,] = np.asarray(df_data_vals)
        data_s2s = np.asarray(self.series_to_supervised(data_np_array))
        data_data = data_s2s[:, :int(self.n_vars*self.n_lags)]
        data_cond = data_s2s[:, int(self.n_vars*self.n_lags):]
        data_data = data_data.reshape((data_data.shape[0], self.n_lags, self.n_vars))
        return data_data, data_cond
def run_my_model(n_lags=2, hid_dim_1=5, epochs=2, batch_size=1):
    _my_model = my_model(n_lags=n_lags, hid_dim_1=hid_dim_1, epochs=epochs, batch_size=batch_size)
    mymodel_evaluation = _my_model.my_model_evaluate()
    return mymodel_evaluation
#Bounds for hyperparameters
bounds = [{'name': 'hid_dim_1', 'type': 'discrete', 'domain': (5, 10)}]
#Bayesian Optimization
def f(x):
    evaluation = run_my_model(hid_dim_1 = int(x[:,0]), epochs = 2, batch_size = 1)
    print("binary crossentropy:\t{0}".format(evaluation[0]))
    print(evaluation)
    return evaluation
#Optimizer instance
opt_mymodel = GPyOpt.methods.BayesianOptimization(f=f, domain=bounds, initial_design_numdata=1)
#Run optimizer
opt_mymodel.run_optimization(max_iter=2)
opt_mymodel.x_opt
Relevant Output:
Using TensorFlow backend.
Epoch 1/2
batch number: 1 of: 4
batch number: 1 of: 6
batch number: 2 of: 4
batch number: 2 of: 6
batch number: 3 of: 4
batch number: 3 of: 6
batch number: 4batch number: 4 of: 4
of: 6
batch number: 5 of: 6
batch number: 6 of: 6
Traceback (most recent call last):
...Error after this...
I am trying to save a trained model and use it later in another instance (function). But somehow this throws a "variable not found" error. After reading through SO and other forums, I understand the problem is the way I store it.
import random
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)
n_input = 3
n_hidden = 512
# RNN output node weights and biases
weights = {'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))}
biases = {'out': tf.Variable(tf.random_normal([vocab_size]))}
# tf Graph input
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])
# RNN implementation in Tensorflow
def RNN(x, weights, biases):
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_input, 1)
    rnn_cell = rnn.BasicLSTMCell(n_hidden)
    outputs, states = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
learning_rate = 0.001
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
training_iters = 1000
display_step = 500
saver = tf.train.Saver()
# Launch the graph
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0, n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0
    while step < training_iters:
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)
        symbols_in_keys = [ [dictionary[str(training_data[i])]] for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])
        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc
        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            symbols_out_pred = reverse_dictionary[int(tf.argmax(onehot_pred, 1).eval())]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input+1)
    saver.save(session, 'userLocation/Model')
The model files are generated, but when I try to restore the model using
saver = tf.train.Saver()
with tf.Session() as restored_session:
    saver.restore(restored_session, 'userLocation/Model')
Error
tensorflow.python.framework.errors_impl.NotFoundError: Key Variable_3 not found in checkpoint
[[Node: save_1/RestoreV2_7 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_1/Const_0, save_1/RestoreV2_7/tensor_names, save_1/RestoreV2_7/shape_and_slices)]]
Any pointers as to what I am missing while saving?
I will explain this in 2 different parts.
When you save a model in TensorFlow, it saves the graph in one file (usually with the extension .meta) and the variable tensors in another file (usually the index file).
Now, while importing you have to follow the same 2-step process: a) import the graph first, b) then create a session and restore the variables.
Here is a sample code:
import tensorflow as tf
import numpy as np
tf.set_random_seed(10)

#define graph location in variable
meta_file = 'userLocation/Model.meta'

#importing the graph
ns = tf.train.import_meta_graph(meta_file, clear_devices=True)

#create a session
with tf.Session().as_default() as sess:
    #import variables
    ns.restore(sess, meta_file[0:len(meta_file)-5])
    # for example, if you have 'x' tensor in graph
    x = tf.get_default_graph().get_tensor_by_name("x:0")
    .
    .
    .
    #Further processing/prediction etc
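As a side note, when a restore fails with a "Key ... not found in checkpoint" error, it can also help to list what the checkpoint actually contains; a small sketch, assuming TF 1.x and the checkpoint prefix used above:

from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file

# Print the names (and values) of every tensor stored in the checkpoint
print_tensors_in_checkpoint_file(file_name='userLocation/Model',
                                 tensor_name='',
                                 all_tensors=True)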
I am learning CNNs and am currently working on deconvolution of the layers. I have begun learning about upsampling and observing how convolution layers see the world by generating feature maps from the filters, following the example "Visualization of the filters of VGG16" and its source code. I have changed the input, and the code is as follows:
import imageio
import numpy as np
import time
from keras.applications import vgg16
from keras import backend as K
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# dimensions of the generated pictures for each filter.
img_width = 128
img_height = 128
# the name of the layer we want to visualize
# (see model definition at keras/applications/vgg16.py)
layer_name = 'block5_conv1'
# util function to convert a tensor into a valid image
def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.1
    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x
# build the VGG16 network with ImageNet weights
model = vgg16.VGG16(weights='imagenet', include_top=False)
print('Model loaded.')
model.summary()
# this is the placeholder for the input images
input_img = model.input
# get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())
kept_filters = []
for filter_index in range(200):
    # we only scan through the first 200 filters,
    # but there are actually 512 of them
    print('Processing filter %d' % filter_index)
    start_time = time.time()

    # we build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    layer_output = layer_dict[layer_name].output
    if K.image_data_format() == 'channels_first':
        loss = K.mean(layer_output[:, filter_index, :, :])
    else:
        loss = K.mean(layer_output[:, :, :, filter_index])

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # step size for gradient ascent
    step = 1.

    inpImgg = '/home/sanaalamgeer/Downloads/cat.jpeg'
    inpImg = mpimg.imread(inpImgg)
    inpImg = cv2.resize(inpImg, (img_width, img_height))

    # we start from a gray image with some random noise
    if K.image_data_format() == 'channels_first':
        input_img_data = inpImg.reshape((1, 3, img_width, img_height))
    else:
        input_img_data = inpImg.reshape((1, img_width, img_height, 3))
    input_img_data = (input_img_data - 0.5) * 20 + 128

    # we run gradient ascent for 20 steps
    for i in range(20):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step
        print('Current loss value:', loss_value)
        if loss_value <= 0.:
            # some filters get stuck to 0, we can skip them
            break

    # decode the resulting input image
    if loss_value > 0:
        img = deprocess_image(input_img_data[0])
        kept_filters.append((img, loss_value))
    end_time = time.time()
    print('Filter %d processed in %ds' % (filter_index, end_time - start_time))
# we will stitch the best 64 filters on a 8 x 8 grid.
n = 8
# the filters that have the highest loss are assumed to be better-looking.
# we will only keep the top 64 filters.
kept_filters.sort(key=lambda x: x[1], reverse=True)
kept_filters = kept_filters[:n * n]
# build a black picture with enough space for
# our 8 x 8 filters of size 128 x 128, with a 5px margin in between
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
stitched_filters = np.zeros((width, height, 3))
# fill the picture with our saved filters
for i in range(n):
    for j in range(n):
        img, loss = kept_filters[i * n + j]
        stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width,
                         (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img
# save the result to disk
imageio.imwrite('stitched_filters_%dx%d.png' % (n, n), stitched_filters)
The input image I am using is a cat photo, cat.jpeg (image not shown here).
It is supposed to generate an output with 64 feature maps embedded into one image, as shown in "Visualization of the filters of VGG16", but it is generating the same input image for every filter (output image not shown here).
I am confused about what's wrong or where I should make changes.
Please help.
That is quite complex code...
I'd do this instead:
import numpy as np
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input
from keras.models import Model

layer_name = 'block5_conv1'

#create a section of the model to output the layer we want
model = vgg16.VGG16(weights='imagenet', include_top=False)
model = Model(model.input, model.get_layer(layer_name).output)

#open and preprocess the cat image
catImage = openTheCatImage(catFile)
catImage = np.expand_dims(catImage, axis=0)
catImage = preprocess_input(catImage)

#get the layer outputs
features = model.predict(catImage)

#plot (deprocess_image is the function defined in the question)
for channel in range(features.shape[-1]): #or .shape[1], or up to a limit you like
    featureMap = features[:, :, :, channel] #or features[:, channel]
    featureMap = deprocess_image(featureMap)[0]
    saveOrPlot(featureMap)
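openTheCatImage and saveOrPlot above are just placeholders; one possible way to fill them in (an assumption on my side, using keras.preprocessing and matplotlib, with the same 128x128 size as in the question) is:

from keras.preprocessing import image
import matplotlib.pyplot as plt

def openTheCatImage(catFile, target_size=(128, 128)):
    # Load the image from disk and convert it to a float array
    img = image.load_img(catFile, target_size=target_size)
    return image.img_to_array(img)

def saveOrPlot(featureMap, filename=None):
    # Save the single-channel feature map to disk, or show it on screen
    if filename is not None:
        plt.imsave(filename, featureMap, cmap='viridis')
    else:
        plt.imshow(featureMap, cmap='viridis')
        plt.show()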
I created my first TensorFlow neural network, initially for generating sequences. It produced weird outputs, so I simplified it a lot to see if it could reach an error rate of 0% with just 5 inputs and 5 output classes. Somehow it does not seem to backpropagate at all, because it is stuck at a 20% error rate without moving at all. If anyone can point out the mistake I made, thank you in advance :)
Cheers
import numpy as np
import tensorflow as tf
import sys
trainingInputs = [
[[0],[0],[0],[0]],
[[1],[0],[0],[0]],
[[0],[1],[0],[0]],
[[0],[0],[1],[0]],
[[0],[0],[0],[1]]]
trainingOutputs = [
[1,0,0,0],
[0,1,0,0],
[0,0,1,0],
[0,0,0,1],
[0,0,0,0]]
data = tf.placeholder(tf.float32, [None, len(trainingInputs[0]),1])
target = tf.placeholder(tf.float32, [None, len(trainingOutputs[0])])
num_hidden = 24
cell = tf.contrib.rnn.LSTMCell(num_hidden,state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.GradientDescentOptimizer(0.01)
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1
no_of_batches = int((len(trainingInputs)) / batch_size)
def trainNetwork():
    epoch = 1000
    for i in range(epoch):
        ptr = 0
        for j in range(no_of_batches):
            inp, out = trainingInputs[ptr:ptr+batch_size], trainingOutputs[ptr:ptr+batch_size]
            ptr += batch_size
            sess.run(minimize, feed_dict={data: inp, target: out})

def generateOutput():
    incorrect = sess.run(error, {data: trainingInputs, target: trainingOutputs})
    sys.stdout.write('error {:3.1f}%'.format(100 * incorrect) + "\n")
    sys.stdout.flush()

for i in range(200):
    trainNetwork()
    generateOutput()
sess.close()