Backpropagation from scratch -- having trouble with some matrix multiplications

Backpropagation from scratch -- having trouble with some matrix multiplications - python-3.x

I'm trying to implement a neural network from scratch and I'm having trouble correctly implementing the backpropagation. I feel like I'm either calling the wrong indexes of a weight/activation or I'm just not implementing the matrix multiplication correctly. I can't get the matrixes to align properly.
Please note cost is sum of squared errors. I'm taking the gradient of cost with respect to Z.
Instead of posting the entire code, I will just post the relevant parts.
My networks are :
#[3,5,5,1] 3 inputs, 5 hidden units, 5 hidden units, 1 output unit
##a[-1] = [1,1]
##a[-2] =[1,5]
##a[-3]= [1,5]
##a[-4]= [1,3] #input layer
weight shapes as follows :
##weights[-1] = 5,1
##weights [-2] = 5,5
##weights [-3] = 3,5 #first layer
Zs:
##zs shape: (1, 1) [-1] #last layer z
##zs shape: (1, 5) [-2]
##zs shape: (1, 5) [-3] #first layer z
#code for generating data
def psuedo_training_data():
input_data = np.random.randint(1, 5, (500,1,3))
labels =np.random.randint(0,2,(500,1))
training_data = [(x,y) for x,y in zip
(input_data[:int(len(input_data)*.8)],
labels[:int(len(labels)*.8)])]
testing_data = [(x,y) for x,y in zip
(input_data[int(len(input_data)*.8):],
labels[int(len(labels)*.8):])]
return training_data, testing_data
def sigmoid_derivative(z):
return sigmoid(z) * (1-sigmoid(z))
def sigmoid(z):
return 1.0/(1.0+np.exp(-z))
def calculate_gradients(weights, biases, x, y):
weight_derivatives, bias_derivatives, activations, zs = gather_backprop_data(weights, biases, x,y)
#weight derivatives and bias_derivatives are empty shells
#gradient of cost with respect to Z of last layer
last_layer_z_error = (activations[-1] - y) * sigmoid_derivative(zs[-1])
#updating the weight_derivatives of final layer
weight_derivatives[-1] = last_layer_z_error * (activations[-2]).T
bias_derivatives[-1] = last_layer_z_error
#attempting to get the gradients of the hidden layers
z_previous_layer = last_layer_z_error
for i in reversed(range(1,len(weights))):
print('layer:', i)
print('****************************************')
print('z_previous_layer.shape', z_previous_layer.shape)
print('weights[i-1].shape', weights[i-1].shape)
print(' sigmoid_derivative(zs[i-1])',sigmoid_derivative(zs[i-1]).shape)
print('******************************************************')
z_previous_layer =np.dot(weights[i].T, z_previous_layer)*
(sigmoid_derivative(zs[i-1]))
print('********************************************')
print('activations i -1', activations[i-1].shape)
print('z_previous_layer_post.shape', z_previous_layer.shape)
weight_derivatives[i-1] = np.dot(z_previous_layer,activations[i-1].T)
bias_derivatives[i-1] = z_previous_layer
return weight_derivatives, bias_derivatives
FSE:
ValueError Traceback (most recent call last)
<ipython-input-399-ee974769393b> in <module>()
----> 1 stochastic_gradient_decent(training_data,testing_data, 5, 25, NN.weights, NN.biases, NN)
<ipython-input-395-e78e37a1fda0> in stochastic_gradient_decent(training_data, testing_data, epochs, mini_batch_size, weights, biases, neural_network)
13 for mini_batch in mini_batches:
14 for x,y in mini_batch:
---> 15 weight_derivatives, bias_derivatives = calculate_gradients(weights, biases, x,y )
16
17 for i in range(len(weights)):
<ipython-input-393-bb458045ab42> in calculate_gradients(weights, biases, x, y)
19 print('******************************************************')
20
---> 21 z_previous_layer =np.dot(weights[i].T, z_previous_layer)*(sigmoid_derivative(zs[i-1]))
22
23
ValueError: shapes (1,5) and (1,1) not aligned: 5 (dim 1) != 1 (dim 0)

Related

What is causing tensorflow retracing?

Here is my code for factorization
I am trying to run some part of my iterate function with TPUs and I am getting retracing error ? If I am making an obvious mistake I am sorry. update_xu and update_yi is mostly matrix multiplication and matrix inversion. Hence I am try to run them in strategy.scope() ?. Eventually some kind of memory leakage or something
def loss(R, X, Y, C, lmda_x, lmda_y):
'''
returns the MSE of the weighted least squares plus L2 regularisation error
'''
Error = R - tf.matmul(X, Y, transpose_a = True)
Error = Error * Error
Error = C * Error
Reg = 0 + tf.math.reduce_sum(X * X * lmda_x) + tf.math.reduce_sum(Y * Y * lmda_y)
return Reg + tf.math.reduce_sum(Error) * 1 / (Error.shape[0] * Error.shape[1])
def update_xu(Ru, Y, Cuser, lmda):
'''
input
column vector Ru,
k x m matrix Y,
m x m matrix Cuser and
lmda_x
output
the updated user row vector (xu) by making Y matrix constant
'''
Ru = tf.reshape(Ru, shape = [Y.shape[1], 1])
C = Cuser # Ru
inverse = tf.linalg.inv(Y # Cuser # tf.transpose(Y) + lmda * tf.eye(Y.shape[0]))
ans = (inverse # Y # C)
return tf.reshape(ans, [Y.shape[0]])
def update_yi(Ri, X, Citem, lmda):
'''
input
column vector Ri,
k x n matrix X,
n x n matrix Citem and
lmda_y
output
the updated user row vector (yi) by making X matrix constant
'''
Ri = tf.reshape(Ri, shape = [X.shape[1], 1])
C = Citem # Ri
inverse = tf.linalg.inv(X # Citem # tf.transpose(X) + lmda * tf.eye(X.shape[0]))
ans = inverse # X # C
return tf.reshape(ans, [X.shape[0]])
def iterate(R, X, Y, C, lmda_x, lmda_y, epochs):
'''
returns approximately updated X and Y such R = X(Y.T) with WALS algorithm
'''
with strategy.scope():
for _ in range(epochs):
Xtt = tf.vectorized_map(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C))
#Xtt = tf.map_fn(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C), dtype = tf.TensorSpec([Y.shape[0]], dtype = tf.float32), parallel_iterations=6)
print(Xtt.shape)
X = tf.transpose(Xtt)
R, C = tf.transpose(R), tf.transpose(C)
Ytt = tf.vectorized_map(lambda x: update_yi(x[0], X, tf.linalg.diag(x[1]), lmda_y), (R, C))
#Ytt = tf.map_fn(lambda x: update_yi(x[0], X, tf.linalg.diag(x[1]), lmda_y), (R, C), dtype = tf.TensorSpec([X.shape[0]], dtype = tf.float32), parallel_iterations=6)
R , C = tf.transpose(R), tf.transpose(C)
print(Ytt.shape)
Y = tf.transpose(Ytt)
return X, Y
def init_weights(R):
chk = tf.where(
tf.math.abs(R) > eps, 1., 0.
)
cnt = tf.constant(
tf.math.reduce_sum(chk, axis = 0), shape = [1, R.shape[1]]
)
cnt = tf.repeat(
cnt, repeats = R.shape[0], axis = 0
)
cnt = tf.reshape(cnt, [R.shape[0], R.shape[1]])
cnt = cnt * chk
cnt = tf.random.normal( R.shape, mean=0.0, stddev=1.0) * cnt
cnt = cnt + tf.random.normal([1], mean = 0.0, stddev = 1.0)
return cnt
def train( R, lmda_x, lmda_y, epochs, embd):
flag = False
loss_train , loss_test, total = 0., 0., 0
loss_train_list, loss_test_list,total_epochs = [], [], []
X, Y = tf.zeros([embd, R.shape[0]]), tf.zeros([embd, R.shape[1]])
C = init_weights(train_data)
for epoch in epochs:
if flag == False:
X, Y = tf.random.normal( [embd, R.shape[0]], mean=0.0, stddev=1.0), tf.random.normal( [embd, R.shape[1]], mean=0.0, stddev=1.0)
X, Y = iterate(R, X, Y, C, lmda_x, lmda_y, epoch)
flag = True
else:
X, Y = iterate(train_data, X, Y, C, lmda_x, lmda_y, epoch)
total += epoch
loss_train = loss(train_data, X, Y, C, lmda_x, lmda_y)
loss_test = loss(test_data, X, Y, C, lmda_x, lmda_y)
loss_train_list.append(loss_train)
loss_test_list.append(loss_test)
total_epochs.append(total)
print(loss_train, loss_test)
plt.plot(total_epochs, loss_train_list, label = "Training", linewidth = 5)
plt.plot(total_epochs, loss_test_list, label = "Test", linewidth = 1)
plt.xticks(fontsize = 10)
plt.title(str(embd)+ ', ' +str(lmda_x) + ',' + str(lmda_y))
plt.yticks(fontsize = 10)
plt.xlim(0, 200)
plt.ylim(0, 1000000000)
plt.xlabel('iterations', fontsize=30);
plt.ylabel('MSE', fontsize=30);
plt.legend(loc='best', fontsize=20);
return X, Y
def grid_search(epochs, embds, lmdas_x, lmdas_y, train_data, test_data):
for lmda_x in lmdas_x:
for lmda_y in lmdas_y:
for embd in embds:
lmda_x , lmda_y , epochs, embd = tf.convert_to_tensor(lmda_x, dtype = tf.float32), tf.convert_to_tensor(lmda_y, dtype =tf.float32), tf.convert_to_tensor(epochs, dtype = tf.int64), tf.convert_to_tensor(embd, dtype =tf.int64)
X, Y = train(train_data, lmda_x, lmda_y, epochs, embd)
Some errors on calling grid search
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7b90> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7b90> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
(943, 1)
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7560> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7560> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
(1682, 1)
(943, 1)
ERROR:tensorflow:==================================
Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f4e018c73d0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2778, in while_loop
return result File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2726, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv)) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/map_fn.py", line 507, in compute
return (i + 1, tas) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/map_fn.py", line 505, in <listcomp>
ta.write(i, value) for (ta, value) in zip(tas, result_value_batchable) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py", line 249, in wrapped
error_in_function=error_in_function)
==================================
InvalidArgumentError Traceback (most recent call last)
<ipython-input-58-a2922620d8dd> in <module>()
7 epochs = [5, 100] #2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]#, 588, 299, 300, 200]
8 #X, Y, C = train(train_data.shape[0], train_data.shape[1], train_data, 0, 0, 2, 2)
----> 9 grid_search(epochs, embds, lmdas, lmdas, train_data, test_data)
3 frames
<ipython-input-57-8cc0dfdb85de> in grid_search(epochs, embds, lmdas_x, lmdas_y, train_data, test_data)
5 plt.figure(figsize = (10,10))
6 lmda_x , lmda_y , epochs, embd = tf.convert_to_tensor(lmda_x, dtype = tf.float32), tf.convert_to_tensor(lmda_y, dtype =tf.float32), tf.convert_to_tensor(epochs, dtype = tf.int64), tf.convert_to_tensor(embd, dtype =tf.int64)
----> 7 X, Y = train(train_data, lmda_x, lmda_y, epochs, embd)
8
9
<ipython-input-56-d70632663530> in train(R, lmda_x, lmda_y, epochs, embd)
88 flag = True
89 else:
---> 90 X, Y = iterate(train_data, X, Y, C, lmda_x, lmda_y, epoch)
91 total += epoch
92 loss_train = loss(train_data, X, Y, C, lmda_x, lmda_y)
<ipython-input-56-d70632663530> in iterate(R, X, Y, C, lmda_x, lmda_y, epochs)
50 Xtt = tf.vectorized_map(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C))
51 #Xtt = tf.map_fn(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C), dtype = tf.TensorSpec([Y.shape[0]], dtype = tf.float32), parallel_iterations=6)
---> 52 print(Xtt.shape)
53 X = tf.transpose(Xtt)
54 R, C = tf.transpose(R), tf.transpose(C)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in shape(self)
1173 # `_tensor_shape` is declared and defined in the definition of
1174 # `EagerTensor`, in C.
-> 1175 self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple())
1176 except core._NotOkStatusException as e:
1177 six.raise_from(core._status_to_exception(e.code, e.message), None)
InvalidArgumentError: {{function_node __inference_f_5094764}} Input is not invertible.
[[{{node loop_body/MatrixInverse/pfor/MatrixInverse}}]]

Tensorflow error producing result on a single dataset

I am trying my first NN with tensorflow and unable to produce results for a single input sample. I have created a minimal example where I am feeding it multiple y = a * x + b inputs (for varying a, b) and trying to get a result back but it fails. Note that I don't care about accuracy here, I'm doing this as a POC. Some parameters are below:
N is the number of x grid points. Each input row is of length 2*N
(N for x, N for y).
M is the number of training rows I give.
2 is the number of outputs I expect (a and b).
Thus, my training data is x_train of size (m, 2*n) and y_train of size (m, 2). It seems that I build the model OK but I am unable to feed it a single input of size (1, 2*n) and get a result back of size (1, 2) as desired. Instead I get the following error:
Traceback (most recent call last):
File "xdriver.py", line 92, in <module>
main()
File "xdriver.py", line 89, in main
ab2 = model.predict(rys) # This fails
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 909, in predict
use_multiprocessing=use_multiprocessing)
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 462, in predict
steps=steps, callbacks=callbacks, **kwargs)
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 396, in _model_iteration
distribution_strategy=strategy)
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 594, in _process_inputs
steps=steps)
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 2472, in _standardize_user_data
exception_prefix='input')
File "/apps/anaconda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py", line 574, in standardize_input_data
str(data_shape))
ValueError: Error when checking input: expected dense_input to have shape (20,) but got array with shape (1,)
Below is the code I am using which is the minimal example I have been able to develop to reproduce this (along with documentation to explain my process). Can anyone assess what I am doing wrong and what to change?
#!/usr/bin/env python3
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#################
### CONSTANTS ###
#################
ARANGE = (-5.0, 5.0) # Possible values for m in training data
BRANGE = (0.0, 10.0) # Possible values for b in training data
X_MIN = 1.0
X_MAX = 9.0
N = 10 # Number of grid points
M = 2 # Number of {(x,y)} sets to train on
def gen_ab(arange, brange):
""" mrange, brange are tuples of floats """
a = (arange[1] - arange[0])*np.random.rand() + arange[0]
b = (brange[1] - brange[0])*np.random.rand() + brange[0]
return (a, b)
def build_model(x_data, y_data):
""" Build the model using input / output training data
Args:
x_data (np array): Size (m, n*2) grid of input training data.
y_data (np array): Size (m, 2) grid of output training data.
Returns:
model (Sequential model)
"""
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_dim=len(x_data[0])))
model.add(layers.Dense(len(y_data[0])))
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
return model
def gen_data(xs, arange, brange, m):
""" Generate training data for lines of y = m*x + b
Args:
xs (list): Grid points (size N1)
arange (tuple): Range to use for a (a_min, a_max)
brange (tuple): Range to use for b (b_min, b_max)
m (int): Number of y grids to generate
Returns:
x_data (np array): Size (m, n*2) grid of input training data.
y_data (np array): Size (m, 2) grid of output training data.
"""
n = len(xs)
x_data = np.zeros((m, 2*n))
y_data = np.zeros((m, 2))
for ix in range(m):
(a, b) = gen_ab(arange, brange)
ys = a*xs + b*np.ones(xs.size)
x_data[ix, :] = np.concatenate((xs, ys))
y_data[ix, :] = [a, b]
return (x_data, y_data)
def main():
""" Main routin """
# Generate the x axis grid to be used for all training sets
xs = np.linspace(X_MIN, X_MAX, N)
# Generate the training data
# x_train has M rows (M is the number of training samples)
# x_train has 2*N columns (first N columns are x, second N columns are y)
# y_train has M rows, each of which has two columns (a, b) for y = ax + b
(x_train, y_train) = gen_data(xs, ARANGE, BRANGE, M)
model = build_model(x_train, y_train)
model.fit(x_train, y_train, epochs=10, batch_size=32)
model.summary()
####################
### Test example ###
####################
(a, b) = gen_ab(ARANGE, BRANGE)
ys = a*xs + b*np.ones(xs.size)
rys = np.concatenate((xs, ys))
ab1 = model.predict(x_train) # This succeeds
print(a, b)
print(ab1)
ab2 = model.predict(rys) # This fails
if __name__ == "__main__":
main()

The solution to this turned out to be pretty simple. You simply need to pass in the input data as a batch of size one. Changing:
ab2 = model.predict(rys)
to
ab2 = model.predict(np.array([rys]))
made it work!

How to code softmax function using Python on tensorflow graph

I want to program the softmax function from scratch using Python on tensorflow mood.
def sigmoid(p):
return tf.cond(p >= 0, lambda: 1 / (1 + tf.exp(-p)), \
lambda: tf.exp(p) / (1 + tf.exp(p)))
While running this code chunk, I got this traceback:
InvalidArgumentError: Shape must be rank 0 but is rank 2 for 'cond/Switch' (op: 'Switch') with input shapes: [?,256], [?,256].
Here's my reproducible code:
# 1st hidden layer
W1 = tf.get_variable("W1", shape=(784, 256), dtype=tf.float32, initializer = tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", shape=(256), dtype=tf.float32, initializer = tf.zeros_initializer)
# 2nd hidden layer
W2 = tf.get_variable("W2", shape=(256, 10), dtype=tf.float32, initializer = tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", shape=(10), dtype=tf.float32, initializer = tf.zeros_initializer)
def sigmoid(z):
"""Numerically stable sigmoid function."""
return tf.where(z >= 0, 1 / (1 + tf.exp(-z)), tf.exp(z) / (1 + tf.exp(z)))
### Compute predictions
logits = X # W1 + b1
probas = sigmoid(logits)
y_pred = probas # W2 + b2
def softmax(z): ## this approach provides numerical stability
"""Compute softmax values for each sets of scores in z."""
e = tf.exp(z - tf.reduce_max(z))
return e / tf.reduce_sum(e)
### Cross-Entropy loss
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(softmax(y_pred)), reduction_indices=[1]))
lr = 0.01
optimizer = tf.train.AdamOptimizer(lr)
step = optimizer.minimize(cost) #
This gives me a very bad score with BATCH_SIZE = 512 and EPOCHS= 55
Test cost after 10 epochs: 6.3319
Test cost after 30 epochs: 6.2753
Test cost after 50 epochs: nan
OPTIMIZATION IS DONE!
Score = 0.098982

Convergence issues in 2D RBF Neuron implemented as a Keras layer

We implemented a 2D Gaussian radial basis layer (RBF) in Keras and are running into convergence issues with batch sizes larger than 1. The Neuron should implement the following function:
f(x,y)=exp(-a((x-x_0)²+(y-y_0)²)
Here x_0, y_0 and a are fit parameters.
Testcase
Currently we are doing correctness tests and are trying to fit just a single Neuron on the 2D function above. The Neuron should be (and is in case of batch_size 1) able to approximate this function exactly. The optimal loss is 0.
Problem
If we choose a batch size of 1 in this code, the prediction with Keras will converge very often and will be nearly independent of the starting parameters.
If we increase the batch size, the fit might produce a random walk, freeze or not converge at all. In all of these cases (even batch_size 2) convergence is a lot worse than in the batch_size 1 case. If we choose the batch_size as the size of the trainingset (i.e. 1296, our desired batch size), the fit will freeze most of the time mostly independent of learning rate.
Code
We implemented this layer in the following code:
# 2D RBF Layer
# In case anybody wants to use this code afterwards:
# Licenses: Apache, MIT, BSD, LGPLv2 and v3 and Public Domain
# Input: x,y Pairs, shape: (2,)
# Output: exp(a* ((x-x_0)**2 + (y-y_0)**2)), shape: (1,)
# Parameters: x_0, y_0, a - called: mean_x, mean_y and opening in the following code:
# x and y should both lie in [0,1] - only [0,infinity] is enforced currently
class RBFLayer2D(Layer):
def __init__(self, **kwargs):
super(RBFLayer2D, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.mean_x = K.variable(0.35)
self.constraints[self.mean_x] = NonNeg()
self.mean_y = K.variable(0.35)
self.constraints[self.mean_y] = NonNeg()
self.opening = K.variable(2.0)
self.constraints[self.opening] = NonNeg()
self.trainable_weights = [self.mean_x,self.mean_y,self.opening]
super(RBFLayer2D, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
x_m = x[:,0] - self.mean_x
y_m = x[:,1] - self.mean_y
out = x_m*x_m + y_m*y_m
outexp = 50.0*K.exp(-64.8*self.opening*out)
# Output: exp(-a* ((x-x_0)**2 + (y-y_0)**2))
return outexp
def compute_output_shape(self, input_shape):
# If Inputshape is (None, N) Outputshape is (None,N/2)
# In our example we only look at (None, 2), which outputs (None,1)
output_shape = (input_shape[0], input_shape[1]//2)
return output_shape
Reproduction
To reproduce set a batch_size of 1 in the (not-so) minimal example after this section. When you run it, the code will display the target distribution (a circle in the lower left corner), the starting guess for our RBF ANN (a smaller circler in the middle) and then after each iteration the current guess (a circle getting bigger and moving to the lower left corner).
Afterwards set a batch_size of 12 and restart the code and you will not observe convergence anymore.
Minimal Example
from __future__ import print_function
from __future__ import division
import numpy as np
np.random.seed(1234)
import matplotlib.pyplot as plt
from keras.engine import Layer
from keras.optimizers import SGD
from keras.models import Sequential
from keras.constraints import NonNeg
from keras import backend as K
# 2D RBF Layer
# Input: x,y Pairs, shape: (2,)
# Output: exp(a* ((x-x_0)**2 + (y-y_0)**2)), shape: (1,)
# Parameters: x_0, y_0, a - called: mean_x, mean_y and opening in the following code:
# x and y should both lie in [0,1] - only [0,infinity] is enforced currently
class RBFLayer2D(Layer):
def __init__(self, **kwargs):
super(RBFLayer2D, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.mean_x = K.variable(0.35)
self.constraints[self.mean_x] = NonNeg()
self.mean_y = K.variable(0.35)
self.constraints[self.mean_y] = NonNeg()
self.opening = K.variable(2.0)
self.constraints[self.opening] = NonNeg()
self.trainable_weights = [self.mean_x,self.mean_y,self.opening]
super(RBFLayer2D, self).build(input_shape)
def call(self, x):
x_m = x[:,0] - self.mean_x
y_m = x[:,1] - self.mean_y
out = x_m*x_m + y_m*y_m
outexp = 50.0*K.exp(-64.8*self.opening*out)
# Output: exp(-a* ((x-x_0)**2 + (y-y_0)**2))
return outexp
def compute_output_shape(self, input_shape):
# If Inputshape is (None, N) Outputshape is (None,N/2)
# In our example we only look at (None, 2), which outputs (None,1)
output_shape = (input_shape[0], input_shape[1]//2)
return output_shape
# The function we want to train.
# It can be exactly represented using a single Neuron.
def twodenergy(phi, psi):
r0 = np.array([-180, -180])
b = 0.00005
return 50.0 * np.exp(- b * ((phi - r0[0]) ** 2 + (psi - r0[1]) ** 2))
# One of two plotting helper functions to show the results
def make_plot(y,numsteps,numbins,minangle,maxangle,plotnum, batch_size):
evaluation = np.zeros((numsteps, numsteps))
for i in range(0, numbins):
mx = i % numsteps
my = int(i / numsteps)
evaluation[mx,my]=y[i]
plt.imshow(evaluation.T, origin='lower',extent=[minangle, maxangle, minangle, maxangle])
plt.xlabel("x")
plt.ylabel("y")
if plotnum == 0:
plt.title("Startconfiguration")
else:
plt.title("RBF for batch_size %i at frame %03d" % (batch_size, plotnum))
plt.show()
# One of two plotting helper functions to show the target function
def plot_target_function(phi, psi, minangle, maxangle, delta_angle_half, numbins, numsteps ):
eval_matrix_corr = np.zeros((numsteps, numsteps))
for i in range(0, numbins):
mx = i % numsteps
my = int(i / numsteps)
ph = phi[mx] +delta_angle_half
ps = psi[my] +delta_angle_half
eval_matrix_corr[mx,my] = twodenergy(ph,ps)
plt.imshow(eval_matrix_corr.T, origin='lower', extent=[minangle, maxangle, minangle, maxangle])
plt.title("Target Function")
plt.xlabel("phi")
plt.ylabel("psi")
plt.show()
if __name__ == "__main__":
# batch_size == 1: converges very often nearly independent of input parameters
# batch_size == 2: no to slow convergence, but distribution stays in the right place more or less
# batch_size == 3-12: random walk
# batch_size == 1296: no movement in case of low learning_rate, random_walk in case of high learning_rate
# (this is the case where the whole map is evaluated in every step.
# 1296 is our desired testcase, because it evaluates the whole map we want to fit.
batch_size = 1
learning_rate = 1E-5
### Here we generate the target function ###
### f(phi,psi)
### phi is [-180,180]
### psi is [-180,180]
anglestep = 10.0
minangle = -180.0
maxangle = 180.0
numsteps = int((maxangle - minangle)/anglestep)
anglerange = maxangle - minangle
numbins = numsteps*numsteps
delta_angle_half = anglerange /(2.0* numsteps)
phi = np.arange(minangle, maxangle, anglestep)
psi = np.arange(minangle, maxangle, anglestep)
#Target Function Plot, Gaussian in lower left
plot_target_function(phi, psi, minangle, maxangle, delta_angle_half, numbins, numsteps )
# Input Parameter Regularization
# we map -180..180 to 0..1
# we also calculate the training parameters for our x,y pairs:
x_train = np.zeros((numbins, 2))
y_train = np.zeros((numbins, 1))
for x,ph in enumerate(phi):
for y,ps in enumerate(psi):
myphi = (ph + delta_angle_half - minangle)/(anglerange)
mypsi = (ps + delta_angle_half- minangle)/(anglerange)
x_train[x * numsteps + y, 0] = (ph +delta_angle_half - minangle)/(anglerange)
x_train[x * numsteps + y, 1] = (ps + delta_angle_half- minangle)/(anglerange)
y_train[x * numsteps + y] = twodenergy(ph +delta_angle_half,ps +delta_angle_half)
# Prediction with Keras
model = Sequential()
# Single RBF Layer, only one node
model.add(RBFLayer2D(input_shape=(2,)))
sgd = SGD(lr=learning_rate)
model.compile(loss="mean_squared_error", optimizer=sgd)
# We plot the starting configuration.
y = model.predict(x_train, batch_size=batch_size)
make_plot(y, numsteps, numbins, minangle, maxangle, 0, batch_size)
#Plot the first 15 iterations:
for i in range(0,15):
# For demonstration purposes, we fit 1 epoch and plot the output.
model.fit(x_train,y_train, epochs=1, batch_size=batch_size)
y = model.predict(x_train, batch_size=batch_size)
make_plot(y, numsteps, numbins, minangle, maxangle, 1 + i, batch_size)

Fully Connected layer incorrect feed

Dragging some idea for building CNN from here, I want to build a convnet which comprises of two convolution layers, one fully connected layer(FCL) and softmax layer(SL). I couldn't understand defining the convolution operations to perform on FCL and back connected to SL.
In FCL, are the convolution operation performed in 1D where the input is flattened ? The weight for FCL are generated in 2D but how can I do the Conv operations if so ? because the matrix dimension dont match with the reshaped input and weights generated.( comparing VGGNET in detail column at the end). Even If I can do a 1xM and MxN conv operation the sizes of the matrix are missmatching where did I go wrong in FCL ?
Traceback (most recent call last):
File "D:/Lab_Project_Files/TF/Practice Files/basictest22.py", line 108, in <module>
y = conv_net( x )
File "D:/Lab_Project_Files/TF/Practice Files/basictest22.py", line 93, in conv_net
FClayer = tf.nn.relu(tf.add(tf.matmul(reshape,layer3_weights),layer3_biases))
ValueError: Shape must be rank 2 but is rank 1 for 'MatMul' (op: 'MatMul') with input shapes: [15360], [2240,64]
How to define the FCL ?
I'm bit confused whether these operations apply on each and every image of the batch ?
My input parameters are
INPUT_WIDTH = 16 # input image width
INPUT_HEIGHT = 12 # input image height
INPUT_DEPTH = 1 # input image depth = 1 for monochrome
NUM_CLASSES = 8 # output classes
BATCH_SIZE = 5 # grouping batch for training
# input output placeholders
x = tf.placeholder(tf.float32, [BATCH_SIZE, INPUT_WIDTH,INPUT_HEIGHT,INPUT_DEPTH ])
y_ = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES])
my trail code
def outputdetails(W1, H1,F, P, S):
# W1,W2 - width of input and output
# H1,H2 - height of input and output
# F - size of the filter
# P - padding
# S - Stride
P = 0.00
W2 = int((W1 - F + 2*P)/S + 1)
H2 = int((H1 - F + 2*P)/S + 1)
return W2, H2
# CNN trail
def conv_net(x):
# CONV1 layer
FILTER_SIZE = 3 # applying 3x3 filter
STRIDE = 1
num_hidden = 64 # used for FCL as num of outputs
NUM_CHANNELS = INPUT_DEPTH # input channels
DEPTH = 16 # Output channels Apply 16 filters
layer1_weights = tf.Variable(tf.random_normal([FILTER_SIZE,FILTER_SIZE,NUM_CHANNELS,DEPTH],stddev = 0.1))
layer1_biases = tf.Variable(tf.zeros([DEPTH]))
#CONV2 layer
NUM_CHANNELS = 16
DEPTH = 16
layer2_weights = tf.Variable(tf.random_normal([FILTER_SIZE, FILTER_SIZE, NUM_CHANNELS, DEPTH], stddev=0.1))
layer2_biases = tf.Variable(tf.zeros([DEPTH]))
# Fully Connected layer
# W1 - INPUT_WIDTH, H1 - INPUT_HEIGHT, F - FILTER_SIZE, S - STRIDE
finalsize_width,finalsize_height = outputdetails(INPUT_WIDTH,INPUT_HEIGHT,FILTER_SIZE,1,STRIDE)
layer3_weights = tf.Variable(
tf.truncated_normal([finalsize_width * finalsize_height * DEPTH, num_hidden], stddev=0.1))
layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
# softmax layer
Outlayer_weights = tf.Variable(tf.random_normal([num_hidden, NUM_CLASSES], stddev=0.1))
Outlayer_biases = tf.Variable(tf.constant(1.0,shape = [NUM_CLASSES]))
conv1 = tf.nn.relu(tf.add(tf.nn.conv2d(x,layer1_weights,strides = [1,1,1,1],padding='SAME'),layer1_biases))
conv2 = tf.nn.relu(tf.add(tf.nn.conv2d(conv1, layer2_weights, strides=[1, 1, 1, 1], padding='SAME'), layer2_biases))
shape = conv2.get_shape().as_list()
reshape = tf.reshape(conv2,[shape[0]*shape[1]*shape[2]*shape[3]])
FClayer = tf.nn.relu(tf.add(tf.matmul(reshape,layer3_weights),layer3_biases))
out = tf.add(tf.matmul(FClayer, Outlayer_weights), Outlayer_biases)
return out
Files if required
source file
classes
data

Change this
reshape = tf.reshape(conv2,[shape[0]*shape[1]*shape[2]*shape[3]])
to this
reshape = tf.reshape(conv2,[shape[0],shape[1]*shape[2]*shape[3]])
matmul can work with a batch dimension which you are destroying.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Backpropagation from scratch -- having trouble with some matrix multiplications - python-3.x

Related

What is causing tensorflow retracing?

Tensorflow error producing result on a single dataset

How to code softmax function using Python on tensorflow graph

Convergence issues in 2D RBF Neuron implemented as a Keras layer

Fully Connected layer incorrect feed

Categories

Resources