Knowledge Distillation - distillation_loss: 0.0000e+00 - keras

I am implementing the Knowledge Distillation code as in keras documentation https://keras.io/examples/vision/knowledge_distillation/.
However, when the distillation never surpasses 0.
Tried to change the function in student from softmax to
tf.nn.log_softmax(student_predictions / self.temperature, axis=1)
and the result is way higher but still static.
This is the code:
class Distiller(keras.Model):
def __init__(self, student, teacher):
super(Distiller, self).__init__()
self.teacher = teacher
self.student = student
def compile(
self,
optimizer,
metrics,
student_loss_fn,
distillation_loss_fn,
alpha=0.1,
temperature=3,
):
super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
self.student_loss_fn = student_loss_fn
self.distillation_loss_fn = distillation_loss_fn
self.alpha = alpha
self.temperature = temperature
def train_step(self, data):
# Unpack data
x, y = data
# Forward pass of teacher
teacher_predictions = self.teacher(x, training=False)
with tf.GradientTape() as tape:
# Forward pass of student
student_predictions = self.student(x, training=True)
# Compute losses
student_loss = self.student_loss_fn(y, student_predictions)
# Compute scaled distillation loss from https://arxiv.org/abs/1503.02531
# The magnitudes of the gradients produced by the soft targets scale
# as 1/T^2, multiply them by T^2 when using both hard and soft targets.
distillation_loss = (
self.distillation_loss_fn(
tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
tf.nn.log_softmax(student_predictions / self.temperature, axis=1),
)
* self.temperature**2
)
loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
# Compute gradients
trainable_vars = self.student.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Update the metrics configured in `compile()`.
self.compiled_metrics.update_state(y, student_predictions)
# Return a dict of performance
results = {m.name: m.result() for m in self.metrics}
results.update(
{"student_loss": student_loss, "distillation_loss": distillation_loss}
)
return results
def test_step(self, data):
# Unpack the data
x, y = data
# Compute predictions
y_prediction = self.student(x, training=False)
# Calculate the loss
student_loss = self.student_loss_fn(y, y_prediction)
# Update the metrics.
self.compiled_metrics.update_state(y, y_prediction)
# Return a dict of performance
results = {m.name: m.result() for m in self.metrics}
results.update({"student_loss": student_loss})
return results
...define both teacher and student models
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
optimizer=keras.optimizers.Adam(),
metrics=[keras.metrics.BinaryAccuracy()],
student_loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
distillation_loss_fn=keras.losses.KLDivergence(),
alpha=0.1,
temperature=10,
)
# Distill teacher to student
distiller.fit(train_ds, validation_data = val_ds, epochs=1)

Related

Why loss is not decreasing in a Siamese BERT-Network training (Entity matching task)

I'm trying to finetune a model for an entity matching task (kind of a sentence similarity task).
The idea is that if I give as input two sentences the model should output if they represent the same entity or not. I'm interested in the products' domain.
So for example:
sentences_left = ('logitech harmony 890 advanced universal remote control h890', 'sony silver digital voice recorder icdb600')
sentences_right = ('logitech harmony 890 advanced universal remote hdtv , tv , dvd player ( s ) , lighting , audio system 100 ft universal remote 966193-0403', 'canon black ef 70-300mm f/4 -5.6 is usm telephoto zoom lens 0345b002')
The output should be 1 for the first left-right pair of sentences and 0 for the second.
I want to test two approaches. The first is a sequence classification setup. So I take a pair of sentences, concat them with a [SEP] token in-between, encode it and feed it to BERT.
This approach kind of work, but I wanted to explore a second one that, in theory, should work too.
In few words, using mpnet as pre-trained language model I'm trying to implement this setup:
This is taken from the paper Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. The idea is to compute not only a single embedding as before, but two separate embeddings for each of the sentences. Then concatenate the embeddings and feeds them to a softmax classifier.
After lots of struggles I'm still unable to make it work, since the loss has no intention of decreasing. It starts at 0.25 and never goes up neither down.
I'm using the Abt-Buy, Amazon-Google and Walmart-Amazon datasets.
This is my model:
class FinalClassifier(nn.Module):
def __init__(self, pos_neg=None, frozen=False):
super(FinalClassifier, self).__init__()
use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda" if use_cuda else "cpu")
self.encoder = AutoModel.from_pretrained(
'all-mpnet-base-v2')
if frozen:
for param in self.encoder.parameters():
param.requires_grad = False
self.tokenizer = AutoTokenizer.from_pretrained(
'all-mpnet-base-v2')
if pos_neg:
self.criterion = BCEWithLogitsLoss(pos_weight=torch.Tensor([pos_neg]))
self.linear = nn.Linear(3*768, 1)
self.relu = nn.ReLu()
def forward(self, texts_left, texts_right, labels=None):
encoded_inputs_left = self.tokenizer(texts_left, padding='max_length',
truncation=True, return_tensors='pt')
encoded_inputs_left = encoded_inputs_left.to(self.device)
output_left = self.encoder(**encoded_inputs_left)
output_left = _mean_pooling(output_left, encoded_inputs_left['attention_mask'])
# output_left = F.normalize(output_left, p=2, dim=1)
encoded_inputs_right = self.tokenizer(texts_right, padding='max_length',
truncation=True, return_tensors='pt')
encoded_inputs_right = encoded_inputs_right.to(self.device)
output_right = self.encoder(**encoded_inputs_right)
output_right = _mean_pooling(output_right, encoded_inputs_right['attention_mask'])
# output_right = F.normalize(output_right, p=2, dim=1)
# Look at sBERT paper (u, v, |u-v|)
pooled_output = torch.cat((output_left, output_right, torch.abs(output_left - output_right)), -1)
linear_output = self.linear(pooled_output)
relu_output = self.relu(linear_output)
labels = labels.to(self.device)
loss = self.criterion(linear_output.view(-1), labels.float())
return (loss, relu_output)
Here's the Dataset
class FinalDataset(torch.utils.data.Dataset):
def __init__(self, df):
self.labels = [int(label) for label in df['label']]
self.examples = df
def classes(self):
return self.labels
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
examples = self.examples.iloc[idx]
text_left = examples['text_left']
text_right = examples['text_right']
label = np.array(self.labels[idx])
return text_left, text_right, label
and finally the training loop
def train(model, train, val, learning_rate=1e-6, epochs=5, batch_size=8):
train_dataloader = torch.utils.data.DataLoader(train, batch_size=8, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val, batch_size=8)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
optimizer = Adam(model.parameters(), lr= learning_rate)
if use_cuda:
model = model.cuda()
for epoch_num in range(epochs):
total_loss_train = 0
tmp_loss = 0
step = 0
model.train()
for i, data in enumerate(tqdm(train_dataloader)):
left_batch, right_batch, labels = data
(batch_loss, _) = model(left_batch, right_batch, labels)
total_loss_train += batch_loss
tmp_loss += batch_loss
model.zero_grad()
batch_loss.backward()
optimizer.step()
# every 100 mini-batches
if i % 100 == 99:
print(f' Loss/train at epoch {epoch_num+1} (batch {i}): {tmp_loss/500}')
writer.add_scalar('Loss/train',
tmp_loss / 100,
epoch_num * len(train_dataloader) + i)
tmp_loss = 0
total_loss_val = 0
predictions = None
total_labels = None
step = 0
model.eval()
with torch.no_grad():
for i, data in enumerate(val_dataloader):
left_batch, right_batch, labels = data
(batch_loss, linear_output) = model(left_batch, right_batch, labels)
labels = labels.detach().cpu().numpy()
linear_output = linear_output.detach().cpu().numpy()
if predictions is None:
predictions = np.where(linear_output>0.5, 1, 0)
total_labels = labels
else:
predictions = np.append(predictions, np.where(linear_output>0.5, 1, 0), axis=0)
total_labels = np.append(total_labels, labels, axis=0)
total_loss_val += batch_loss.item()
tmp_loss += batch_loss.item()
# every 100 mini-batches
if i % 100 == 99:
print(f' Loss/val at epoch {epoch_num+1} (batch {i}): {tmp_loss/500}')
writer.add_scalar('Loss/val',
tmp_loss / 100,
epoch_num * len(val_dataloader) + i)
writer.add_scalar('F1/val',
f1_score(y_true=total_labels.flatten()[step:i], y_pred=predictions.flatten()[step:i]),
epoch_num * len(val_dataloader) + i)
tmp_loss = 0
step += 100
f1 = f1_score(y_true=total_labels.flatten(), y_pred=predictions.flatten())
report = classification_report(total_labels, predictions, zero_division=0)
# plot all the pr curves
for i in range(len([0, 1])):
add_pr_curve_tensorboard(i, predictions.flatten(), total_labels.flatten())
for name, p in model.named_parameters():
writer.add_histogram(name, p, bins='auto')
print(
f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train): .3f} \
| Val Loss: {total_loss_val / len(val): .3f} \
| Val F1: {f1: .3f}')
tqdm.write(report)
writer = SummaryWriter(log_dir=tensorboard_path)
EPOCHS = 5
LR = 1e-6
train_pos_neg_ratio = 9
model = FinalClassifier(train_pos_neg_ratio, frozen=False)
train_data, val_data = FinalDataset(df_train), FinalDataset(df_dev)
train(model, train_data, val_data, LR, EPOCHS)
writer.flush()
writer.close()
The issue is that the loss does NOT decrease, and the F1 accuracy as a result. I tried to normalize the outputs, add a dropout layer, analized the dataset to be sure that the problem wasn't there but now I ran out of ideas. An help would be extremely valuable.

Nested optimization in pytorch

I wrote a short snippet to train a classification model, and learn the learning rate of its optimization algorithm. In my example I tried to update weights of a network in an inner optimization loop and to learn the learning rate of the weight updates using an outer optimization loop (meta-optimization). I'm getting the error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [3, 10]], which is output 0 of AsStridedBackward0, is at version 12; expected version 2 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My code snippet is as following (NOTE: I'm using _stateless, an experimental functional API for nn. You need to run with the nightly build of pytorch.)
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import _stateless
class MyDataset(Dataset):
def __init__(self, N):
self.N = N
self.x = torch.rand(self.N, 10)
self.y = torch.randint(0, 3, (self.N,))
def __len__(self):
return self.N
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.fc1 = nn.Linear(10, 10)
self.fc2 = nn.Linear(10, 3)
self.relu = nn.ReLU()
self.alpha = nn.Parameter(torch.randn(1))
self.beta = nn.Parameter(torch.randn(1))
def forward(self, x):
y = self.relu(self.fc1(x))
return self.fc2(y)
epochs = 20
N = 100
dataset = DataLoader(dataset=MyDataset(N), batch_size=10)
model = MyModel()
loss_func = nn.CrossEntropyLoss()
optim = optim.Adam([model.alpha], lr=1e-3)
params = dict(model.named_parameters())
for i in range(epochs):
model.train()
train_loss = 0
for batch_idx, (x, y) in enumerate(dataset):
logits = _stateless.functional_call(model, params, x) # predict
loss_inner = loss_func(logits, y) # loss
optim.zero_grad() # reset grad
loss_inner.backward(create_graph=True, inputs=params.values()) # compute grad
train_loss += loss_inner.item() # store loss
for k, p in params.items():
if k is not 'alpha' and k is not 'beta':
p.update = - model.alpha * p.grad
params[k] = p + p.update # update weight
print('Train Epoch: {}\tLoss: {:.6f}'.format(i, train_loss / N))
logits = _stateless.functional_call(model, params, x) # predict
loss_meta = loss_func(logits, y)
loss_meta.backward()
loss_meta.step()
From the error message, I understand that the issue comes from weight update for the weights of the second layer of the network, which points to an error in my inner loop optimization. Any suggestions would be appreciated.
Check this link and save PARAMs per each epoch and use same inner batch:
https://discuss.pytorch.org/t/issue-using-parameters-internal-method/134549/11
for i in range(epochs):
model.train()
train_loss = 0
params = dict(model.named_parameters()) # add this
for batch_idx, (x, y) in enumerate(dataset):
params = {k: v.clone() for k,v in params.items()} # add this
logits = _stateless.functional_call(model, params, x) # predict
loss_inner = loss_func(logits, y)
..................
You should be updating params[k].data instead of params[k]
(Deleted the example to avoid distraction)
Let me enter in a kind of fundamental discussion (not an answer to your question).
If I undertand correctly you want to compute loss(f(w[i], x)) , and computing the w[i+1,j] = w[i,j] + g(v[j], w[i,j].grad(w.r.t loss)) . Then in the end you want to compute v[j+1] = v[j] + v[j].grad(w.r.t loss).
The gradient of v[j] is computed using the backward propagation, as a function of grad w[i,j]. So what you are trying to do is to choose v[j] that results in a good w[i,j]. I would ask: why would you bother about v[j] if you can control w[i,j] directly? And that's what the standard approach.

How to make custom validation_step in tensorflow 2 Tensorflow 2 / Keras?

I have a question regarding the validation Data.
I have this neural network and I divided my data into train_generator, val_generator, test_generator.
I made a custom model with a custom fit.
class MyModel(tf.keras.Model):
def __init__(self):
def __call__(.....)
def train_step(....)
then I have:
train_generator = DataGenerator(....)
val_generator = DataGenerator(....)
test_generator = DataGenerator(....)
then :
model = MyModel()
model.compile(optimizer=keras.optimizers.Adam(clipnorm=5.),
metrics=["accuracy"])
model.fit(train_generator, validation_data = val_generator, epochs=40)
ok and the program gives me no errors
But my question is : how can I know what happens with my validation_data ?
Is it processed the same way as the train_data ( train_generator ) in the train_step function ?
Or do I need to specify how to process the validation data ?
If it helps I will also live MyModel class
class MyModel(tf.keras.Model):
def __init__(self):
super(MyModel2, self).__init__()
self.dec2 = Decoder2()
def __call__(self, y_hat, **kwargs):
print(y_hat.shape)
z_hat = self.dec2(y_hat)
return z_hat
def train_step(self, dataset):
with tf.GradientTape() as tape:
y_hat = dataset[0]
z_true = dataset[1]
z_pred = self(y_hat, training=True)
#print("This is z_true : ", z_true.shape)
#print("This is z_pred : ", z_pred.shape)
loss = tf.reduce_mean(tf.abs(tf.cast(z_pred, tf.float64) - tf.cast(z_true, tf.float64)))
print("loss: ", loss)
global_loss.append(loss)
# Compute gradients. TRE SA FAC GRADIENT CLIPPING
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Update metrics (includes the metric that tracks the loss)
self.compiled_metrics.update_state(z_true, z_pred)
# Return a dict mapping metric names to current value
return {m.name: m.result() for m in self.metrics}
You have to add a test_step(self, data) function to your MyModel class as you can see it here: Providing your own evaluation step

Train simple RNN from a map of T/F values and bitstring keys in pytorch

I want to train rnn netwwork that will be able to identify the folowing:
I have a Language L that made of bitstrings (can be in lengh n>20) which every bitstring that in the language satisfy the pattern of the language (which is not rellevant for now).
I created a dataset which is like a map with bitstrings as keys and True/False as values:
001101000010010, False
111001001000000, False
111011101001100, False
011111000101101, False
10000110000110001100, True
011100100001010, False
....
I tried to create rnn network in pytorch:
class myDataset(T.utils.data.Dataset):
def __init__(self, src_file, m_rows=None):
tmp_x = np.loadtxt(src_file, max_rows=m_rows,
usecols=[0], delimiter=",", skiprows=0, dtype=np.int64)
tmp_y = np.genfromtxt(src_file, max_rows=m_rows,
usecols=[1], delimiter=",", dtype=bool)
tmp_y = tmp_y.reshape(-1, 1) # 2-D required
self.x_data = T.from_numpy(tmp_x).to(device)
self.y_data = T.from_numpy(tmp_y).to(device)
def __len__(self):
return len(self.x_data)
def __getitem__(self, idx):
preds = self.x_data[idx, :] # or just [idx]
val = self.y_data[idx, :]
return (preds, val) # tuple of two matrices
and train it:
net.train() # set mode
for epoch in range(0, max_epochs):
T.manual_seed(1 + epoch) # recovery reproducibility
epoch_loss = 0 # for one full epoch
for (batch_idx, batch) in enumerate(train_ldr):
(X, Y) = batch # (predictors, targets)
optimizer.zero_grad() # prepare gradients
oupt = net(X) # predicted prices
loss_val = loss_func(oupt, Y) # avg per item in batch
epoch_loss += loss_val.item() # accumulate avgs
loss_val.backward() # compute gradients
optimizer.step() # update wts
but I get error when loading the data:
OverflowError: Python int too large to convert to C long

Convergence issues in 2D RBF Neuron implemented as a Keras layer

We implemented a 2D Gaussian radial basis layer (RBF) in Keras and are running into convergence issues with batch sizes larger than 1. The Neuron should implement the following function:
f(x,y)=exp(-a((x-x_0)²+(y-y_0)²)
Here x_0, y_0 and a are fit parameters.
Testcase
Currently we are doing correctness tests and are trying to fit just a single Neuron on the 2D function above. The Neuron should be (and is in case of batch_size 1) able to approximate this function exactly. The optimal loss is 0.
Problem
If we choose a batch size of 1 in this code, the prediction with Keras will converge very often and will be nearly independent of the starting parameters.
If we increase the batch size, the fit might produce a random walk, freeze or not converge at all. In all of these cases (even batch_size 2) convergence is a lot worse than in the batch_size 1 case. If we choose the batch_size as the size of the trainingset (i.e. 1296, our desired batch size), the fit will freeze most of the time mostly independent of learning rate.
Code
We implemented this layer in the following code:
# 2D RBF Layer
# In case anybody wants to use this code afterwards:
# Licenses: Apache, MIT, BSD, LGPLv2 and v3 and Public Domain
# Input: x,y Pairs, shape: (2,)
# Output: exp(a* ((x-x_0)**2 + (y-y_0)**2)), shape: (1,)
# Parameters: x_0, y_0, a - called: mean_x, mean_y and opening in the following code:
# x and y should both lie in [0,1] - only [0,infinity] is enforced currently
class RBFLayer2D(Layer):
def __init__(self, **kwargs):
super(RBFLayer2D, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.mean_x = K.variable(0.35)
self.constraints[self.mean_x] = NonNeg()
self.mean_y = K.variable(0.35)
self.constraints[self.mean_y] = NonNeg()
self.opening = K.variable(2.0)
self.constraints[self.opening] = NonNeg()
self.trainable_weights = [self.mean_x,self.mean_y,self.opening]
super(RBFLayer2D, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
x_m = x[:,0] - self.mean_x
y_m = x[:,1] - self.mean_y
out = x_m*x_m + y_m*y_m
outexp = 50.0*K.exp(-64.8*self.opening*out)
# Output: exp(-a* ((x-x_0)**2 + (y-y_0)**2))
return outexp
def compute_output_shape(self, input_shape):
# If Inputshape is (None, N) Outputshape is (None,N/2)
# In our example we only look at (None, 2), which outputs (None,1)
output_shape = (input_shape[0], input_shape[1]//2)
return output_shape
Reproduction
To reproduce set a batch_size of 1 in the (not-so) minimal example after this section. When you run it, the code will display the target distribution (a circle in the lower left corner), the starting guess for our RBF ANN (a smaller circler in the middle) and then after each iteration the current guess (a circle getting bigger and moving to the lower left corner).
Afterwards set a batch_size of 12 and restart the code and you will not observe convergence anymore.
Minimal Example
from __future__ import print_function
from __future__ import division
import numpy as np
np.random.seed(1234)
import matplotlib.pyplot as plt
from keras.engine import Layer
from keras.optimizers import SGD
from keras.models import Sequential
from keras.constraints import NonNeg
from keras import backend as K
# 2D RBF Layer
# Input: x,y Pairs, shape: (2,)
# Output: exp(a* ((x-x_0)**2 + (y-y_0)**2)), shape: (1,)
# Parameters: x_0, y_0, a - called: mean_x, mean_y and opening in the following code:
# x and y should both lie in [0,1] - only [0,infinity] is enforced currently
class RBFLayer2D(Layer):
def __init__(self, **kwargs):
super(RBFLayer2D, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.mean_x = K.variable(0.35)
self.constraints[self.mean_x] = NonNeg()
self.mean_y = K.variable(0.35)
self.constraints[self.mean_y] = NonNeg()
self.opening = K.variable(2.0)
self.constraints[self.opening] = NonNeg()
self.trainable_weights = [self.mean_x,self.mean_y,self.opening]
super(RBFLayer2D, self).build(input_shape)
def call(self, x):
x_m = x[:,0] - self.mean_x
y_m = x[:,1] - self.mean_y
out = x_m*x_m + y_m*y_m
outexp = 50.0*K.exp(-64.8*self.opening*out)
# Output: exp(-a* ((x-x_0)**2 + (y-y_0)**2))
return outexp
def compute_output_shape(self, input_shape):
# If Inputshape is (None, N) Outputshape is (None,N/2)
# In our example we only look at (None, 2), which outputs (None,1)
output_shape = (input_shape[0], input_shape[1]//2)
return output_shape
# The function we want to train.
# It can be exactly represented using a single Neuron.
def twodenergy(phi, psi):
r0 = np.array([-180, -180])
b = 0.00005
return 50.0 * np.exp(- b * ((phi - r0[0]) ** 2 + (psi - r0[1]) ** 2))
# One of two plotting helper functions to show the results
def make_plot(y,numsteps,numbins,minangle,maxangle,plotnum, batch_size):
evaluation = np.zeros((numsteps, numsteps))
for i in range(0, numbins):
mx = i % numsteps
my = int(i / numsteps)
evaluation[mx,my]=y[i]
plt.imshow(evaluation.T, origin='lower',extent=[minangle, maxangle, minangle, maxangle])
plt.xlabel("x")
plt.ylabel("y")
if plotnum == 0:
plt.title("Startconfiguration")
else:
plt.title("RBF for batch_size %i at frame %03d" % (batch_size, plotnum))
plt.show()
# One of two plotting helper functions to show the target function
def plot_target_function(phi, psi, minangle, maxangle, delta_angle_half, numbins, numsteps ):
eval_matrix_corr = np.zeros((numsteps, numsteps))
for i in range(0, numbins):
mx = i % numsteps
my = int(i / numsteps)
ph = phi[mx] +delta_angle_half
ps = psi[my] +delta_angle_half
eval_matrix_corr[mx,my] = twodenergy(ph,ps)
plt.imshow(eval_matrix_corr.T, origin='lower', extent=[minangle, maxangle, minangle, maxangle])
plt.title("Target Function")
plt.xlabel("phi")
plt.ylabel("psi")
plt.show()
if __name__ == "__main__":
# batch_size == 1: converges very often nearly independent of input parameters
# batch_size == 2: no to slow convergence, but distribution stays in the right place more or less
# batch_size == 3-12: random walk
# batch_size == 1296: no movement in case of low learning_rate, random_walk in case of high learning_rate
# (this is the case where the whole map is evaluated in every step.
# 1296 is our desired testcase, because it evaluates the whole map we want to fit.
batch_size = 1
learning_rate = 1E-5
### Here we generate the target function ###
### f(phi,psi)
### phi is [-180,180]
### psi is [-180,180]
anglestep = 10.0
minangle = -180.0
maxangle = 180.0
numsteps = int((maxangle - minangle)/anglestep)
anglerange = maxangle - minangle
numbins = numsteps*numsteps
delta_angle_half = anglerange /(2.0* numsteps)
phi = np.arange(minangle, maxangle, anglestep)
psi = np.arange(minangle, maxangle, anglestep)
#Target Function Plot, Gaussian in lower left
plot_target_function(phi, psi, minangle, maxangle, delta_angle_half, numbins, numsteps )
# Input Parameter Regularization
# we map -180..180 to 0..1
# we also calculate the training parameters for our x,y pairs:
x_train = np.zeros((numbins, 2))
y_train = np.zeros((numbins, 1))
for x,ph in enumerate(phi):
for y,ps in enumerate(psi):
myphi = (ph + delta_angle_half - minangle)/(anglerange)
mypsi = (ps + delta_angle_half- minangle)/(anglerange)
x_train[x * numsteps + y, 0] = (ph +delta_angle_half - minangle)/(anglerange)
x_train[x * numsteps + y, 1] = (ps + delta_angle_half- minangle)/(anglerange)
y_train[x * numsteps + y] = twodenergy(ph +delta_angle_half,ps +delta_angle_half)
# Prediction with Keras
model = Sequential()
# Single RBF Layer, only one node
model.add(RBFLayer2D(input_shape=(2,)))
sgd = SGD(lr=learning_rate)
model.compile(loss="mean_squared_error", optimizer=sgd)
# We plot the starting configuration.
y = model.predict(x_train, batch_size=batch_size)
make_plot(y, numsteps, numbins, minangle, maxangle, 0, batch_size)
#Plot the first 15 iterations:
for i in range(0,15):
# For demonstration purposes, we fit 1 epoch and plot the output.
model.fit(x_train,y_train, epochs=1, batch_size=batch_size)
y = model.predict(x_train, batch_size=batch_size)
make_plot(y, numsteps, numbins, minangle, maxangle, 1 + i, batch_size)

Resources