Change custom loss parameter and NN parameter with respect to epoch - keras

I have a Keras model defined in the following manner (Tried to keep only the necessary parts):
temperature = 5.0
def knowledge_distillation_loss(y_true, y_pred, lambda_const):
    """Weighted sum of two ``logloss`` terms computed on column slices.

    Both tensors are split at column 10.  The first 10 columns of
    ``y_true`` and ``y_pred`` are compared directly; the remaining columns
    of ``y_true`` are treated as logits, divided by the module-level
    ``temperature`` and passed through a softmax before being compared with
    the remaining columns of ``y_pred``.

    Args:
        y_true: 2-D tensor; columns ``[:10]`` and ``[10:]`` are used.
        y_pred: 2-D tensor; columns ``[:10]`` and ``[10:]`` are used.
        lambda_const: multiplier applied to the first-10-columns term only.

    Returns:
        ``lambda_const * logloss(first halves) + logloss(softened halves)``.
    """
    target_head = y_true[:, :10]
    target_logits = y_true[:, 10:]
    softened_target = K.softmax(target_logits / temperature)

    pred_head = y_pred[:, :10]
    pred_soft = y_pred[:, 10:]

    head_term = lambda_const * logloss(target_head, pred_head)
    soft_term = logloss(softened_target, pred_soft)
    return head_term + soft_term
def get_model(num_labels):
#Some layers for model
logits = model.layers[-1].output
probabilities = Activation('softmax')(logits)
# softed probabilities
logits_T = Lambda(lambda x: x/temperature)(logits)
probabilities_T = Activation('softmax')(logits_T)
output = concatenate([probabilities, probabilities_T])
model = Model(model.input, output)
lambda_const = 0.07
optimizer=optimizers.SGD(lr=1e-1, momentum=0.9, nesterov=True),
loss=lambda y_true, y_pred: knowledge_distillation_loss(y_true, y_pred, lambda_const),
return model
I am following this reference.
This is implemented using fit_generator() on Keras with the TF backend. Obviously, I will have trouble when loading the model since temperature is hard-coded.
I wish to update temperature parameter with respect to the epoch number in both loss function and model.
How do I define such a control signal?

I've turned this into a complete example of one way to do this.
You could make a class for the loss function.
class TemperatureLossFunction:
    """Loss wrapper whose scale factor is held in an assignable variable.

    The object stored in ``temperature`` must provide ``assign(value)``,
    because ``setTemperature`` runs that op inside a session.
    """

    def __init__(self, temperature):
        self.temperature = temperature

    def loss_fun(self, y_truth, y_pred):
        """Return ``keras.losses.mse(y_truth, y_pred)`` scaled by the temperature."""
        return self.temperature * keras.losses.mse(y_truth, y_pred)

    def setTemperature(self, t, session=None):
        """Assign ``t`` to the temperature variable.

        Args:
            t: new value handed to ``self.temperature.assign``.
            session: session used to run the assign op.  When omitted (or
                falsy) the default TensorFlow session is used instead.

        If no session is supplied and there is no default session, the
        call is a no-op, matching the original best-effort behaviour.
        """
        # Fall back to the default session only when none was passed in, so
        # an explicit session never touches the tensorflow module.
        if not session:
            session = tensorflow.get_default_session()
        if session:
            session.run(self.temperature.assign(t))
class TemperatureLossCallback(keras.callbacks.Callback):
    """Keras callback that updates a temperature-holding loss object per epoch.

    Args:
        temp_lf: object exposing ``setTemperature(value)``.
    """

    def __init__(self, temp_lf):
        # Let the Keras base class set up its own attributes first.
        super(TemperatureLossCallback, self).__init__()
        self.temp_lf = temp_lf

    def on_epoch_end(self, epoch, params):
        """Push a new temperature once the epoch has finished."""
        # NOTE(review): the body of this hook was missing from the source.
        # Using the epoch index as the new temperature is an assumption
        # based on the stated goal of changing temperature per epoch --
        # confirm the intended schedule.
        self.temp_lf.setTemperature(epoch)
I've created two methods for working with this, the first method creates and saves the model.
def init(session):
global temperature #global for serialization issues
temperature = tensorflow.Variable(5.0)
tlo = TemperatureLossFunction(temperature)
inp = keras.layers.Input((4,4))
l1 = keras.layers.Lambda( lambda x: temperature*x )
op = l1(inp)
m = keras.models.Model(inputs=[inp], outputs=[op])
m.compile( optimizer = keras.optimizers.SGD(0.01), loss=tlo.loss_fun)
#make sure the session is the one your using!
The first test I run makes sure we are changing the value.
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
The second test I run makes sure we can change the values with a callback.
cb = TemperatureLossCallback(tlo)
def gen():
    """Yield ten ``(inputs, targets)`` pairs of shape ``(1, 4, 4)``.

    Every ``inputs`` array is all ones and every ``targets`` array is all
    zeros; fresh arrays are created for each pair.
    """
    remaining = 10
    while remaining > 0:
        yield numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4))
        remaining -= 1
gen(), steps_per_epoch=1, epochs=10, callbacks=[cb]
Finally, to demonstrate reloading the file.
def restart(session):
global temperature
temperature = tensorflow.Variable(5.0)
tlo = TemperatureLossFunction(temperature)
loss_fun = tlo.loss_fun
m = keras.models.load_model(
custom_objects = {"loss_fun":tlo.loss_fun}
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
m.evaluate( numpy.ones( (1, 4, 4) ), numpy.zeros( ( 1, 4, 4) ) )
This is just the code I use to start the program for completeness
import sys
if __name__=="__main__":
sess = tensorflow.Session()
with sess.as_default():
if "restart" in sys.argv:
One downside of this method, if you run this you will see that the temperature variable does not get loaded from the model file. It takes on the value assigned in the code.
On the plus side, both the loss function and the layer are referencing the same Variable
One way I found to save the variable value is to create a new layer and use the variable as the weight for the new layer.
class VLayer(keras.layers.Layer):
def __init__(self, *args, **kwargs):
def build(self, input_shape):
self.v1 = self.add_weight(
shape = (),
def call(self, x):
return x*self.v1
def setValue(self, val):
self.set_weights( numpy.array([val]) )
Now when you load the model, the weight will be loaded. Unfortunately, I could not find a way to link the weight to a Variable on load. So there will be two variables, one for the loss function and one for the layer. Both of them can be set from a callback though. So I feel this method is on a more robust path.


How to drop running stats to default value for Norm layer in pyTorch?

I trained a model on some images. Now, to fit a similar dataset with different colors, I want to load this model but also drop all running stats from the BatchNorm layers (set them to their default values, as if totally untrained). Which parameters should I reset? A simple model looks like this:
import torch
import torch.nn as nn
class Net(nn.Module):
    """Three-stage network: 3x3 conv, 2-D batch norm, 3x3 conv.

    Every stage maps 3 channels to 3 channels, and both convolutions use
    ``padding=1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is kept (conv0, norm, conv) so parameter ordering
        # and random initialisation match the original definition.
        self.conv0 = nn.Conv2d(3, 3, 3, padding=1)
        self.norm = nn.BatchNorm2d(3)
        self.conv = nn.Conv2d(3, 3, 3, padding=1)

    def forward(self, x):
        """Apply conv0, then norm, then conv to ``x`` and return the result."""
        return self.conv(self.norm(self.conv0(x)))
net = Net()
##or for pretrained it will be
##net = torch.load('net.pth')
def drop_to_default():
for m in net.modules():
if type(m) == nn.BatchNorm2d:
Simplest way to do that is to run reset_running_stats() method on BatchNorm objects:
def drop_to_default():
    """Reset the running statistics of every ``BatchNorm2d`` layer in ``net``.

    Walks all sub-modules of the module-level ``net`` and calls
    ``reset_running_stats()`` on each batch-norm layer it finds.
    """
    for module in net.modules():
        # isinstance also covers subclasses of BatchNorm2d, which the
        # original exact-type comparison would have skipped.
        if isinstance(module, nn.BatchNorm2d):
            module.reset_running_stats()
Below is this method's source code:
def reset_running_stats(self) -> None:
    """Put the running-statistics buffers back to their neutral values.

    Does nothing when ``self.track_running_stats`` is falsy.  Otherwise
    the mean is zeroed, the variance is filled with one and the batch
    counter is zeroed, all in place.
    """
    if not self.track_running_stats:
        return
    # running_mean / running_var / num_batches_tracked are registered at
    # runtime, depending on whether track_running_stats is on.
    self.running_mean.zero_()
    self.running_var.fill_(1)
    self.num_batches_tracked.zero_()
You can see the source code here, _NormBase class.

How to make a custom validation_step in TensorFlow 2 / Keras?

I have a question regarding the validation Data.
I have this neural network and I divided my data into train_generator, val_generator, test_generator.
I made a custom model with a custom fit.
class MyModel(tf.keras.Model):
def __init__(self):
def __call__(.....)
def train_step(....)
then I have:
train_generator = DataGenerator(....)
val_generator = DataGenerator(....)
test_generator = DataGenerator(....)
then :
model = MyModel()
metrics=["accuracy"]), validation_data = val_generator, epochs=40)
ok and the program gives me no errors
But my question is : how can I know what happens with my validation_data ?
Is it processed the same way as the train_data ( train_generator ) in the train_step function ?
Or do I need to specify how to process the validation data ?
If it helps, I will also leave the MyModel class here:
class MyModel(tf.keras.Model):
    """Keras model wrapping a ``Decoder2`` with a hand-written training step.

    The training step minimises the mean absolute difference between the
    decoder output and the target, computed in float64.
    """

    def __init__(self):
        # Fixed: the original called super(MyModel2, self), naming a class
        # that is not this one.
        super().__init__()
        self.dec2 = Decoder2()

    def call(self, y_hat, **kwargs):
        """Run ``y_hat`` through the decoder and return its output.

        Implemented as ``call`` rather than ``__call__`` so the Keras base
        class keeps control of ``__call__``; ``model(y_hat)`` still works
        for callers.  Extra keyword arguments (e.g. ``training``) are
        accepted and ignored, as in the original.
        """
        z_hat = self.dec2(y_hat)
        return z_hat

    def train_step(self, dataset):
        """Run one optimisation step on a batch.

        Args:
            dataset: indexable batch; item 0 is the model input and item 1
                is the target.

        Returns:
            Dict mapping each metric name to its current value, plus a
            ``"loss"`` entry holding this batch's loss.
        """
        y_hat = dataset[0]
        z_true = dataset[1]

        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            z_pred = self(y_hat, training=True)
            loss = tf.reduce_mean(
                tf.abs(tf.cast(z_pred, tf.float64) - tf.cast(z_true, tf.float64))
            )
        print("loss: ", loss)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update the compiled metrics.  The loss above is computed by hand,
        # so it is reported explicitly below instead.
        self.compiled_metrics.update_state(z_true, z_pred)

        # Fixed: the original built a set of values; a name -> value dict
        # is what the surrounding comment describes.
        results = {m.name: m.result() for m in self.metrics}
        results["loss"] = loss
        return results
You have to add a test_step(self, data) function to your MyModel class as you can see it here: Providing your own evaluation step

Pytorch. Optimizing the input to a model: Trying to backward through the graph a second time, but the buffers have already been freed

Currently, I'm trying to optimize the values of an input tensor, x, to a model.
I want to restrict the input to only contain values in the range [0.0;1.0].
There is not too much information about how to do this, when not working with a layer as such.
I've created a minimum working example below, which gives the error message in the title of this post.
The magic happens in the optimize_x() function
If I comment out the line: model.x = model.x.clamp(min=0.0, max=1.0) the issue is fixed, but the tensor is obviously not clamped.
I'm aware that I could just set retain_graph=True - but it's not clear whether this is the right way to go, or if there is a better way of achieving this functionality?
import torch
from torch.distributions import Uniform
class OptimizeInputModel(torch.nn.Module):
    """Stack of linear layers evaluated on an internally stored input ``x``.

    ``x`` is a ``(1, 123)`` tensor initialised to 0.1 with gradients
    enabled, so an optimizer can update the input rather than the weights.
    """

    def __init__(self):
        # nn.Module must be initialised before sub-modules are assigned;
        # this call (and the closing parenthesis of Sequential) were
        # missing from the source.
        super().__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(123, 1000),
            torch.nn.Linear(1000, 100),
            torch.nn.Linear(100, 1),
        )
        in_shape = (1, 123)
        self.x = torch.ones(in_shape) * 0.1
        self.x.requires_grad = True

    def forward(self) -> torch.Tensor:
        """Return the network output for the stored input ``self.x``."""
        return self.model(self.x)
class MyLossFunc(torch.nn.Module):
    """Loss equal to the sum of the negated elements of ``y``."""

    def forward(self, y: torch.Tensor) -> torch.Tensor:
        """Return ``sum(-y)`` as a scalar tensor."""
        return torch.sum(-y)
def optimize_x(epochs=50000):
    """Optimise the model's stored input ``x`` while keeping it in [0, 1].

    Args:
        epochs: number of optimisation steps (defaults to the original
            hard-coded 50000).

    The loss is printed every 9th step.
    """
    model = OptimizeInputModel()
    optimizer = torch.optim.Adam([model.x], lr=1e-4)
    loss_fn = MyLossFunc()
    for epoch in range(epochs):
        # Constrain x to [0, 1] *in place*.  Rebinding ``model.x`` to the
        # result of ``clamp`` replaces the leaf tensor the optimizer holds
        # with a non-leaf one, which causes "Trying to backward through
        # the graph a second time" on the next step.
        with torch.no_grad():
            model.x.clamp_(min=0.0, max=1.0)

        optimizer.zero_grad()
        y = model()
        loss = loss_fn(y)
        # NOTE(review): the backward/step calls were missing from the
        # source; restored as the usual zero_grad -> backward -> step.
        loss.backward()
        optimizer.step()

        if epoch % 9 == 0:
            print(f'Epoch: {epoch}\t Loss: {loss}')
Full error message:
RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
For anyone in the future who might have the same question.
My solution was to do (note the underscore!): model.x.data.clamp_(min=0.0, max=1.0)
instead of:
model.x = model.x.clamp(min=0.0, max=1.0)

Keras Custom Layer Error (Operation IsVariableInitialized has been marked as not fetchable)

I'm trying to create a custom Keras layer on a toy dataset, and am having issues. At a high level, I want to create an "Input Gate" layer, which would have trainable weights to turn each column of input on or off. So I'm starting with just trying to multiply the inputs by a sigmoid'd version of the learned weights. My code is as follows:
### This is my custom layer
class InputGate(Layer):
def __init__(self, **kwargs):
super(InputGate, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel = self.add_weight(name='input_gate',
super(InputGate, self).build(input_shape) # Be sure to call this somewhere!
def call(self, inputs):
gate_amount = K.sigmoid(self.kernel)
return inputs * gate_amount
def get_config(self):
config = {}
base_config = super(InputGate, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape):
return input_shape
def create_linear_model(x, y, num_noise_vars = 0, reg_strength=0):
new_x = get_x_with_noise(x, num_noise_vars=num_noise_vars)
model = Sequential([
Dense(1, kernel_regularizer=l2(reg_strength))
model.compile(optimizer="rmsprop", loss="mse") = 0.001
return {"model": model, "new_x": new_x}
def get_x_with_noise(x, num_noise_vars):
noise_vars = []
for noise_var in range(num_noise_vars):
x_with_noise = noise_vars
new_x = np.array(list(zip(*x_with_noise)))
return new_x
x = np.random.random(500)
y = (x * 3) + 10
num_noise_vars = 5
info = create_linear_model(x, y, num_noise_vars=num_noise_vars)
model = info["model"]
new_x = info["new_x"]
results =, y, epochs=num_epochs, verbose=0)
And then I get the following error:
ValueError: Operation 'input_gate_14/IsVariableInitialized' has been marked as not fetchable.
This layer is mostly taken from the docs. I'm using Keras 2.0.9, with the TensorFlow backend on a CPU (MacBook Air).
This layer seems as simple as can be, and googling the error leads me to discussions that don't seem relevant. Anyone have ideas of what's causing this?
Any help is much appreciated! Thanks!

Persistent Variable in keras Custom Layer

I want to write a custom layer where I can keep a variable in memory between runs.
For example,
class MyLayer(Layer):
    """Layer holding a backend variable that ``call`` bumps by one.

    The increment in ``call`` is done eagerly with ``K.eval`` /
    ``K.set_value``, and the variable itself is returned as the output.
    """

    def __init__(self, out_dim = 51, **kwargs):
        self.out_dim = out_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the persistent variable, starting at 0.0."""
        self.persistent_variable = K.variable(0.0)
        self.built = True

    def get_output_shape_for(self, input_shape):
        """Return ``(batch, 1)`` where ``batch`` is ``input_shape[0]``."""
        batch = input_shape[0]
        return (batch, 1)

    def call(self, x, mask=None):
        """Add one to the stored value and return the variable."""
        bumped = K.eval(self.persistent_variable) + 1
        K.set_value(self.persistent_variable, bumped)
        return self.persistent_variable
m = Sequential()
When I run m.predict, I expect the persistent_variable to get updated, and print the incremented value.
But it looks like it always prints 0
# Dummy input
x = np.zeros(1)
m.predict(x, batch_size=1)
My question is, how do I make the persistent_variable increment and save after every run of m.predict
The trick is that you have to call self.add_update(...) in your call function to register a function that will be called every time your model is evaluated (I found this by digging into the source code of the stateful rnns). If you do self.stateful = True it will call your custom update function for every training and prediction call, otherwise it will only call it during training. For example:
import keras.backend as K
import numpy as np
from keras.engine.topology import Layer
class CounterLayer(Layer):
    """Layer that owns a counter variable and registers an increment update.

    Args:
        stateful: True means the counter is incremented on predict and
            train; False means it is only incremented on train.
    """

    def __init__(self, stateful=False, **kwargs):
        self.stateful = stateful
        super(CounterLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Define variables in build
        self.count = K.variable(0, name="count")
        super(CounterLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Register the ``count += 1`` update and return the counter."""
        # Each entry has the form (variable, value to set it to), so this
        # one says: self.count = self.count + 1.
        # More updates can be appended to this list, or add_update can be
        # called again.
        updates = [(self.count, self.count + 1)]

        # The update is tied to x so it runs whenever the layer is given a
        # new x.
        self.add_update(updates, x)

        # The output is the counter itself, not a transformed copy of x.
        return self.count

    def get_output_shape_for(self, input_shape):
        # The count is returned as an array ([[count]]).
        return (1, 1)

    # Newer Keras versions look this hook up as compute_output_shape.
    compute_output_shape = get_output_shape_for

    def reset_states(self):
        """Set the counter back to its initial value."""
        # NOTE(review): this method had no body in the source; resetting
        # to 0 (the value used in build) is an assumption -- confirm.
        K.set_value(self.count, 0)
Example usage:
from keras.layers import Input
from keras.models import Model
from keras.optimizers import RMSprop
inputLayer = Input(shape=(10,))
counter = CounterLayer() # Don't update on predict
# counter = CounterLayer(stateful=True) # This will update each time you call predict
counterLayer = counter(inputLayer)
model = Model(input=inputLayer, output=counterLayer)
optimizer = RMSprop(lr=0.001)
model.compile(loss="mse", optimizer=optimizer)
# See the value of our counter
print counter.count.get_value()
# This won't actually train anything but each epoch will update our counter
# Note that if you say have a batch size of 5, update will be called 5 times per epoch[1, 10]), np.array([0]), batch_size=1, nb_epoch=5)
# The value of our counter has now changed
print counter.count.get_value()
model.predict(np.zeros([1, 10]))
# If we did stateful=False, this didn't change, otherwise it did
print counter.count.get_value()
One will need to make use of tf_state_ops.assign() or tf.compat.v1.scatter_update() for implementing this functionality.
Below is an example using tf_state_ops.assign().
import tensorflow as tf
import tensorflow.keras.layers as KL
import tensorflow_probability as tfp
from tensorflow.python.ops import state_ops as tf_state_ops
class CustomLayer(KL.Layer):
"""custom layer for storing moving average of nth percentile of some values"""
def __init__(
percentile: float = 66.67,
name: str = "thresh",
alpha: float = 0.9,
moving_thresh_initializer: float = 0.0,
"""Layer initialization
percentile (float, optional): percentile for thresholding. Defaults to 66.67.
name (str, optional): name for the tensor. Defaults to "thresh".
alpha (float, optional): decay value for moving average. Defaults to 0.9.
moving_thresh_initializer (float, optional): Initial threshold. Defaults to 0.0
super().__init__(trainable=False, name=name, **kwargs)
self.percentile = percentile
self.moving_thresh_initializer = tf.constant_initializer(
self.alpha = alpha
def build(self, input_shape):
"""build the layer"""
shape = ()
self.moving_thresh = self.add_weight(
return super().build(input_shape)
def call(self, inputs: tf.Tensor) -> tf.Tensor:
"""call method on the layer
inputs (tf.Tensor): samplewise values for a given batch
tf.Tensor (shape = ()): threshold value
batch_thresh = tfp.stats.percentile(
inputs, q=self.percentile, axis=[0], interpolation="linear"
self.moving_thresh = tf_state_ops.assign(
self.alpha * self.moving_thresh + (1.0 - self.alpha) * batch_loss_thresh,
# use_locking=self._use_locking,
return self.moving_thresh
def get_config(self) -> dict:
"""Setting up the layer config
dict: config key-value pairs
base_config = super().get_config()
config = {
"alpha": self.alpha,
"moving_thresh_initializer": self.moving_thresh_initializer,
"percentile": self.percentile,
"threshhold": self.moving_thresh,
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape: tuple) -> tuple:
"""shape of the layer output"""
return ()
The above custom layer can be included in the workflow as follows:
thresholding_layer = CustomLayer()
# Dummy input
x = np.zeros((batch_size, 1))
current_threshold = thresholding_layer(x)
For further details on working with the above custom layer and also usage of tf.compat.v1.scatter_update() you can check out the following link.
