Persistent Variable in keras Custom Layer - keras

I want to write a custom layer in which I can keep a variable in memory between runs.
For example,
class MyLayer(Layer):
    """Layer that holds a backend variable meant to persist between runs.

    ``call`` reads the variable with ``K.eval``, adds one, writes the result
    back with ``K.set_value`` and returns the variable itself.
    """

    def __init__(self, out_dim=51, **kwargs):
        # ``out_dim`` is stored before the base initializer runs, as in the
        # original snippet.
        self.out_dim = out_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the persistent variable (initial value 0.0)."""
        initial_value = 0.0
        self.persistent_variable = K.variable(initial_value)
        self.built = True

    def get_output_shape_for(self, input_shape):
        """Return ``(batch, 1)`` where ``batch`` is ``input_shape[0]``."""
        batch_dim = input_shape[0]
        return (batch_dim, 1)

    def call(self, x, mask=None):
        """Increment the stored value by one and return the variable."""
        # NOTE(review): K.eval / K.set_value run when this Python method is
        # executed, not as part of the returned tensor - presumably this is
        # why the counter does not advance on every predict(); confirm.
        incremented = K.eval(self.persistent_variable) + 1
        K.set_value(self.persistent_variable, incremented)
        return self.persistent_variable
m = Sequential()
m.add(MyLayer(input_shape=(1,)))
When I run m.predict, I expect the persistent_variable to get updated, and print the incremented value.
But it looks like it always prints 0
# Dummy input
x = np.zeros(1)
m.predict(x, batch_size=1)
My question is, how do I make the persistent_variable increment and save after every run of m.predict
Thanks,
Naveen

The trick is that you have to call self.add_update(...) in your call function to register a function that will be called every time your model is evaluated (I found this by digging into the source code of the stateful rnns). If you do self.stateful = True it will call your custom update function for every training and prediction call, otherwise it will only call it during training. For example:
import keras.backend as K
import numpy as np
from keras.engine.topology import Layer
class CounterLayer(Layer):
    """Layer whose output is an internal counter advanced via ``add_update``.

    Args:
        stateful: per the accompanying answer, ``True`` means the counter is
            incremented on both predict and train, ``False`` means it is only
            incremented on train.
        **kwargs: forwarded unchanged to the base ``Layer`` initializer.
    """

    def __init__(self, stateful=False, **kwargs):
        super(CounterLayer, self).__init__(**kwargs)
        # Assigned after the base initializer so that a base class which
        # resets ``stateful`` in its own __init__ cannot overwrite the
        # caller's choice.
        # NOTE(review): some Keras 2.x releases appear to do this - confirm.
        self.stateful = stateful

    def build(self, input_shape):
        """Create the counter variable, starting at 0."""
        self.count = K.variable(0, name="count")
        super(CounterLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Register the ``count = count + 1`` update and return the counter."""
        # Each update is a (variable, new_value) pair; more pairs can be
        # appended, or add_update can be called again, to register more.
        updates = [(self.count, self.count + 1)]
        # The update is registered together with the layer input ``x``.
        self.add_update(updates, x)
        # The output is the counter itself, not a transformation of ``x``.
        return self.count

    def get_output_shape_for(self, input_shape):
        """Return the fixed output shape ``(1, 1)``, i.e. ``[[count]]``."""
        return (1, 1)

    # Newer Keras versions look this method up as ``compute_output_shape``;
    # expose both names so the layer works under either convention.
    compute_output_shape = get_output_shape_for

    def reset_states(self):
        """Reset the counter to 0."""
        # K.set_value works on any backend variable; ``.set_value`` is a
        # method of one specific backend's variable type.
        K.set_value(self.count, 0)
Example usage:
from keras.layers import Input
from keras.models import Model
from keras.optimizers import RMSprop
inputLayer = Input(shape=(10,))
counter = CounterLayer() # Don't update on predict
# counter = CounterLayer(stateful=True) # This will update each time you call predict
counterLayer = counter(inputLayer)
model = Model(input=inputLayer, output=counterLayer)
optimizer = RMSprop(lr=0.001)
model.compile(loss="mse", optimizer=optimizer)
# See the value of our counter
print counter.count.get_value()
# This won't actually train anything but each epoch will update our counter
# Note that if you say have a batch size of 5, update will be called 5 times per epoch
model.fit(np.zeros([1, 10]), np.array([0]), batch_size=1, nb_epoch=5)
# The value of our counter has now changed
print counter.count.get_value()
model.predict(np.zeros([1, 10]))
# If we did stateful=False, this didn't change, otherwise it did
print counter.count.get_value()

One will need to make use of tf_state_ops.assign() or tf.compat.v1.scatter_update() for implementing this functionality.
Below is an example using tf_state_ops.assign().
import tensorflow as tf
import tensorflow.keras.layers as KL
import tensorflow_probability as tfp
from tensorflow.python.ops import state_ops as tf_state_ops
class CustomLayer(KL.Layer):
    """Custom layer storing a moving average of the nth percentile of some values."""

    def __init__(
        self,
        percentile: float = 66.67,
        name: str = "thresh",
        alpha: float = 0.9,
        moving_thresh_initializer: float = 0.0,
        **kwargs
    ):
        """Layer initialization

        Args:
            percentile (float, optional): percentile for thresholding. Defaults to 66.67.
            name (str, optional): name for the tensor. Defaults to "thresh".
            alpha (float, optional): decay value for moving average. Defaults to 0.9.
            moving_thresh_initializer (float, optional): Initial threshold. Defaults to 0.0
        """
        # A config produced by get_config() carries the base layer's
        # "trainable" entry; drop it so it does not collide with the
        # hard-coded trainable=False when the config is fed back in.
        kwargs.pop("trainable", None)
        super().__init__(trainable=False, name=name, **kwargs)
        self.percentile = percentile
        # Keep the plain float so get_config() can return a serializable value.
        self._initial_thresh = moving_thresh_initializer
        self.moving_thresh_initializer = tf.constant_initializer(
            value=moving_thresh_initializer
        )
        self.alpha = alpha

    def build(self, input_shape):
        """build the layer: create the scalar, non-trainable moving threshold"""
        shape = ()
        self.moving_thresh = self.add_weight(
            shape=shape,
            name="moving_thresh",
            initializer=self.moving_thresh_initializer,
            trainable=False,
        )
        return super().build(input_shape)

    def call(self, inputs: tf.Tensor) -> tf.Tensor:
        """call method on the layer

        Args:
            inputs (tf.Tensor): samplewise values for a given batch

        Returns:
            tf.Tensor (shape = ()): threshold value
        """
        batch_thresh = tfp.stats.percentile(
            inputs, q=self.percentile, axis=[0], interpolation="linear"
        )
        # Exponential moving average of the per-batch percentile.
        updated_thresh = (
            self.alpha * self.moving_thresh + (1.0 - self.alpha) * batch_thresh
        )
        # Return the assign result without rebinding self.moving_thresh, so
        # the attribute keeps pointing at the weight created in build().
        return tf_state_ops.assign(self.moving_thresh, updated_thresh)

    def get_config(self) -> dict:
        """Setting up the layer config

        Only plain Python values are stored, and every key matches an
        ``__init__`` argument.

        Returns:
            dict: config key-value pairs
        """
        config = dict(super().get_config())
        config.update(
            {
                "alpha": self.alpha,
                "moving_thresh_initializer": self._initial_thresh,
                "percentile": self.percentile,
            }
        )
        return config

    def compute_output_shape(self, input_shape: tuple) -> tuple:
        """shape of the layer output (a scalar)"""
        return ()
The above custom layer can be included in the workflow as follows:
thresholding_layer = CustomLayer()
# Dummy input
x = np.zeros((batch_size, 1))
current_threshold = thresholding_layer(x)
For further details on working with the above custom layer and also usage of tf.compat.v1.scatter_update() you can check out the following link.
https://medium.com/dive-into-ml-ai/custom-layer-with-memory-in-keras-1d0c03e722e9

Related

How to use PyMC (v4.0.1) with statsmodels to estimate state space models?

I am trying to use PyMC (v4.0.1) with statsmodels to estimate state-space models. I am following this example that uses PyMC3:
https://www.statsmodels.org/v0.12.0/examples/notebooks/generated/statespace_sarimax_pymc3.html
The example makes use of the pm.DensityDist() function and apparently the API has changed. And PyMC uses Aesara instead of Theano and I have no idea if that matters.
As a working example, here is my attempt to simulate and estimate an AR(1) process:
import numpy as np
import statsmodels.api as sm
import pymc as pm
import aesara.tensor as at
from scipy.signal import lfilter
# Generate artificial data
nobs = int(1e3)
true_phi = np.r_[0.5]
true_sigma = 0.5**0.5
np.random.seed(1234)
disturbances = np.random.normal(0, true_sigma, size=(nobs,))
endog = lfilter([1], np.r_[1, -true_phi], disturbances)
# Initialize model
mod = sm.tsa.statespace.SARIMAX(endog, order=(1, 0, 0))
# Helper functions copied. Do not know how they work
class Loglike(at.Op):
    """Op that evaluates ``model.loglike`` for a vector of parameters."""

    itypes = [at.dvector]  # input: vector of parameter values
    otypes = [at.dscalar]  # output: scalar log-likelihood

    def __init__(self, model):
        self.model = model
        # Companion Op used by grad() below.
        self.score = Score(self.model)

    def perform(self, node, inputs, outputs):
        """Store the log-likelihood of the given parameters in ``outputs``."""
        (params,) = inputs  # the vector of parameters
        outputs[0][0] = np.array(self.model.loglike(params))

    def grad(self, inputs, g):
        """Return the vector-Jacobian product ``g[0] * score(params)``."""
        (params,) = inputs
        return [g[0] * self.score(params)]
class Score(at.Op):
    """Op that evaluates ``model.score`` for a vector of parameters."""

    itypes = [at.dvector]  # input: vector of parameter values
    otypes = [at.dvector]  # output: vector returned by model.score

    def __init__(self, model):
        self.model = model

    def perform(self, node, inputs, outputs):
        """Store ``model.score(params)`` in ``outputs``."""
        (params,) = inputs
        outputs[0][0] = self.model.score(params)
loglike = Loglike(mod)
# Set sampling params
ndraws = 3000 # number of draws from the distribution
nburn = 600 # number of "burn-in points" (which will be discarded)
# Sample from posterior
with pm.Model():
# Priors
arL1 = pm.Uniform('ar.L1', -0.99, 0.99)
sigma2 = pm.InverseGamma('sigma2', 2, 4)
# convert variables to tensor vectors
theta = at.as_tensor_variable([arL1, sigma2])
# use a DensityDist (use a lambda function to "call" the Op)
pm.DensityDist('likelihood', theta, logp = lambda v: loglike(v))
# Draw samples
trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)
The error is in the call to pm.sample().
---> 74 trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)
But the error message suggests that the issue has something to do with the likelihood function:
TypeError: <lambda>() takes 1 positional argument but 2 were given
I've tried a bunch of things, but I'm at a loss. I'd really appreciate any suggestions.
Issue fixed by replacing:
pm.DensityDist('likelihood', theta, logp = lambda v: loglike(v))
with
pm.Potential('likelihood', loglike(theta))
Here's the full working code:
import numpy as np
import statsmodels.api as sm
import pymc as pm
import aesara.tensor as at
from scipy.signal import lfilter
# Generate artificial data
nobs = int(1e3)
true_phi = np.r_[0.5]
true_sigma = 0.5**0.5
np.random.seed(1234)
disturbances = np.random.normal(0, true_sigma, size=(nobs,))
endog = lfilter([1], np.r_[1, -true_phi], disturbances)
# Initialize model
mod = sm.tsa.statespace.SARIMAX(endog, order=(1, 0, 0))
# Helper functions copied. Do not know how they work
class Loglike(at.Op):
    """Op that evaluates ``model.loglike`` for a vector of parameters."""

    itypes = [at.dvector]  # input: vector of parameter values
    otypes = [at.dscalar]  # output: scalar log-likelihood

    def __init__(self, model):
        self.model = model
        # Companion Op used by grad() below.
        self.score = Score(self.model)

    def perform(self, node, inputs, outputs):
        """Store the log-likelihood of the given parameters in ``outputs``."""
        (params,) = inputs  # the vector of parameters
        outputs[0][0] = np.array(self.model.loglike(params))

    def grad(self, inputs, g):
        """Return the vector-Jacobian product ``g[0] * score(params)``."""
        (params,) = inputs
        return [g[0] * self.score(params)]
class Score(at.Op):
    """Op that evaluates ``model.score`` for a vector of parameters."""

    itypes = [at.dvector]  # input: vector of parameter values
    otypes = [at.dvector]  # output: vector returned by model.score

    def __init__(self, model):
        self.model = model

    def perform(self, node, inputs, outputs):
        """Store ``model.score(params)`` in ``outputs``."""
        (params,) = inputs
        outputs[0][0] = self.model.score(params)
loglike = Loglike(mod)
# Set sampling params
ndraws = 3000 # number of draws from the distribution
nburn = 600 # number of "burn-in points" (which will be discarded)
# Sample from posterior
with pm.Model():
# Priors
arL1 = pm.Uniform('ar.L1', -0.99, 0.99)
sigma2 = pm.InverseGamma('sigma2', 2, 4)
# convert variables to tensor vectors
theta = at.as_tensor_variable([arL1, sigma2])
# use a Potential to add the Op's log-likelihood term to the model
pm.Potential('likelihood', loglike(theta))
# Draw samples
trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)

How to drop running stats to default value for Norm layer in pyTorch?

I trained a model on some images. Now, to fit a similar dataset with different colors, I want to load this model but also drop all running stats from the BatchNorm layers (set them to their default values, as if totally untrained). Which parameters should I reset? A simple model looks like this:
import torch
import torch.nn as nn
class Net(nn.Module):
    """Small network: 3x3 conv, 2-D batch norm, then another 3x3 conv."""

    def __init__(self):
        super(Net, self).__init__()
        # Sub-modules are registered in the same order as before:
        # conv0, norm, conv.
        self.conv0 = nn.Conv2d(3, 3, 3, padding=1)
        self.norm = nn.BatchNorm2d(3)
        self.conv = nn.Conv2d(3, 3, 3, padding=1)

    def forward(self, x):
        """Apply conv0, then norm, then conv to ``x``."""
        normalized = self.norm(self.conv0(x))
        return self.conv(normalized)
net = Net()
##or for pretrained it will be
##net = torch.load('net.pth')
def drop_to_default():
for m in net.modules():
if type(m) == nn.BatchNorm2d:
####???####
drop_to_default()
The simplest way to do that is to call the reset_running_stats() method on the BatchNorm objects:
def drop_to_default(model=None, norm_types=(nn.BatchNorm2d,)):
    """Reset the running statistics of the normalization layers in a model.

    Args:
        model: module whose sub-modules are scanned. Defaults to the
            module-level ``net``, which keeps the original zero-argument call
            working.
        norm_types: tuple of layer classes to reset. Defaults to
            ``(nn.BatchNorm2d,)``.
    """
    if model is None:
        model = net
    for m in model.modules():
        # isinstance also matches subclasses of the listed types, which an
        # exact ``type(m) ==`` comparison would skip.
        if isinstance(m, norm_types):
            m.reset_running_stats()
Below is this method's source code:
def reset_running_stats(self) -> None:
if self.track_running_stats:
# running_mean/running_var/num_batches... are registered at runtime depending
# if self.track_running_stats is on
self.running_mean.zero_() # Zero (neutral) mean
self.running_var.fill_(1) # One (neutral) variance
self.num_batches_tracked.zero_() # Number of batches tracked
You can see the source code here, _NormBase class.

Change custom loss parameter and NN parameter with respect to epoch

I have a Keras model defined in the following manner (Tried to keep only the necessary parts):
temperature = 5.0
def knowledge_distillation_loss(y_true, y_pred, lambda_const):
    """Combine a hard-label log-loss with a soft-label log-loss.

    The first 10 columns of ``y_true`` / ``y_pred`` are paired for the hard
    term; the remaining columns are paired for the soft term. The remaining
    columns of ``y_true`` are treated as logits and softened with the
    module-level ``temperature``.

    Returns:
        ``lambda_const * hard_loss + soft_loss``.
    """
    hard_targets = y_true[:, :10]
    teacher_logits = y_true[:, 10:]
    soft_targets = K.softmax(teacher_logits / temperature)

    hard_pred = y_pred[:, :10]
    soft_pred = y_pred[:, 10:]

    hard_loss = logloss(hard_targets, hard_pred)
    soft_loss = logloss(soft_targets, soft_pred)
    return lambda_const * hard_loss + soft_loss
def get_model(num_labels):
#Some layers for model
model.add(Dense(num_labels))
logits = model.layers[-1].output
probabilities = Activation('softmax')(logits)
# softed probabilities
logits_T = Lambda(lambda x: x/temperature)(logits)
probabilities_T = Activation('softmax')(logits_T)
output = concatenate([probabilities, probabilities_T])
model = Model(model.input, output)
lambda_const = 0.07
model.compile(
optimizer=optimizers.SGD(lr=1e-1, momentum=0.9, nesterov=True),
loss=lambda y_true, y_pred: knowledge_distillation_loss(y_true, y_pred, lambda_const),
metrics=[accuracy])
return model
I am following this reference.
This is implemented using fit_generator() on Keras with the TF backend. Obviously, I will have trouble when loading the model since temperature is hard-coded.
Also,
I wish to update temperature parameter with respect to the epoch number in both loss function and model.
How do I define such a control signal?
I've turned this into a complete example of one way to do this.
You could make a class for the loss function.
class TemperatureLossFunction:
    """Loss wrapper whose scale factor is an assignable ``temperature``.

    Args:
        temperature: object multiplied into the loss; it must expose
            ``assign(value)`` for ``setTemperature`` to work.
    """

    def __init__(self, temperature):
        self.temperature = temperature

    def loss_fun(self, y_truth, y_pred):
        """Return ``temperature * mse(y_truth, y_pred)``."""
        return self.temperature * keras.losses.mse(y_truth, y_pred)

    def setTemperature(self, t, session=None):
        """Assign ``t`` to the temperature.

        Uses ``session`` when one is given, otherwise the default TensorFlow
        session. Does nothing when neither is available.
        """
        # ``or`` short-circuits, so the default session is only looked up
        # when no explicit session was passed.
        active_session = session or tensorflow.get_default_session()
        if active_session:
            active_session.run(self.temperature.assign(t))
class TemperatureLossCallback(keras.callbacks.Callback):
    """Callback that sets the loss temperature to the epoch index after each epoch.

    Args:
        temp_lf: object exposing ``setTemperature(value)``.
    """

    def __init__(self, temp_lf):
        # Run the base initializer so any attributes the base class sets up
        # in its __init__ exist on this callback.
        super().__init__()
        self.temp_lf = temp_lf

    def on_epoch_end(self, epoch, params=None):
        """Set the temperature to ``epoch``; ``params`` is not used."""
        self.temp_lf.setTemperature(epoch)
I've created two methods for working with this, the first method creates and saves the model.
def init(session):
global temperature #global for serialization issues
temperature = tensorflow.Variable(5.0)
tlo = TemperatureLossFunction(temperature)
inp = keras.layers.Input((4,4))
l1 = keras.layers.Lambda( lambda x: temperature*x )
op = l1(inp)
m = keras.models.Model(inputs=[inp], outputs=[op])
m.compile( optimizer = keras.optimizers.SGD(0.01), loss=tlo.loss_fun)
#make sure the session is the one your using!
session.run(temperature.initializer)
The first test I run makes sure we are changing the value.
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
session.run(temperature.assign(1))
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
The second test I run makes sure we can change the values with a callback.
cb = TemperatureLossCallback(tlo)
def gen():
for i in range(10):
yield numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4))
m.fit_generator(
gen(), steps_per_epoch=1, epochs=10, callbacks=[cb]
)
m.save("junk.h5")
Finally, to demonstrate reloading the file.
def restart(session):
global temperature
temperature = tensorflow.Variable(5.0)
tlo = TemperatureLossFunction(temperature)
loss_fun = tlo.loss_fun
m = keras.models.load_model(
"junk.h5",
custom_objects = {"loss_fun":tlo.loss_fun}
)
session.run(temperature.initializer)
m.evaluate( numpy.ones((1, 4, 4)), numpy.zeros((1, 4, 4)) )
session.run(temperature.assign(1))
m.evaluate( numpy.ones( (1, 4, 4) ), numpy.zeros( ( 1, 4, 4) ) )
This is just the code I use to start the program for completeness
import sys
if __name__=="__main__":
sess = tensorflow.Session()
with sess.as_default():
if "restart" in sys.argv:
restart(sess)
else:
init(sess)
One downside of this method: if you run this, you will see that the temperature variable does not get loaded from the model file. It takes on the value assigned in the code.
On the plus side, both the loss function and the layer are referencing the same Variable
One way I found to save the variable value is to create a new layer and use the variable as the weight for the new layer.
class VLayer(keras.layers.Layer):
    """Layer that multiplies its input by one scalar, non-trainable weight."""

    def __init__(self, *args, **kwargs):
        # Positional arguments are accepted but not forwarded; only the
        # keyword arguments reach the base initializer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scalar float32 weight, initialised to zero."""
        weight_spec = dict(
            dtype="float32",
            shape=(),
            trainable=False,
            initializer="zeros",
        )
        self.v1 = self.add_weight(**weight_spec)

    def call(self, x):
        """Return ``x`` scaled by the stored weight."""
        return x * self.v1

    def setValue(self, val):
        """Overwrite the stored weight with ``val``."""
        new_weights = numpy.array([val])
        self.set_weights(new_weights)
Now when you load the model, the weight will be loaded. Unfortunately, I could not find a way to link the weight to a Variable on load. So there will be two variables, one for the loss function and one for the layer. Both of them can be set from a callback though. So I feel this method is on a more robust path.

I have no idea where 'plot_learning_curve' file or 'learning.utils' module is

While following an example about classifying cats and dogs using AlexNet on some post I got stuck on this import error:
Traceback (most recent call last):
File "C:\Users\Gsum\Desktop\Asirra 개 고양이\asirra-dogs-cats-classification-master\learning\optimizers.py", line 5, in <module>
from learning.utils import plot_learning_curve
ImportError: No module named 'learning'
I've been looking for modules with names similar to learning or learn that include a 'plot_learning_curve' function.
If anyone knows which library includes a plot_learning_curve function, I would appreciate some help.
Here is my code:
import os
import time
from abc import abstractmethod
import tensorflow as tf
from learning.utils import plot_learning_curve
class Optimizer(object):
    """Base class for gradient-based optimization algorithms."""

    def __init__(self, model, train_set, evaluator, val_set=None, **kwargs):
        """
        Optimizer initializer.
        :param model: ConvNet, the model to be learned.
        :param train_set: DataSet, training set to be used.
        :param evaluator: Evaluator, for computing performance scores during training.
        :param val_set: DataSet, validation set to be used, which can be None if not used.
        :param kwargs: dict, extra arguments containing training hyperparameters.
            - batch_size: int, batch size for each iteration.
            - num_epochs: int, total number of epochs for training.
            - init_learning_rate: float, initial learning rate.
        """
        self.model = model
        self.train_set = train_set
        self.evaluator = evaluator
        self.val_set = val_set

        # Training hyperparameters
        self.batch_size = kwargs.pop('batch_size', 256)
        self.num_epochs = kwargs.pop('num_epochs', 320)
        self.init_learning_rate = kwargs.pop('init_learning_rate', 0.01)

        self.learning_rate_placeholder = tf.placeholder(tf.float32)    # Placeholder for current learning rate
        self.optimize = self._optimize_op()

        self._reset()

    def _reset(self):
        """Reset some variables."""
        self.curr_epoch = 1
        self.num_bad_epochs = 0    # number of bad epochs, where the model is updated without improvement.
        self.best_score = self.evaluator.worst_score    # initialize best score with the worst one
        self.curr_learning_rate = self.init_learning_rate    # current learning rate

    @abstractmethod
    def _optimize_op(self, **kwargs):
        """
        tf.train.Optimizer.minimize Op for a gradient update.
        This should be implemented, and should not be called manually.
        """
        pass

    @abstractmethod
    def _update_learning_rate(self, **kwargs):
        """
        Update current learning rate (if needed) on every epoch, by its own schedule.
        This should be implemented, and should not be called manually.
        """
        pass

    def _step(self, sess, **kwargs):
        """
        Make a single gradient update and return its results.
        This should not be called manually.
        :param sess: tf.Session.
        :param kwargs: dict, extra arguments containing training hyperparameters.
            - augment_train: bool, whether to perform augmentation for training.
        :return loss: float, loss value for the single iteration step.
            y_true: np.ndarray, true label from the training set.
            y_pred: np.ndarray, predicted label from the model.
        """
        augment_train = kwargs.pop('augment_train', True)

        # Sample a single batch
        X, y_true = self.train_set.next_batch(self.batch_size, shuffle=True,
                                              augment=augment_train, is_train=True)

        # Compute the loss and make update
        _, loss, y_pred = \
            sess.run([self.optimize, self.model.loss, self.model.pred],
                     feed_dict={self.model.X: X, self.model.y: y_true,
                                self.model.is_train: True,
                                self.learning_rate_placeholder: self.curr_learning_rate})

        return loss, y_true, y_pred

    def train(self, sess, save_dir='/tmp', details=False, verbose=True, **kwargs):
        """
        Run optimizer to train the model.
        :param sess: tf.Session.
        :param save_dir: str, the directory to save the learned weights of the model.
        :param details: bool, whether to return detailed results.
        :param verbose: bool, whether to print details during training.
        :param kwargs: dict, extra arguments containing training hyperparameters.
        :return train_results: dict, containing detailed results of training.
        """
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())    # initialize all weights

        train_results = dict()    # dictionary to contain training(, evaluation) results and details
        train_size = self.train_set.num_examples
        num_steps_per_epoch = train_size // self.batch_size
        num_steps = self.num_epochs * num_steps_per_epoch
        if verbose:
            print('Running training loop...')
            print('Number of training iterations: {}'.format(num_steps))

        step_losses, step_scores, eval_scores = [], [], []
        start_time = time.time()

        # Start training loop
        for i in range(num_steps):
            # Perform a gradient update from a single minibatch
            step_loss, step_y_true, step_y_pred = self._step(sess, **kwargs)
            step_losses.append(step_loss)

            # Perform evaluation in the end of each epoch
            if (i+1) % num_steps_per_epoch == 0:
                # Evaluate model with current minibatch, from training set
                step_score = self.evaluator.score(step_y_true, step_y_pred)
                step_scores.append(step_score)

                # If validation set is initially given, use it for evaluation
                if self.val_set is not None:
                    # Evaluate model with the validation set
                    eval_y_pred = self.model.predict(sess, self.val_set, verbose=False, **kwargs)
                    eval_score = self.evaluator.score(self.val_set.labels, eval_y_pred)
                    eval_scores.append(eval_score)

                    if verbose:
                        # Print intermediate results
                        print('[epoch {}]\tloss: {:.6f} |Train score: {:.6f} |Eval score: {:.6f} |lr: {:.6f}'\
                              .format(self.curr_epoch, step_loss, step_score, eval_score, self.curr_learning_rate))
                        # Plot intermediate results
                        plot_learning_curve(-1, step_losses, step_scores, eval_scores=eval_scores,
                                            mode=self.evaluator.mode, img_dir=save_dir)
                    curr_score = eval_score

                # else, just use results from current minibatch for evaluation
                else:
                    if verbose:
                        # Print intermediate results
                        print('[epoch {}]\tloss: {} |Train score: {:.6f} |lr: {:.6f}'\
                              .format(self.curr_epoch, step_loss, step_score, self.curr_learning_rate))
                        # Plot intermediate results
                        plot_learning_curve(-1, step_losses, step_scores, eval_scores=None,
                                            mode=self.evaluator.mode, img_dir=save_dir)
                    curr_score = step_score

                # Keep track of the current best model,
                # by comparing current score and the best score
                if self.evaluator.is_better(curr_score, self.best_score, **kwargs):
                    self.best_score = curr_score
                    self.num_bad_epochs = 0
                    saver.save(sess, os.path.join(save_dir, 'model.ckpt'))    # save current weights
                else:
                    self.num_bad_epochs += 1

                self._update_learning_rate(**kwargs)
                self.curr_epoch += 1

        if verbose:
            print('Total training time(sec): {}'.format(time.time() - start_time))
            # The best score comes from the validation set only when one was
            # given; testing the builtin ``eval`` here was always truthy.
            score_kind = 'evaluation' if self.val_set is not None else 'training'
            print('Best {} score: {}'.format(score_kind, self.best_score))
        print('Done.')

        if details:
            # Store training results in a dictionary
            train_results['step_losses'] = step_losses    # (num_iterations)
            train_results['step_scores'] = step_scores    # (num_epochs)
            if self.val_set is not None:
                train_results['eval_scores'] = eval_scores    # (num_epochs)

            return train_results
class MomentumOptimizer(Optimizer):
    """Gradient descent optimizer, with Momentum algorithm."""

    def _optimize_op(self, **kwargs):
        """
        Build the tf.train.MomentumOptimizer.minimize Op for a gradient update.
        :param kwargs: dict, extra arguments for optimizer.
            - momentum: float, the momentum coefficient.
        :return tf.Operation.
        """
        momentum = kwargs.pop('momentum', 0.9)
        update_vars = tf.trainable_variables()
        momentum_optimizer = tf.train.MomentumOptimizer(
            self.learning_rate_placeholder, momentum, use_nesterov=False
        )
        return momentum_optimizer.minimize(self.model.loss, var_list=update_vars)

    def _update_learning_rate(self, **kwargs):
        """
        Decay the current learning rate once the score has stopped improving.
        :param kwargs: dict, extra arguments for learning rate scheduling.
            - learning_rate_patience: int, number of epochs with no improvement
              after which learning rate will be reduced.
            - learning_rate_decay: float, factor by which the learning rate will be updated.
            - eps: float, if the difference between new and old learning rate is smaller than eps,
              the update is ignored.
        """
        patience = kwargs.pop('learning_rate_patience', 10)
        decay = kwargs.pop('learning_rate_decay', 0.1)
        eps = kwargs.pop('eps', 1e-8)

        # Nothing to do until the number of bad epochs exceeds the patience.
        if not self.num_bad_epochs > patience:
            return

        candidate_rate = self.curr_learning_rate * decay
        # Apply the decay only when it changes the rate by more than eps.
        if self.curr_learning_rate - candidate_rate > eps:
            self.curr_learning_rate = candidate_rate
        self.num_bad_epochs = 0

how to get the real shape of batch_size which is none in keras

When implementing a custom layer in Keras, I need to know the real size of batch_size. My shape is (?,20).
questions:
1. What is the best way to change (?,20) to (batch_size,20).
I have looked into this but it can not adjust to my problem.
I can pass the batch_size to this layer. In that case, I need to reshape (?,20) to (batch_size,20), how can I do that?
2. Is this the best way to do that, or is there any built-in function that can get the real batch_size while building and running the model?
This is my layer:
from scipy.stats import entropy
from keras.engine import Layer
import keras.backend as K
import numpy as np
class measure(Layer):
    """Layer that applies ``rev_entropy`` in the training phase and passes
    its input through unchanged otherwise.

    Args:
        beta: scalar used by ``rev_entropy``.
    """

    def __init__(self, beta, **kwargs):
        # These attributes are assigned before the base initializer runs, as
        # in the original snippet.
        self.beta = beta
        self.uses_learning_phase = True
        self.supports_masking = True
        super(measure, self).__init__(**kwargs)

    def call(self, x):
        """Return ``rev_entropy(x, beta)`` in train phase, else ``x``."""
        reweighted = self.rev_entropy(x, self.beta)
        return K.in_train_phase(reweighted, x)

    def get_config(self):
        """Return the base layer config extended with ``beta``."""
        own_config = {'beta': self.beta}
        parent_config = super(measure, self).get_config()
        return dict(list(parent_config.items()) + list(own_config.items()))

    def rev_entropy(self, x, beta):
        """Scale each row of ``x`` by ``(beta / (1 + entropy(row))) ** 1.5``
        and then divide every column by its sum (plus 1e-30).
        """
        # NOTE(review): this uses NumPy calls directly, so it presumably
        # needs ``x`` to be a concrete 2-D array rather than a symbolic
        # tensor - confirm against the caller.
        row_entropy = np.apply_along_axis(entropy, 1, x)
        row_scale = (beta / (1 + row_entropy)) ** 1.5
        scaled = x * row_scale.reshape(row_scale.shape[0], 1)
        column_totals = 1e-30 + np.sum(scaled, axis=0)
        for col, total in enumerate(column_totals):
            scaled[:, col] /= total
        return scaled
And here is where I call this layer:
encoded = measure(beta=0.08)(encoded)
I am also using fit_generator if it can help at all:
autoencoder.fit_generator(train_gen, steps_per_epoch=num_train_steps, epochs=NUM_EPOCHS,
validation_data=test_gen, validation_steps=num_test_steps, callbacks=[checkpoint])
The dimension of the x passed to the layer is (?,20) and that's why I can not do my calculation.
Thanks:)

Resources