I'm trying to write a hook that will allow me to compute some global metrics (rather than batch-wise metrics). To prototype, I thought I'd get a simple hook up and running that would capture and remember true positives. It looks like this:
class TPHook(tf.train.SessionRunHook):
    def after_create_session(self, session, coord):
        print("Starting Hook")
        tp_name = 'metrics/f1_macro/TP'
        self.tp = []
        self.args = session.graph.get_operation_by_name(tp_name)
        print(f"Got Args: {self.args}")

    def before_run(self, run_context):
        print("Starting Before Run")
        return tf.train.SessionRunArgs(self.args)

    def after_run(self, run_context, run_values):
        print("After Run")
        print(f"Got Values: {run_values.results}")
However, the values returned in the after_run part of the hook are always None. I tested this in both the training and evaluation phases. Am I misunderstanding something about how SessionRunHooks are supposed to work?
Maybe relevant information:
The model was built in Keras and converted to an estimator with the keras.estimator.model_to_estimator() function. The model has been tested and works fine, and the op that I'm trying to retrieve in the hook is defined in this code block:
def _f1_macro_vector(y_true, y_pred):
    """Computes the F1-score with Macro averaging.

    Arguments:
        y_true {tf.Tensor} -- Ground-truth labels
        y_pred {tf.Tensor} -- Predicted labels

    Returns:
        tf.Tensor -- The computed F1-Score
    """
    y_true = K.cast(y_true, tf.float64)
    y_pred = K.cast(y_pred, tf.float64)

    TP = tf.reduce_sum(y_true * K.round(y_pred), axis=0, name='TP')
    FN = tf.reduce_sum(y_true * (1 - K.round(y_pred)), axis=0, name='FN')
    FP = tf.reduce_sum((1 - y_true) * K.round(y_pred), axis=0, name='FP')

    prec = TP / (TP + FP)
    rec = TP / (TP + FN)

    # Convert NaNs to Zero
    prec = tf.where(tf.is_nan(prec), tf.zeros_like(prec), prec)
    rec = tf.where(tf.is_nan(rec), tf.zeros_like(rec), rec)

    f1 = 2 * (prec * rec) / (prec + rec)

    # Convert NaN to Zero
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return f1
In case anyone runs into the same problem, I found out how to restructure the program so that it works. Although the documentation makes it sound like I can pass raw ops into SessionRunArgs, it seems to require actual tensors (maybe this was a misreading on my part).
This is pretty easy to accomplish - I just changed the after_create_session code to what's shown below.
def after_create_session(self, session, coord):
    tp_name = 'metrics/f1_macro/TP'
    self.tp = []
    tp_tensor = session.graph.get_tensor_by_name(tp_name + ':0')
    self.args = [tp_tensor]
And this successfully runs.
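For reference, here is the full hook with the fix applied, extended to actually accumulate the per-batch TP values in self.tp (a sketch; the accumulation in after_run is my reading of the intended use, and the tensor name comes from the metric code above):
import tensorflow as tf

class TPHook(tf.train.SessionRunHook):
    def after_create_session(self, session, coord):
        tp_name = 'metrics/f1_macro/TP'
        self.tp = []
        # An op has no value to fetch; ':0' selects the op's first output tensor
        tp_tensor = session.graph.get_tensor_by_name(tp_name + ':0')
        self.args = [tp_tensor]

    def before_run(self, run_context):
        # Everything listed here gets evaluated alongside the regular fetches
        return tf.train.SessionRunArgs(self.args)

    def after_run(self, run_context, run_values):
        # run_values.results now holds the evaluated TP vector for this batch
        self.tp.append(run_values.results[0])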
Background
I'm currently trying to implement a DDPG framework to control a simple car agent. At first, the car agent would only need to learn how to reach the end of a straight path as quickly as possible by adjusting its acceleration. This task was simple enough, so I decided to introduce an additional steering action as well. I updated my observation and action spaces accordingly.
The lines below are the for loop that runs each episode:
for i in range(episodes):
    observation = env.reset()
    done = False
    score = 0

    while not done:
        action = agent.choose_action(observation, evaluate)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.remember(observation, action, reward, observation_, done)
        if not load_checkpoint:
            agent.learn()
        observation = observation_
The lines below are my choose_action and learn functions:
def choose_action(self, observation, evaluate=False):
    state = tf.convert_to_tensor([observation], dtype=tf.float32)
    actions = self.actor(state)
    if not evaluate:
        actions += tf.random.normal(shape=[self.n_actions],
                                    mean=0.0, stddev=self.noise)
    actions = tf.clip_by_value(actions, self.min_action, self.max_action)
    return actions[0]

def learn(self):
    if self.memory.mem_cntr < self.batch_size:
        return

    state, action, reward, new_state, done = \
        self.memory.sample_buffer(self.batch_size)

    states = tf.convert_to_tensor(state, dtype=tf.float32)
    states_ = tf.convert_to_tensor(new_state, dtype=tf.float32)
    rewards = tf.convert_to_tensor(reward, dtype=tf.float32)
    actions = tf.convert_to_tensor(action, dtype=tf.float32)

    with tf.GradientTape() as tape:
        target_actions = self.target_actor(states_)
        critic_value_ = tf.squeeze(self.target_critic(
            states_, target_actions), 1)
        critic_value = tf.squeeze(self.critic(states, actions), 1)
        target = reward + self.gamma*critic_value_*(1-done)
        critic_loss = keras.losses.MSE(target, critic_value)

    critic_network_gradient = tape.gradient(critic_loss,
                                            self.critic.trainable_variables)
    self.critic.optimizer.apply_gradients(zip(
        critic_network_gradient, self.critic.trainable_variables))

    with tf.GradientTape() as tape:
        new_policy_actions = self.actor(states)
        actor_loss = -self.critic(states, new_policy_actions)
        actor_loss = tf.math.reduce_mean(actor_loss)

    actor_network_gradient = tape.gradient(actor_loss,
                                           self.actor.trainable_variables)
    self.actor.optimizer.apply_gradients(zip(
        actor_network_gradient, self.actor.trainable_variables))

    self.update_network_parameters()
And finally, my ActorNetwork is as follows:
class ActorNetwork(keras.Model):
    def __init__(self, fc1_dims=512, fc2_dims=512, n_actions=2, name='actor',
                 chkpt_dir='tmp/ddpg'):
        super(ActorNetwork, self).__init__()
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.n_actions = n_actions
        self.model_name = name
        self.checkpoint_dir = chkpt_dir
        self.checkpoint_file = os.path.join(self.checkpoint_dir,
                                            self.model_name + '_ddpg.h5')

        self.fc1 = Dense(self.fc1_dims, activation='relu')
        self.fc2 = Dense(self.fc2_dims, activation='relu')
        self.mu = Dense(self.n_actions, activation='tanh')

    def call(self, state):
        prob = self.fc1(state)
        prob = self.fc2(prob)
        mu = self.mu(prob) * 3.5
        return mu
Note: the code I'm working with builds off of the code from this tutorial.
The Problem
Up until now I hadn't faced any issues with the code, but I did want to adjust the maximum/minimum values of my actions. When I was only considering the acceleration action, I simply multiplied mu by 3.5. However, I wanted the steering action to stay within a range of -30 to 30 degrees, and I couldn't just multiply mu as I had before. To try to adjust the desired steering range, I made the following (not so elegant) changes to my ActorNetwork:
def call(self, state):
    prob = self.fc1(state)
    prob = self.fc2(prob)

    mu = self.mu(prob)  # * 3.5
    mu_ = []

    mu_l = mu.numpy().tolist()
    for i, elem1 in enumerate(mu_l):
        temp_ = []
        for j, elem2 in enumerate(elem1):
            if j == 0:
                temp_.append(float(elem2 * 3.5))
            else:
                temp_.append(float(elem2 * math.radians(30)))
        mu_.append(temp_)

    mu = tf.convert_to_tensor(mu_, dtype=tf.float32)
    return mu
The new lines that I added were meant to:
Convert the mu tensor into a list
Iterate through the elements in the mu list (mu_l) and if a value had an index of 0 (acceleration) then multiply by 3.5; otherwise, multiply the value at index=1 (steering) by the radians conversion of 30 degrees.
Append each adjusted value into a new list (mu_)
Set mu to be equal to a tensor conversion of mu_
It was at this point that I ran into the following error:
ValueError: No gradients provided for any variable: ['actor_network/dense/kernel:0', 'actor_network/dense/bias:0', 'actor_network/dense_1/kernel:0', 'actor_network/dense_1/bias:0', 'actor_network/dense_2/kernel:0', 'actor_network/dense_2/bias:0'].
I have tried to find solutions on StackOverflow and from outside sources (e.g. using tape.watch, checking that I am using model() instead of model.predict() inside GradientTape(), and making sure I'm not performing calculations outside of the tape context), but I haven't had any luck resolving the issue. I suspect that my issue is similar to the one presented in this previous post, but I'm not sure how to diagnose whether my problem also stems from overwriting mu with a tensor. Does anyone have any insight regarding this problem?
The issue has been resolved thanks to some simple but helpful advice I received on Reddit. I was disrupting the tracking of my variables by making changes using my custom for-loop. I should have used a TensorFlow function instead. The following changes fixed the problem for me:
def call(self, state):
    prob = self.fc1(state)
    prob = self.fc2(prob)

    mu = self.mu(prob)
    mult = tf.convert_to_tensor([3.5, math.radians(30)], dtype=tf.float32)

    mu = tf.math.multiply(mu, mult)
    return mu
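To confirm that gradients flow again, a quick check along these lines works (a rough sketch; obs_dim stands in for whatever observation size your environment uses):
import tensorflow as tf

actor = ActorNetwork()
state = tf.random.normal((1, obs_dim))  # obs_dim: placeholder for your observation size

with tf.GradientTape() as tape:
    mu = actor(state)
    loss = tf.reduce_sum(mu)

# With the tf.math.multiply version, every gradient should be a tensor, not None
grads = tape.gradient(loss, actor.trainable_variables)
print([g is not None for g in grads])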
I am trying to reimplement the original GAN paper by Ian Goodfellow et al., and I need to show that my implementation achieves the same or similar results as the authors. But I am not sure how to evaluate this metric (the Parzen window log-likelihood estimate). I took a look at their implementation, but I got some funny results: in the paper they report 225 ± 2 on MNIST for this metric, while the results I get are below -400000000. I thought that maybe the model is bad, but it generates really good images of MNIST digits.
Can someone tell me what I am doing wrong?
Below is the code I used; I copied part of it from the official implementation.
Note: the valid variable holds images taken from the MNIST dataset.
def get_nll(x, parzen, batch_size=10):
    """
    Credit: Yann N. Dauphin
    """
    inds = range(x.shape[0])
    n_batches = int(numpy.ceil(float(len(inds)) / batch_size))

    print("N batches:", n_batches)

    times = []
    nlls = []
    for i in range(n_batches):
        begin = time.time()

        nll = parzen(x[inds[i::n_batches]])
        end = time.time()

        times.append(end - begin)
        nlls.extend(nll)

        if i % 10 == 0:
            print(i, numpy.mean(times), numpy.mean(nlls))

    return numpy.array(nlls)


def log_mean_exp(a):
    """
    Credit: Yann N. Dauphin
    """
    max_ = a.max(1)
    return max_ + T.log(T.exp(a - max_.dimshuffle(0, 'x')).mean(1))


def cross_validate_sigma(samples, data, sigmas, batch_size):
    lls = []
    for sigma in sigmas:
        print("Sigma:", sigma)

        parzen = theano_parzen(samples, sigma)
        tmp = get_nll(data, parzen, batch_size=batch_size)
        lls.append(numpy.asarray(tmp).mean())
        del parzen
        gc.collect()

    ind = numpy.argmax(lls)
    print(max(lls))
    return sigmas[ind]


noise = torch.randn((10000, 100), device=device)
gen_model.eval()
gan_out = gen_model(noise)
sigma_range = numpy.logspace(-1., 0., num=10)
sigma = cross_validate_sigma(gan_out.reshape(10000, -1), valid[0:10000], sigma_range, 100)
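For context on how far off these numbers are, here is my own minimal NumPy restatement of what the Parzen estimate computes per test point (a sketch assuming the standard isotropic Gaussian kernel from the paper, not a copy of the Theano code above):
import numpy

def parzen_log_density(x, samples, sigma):
    # x: one test point of shape (d,); samples: generator outputs of shape (n, d)
    d = samples.shape[1]
    # squared distance from x to every kernel centre, scaled by the bandwidth
    e = -numpy.sum((x - samples) ** 2, axis=1) / (2.0 * sigma ** 2)
    # log-mean-exp, the same numerical-stability trick as log_mean_exp above
    m = e.max()
    log_mean = m + numpy.log(numpy.mean(numpy.exp(e - m)))
    # Gaussian normalisation constant; with d = 784 the distance term dominates,
    # so a scale mismatch between samples and data drives the estimate hugely negative
    return log_mean - 0.5 * d * numpy.log(2.0 * numpy.pi * sigma ** 2)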
I want to implement a Fourier Ring Correlation loss between two images to train a GAN. To do that I'd like to loop a specific number of times and accumulate the loss, which works fine with a normal Python loop. To speed up the process I want to use tf.while_loop, but unfortunately I am not able to track the gradients through my while loop. I constructed a dummy example just to calculate gradients during a while loop, but it doesn't work. First, the working Python loop:
x = tf.constant(3.0)
y = tf.constant(2.0)

for i in range(3):
    y = y * x

grad = tf.gradients(y, x)

with tf.Session() as ses:
    print("output : ", ses.run(grad))
This works and gives the output
[54]
If I do the same with a tf.while_loop, it doesn't work:
a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    y = y * x
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y]

results = tf.while_loop(cond, body, [a, b, x, y], back_prop=True)
grad = tf.gradients(y, results[2])

with tf.Session() as ses:
    print("grad : ", ses.run(grad))
The output is:
TypeError: Fetch argument None has invalid type <class 'NoneType'>
So I guess TensorFlow is somehow not able to do the backpropagation. The problem still occurs if you use tf.GradientTape() instead of tf.gradients().
I changed the code so that it now outputs the gradients:
import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.Variable(3.0, tf.float32)
y = tf.Variable(2.0, tf.float32)
dy = tf.Variable(0.0, tf.float32)

def cond(a, b, x, y, dy):
    return tf.less(a, b)

def body(a, b, x, y, dy):
    y = y * x
    dy = tf.gradients(y, x)[0]
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y, dy]

init = tf.global_variables_initializer()

with tf.Session() as ses:
    ses.run(init)
    results = ses.run(tf.while_loop(cond, body, [a, b, x, y, dy], back_prop=True))
    print("grad : ", results[-1])
The things I modified:
I made x and y into variables and added their initialisation init.
I added a variable called dy which will contain the gradient of y.
I moved the tf.while_loop inside the session.
I put the evaluation of the gradient inside the body function.
I think the problem before was that grad = tf.gradients(y, results[2]) differentiates the original y, defined outside the loop, with respect to the loop's output; that y is just a constant and does not depend on results[2], so there is no gradient.
Hope this helps.
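As an alternative sketch that stays closer to the original graph-mode code (my addition, not part of the answer above): keep the constants, but differentiate the loop's final y, i.e. results[3], with respect to x, since tf.while_loop does support backprop through its outputs:
import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    return [a + 1, b, x, y * x]

results = tf.while_loop(cond, body, [a, b, x, y])

# Differentiate the loop output y = 2 * x**3 with respect to the loop input x:
# d(2 * x**3)/dx = 6 * x**2, which is 54.0 at x = 3
grad = tf.gradients(results[3], x)

with tf.Session() as ses:
    print("grad : ", ses.run(grad))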
I am new to TensorFlow. The following partial code throws an AttributeError: 'NoneType' object has no attribute 'dtype'. Could someone point out what I am doing wrong and how to code this correctly?
I realize the problem occurs when I try to minimize the loss within train_function. I was assuming that during the interactive session s, batch_x_train and batch_y_train would be passed into the optimizer call, which in turn calls binary_logloss with the batch_y_train values. I know how to make this work without TensorFlow, but I am a bit confused once graphs are involved.
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])

predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)

def binary_logloss(true_y):
    if true_y == 1.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(predicted_y), axis=1))
    elif true_y == 0.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(1 - predicted_y), axis=1))

def train_function(X, y):
    loss = binary_logloss(input_y)
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
    _, c = s.run([optimizer, loss], {input_X: X, input_y: y})
    return _, c

s.run(tf.global_variables_initializer())
for epoch in epochs:
    _, c = train_function(batch_x_train, batch_y_train)
The underlying problem is that binary_logloss compares a placeholder tensor to a Python float with ==. That comparison is evaluated once, at graph-construction time, where it is simply False, so neither branch is taken, the function returns None, and minimize(loss) fails with the NoneType error. The conditional is unnecessary anyway, since the usual cross-entropy formula covers both label values at once. You have to refactor the code like this and implement a batching mechanism, and it should execute:
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])

predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)

# Standard binary cross-entropy; covers both the y == 1 and y == 0 cases at once
loss = tf.reduce_mean(-(input_y * tf.log(predicted_y) +
                        (1 - input_y) * tf.log(1 - predicted_y)))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

init = (tf.global_variables_initializer(), tf.local_variables_initializer())

with tf.Session() as s:
    s.run(init)
    for epoch in epochs:
        # batch_x_train, batch_y_train = Batching code
        _, c = s.run([optimizer, loss],
                     {input_X: batch_x_train, input_y: batch_y_train})
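If it helps, the batching placeholder could be filled in along these lines (a sketch; X_train, y_train, and batch_size are hypothetical names for your full training arrays and batch size):
num_batches = len(X_train) // batch_size
for bi in range(num_batches):
    # slice out one mini-batch per training step
    batch_x_train = X_train[bi * batch_size:(bi + 1) * batch_size]
    batch_y_train = y_train[bi * batch_size:(bi + 1) * batch_size]
    _, c = s.run([optimizer, loss],
                 {input_X: batch_x_train, input_y: batch_y_train})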
I have a function that I define as follows:
def NewLoss(y_true, y_pred):
    p = 0
    for i in range(3074):
        if (y_pred[i+1] - y_pred[i]) < 0:
            p += (y_true[i] - y_pred[i])**2
        elif (y_pred[i+1] - y_pred[i]) > 0:
            p += (y_true[i] - y_pred[i])**2 + (y_true[i] - y_pred[i])*(y_pred[i+1] - y_pred[i])**2
        else:
            p += (y_true[i] - y_pred[i])**2 + 0.5*(y_true[i] - y_pred[i])*(y_pred[i+1] - y_pred[i])**2
    return p
My y_true and y_pred are vectors. When I try to run code that calls this function, I get the following error:
"Using a tf.Tensor as a Python bool is not allowed".
I would like to know how to check the sign of (y_true[i] - y_pred[i]) and avoid this error; I am actually using Keras.
Thank you very much for your help.
from keras import backend as K

def NewLoss(y_true, y_pred):
    true = y_true[:3074]
    pred = y_pred[:3074]
    predShifted = y_pred[1:3075]

    diff = true - pred
    diffShifted = predShifted - pred

    pLeftPart = K.square(diff)
    pRightPart = diff * K.square(diffShifted)

    # 1 where the shifted difference is positive, 0.5 where it is exactly zero
    greater = K.cast(K.greater(diffShifted, 0), K.floatx())
    equal = 0.5 * K.cast(K.equal(diffShifted, 0), K.floatx())
    mask = greater + equal

    return K.sum(pLeftPart + (mask * pRightPart))
Remarks:
1 - The first axis is the samples axis; perhaps you're trying to do this along the timesteps axis? If so, use:
true = y_true[:,:3074]
pred = y_pred[:,:3074]
predShifted = y_pred[:,1:3075]
2 - Having differences exactly equal to zero is so rare that maybe you don't need the last part of the if statement.
3 - If the max length of your tensors is 3075, you can simplify the selections:
true = y_true[:-1]
pred = y_pred[:-1]
predShifted = y_pred[1:]
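As a quick sanity check (a sketch assuming TensorFlow 2.x eager execution; loop_loss is a hypothetical rename of the original Python-loop version so both functions can coexist), the two implementations can be compared on random vectors:
import numpy
from tensorflow.keras import backend as K

y_true = K.constant(numpy.random.rand(3075).astype('float32'))
y_pred = K.constant(numpy.random.rand(3075).astype('float32'))

# The two values should agree up to float rounding
print(float(loop_loss(y_true, y_pred)), float(NewLoss(y_true, y_pred)))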