Tensorflow AttributeError when a function is passed to another - python-3.x

I am new to TensorFlow. The following partial code throws an AttributeError: 'NoneType' object has no attribute 'dtype'. Could someone point out what I am doing wrong and how to code this correctly?
I realize the problem arises when I try to minimize the loss inside train_function. I assumed that during the interactive session, s, batch_x_train and batch_y_train would be passed to the optimizer call, which in turn calls binary_logloss with the batch_y_train values. I know how to make this work without TensorFlow, but I am a bit confused once graphs are involved.
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])
predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)

def binary_logloss(true_y):
    if true_y == 1.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(predicted_y), axis=1))
    elif true_y == 0.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(1 - predicted_y), axis=1))

def train_function(X, y):
    loss = binary_logloss(input_y)
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
    _, c = s.run([optimizer, loss], {input_X: X, input_y: y})
    return _, c

s.run(tf.global_variables_initializer())
for epoch in epochs:
    _, c = train_function(batch_x_train, batch_y_train)

You have to refactor the code like this and implement a batching mechanism; then it should execute. The key change is expressing the log-loss as a single vectorized expression instead of branching on the value of a graph tensor: input_y == 1. compares a placeholder object to a float and is never True, so binary_logloss falls through and returns None, which is what triggers the 'NoneType' error when minimize() is called.
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])
predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)
loss = tf.reduce_mean(-(input_y * tf.log(predicted_y) + (1 - input_y) * tf.log(1 - predicted_y)))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as s:
    s.run(init)
    for epoch in epochs:
        # batch_x_train, batch_y_train = Batching code
        _, c = s.run([optimizer, loss], {input_X: batch_x_train, input_y: batch_y_train})
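For the "Batching code" placeholder, a minimal sketch could look like the following (assuming X_train and y_train are NumPy arrays; the names are illustrative, not from the original code):
import numpy as np

def iterate_minibatches(X, y, batch_size=32):
    # Shuffle once per epoch, then yield consecutive slices.
    idx = np.random.permutation(len(X))
    for start in range(0, len(X), batch_size):
        batch = idx[start:start + batch_size]
        yield X[batch], y[batch]

# Usage inside the epoch loop above:
# for batch_x_train, batch_y_train in iterate_minibatches(X_train, y_train):
#     _, c = s.run([optimizer, loss], {input_X: batch_x_train, input_y: batch_y_train})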


Gradient is equal to 'None'

I have two networks. The output of the first network is the input to the second. To calculate the loss for the second network, I use vanilla policy gradient. I want to backpropagate this loss into the first network. After checking whether the gradients have changed, I see that they are all None.
I first load the first network (a pre-trained autoencoder) this way:
def load_checkpoint(filepath, model):
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad = True
    model.train()
    return model
Then I define the optimizers for both networks this way:
class MultipleOptimizer(object):
    def __init__(self, *op):
        self.optimizers = op

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()

opt = MultipleOptimizer(SGD(model.parameters(), lr=1, momentum=0.9), Adam(logits_net.parameters(), lr=lr))
The reward function is:
# Reward function
def reward(x, act):
    # print('action', act)
    # print('x type', type(x))
    km = KMeans(act, n_init=20, n_jobs=4)
    y_pred = km.fit_predict(x.detach().cpu().numpy())  # seems we can only get a centre from batch
    # print('k-means output type', type(y_pred))
    sil_score = sil(x.detach().cpu().numpy(), y_pred)
    # print('sil score', sil_score)
    return sil_score
The architecture of the second neural net (and an alternative I tried in order to avoid logits = logits.mean(0)):
def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity):
    # Build a feedforward neural network. Outputs are the logits.
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)

class mlp2(torch.nn.Module):
    def __init__(self):
        super(mlp2, self).__init__()
        self.linear1 = nn.Linear(10, 100)
        self.relu1 = nn.ReLU(inplace=True)
        self.linear2 = torch.nn.Linear(100, 100)
        self.linear3 = torch.nn.Linear(100, 20)
        self.linear4 = torch.nn.Linear(2000, 100)
        self.ident = nn.Identity()

    def forward(self, x):
        a = self.linear1(x)
        a = self.relu1(a)
        a = self.linear2(a)
        a = self.relu1(a)
        a = self.linear3(a)
        a = torch.flatten(a)
        a = self.linear4(a)
        a = self.relu1(a)
        a = self.linear3(a)
        out = self.ident(a)
        return out
Loss is calculated in the following order:
def get_policy(obs):
    logits = logits_net(obs)
    return Categorical(logits=logits.mean(0))

def get_action(obs):
    return get_policy(obs).sample().item()

def Logp(obs, act):
    logp = get_policy(obs).log_prob(act.cuda())
    return logp

def compute_loss(logp, weights):
    return -(logp * weights).mean()

def train_one_epoch():
    # make some empty lists for logging.
    batch_obs = []      # for observations
    batch_acts = []     # for actions
    batch_weights = []  # for R(tau) weighting in policy gradient
    batch_logp = []

    # reset episode-specific variables
    j = 1         # signal from environment that episode is over
    ep_rews = []  # list for rewards accrued throughout ep

    for i, data in enumerate(train_loader):
        # Create the mean image out of those 100 images
        x, label = data
        x = model(x.cuda())  # torch.Size([100, 10])
        obs = x.data.cpu().numpy()  # [100, 10] - a trajectory with only one state
        # Save obs
        batch_obs.append(obs.copy())

        # act in the environment
        # act = get_action(torch.as_tensor(obs, dtype=torch.float32))
        act = get_action(x)
        print('action type', type(act))

        # log probability
        # logp = Logp(torch.as_tensor(obs, dtype=torch.float32), act=torch.as_tensor(act, dtype=torch.int32))
        logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32))

        # rew = reward(obs, act+2)
        rew = reward(x, act + 2)

        # save action, reward
        batch_acts.append(act)
        batch_weights.append(rew)  # episode rewards
        batch_logp.append(logp)

    opt.zero_grad()
    batch_logp = torch.stack(batch_logp, dim=0)
    batch_loss = compute_loss(logp=torch.as_tensor(batch_logp, dtype=torch.float32),
                              weights=torch.as_tensor(batch_weights, dtype=torch.float32))
    batch_loss.backward()  # does it return anything? gradients? print them!
    opt.step()

    for name, param in logits_net.named_parameters():
        print(name, param.grad)
I applied some changes on the assumption that recreating some of the tensors might be the issue:
Originally, the output of the first network, obs, was converted with obs = x.data.cpu().numpy() and then sent to the get_action function: act = get_action(torch.as_tensor(obs, dtype=torch.float32)). I changed this to act = get_action(x), so x is sent to the function directly. I also changed the arguments of Logp to logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32)).
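For reference, this is roughly how I check whether the intermediate tensors are still attached to the autograd graph (illustrative snippet, variable names as in the code above):
# A tensor can only receive gradients if it is still attached to the
# autograd graph, i.e. requires_grad is True and grad_fn is set.
print(x.requires_grad, x.grad_fn)        # output of the first network
print(logp.requires_grad, logp.grad_fn)  # log-probability from the policy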
After these changes, I still get None for the gradients. Is there any way to backpropagate the gradient when the loss is calculated this way? Any changes I can apply?
Any help is appreciated.

I defined a loss function, but backward() raises an error; could someone tell me how to fix it?

import numpy as np
import torch as t
from torch.autograd import Function

class loss(Function):
    @staticmethod
    def forward(ctx, x, INPUT):
        batch_size = x.shape[0]
        X = x.detach().numpy()
        input = INPUT.detach().numpy()
        Loss = 0
        for i in range(batch_size):
            t_R_r = input[i, 0:4]
            R_r = t_R_r[np.newaxis, :]
            t_R_i = input[i, 4:8]
            R_i = t_R_i[np.newaxis, :]
            t_H_r = input[i, 8:12]
            H_r = t_H_r[np.newaxis, :]
            t_H_i = input[i, 12:16]
            H_i = t_H_i[np.newaxis, :]
            t_T_r = input[i, 16:32]
            T_r = t_T_r.reshape(4, 4)
            t_T_i = input[i, 32:48]
            T_i = t_T_i.reshape(4, 4)
            R = np.concatenate((R_r, R_i), axis=1)
            H = np.concatenate((H_r, H_i), axis=1)
            temp_t1 = np.concatenate((T_r, T_i), axis=1)
            temp_t2 = np.concatenate((-T_i, T_r), axis=1)
            T = np.concatenate((temp_t1, temp_t2), axis=0)
            phi_r = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_r[row, col] = X[i, 0:4]
            phi_i = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_i[row, col] = 1 - np.power(X[i, 0:4], 2)
            temp_phi1 = np.concatenate((phi_r, phi_i), axis=1)
            temp_phi2 = np.concatenate((-phi_i, phi_r), axis=1)
            phi = np.concatenate((temp_phi1, temp_phi2), axis=0)
            temp1 = np.matmul(R, phi)
            temp2 = np.matmul(temp1, T)  # error
            H_hat = H + temp2
            t_Q_r = np.zeros((4, 4))
            t_Q_r[np.triu_indices(4, 1)] = X[i, 4:10]
            Q_r = t_Q_r + t_Q_r.T
            row, col = np.diag_indices(4)
            Q_r[row, col] = X[i, 10:14]
            Q_i = np.zeros((4, 4))
            Q_i[np.triu_indices(4, 1)] = X[i, 14:20]
            Q_i = Q_i - Q_i.T
            temp_Q1 = np.concatenate((Q_r, Q_i), axis=1)
            temp_Q2 = np.concatenate((-Q_i, Q_r), axis=1)
            Q = np.concatenate((temp_Q1, temp_Q2), axis=0)
            t_H_hat_r = H_hat[0, 0:4]
            H_hat_r = t_H_hat_r[np.newaxis, :]
            t_H_hat_i = H_hat[0, 4:8]
            H_hat_i = t_H_hat_i[np.newaxis, :]
            temp_H1 = np.concatenate((-H_hat_i.T, H_hat_r.T), axis=0)
            H_hat_H = np.concatenate((H_hat.T, temp_H1), axis=1)
            temp_result1 = np.matmul(H_hat, Q)
            temp_result2 = np.matmul(temp_result1, H_hat_H)
            Loss += np.log10(1 + temp_result2[0][0])
        Loss = t.from_numpy(np.array(Loss / batch_size))
        return Loss

    @staticmethod
    def backward(ctx, grad_output):
        print('gradient')
        return grad_output

def criterion(output, input):
    return loss.apply(output, input)
This is my loss function, but it produces this error:
Traceback (most recent call last):
  File "/Users/mrfang/channel_capacity/training.py", line 24, in <module>
    loss.backward()
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/tensor.py", line 150, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: function lossBackward returned an incorrect number of gradients (expected 2, got 1)
How can I fix it? Thanks very much.
Your forward(ctx, x, INPUT) takes two inputs, x and INPUT, so backward should output two gradients as well, grad_x and grad_INPUT.
In addition, in your snippet you're not really computing a custom gradient, so you could compute the loss with PyTorch's autograd directly, without having to define a special Function.
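For instance, a loss written purely in torch ops needs no custom Function at all (a hypothetical stand-in formula, not your exact math):
import torch

def criterion(output, target):
    # Same idea in torch ops: autograd tracks every step, so no
    # custom backward is required.
    return torch.log10(1 + (output * target).pow(2).sum())

out = torch.randn(4, requires_grad=True)
loss_val = criterion(out, torch.randn(4))
loss_val.backward()  # gradients flow automatically
print(out.grad)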
If this is working code and you do want to define the custom loss, here's a quick boilerplate of what backward should look like:
@staticmethod
def forward(ctx, x, INPUT):
    # this is required so they're available during the backward call
    ctx.save_for_backward(x, INPUT)
    # custom forward

@staticmethod
def backward(ctx, grad_output):
    x, INPUT = ctx.saved_tensors
    grad_x = grad_INPUT = None
    # compute grad here
    return grad_x, grad_INPUT
You don't need to return gradients for inputs that don't require them, so you can return None for those.
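As a self-contained illustration (a hypothetical two-input example, not the asker's loss), here is a Function whose backward returns one gradient per forward input:
import torch
from torch.autograd import Function

class ScaledDot(Function):
    @staticmethod
    def forward(ctx, x, w):
        ctx.save_for_backward(x, w)
        return (x * w).sum()

    @staticmethod
    def backward(ctx, grad_output):
        x, w = ctx.saved_tensors
        grad_x = grad_output * w  # d/dx of (x * w).sum() is w
        grad_w = grad_output * x  # d/dw of (x * w).sum() is x
        return grad_x, grad_w     # one gradient per forward input

x = torch.randn(4, requires_grad=True)
w = torch.randn(4, requires_grad=True)
ScaledDot.apply(x, w).backward()
print(x.grad, w.grad)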

How to calculate backpropagation through tf.while_loop to use as loss function

I want to implement a Fourier Ring Correlation loss for two images to train a GAN, so I'd like to loop a specific number of times and accumulate the loss. This works fine with a normal Python loop. To speed things up I want to use tf.while_loop, but unfortunately I am not able to track the gradients through the loop. I constructed a dummy example just to calculate gradients inside a while loop, but it doesn't work. First, the working Python loop:
x = tf.constant(3.0)
y = tf.constant(2.0)
for i in range(3):
    y = y * x
grad = tf.gradients(y, x)
with tf.Session() as ses:
    print("output : ", ses.run(grad))
This works and gives the output
[54]
which is expected, since the loop unrolls to y = 2 * x**3 and dy/dx = 6 * x**2 = 54 at x = 3. If I do the same with a tf.while_loop, it doesn't work:
a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    y = y * x
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y]

results = tf.while_loop(cond, body, [a, b, x, y], back_prop=True)
grad = tf.gradients(y, results[2])
with tf.Session() as ses:
    print("grad : ", ses.run(grad))
The output is:
TypeError: Fetch argument None has invalid type <class 'NoneType'>
So I guess TensorFlow is somehow not able to do the backpropagation.
The problem still occurs if you use tf.GradientTape() instead of tf.gradients().
I changed the code so that it now outputs the gradients:
import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.Variable(3.0, tf.float32)
y = tf.Variable(2.0, tf.float32)
dy = tf.Variable(0.0, tf.float32)

def cond(a, b, x, y, dy):
    return tf.less(a, b)

def body(a, b, x, y, dy):
    y = y * x
    dy = tf.gradients(y, x)[0]
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y, dy]

init = tf.global_variables_initializer()
with tf.Session() as ses:
    ses.run(init)
    results = ses.run(tf.while_loop(cond, body, [a, b, x, y, dy], back_prop=True))
    print("grad : ", results[-1])
The things I modified:
I made x and y into variables and added their initialisation init.
I added a variable called dy which will contain the gradient of y.
I moved the tf.while_loop inside the session.
I put the evaluation of the gradient inside the body function.
I think the problem before was that when you defined grad = tf.gradients(y, results[2]), the loop had not run yet, so y was not a function of x; therefore, there was no gradient.
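As an alternative (an untested sketch under the same TF 1.x setup), you can also keep tf.gradients outside the loop by differentiating the loop's output tensor, results[3], with respect to the original x rather than the pre-loop y:
import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    return [a + 1, b, x, y * x]

results = tf.while_loop(cond, body, [a, b, x, y], back_prop=True)
# results[3] is the loop's final y, which IS a function of x
grad = tf.gradients(results[3], x)
with tf.Session() as ses:
    print("grad : ", ses.run(grad))  # expected: [54.0]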
Hope this helps.

SessionRunHook returning empty SessionRunValues after run

I'm trying to write a hook that will allow me to compute some global metrics (rather than batch-wise metrics). To prototype, I thought I'd get a simple hook up and running that would capture and remember true positives. It looks like this:
class TPHook(tf.train.SessionRunHook):
    def after_create_session(self, session, coord):
        print("Starting Hook")
        tp_name = 'metrics/f1_macro/TP'
        self.tp = []
        self.args = session.graph.get_operation_by_name(tp_name)
        print(f"Got Args: {self.args}")

    def before_run(self, run_context):
        print("Starting Before Run")
        return tf.train.SessionRunArgs(self.args)

    def after_run(self, run_context, run_values):
        print("After Run")
        print(f"Got Values: {run_values.results}")
However, the values returned in the after_run part of the hook are always None. I tested this in both the train and evaluation phases. Am I misunderstanding something about how SessionRunHooks are supposed to work?
Possibly relevant information:
The model was built in Keras and converted to an estimator with the keras.estimator.model_to_estimator() function. The model has been tested and works fine, and the op that I'm trying to retrieve in the hook is defined in this code block:
def _f1_macro_vector(y_true, y_pred):
    """Computes the F1-score with macro averaging.

    Arguments:
        y_true {tf.Tensor} -- Ground-truth labels
        y_pred {tf.Tensor} -- Predicted labels

    Returns:
        tf.Tensor -- The computed F1-score
    """
    y_true = K.cast(y_true, tf.float64)
    y_pred = K.cast(y_pred, tf.float64)
    TP = tf.reduce_sum(y_true * K.round(y_pred), axis=0, name='TP')
    FN = tf.reduce_sum(y_true * (1 - K.round(y_pred)), axis=0, name='FN')
    FP = tf.reduce_sum((1 - y_true) * K.round(y_pred), axis=0, name='FP')
    prec = TP / (TP + FP)
    rec = TP / (TP + FN)
    # Convert NaNs to zero
    prec = tf.where(tf.is_nan(prec), tf.zeros_like(prec), prec)
    rec = tf.where(tf.is_nan(rec), tf.zeros_like(rec), rec)
    f1 = 2 * (prec * rec) / (prec + rec)
    # Convert NaNs to zero
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return f1
In case anyone runs into the same problem: I found out how to restructure the program so that it works. Although the documentation makes it sound like you can pass raw ops into SessionRunArgs, it seems to require actual tensors (maybe this is a misreading on my part).
This is pretty easy to accomplish: I just changed the after_create_session code to what's shown below.
def after_create_session(self, session, coord):
    tp_name = 'metrics/f1_macro/TP'
    self.tp = []
    tp_tensor = session.graph.get_tensor_by_name(tp_name + ':0')
    self.args = [tp_tensor]
And this successfully runs.
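For completeness, the after_run side can then accumulate the fetched values across steps (a sketch, assuming the hook above):
def after_run(self, run_context, run_values):
    # run_values.results now holds the evaluated TP tensor, one entry
    # per fetch passed to SessionRunArgs in before_run
    self.tp.append(run_values.results[0])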

Having trouble extracting the string value from a tensor with datatype tf.string

I'm writing an NN which requires text (as a string) to be fed in as a placeholder in TensorFlow. I'm having trouble figuring out how to extract the string from the placeholder, which holds a tensor object. I tried initializing an interactive session and then calling placeholder.eval(), but on the initial run, before any text had been fed in, I got an error because the placeholder was empty. Can anyone give me any pointers on how to do this?
Here's my code for reference.
def train_1(self):
    real_image_size = 256
    text_input = tf.placeholder(dtype=tf.string)
    real_image = tf.placeholder(dtype=tf.float32, shape=(real_image_size, real_image_size, 3))
    text_input = text_input[0][0]
    all_captions = self.caption_arr
    rand_idx = np.random.random() * 11788
    fake_caption = all_captions[int(rand_idx)]
    while text_input == fake_caption:
        rand_idx = np.random.random() * len(captions)
        fake_caption = all_captions[rand_idx]
    fake_image_size = 64
    fake_image = self.generator_1(text_input)
    real_result_real_caption = discriminator_1(real_image, text_input)
    real_result_fake_caption = discriminator_1(real_image, fake_caption)
    fake_result = discriminator_1(fake_image, text_input)
    dis_loss = tf.reduce_mean(real_result_fake_caption) + tf.reduce_mean(fake_result) - tf.reduce_mean(real_result_real_caption)
    gen_loss = -tf.reduce_mean(fake_result)
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'dis' in var.name]
    g_vars = [var for var in t_vars if 'gen' in var.name]
    trainer_dis = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(dis_loss, var_list=d_vars)
    trainer_gen = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(gen_loss, var_list=g_vars)
    # sess = tf.InteractiveSession()
    # sess.run(tf.local_variables_initializer())
    # sess.run(tf.global_variables_initializer())
    # text_input = text_input.eval({text_input: [[""]]})
    with tf.Session() as sess:
        batch_size = 1
        num_of_imgs = 11788
        num_epochs = 1000  # adjust if necessary
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        print('Start Training::: ')
        for i in range(num_epochs):
            print(str(i) + 'th epoch: ')
            feeder = pr.FeedExamples()
            num_of_batches = int(num_of_imgs / batch_size)
            for j in range(num_of_batches):
                # Training the Discriminator.
                for k in range(5):
                    train_data = feeder.next_example()
                    train_image = train_data[0]
                    txt = train_data[1]
                    feed_txt = tf.constant([[txt]])
                    _, dLoss = sess.run([dis_loss, trainer_dis],
                                        feed_dict={text_input: feed_txt, real_image: train_image})
                # Training the Generator.
                for k in range(1):
                    train_data = feeder.curr_example()
                    train_image = train_data[0]
                    txt = train_data[1]
                    _, gLoss = sess.run([gen_loss, trainer_gen],
                                        feed_dict={text_input: tf.constant([[txt]]), real_image: train_image})
            print('Discriminator Loss: ' + str(dLoss))
            print('Generator Loss: ' + str(gLoss))
To answer your question, from https://www.tensorflow.org/api_docs/python/tf/placeholder:
Inserts a placeholder for a tensor that will be always fed.
Important: This tensor will produce an error if evaluated. Its value must be fed using the feed_dict optional argument to Session.run(), Tensor.eval(), or Operation.run().
A placeholder does not have a value other than the value you feed in; that is the difference from a variable. A variable wouldn't make much sense in your case anyway, since you are talking about the input, so it is not clear what you are actually trying to achieve.
I would suggest reducing the example to a minimal one (e.g. a single placeholder, variable, or operation). It will also help you understand TensorFlow better.
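To make this concrete, here is a minimal TF 1.x sketch of feeding and reading back a string placeholder; note that feed_dict takes plain Python values (or NumPy arrays), not tf.constant tensors:
import tensorflow as tf  # TF 1.x

text_input = tf.placeholder(dtype=tf.string)

with tf.Session() as sess:
    value = sess.run(text_input, feed_dict={text_input: [["hello"]]})
    print(value)  # [[b'hello']]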
