I am new to TensorFlow. The following partial code throws an AttributeError: 'NoneType' object has no attribute 'dtype'. Could someone point out what I am doing wrong and how to code this correctly?
I realize the problem occurs when I try to minimize the loss inside train_function. I was assuming that during the interactive session s, batch_x_train and batch_y_train would be passed to the call to optimizer, which calls binary_logloss with the batch_y_train values. I know how to make this work without TensorFlow, but I am a bit confused once graphs are involved.
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])
predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)

def binary_logloss(true_y):
    if true_y == 1.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(predicted_y), axis=1))
    elif true_y == 0.:
        return tf.reduce_mean(tf.reduce_sum(-tf.log(1 - predicted_y), axis=1))

def train_function(X, y):
    loss = binary_logloss(input_y)
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
    _, c = s.run([optimizer, loss], {input_X: X, input_y: y})
    return _, c

s.run(tf.global_variables_initializer())
for epoch in epochs:
    _, c = train_function(batch_x_train, batch_y_train)
You have to refactor the code like this and implement a batching mechanism; then it should execute. (The root cause of the error: true_y == 1. compares a placeholder tensor to a Python float, which is always False at graph-construction time, so binary_logloss falls through both branches and returns None, and minimize() then fails on that None loss.)
input_X = tf.placeholder('float32', [None,64])
input_y = tf.placeholder('float32', [None,num_classes])
predicted_y = tf.sigmoid(tf.matmul(input_X, weights) + b)
loss = tf.reduce_mean(-(input_y * tf.log(predicted_y) + (1 - input_y) * tf.log(1 - predicted_y)))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as s:
    s.run(init)
    for epoch in epochs:
        # batch_x_train, batch_y_train = Batching code
        _, c = s.run([optimizer, loss], {input_X: batch_x_train, input_y: batch_y_train})
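For completeness, here is a minimal end-to-end sketch of the refactored graph with a simple slicing batcher. The weights, b, num_classes, and the X_train/y_train arrays below are assumed stand-ins for your own variables and data, and the clip_by_value call is an addition to guard against log(0):

import numpy as np
import tensorflow as tf

num_classes = 10                    # assumed
X_train = np.random.rand(1000, 64)  # stand-in data
y_train = np.eye(num_classes)[np.random.randint(0, num_classes, 1000)]

weights = tf.Variable(tf.zeros([64, num_classes]))
b = tf.Variable(tf.zeros([num_classes]))

input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, num_classes])
# clip to keep log() away from 0 (an addition to the original)
predicted_y = tf.clip_by_value(tf.sigmoid(tf.matmul(input_X, weights) + b), 1e-7, 1 - 1e-7)
loss = tf.reduce_mean(-(input_y * tf.log(predicted_y) + (1 - input_y) * tf.log(1 - predicted_y)))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

batch_size = 100
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    for epoch in range(10):
        for start in range(0, len(X_train), batch_size):
            batch_x = X_train[start:start + batch_size]
            batch_y = y_train[start:start + batch_size]
            _, c = s.run([optimizer, loss], {input_X: batch_x, input_y: batch_y})
        print('epoch', epoch, 'loss', c)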
I have two networks. The output of the first network is the input to the second. To calculate the loss for the second network, I use vanilla policy gradient. I want to backpropagate this loss into the first network. After checking whether the gradients have changed, I see that they are all None.
I first load the first network (a pre-trained autoencoder) this way:
def load_checkpoint(filepath, model):
    checkpoint = torch.load(filepath)
    model.load_state_dict(checkpoint['state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad = True
    model.train()
    return model
Then I define the optimizers for both networks this way:
class MultipleOptimizer(object):
    def __init__(self, *op):
        self.optimizers = op

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()
opt = MultipleOptimizer(SGD(model.parameters(), lr=1, momentum=0.9), Adam(logits_net.parameters(), lr=lr))
The reward function is:
# Reward function
def reward(x, act):
    # print('action', act)
    # print('x type', type(x))
    km = KMeans(act, n_init=20, n_jobs=4)
    y_pred = km.fit_predict(x.detach().cpu().numpy())  # seems we can only get a centre from batch
    # print('k-means output type', type(y_pred))
    sil_score = sil(x.detach().cpu().numpy(), y_pred)
    # print('sil score', sil_score)
    return sil_score
The architecture of the second neural net, and an alternative written to avoid logits = logits.mean(0):
def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity):
    # Build a feedforward neural network; outputs are the logits
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)
class mlp2(torch.nn.Module):
    def __init__(self):
        super(mlp2, self).__init__()
        self.linear1 = nn.Linear(10, 100)
        self.relu1 = nn.ReLU(inplace=True)
        self.linear2 = torch.nn.Linear(100, 100)
        self.linear3 = torch.nn.Linear(100, 20)
        self.linear4 = torch.nn.Linear(2000, 100)
        self.ident = nn.Identity()

    def forward(self, x):
        a = self.linear1(x)
        a = self.relu1(a)
        a = self.linear2(a)
        a = self.relu1(a)
        a = self.linear3(a)
        a = torch.flatten(a)
        a = self.linear4(a)
        a = self.relu1(a)
        a = self.linear3(a)
        out = self.ident(a)
        return out
The loss is calculated in the following order:
def get_policy(obs):
    logits = logits_net(obs)
    return Categorical(logits=logits.mean(0))

def get_action(obs):
    return get_policy(obs).sample().item()

def Logp(obs, act):
    logp = get_policy(obs).log_prob(act.cuda())
    return logp

def compute_loss(logp, weights):
    return -(logp * weights).mean()

def train_one_epoch():
    # make some empty lists for logging
    batch_obs = []      # for observations
    batch_acts = []     # for actions
    batch_weights = []  # for R(tau) weighting in policy gradient
    batch_logp = []

    # reset episode-specific variables
    j = 1         # signal from environment that episode is over
    ep_rews = []  # list for rewards accrued throughout ep

    for i, data in enumerate(train_loader):
        # create the mean image out of those 100 images
        x, label = data
        x = model(x.cuda())         # torch.Size([100, 10])
        obs = x.data.cpu().numpy()  # [100, 10] - a trajectory with only one state
        # save obs
        batch_obs.append(obs.copy())

        # act in the environment
        # act = get_action(torch.as_tensor(obs, dtype=torch.float32))
        act = get_action(x)
        print('action type', type(act))

        # log probability
        # logp = Logp(torch.as_tensor(obs, dtype=torch.float32), act=torch.as_tensor(act, dtype=torch.int32))
        logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32))
        # rew = reward(obs, act+2)
        rew = reward(x, act + 2)

        # save action, reward
        batch_acts.append(act)
        batch_weights.append(rew)  # episode rewards
        batch_logp.append(logp)

    opt.zero_grad()
    batch_logp = torch.stack(batch_logp, dim=0)
    batch_loss = compute_loss(logp=torch.as_tensor(batch_logp, dtype=torch.float32),
                              weights=torch.as_tensor(batch_weights, dtype=torch.float32))
    batch_loss.backward()  # does it return anything? gradients? print them!
    opt.step()

    for name, param in logits_net.named_parameters():
        print(name, param.grad)
I applied some changes on the assumption that recreating some of the tensors might be the issue:
The output of the first network, obs, used to be converted with obs = x.data.cpu().numpy() and then sent to the get_action function as act = get_action(torch.as_tensor(obs, dtype=torch.float32)). I changed this to act = get_action(x), so x is sent to the function directly. I also changed the arguments of Logp to logp = Logp(x, act=torch.as_tensor(act, dtype=torch.int32)).
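To illustrate that suspicion in isolation (a minimal sketch, unrelated to the networks above): a tensor that takes a round trip through .detach().cpu().numpy() comes back as a new leaf with no grad_fn, so nothing upstream of it can receive gradients:

import torch

w = torch.randn(3, requires_grad=True)
y = w * 2.0

# round trip through numpy: the autograd history is lost
y_np = torch.as_tensor(y.detach().cpu().numpy())
print(y_np.grad_fn)  # None - y_np is a new leaf tensor

# staying in torch keeps the graph intact
y.sum().backward()
print(w.grad)  # tensor([2., 2., 2.])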
After these changes, I still get None for the gradients. Is there any way to backpropagate the gradient when the loss is calculated this way? Are there any changes I can apply?
Any help is appreciated.
import numpy as np
import torch as t
from torch.autograd import Function

class loss(Function):
    @staticmethod
    def forward(ctx, x, INPUT):
        batch_size = x.shape[0]
        X = x.detach().numpy()
        input = INPUT.detach().numpy()
        Loss = 0
        for i in range(batch_size):
            t_R_r = input[i, 0:4]
            R_r = t_R_r[np.newaxis, :]
            t_R_i = input[i, 4:8]
            R_i = t_R_i[np.newaxis, :]
            t_H_r = input[i, 8:12]
            H_r = t_H_r[np.newaxis, :]
            t_H_i = input[i, 12:16]
            H_i = t_H_i[np.newaxis, :]
            t_T_r = input[i, 16:32]
            T_r = t_T_r.reshape(4, 4)
            t_T_i = input[i, 32:48]
            T_i = t_T_i.reshape(4, 4)
            R = np.concatenate((R_r, R_i), axis=1)
            H = np.concatenate((H_r, H_i), axis=1)
            temp_t1 = np.concatenate((T_r, T_i), axis=1)
            temp_t2 = np.concatenate((-T_i, T_r), axis=1)
            T = np.concatenate((temp_t1, temp_t2), axis=0)
            phi_r = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_r[row, col] = X[i, 0:4]
            phi_i = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_i[row, col] = 1 - np.power(X[i, 0:4], 2)
            temp_phi1 = np.concatenate((phi_r, phi_i), axis=1)
            temp_phi2 = np.concatenate((-phi_i, phi_r), axis=1)
            phi = np.concatenate((temp_phi1, temp_phi2), axis=0)
            temp1 = np.matmul(R, phi)
            temp2 = np.matmul(temp1, T)  # error
            H_hat = H + temp2
            t_Q_r = np.zeros((4, 4))
            t_Q_r[np.triu_indices(4, 1)] = X[i, 4:10]
            Q_r = t_Q_r + t_Q_r.T
            row, col = np.diag_indices(4)
            Q_r[row, col] = X[i, 10:14]
            Q_i = np.zeros((4, 4))
            Q_i[np.triu_indices(4, 1)] = X[i, 14:20]
            Q_i = Q_i - Q_i.T
            temp_Q1 = np.concatenate((Q_r, Q_i), axis=1)
            temp_Q2 = np.concatenate((-Q_i, Q_r), axis=1)
            Q = np.concatenate((temp_Q1, temp_Q2), axis=0)
            t_H_hat_r = H_hat[0, 0:4]
            H_hat_r = t_H_hat_r[np.newaxis, :]
            t_H_hat_i = H_hat[0, 4:8]
            H_hat_i = t_H_hat_i[np.newaxis, :]
            temp_H1 = np.concatenate((-H_hat_i.T, H_hat_r.T), axis=0)
            H_hat_H = np.concatenate((H_hat.T, temp_H1), axis=1)
            temp_result1 = np.matmul(H_hat, Q)
            temp_result2 = np.matmul(temp_result1, H_hat_H)
            Loss += np.log10(1 + temp_result2[0][0])
        Loss = t.from_numpy(np.array(Loss / batch_size))
        return Loss

    @staticmethod
    def backward(ctx, grad_output):
        print('gradient')
        return grad_output

def criterion(output, input):
    return loss.apply(output, input)
This is my loss function, but it presents this error:
Traceback (most recent call last):
  File "/Users/mrfang/channel_capacity/training.py", line 24, in <module>
    loss.backward()
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/tensor.py", line 150, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: function lossBackward returned an incorrect number of gradients (expected 2, got 1)
How could I fix it? Thanks very much.
Your forward(ctx, x, INPUT) takes two inputs, x and INPUT, so backward should output two gradients as well, grad_x and grad_INPUT.
In addition, in your snippet you're not really computing a custom gradient, so you could compute the loss with PyTorch's autograd directly, without having to define a special Function.
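For example, a sketch of that idea (the reduction below is a hypothetical toy stand-in, not your channel-capacity formula): express the math with torch ops instead of numpy, and autograd derives backward automatically:

import torch

def criterion(x, INPUT):
    # hypothetical stand-in: use torch ops (torch.matmul, torch.cat,
    # torch.log10, ...) so the whole computation stays differentiable
    temp = (x * INPUT[:, :x.shape[1]]).sum(dim=1)
    return torch.log10(1 + temp.abs()).mean()

x = torch.randn(8, 20, requires_grad=True)
INPUT = torch.randn(8, 48)
out = criterion(x, INPUT)
out.backward()
print(x.grad.shape)  # torch.Size([8, 20])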
If this is working code and you're going to define the custom loss, here's a quick boilerplate of what backward should comprise:
@staticmethod
def forward(ctx, x, INPUT):
    # save the inputs so they're available during the backward call
    ctx.save_for_backward(x, INPUT)
    # custom forward goes here

@staticmethod
def backward(ctx, grad_output):
    x, INPUT = ctx.saved_tensors
    grad_x = grad_INPUT = None
    # compute the gradients here
    return grad_x, grad_INPUT
You don't need to return gradients for inputs that don't require them, so you can return None for those.
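As a concrete illustration (a toy squared-error Function, not your loss), here is a complete custom Function whose backward returns one gradient per forward input:

import torch
from torch.autograd import Function

class SquaredError(Function):
    @staticmethod
    def forward(ctx, x, target):
        ctx.save_for_backward(x, target)
        return ((x - target) ** 2).mean()

    @staticmethod
    def backward(ctx, grad_output):
        x, target = ctx.saved_tensors
        # one gradient per forward input, in the same order
        grad_x = grad_output * 2 * (x - target) / x.numel()
        grad_target = None  # no gradient needed for the target
        return grad_x, grad_target

x = torch.randn(4, requires_grad=True)
target = torch.zeros(4)
SquaredError.apply(x, target).backward()
print(x.grad)  # matches the analytic gradient 2 * (x - target) / 4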
I want to implement a Fourier Ring Correlation loss for two images to train a GAN. Therefore I'd like to loop a specific number of times and calculate the loss. This works fine with a normal Python loop. To speed things up I want to use tf.while_loop, but unfortunately I am not able to track the gradients through the while loop. I constructed a dummy example just to calculate gradients during a while loop, but it doesn't work. First, the working Python loop:
x = tf.constant(3.0)
y = tf.constant(2.0)
for i in range(3):
    y = y * x
grad = tf.gradients(y, x)
with tf.Session() as ses:
    print("output : ", ses.run(grad))
This works and gives the output
[54]
If I do the same with tf.while_loop, it doesn't work:
a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    y = y * x
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y]

results = tf.while_loop(cond, body, [a, b, x, y], back_prop=True)
grad = tf.gradients(y, results[2])

with tf.Session() as ses:
    print("grad : ", ses.run(grad))
The output is:
TypeError: Fetch argument None has invalid type <class 'NoneType'>
So I guess somehow TensorFlow is not able to do the backpropagation.
The problem still occurs if you use tf.GradientTape() instead of tf.gradients().
I changed the code so that it now outputs the gradients:
import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.Variable(3.0, tf.float32)
y = tf.Variable(2.0, tf.float32)
dy = tf.Variable(0.0, tf.float32)

def cond(a, b, x, y, dy):
    return tf.less(a, b)

def body(a, b, x, y, dy):
    y = y * x
    dy = tf.gradients(y, x)[0]
    with tf.control_dependencies([y]):
        a = a + 1
    return [a, b, x, y, dy]

init = tf.global_variables_initializer()
with tf.Session() as ses:
    ses.run(init)
    results = ses.run(tf.while_loop(cond, body, [a, b, x, y, dy], back_prop=True))
    print("grad : ", results[-1])
The things I modified:
- I made x and y into variables and added their initialisation init.
- I added a variable called dy, which will contain the gradient of y.
- I moved the tf.while_loop inside the session.
- I put the evaluation of the gradient inside the body function.
I think the problem before was that when you define grad = tf.gradients(y, results[2]), the loop has not run yet, so y is not a function of x. Therefore, there is no gradient.
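Alternatively (a sketch I have only checked on this dummy example, not against a full FRC loss): you can keep the gradient symbolic and differentiate the loop's output instead of the pre-loop constant y, since results[3] is the tensor that actually depends on x:

import tensorflow as tf

a = tf.constant(0, dtype=tf.int64)
b = tf.constant(3, dtype=tf.int64)
x = tf.constant(3.0)
y = tf.constant(2.0)

def cond(a, b, x, y):
    return tf.less(a, b)

def body(a, b, x, y):
    return [a + 1, b, x, y * x]

results = tf.while_loop(cond, body, [a, b, x, y])
# differentiate the loop output, not the original y
grad = tf.gradients(results[3], x)

with tf.Session() as ses:
    print("grad : ", ses.run(grad))  # [54.0], matching the Python loop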
Hope this helps.
I'm trying to write a hook that will allow me to compute some global metrics (rather than batch-wise metrics). To prototype, I thought I'd get a simple hook up and running that would capture and remember true positives. It looks like this:
class TPHook(tf.train.SessionRunHook):
    def after_create_session(self, session, coord):
        print("Starting Hook")
        tp_name = 'metrics/f1_macro/TP'
        self.tp = []
        self.args = session.graph.get_operation_by_name(tp_name)
        print(f"Got Args: {self.args}")

    def before_run(self, run_context):
        print("Starting Before Run")
        return tf.train.SessionRunArgs(self.args)

    def after_run(self, run_context, run_values):
        print("After Run")
        print(f"Got Values: {run_values.results}")
However, the values returned in the "after_run" part of the hook are always None. I tested this in both the train and evaluation phase. Am I misunderstanding something about how the SessionRunHooks are supposed to work?
Maybe relevant information:
The model was built in Keras and converted to an estimator with the keras.estimator.model_to_estimator() function. The model has been tested and works fine, and the op that I'm trying to retrieve in the hook is defined in this code block:
def _f1_macro_vector(y_true, y_pred):
    """Computes the F1-score with macro averaging.

    Arguments:
        y_true {tf.Tensor} -- Ground-truth labels
        y_pred {tf.Tensor} -- Predicted labels

    Returns:
        tf.Tensor -- The computed F1-score
    """
    y_true = K.cast(y_true, tf.float64)
    y_pred = K.cast(y_pred, tf.float64)
    TP = tf.reduce_sum(y_true * K.round(y_pred), axis=0, name='TP')
    FN = tf.reduce_sum(y_true * (1 - K.round(y_pred)), axis=0, name='FN')
    FP = tf.reduce_sum((1 - y_true) * K.round(y_pred), axis=0, name='FP')
    prec = TP / (TP + FP)
    rec = TP / (TP + FN)
    # Convert NaNs to zero
    prec = tf.where(tf.is_nan(prec), tf.zeros_like(prec), prec)
    rec = tf.where(tf.is_nan(rec), tf.zeros_like(rec), rec)
    f1 = 2 * (prec * rec) / (prec + rec)
    # Convert NaNs to zero
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return f1
In case anyone runs into the same problem, I found out how to restructure the program so that it works. Although the documentation makes it sound like you can pass raw ops into SessionRunArgs, it seems to require actual tensors (maybe this is a misreading on my part).
This is pretty easy to accomplish - I just changed the after_create_session code to what's shown below.
def after_create_session(self, session, coord):
    tp_name = 'metrics/f1_macro/TP'
    self.tp = []
    tp_tensor = session.graph.get_tensor_by_name(tp_name + ':0')
    self.args = [tp_tensor]
And this successfully runs.
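Putting the pieces together, a sketch of the full working hook (same structure as the original, only the fetch changed) might read:

class TPHook(tf.train.SessionRunHook):
    def after_create_session(self, session, coord):
        tp_name = 'metrics/f1_macro/TP'
        self.tp = []
        # fetch the op's output tensor (':0'), not the op itself
        tp_tensor = session.graph.get_tensor_by_name(tp_name + ':0')
        self.args = [tp_tensor]

    def before_run(self, run_context):
        return tf.train.SessionRunArgs(self.args)

    def after_run(self, run_context, run_values):
        # run_values.results now holds the evaluated TP values
        self.tp.append(run_values.results[0])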
I'm writing an NN which requires text (as a string) to be fed into a placeholder in TensorFlow. I'm having trouble figuring out how to extract the string from the placeholder, which must hold a tensor object. I tried initializing an interactive session and then calling placeholder.eval(), but in the initial run, before any text had been fed in, I got an error because the placeholder was empty. Can anyone give me any pointers on how to do this?
Here's my code for reference.
def train_1(self):
    real_image_size = 256
    text_input = tf.placeholder(dtype=tf.string)
    real_image = tf.placeholder(dtype=tf.float32, shape=(real_image_size, real_image_size, 3))
    text_input = text_input[0][0]
    all_captions = self.caption_arr
    rand_idx = np.random.random() * 11788
    fake_caption = all_captions[int(rand_idx)]
    while text_input == fake_caption:
        rand_idx = np.random.random() * len(all_captions)
        fake_caption = all_captions[int(rand_idx)]
    fake_image_size = 64
    fake_image = self.generator_1(text_input)
    real_result_real_caption = discriminator_1(real_image, text_input)
    real_result_fake_caption = discriminator_1(real_image, fake_caption)
    fake_result = discriminator_1(fake_image, text_input)
    dis_loss = tf.reduce_mean(real_result_fake_caption) + tf.reduce_mean(fake_result) - tf.reduce_mean(real_result_real_caption)
    gen_loss = -tf.reduce_mean(fake_result)
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'dis' in var.name]
    g_vars = [var for var in t_vars if 'gen' in var.name]
    trainer_dis = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(dis_loss, var_list=d_vars)
    trainer_gen = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(gen_loss, var_list=g_vars)
    # sess = tf.InteractiveSession()
    # sess.run(tf.local_variables_initializer())
    # sess.run(tf.global_variables_initializer())
    # text_input = text_input.eval({text_input : [[""]]})
    with tf.Session() as sess:
        batch_size = 1
        num_of_imgs = 11788
        num_epochs = 1000  # adjust if necessary
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        print('Start Training::: ')
        for i in range(num_epochs):
            print(str(i) + 'th epoch: ')
            feeder = pr.FeedExamples()
            num_of_batches = int(num_of_imgs / batch_size)
            for j in range(num_of_batches):
                # Training the discriminator.
                for k in range(5):
                    train_data = feeder.next_example()
                    train_image = train_data[0]
                    txt = train_data[1]
                    feed_txt = tf.constant([[txt]])
                    dLoss, _ = sess.run([dis_loss, trainer_dis],
                                        feed_dict={text_input: feed_txt, real_image: train_image})
                # Training the generator.
                for k in range(1):
                    train_data = feeder.curr_example()
                    train_image = train_data[0]
                    txt = train_data[1]
                    gLoss, _ = sess.run([gen_loss, trainer_gen],
                                        feed_dict={text_input: tf.constant([[txt]]), real_image: train_image})
                print('Discriminator Loss: ' + str(dLoss))
                print('Generator Loss: ' + str(gLoss))
To answer your question, the documentation (https://www.tensorflow.org/api_docs/python/tf/placeholder) says:
"Inserts a placeholder for a tensor that will be always fed. Important: This tensor will produce an error if evaluated. Its value must be fed using the feed_dict optional argument to Session.run(), Tensor.eval(), or Operation.run()."
A placeholder does not have a value other than the value you feed in; that is the difference from a variable. A variable wouldn't make much sense in your case anyway, since you are talking about the input, so it is not clear what you are actually trying to achieve.
I would suggest reducing the code to a minimal example (e.g. a single placeholder, variable, or operation). It will also help you understand TensorFlow better.
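A minimal example of feeding a string placeholder (note that feed_dict takes plain Python data such as a nested list of strings, never a tf.constant):

import tensorflow as tf

text_input = tf.placeholder(dtype=tf.string, shape=[1, 1])
first = text_input[0][0]

with tf.Session() as sess:
    result = sess.run(first, feed_dict={text_input: [["hello"]]})
    print(result)  # b'hello'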