What is causing TensorFlow retracing? - python-3.x
Here is my code for matrix factorization.
I am trying to run part of my iterate function on TPUs and I am getting retracing warnings. If I am making an obvious mistake, I am sorry. update_xu and update_yi are mostly matrix multiplication and matrix inversion, hence I try to run them inside strategy.scope(). Eventually this seems to lead to some kind of memory leak.
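strategy itself is created outside the snippets below; a minimal sketch of the assumed setup (the usual Colab TPU boilerplate, not shown in my code):

import tensorflow as tf

# Assumed setup, not part of the question's code: resolve and initialise the
# TPU, then build the distribution strategy whose scope iterate() runs under.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)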
def loss(R, X, Y, C, lmda_x, lmda_y):
    '''
    Returns the MSE of the weighted least squares plus the L2 regularisation error.
    '''
    Error = R - tf.matmul(X, Y, transpose_a=True)
    Error = Error * Error
    Error = C * Error
    Reg = tf.math.reduce_sum(X * X * lmda_x) + tf.math.reduce_sum(Y * Y * lmda_y)
    return Reg + tf.math.reduce_sum(Error) / (Error.shape[0] * Error.shape[1])
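Written out, the objective this computes (with m, n the two dimensions of R, and lmda_x, lmda_y as \lambda_x, \lambda_y) is

L(X, Y) = \frac{1}{mn} \sum_{u,i} C_{ui} \left( R_{ui} - (X^\top Y)_{ui} \right)^2 + \lambda_x \lVert X \rVert_F^2 + \lambda_y \lVert Y \rVert_F^2

Note that only the weighted error term is divided by mn; the regularisation term is not.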
def update_xu(Ru, Y, Cuser, lmda):
    '''
    input
        column vector Ru,
        k x m matrix Y,
        m x m matrix Cuser and
        lmda_x
    output
        the updated user factor vector (xu), holding the Y matrix constant
    '''
    Ru = tf.reshape(Ru, shape=[Y.shape[1], 1])
    C = Cuser @ Ru
    inverse = tf.linalg.inv(Y @ Cuser @ tf.transpose(Y) + lmda * tf.eye(Y.shape[0]))
    ans = inverse @ Y @ C
    return tf.reshape(ans, [Y.shape[0]])
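In other words, update_xu returns the closed-form ridge-regression solution for xu with Y held fixed:

x_u = \left( Y\, C_{\text{user}}\, Y^{\top} + \lambda_x I \right)^{-1} Y\, C_{\text{user}}\, R_u

tf.linalg.inv raises if the k x k matrix being inverted is singular, which is where the InvalidArgumentError at the bottom originates.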
def update_yi(Ri, X, Citem, lmda):
    '''
    input
        column vector Ri,
        k x n matrix X,
        n x n matrix Citem and
        lmda_y
    output
        the updated item factor vector (yi), holding the X matrix constant
    '''
    Ri = tf.reshape(Ri, shape=[X.shape[1], 1])
    C = Citem @ Ri
    inverse = tf.linalg.inv(X @ Citem @ tf.transpose(X) + lmda * tf.eye(X.shape[0]))
    ans = inverse @ X @ C
    return tf.reshape(ans, [X.shape[0]])
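And update_yi is the symmetric update with X held fixed:

y_i = \left( X\, C_{\text{item}}\, X^{\top} + \lambda_y I \right)^{-1} X\, C_{\text{item}}\, R_i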
def iterate(R, X, Y, C, lmda_x, lmda_y, epochs):
    '''
    Returns approximately updated X and Y such that R ≈ (X.T)Y, using the WALS algorithm.
    '''
    with strategy.scope():
        for _ in range(epochs):
            Xtt = tf.vectorized_map(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C))
            # Xtt = tf.map_fn(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C), dtype=tf.TensorSpec([Y.shape[0]], dtype=tf.float32), parallel_iterations=6)
            print(Xtt.shape)
            X = tf.transpose(Xtt)
            R, C = tf.transpose(R), tf.transpose(C)
            Ytt = tf.vectorized_map(lambda x: update_yi(x[0], X, tf.linalg.diag(x[1]), lmda_y), (R, C))
            # Ytt = tf.map_fn(lambda x: update_yi(x[0], X, tf.linalg.diag(x[1]), lmda_y), (R, C), dtype=tf.TensorSpec([X.shape[0]], dtype=tf.float32), parallel_iterations=6)
            R, C = tf.transpose(R), tf.transpose(C)
            print(Ytt.shape)
            Y = tf.transpose(Ytt)
    return X, Y
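One restructuring the retracing warnings below point at (a sketch under assumptions, not my original code; the helper sweep and its signature are hypothetical): define the traced function once at module level with relaxed shapes, so the user pass and the transposed item pass reuse a single trace instead of tracing freshly on every call.

import tensorflow as tf

# Hypothetical sketch: a single tf.function defined once at module level.
# The None dimensions relax the shapes, so the (users x items) pass and the
# transposed (items x users) pass both reuse one trace instead of retracing.
@tf.function(input_signature=[
    tf.TensorSpec([None, None], tf.float32),  # R: ratings, one row per entity
    tf.TensorSpec([None, None], tf.float32),  # F: factor matrix held fixed (Y or X)
    tf.TensorSpec([None, None], tf.float32),  # C: confidence weights, same shape as R
    tf.TensorSpec([], tf.float32),            # lmda: L2 regularisation strength
])
def sweep(R, F, C, lmda):
    k = tf.shape(F)[0]  # tf.shape, not F.shape, keeps the trace shape-agnostic
    def update(row):
        r, c = row
        Cd = tf.linalg.diag(c)
        inv = tf.linalg.inv(F @ Cd @ tf.transpose(F) + lmda * tf.eye(k))
        return tf.reshape(inv @ F @ (Cd @ tf.reshape(r, [-1, 1])), [-1])
    return tf.vectorized_map(update, (R, C))

iterate would then call Xtt = sweep(R, Y, C, lmda_x) and Ytt = sweep(tf.transpose(R), X, tf.transpose(C), lmda_y), so no new graph is built per epoch.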
def init_weights(R):
    chk = tf.where(tf.math.abs(R) > eps, 1., 0.)
    cnt = tf.constant(tf.math.reduce_sum(chk, axis=0), shape=[1, R.shape[1]])
    cnt = tf.repeat(cnt, repeats=R.shape[0], axis=0)
    cnt = tf.reshape(cnt, [R.shape[0], R.shape[1]])
    cnt = cnt * chk
    cnt = tf.random.normal(R.shape, mean=0.0, stddev=1.0) * cnt
    cnt = cnt + tf.random.normal([1], mean=0.0, stddev=1.0)
    return cnt
def train(R, lmda_x, lmda_y, epochs, embd):
    flag = False
    loss_train, loss_test, total = 0., 0., 0
    loss_train_list, loss_test_list, total_epochs = [], [], []
    X, Y = tf.zeros([embd, R.shape[0]]), tf.zeros([embd, R.shape[1]])
    C = init_weights(train_data)
    for epoch in epochs:
        if not flag:
            X, Y = tf.random.normal([embd, R.shape[0]], mean=0.0, stddev=1.0), tf.random.normal([embd, R.shape[1]], mean=0.0, stddev=1.0)
            X, Y = iterate(R, X, Y, C, lmda_x, lmda_y, epoch)
            flag = True
        else:
            X, Y = iterate(train_data, X, Y, C, lmda_x, lmda_y, epoch)
        total += epoch
        loss_train = loss(train_data, X, Y, C, lmda_x, lmda_y)
        loss_test = loss(test_data, X, Y, C, lmda_x, lmda_y)
        loss_train_list.append(loss_train)
        loss_test_list.append(loss_test)
        total_epochs.append(total)
        print(loss_train, loss_test)
    plt.plot(total_epochs, loss_train_list, label="Training", linewidth=5)
    plt.plot(total_epochs, loss_test_list, label="Test", linewidth=1)
    plt.xticks(fontsize=10)
    plt.title(str(embd) + ', ' + str(lmda_x) + ', ' + str(lmda_y))
    plt.yticks(fontsize=10)
    plt.xlim(0, 200)
    plt.ylim(0, 1000000000)
    plt.xlabel('iterations', fontsize=30)
    plt.ylabel('MSE', fontsize=30)
    plt.legend(loc='best', fontsize=20)
    return X, Y
def grid_search(epochs, embds, lmdas_x, lmdas_y, train_data, test_data):
    for lmda_x in lmdas_x:
        for lmda_y in lmdas_y:
            for embd in embds:
                plt.figure(figsize=(10, 10))
                lmda_x, lmda_y, epochs, embd = tf.convert_to_tensor(lmda_x, dtype=tf.float32), tf.convert_to_tensor(lmda_y, dtype=tf.float32), tf.convert_to_tensor(epochs, dtype=tf.int64), tf.convert_to_tensor(embd, dtype=tf.int64)
                X, Y = train(train_data, lmda_x, lmda_y, epochs, embd)
Some errors on calling grid_search:
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7b90> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7b90> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
(943, 1)
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7560> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
WARNING:tensorflow:11 out of the last 11 calls to <function pfor.<locals>.f at 0x7f4dd38f7560> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
(1682, 1)
(943, 1)
ERROR:tensorflow:==================================
Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f4e018c73d0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2778, in while_loop
return result File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2726, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv)) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/map_fn.py", line 507, in compute
return (i + 1, tas) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/map_fn.py", line 505, in <listcomp>
ta.write(i, value) for (ta, value) in zip(tas, result_value_batchable) File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/tf_should_use.py", line 249, in wrapped
error_in_function=error_in_function)
==================================
InvalidArgumentError Traceback (most recent call last)
<ipython-input-58-a2922620d8dd> in <module>()
7 epochs = [5, 100] #2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]#, 588, 299, 300, 200]
8 #X, Y, C = train(train_data.shape[0], train_data.shape[1], train_data, 0, 0, 2, 2)
----> 9 grid_search(epochs, embds, lmdas, lmdas, train_data, test_data)
3 frames
<ipython-input-57-8cc0dfdb85de> in grid_search(epochs, embds, lmdas_x, lmdas_y, train_data, test_data)
5 plt.figure(figsize = (10,10))
6 lmda_x , lmda_y , epochs, embd = tf.convert_to_tensor(lmda_x, dtype = tf.float32), tf.convert_to_tensor(lmda_y, dtype =tf.float32), tf.convert_to_tensor(epochs, dtype = tf.int64), tf.convert_to_tensor(embd, dtype =tf.int64)
----> 7 X, Y = train(train_data, lmda_x, lmda_y, epochs, embd)
8
9
<ipython-input-56-d70632663530> in train(R, lmda_x, lmda_y, epochs, embd)
88 flag = True
89 else:
---> 90 X, Y = iterate(train_data, X, Y, C, lmda_x, lmda_y, epoch)
91 total += epoch
92 loss_train = loss(train_data, X, Y, C, lmda_x, lmda_y)
<ipython-input-56-d70632663530> in iterate(R, X, Y, C, lmda_x, lmda_y, epochs)
50 Xtt = tf.vectorized_map(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C))
51 #Xtt = tf.map_fn(lambda x: update_xu(x[0], Y, tf.linalg.diag(x[1]), lmda_x), (R, C), dtype = tf.TensorSpec([Y.shape[0]], dtype = tf.float32), parallel_iterations=6)
---> 52 print(Xtt.shape)
53 X = tf.transpose(Xtt)
54 R, C = tf.transpose(R), tf.transpose(C)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py in shape(self)
1173 # `_tensor_shape` is declared and defined in the definition of
1174 # `EagerTensor`, in C.
-> 1175 self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple())
1176 except core._NotOkStatusException as e:
1177 six.raise_from(core._status_to_exception(e.code, e.message), None)
InvalidArgumentError: {{function_node __inference_f_5094764}} Input is not invertible.
[[{{node loop_body/MatrixInverse/pfor/MatrixInverse}}]]
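For what it's worth, cause (3) from the warning is easy to reproduce in isolation (a minimal standalone sketch, separate from my code above): Python numbers are baked into the trace as constants, so every new value forces a retrace, while tensors of a fixed dtype and shape share a single trace.

import tensorflow as tf

@tf.function
def scale(x, factor):
    print('tracing')            # executes only while tracing, not on every call
    return x * factor

x = tf.ones([2, 2])
for f in [0.1, 0.2, 0.3]:
    scale(x, f)                 # Python floats: prints 'tracing' three times
for f in [0.1, 0.2, 0.3]:
    scale(x, tf.constant(f))    # scalar float32 tensors: one trace, printed once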