Multiple parameterized metrics in Keras - python-3.x

Let's say I have a custom parameterized metric:
def my_metric_at_k(k):
    def my_metric(y_true, y_pred):
        '''
        Do magic here with k
        to get metric_value
        '''
        return metric_value
    return my_metric
I want to evaluate my model with this metric at multiple values of k, say k=1 and k=2, so naturally I'm inclined to pass [my_metric_at_k(1), my_metric_at_k(2)] as the metrics argument of the compile method.
When I go to fit my model, I run into this error:
NotFoundError: FetchOutputs node metrics/my_metric_1/...: not found
I'm using Keras with a TensorFlow backend.
As a concrete example, one metric I use is the following:
def variety_first_k(k):
    def variety(y_true, y_pred):
        y_pred = K.tf.cast(y_pred, dtype=K.tf.float64)
        sort = K.tf.nn.top_k(y_pred, k=k, sorted=True).indices
        unique, _ = K.tf.unique(K.flatten(sort))
        num_unique = K.shape(unique)[0]
        return (K.tf.cast(num_unique, dtype=K.tf.float64) /
                K.tf.cast(K.tf.shape(y_pred)[1], dtype=K.tf.float64))
    return variety
I'm working on a multi-label, multi-class problem.
A simplified version of my model is:
metrics = [variety_first_k(1), variety_first_k(2)]

inputs = layers.Input(shape=(my_data.shape[1],))
merged_layer = layers.BatchNormalization()(inputs)
merged_layer = layers.Dense(256, activation='relu',
                            kernel_regularizer=l1_l2(l1=0.5, l2=0.5))(merged_layer)
merged_layer = layers.BatchNormalization()(merged_layer)
predictions = layers.Dense(len(mlb.classes_))(merged_layer)
predictions = layers.Activation('sigmoid')(predictions)
model = keras.Model(inputs=inputs, outputs=predictions)

EPOCHS = 100
INIT_LR = 1e-2
BS = 128
opt = keras.optimizers.Adadelta(lr=INIT_LR, decay=INIT_LR / EPOCHS)

model.compile(loss=multilabel,
              optimizer=opt,
              metrics=metrics)

H = model.fit(x=my_data,
              y=Y,
              validation_split=0.02,
              epochs=EPOCHS,
              batch_size=BS)
The result gives me:
Train on 260563 samples, validate on 5318 samples
Epoch 1/100
...
...
NotFoundError: FetchOutputs node metrics/...: not found
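Not a confirmed fix, but one thing worth ruling out: both closures returned by variety_first_k are named variety, so Keras has to deduplicate the metric names when it builds the graph (hence the my_metric_1 suffix in the error). Below is a minimal sketch that reuses the metric above but gives each parameterized instance its own __name__; this is an assumption about the cause, not a verified solution.
from keras import backend as K

def variety_first_k(k):
    def variety(y_true, y_pred):
        y_pred = K.tf.cast(y_pred, dtype=K.tf.float64)
        sort = K.tf.nn.top_k(y_pred, k=k, sorted=True).indices
        unique, _ = K.tf.unique(K.flatten(sort))
        num_unique = K.shape(unique)[0]
        return (K.tf.cast(num_unique, dtype=K.tf.float64) /
                K.tf.cast(K.tf.shape(y_pred)[1], dtype=K.tf.float64))
    # give each parameterized instance a distinct name so the metric
    # tensors are registered under non-colliding names
    variety.__name__ = 'variety_at_{}'.format(k)
    return variety

metrics = [variety_first_k(1), variety_first_k(2)]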

Related

tf.GradientTape gradient() returns None

I am trying to train my Keras model using TensorFlow. So far I can build the model:
def Model(input_shape, num_of_layers):
    num_of_layers = 5
    mod = keras.models.Sequential()
    mod.add(keras.layers.Dense(1, input_shape = (input_shape,)))
    for i in range(num_of_layers - 1):
        mod.add(keras.layers.Dense(16, activation = 'tanh'))
    mod.add(keras.layers.Dense(1, activation = 'tanh'))
    return mod
and the loss function:
def loss(u_pred, u_true):
    return tf.reduce_mean(tf.keras.losses.mean_squared_error(u_pred, u_true))
Then I create a train function to train the model.
def train(model, X, epoch = 500, lr = 1e-3):
    trainable_params = [tf.Variable(model.get_weights()[i]) for i in range(len(model.get_weights()))]
    loss_array = []
    optim = tf.keras.optimizers.Adam(learning_rate = lr)
    for i in range(epoch):
        with tf.GradientTape() as g:
            g.watch(trainable_params)
            loss_val = loss(model(X), tf.zeros_like(X))
        grad = g.gradient(loss_val, trainable_params)
        ...
grad is a list of None values when I print it. What went wrong with my train function? I have converted my model's weights and biases to tensor objects using tf.Variable. Using tf.cast or tf.convert_to_tensor doesn't help either.
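A likely reason for the None gradients, sketched below: the tape watches fresh tf.Variable copies built from model.get_weights(), but the forward pass model(X) only touches the model's own variables, so nothing the tape watches appears in the computation. A minimal sketch that differentiates with respect to model.trainable_variables instead, reusing the loss function above and keeping the tf.zeros_like(X) target from the question:
import tensorflow as tf

def train(model, X, epoch=500, lr=1e-3):
    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    loss_array = []
    for _ in range(epoch):
        with tf.GradientTape() as g:
            # the tape records the model's own trainable variables automatically
            loss_val = loss(model(X), tf.zeros_like(X))
        grad = g.gradient(loss_val, model.trainable_variables)
        optim.apply_gradients(zip(grad, model.trainable_variables))
        loss_array.append(float(loss_val))
    return loss_array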

Multi-label classification with class weights using sklearn compute_class_weight

I am trying to compute weights for my multilabel model's loss function using compute_class_weight, but the output weight for each label is 1.
My data comes in batches of 5 and has 32 categories. I tried to implement the function as suggested here.
This is how my code looks:
from sklearn.utils.class_weight import compute_class_weight

def calculating_class_weights(y_true):
    print(y_true.shape)
    number_dim = np.shape(y_true)[0]  # 5
    num_labels_ = np.shape(y_true)[1]  # 32
    weights = np.empty([number_dim, num_labels_])
    for i in range(number_dim):
        #print(y_true[:, i])
        values = (y_true[i]).cpu().detach().numpy()
        classes_ = np.unique(values)
        print(classes_)
        print(values)
        weights[i] = compute_class_weight('balanced', classes_, values)
    return weights

def get_weighted_loss(y_true, y_pred, train = 0):
    weights = calculating_class_weights(y_true)
    print('weights :', weights)
    criterion = torch.nn.MultiLabelSoftMarginLoss(weight = weights)
    loss = criterion(y_true, y_pred)
    return loss
The output weight generated in each case is an array of ones. Is there something I am missing?
I also tried compute_sample_weight but got the same output.
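For comparison, here is a sketch of computing one weight per label over the full training label matrix rather than per 5-row batch. The function name, the hypothetical Y_train matrix, and the choice to keep only the positive-class weight are my own assumptions about what the linked approach intends:
import numpy as np
import torch
from sklearn.utils.class_weight import compute_class_weight

def per_label_pos_weights(y_all):
    # y_all: full (n_samples, n_labels) 0/1 label matrix, not a single batch
    pos_weights = np.ones(y_all.shape[1])
    for j in range(y_all.shape[1]):
        col = y_all[:, j]
        classes = np.unique(col)
        if len(classes) == 2:  # 'balanced' is only informative if both 0 and 1 occur
            w = compute_class_weight('balanced', classes=classes, y=col)
            pos_weights[j] = w[1]  # weight of the positive class
    return torch.tensor(pos_weights, dtype=torch.float32)

criterion = torch.nn.MultiLabelSoftMarginLoss(weight=per_label_pos_weights(Y_train))
Note also that PyTorch loss modules take (input, target), i.e. criterion(y_pred, y_true), the opposite order from the snippet above.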

How to find loss function values in unsupervised learning with Autoencoder?

To simplify my story: I was trying to test dimensionality reduction on my UNLABELED data with the encoder part of an autoencoder, using Keras/TensorFlow.
So I looked at the internet and found a nice code that might be useful for me. Here's the link: https://github.com/IvanBongiorni/TensorFlow2.0_Notebooks/blob/master/TensorFlow2.0__02.01_Autoencoder_for_Dimensionality_Reduction.ipynb
However, I'm interested just in the encoder part. So I added part of that code to mine, but I can't figure out how the code calculates loss values if I never give it any targets/labels. I'm new to Keras/TensorFlow and thought loss values could only be computed if you provide true and predicted labels.
data = np.random.randint(1, 100, 500)
df = pd.DataFrame({'f1':data, 'f2':data**2, 'f3':data*0.33, 'f4':data/20})

scaler = StandardScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df, columns=['f1','f2','f3','f4'])

n_input_layer = scaled_df.shape[1]
n_encoding_layer = 1
n_output_layer = n_input_layer

# AUTOENCODER
autoencoder = tf.keras.models.Sequential([
    # ENCODER
    Dense(n_input_layer, input_shape = (n_input_layer,), activation = 'elu'),
    # CENTRAL LAYER
    Dense(n_encoding_layer, activation = 'elu', name = 'central_layer'),
    # DECODER
    Dense(n_output_layer, activation = 'elu')])

n_epochs = 5000
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.optimizers.Adam(learning_rate = 0.001, decay = 0.0001, clipvalue = 0.5)
loss_history = []  # save loss improvement

for epoch in range(n_epochs):
    with tf.GradientTape() as tape:
        current_loss = loss(autoencoder(scaled_df.values), scaled_df.values)
    gradients = tape.gradient(current_loss, autoencoder.trainable_variables)
    optimizer.apply_gradients(zip(gradients, autoencoder.trainable_variables))
    loss_history.append(current_loss.numpy())  # save current loss in its history
    # show loss improvement every 200 epochs
    if (epoch+1) % 200 == 0:
        print(str(epoch+1) + '.\tLoss: ' + str(current_loss.numpy()))
Could anyone show me what I am missing? Thanks
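What the loop above is doing, in short: an autoencoder's target is its own input, so loss(autoencoder(scaled_df.values), scaled_df.values) compares the reconstruction against the original rows; no external labels are needed. Below is a rough equivalent using compile/fit, plus pulling out the encoder up to 'central_layer'. This is a sketch based on the code above, not taken from the linked notebook:
# the input doubles as the target: reconstruction loss
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(scaled_df.values, scaled_df.values, epochs=5000, verbose=0)

# extract the encoder part only, up to the bottleneck layer
encoder = tf.keras.Model(inputs=autoencoder.input,
                         outputs=autoencoder.get_layer('central_layer').output)
reduced = encoder.predict(scaled_df.values)   # shape: (n_samples, 1)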

How to sample Logits and Probabilties from a transformer seq2seq model for reinforcement learning?

Skipping the formalities:
I am trying to apply reinforcement learning to a transformer based seq2seq model (for abstractive summarization purposes) in Pytorch.
My current setup looks something like this:
I am getting a greedy distribution (summary) from the model by inferring one token at a time in a loop:
def get_greedy_distribution(model, batch):
    src, (shift_tgt, lbl_tgt), segs, clss, mask_src, mask_tgt, mask_cls = batch
    # the mock targets are just torch.zeros tensors to store inferred tokens
    mock_tgt = get_mock_tgt(shift_tgt)
    mock_return = get_mock_tgt(shift_tgt)
    max_length = shift_tgt.shape[1]
    with torch.no_grad():
        for i in range(0, max_length-1):
            prediction = model(src, mock_tgt, segs, clss, mask_src, mask_tgt, mask_cls)
            prediction = F.softmax(prediction, dim=2)
            val, ix = prediction.data.topk(1)
            mock_tgt[:, i+1] = ix.squeeze()[:, i].detach()
            mock_return[:, i] = ix.squeeze()[:, i].detach()
    return mock_return
I am getting a sample distribution, with probabilities, from the model in a similar way:
def get_distribution(model, batch):
    src, (shift_tgt, lbl_tgt), segs, clss, mask_src, mask_tgt, mask_cls = batch
    mock_tgt = get_mock_tgt(shift_tgt)
    mock_return = get_mock_tgt(shift_tgt)
    max_length = shift_tgt.shape[1]
    log_probs = []
    for i in range(0, max_length-1):
        prediction = model(src, mock_tgt, segs, clss, mask_src, mask_tgt, mask_cls)
        prediction = F.softmax(prediction, dim=2)
        multi_dist = Categorical(prediction[:, i])
        x_t = multi_dist.sample()
        log_prob = multi_dist.log_prob(x_t)
        mock_tgt[:, i+1] = x_t
        mock_return[:, i] = x_t
        log_probs.append(log_prob)
    return mock_return, log_probs
However, I am a bit unsure whether I am drawing the sample distribution correctly. This would work well in an RNN context, where I can sample logits and probabilities during the typical RNN loop, but it feels slightly wrong when using a Transformer.
How would you suggest approaching the Transformer for a typical baseline-sampled reinforcement learning setup (I am guessing it is a policy gradient)?
Pytorch code is preferred but if you have Tensorflow examples I am sure I can figure it out.
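For a baseline-sampled policy gradient on summarization, one common choice is self-critical sequence training (SCST): the greedy decode acts as the baseline for the sampled decode. Below is a minimal sketch built on the two functions above, with a hypothetical reward_fn (e.g. ROUGE against the reference summary); it is an illustration of the idea, not a prescribed setup:
import torch

def scst_loss(model, batch, reward_fn):
    # greedy decode serves as the baseline (computed without gradients above)
    greedy_seq = get_greedy_distribution(model, batch)
    # sampled decode carries per-token log-probabilities
    sampled_seq, log_probs = get_distribution(model, batch)

    # reward_fn is hypothetical: it should return a (batch_size,) tensor of scores
    r_sample = reward_fn(sampled_seq, batch)
    r_greedy = reward_fn(greedy_seq, batch)

    # sum token log-probs per sequence: list of (batch,) -> (batch,)
    seq_log_prob = torch.stack(log_probs, dim=0).sum(dim=0)

    # self-critical policy-gradient loss; positive advantage reinforces the sample
    advantage = (r_sample - r_greedy).detach()
    return -(advantage * seq_log_prob).mean()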

Failing to train SkipGram word embedding in Pytorch

I am training skipgram word embeddings using the famous model described in https://arxiv.org/abs/1310.4546. I want to train it in PyTorch, but I am getting errors and I can't figure out where they are coming from. Below I have provided my model class, training loop, and batching method. Does anyone have any insight into what's going on?
I am getting an error on the output = loss(data, target) line. It is having a problem with <class 'torch.LongTensor'>, which is weird because CrossEntropyLoss takes a long tensor. The output shape might also be wrong: it is torch.Size([1000, 100, 1000]) after the feedforward.
I have my model defined as:
import torch
import torch.nn as nn
torch.manual_seed(1)

class SkipGram(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(SkipGram, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_layer = nn.Linear(embedding_dim, vocab_size)
        # Loss needs to be input: (minibatch (N), C) target: (minibatch, 1), each label is a class
        # Calculate loss in training

    def forward(self, x):
        embeds = self.embeddings(x)
        x = self.hidden_layer(embeds)
        return x
My training loop is defined as:
import torch.optim as optim
from torch.autograd import Variable

net = SkipGram(1000, 300)
optimizer = optim.SGD(net.parameters(), lr=0.01)
batch_size = 100
size = len(train_ints)
batches = batch_index_gen(batch_size, size)
inputs, targets = build_tensor_from_batch_index(batches[0], train_ints)

for i in range(100):
    running_loss = 0.0
    for batch_idx, batch in enumerate(batches):
        data, target = build_tensor_from_batch_index(batch, train_ints)
        # if (torch.cuda.is_available()):
        #     data, target = data.cuda(), target.cuda()
        #     net = net.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = net.forward(data)
        loss = nn.CrossEntropyLoss()
        output = loss(data, target)
        output.backward()
        optimizer.step()
        running_loss += loss.data[0]
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            i, batch_idx * len(batch_size), len(size),
            100. * (batch_idx * len(batch_size)) / len(size), loss.data[0]))
If useful, my batching is:
def build_tensor_from_batch_index(index, train_ints):
    minibatch = []
    for i in range(index[0], index[1]):
        input_arr = np.zeros( (1000,1), dtype=np.int )
        target_arr = np.zeros( (1000,1), dtype=np.int )
        input_index, target_index = train_ints[i]
        input_arr[input_index] = 1
        target_arr[input_index] = 1
        input_tensor = torch.from_numpy(input_arr)
        target_tensor = torch.from_numpy(target_arr)
        minibatch.append( (input_tensor, target_tensor) )
    # Concatenate all tensors into a minibatch
    #x = [tensor[0] for tensor in minibatch]
    #print(x)
    input_minibatch = torch.cat([tensor[0] for tensor in minibatch], 1)
    target_minibatch = torch.cat([tensor[1] for tensor in minibatch], 1)
    #target_minibatch = minibatch[0][1]
    return input_minibatch, target_minibatch
I'm not sure about that since I did not read the paper, but it seems weird that you are computing the loss with the original data and the targets:
output = loss(data, target)
Considering that the output of the network is output = net.forward(data), I think you should compute your loss as:
error = loss(output, target)
If this doesn't help, briefly point out what the paper says about the loss function.
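As a side note on the shape complaint: nn.CrossEntropyLoss expects float logits of shape (N, C) and a long tensor of class indices of shape (N,), not one-hot vectors. A minimal, self-contained sketch of those shapes (general PyTorch usage, independent of the paper):
import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()

logits = torch.randn(100, 1000, requires_grad=True)   # batch of 100, vocab of 1000
targets = torch.randint(0, 1000, (100,))              # one class index per example
error = loss_fn(logits, targets)
error.backward()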
