I am trying to compute weights for my multilabel model's loss function using compute_class_weight, but the output weight for every label is 1.
My data comes in batches of 5 and has 32 categories. I tried to implement the function as suggested here.
This is how my code looks:
import numpy as np
import torch
from sklearn.utils.class_weight import compute_class_weight

def calculating_class_weights(y_true):
    print(y_true.shape)
    number_dim = np.shape(y_true)[0]   # 5
    num_labels_ = np.shape(y_true)[1]  # 32
    weights = np.empty([number_dim, num_labels_])
    for i in range(number_dim):
        # print(y_true[:, i])
        values = y_true[i].cpu().detach().numpy()
        classes_ = np.unique(values)
        print(classes_)
        print(values)
        weights[i] = compute_class_weight('balanced', classes_, values)
    return weights
def get_weighted_loss(y_true, y_pred, train=0):
    weights = calculating_class_weights(y_true)
    print('weights :', weights)
    criterion = torch.nn.MultiLabelSoftMarginLoss(weight=weights)
    loss = criterion(y_true, y_pred)
    return loss
The weights generated in each case are arrays of 1. Is there something I am missing?
I also tried running compute_sample_weight but got the same output.
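For comparison, here is a minimal sketch of the per-label variant (the pattern the linked answer suggests): iterate over the 32 label columns rather than the 5 samples, producing one (weight for 0, weight for 1) pair per label. This assumes binary multi-hot labels and that every column of the batch contains both classes, which a batch of 5 may not satisfy:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

def per_label_class_weights(y_true):
    y = y_true.cpu().detach().numpy()  # shape (batch_size, num_labels)
    num_labels = y.shape[1]
    weights = np.empty([num_labels, 2])
    for i in range(num_labels):
        # One 'balanced' weight pair per label column; compute_class_weight
        # raises if a column contains only one of the two classes
        weights[i] = compute_class_weight(class_weight='balanced',
                                          classes=np.array([0., 1.]),
                                          y=y[:, i])
    return weights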
import numpy as np
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')

def accuracy_per_class(preds, labels):
    # label_dict (class name -> index) is defined elsewhere
    label_dict_inverse = {v: k for k, v in label_dict.items()}
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat == label]
        y_true = labels_flat[labels_flat == label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds == label])}/{len(y_true)}\n')
I need to produce a classification report for a multi-class model, but the code above only gives me accuracy and the F1 score.
I suppose you are using the PyTorch environment. Here is code to print the F1 score, recall, and precision for each class in the dataset. If you have a trained model, load it along with the dataset to test on.
import torch
from sklearn.metrics import classification_report, confusion_matrix

device = torch.device('cuda')

val_dataset = LoadDataset('/content/val.csv')  # LoadDataset is the user-defined dataset class
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=51)  # Load the data
model.load_state_dict(torch.load('vit-base.bin'))  # Load the trained model
model.cuda()  # Put the model on the GPU

with torch.no_grad():
    image, target = next(iter(val_loader))
    image = image.to(device)
    target = target.flatten().to(device)
    prediction = model(image)
    prediction = prediction.argmax(dim=1).view(target.size()).cpu().numpy()
    target = target.cpu().numpy()

print(classification_report(target, prediction, target_names=val_dataset.LE.classes_))  # LE is the label encoder
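If the validation set spans more than one batch (batch_size=51 above only evaluates the first one), the same idea extends by accumulating predictions over the whole loader; a minimal sketch:

all_preds, all_targets = [], []
with torch.no_grad():
    for image, target in val_loader:
        # Collect per-batch predictions and labels on the CPU
        all_preds.append(model(image.to(device)).argmax(dim=1).cpu())
        all_targets.append(target.flatten().cpu())

print(classification_report(torch.cat(all_targets).numpy(),
                            torch.cat(all_preds).numpy(),
                            target_names=val_dataset.LE.classes_))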
I would like to code, in tf.keras, a neural network with a couple of loss functions. One is a standard MSE (mean squared error) with a factor loading, while the other is basically a regularization term on the output of a hidden layer. This second loss is added through self.add_loss() in a user-defined class inheriting from tf.keras.layers.Layer. I have a couple of questions (the first is the more important one, though).
1) The error I get when trying to combine the two losses is the following:
ValueError: Shapes must be equal rank, but are 0 and 1
From merging shape 0 with other shapes. for '{{node AddN}} = AddN[N=2, T=DT_FLOAT](loss/weighted_loss/value, model/new_layer/mul_1)' with input shapes: [], [100].
So it comes from the fact that the tensors which should add up to one single loss value have different shapes (and ranks). Still, when I print the losses during training, I clearly see that the vectors returned as losses have shape batch_size and rank 1. Could it be that when the two losses are summed I have to provide them (or at least the loss from add_loss) as scalars? I know the MSE is usually returned as a vector where each entry is the MSE of one sample in the batch, hence having shape batch_size, and I tried to do the same with the "regularization" loss. Do you have an explanation for this behaviour?
The sample code which gives me the error is the following:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input

def rate_mse(rate=1e5):
    # @tf.function  # also needed for printing
    def loss(y_true, y_pred):
        tmp = rate * K.mean(K.square(y_pred - y_true), axis=-1)
        # tf.print('shape %s and rank %s output in mse' % (K.shape(tmp), tf.rank(tmp)))
        tf.print('shape and rank output in mse', [K.shape(tmp), tf.rank(tmp)])
        tf.print('mse loss:', tmp)  # prints when I enable @tf.function
        return tmp
    return loss

class newLayer(tf.keras.layers.Layer):
    def __init__(self, rate=5e-2, **kwargs):
        super(newLayer, self).__init__(**kwargs)
        self.rate = rate

    # @tf.function  # to be commented out for NN training
    def call(self, inputs):
        tmp = self.rate * K.mean(inputs * inputs, axis=-1)
        tf.print('shape and rank output in regularizer', [K.shape(tmp), tf.rank(tmp)])
        tf.print('regularizer loss:', tmp)
        self.add_loss(tmp, inputs=True)
        return inputs

tot_n = 10000
xx = np.random.rand(tot_n, 1)
yy = np.pi * xx
train_size = int(0.9 * tot_n)
xx_train = xx[:train_size]; xx_val = xx[train_size:]
yy_train = yy[:train_size]; yy_val = yy[train_size:]

reg_layer = newLayer()
input_layer = Input(shape=(1,))  # input
hidden = Dense(20, activation='relu', input_shape=(2,))(input_layer)  # hidden layer
hidden = reg_layer(hidden)
output_layer = Dense(1, activation='linear')(hidden)

model = Model(inputs=[input_layer], outputs=[output_layer])
model.compile(optimizer='Adam', loss=rate_mse(), experimental_run_tf_function=False)
# model.compile(optimizer='Adam', loss=None, experimental_run_tf_function=False)
model.fit(xx_train, yy_train, epochs=100, batch_size=100,
          validation_data=(xx_val, yy_val), verbose=1)
# new_xx = np.random.rand(10,1); new_yy = np.pi*new_xx
# model.evaluate(new_xx, new_yy)
print(model.predict(np.array([[1]])))
2) I also have a secondary question related to this code. I noticed that printing with tf.print inside the function rate_mse only works when @tf.function is applied. Similarly, the call method of newLayer is only taken into consideration during training if that same decorator is commented out. Can someone explain why this is the case, or point me to a possible solution?
Thanks in advance to whoever can help. I am currently using TensorFlow 2.2.0 and the Keras version is 2.3.0-tf.
I was stuck on the same problem for a few days. The "standard" loss is already a scalar by the time it is added to the loss from add_loss. The only way I got it working was to add one more axis when calculating the mean, so that both terms end up as scalars:
tmp = self.rate*K.mean(inputs*inputs, axis=[0, -1])
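Applied to the layer from the question, the call method would then look like this (a minimal sketch, with the debug prints dropped):

# Inside newLayer, replacing the original call:
def call(self, inputs):
    # Reduce over the batch axis as well as the feature axis, so the
    # tensor handed to add_loss is rank 0, matching the compiled scalar loss
    tmp = self.rate * K.mean(inputs * inputs, axis=[0, -1])
    self.add_loss(tmp, inputs=True)
    return inputs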
I am using the input gradient as feature importance and want to compare the feature importance of a training datapoint with a human-annotated feature importance. I would like to make this comparison differentiable so that it can be learned through backpropagation. For that, I am writing a custom loss function that, in addition to the regular loss (e.g. MSE on the predictions vs. the true labels), also checks whether the input gradient is correct (e.g. MSE of the input gradient vs. the human-annotated feature importance).
With the following code I am able to get the input gradient:
from keras import backend as K
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense

def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)

# Amount of training samples
N = 1000
input_dim = 10

# Generate a training set; make the 1st and 2nd feature the same as the target
X = np.random.standard_normal(size=(N, input_dim))
y = np.random.randint(low=0, high=2, size=(N, 1))
X[:, 1] = y[:, 0]
X[:, 2] = y[:, 0]

# Create a simple model
inputs = Input(shape=(input_dim,))
x = Dense(10, name="dense1")(inputs)
output = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[inputs], outputs=output)

# Compile and fit the model
model.compile(optimizer='adam', loss="mse", metrics=['accuracy'])
model.fit([X], y, epochs=100, batch_size=64)

# Get a function that returns the input gradients
gradients = K.gradients(model.output, model.input)[0]
gradient_function = K.function([model.input], [normalize(gradients)])

# Get the input gradient values of the training set
grads_val = gradient_function([X])[0]
print(grads_val[:2])
This prints the following (you can see that the 1st and the 2nd features have the highest importance):
[[ 1.2629046e-02 2.2765596e+00 2.1479919e+00 2.1558853e-02
4.5277486e-03 2.9851785e-03 9.5279224e-04 -1.0903150e-02
-1.2230731e-02 2.1960819e-02]
[ 1.1318034e-02 2.0402350e+00 1.9250139e+00 1.9320872e-02
4.0577268e-03 2.6752844e-03 8.5390132e-04 -9.7713526e-03
-1.0961102e-02 1.9681118e-02]]
How can I write a custom loss function in which the input gradients are differentiable?
I started with the following loss function.
from keras.losses import mean_squared_error

def custom_loss():
    # Human-annotated feature importance.
    # Let's say it tells us to only look at the second feature.
    human_feature_importance = []
    for i in range(N):
        human_feature_importance.append([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

    def loss(y_true, y_pred):
        # Get the regular loss
        regular_loss_value = mean_squared_error(y_true, y_pred)
        # Somehow get the input gradient of each training sample as a tensor;
        # it should be differentiable w.r.t. all of the weights
        gradients = ??
        feature_importance_loss_value = mean_squared_error(gradients, human_feature_importance)
        # Combine both losses
        return regular_loss_value + feature_importance_loss_value
    return loss
I also found an implementation in tensorflow to make the input gradient differentialble: https://github.com/dtak/rrr/blob/master/rrr/tensorflow_perceptron.py#L18
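Not an authoritative answer, but one way this is often done (and what the linked rrr code does in TensorFlow) is to build the gradient symbolically inside the loss closure: K.gradients produces a tensor that is itself differentiable w.r.t. the weights. A minimal sketch, assuming the annotation is the same for every sample (as in the example above) and that model is in scope when the loss is created:

from keras import backend as K
from keras.losses import mean_squared_error

def custom_loss(model, human_feature_importance):
    # Constant annotation vector, shape (input_dim,); it broadcasts over
    # the batch axis, so the same annotation applies to every sample
    hfi = K.constant(human_feature_importance)

    def loss(y_true, y_pred):
        regular_loss_value = mean_squared_error(y_true, y_pred)
        # K.gradients builds a symbolic expression, so this term remains
        # differentiable w.r.t. the weights (double backpropagation)
        input_gradients = K.gradients(model.output, model.input)[0]
        feature_importance_loss_value = K.mean(
            K.square(input_gradients - hfi), axis=-1)
        return regular_loss_value + feature_importance_loss_value
    return loss

# Hypothetical usage: build the model first, then compile with the loss
# model.compile(optimizer='adam',
#               loss=custom_loss(model, [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]))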
Sorry for a noob's question:
Given a NN trained with fit_generator, with layers such as:
Lambda(...)
or
Dense(...)
and a custom loss function, what are the input tensors?
Am I correct in expecting (batch size, previous layer's output) in the case of a Lambda layer?
Is it going to be the same, (batch size, data), in the case of a custom loss function that looks like:
triplet_loss(y_true, y_pred)
Are y_true and y_pred in the format (batch, previous layer's output) and (batch, the true 'expected' data we fed to the NN)?
I would probably duplicate the dense layers. Instead of having 2 layers with 128 units, have 4 layers with 64 units. The result is the same, but you will be able to perform the cross products better.
from keras.models import Model
from keras.layers import Dense, Lambda, Concatenate

# Create the dense layers and store their output tensors;
# they use the outputs of models 1 and 2 as input
d1 = Dense(64, ....)(Model_1.output)
d2 = Dense(64, ....)(Model_1.output)
d3 = Dense(64, ....)(Model_2.output)
d4 = Dense(64, ....)(Model_2.output)

cross1 = Lambda(myFunc, output_shape=....)([d1, d4])
cross2 = Lambda(myFunc, output_shape=....)([d2, d3])

# I don't really know what kind of "merge" you want, so I used Concatenate;
# there are also Add, Multiply and others...
output = Concatenate()([cross1, cross2])
# Use the "axis" attribute of the Concatenate layer to define better which
# axis will be doubled due to the concatenation

model = Model([Model_1.input, Model_2.input], output)
Now, for the lambda function:
import keras.backend as K

def myFunc(x):
    return x[0] * x[1]
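Since myFunc multiplies its two inputs elementwise, the output shape equals either input's shape; with Lambda, output_shape can also be given as a function of the input shapes. A sketch (not part of the original answer):

cross1 = Lambda(myFunc, output_shape=lambda shapes: shapes[0])([d1, d4])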
"custom loss function, what are input tensors?"
It depends on how you define your model outputs.
For example, let's define a simple model that returns the input unchanged:
import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers import Lambda

model = Sequential([Lambda(lambda x: x, input_shape=(1,))])
Let's use a dummy input x and label y:
x = np.array([[0]])
y = np.array([[4]])
If our custom loss function looks like this:
def mce(y_true, y_pred):
    print(y_true.shape)
    print(y_pred.shape)
    return K.mean(K.pow(K.abs(y_true - y_pred), 3))

model.compile('sgd', mce)
then we can see that the shapes of y_true and y_pred will be:
y_true: (?, ?)
y_pred: (?, 1)
However, for a triplet loss, the input to the loss function can also be received like this:
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, merge  # merge(mode=...) is the legacy Keras 1.x functional merge
from keras.models import Model
from keras.optimizers import Adam

ALPHA = 0.2

def triplet_loss(x):
    anchor, positive, negative = x
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), ALPHA)
    loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
    return loss
# Source: https://github.com/davidsandberg/facenet/blob/master/src/facenet.py

def build_model(input_shape):
    # Standardize the input shape order
    K.set_image_dim_ordering('th')

    positive_example = Input(shape=input_shape)
    negative_example = Input(shape=input_shape)
    anchor_example = Input(shape=input_shape)

    # Create a common network to share the weights along the different
    # examples (positive/negative/anchor); faceRecoModel is defined elsewhere
    embedding_network = faceRecoModel(input_shape)
    positive_embedding = embedding_network(positive_example)
    negative_embedding = embedding_network(negative_example)
    anchor_embedding = embedding_network(anchor_example)

    loss = merge([anchor_embedding, positive_embedding, negative_embedding],
                 mode=triplet_loss, output_shape=(1,))
    model = Model(inputs=[anchor_example, positive_example, negative_example],
                  outputs=loss)
    model.compile(loss='mean_absolute_error', optimizer=Adam())
    return model
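Note that since this model's output is the triplet loss value itself, fitting it typically uses dummy targets, so that minimizing 'mean_absolute_error' against zero just minimizes the loss. A sketch (my assumption, not part of the original answer; anchors, positives, and negatives are hypothetical input arrays):

model = build_model(input_shape)
dummy_y = np.zeros((num_triplets, 1))  # hypothetical target array of zeros
model.fit([anchors, positives, negatives], dummy_y, epochs=10)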
Let's say I have a custom parameterized metric:
def my_metric_at_k(k):
    def my_metric(y_true, y_pred):
        '''
        Do magic here with k
        to get metric_value
        '''
        return metric_value
    return my_metric
I want to evaluate my model with this metric at multiple values of k, say k=1 and k=2, so naturally I'm inclined to feed [my_metric_at_k(1), my_metric_at_k(2)] into the metrics argument of the compile method.
When I go to fit my model, I run into this error:
NotFoundError: FetchOutputs node metrics/my_metric_1/...: not found
I'm using Keras with a TensorFlow backend.
For a particular example, one metric I use is the following:
def variety_first_k(k):
    def variety(y_true, y_pred):
        y_pred = K.tf.cast(y_pred, dtype=K.tf.float64)
        sort = K.tf.nn.top_k(y_pred, k=k, sorted=True).indices
        unique, _ = K.tf.unique(K.flatten(sort))
        num_unique = K.shape(unique)[0]
        return (K.tf.cast(num_unique, dtype=K.tf.float64) /
                K.tf.cast(K.tf.shape(y_pred)[1], dtype=K.tf.float64))
    return variety
I'm running a multi-label multi-class problem.
A simplified version of my model is:
metrics = [variety_first_k(1), variety_first_k(2)]

inputs = layers.Input(shape=(my_data.shape[1],))
merged_layer = layers.BatchNormalization()(inputs)
merged_layer = layers.Dense(256, activation='relu',
                            kernel_regularizer=l1_l2(l1=0.5, l2=0.5))(merged_layer)
merged_layer = layers.BatchNormalization()(merged_layer)
predictions = layers.Dense(len(mlb.classes_))(merged_layer)
predictions = layers.Activation('sigmoid')(predictions)
model = keras.Model(inputs=inputs, outputs=predictions)

EPOCHS = 100
INIT_LR = 1e-2
BS = 128

opt = keras.optimizers.Adadelta(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss=multilabel,  # multilabel is a custom loss defined elsewhere
              optimizer=opt,
              metrics=metrics)
H = model.fit(x=my_data,
              y=Y,
              validation_split=0.02,
              epochs=EPOCHS,
              batch_size=BS)
The result gives me:
Train on 260563 samples, validate on 5318 samples
Epoch 1/100
...
...
NotFoundError: FetchOutputs node metrics/...: not found
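One thing worth checking (an assumption on my part, not a verified fix for this exact traceback): Keras derives each metric's graph and display name from the inner function's __name__, and both closures returned by variety_first_k are named variety. A commonly suggested workaround is to give each returned function a unique name per k:

def variety_first_k(k):
    def variety(y_true, y_pred):
        ...  # body as above
    # Hypothetical workaround: one unique metric name per value of k
    variety.__name__ = 'variety_first_{}'.format(k)
    return variety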