Loss decreases but F1 score remains unchanged - PyTorch

My model's loss decreases, but its performance (e.g. F1 score) does not improve.
I want to fine-tune the pretrained language model XLM from Facebook for NER tasks, so I stacked a BiLSTM and a CRF on top of it.
This is my model architecture. The entire code repo has been uploaded to GitHub: https://github.com/stefensa/XLM_NER
class XLM_BiLSTM_CRF(nn.Module):
    def __init__(self, config, num_labels, params, dico, reloaded):
        super().__init__()
        self.config = config
        self.num_labels = num_labels
        self.batch_size = config.batch_size
        self.hidden_dim = config.hidden_dim

        # Pretrained XLM encoder, loaded from a checkpoint
        self.xlm = TransformerModel(params, dico, True, True)
        self.xlm.eval()
        self.xlm.load_state_dict(reloaded['model'])

        self.lstm = nn.LSTM(config.embedding_dim, config.hidden_dim // 2,
                            num_layers=1, bidirectional=True)
        self.dropout = nn.Dropout(config.dropout)
        self.classifier = nn.Linear(config.hidden_dim, config.num_class)
        self.apply(self.init_bert_weights)
        self.crf = CRF(config.num_class)

    def forward(self, word_ids, lengths, langs=None, causal=False):
        sequence_output = self.xlm('fwd', x=word_ids, lengths=lengths, causal=False).contiguous()
        sequence_output, _ = self.lstm(sequence_output)
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)
        return self.crf.decode(logits)

    def log_likelihood(self, word_ids, lengths, tags):
        sequence_output = self.xlm('fwd', x=word_ids, lengths=lengths, causal=False).contiguous()
        sequence_output, _ = self.lstm(sequence_output)
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)
        # Negative log-likelihood of the gold tag sequence under the CRF
        return -self.crf(logits, tags.transpose(0, 1))

    def init_bert_weights(self, module):
        """Initialize the weights."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
This is the initial state of my model.
And this is its performance at the 9th epoch: the metrics have not changed at all.
Can anyone help me figure out what is wrong?
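One detail worth checking in the code above (a hypothesis based only on the snippet, not a confirmed fix): nn.Module.apply runs the given function recursively on every submodule, so self.apply(self.init_bert_weights) also re-initializes every nn.Linear and nn.Embedding inside the just-loaded pretrained XLM encoder, wiping its weights. A minimal sketch of an __init__ that restricts initialization to the newly added head:

# Sketch: load the pretrained weights, then initialize only the layers
# added on top of XLM, leaving the encoder untouched.
self.xlm = TransformerModel(params, dico, True, True)
self.xlm.load_state_dict(reloaded['model'])   # pretrained weights stay intact
self.lstm = nn.LSTM(config.embedding_dim, config.hidden_dim // 2,
                    num_layers=1, bidirectional=True)
self.classifier = nn.Linear(config.hidden_dim, config.num_class)
self.classifier.apply(self.init_bert_weights)  # touch only the new head
self.crf = CRF(config.num_class)

If the loss still decreases while F1 stays flat, it is also worth confirming that evaluation actually calls forward (i.e. crf.decode) and that the predicted tag sequences are aligned token-for-token with the gold labels before F1 is computed.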

Related

Python PyTorch RNN chat bot index out of range in self error

I was trying to make an RNN chatbot with PyTorch, but I get this error:

  return F.embedding(
File "/data/user/0/ru.iiec.pydroid3/files/aarch64-linux-android/lib/python3.9/site-packages/torch/nn/functional.py", line 2043, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
Here is the code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import requests
from bs4 import BeautifulSoup

url = "https://barisozcan.com"
try:
    response = requests.get(url)
except:
    print("Check your connection!\n")
    exit()
soup = BeautifulSoup(response.text, "html.parser")

# Define a recurrent neural network
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input, hidden):
        x = self.embedding(input.view(1, -1))
        x, hidden = self.lstm(x.view(1, 1, -1), hidden)
        x = self.fc(x.view(1, -1))
        return x, hidden

    def init_hidden(self):
        return (torch.zeros(1, 1, self.hidden_size),
                torch.zeros(1, 1, self.hidden_size))

# Define a chatbot class
class ChatBot(object):
    def __init__(self, input_size, hidden_size, num_classes, conversations, responses):
        self.rnn = RNN(input_size, hidden_size, num_classes)
        self.word2index = {}
        self.index2word = {}
        self.word_to_vec_map = {}
        self.conversations = conversations
        self.responses = responses
        self.words = []
        # Prepare data for training the recurrent neural network
        for conv in conversations:
            for word in conv.split():
                if word not in self.word2index:
                    self.word2index[word] = len(self.words)
                    self.index2word[len(self.words)] = word
                    self.words.append(word)
        for resp in responses:
            for word in resp.split():
                if word not in self.word2index:
                    self.word2index[word] = len(self.words)
                    self.index2word[len(self.words)] = word
                    self.words.append(word)
        # Define a random word embedding for each word
        for i in range(len(self.words)):
            self.word_to_vec_map[self.words[i]] = torch.randn(input_size)

    def get_vec(self, word):
        return self.word_to_vec_map[word]

    def predict(self, conversation):
        words = conversation.split()
        inputt = []
        for word in words:
            if word in self.word2index:
                inputt.append(self.word2index[word])
            else:
                print("Error: '{}' is not in the word embedding vocabulary.".format(word))
                return None
        # Pass the encoded conversation through the recurrent neural network
        hidden = self.rnn.init_hidden()
        for i in range(len(inputt)):
            output, hidden = self.rnn(torch.tensor([inputt[i]]).view(1, -1), hidden)
        # Use the hidden state to predict the next word
        index = output.argmax().item()
        return self.index2word[index]

# Define some example conversations and responses
# conversations
conversations = [text for text in soup.stripped_strings]
n_con = []
for c in conversations:
    n_con.append(c.lower())
conversations.clear()
conversations = n_con
print(conversations)
n_con = None
# responses - I think the AI uses this
responses = conversations

# Create a chatbot
input_size = len(conversations)
hidden_size = 512
num_classes = len(conversations) + len(responses)
chatbot = ChatBot(input_size, hidden_size, num_classes, conversations, responses)

# Test the chatbot
while True:
    conversation = input("You: ")
    if conversation == "quit" or conversation == "exit":
        break
    try:
        response = chatbot.predict(conversation)
        print("FazAI: ", response)
    except KeyError:
        print("KeyError: '{}'. A word in your input is not in the vocabulary.\n".format(conversation))
        continue
How can I resolve this error? Which code do I need to replace, and with what, to fix this?
The code is a crudely trained chatbot over a fixed set of phrases; it was supposed to chat with me.
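The IndexError most likely comes from the size of the embedding table (a diagnosis based on the code shown): nn.Embedding(input_size, hidden_size) only accepts indices strictly below input_size, but input_size is set to len(conversations) (the number of scraped strings), while word2index assigns indices up to the number of distinct words, which is typically much larger. A minimal sketch of a fix is to derive every size from the vocabulary:

# Sketch: size the network from the vocabulary, not from the number of strings.
vocab = set()
for text in conversations:
    vocab.update(text.split())

input_size = len(vocab)   # every word index is now < num_embeddings
num_classes = len(vocab)  # argmax output stays a valid key of index2word

chatbot = ChatBot(input_size, hidden_size, num_classes, conversations, responses)

Sizing num_classes to the vocabulary also keeps output.argmax().item() inside the range of index2word, which avoids the KeyError handled at the bottom of the script.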

Seq2Seq regression problem in PyTorch not performing well

I wrote the following code to solve a Seq2Seq regression problem. My implementation is based on a GRU and multi-head attention. The performance is horrible: I tried playing with the hyperparameters, but nothing changed, which leads me to think it is a network architecture issue.
class Seq2Seq(nn.Module):
    def __init__(self, input_size, output_size, hidden, num_heads):
        super(Seq2Seq, self).__init__()
        self.encoder = nn.GRU(input_size, hidden, 2)
        self.decoder = nn.GRU(hidden, hidden, 2)
        self.multihead_attn = nn.MultiheadAttention(hidden, num_heads)
        self.linear = nn.Linear(hidden, output_size)
        self.init_weights()

    def init_weights(self):
        self.linear.weight.data.normal_(0, 0.1)

    def forward(self, x):
        encoded, _ = self.encoder(x)
        decoded, _ = self.decoder(encoded)
        attention_output, _ = self.multihead_attn(decoded, decoded, decoded)
        out = self.linear(attention_output)
        return out

D_in = 4
D_out = 1
hidden = 16
num_heads = 4

seq2seq = Seq2Seq(input_size=D_in, output_size=D_out, hidden=hidden, num_heads=num_heads)
inputs = torch.rand((7, 100, D_in))
outputs = seq2seq(inputs)
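One thing worth double-checking, stated as an assumption about the intended data layout: nn.GRU and nn.MultiheadAttention default to batch_first=False, i.e. they expect input shaped (seq_len, batch, features). If torch.rand((7, 100, D_in)) is meant to be a batch of 7 sequences of length 100, the model is silently reading it as 7 time steps over a batch of 100, which degrades results without raising any error. A minimal sketch of the batch-first variant:

# Sketch: same layers, but interpreting inputs as (batch, seq_len, features).
self.encoder = nn.GRU(input_size, hidden, 2, batch_first=True)
self.decoder = nn.GRU(hidden, hidden, 2, batch_first=True)
self.multihead_attn = nn.MultiheadAttention(hidden, num_heads, batch_first=True)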

Custom layer from keras to pytorch

Coming from a TensorFlow background, I am trying to convert a snippet of a custom layer from Keras to PyTorch.
The custom layer in Keras looks like this:
class Attention_module(tf.keras.layers.Layer):
    def __init__(self, class_num):
        super(Attention_module, self).__init__(class_num)
        self.class_num = class_num
        self.Ws = None

    def build(self, input_shape):
        embedding_length = int(input_shape[2])
        self.Ws = self.add_weight(shape=(self.class_num, embedding_length),
                                  initializer=tf.keras.initializers.get('glorot_uniform'),
                                  trainable=True)
        super(Attention_module, self).build(input_shape)

    def call(self, inputs):
        sentence_trans = tf.transpose(inputs, [0, 2, 1])
        at = tf.matmul(self.Ws, sentence_trans)
        at = tf.math.tanh(at)
        # Numerically stable softmax over the last axis
        at = K.exp(at - K.max(at, axis=-1, keepdims=True))
        at = at / K.sum(at, axis=-1, keepdims=True)
        v = K.batch_dot(at, inputs)
        return v
I want to implement the same in PyTorch. I have already written the forward pass, but I am confused about how to do the weight creation and initialization the same way as in the Keras layer above:
class Attention_module(torch.nn.Module):
    def __init__(self, class_num):
        super().__init__()
        # how to initialize self.Ws the same way as in the Keras layer above?

    def forward(self, inputs):
        sentence_trans = inputs.permute(0, 2, 1)
        # torch.matmul broadcasts over the batch dimension (torch.mm is 2-D only)
        at = torch.matmul(self.Ws, sentence_trans)
        at = torch.tanh(at)  # torch.nn.Tanh is a module class; torch.tanh is the function
        # Numerically stable softmax over the last dimension
        at = torch.exp(at - torch.max(at, dim=-1, keepdim=True).values)
        at = at / torch.sum(at, dim=-1, keepdim=True)
        v = torch.einsum('ijk,ikl->ijl', at, inputs)
        return v
Thank you!
class Attention_module(torch.nn.Module):
    def __init__(self, class_num, input_shape):
        super().__init__()
        self.class_num = class_num
        embedding_length = int(input_shape[2])
        self.Ws = torch.nn.Embedding(num_embeddings=class_num,
                                     embedding_dim=embedding_length)  # Embedding layer
        torch.nn.init.xavier_uniform_(self.Ws.weight)  # Glorot initialization
Here's the reference for layer initialization methods; Xavier init is another name for Glorot init.
The trailing _ in torch.nn.init.xavier_uniform_ is a PyTorch convention signifying an in-place operation.
You can also call torch.nn.init at runtime; it doesn't have to be within __init__(). For example:
att = Attention_module(class_num, input_shape)
torch.nn.init.xavier_uniform_(att.Ws.weight)

or:

for param in att.parameters():
    torch.nn.init.xavier_uniform_(param)
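A note on the mapping (a suggestion on top of the answer above, not taken from it): Keras's add_weight creates a plain trainable tensor, and the most direct PyTorch equivalent is torch.nn.Parameter rather than an nn.Embedding lookup table. A minimal sketch:

import torch

class Attention_module(torch.nn.Module):
    def __init__(self, class_num, input_shape):
        super().__init__()
        embedding_length = int(input_shape[2])
        # A trainable weight matrix, registered automatically as a model parameter
        self.Ws = torch.nn.Parameter(torch.empty(class_num, embedding_length))
        torch.nn.init.xavier_uniform_(self.Ws)  # Glorot/Xavier initialization

With nn.Parameter, self.Ws can be used directly in torch.matmul(self.Ws, sentence_trans) in the forward pass, mirroring self.Ws in the Keras call method.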

Learnable scalar weight in PyTorch

I have two neural networks running in parallel. Each produces a feature map of the same size, say Nx1. Now I want a weighted average of these embeddings, like w1 * embed1 + w2 * embed2. I have tried these: 1, 2. But the weights are not updating. Any help would be appreciated. Here is how I am trying to do it:
class LinearWeightedAvg(nn.Module):
    def __init__(self, n_inputs):
        super(LinearWeightedAvg, self).__init__()
        self.weight1 = Variable(torch.randn(1), requires_grad=True).cuda()
        self.weight2 = Variable(torch.randn(1), requires_grad=True).cuda()

    def forward(self, inp_embed):
        return self.weight1 * inp_embed[0] + self.weight2 * inp_embed[1]

class EmbedBranch(nn.Module):
    def __init__(self, feat_dim, embedding_dim):
        super(EmbedBranch, self).__init__()
        fc_layer1 = fc_layer

    def forward(self, x):
        x = self.fc_layer1(x)
        return x

class EmbeddingNetwork(nn.Module):
    def __init__(self, args, N):
        super(EmbeddingNetwork, self).__init__()
        embedding_dim = N
        self.embed1 = EmbedBranch(N, N)
        self.embed2 = EmbedBranch(N, N)
        self.comb_branch = LinearWeightedAvg(metric_dim)
        self.args = args
        if args.cuda:
            self.cuda()

    def forward(self, emb1, emb2):
        embeds1 = self.text_branch(emb1)
        embeds2 = self.image_branch(emb2)
        combined = self.comb_branch([embeds1, embeds2])
        return combined

    def train_forward(self, embed1, embed2):
        combined = self(embed1, embed2)
        return combined

embeds = model.train_forward(embed1, embed2)
loss = loss_func(embeds, labels)
running_loss.update(loss.data.item())
optimizer.zero_grad()
loss.backward()
Also, I want the weights to stay within the 0-1 range.
Thanks!
You should use self.weightx = torch.nn.Parameter(your_initial_tensor) to register a tensor as a learnable parameter of the model.
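As a concrete sketch of that fix, with a sigmoid added as one way to meet the 0-1 requirement from the question (a softmax over the two weights would be another option):

import torch
import torch.nn as nn

class LinearWeightedAvg(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Parameter registers the tensors with the module, so the optimizer
        # sees them and .cuda() on the parent moves them along with everything else
        self.weight1 = nn.Parameter(torch.randn(1))
        self.weight2 = nn.Parameter(torch.randn(1))

    def forward(self, inp_embed):
        # sigmoid keeps each effective weight in (0, 1)
        w1 = torch.sigmoid(self.weight1)
        w2 = torch.sigmoid(self.weight2)
        return w1 * inp_embed[0] + w2 * inp_embed[1]

The original version did not update because Variable(...).cuda() returns a new, unregistered tensor: it never appears in model.parameters(), so the optimizer never receives it.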

optimizer got an empty parameter list (skorch)

So, I am used to PyTorch and have now decided to give skorch a shot.
Here they define the network as:
class ClassifierModule(nn.Module):
    def __init__(
            self,
            num_units=10,
            nonlin=F.relu,
            dropout=0.5,
    ):
        super(ClassifierModule, self).__init__()
        self.num_units = num_units
        self.nonlin = nonlin
        self.dropout = dropout

        self.dense0 = nn.Linear(20, num_units)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(dropout)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        X = F.softmax(self.output(X), dim=-1)
        return X
I prefer passing a list of the number of neurons in each layer, i.e. num_units=[30,15,5,2] would give two hidden layers with 15 and 5 neurons. Furthermore, we have 30 features and 2 classes, so I rewrote it to something like this:
class Net(nn.Module):
    def __init__(
            self,
            num_units=[30, 15, 5, 2],
            nonlin=[F.relu, F.relu, F.relu],
            dropout=[0.5, 0.5, 0.5],
    ):
        super(Net, self).__init__()
        self.num_units = num_units
        self.nonlin = nonlin  # Activation function per layer
        self.dropout = [nn.Dropout(p) for p in dropout]  # Dropout rates, wrapped in modules so they are callable
        self.layers = [nn.Linear(i, p) for i, p in zip(num_units, num_units[1:])]  # Dense layers

    def forward(self, X, **kwargs):
        print("Forwards")
        for layer, func, drop in zip(self.layers[:-1], self.nonlin, self.dropout):
            print(layer, func, drop)
            X = drop(func(layer(X)))
        X = F.softmax(X, dim=-1)
        return X
should do the trick. The problem is that when calling

net = NeuralNetClassifier(Net, max_epochs=20, lr=0.1, device="cuda")
net.fit(X, y)

I get the error "ValueError: optimizer got an empty parameter list". I have narrowed it down to this: simply removing self.output = nn.Linear(10, 2) makes the net never enter forward, i.e. output seems to act as some kind of "trigger" variable. Is it really the case that the network needs a layer attribute named output at the end, and that we are not free to choose the variable names ourselves?
PyTorch only registers parameters from attributes that are themselves nn.Module (or nn.Parameter) instances; a plain Python list is invisible to .parameters(). So changing

self.layers = [nn.Linear(i, p) for i, p in zip(num_units, num_units[1:])]

to

self.layers = nn.ModuleList([nn.Linear(i, p) for i, p in zip(num_units, num_units[1:])])

should work fine.
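As a quick sanity check (a sketch, assuming the Net definition above with the nn.ModuleList fix applied):

net = Net()
print(len(list(net.parameters())))  # non-empty now: one weight and one bias per Linear layer

The same registration rule also explains the "trigger" effect of self.output in the experiment described above: it was the only attribute of nn.Module type, so removing it left .parameters() empty and skorch's optimizer failed before forward was ever called.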
