LSTM pytorch not learning

I am learning about LSTMs and language models, and I developed the following code for character-level text generation.
Here is the model class:
class RNN(nn.Module):
    def __init__(self, input_size, embedding_dim, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Layers:
        self.embed = nn.Embedding(input_size, embedding_dim, padding_idx=0)
        self.dropout = nn.Dropout(0.5)  # regularization to reduce overfitting and increase stability
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.o = nn.Softmax(dim=1)

    def forward(self, x, hidden, cell):
        out = self.embed(x)
        out = self.dropout(out)
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        out = self.fc(out.reshape(out.shape[0], -1))
        out = self.o(out)
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell

n_chars, embedding_dim, hidden_size, num_layers, output_size = 55, 20, 256, 2, 55
model = RNN(n_chars, embedding_dim, hidden_size, num_layers, n_chars).to(device)
And this is the train function, where I have the problem:
def train(model, optimizer, criterion, epochs=10, every=5):
    for epoch in range(epochs):
        k = random.randint(0, len(data))
        x, y = get_batch(k)
        xt, yt = tensorize(x, y)
        mean_loss = 0
        L = len(xt)
        for i in range(L):
            hidden, cell = model.init_hidden(batch_size)  # not doing this will cause an error
            out, (hidden, cell) = model(xt[i].unsqueeze(0), hidden, cell)
            target = yt[i].unsqueeze(0)
            loss = criterion(out, target)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            mean_loss += loss.item()
        if epoch % every == 0:
            print("epoch = ", epoch, " mean loss = ", mean_loss / L)
However, the loss does not seem to change at all. What did I do wrong?
Note: I am feeding the model character by character, not the entire batch at once.
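One hedged observation on the model above, assuming the criterion (not shown in the post) is nn.CrossEntropyLoss: that loss already applies log-softmax internally and expects raw logits, so feeding it the output of nn.Softmax squashes the gradients and typically yields a loss that barely moves. A minimal sketch of the change inside the RNN class:

    # Assumption: criterion = nn.CrossEntropyLoss(), which expects raw logits.
    # Drop self.o = nn.Softmax(dim=1) and return the fc output directly:
    def forward(self, x, hidden, cell):
        out = self.embed(x)
        out = self.dropout(out)
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        out = self.fc(out.reshape(out.shape[0], -1))  # raw logits, no softmax
        return out, (hidden, cell)

If probabilities are needed at sampling time, torch.softmax can be applied outside the loss computation.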

Related

Pytorch Parameterized Layer not Updated

I have a problem here: I want to make a layer whose weight (and bias) is based on another, frozen weight. Say I have a frozen weight (FW) as a base value; then my model layer will have weight W = FW + D, where D is the trainable parameter. Later, when I train the model, I hope the only parameter that gets updated is D.
I made this simple code for illustration:
frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.dw = nn.Parameter(torch.tensor(1.0, requires_grad=True))
        self.db = nn.Parameter(torch.tensor(1.0, requires_grad=True))

    def forward(self, x):
        # the weight (and the bias) of the fc layer come from FW and D
        self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
        self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))
        return torch.sigmoid(self.fc(x))

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100)
y = torch.tensor([0]*9 + [1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
But when I run that code, the model doesn't train, and self.dw and self.db don't change. I am not sure whether my concept is wrong, so that it's not possible to train D, or whether I made a mistake in the implementation.
I also tried to implement this using nn.utils.parametrize, but it still doesn't work (I am new to this, so I am not sure I implemented it correctly):
frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Adder(nn.Module):
    def __init__(self, delta, frozen):
        super().__init__()
        self.delta = nn.Parameter(torch.tensor(delta, requires_grad=True))
        self.frozen = frozen

    def forward(self, x):
        return torch.add(self.frozen, self.delta)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, x):
        nn.utils.parametrize.register_parametrization(self.fc, "weight", Adder(1.0, frozen.weight))
        nn.utils.parametrize.register_parametrization(self.fc, "bias", Adder(1.0, frozen.bias))
        return torch.sigmoid(self.fc(x))

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100)
y = torch.tensor([0]*9 + [1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Thank you for any responses.
Instead of recreating the weight and bias on every forward pass with

    self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
    self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))

(wrapping the sum in nn.Parameter creates a brand-new leaf tensor on each call, so the result is detached from self.dw and self.db, and the optimizer never sees it), you can use nn.functional.linear with intermediate variables:

    weight = self.weight + frozen.weight
    bias = self.bias + frozen.bias
    F.linear(x, weight, bias)
Complete version:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, frozen):
        super(Net, self).__init__()
        self.weight = nn.Parameter(torch.ones(10, 100, dtype=torch.float32))
        self.bias = nn.Parameter(torch.zeros(10, dtype=torch.float32))
        self.frozen = frozen

    @property
    def weight_bias(self):
        weight = self.weight + self.frozen.weight
        bias = self.bias + self.frozen.bias
        return weight, bias

    def forward(self, x):
        # the weight (and the bias) of the fc layer come from FW and D
        weight, bias = self.weight_bias
        return F.linear(x, weight, bias)  # raw logits, as required by nn.CrossEntropyLoss

frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

model = Net(frozen)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100).unsqueeze(0)
y = torch.tensor([0]*9 + [1], dtype=torch.float32).unsqueeze(0)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
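As an aside, the parametrize attempt in the question can also be made to work, as long as the parametrization is registered once at construction time rather than inside forward (every call to register_parametrization stacks another parametrization onto the layer). A minimal sketch, assuming PyTorch >= 1.9; here the layer's original weight plays the role of the trainable delta D:

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class AddFrozen(nn.Module):
        def __init__(self, frozen_tensor):
            super().__init__()
            # buffer: moves with the module, but is never trained
            self.register_buffer("frozen", frozen_tensor.detach().clone())

        def forward(self, delta):
            # `delta` is the layer's original (trainable) parameter
            return delta + self.frozen

    frozen = nn.Linear(100, 10)

    fc = nn.Linear(100, 10)
    nn.init.zeros_(fc.weight)  # start with D = 0, so W = FW initially
    nn.init.zeros_(fc.bias)
    parametrize.register_parametrization(fc, "weight", AddFrozen(frozen.weight))
    parametrize.register_parametrization(fc, "bias", AddFrozen(frozen.bias))
    # Now fc.weight == frozen.weight + delta, and only the delta
    # (fc.parametrizations.weight.original) receives gradients.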

How to implement LSTM in pytorch with 3d input and 1d output

I'm trying to do sequence binary classification with an LSTM in PyTorch. The input data dimension is (3014, 48, 184) and the output shape is (3014,). The purpose is medical prediction: there are 3014 patients, each patient has 48 hours of data, and each hour contains 184 features.
device = torch.device("cuda")

lr = 0.001
n_epochs = 10
input_dim = 184
hidden_dim = 184
layer_dim = 2
output_dim = 1
batch_size = 64

model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
model.to(device)
#print(model)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

print('Start model training')
for epoch in range(1, n_epochs + 1):
    for i, (x_batch, y_batch) in enumerate(trainloader):
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        out = model(x_batch)
        loss = criterion(out.squeeze(1), y_batch)
        loss.backward()
        optimizer.step()
    print("Epoch {:2d} | lr {:.5f} | loss {:.5f} ".format(epoch, lr, loss))

class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.batch_size = batch_size
        self.hidden = None

    def forward(self, x):
        # initializing the hidden states
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device)
        output, (hn, cn) = self.lstm(x, (h0, c0))
        output = self.fc(output[:, -1])
        return output
The loss is not decreasing over the epochs and the loss function does not seem to be working, so I'm wondering whether I implemented the model the wrong way.
I'm expecting the loss to decrease and the loss function to work properly. I was using Keras to build the model before, and it worked; I don't know how to build an LSTM model in PyTorch.
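A common sanity check that may help localize the problem (a general debugging pattern, not from the original post): try to overfit a single batch. If the loss cannot be driven near zero on one batch, the wiring between model, loss, and optimizer is at fault, rather than the task being hard. A sketch reusing the names from the post (trainloader, model, criterion, optimizer, device):

    # Hypothetical debugging snippet: repeatedly train on one fixed batch
    x_batch, y_batch = next(iter(trainloader))
    x_batch, y_batch = x_batch.to(device), y_batch.float().to(device)
    for step in range(200):
        optimizer.zero_grad()
        out = model(x_batch)
        loss = criterion(out.squeeze(1), y_batch)  # BCEWithLogitsLoss needs float targets
        loss.backward()
        optimizer.step()
        if step % 50 == 0:
            print(step, loss.item())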

How to extract the encoded features after running a PyTorch LSTM autoencoder model?

I am very new to PyTorch and Python in general, and I am now struggling to get the encoded features out of my pre-trained LSTM autoencoder, which can be seen below:
import torch
import torch.nn as nn

# Building an LSTM autoencoder
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=32):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim1, self.hidden_dim2 = embedding_dim, 4 * embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim1,  # 128
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim1,
            hidden_size=self.hidden_dim2,  # 64
            num_layers=1,
            batch_first=True
        )
        self.rnn3 = nn.LSTM(
            input_size=self.hidden_dim2,
            hidden_size=embedding_dim,  # 32
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (_, _) = self.rnn2(x)
        x, (hidden_n, _) = self.rnn3(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))

class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=32, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim2, self.hidden_dim1, self.n_features = 4 * input_dim, 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim1,
            num_layers=1,
            batch_first=True
        )
        self.rnn3 = nn.LSTM(
            input_size=self.hidden_dim1,
            hidden_size=self.hidden_dim2,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim2, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x, (hidden_n, cell_n) = self.rnn3(x)
        x = x.reshape((self.seq_len, self.hidden_dim2))
        return self.output_layer(x)

class RAE(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=32):
        super(RAE, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim = embedding_dim
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

### TRAINING
def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss(reduction='mean').to(device)  # nn.L1Loss sum
    history = dict(train=[], val=[])
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        # add accuracy
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
    return model.eval(), history
Once I trained my model, I followed the advice given by ptrblck here and implemented it as follows:
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

model.encoder.register_forward_hook(get_activation('encoder'))
x = test_dataset_SR[1]  # instead of using his random example I used one example from my training set
x = x.cuda()
output = model(x)
print(activation['encoder'])
but this gives me this error:

      2 def get_activation(name):
      3     def hook(model, input, output):
----> 4         activation[name] = output.detach()
      5     return hook

AttributeError: 'tuple' object has no attribute 'detach'
Can you please help me solve this issue? I want to take these encoded features, store them, and use them as input to another network. I know I could probably train the encoder separately (not sure), but I will need both the encoder and the decoder, so I thought hooks would be my salvation.
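One hedged suggestion (based only on the code shown): LSTM modules return a tuple (output, (h_n, c_n)), and a tuple has no .detach(), which is exactly this AttributeError; the hook can be made robust by unpacking tuples. Alternatively, since RAE exposes its encoder as a submodule, the simplest route may be to call it directly:

    # Option 1: make the hook tolerate tuple outputs
    def get_activation(name):
        def hook(module, input, output):
            if isinstance(output, tuple):
                output = output[0]
            activation[name] = output.detach()
        return hook

    # Option 2: skip hooks entirely and call the trained encoder directly
    with torch.no_grad():
        encoded = model.encoder(x)  # the features the decoder would receive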

Training loss is not changing at all while training model

I’m trying to solve a VQA classification problem. my training loss is not changing at all while training the model.
I put in comment the CNN model and try to run it with the text only, but still, no change in loss value.
I pass through those models:
class question_lstm(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size):
        super(question_lstm, self).__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.tanh = nn.Tanh()
        self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        #self.fc1 = nn.Linear(n_layers*hid_dim, que_size)
        self.fc1 = nn.Linear(n_layers*output_dim, que_size)

    def forward(self, question):
        emb_question = self.embedding(question)  # (batchsize, input_dim, emb_dim=256)
        emb_question = self.dropout(emb_question)
        emb_question = self.tanh(emb_question)
        emb_question = emb_question.transpose(0, 1)  # (input_dim, batchsize, emb_dim)
        output, (hidden, cell) = self.lstm(emb_question)
        qu_feature = torch.cat((hidden, cell), dim=2)
        qu_feature = qu_feature.transpose(0, 1)  # (batchsize=100, num_layer=2, hid_dim=2048)
        question_output = self.fc1(qu_feature)
        return question_output
class vqamodel(nn.Module):
    def __init__(self, output_dim, input_dim, emb_dim, hid_dim, n_layers, dropout, answer_len, que_size):
        super(vqamodel, self).__init__()
        #self.image = img_CNN(img_size, image_feature)
        self.question = question_lstm(input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(que_size, output_dim)
        self.fc2 = nn.Linear(output_dim, answer_len)

    def forward(self, image, question):
        question_emb = self.question(question)
        combine = question_emb  #*img_emb
        out_feature = self.fc1(combine)  # (batchsize=100, output_dim=2048)
        out_feature = self.relu(out_feature)
        out_feature = self.dropout(out_feature)
        out_feature = self.fc2(out_feature)  # (batchsize=100, answer_len=1000)
        return out_feature
I’m using cross entropy loss and Adam:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vqa_model.parameters(),lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
Any idea what could cause this constant loss value?
The train loop:
def train(model, criterion, optimizer, scheduler):
    start_time = time.time()  # the time we start the train
    for epoch in range(num_epochs):
        train_loss = 0
        #test_loss = 0
        train_correct = 0
        #test_correct = 0
        vqa_model.train()
        for i, sample in enumerate(train_VQAdataset_loader):
            #image = sample['image'].to(device=device)
            question = sample['question'].to(torch.int64).to(device=device)
            label = sample['answer'].to(device=device)
            output = vqa_model(image, question)  # forward
            loss = criterion(output, label)
            optimizer.zero_grad()  # zero the gradients
            loss.backward()  # backprop
            optimizer.step()  # update weights
            scheduler.step()
            # Statistics
            train_loss += loss.item()  # save the loss for the entire epoch
            _, predictions = torch.max(output, 1)
            train_correct += (predictions == label).sum()  # number of successes - cumulative
        train_losses.append(train_loss / len(train_VQAdataset_loader))
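One detail worth checking in the loop above (an observation on the posted code, not a confirmed diagnosis): scheduler.step() is called once per batch, but StepLR(step_size=7, gamma=0.1) counts those calls, so the learning rate is multiplied by 0.1 every 7 batches. After a few dozen batches the effective learning rate is essentially zero, which would make the loss look frozen. The usual pattern steps the scheduler once per epoch:

    for epoch in range(num_epochs):
        for i, sample in enumerate(train_VQAdataset_loader):
            ...  # forward, loss, backward, optimizer.step()
        scheduler.step()  # decay the learning rate once per epoch, not per batch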

PyTorch LSTMCell Teacher Forcing

I'm fairly new to PyTorch and I'm trying to design an 18-node LSTM using LSTMCell with teacher forcing. I have quite a few difficulties.
Here's my model:
class tryLSTM(nn.ModuleList):
    def __init__(self, input_size, hidden_size, batch_size):
        super(tryLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.lstm0 = nn.LSTMCell(input_size, hidden_size, bias=True)
        self.lstm1 = nn.LSTMCell(input_size, hidden_size, bias=True)
        self.lstm2 = nn.LSTMCell(input_size, hidden_size, bias=True)
        .........
        self.lstm17 = nn.LSTMCell(input_size, hidden_size, bias=True)

    def init_hidden(self):
        # initialize the hidden state and the cell state to zeros
        hidden = torch.zeros(self.batch_size, self.hidden_size)
        cell = torch.zeros(self.batch_size, self.hidden_size)
        return hidden, cell

    def forward(self, x, hc):
        out = []
        h_0, c_0 = hc
        h_1, c_1 = self.lstm1(x[0], h_0, c_0)
        out[0] = h_1
        h_2, c_2 = self.lstm2(x[1], h_1, c_1)
        out[1] = h_2
        ......
        h_17, c_17 = self.lstm17(x[16], h_16, c_16)
        out[16] = h_17

model = tryLSTM(input_size=128, hidden_size=128, batch_size=18)
if gpu: model.cuda()

optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.BCELoss(weight=None, reduction='mean')
Here's the training loop:

def train(epoch):
    model.train()
    # initialize hidden and cell state
    hc = model.init_hidden()
    for batch_idx, (data, target) in enumerate(train_loader):
        # zero out the gradients
        optimizer.zero_grad()
        target = data[1:]
        print(target.size())
        # put data on GPU
        if gpu:
            data = data.cuda()
            target = target.cuda()
        # get outputs of the LSTM
        output = model(data, hc)
        print(output.size)
        # calculate loss
        loss = criterion(output, target)
        # calculate gradients
        loss.backward()
        # update model parameters
        optimizer.step()
        train_loss.append(loss.item())
Q.1 I'm getting the following error:

TypeError: forward() takes from 2 to 3 positional arguments but 4 were given

Please help. Thanks!
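Regarding Q.1, a hedged pointer: nn.LSTMCell takes the previous state as a single tuple, forward(input, (h, c)), so calls like self.lstm1(x[0], h_0, c_0) pass four positional arguments and raise exactly this TypeError. Each call would look like:

    # nn.LSTMCell expects the previous state as one (h, c) tuple
    h_1, c_1 = self.lstm1(x[0], (h_0, c_0))
    out.append(h_1)  # `out` starts as an empty list, so append rather than index

The same applies to every subsequent cell; note also that forward as posted never returns anything, so it would need something like return torch.stack(out).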
