I'm trying to do sequence binary classification with LSTM in pytorch. The input data dimension is (3014, 48, 184) and the output shape is (3014,). The purpose is to do medical prediction, which means there are 3014 patients, each patient has 48 hours data, each hour contains 184 features.
device = torch.device("cuda")
lr = 0.001
n_epochs = 10
input_dim = 184
hidden_dim = 184
layer_dim = 2
output_dim = 1
batch_size = 64
model = RNN(input_dim, hidden_dim, layer_dim, output_dim, batch_size)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
print('Start model training')
for epoch in range(1, n_epochs + 1):
for i, (x_batch, y_batch) in enumerate(trainloader):
x_batch =
y_batch =
out = model(x_batch)
loss = criterion(out.squeeze(1), y_batch)
print("Epoch {:2d} | lr {:.5f} | loss {:.5f} ".format(epoch, lr, loss))
class RNN(nn.Module):
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, batch_size):
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim
self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim)
self.batch_size = batch_size
self.hidden = None
def forward(self, x):
#initializing the hidden states
h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device)
c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).to(device)
output, (hn, cn) = self.lstm(x, (h0, c0))
return output
The loss of every epochs is not decreasing and the loss function seems not working, I'm wondering that maybe I implement the model with wrong way.
I'm expecting the loss will decrease, and the loss function will work properly. I was using keras to build the model before, and it works, I don't know how to bulid LSTM model in pytorch.


PyTorch text classification model not improving

I am training a simple LSTM model for binary text classification. Here is the model class:
class LSTM(nn.Module):
def __init__(self, vocabulary_size, embeddings_size, num_classes):
super(LSTM, self).__init__()
self.vocabulary_size = vocabulary_size
self.embeddings_size = embeddings_size
self.embedding = nn.Embedding(num_embeddings=vocabulary_size,
self.lstm = nn.LSTM(input_size=embeddings_size,
self.fc = nn.Linear(in_features=128,
def forward(self, x):
out = self.embedding(x)
out, _ = self.lstm(out)
out = out[:, -1]
out = self.fc(out)
out = torch.sigmoid(out)
return out
I am using BCELoss and Adam optimizer created with the following code:
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)
This is the training loop that I am using:
train_steps = len(train_data_loader)
for epoch in range(epochs):
train_loss = 0
for i, (sequences, labels) in enumerate(train_data_loader):
sequences =
labels =
outputs = model(sequences)
loss = criterion(outputs, labels)
train_loss += loss.item()
print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / train_steps:.4f}')
I have experimented with different datasets, number of epochs, learning rate, batch size. However, the model does not seem to learn - the loss is always around 0.7 and only the 0 class is predicted.
Does anyone know what the issue could be?

CNN-LSTM for image sequences classification | high loss

I'm working on a project where I need to classify image sequences of some plants (growing over time). I tried implementing a CNN-LSTM with a pretrained ResNet18 as a feature extractor and then feeding those feature sequences to the LSTM.
The issue is that I'm not used to train LSTMs, and I'm afraid I'm doing something wrong. I made a clear architecture and everything seems ok, but the loss is not decreasing.
here's the architecture:
class RecurrentCNN(nn.Module):
def __init__(self, embed_dim, hidden_size, num_layers, num_classes):
super(RecurrentCNN, self).__init__()
self.embed_dim = embed_dim
self.hidden_size = hidden_size
self.num_layers = num_layers
self.num_classes = num_classes
self.cnn = torchvision.models.resnet18(weights='DEFAULT')
self.cnn.fc = nn.Sequential(
nn.Linear(in_features=512, out_features=self.embed_dim, bias=False),
self.lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
self.fc = nn.Sequential(
nn.Linear(hidden_size, hidden_size),
nn.Linear(hidden_size, num_classes)
def forward(self, x):
batch_size, img_size = x.shape[0], x.shape[2:]
x = x.reshape(-1, *img_size) # i merge the batch_size and num_seq in order to feed everything to the cnn
x = self.cnn(x)
x = x.reshape(batch_size, -1, self.embed_dim) # then i comeback the original shape
# lstm part
h_0 = torch.autograd.Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)
c_0 = torch.autograd.Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)
x, (hn, cn) = self.lstm(x, (h_0, c_0))
x = x[:, -1, :]
x = self.fc(x)
return x
I have 40 classes to output. My sequences are of different lengths, so I was forced to pad with some black images sometimes! (mean seq length: 39, max: 55, min: 15)
I'm feeding the model with sequences of shape (batch_size, seq_len=55, 3, 112, 112).
It may be wrong but for now I just want to make sure that the model is at least working correctly, then I'll probably change the strategy of learning.
here's the training code:
dataset = PlantDataset(data_path, max_sequence_len=55, transform=None)
train_loader =
dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True
rcnn = RecurrentCNN(embed_dim=128, hidden_size=256, num_layers=2, num_classes=len(class_list)).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(rcnn.parameters(), lr=0.0001)
loss_am = list() #AverageMeter()
for epoch in range(EPOCHS):
progress = tqdm(range(dataset.__len__() * BATCH_SIZE))
for i, data in enumerate(train_loader):
sequences, targets = data
sequences, targets =, dtype=torch.float), torch.Tensor(targets).to(device)
output = torch.nn.functional.log_softmax(rcnn(sequences), dim=1)
loss_value = criterion(output, targets)
with torch.no_grad():
progress.set_description('Epoch: {}, Loss: {:.4f}'.format(epoch, loss_value.item()))
The loss on each batch goes like
3.53 => 4.22 => 4.62 => 3.83 => 3.75 => 3.80 => 3.70, etc
Do you have any idea ?
I am facing the same issue. But I am able to find the problem. Since I am using the Image-sequences dataset, my model is not able to predict the tokens, instead, I ended up with a whole set of garbage tokens. I am still trying to figure out why this is happening.

LSTM pytorch not learning

I am learning LSTM and language models, I developed the following code for a character level text generation:
Here is the model class:
class RNN(nn.Module):
def __init__(self, input_size, embedding_dim, hidden_size, num_layers, output_size):
super(RNN, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
# Layers:
self.embed = nn.Embedding(input_size, embedding_dim, padding_idx=0)
self.dropout = nn.Dropout(0.5) # regularization to reduces overfitting and to increase stability
self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, output_size)
self.o = nn.Softmax(dim=1)
def forward(self, x, hidden, cell):
out = self.embed(x)
out = self.dropout(out)
out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
out = self.fc(out.reshape(out.shape[0], -1))
out = self.o(out)
return out, (hidden, cell)
def init_hidden(self, batch_size):
hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
return hidden, cell
n_chars, embedding_dim, hidden_size, num_layers, output_size = 55, 20, 256, 2, 55
model = RNN(n_chars, embedding_dim, hidden_size, num_layers, n_chars).to(device)
And this is the train function, where I have the problem:
def train(model, optimizer, criterion, epochs=10, every=5):
for epoch in range(epochs):
k = random.randint(0,len(data))
x, y = get_batch(k)
xt, yt = tensorize(x,y)
mean_loss = 0
L = len(xt)
for i in range(L):
hidden, cell = model.init_hidden(batch_size) # not doing this will cause an error
out, (hidden, cell) = model(xt[i].unsqueeze(0), hidden, cell)
target = yt[i].unsqueeze(0)
loss = criterion(out, target)
mean_loss += loss.item()
if epoch % every == 0:
print("epoch = ", epoch ," mean loss = ", mean_loss/L)
However, the loss seems to not change at all. What did I do wrong, please?
Note: I am giving the model character by character and not the entire batch at once.

Training loss is not changing at all while training model

I’m trying to solve a VQA classification problem. my training loss is not changing at all while training the model.
I put in comment the CNN model and try to run it with the text only, but still, no change in loss value.
I pass through those models:
class question_lstm(nn.Module):
def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size):
super(question_lstm, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.embedding = nn.Embedding(input_dim, emb_dim)
self.tanh = nn.Tanh()
self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
self.dropout = nn.Dropout(dropout)
def forward(self, question):
emb_question=self.embedding(question) #(batchsize, input_dim, emb_dim=256)
emb_question = emb_question.transpose(0, 1) #(input_dim, batchsize, emb_dim)
output, (hidden, cell) = self.lstm(emb_question)
qu_feature =, cell), dim=2)
qu_feature = qu_feature.transpose(0, 1) #(batchsize=100, num_layer=2, hid_dim=2048)
question_output =self.fc1(qu_feature)
return question_output
class vqamodel(nn.Module):
def __init__(self, output_dim,input_dim, emb_dim, hid_dim, n_layers, dropout, answer_len, que_size,):
self.question=question_lstm(input_dim, emb_dim, hid_dim, n_layers, dropout,output_dim,que_size)
def forward(self, image, question):
combine =question_emb #*img_emb
out_feature=self.fc1(combine) #(batchsize=100, output_dim=2048)
out_feature=self.fc2(out_feature) #(batchsize=100, answer_len=1000)
return (out_feature)
I’m using cross entropy loss and Adam:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vqa_model.parameters(),lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
any idea what can cause this constant loss value?
the train loop:
def train(model,criterion,optimizer,scheduler):
start_time = time.time() #the time we start the train
for epoch in range(num_epochs):
train_loss = 0
#test_loss = 0
train_correct = 0
#test_correct = 0
for i,sample in enumerate(train_VQAdataset_loader):
#image = sample['image'].to(device=device)
question = sample['question'].to(torch.int64).to(device=device)
label = sample['answer'].to(device=device)
output = vqa_model(image, question) # forward
loss = criterion(output, label)
optimizer.zero_grad() # Zero the gradients
loss.backward() # backprop
optimizer.step() # Update weights
# Statitcs
train_loss += loss.item() # save the loss for the entire epoch
_, predictions = torch.max(output, 1)
train_correct += (predictions == label).sum() #number of success - cumulative
train_losses.append(train_loss / len(train_VQAdataset_loader))

PyTorch LSTMCell Teacher Forcing

I'm fairly new to PyTorch and I'm trying to design an 18 node LSTM using LSTMCell with Teacher Forcing. I have quite a few difficulties.
Here's my model:
class tryLSTM(nn.moduleList):
def __init__(self, input_size, hidden_size, batch_size):
super(tryLSTM, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.batch_size = batch_size
self.lstm0 = nn.LSTMCell(input_size, hidden_size, bias=True)
self.lstm1 = nn.LSTMCell(input_size, hidden_size, bias=True)
self.lstm2 = nn.LSTMCell(input_size, hidden_size, bias=True)
self.lstm17 = nn.LSTMCell(input_size, hidden_size, bias=True)
def init_hidden(self):
# initialize the hidden state and the cell state to zeros
hidden = torch.zeros(self.batch_size, self.hidden_size)
cell = torch.zeros(self.batch_size, self.hidden_size)
return hidden, cell
def forward(self, x, hc):
out = []
h_0, c_0 = hc
h_1, c_1 = self.lstm1(x[0], h_0, c_0)
out[0] = h_1
h_2, c_2 = self.lstm2(x[1], h_1, c_1)
out[1] = h_2
h_17, c_17 = self.lstm17(x[16], h_16, c_16)
out[16] = h_17
model = tryLSTM(input_size=128, hidden_size=128, batch_size=18)
if gpu: model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.BCELoss(weight=None, reduction='mean')
here's the training loop:
def train(epoch):
# initialize hidden and cell state
hc = model.init_hidden()
for batch_idx, (data, target) in enumerate(train_loader):
# Zero out the gradients
target = data[1:]
# Put data on GPU
if gpu:
data = data.cuda()
target = target.cuda()
# Get outputs of LSTM
output = model(data, hc)
# Calculate loss
loss = criterion(output, target)
# Calculate gradients
# Update model parameters
Q.1 I'm getting the following error:
TypeError: forward() takes from 2 to 3 positional arguments but 4 were given
Please help, Thanks!
