Contrastive loss does not change after some epochs - pytorch

I am trying to implement a contrastive loss for CIFAR-10 in PyTorch, and later for 3D images. I wrote the following pipeline and checked the loss; logically it appears to be correct. But I have three problems. First, convergence is very slow. Second, after some epochs the loss stops decreasing and stays fixed. Third, with some settings training does not proceed at all: it hangs for some reason and I have to restart the kernel. I would be very thankful if someone could help me find where I am making a mistake. Thank you very much in advance.
transform = transforms.Compose([transforms.ToTensor(),
transforms.Normalize([0.491, 0.482, 0.447], [0.247, 0.243,0.261])])
class PairwiseAg(object):
    """Dataset transform that turns one sample into a pair of views.

    The wrapped ``transform`` is applied twice to the same input and both
    results are returned, so a data loader built on top of it yields a
    two-element batch that the training loop can index as ``X[0]`` and
    ``X[1]``.

    Args:
        transform: Callable applied to the raw sample to produce one view.
    """

    def __init__(self, transform):
        self.transform = transform

    def __call__(self, vol):
        """Return ``(view_i, view_j)``, two transformed copies of ``vol``."""
        # The two views differ only when ``transform`` is stochastic; with a
        # deterministic transform both elements of the pair are identical.
        voli = self.transform(vol)
        volj = self.transform(vol)
        return voli, volj
dataset =datasets.CIFAR10('./data', train=True, transform=PairwiseAg(transform), download=True)
ss = ShuffleSplit(n_splits=1, test_size= 0.2,random_state=0)
for train_idx, val_idx in ss.split(dataset):
train_idx = train_idx
val_idx = val_idx
train_set = Subset(dataset,train_idx)
val_set = Subset(dataset,val_idx)
train_loader = DataLoader(train_set,batch_size=512,shuffle=True,num_workers=2,drop_last=False)
val_loader = DataLoader(val_set,batch_size=512,shuffle=True,num_workers=2,drop_last=False)
device = torch.device('cuda:4' if torch.cuda.is_available() else 'cpu')
model = torchvision.models.resnet18(pretrained=False, progress=True)
model.fc = nn.Linear(in_features=512,out_features=128)
model =
class NT_Xent(nn.Module):
    """Normalized temperature-scaled cross-entropy loss for paired embeddings.

    Row ``k`` of ``z_i`` and row ``k`` of ``z_j`` form a positive pair; every
    other row of the concatenated ``2 * batch_size`` embeddings is used as a
    negative for it.

    Args:
        temperature: Divisor applied to the cosine similarities.
        device: Stored on the instance; tensors created in ``forward`` follow
            the device of the inputs instead.
    """

    def __init__(self, temperature, device):
        super(NT_Xent, self).__init__()
        self.temperature = temperature
        self.device = device
        self.criterion = nn.CrossEntropyLoss(reduction="sum")
        self.similarity_f = nn.CosineSimilarity(dim=2)

    @staticmethod
    def _negatives_mask(batch_size, device=None):
        """Boolean (2B, 2B) mask that is False on self- and positive-pairs."""
        total = 2 * batch_size
        mask = torch.ones((total, total), dtype=torch.bool, device=device)
        mask.fill_diagonal_(False)
        # Vectorized replacement for the per-sample Python loop: entries
        # (k, B + k) and (B + k, k) are the positive pairs.
        idx = torch.arange(batch_size, device=device)
        mask[idx, idx + batch_size] = False
        mask[idx + batch_size, idx] = False
        return mask

    def forward(self, z_i, z_j):
        """Return the mean NT-Xent loss over all ``2 * batch_size`` anchors.

        Args:
            z_i: Embeddings of the first view, one row per sample.
            z_j: Embeddings of the second view, same shape as ``z_i``.

        Returns:
            Scalar tensor: summed cross-entropy divided by ``2 * batch_size``.
        """
        self.batch_size = z_i.size()[0]
        total = 2 * self.batch_size
        # Build the mask on the inputs' device so indexing needs no transfer.
        self.mask = self._negatives_mask(self.batch_size, z_i.device)

        z_i = F.normalize(z_i, dim=1)
        z_j = F.normalize(z_j, dim=1)
        p1 = torch.cat((z_i, z_j), dim=0)

        # Pairwise cosine similarity between all 2B embeddings.
        sim = self.similarity_f(p1.unsqueeze(1), p1.unsqueeze(0)) / self.temperature

        # The +B / -B off-diagonals hold the similarities of positive pairs.
        sim_i_j = torch.diag(sim, self.batch_size)
        sim_j_i = torch.diag(sim, -self.batch_size)
        positive_samples = torch.cat((sim_i_j, sim_j_i), dim=0).reshape(total, 1)
        # Each row keeps exactly 2B - 2 negatives; the explicit width (rather
        # than -1) keeps the reshape valid when batch_size == 1.
        negative_samples = sim[self.mask].reshape(total, total - 2)

        # The positive logit sits in column 0, so the target class is 0.
        labels = torch.zeros(total).to(positive_samples.device).long()
        logits = torch.cat((positive_samples, negative_samples), dim=1)
        loss = self.criterion(logits, labels)
        loss = loss / total
        return loss
optimizer = optim.Adam(model.parameters(),lr=5e-1)
criterion = NT_Xent(0.7,device)
def train(epoch):
total_loss = 0
for i,(X,_) in enumerate(train_loader):
X1 = X[0].to(device)
X2 = X[1].to(device)
h1 = model(X1)
h2 = model(X2)
loss = criterion(h1.float(),h2.float())
def val():
total_loss = 0
for i, (X,_) in enumerate(val_loader):
X1 = X[0].to(device)
X2 = X[1].to(device)
h1 = model(X1)
h2 = model(X2)
loss = criterion(h1.float(),h2.float())
def main(num_epochs):
for epoch in range(num_epochs):
tr_loss = train(epoch)
val_loss = val()
Epoch: 0 train_loss:0.0131 val_loss:0.0131
Epoch: 1 train_loss:0.0127 val_loss:0.0129
Epoch: 2 train_loss:0.0126 val_loss:0.0129
Epoch: 3 train_loss:0.0126 val_loss:0.0129
Epoch: 4 train_loss:0.0125 val_loss:0.0129
Epoch: 5 train_loss:0.0124 val_loss:0.0128
Epoch: 6 train_loss:0.0124 val_loss:0.0126
Epoch: 7 train_loss:0.0123 val_loss:0.0127
Epoch: 8 train_loss:0.0123 val_loss:0.0125
Epoch: 9 train_loss:0.0123 val_loss:0.0127
Epoch: 10 train_loss:0.0123 val_loss:0.0126
Epoch: 11 train_loss:0.0122 val_loss:0.0125


Mini-batch gradient descent: bad accuracy/loss

I’m trying mini-batch gradient descent on the popular iris dataset, but somehow I can’t get the accuracy of the model above 75-80%. Also, I’m not certain whether I’m calculating the loss and the accuracy correctly. Any suggestions on how to improve my code, or pointers to mistakes I’m making, are appreciated.
batch_size = 10
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
Training loop:
n_iters = 1000
steps = n_iters/10
LOSS = []
for epochs in range(n_iters):
for i,(inputs, labels) in enumerate(train_loader):
out = model(inputs)
train_labels = transform_label(labels)
l = loss(out, train_labels)
#update weights
if epochs%steps == 0:
print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
#if i % 1 == 0:
#print(f" steps: {i+1}, loss : {l.item()}")
Calculate accuracy:
def accuracy(model,test_loader):
sum_acc= 0
#map labels with 0,1,2
def transform_label(label_data):
    """Map iris species names to the integer class indices 0, 1 and 2.

    Args:
        label_data: Iterable of species name strings.

    Returns:
        1-D ``torch.long`` tensor with one class index per input label
        (empty when ``label_data`` is empty).

    Raises:
        ValueError: If a label is not one of the three known species names.
            Skipping it silently would leave the targets shorter than, and
            misaligned with, the model outputs.
    """
    class_index = {
        "Iris-setosa": 0,
        "Iris-versicolor": 1,
        "Iris-virginica": 2,
    }
    data = []
    for i in label_data:
        if i not in class_index:
            raise ValueError(f"unknown iris label: {i!r}")
        data.append(class_index[i])
    return torch.tensor(data, dtype=torch.long)
for i,(X_test, test_labels) in enumerate(test_loader):
test_labels = transform_label(test_labels)
x_label_pre = model(X_test)
_, x_label_pre_hat = torch.max(x_label_pre, 1)
idx = 0
number_pred = 0
while idx < len(X_test):
if x_label_pre_hat[idx].item() == test_labels[idx].item():
number_correct += 1
idx +=1
lr = 0.01
model = NeuralNetwork()
optim = torch.optim.Adam(model.parameters(), lr=lr)
#optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
loss = torch.nn.CrossEntropyLoss()
#loss = torch.nn.MSELoss()
#Weights are by default torch.32 not 64 --> error message
class NeuralNetwork(nn.Module):
def __init__(self):
self.linear_stack = nn.Sequential(
def forward(self, x):
logits = self.linear_stack(x)
return logits

Stack expects tensor to be equal size, but got [66, 67, 4] at entry 0 and [66, 68, 4] at entry 7

class customDataset(Dataset):
    """Image dataset described by a CSV annotation file.

    Column 0 of the CSV is joined with ``root_dir`` to form the image path
    and column 1 is converted to an integer label.

    Args:
        csv_file: Path of the annotation CSV, read with ``pd.read_csv``.
        root_dir: Directory that the image paths are relative to.
        transform: Optional callable applied to each loaded image. The
            default batching of a ``DataLoader`` stacks samples, which
            requires every returned image to have the same shape, so a
            resizing step belongs here when the files differ in size.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        # Previously commented out, which made the caller's transform a no-op.
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the annotation row at ``index``."""
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        if self.transform is not None:
            image = self.transform(image)
        return (image, y_label)
device = torch.device("cuda")
in_channel = 1
num_classes = 1
learning_rate = 0.001
batch_size = 32
num_epochs = 1
dataset = customDataset(csv_file="biomass.csv", root_dir = "biomassMerged", transform = transforms.ToTensor())
train_set, test_set =, [len(dataset)- 10000,10000])
train_loader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True)
test_loader = DataLoader(dataset=test_set,batch_size=batch_size,shuffle=True)
model = torchvision.models.googlenet(pretrained = True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
for epoch in range(num_epochs):
losses = []
for batch_idx, (data, targets) in enumerate(train_loader):
data =
targets =
scores = model(data)
loss = criterion(scores,targests)
print(f"Cost at each {epoch} is {sum(losses)/len(losses)}")
I have created a customDataset class since I need to go through a lot of image data that I have gathered. The issue is that every time I iterate over it, a different entry triggers the error that the tensor sizes don't match. How do I fix this? The images should all be the same size.

Using a target size (torch.Size([2])) that is different to the input size (torch.Size([2, 5])) is deprecated. Please ensure they have the same size

When I use criterion = nn.BCELoss() for my binary classification task, it causes a problem and prints "Using a target size (torch.Size([2])) that is different to the input size (torch.Size([2, 5])) is deprecated. Please ensure they have the same size." This does not happen if I use CrossEntropyLoss. Please share your solution and an explanation. Thanks in advance.
from torch.optim import Adam
from tqdm import tqdm
def train(model, train_data, val_data, learning_rate, epochs):
train, val = Dataset(train_data), Dataset(val_data)
train_dataloader =, batch_size=2, shuffle=True)
val_dataloader =, batch_size=2)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr= learning_rate)
if use_cuda:
model = model.cuda()
criterion = criterion.cuda()
for epoch_num in range(epochs):
total_acc_train = 0
total_loss_train = 0
for train_input, train_label in tqdm(train_dataloader):
train_label =
mask = train_input['attention_mask'].to(device)
input_id = train_input['input_ids'].squeeze(1).to(device)
output = model(input_id, mask)
batch_loss = criterion(output, train_label)
total_loss_train += batch_loss.item()
acc = (output.argmax(dim=1) == train_label).sum().item()
total_acc_train += acc
total_acc_val = 0
total_loss_val = 0
with torch.no_grad():
for val_input, val_label in val_dataloader:
val_label =
mask = val_input['attention_mask'].to(device)
input_id = val_input['input_ids'].squeeze(1).to(device)
output = model(input_id, mask)
batch_loss = criterion(output, val_label)
total_loss_val += batch_loss.item()
acc = (output.argmax(dim=1) == val_label).sum().item()
total_acc_val += acc
f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
| Train Accuracy: {total_acc_train / len(train_data): .3f} \
| Val Loss: {total_loss_val / len(val_data): .3f} \
| Val Accuracy: {total_acc_val / len(val_data): .3f}')
model = BertClassifier()
LR = 1e-6
train(model, df_train, df_val, LR, EPOCHS)

RuntimeError: input must have 2 dimensions, got 1

I am new to PyTorch, and I am trying to build a BiLSTM model whose output is fed into a MaxPool1d layer and an AvgPool1d layer, after which the outputs of both layers are concatenated for a binary classification task. I am working with pretrained Word2Vec embeddings as input:
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as F
class LSTM(nn.Module):
# define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim , num_classes, lstm_layers, weights):
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embedding_dim,
num_directions = 2 #if bidirectional else 1
self.m1 = nn.MaxPool1d(1,stride= 1)
self.m2 = nn.AvgPool1d(1,stride= 1)
self.fc1 = nn.Linear(lstm_units * num_directions, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, num_classes)
self.relu = nn.ReLU()
self.softmax = nn.Softmax()
self.lstm_layers = lstm_layers
self.num_directions = num_directions
self.lstm_units = lstm_units
def forward(self, text, text_lengths):
batch_size = text.shape[0]
h_0, c_0 = (Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)),
Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)))
embedded = self.embedding(text)
packed_embedded = pack_padded_sequence(embedded,"cpu"), batch_first=True)
output, (h_n, c_n) = self.lstm(packed_embedded, (h_0, c_0))
output_unpacked, output_lengths = pad_packed_sequence(output, batch_first=True, enforce_sorted=False)
# out = output_unpacked[:, -1, :]
#return self.linear(ht[-1])
out = output_unpacked
out1 = self.m1(out)
out2 = self.m2(out)
out =, out2), 1)
out = F.relu(self.fc1(out))
preds = F.softmax(self.fc2(out))
return preds
My training function is as below:
import time
def train(dataloader):
total_acc, total_count = 0, 0
log_interval = 500
text_lengths = np.dtype('int64').type(200)
start_time = time.time()
for idx, (label, text) in enumerate(dataloader):
predited_label = model(text, text_lengths = torch.tensor([text_lengths]))
loss = criterion(predited_label, label)
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
total_acc += (predited_label.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(label, text)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction equals the label, or
        ``0.0`` when the dataloader yields no samples.
    """
    total_acc, total_count = 0, 0
    with torch.no_grad():
        for label, text in dataloader:
            # NOTE(review): the training loop calls model(text, text_lengths=...)
            # while this call passes only ``text`` — confirm the model's
            # forward() accepts that.
            predited_label = model(text)
            total_acc += (predited_label.argmax(1) == label).sum().item()
            total_count += label.size(0)
    # Guard against an empty dataloader instead of raising ZeroDivisionError.
    if total_count == 0:
        return 0.0
    return total_acc / total_count
And I try to run the code like this:
from import DataLoader
# Hyperparameters
EPOCHS = 1 # epoch
LR =1 # learning rate
BATCH_SIZE = 1 # batch size for training
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
total_accu = None
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE,
shuffle=True, collate_fn=collate_batch)
for epoch in range(1, EPOCHS + 1):
epoch_start_time = time.time()
accu_val = evaluate(valid_dataloader)
if total_accu is not None and total_accu > accu_val:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
print('-' * 59)
However, I get the error below. I am not sure what the input size here refers to, and I can't find anyone else with the same error anywhere. Can anyone advise me please?
RuntimeError Traceback (most recent call last)
<ipython-input-111-14ba1dd26348> in <module>()
27 for epoch in range(1, EPOCHS + 1):
28 epoch_start_time = time.time()
---> 29 train(train_dataloader)
30 accu_val = evaluate(valid_dataloader)
31 if total_accu is not None and total_accu > accu_val:
6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/ in check_input(self, input, batch_sizes)
201 raise RuntimeError(
202 'input must have {} dimensions, got {}'.format(
--> 203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
205 raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1

PyTorch out of GPU memory in test loop

For the following training program, training and validation both work fine.
Once it reaches the test method, I get a CUDA out-of-memory error. What should I change so that I have enough memory for testing as well?
import torch
from torchvision import datasets, transforms
import torch.nn.functional as f
class CnnLstm(nn.Module):
    """CNN feature extractor followed by a 2-layer LSTM and a linear head.

    Each time step of the input is passed through ``self.cnn``; the flattened
    per-step features are run through the LSTM and the output of the last
    time step is projected to ``num_classes`` scores.
    """

    def __init__(self):
        super(CnnLstm, self).__init__()
        self.cnn = CNN()
        # Stacked LSTM with 2 layers; input_size must equal the flattened
        # size of the CNN features for one time step.
        self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=2, batch_first=True)
        self.linear = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for each sequence in ``x``.

        Args:
            x: 5-D tensor unpacked as
                ``(batch_size, time_steps, channels, height, width)``.

        Returns:
            Tensor of shape ``(batch_size, out_features of self.linear)``
            holding ``log_softmax`` values along dimension 1.
        """
        batch_size, time_steps, channels, height, width = x.size()
        # Fold time into the batch dimension so the CNN sees ordinary 4-D
        # input. reshape (rather than view) also accepts non-contiguous x.
        c_in = x.reshape(batch_size * time_steps, channels, height, width)
        # The CNN returns a pair; only its second element is used here.
        _, c_out = self.cnn(c_in)
        r_in = c_out.reshape(batch_size, time_steps, -1)
        r_out, (_, _) = self.rnn(r_in)
        # Classify from the LSTM output at the final time step.
        r_out2 = self.linear(r_out[:, -1, :])
        return f.log_softmax(r_out2, dim=1)
class TrainCNNLSTM:
def __init__(self):
self.seed = 1
self.batch_size = 8
self.validate_batch_size = 8
self.test_batch_size = 1
self.epoch = 20
self.learning_rate = 0.01
self.step = 100
self.train_loader = None
self.validate_loader = None
self.test_loader = None
self.model = CnnLstm().to(device)
self.criterion = nn.CrossEntropyLoss()
def load_data(self):
data_loader = DataLoader()
self.train_loader = data_loader.get_train_data(self.batch_size)
self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
self.test_loader = data_loader.get_test_data(self.test_batch_size)
def train(self):
optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.learning_rate/100.0, max_lr=self.learning_rate, step_size_up=13)
#optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
for epoch in range(self.epoch):
for iteration, (data, target) in enumerate(self.train_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
if iteration % self.step == 0:
print('Epoch: {} | train loss: {:.4f}'.format(epoch, loss.item()))
avgd_trainloss = sum(t_losses)/len(t_losses)
self.validate(epoch, avgd_trainloss)
def validate(self, epoch, avg_tloss):
with torch.no_grad():
for iteration, (data, target) in enumerate(self.validate_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
avgd_validloss = sum(v_losses)/len(v_losses)
print('Epoch: {} | train loss: {:.4f} | validate loss: {:.4f}'.format(epoch, avg_tloss, avgd_validloss))
def test(self):
test_loss = []
correct = 0
for data, target in self.test_loader:
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#f.nll_loss(output, target, size_average=False).item() # sum up batch loss
pred = torch.max(output, 1)[1].data.squeeze()
correct += pred.eq(
test_loss = sum(test_loss)/len(test_loss)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(self.test_loader.dataset),
100. * correct / len(self.test_loader.dataset)))
train = TrainCNNLSTM()
You should call .item() on your loss when appending it to the list of losses (e.g. test_loss.append(loss.item())), right after:
loss = self.criterion(output, target)
This avoids accumulating tensors in a list which are still attached to the computational graph. I would say the same for your accuracy.
