5 fold cross validation using pytorch - pytorch

Need to perform 5 fold cross validation on my dataset. I was able to find 2 examples of doing this but could not integrate to my current pipeline.Could anyone please help me with this.
###############################################################################################
class leukemiaClassifier(Dataset):
def __init__(self, csv_file, transform):
self.data = pd.read_csv(csv_file)
self.data = self.data
self.transform = transform
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
img_name = self.data.loc[idx][0]
img = Image.open(img_name).convert('RGB')
img = cv2.imread(img_name)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
image = self.transform(image=img)
image = image['image']
labels = torch.tensor(self.data.loc[idx][1])
return image, labels
train_file = 'train.csv'
val_file = 'test.csv'
batch_size = 28
train_dataset = leukemiaClassifier(
csv_file=train_file,transform = data_transforms)
val_dataset = leukemiaClassifier(
csv_file=val_file, transform = data_transforms_test)
read_target = pd.read_csv('train.csv')
target = read_target['label'].values
data_loader_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,num_workers=64)
data_loader_val = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=64)
###############################################################################################
#Model Utils Hyperparameter etc
###############################################################################################
def get_lr(optimizer):
for param_group in optimizer.param_groups:
return param_group['lr']
def efficientnet(version,num_classes):
model = EfficientNet.from_pretrained('efficientnet-b{}'.format(version), num_classes=num_classes)
num_ftrs = model._fc.in_features
model._fc = nn.Linear(num_ftrs, num_classes)
return model.cuda()
target_names = ['Lymphocyte(atypical)', 'Monoblast', 'Promyelocyte(bilobed)', 'Metamyelocyte', 'Erythroblast', 'Neutrophil(segmented)', 'Myeloblast','Promyelocyte','Monocyte','Lymphocyte(typical)','Neutrophil(band)','Smudge cell', 'Eosinophil', 'Myelocyte', 'Basophil']
model= efficientnet(5,15)
model = nn.DataParallel(model)
wandb.watch(model)
# criterion = torch.nn.CrossEntropyLoss()
criterion = FocalLoss()
labels = torch.tensor((0,1,2,3,4,5,6,7,8,9,10,11,12,13,14)).cuda()
no_of_classes = 15
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.01, patience=5, verbose=True)
global_validation_loss= 100
###############################################################################################
#Training and validation loop
###############################################################################################
for epoch in range(300000): # loop over the dataset multiple times
running_loss = 0.0
label_list =[]
predicted_list=[]
model = model.train()
for batch_idx, data in enumerate( tqdm.tqdm(data_loader_train)):
inputs,labels = data
inputs,labels = inputs.cuda(), labels.cuda().long()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs,labels)
# print(loss)
loss.backward()
optimizer.step()
for item in torch.argmax(outputs,1).detach().cpu().numpy():predicted_list.append(item)
for item in labels.detach().cpu().numpy():label_list.append(item)
# print statistics
running_loss += loss.item()

Related

Bert NER pytorch lightning

I'm trying to use pytorch-lightning for token-classification model. I have already built a model for token classification without lightning. I'm confused on what changes should be done with existing code to integrate pytorch-lightning.
Following is my pytorch code:
model = BertForTokenClassification.from_pretrained(
'bert-large-cased',
num_labels=len(tag2idx),
output_attentions = False,
output_hidden_states = False
)
for _ in trange(epochs, desc="Epoch"):
# ========================================
# Training
# ========================================
model.train()
total_loss = 0
for step, batch in enumerate(train_dataloader):
batch = tuple(t.to(device) for t in batch)
b_input_ids, b_input_mask, b_labels = batch
model.zero_grad()
outputs = model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
loss = outputs[0]
loss.backward()
total_loss += loss.item()
torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
optimizer.step()
scheduler.step()
avg_train_loss = total_loss / len(train_dataloader)
loss_values.append(avg_train_loss)
# ========================================
# Validation
# ========================================
model.eval()
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
predictions , true_labels = [], []
for batch in valid_dataloader:
batch = tuple(t.to(device) for t in batch)
b_input_ids, b_input_mask, b_labels = batch
with torch.no_grad():
outputs = model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
logits = outputs[1].detach().cpu().numpy()
label_ids = b_labels.to('cpu').numpy()
eval_loss += outputs[0].mean().item()
predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
true_labels.extend(label_ids)
eval_loss = eval_loss / len(valid_dataloader)
validation_loss_values.append(eval_loss)
pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
valid_tags = [tag_values[l_i] for l in true_labels
for l_i in l if tag_values[l_i] != "PAD"]
f1 = f1_score([valid_tags], [pred_tags])
Following is the code which I tried for pytorch lightning.
class LightningModule(pl.LightningModule):
def __init__(self, lr, lr_backbone, weight_decay, batch_size):
super().__init__()
self.model = BertForTokenClassification.from_pretrained("bert-large-cased",
num_labels=len(tag2idx),
output_attentions = False,
output_hidden_states = False)
self.lr = lr
self.lr_backbone = lr_backbone
self.weight_decay = weight_decay
self.batch_size = batch_size
def forward(self, input_ids, attention_mask, labels):
outputs = self.model(
input_ids, token_type_ids=None, attention_mask=attention_mask, labels=labels
)
loss = outputs[0]
logits = outputs[1]
return loss, logits
def training_step(self, batch, batch_idx):
b_input_ids, b_input_mask, b_labels = batch
outputs = self.model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
loss = outputs[0]
self.log("train_loss", loss)
return loss
def validation_step(self, batch, batch_idx):
b_input_ids, b_input_mask, b_labels = batch
outputs = self.model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
eval_loss = outputs[0]
self.log("val_loss", eval_loss)
return eval_loss
def validation_end(self, outputs):
eval_loss = np.mean([x["val_loss"] for x in outputs])
self.log("val_loss", eval_loss)
pred_tags = [tag_values[p_i] for p, l in zip(self.predictions, self.true_labels)
for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
valid_tags = [tag_values[l_i] for l in self.true_labels
for l_i in l if tag_values[l_i] != "PAD"]
f1 = f1_score([valid_tags], [pred_tags])
self.log("val_f1", f1)
def configure_optimizers(self):
# optimizer = torch.optim.NAdam(optimizer_grouped_parameters,lr=4e-6,eps=1e-8)
# scheduler = scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=total_steps )
return torch.optim.NAdam(optimizer_grouped_parameters,lr=4e-6,eps=1e-8)
def train_dataloader(self):
return train_dataloader # return your dataloader
def val_dataloader(self):
return valid_dataloader # return your validation dataloader
model = LightningModule(lr=1e-6, lr_backbone=1e-5, weight_decay=1e-4, batch_size=32)
trainer = pl.Trainer(accelerator='gpu', gradient_clip_val=0.1, max_epochs=epochs, auto_scale_batch_size=None, default_root_dir="lightning_output/", enable_checkpointing=False)
trainer.fit(model)
But, when I run inference, I get the following error.
TypeError: forward() missing 2 required positional arguments: 'attention_mask' and 'labels'

mini-batch gradient decent bad accuracy/loss

I’m trying mini-batch gradient descent on the popular iris dataset, but somehow I don’t manage to get the accuracy of the model above 75-80%. Also, I’m not certain if I’m calculating the loss as well as the accuracy correctly. Any suggestions on how to improve my code or mistakes I’m doing are appreciated.
batch_size = 10
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
Training loop:
n_iters = 1000
steps = n_iters/10
LOSS = []
for epochs in range(n_iters):
for i,(inputs, labels) in enumerate(train_loader):
out = model(inputs)
train_labels = transform_label(labels)
l = loss(out, train_labels)
l.backward()
#update weights
optim.step()
optim.zero_grad()
LOSS.append(l.item())
if epochs%steps == 0:
print(f"\n epoch: {int(epochs+steps)}/{n_iters}, loss: {sum(LOSS)/len(LOSS)}")
#if i % 1 == 0:
#print(f" steps: {i+1}, loss : {l.item()}")
claculate accuracy:
def accuracy(model,test_loader):
sum_acc= 0
#map labels with 0,1,2
def transform_label(label_data):
data = []
for i in label_data:
if i == "Iris-setosa":
data.append(torch.tensor([0]))
if i == "Iris-versicolor":
data.append(torch.tensor([1]))
if i == "Iris-virginica":
data.append(torch.tensor([2]))
return torch.stack(data)
for i,(X_test, test_labels) in enumerate(test_loader):
test_labels = transform_label(test_labels)
x_label_pre = model(X_test)
_, x_label_pre_hat = torch.max(x_label_pre, 1)
idx = 0
number_pred = 0
while idx < len(X_test):
if x_label_pre_hat[idx].item() == test_labels[idx].item():
number_correct += 1
idx +=1
lr = 0.01
model = NeuralNetwork()
optim = torch.optim.Adam(model.parameters(), lr=lr)
#optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
loss = torch.nn.CrossEntropyLoss()
#loss = torch.nn.MSELoss()
#Weights are by default torch.32 not 64 --> error message
class NeuralNetwork(nn.Module):
def __init__(self):
super().__init__()
self.linear_stack = nn.Sequential(
nn.Linear(4,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,3),
)
def forward(self, x):
logits = self.linear_stack(x)
return logits

Stack expects tensor to be equal size, but got [66, 67, 4] at entry 0 and [66, 68, 4] at entry 7

class customDataset(Dataset):
def __init__(self,csv_file, root_dir, transform=None):
self.annotations = pd.read_csv(csv_file)
self.root_dir = root_dir
#self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self,index):
img_path = os.path.join(self.root_dir,self.annotations.iloc[index,0])
image = io.imread(img_path)
y_label = torch.tensor(int(self.annotations.iloc[index,1]))
#if self.transform:
# image = self.transform(image)
return (image,y_label)
device = torch.device("cuda")
in_channel = 1
num_classes = 1
learning_rate = 0.001
batch_size = 32
num_epochs = 1
dataset = customDataset(csv_file="biomass.csv", root_dir = "biomassMerged", transform = transforms.ToTensor())
train_set, test_set = torch.utils.data.random_split(dataset, [len(dataset)- 10000,10000])
train_loader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True)
test_loader = DataLoader(dataset=test_set,batch_size=batch_size,shuffle=True)
model = torchvision.models.googlenet(pretrained = True)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
for epoch in range(num_epochs):
losses = []
for batch_idx, (data, targets) in enumerate(train_loader):
data = data.to(device=device)
targets = targets.to(device=device)
scores = model(data)
loss = criterion(scores,targests)
losses.append(loss.item())
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Cost at each {epoch} is {sum(losses)/len(losses)}")
I have created a customDataset class since I need to go through a lot of image data that I have gathered, the issue is that every time I go through it there is a new entry that tensors dont match. How do I fix this? The images should all be the same size.

PyTorch out of GPU memory in test loop

For the following training program, training and validation are all ok.
Once reach to Test method, I have CUDA out of memory. What should I change so that I have enough memory to test as well.
import torch
from torchvision import datasets, transforms
import torch.nn.functional as f
class CnnLstm(nn.Module):
def __init__(self):
super(CnnLstm, self).__init__()
self.cnn = CNN()
self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=2, batch_first=True)#stacked LSTM with 2 layers
#print(num_classes)
self.linear = nn.Linear(256, num_classes)
#print('after num_classes')
def forward(self, x):
#print(x.shape)
batch_size, time_steps, channels, height, width = x.size()
c_in = x.view(batch_size * time_steps, channels, height, width)
_, c_out = self.cnn(c_in)
r_in = c_out.view(batch_size, time_steps, -1)
r_out, (_, _) = self.rnn(r_in)
r_out2 = self.linear(r_out[:, -1, :])
return f.log_softmax(r_out2, dim=1)
class TrainCNNLSTM:
def __init__(self):
self.seed = 1
self.batch_size = 8
self.validate_batch_size = 8
self.test_batch_size = 1
self.epoch = 20
self.learning_rate = 0.01
self.step = 100
self.train_loader = None
self.validate_loader = None
self.test_loader = None
#print('before')
self.model = CnnLstm().to(device)
#print('after')
self.criterion = nn.CrossEntropyLoss()
def load_data(self):
data_loader = DataLoader()
self.train_loader = data_loader.get_train_data(self.batch_size)
self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
self.test_loader = data_loader.get_test_data(self.test_batch_size)
def train(self):
optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.learning_rate/100.0, max_lr=self.learning_rate, step_size_up=13)
#optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
for epoch in range(self.epoch):
t_losses=[]
for iteration, (data, target) in enumerate(self.train_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
t_losses.append(loss)
loss.backward()
optimizer.step()
scheduler.step()
if iteration % self.step == 0:
print('Epoch: {} | train loss: {:.4f}'.format(epoch, loss.item()))
avgd_trainloss = sum(t_losses)/len(t_losses)
self.validate(epoch, avgd_trainloss)
def validate(self, epoch, avg_tloss):
v_losses=[]
with torch.no_grad():
for iteration, (data, target) in enumerate(self.validate_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
v_losses.append(loss)
avgd_validloss = sum(v_losses)/len(v_losses)
print('Epoch: {} | train loss: {:.4f} | validate loss: {:.4f}'.format(epoch, avg_tloss, avgd_validloss))
def test(self):
test_loss = []
correct = 0
for data, target in self.test_loader:
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#f.nll_loss(output, target, size_average=False).item() # sum up batch loss
test_loss.append(loss)
pred = torch.max(output, 1)[1].data.squeeze()
correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
test_loss = sum(test_loss)/len(test_loss)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(self.test_loader.dataset),
100. * correct / len(self.test_loader.dataset)))
train = TrainCNNLSTM()
train.load_data()
train.train()
train.test()
You should call .item() on your loss when appending it to the list of losses:
loss = self.criterion(output, target)
test_loss.append(loss.item())
This avoids accumulating tensors in a list which are still attached to the computational graph. I would say the same for your accuracy.

Training loss is not changing at all while training model

I’m trying to solve a VQA classification problem. my training loss is not changing at all while training the model.
I put in comment the CNN model and try to run it with the text only, but still, no change in loss value.
I pass through those models:
class question_lstm(nn.Module):
def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout, output_dim, que_size):
super(question_lstm, self).__init__()
self.hid_dim = hid_dim
self.n_layers = n_layers
self.embedding = nn.Embedding(input_dim, emb_dim)
self.tanh = nn.Tanh()
self.lstm = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
self.dropout = nn.Dropout(dropout)
#self.fc1=nn.Linear(n_layers*hid_dim,que_size)
self.fc1=nn.Linear(n_layers*output_dim,que_size)
def forward(self, question):
emb_question=self.embedding(question) #(batchsize, input_dim, emb_dim=256)
emb_question=self.dropout(emb_question)
emb_question=self.tanh(emb_question)
emb_question = emb_question.transpose(0, 1) #(input_dim, batchsize, emb_dim)
output, (hidden, cell) = self.lstm(emb_question)
qu_feature = torch.cat((hidden, cell), dim=2)
qu_feature = qu_feature.transpose(0, 1) #(batchsize=100, num_layer=2, hid_dim=2048)
question_output =self.fc1(qu_feature)
return question_output
class vqamodel(nn.Module):
def __init__(self, output_dim,input_dim, emb_dim, hid_dim, n_layers, dropout, answer_len, que_size,):
super(vqamodel,self).__init__()
#self.image=img_CNN(img_size,image_feature)
self.question=question_lstm(input_dim, emb_dim, hid_dim, n_layers, dropout,output_dim,que_size)
self.tanh=nn.Tanh()
self.relu=nn.ReLU()
self.dropout=nn.Dropout(dropout)
self.fc1=nn.Linear(que_size,output_dim)
self.fc2=nn.Linear(output_dim,answer_len)
def forward(self, image, question):
question_emb=self.question(question)
combine =question_emb #*img_emb
out_feature=self.fc1(combine) #(batchsize=100, output_dim=2048)
out_feature=self.relu(out_feature)
out_feature=self.dropout(out_feature)
out_feature=self.fc2(out_feature) #(batchsize=100, answer_len=1000)
return (out_feature)
I’m using cross entropy loss and Adam:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vqa_model.parameters(),lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
any idea what can cause this constant loss value?
the train loop:
def train(model,criterion,optimizer,scheduler):
start_time = time.time() #the time we start the train
for epoch in range(num_epochs):
train_loss = 0
#test_loss = 0
train_correct = 0
#test_correct = 0
vqa_model.train()
for i,sample in enumerate(train_VQAdataset_loader):
#image = sample['image'].to(device=device)
question = sample['question'].to(torch.int64).to(device=device)
label = sample['answer'].to(device=device)
output = vqa_model(image, question) # forward
loss = criterion(output, label)
optimizer.zero_grad() # Zero the gradients
loss.backward() # backprop
optimizer.step() # Update weights
scheduler.step()
# Statitcs
train_loss += loss.item() # save the loss for the entire epoch
_, predictions = torch.max(output, 1)
train_correct += (predictions == label).sum() #number of success - cumulative
train_losses.append(train_loss / len(train_VQAdataset_loader))

Resources