Input tensors not being moved to GPU in PyTorch

When running my code, I get the error:
Input and parameter tensors are not at the same device, found input tensor at cpu and parameter tensor at cuda:0
even though I'm using .cuda() on my inputs.
Code:
use_cuda = True
if use_cuda and torch.cuda.is_available():
    model.cuda()

def test():
    model.eval()
    avgLoss = 0
    for dataPoint in range(len(testData)):
        lstmInput = testData[dataPoint][0]
        lstmInput = torch.Tensor(lstmInput)
        lstmInput = lstmInput.view(len(testData[dataPoint][0]), 1, 5)
        label = testData[dataPoint][1]
        label = torch.Tensor(label)
        lstmInput = Variable(lstmInput)
        label = Variable(label)
        if use_cuda and torch.cuda.is_available():
            lstmInput.cuda()
            label.cuda()
        pred_label = model(lstmInput)
        loss = loss_fn(label, pred_label)
        avgLoss += loss.item()
    return avgLoss / len(testData)

def train(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        avgLoss = 0.0
        for datapoint in range(len(trainData)):
            model.hidden = model.init_hidden()
            optimizer.zero_grad()
            lstmInput = trainData[datapoint][0]
            lstmInput = torch.Tensor(lstmInput)
            lstmInput = lstmInput.view(len(trainData[datapoint][0]), 1, 5)
            label = torch.Tensor(trainData[datapoint][1])
            label = label.view(1, 5)
            lstmInput = Variable(lstmInput)
            label = Variable(label)
            if use_cuda and torch.cuda.is_available():
                print("happens")
                lstmInput.cuda()
                label.cuda()
            pred_label = model(lstmInput)
            loss = loss_fn(pred_label, label)
            # print(label, pred_label)
            avgLoss += loss.item()
            loss.backward()
            optimizer.step()
        print("Epoch: ", epoch, "MSELoss: ", avgLoss / len(trainData), "Test Acc: ", test())

The cuda() method returns a copy of the tensor on the GPU rather than moving it in place, so you need to assign the result back to your variables:
lstmInput, label = lstmInput.cuda(), label.cuda()
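The same unassigned .cuda() calls appear in test(), so it needs the identical fix. For reference, a device-agnostic variant of the same idea, as a minimal sketch reusing the names from the question:

device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
model = model.to(device)  # modules are moved in place; reassignment is optional

# inside the train/test loops:
lstmInput = lstmInput.to(device)  # Tensor.to()/.cuda() return a new tensor,
label = label.to(device)          # so the result must be assigned back
pred_label = model(lstmInput)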

Related

pytorch cpu & cuda issue using LBFGS optimiser

I am getting errors trying to change a YouTube tutorial into a CUDA version. I moved both the data and the model to 'cuda', but I get an error that I do not understand.
Data:
x = np.empty((100, 1000), 'int64')
x[:] = np.array(range(1000)) + np.random.randint(-80, 80, 100).reshape(100, 1)
data = np.sin(x / 1.0 / 20).astype('float64')
input = (torch.from_numpy(data[3:, :-1])).to('cuda')
target = (torch.from_numpy(data[3:, 1:])).to('cuda')
test_input = (torch.from_numpy(data[:3, :-1])).to('cuda')
test_target = (torch.from_numpy(data[:3, 1:])).to('cuda')
Model:
class Sequence(nn.Module):
    def __init__(self):
        super(Sequence, self).__init__()
        self.lstm1 = nn.LSTMCell(1, 51)
        self.lstm2 = nn.LSTMCell(51, 51)
        self.linear = nn.Linear(51, 1)

    def forward(self, input, future=0):
        outputs = []
        h_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t = torch.zeros(input.size(0), 51, dtype=torch.double)
        h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
        for input_t in input.split(1, dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        for i in range(future):  # if we should predict the future
            h_t, c_t = self.lstm1(output, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
            output = self.linear(h_t2)
            outputs += [output]
        outputs = torch.cat(outputs, dim=1)
        return outputs
Initializing the model and training:
seq = Sequence()
seq.double()
seq.to('cuda')
criterion = nn.MSELoss()
optimizer = optim.LBFGS(seq.parameters(), lr=0.8)
# begin to train
for i in range(10):
    print('STEP: ', i)
    def closure():
        optimizer.zero_grad()
        out = seq(input)
        loss = criterion(out, target)
        print(out.is_cuda, target.is_cuda)
        print('loss:', loss.item().to('cpu').numpy())
        loss.backward()
        return loss
    optimizer.step(closure)
    # begin to predict, no need to track gradient here
    with torch.no_grad():
        future = 1000
        pred = seq(test_input, future=future)
        loss = criterion(pred[:, :-future], test_target)
        print('test loss:', loss.item())
        y = pred.detach().numpy()
The error I get is:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_mm).
The program works fine on the CPU, and I have tried moving things to and from 'cuda' to see whether it helps, with no success.
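No answer is included in this excerpt, but a likely cause (an assumption from reading the code, not confirmed in the thread) is that forward() creates the hidden and cell states with torch.zeros(...) and no device argument, so they stay on the CPU while the inputs and LSTMCell weights sit on cuda:0; the mat2 in the mm error would then be the CPU-side state. A minimal sketch of that fix:

# inside Sequence.forward: allocate the recurrent state on the same device
# (and dtype) as the incoming batch instead of defaulting to the CPU
h_t = torch.zeros(input.size(0), 51, dtype=torch.double, device=input.device)
c_t = torch.zeros(input.size(0), 51, dtype=torch.double, device=input.device)
h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double, device=input.device)
c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double, device=input.device)

Separately, loss.item() already returns a plain Python float, so loss.item().to('cpu').numpy() would raise its own error once the device mismatch is resolved; print('loss:', loss.item()) is enough.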

Stack expects tensor to be equal size, but got [66, 67, 4] at entry 0 and [66, 68, 4] at entry 7

class customDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        #self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        #if self.transform:
        #    image = self.transform(image)
        return (image, y_label)

device = torch.device("cuda")
in_channel = 1
num_classes = 1
learning_rate = 0.001
batch_size = 32
num_epochs = 1

dataset = customDataset(csv_file="biomass.csv", root_dir="biomassMerged", transform=transforms.ToTensor())
train_set, test_set = torch.utils.data.random_split(dataset, [len(dataset) - 10000, 10000])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

model = torchvision.models.googlenet(pretrained=True)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    losses = []
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Cost at each {epoch} is {sum(losses)/len(losses)}")
I have created a customDataset class since I need to go through a lot of image data that I have gathered. The issue is that every time I iterate, there is a new entry whose tensor doesn't match the others. How do I fix this? The images should all be the same size.
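No answer is included in this excerpt. The default DataLoader collate function stacks a batch's samples with torch.stack, which requires every image tensor to have exactly the same shape; the shapes in the error ([66, 67, 4] vs [66, 68, 4]) show at least one image differs by a pixel, and because the transform is commented out, the raw io.imread arrays reach the loader unresized. A minimal sketch of one fix (the 224x224 target size is an assumption, not from the question):

from torchvision import transforms

data_transform = transforms.Compose([
    transforms.ToPILImage(),        # io.imread returns a numpy HxWxC array
    transforms.Resize((224, 224)),  # force a common size so torch.stack succeeds
    transforms.ToTensor(),
])

# and in customDataset, re-enable the commented-out lines:
#   self.transform = transform
#   if self.transform:
#       image = self.transform(image)

The trailing 4 in those shapes also suggests 4-channel (e.g. RGBA) images, which a pretrained GoogLeNet expects to be converted to 3-channel RGB.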

PyTorch out of GPU memory in test loop

For the following training program, training and validation work fine. Once it reaches the test method, I get CUDA out of memory. What should I change so that I have enough memory to test as well?
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torch.nn.functional as f
from torch.autograd import Variable

# CNN, DataLoader, num_classes and device are defined elsewhere in the project
class CnnLstm(nn.Module):
    def __init__(self):
        super(CnnLstm, self).__init__()
        self.cnn = CNN()
        self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=2, batch_first=True)  # stacked LSTM with 2 layers
        self.linear = nn.Linear(256, num_classes)

    def forward(self, x):
        batch_size, time_steps, channels, height, width = x.size()
        c_in = x.view(batch_size * time_steps, channels, height, width)
        _, c_out = self.cnn(c_in)
        r_in = c_out.view(batch_size, time_steps, -1)
        r_out, (_, _) = self.rnn(r_in)
        r_out2 = self.linear(r_out[:, -1, :])
        return f.log_softmax(r_out2, dim=1)

class TrainCNNLSTM:
    def __init__(self):
        self.seed = 1
        self.batch_size = 8
        self.validate_batch_size = 8
        self.test_batch_size = 1
        self.epoch = 20
        self.learning_rate = 0.01
        self.step = 100
        self.train_loader = None
        self.validate_loader = None
        self.test_loader = None
        self.model = CnnLstm().to(device)
        self.criterion = nn.CrossEntropyLoss()

    def load_data(self):
        data_loader = DataLoader()
        self.train_loader = data_loader.get_train_data(self.batch_size)
        self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
        self.test_loader = data_loader.get_test_data(self.test_batch_size)

    def train(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
        scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.learning_rate/100.0, max_lr=self.learning_rate, step_size_up=13)
        for epoch in range(self.epoch):
            t_losses = []
            for iteration, (data, target) in enumerate(self.train_loader):
                data = np.expand_dims(data, axis=1)
                data = torch.FloatTensor(data)
                data, target = data.cuda(), target.cuda()
                data, target = Variable(data), Variable(target)
                optimizer.zero_grad()
                output = self.model(data)
                loss = self.criterion(output, target)
                t_losses.append(loss)
                loss.backward()
                optimizer.step()
                scheduler.step()
                if iteration % self.step == 0:
                    print('Epoch: {} | train loss: {:.4f}'.format(epoch, loss.item()))
            avgd_trainloss = sum(t_losses)/len(t_losses)
            self.validate(epoch, avgd_trainloss)

    def validate(self, epoch, avg_tloss):
        v_losses = []
        with torch.no_grad():
            for iteration, (data, target) in enumerate(self.validate_loader):
                data = np.expand_dims(data, axis=1)
                data = torch.FloatTensor(data)
                data, target = data.cuda(), target.cuda()
                data, target = Variable(data), Variable(target)
                output = self.model(data)
                loss = self.criterion(output, target)
                v_losses.append(loss)
        avgd_validloss = sum(v_losses)/len(v_losses)
        print('Epoch: {} | train loss: {:.4f} | validate loss: {:.4f}'.format(epoch, avg_tloss, avgd_validloss))

    def test(self):
        test_loss = []
        correct = 0
        for data, target in self.test_loader:
            data = np.expand_dims(data, axis=1)
            data = torch.FloatTensor(data)
            data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output = self.model(data)
            loss = self.criterion(output, target)
            test_loss.append(loss)
            pred = torch.max(output, 1)[1].data.squeeze()
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
        test_loss = sum(test_loss)/len(test_loss)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(self.test_loader.dataset),
            100. * correct / len(self.test_loader.dataset)))

train = TrainCNNLSTM()
train.load_data()
train.train()
train.test()
You should call .item() on your loss when appending it to the list of losses:
loss = self.criterion(output, target)
test_loss.append(loss.item())
This avoids accumulating tensors in a list that are still attached to the computational graph. The same applies to your accuracy computation.
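Applied to the test method, the change might look like this sketch. Wrapping the loop in torch.no_grad() is an addition beyond the answer above: it stops autograd from building a graph at all, so each batch's activations are freed immediately (Variable(..., volatile=True) no longer has that effect in modern PyTorch):

def test(self):
    test_loss = []
    correct = 0
    with torch.no_grad():  # no graph is built, so intermediate activations are freed
        for data, target in self.test_loader:
            data = torch.FloatTensor(np.expand_dims(data, axis=1)).cuda()
            target = target.cuda()
            output = self.model(data)
            # .item() stores a detached Python float instead of a graph-attached tensor
            test_loss.append(self.criterion(output, target).item())
            pred = torch.max(output, 1)[1]
            correct += pred.eq(target.view_as(pred)).long().cpu().sum().item()
    return sum(test_loss) / len(test_loss), correct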

5 fold cross validation using pytorch

I need to perform 5-fold cross validation on my dataset. I was able to find two examples of doing this but could not integrate them into my current pipeline. Could anyone please help me with this?
###############################################################################################
class leukemiaClassifier(Dataset):
    def __init__(self, csv_file, transform):
        self.data = pd.read_csv(csv_file)
        self.data = self.data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = self.data.loc[idx][0]
        img = Image.open(img_name).convert('RGB')
        img = cv2.imread(img_name)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = self.transform(image=img)
        image = image['image']
        labels = torch.tensor(self.data.loc[idx][1])
        return image, labels

train_file = 'train.csv'
val_file = 'test.csv'
batch_size = 28

train_dataset = leukemiaClassifier(csv_file=train_file, transform=data_transforms)
val_dataset = leukemiaClassifier(csv_file=val_file, transform=data_transforms_test)
read_target = pd.read_csv('train.csv')
target = read_target['label'].values
data_loader_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=64)
data_loader_val = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=64)

###############################################################################################
# Model, utils, hyperparameters etc.
###############################################################################################
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def efficientnet(version, num_classes):
    model = EfficientNet.from_pretrained('efficientnet-b{}'.format(version), num_classes=num_classes)
    num_ftrs = model._fc.in_features
    model._fc = nn.Linear(num_ftrs, num_classes)
    return model.cuda()

target_names = ['Lymphocyte(atypical)', 'Monoblast', 'Promyelocyte(bilobed)', 'Metamyelocyte', 'Erythroblast', 'Neutrophil(segmented)', 'Myeloblast', 'Promyelocyte', 'Monocyte', 'Lymphocyte(typical)', 'Neutrophil(band)', 'Smudge cell', 'Eosinophil', 'Myelocyte', 'Basophil']
model = efficientnet(5, 15)
model = nn.DataParallel(model)
wandb.watch(model)
# criterion = torch.nn.CrossEntropyLoss()
criterion = FocalLoss()
labels = torch.tensor((0,1,2,3,4,5,6,7,8,9,10,11,12,13,14)).cuda()
no_of_classes = 15
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.01, patience=5, verbose=True)
global_validation_loss = 100

###############################################################################################
# Training and validation loop
###############################################################################################
for epoch in range(300000):  # loop over the dataset multiple times
    running_loss = 0.0
    label_list = []
    predicted_list = []
    model = model.train()
    for batch_idx, data in enumerate(tqdm.tqdm(data_loader_train)):
        inputs, labels = data
        inputs, labels = inputs.cuda(), labels.cuda().long()
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        for item in torch.argmax(outputs, 1).detach().cpu().numpy():
            predicted_list.append(item)
        for item in labels.detach().cpu().numpy():
            label_list.append(item)
        # print statistics
        running_loss += loss.item()
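No answer is included in this excerpt. One common way to bolt 5-fold cross validation onto a pipeline like this (a sketch, not from the thread) is to let scikit-learn produce per-fold index splits and feed them to SubsetRandomSampler, rebuilding the model and optimizer for each fold:

import numpy as np
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import SubsetRandomSampler

# train_dataset, target, batch_size, efficientnet, nn and optim are the
# names defined above; each fold starts from a freshly initialized model.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(target)), target)):
    fold_train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size,
        sampler=SubsetRandomSampler(train_idx), num_workers=64)
    fold_val_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size,
        sampler=SubsetRandomSampler(val_idx), num_workers=64)
    model = nn.DataParallel(efficientnet(5, 15))
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # ... run the training/validation loop above with these loaders ...

Note that a sampler replaces shuffle=True, and the fold split should be done on the training CSV only, keeping test.csv as a final hold-out.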

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'

I am trying to classify skin images. The following code is used to train the model, but I hit the error in the title partway through training.
Environment:
python: 3.7
pytorch: 1.1
system: win10
start_epoch = 0
for epoch in range(num_epochs):
    print('Starting train epoch %d / %d' % (start_epoch + epoch + 1, num_epochs))
    model = model.to(device)
    print(device)
    #model.train()
    running_loss = 0
    count = 0
    epoch_loss = 0
    #for i, (input, depth) in enumerate(train_loader):
    for step, (input, depth) in enumerate(train_loader):
        # input, depth = data
        input = input.to(device)
        depth = depth.to(device)
        input_var = torch.tensor(input)
        depth_var = torch.tensor(depth).squeeze(1)
        #input_tensor = input_var.to(device)
        #depth_tensor = depth_var.to(device)
        output = model.forward(input_var)
        #new_output = output.to(device)
        #result = output.type(torch.FloatTensor)
        loss = loss_fn(output, depth_var)
        #loss = loss.type(torch.FloatTensor)
        print('count: ', count, ' loss:', loss.item())
        count += 1
        running_loss += loss.data.cpu().numpy()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if count % 100 == 0:
            torch.save(model, "./pkl/cifar.pkl")
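No answer is included in this excerpt. This error usually means one tensor in the forward or loss computation stayed on the CPU while the layer weights are on CUDA (or vice versa); a frequent offender, offered here as an assumption rather than a confirmed diagnosis, is a class-weight tensor handed to the loss function without being moved. Re-wrapping already-moved tensors with torch.tensor(...) is also unnecessary (it copies the data and emits a warning). A minimal sketch of the usual cleanup, where class_weights is a hypothetical name not taken from the question:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # move once, before the epoch loop
loss_fn = nn.CrossEntropyLoss(weight=class_weights.to(device))  # hypothetical weight tensor

for step, (input, depth) in enumerate(train_loader):
    input = input.to(device)
    depth = depth.to(device).squeeze(1)  # no torch.tensor() re-wrapping needed
    output = model(input)
    loss = loss_fn(output, depth)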
