CUDA error while passing hidden states to convLSTM

I'm getting the following error, here's the traceback:
Traceback (most recent call last):
File "train.py", line 136, in <module>
train(epoch)
File "train.py", line 112, in train
output = model(data, hc) # Get outputs of LSTM
File "/home/ama1128/.conda/envs/matrix/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/scratch/ama1128/convLSTM/model.py", line 118, in forward
hc = self.cell_list[t](input=x[0], prev_state=[h_0, c_0])
File "/home/ama1128/.conda/envs/matrix/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/scratch/ama1128/convLSTM/model.py", line 50, in forward
combined = torch.cat((input, h_prev), dim=1) # concatenate along channel axis
RuntimeError: Expected object of backend CUDA but got backend CPU for sequence element 1 in sequence argument at position #1 'tensors'
Training Loop:
def train(epoch):
    model.train()
    hc = model.init_hidden(batch_size=1)
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        target = data[1:]  # set target, images 2 to 18
        if gpu:
            data = data.cuda()
            target = target.cuda()
        output = model(data, hc)  # outputs of LSTM
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
convLSTMCell forward:
def forward(self, input, prev_state):
    h_prev, c_prev = prev_state
    combined = torch.cat((input, h_prev), dim=1)  # concatenate along channel axis
    combined_conv = self.conv(combined)
    cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
    i = torch.sigmoid(cc_i)
    f = torch.sigmoid(cc_f)
    o = torch.sigmoid(cc_o)
    g = torch.tanh(cc_g)
    c_cur = f * c_prev + i * g
    h_cur = o * torch.tanh(c_cur)
    return h_cur, c_cur
convLSTM Class:
Initialize states:
def init_hidden(self, batch_size):
    hidden = torch.zeros(batch_size, self.hidden_dim[0], self.height, self.width)
    cell = torch.zeros(batch_size, self.hidden_dim[0], self.height, self.width)
    if gpu:
        hidden.cuda()
        cell.cuda()
    return hidden, cell
convLSTM forward:
def forward(self, x, hc):
    outputs = []
    states = []
    x = torch.unsqueeze(x, 1)  # change shape from (18,3,128,128) to (18,1,3,128,128)
    h_0, c_0 = hc
    for t in range(0, self.num_layers):
        if t == 0:
            hc = self.cell_list[t](input=x[0], prev_state=[h_0, c_0])
            states.append(hc)
            hidden, cell = hc
            outputs.append(hidden.view(3,128,128))
        else:
            h, c = states[t-1]  # unpack previous states
            if gpu:
                h.cuda()
                c.cuda()
            hc = self.cell_list[t](input=x[t], prev_state=[h, c])  # current states
            states.append(hc)  # store current states for next cell
            hidden, cell = hc  # unpack current states
            outputs.append(hidden.view(3,128,128))
outputs is reformatted using torch.stack before being passed to the loss function.
I've tried casting the hidden states to the GPU wherever I can, but the error persists. What's going on?
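A note on what is likely happening here, judging from the snippets above (not a confirmed diagnosis): unlike Module.cuda(), Tensor.cuda() is not an in-place operation. It returns a new tensor on the GPU and leaves the original on the CPU, so calls like hidden.cuda() with no assignment are effectively no-ops. A minimal sketch of init_hidden with the results reassigned:

def init_hidden(self, batch_size):
    hidden = torch.zeros(batch_size, self.hidden_dim[0], self.height, self.width)
    cell = torch.zeros(batch_size, self.hidden_dim[0], self.height, self.width)
    if gpu:
        hidden = hidden.cuda()  # Tensor.cuda() returns a copy; keep the result
        cell = cell.cuda()
    return hidden, cell

The bare h.cuda() and c.cuda() calls in the convLSTM forward would need the same reassignment.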

Related

PyTorch on M1 Mac: RuntimeError: Placeholder storage has not been allocated on MPS device

I'm training a model in PyTorch 1.13.0 (I have also tried this on the nightly build torch-1.14.0.dev20221207 to no avail) on my M1 Mac and would like to use MPS hardware acceleration. I have the following relevant code in my project to send the model and input tensors to MPS:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") # This always results in MPS
model.to(device)
... And in my Dataset subclass:
class MyDataset(Dataset):
    def __init__(self, df, window_size):
        self.df = df
        self.window_size = window_size
        self.data = []
        self.labels = []
        for i in range(len(df) - window_size):
            x = torch.tensor(df.iloc[i:i+window_size].values, dtype=torch.float, device=device)
            y = torch.tensor(df.iloc[i+window_size].values, dtype=torch.float, device=device)
            self.data.append(x)
            self.labels.append(y)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]
This results in the following traceback during my first training step:
Traceback (most recent call last):
File "lstm_model.py", line 263, in <module>
train_losses, val_losses = train_model(model, criterion, optimizer, train_loader, val_loader, epochs=100)
File "lstm_model.py", line 212, in train_model
train_loss += train_step(model, criterion, optimizer, x, y)
File "lstm_model.py", line 191, in train_step
y_pred = model(x)
File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "lstm_model.py", line 182, in forward
out, _ = self.lstm(x, (h0, c0))
File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
return forward_call(*input, **kwargs)
File "miniconda3/envs/pytenv/lib/python3.10/site-packages/torch/nn/modules/rnn.py", line 774, in forward
result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
RuntimeError: Placeholder storage has not been allocated on MPS device!
I've tried creating tensors in my Dataset subclass without a device specified and then calling .to(device) on them:
x = torch.tensor(df.iloc[i:i+window_size].values, dtype=torch.float)
x = x.to(device)
y = torch.tensor(df.iloc[i+window_size].values, dtype=torch.float)
y = y.to(device)
I've also tried creating the tensors without a device specified in my Dataset subclass and sending tensors to device in both the forward method of my model and in my train_step function.
How can I resolve my error?
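One pattern worth checking (an assumption, since the model's forward isn't shown): the traceback fails at self.lstm(x, (h0, c0)), and a common cause of this error is allocating h0/c0 inside forward without a device, which leaves them on the CPU even when x and the model are on MPS. A minimal sketch, with self.num_layers and self.hidden_size as assumed attribute names:

def forward(self, x):
    # allocate the initial LSTM states on the same device as the input batch
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
    c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
    out, _ = self.lstm(x, (h0, c0))
    return out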

Pytorch Runtime Error - The size of tensor a (5) must match the size of tensor b (3) at non-singleton dimension

I am trying to train a Faster RCNN network on a custom dataset consisting of images for object detection. However, I don't want to give an RGB image directly as input; I actually need to pass it through another network (a feature extractor) along with the corresponding thermal image, and give the extracted features as the input to the FRCNN network. The feature extractor combines these two images into a 4-channel tensor, and its output is a 5-channel tensor. It is this 5-channel tensor that I wish to give as input to the Faster RCNN network.
I followed the PyTorch docs for Object Detection Finetuning (link here) and came up with the following code to suit my dataset.
class CustomDataset(torch.utils.data.Dataset):
    def __getitem__(self, idx):
        self.num_classes = 5
        img_rgb_path = os.path.join(self.root, "rgb/", self.rgb_imgs[idx])
        img_thermal_path = os.path.join(self.root, "thermal/", self.thermal_imgs[idx])
        img_rgb = Image.open(img_rgb_path)
        img_rgb = np.array(img_rgb)
        x_rgb = TF.to_tensor(img_rgb)
        x_rgb.unsqueeze_(0)
        img_thermal = Image.open(img_thermal_path)
        img_thermal = np.array(img_thermal)
        img_thermal = np.expand_dims(img_thermal, -1)
        x_th = TF.to_tensor(img_thermal)
        x_th.unsqueeze_(0)
        print(x_rgb.shape)  # shape of [3,640,512]
        print(x_th.shape)  # shape of [1,640,512]
        input = torch.cat((x_rgb, x_th), dim=1)  # shape of [4,640,512]
        img = self.feature_extractor(input)  # my custom feature extractor, which returns a 5-channel tensor
        print(img.shape)  # shape of [5,640,512]
        filename = os.path.join(self.root, 'annotations', self.annotations[idx])
        tree = ET.parse(filename)
        objs = tree.findall('object')
        num_objs = len(objs)
        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        labels = np.zeros((num_objs), dtype=np.float32)
        seg_areas = np.zeros((num_objs), dtype=np.float32)
        boxes = []
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            x1 = float(bbox.find('xmin').text)
            y1 = float(bbox.find('ymin').text)
            x2 = float(bbox.find('xmax').text)
            y2 = float(bbox.find('ymax').text)
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes.append([x1, y1, x2, y2])
            labels[ix] = cls
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        seg_areas = torch.as_tensor(seg_areas, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.float32)
        target = {'boxes': boxes,
                  'labels': labels,
                  'seg_areas': seg_areas,
                  }
        return img, target
My main function code is as follows:
import utils

def train_model(model, criterion, dataloader, num_epochs):
    since = time.time()
    best_model = model
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        optimizer = torch.optim.SGD(params, lr=0.005,
                                    momentum=0.9, weight_decay=0.0005)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=3,
                                                       gamma=0.1)
        # optimizer = lr_scheduler(optimizer, epoch)
        model.train()  # set model to training mode
        running_loss = 0.0
        running_corrects = 0
        for data in dataloader:
            inputs, labels = data[0][0], data[1]
            inputs = inputs.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward
            outputs = model(inputs, labels)
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels).item()
        epoch_loss = running_loss / len(dataloader)
        epoch_acc = running_corrects / len(dataloader)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            phase, epoch_loss, epoch_acc))

backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
num_classes = 5
model = FasterRCNN(backbone=backbone, num_classes=5, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler)
dataset = CustomDataset('train_folder/')
data_loader_train = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=utils.collate_fn)
train_model(model, criterion, data_loader_train, num_epochs=10)
The collate_fn defined in the utils.py file is the following
def collate_fn(batch):
    return tuple(zip(*batch))
I, however, get the following error while training
Traceback (most recent call last):
File "train.py", line 147, in <module>
train_model(model, criterion, data_loader_train, num_epochs)
File "train.py", line 58, in train_model
outputs = model(inputs, labels)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/generalized_rcnn.py", line 66, in forward
images, targets = self.transform(images, targets)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/transform.py", line 46, in forward
image = self.normalize(image)
File "/usr/local/lib/python3.6/dist-packages/torchvision/models/detection/transform.py", line 66, in normalize
return (image - mean[:, None, None]) / std[:, None, None]
RuntimeError: The size of tensor a (5) must match the size of tensor b (3) at non-singleton dimension 0
I am a newbie in PyTorch.
The backbone network you are using for the FasterRCNN is a pretrained mobilenet_v2.
The input channel count of a network is determined by the number of channels of the input data. Since the backbone model is pretrained (presumably on natural images) with 3-channel inputs of shape 3xNxM, you cannot use it for tensors of shape 5xPxQ (skipping the singleton batch dimension).
Basically, you have two options:
1. Reduce the output channel dimension of the first network to 3 (better if you are training it from scratch).
2. Make a new backbone for the FasterRCNN with 5 input channels and train it from scratch.
As for the error message itself:
return (image - mean[:, None, None]) / std[:, None, None]
PyTorch is trying to normalize the input image, where your input image has shape (5,M,N) but the mean and std tensors have 3 channels instead of 5.
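For illustration, a minimal sketch of option 1, assuming the feature extractor can be extended by one extra layer: project its 5-channel output down to the 3 channels the pretrained backbone (and its 3-value mean/std normalization) expects.

import torch.nn as nn

# hypothetical bridge layer appended to the feature extractor:
# a 1x1 convolution mapping 5 channels down to 3
to_three_channels = nn.Conv2d(in_channels=5, out_channels=3, kernel_size=1)

# e.g. applied to the extractor's output before it reaches the FasterRCNN:
# img = to_three_channels(img.unsqueeze(0)).squeeze(0)

Alternatively, torchvision's FasterRCNN constructor accepts image_mean and image_std keyword arguments, so 5-element lists could be passed there if the 5-channel input must be kept, but the pretrained backbone weights would still expect 3 input channels.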

AttributeError: dataset object has no attribute 'c' FastAI

I am trying to train a ResNet-based UNet for image segmentation. I have the locations of the images and mask images in a csv file, which is why I have created my own dataloader, as follows:
X = list(df['input_img'])
y = list(df['mask_img'])

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.33, random_state=42)

class NumbersDataset():
    def __init__(self, inputs, labels):
        self.X = inputs
        self.y = labels

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_train = cv2.imread(self.X[idx])
        img_mask = cv2.imread(self.y[idx])
        img_train = cv2.resize(img_train, (427,240), interpolation=cv2.INTER_LANCZOS4)
        img_mask = cv2.resize(img_mask, (427,240), interpolation=cv2.INTER_LANCZOS4)
        return img_train, img_mask
I then call this data generator in the __main__ function:
if __name__ == '__main__':
    dataset_train = NumbersDataset(X_train, y_train)
    dataloader_train = DataLoader(dataset_train, batch_size=4, shuffle=True, num_workers=2)
    dataset_valid = NumbersDataset(X_valid, y_valid)
    dataloader_valid = DataLoader(dataset_valid, batch_size=4, shuffle=True, num_workers=2)
    datas = DataBunch(train_dl=dataloader_train, valid_dl=dataloader_valid)
    leaner = unet_learner(data=datas, arch=models.resnet34)
But I end up getting the following error:
Traceback (most recent call last):
File "dataset_test.py", line 70, in <module>
leaner = unet_learner(data = datas, arch = models.resnet34)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/vision/learner.py", line 118, in unet_learner
model = to_device(models.unet.DynamicUnet(body, n_classes=data.c, img_size=size, blur=blur, blur_final=blur_final,
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 122, in __getattr__
def __getattr__(self,k:int)->Any: return getattr(self.train_dl, k)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 38, in __getattr__
def __getattr__(self,k:str)->Any: return getattr(self.dl, k)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 20, in DataLoader___getattr__
def DataLoader___getattr__(dl, k:str)->Any: return getattr(dl.dataset, k)
AttributeError: 'NumbersDataset' object has no attribute 'c'
I tried searching and even tried using SegmentationItemList.from_df but nothing helped. What am I getting wrong here?
You should add the attribute c to your NumbersDataset, like this:
def __init__(self, inputs, labels, c):
    self.inputs = inputs
    self.labels = labels
    self.c = c
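For context, the traceback itself shows why: unet_learner builds DynamicUnet(body, n_classes=data.c, ...), so c is the number of output classes for the final layer, and the DataBunch attribute lookup falls through to the underlying dataset. A hypothetical usage, assuming binary (foreground/background) masks:

# c=2 is a hypothetical value for binary segmentation masks
dataset_train = NumbersDataset(X_train, y_train, c=2)
dataset_valid = NumbersDataset(X_valid, y_valid, c=2)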

AttributeError: 'list' object has no attribute 'dim' when predicting in pytorch

I'm currently loading in a model and 11 input values. Then I'm sending those 11 values into a tensor and attempting to predict outputs.
Here is my code:
# coding: utf-8

# In[5]:

import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils
import numpy as np

data_np = np.loadtxt('input_preds.csv', delimiter=',')
train_ds = utils.TensorDataset(torch.tensor(data_np, dtype=torch.float32).view(-1,11))
trainset = torch.utils.data.DataLoader(train_ds, batch_size=1, shuffle=True)

# setting device on GPU if available, else CPU; replace .cuda() with .to(device)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        #self.bn = nn.BatchNorm2d(11)
        self.fc1 = nn.Linear(11, 22)
        self.fc2 = nn.Linear(22, 44)
        self.fc3 = nn.Linear(44, 22)
        self.fc4 = nn.Linear(22, 11)

    def forward(self, x):
        #x = x.view(-1, 11)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        #return F.log_softmax(x, dim=1)
        return x

model1 = torch.load('./1e-2')
model2 = torch.load('./1e-3')

for data in trainset:
    X = data
    X = X
    output = model1(X).to(device)
    print(output)
However, I get this error
Traceback (most recent call last):
File "inference.py", line 53, in <module>
output = model1(X).to(device)
File "C:\Users\Happy\Miniconda3\envs\torch\lib\site-packages\torch\nn\modules\module.py", line 477, in __call__
result = self.forward(*input, **kwargs)
File "inference.py", line 40, in forward
x = F.relu(self.fc1(x))
File "C:\Users\Happy\Miniconda3\envs\torch\lib\site-packages\torch\nn\modules\module.py", line 477, in __call__
result = self.forward(*input, **kwargs)
File "C:\Users\Happy\Miniconda3\envs\torch\lib\site-packages\torch\nn\modules\linear.py", line 55, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\Happy\Miniconda3\envs\torch\lib\site-packages\torch\nn\functional.py", line 1022, in linear
if input.dim() == 2 and bias is not None:
AttributeError: 'list' object has no attribute 'dim'
I've tried to convert the batch to a numpy array but that didn't help. How do I resolve this error? Thank you for your help.
It looks like your X (data) is a list of tensors, while a PyTorch tensor is expected.
Try X = torch.stack(X).to(device) before sending to the model.
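For completeness, a minimal sketch of the loop with that unwrapping applied. A batch from a DataLoader over a TensorDataset arrives as a list holding one tensor per dataset field, so indexing works as well as stacking:

model1.to(device)  # assumption: the model must live on the same device as X
for data in trainset:
    X = data[0].to(device)  # data is [tensor]; unwrap the single field
    output = model1(X)
    print(output)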

Keras error Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (10, 5, 95)

I'm trying to create a simple RNN using Keras, but I'm getting this error.
The input is a stream of letters represented as one-hot (binary) classes. The shape is (10, 5, 95): 10 sequences per batch, 5 letters at a time, 95 characters in total.
I'm guessing it has something to do with the output being incorrectly fed back as input, but I'm not sure how to handle it.
Traceback (most recent call last):
File "07_rnn.py", line 90, in <module>
model.fit(x, y, epochs=3, batch_size=BATCHSIZE)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/models.py", line 965, in fit
validation_steps=validation_steps)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1593, in fit
batch_size=batch_size)
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1430, in _standardize_user_data
exception_prefix='target')
File "/home/dmabelin/.local/lib/python3.5/site-packages/keras/engine/training.py", line 110, in _standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking target: expected activation_1 to have 2 dimensions, but got array with shape (10, 5, 95)
Code
import numpy as np
import glob
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical

CHARMAP = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-=!##$%^&*()_+`~[]\{}|;':\",./<>?"
SEQLEN = 5
BATCHSIZE = 10
ALPHASIZE = len(CHARMAP)
INTERNALSIZE = 128
FILES = "shakespeare/*.txt"
LEARNING_RATE = 0.001

## Data related stuff

def char_to_value(char):
    idx = CHARMAP.find(char)
    if idx >= 0:
        return idx
    else:
        return 0

def char_to_class_map(char):
    value = char_to_value(char)
    return to_categorical(value, ALPHASIZE)

def value_to_char(value):
    return CHARMAP[value]

# iterate every single file
def get_file_data(pattern, index):
    paths = glob.glob(pattern)
    length = len(paths)
    if index < length:
        data = []
        with open(paths[index], "r") as file:
            for line in file:
                line_values = [char_to_class_map(l) for l in line]
                data = data + list(line_values)
        return data
    else:
        return None

# get batch data in file
def build_line_data(file_data, seqlen, batch_index, batch_count):
    length = len(file_data)
    start = batch_index * batch_count
    end = start + seqlen
    x = []
    y = []
    while end + 1 <= length and len(x) < batch_count:
        x_line = file_data[start:end]
        y_line = file_data[start+1:end+1]
        x.append(x_line)
        y.append(y_line)
        start = start + 1
        end = start + seqlen
    x = np.array(x)
    y = np.array(y)
    return x, y

def create_model():
    model = Sequential()
    model.add(LSTM(INTERNALSIZE, input_shape=(SEQLEN, ALPHASIZE)))
    model.add(Dense(ALPHASIZE))
    model.add(Activation('softmax'))
    # Adam optimizer
    optimizer = Adam(lr=LEARNING_RATE)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return model

print('before create_model')
model = create_model()
print('after create_model')
for i in range(1):
    print('before get file data')
    file_data = get_file_data(FILES, i)
    print('after get file data')
    idx = 0
    while True:
        print('before build line data')
        x, y = build_line_data(file_data, SEQLEN, idx, BATCHSIZE)
        print('after build line data')
        print('before fit')
        model.fit(x, y, epochs=3, batch_size=BATCHSIZE)
        print('after fit')
        idx = idx + 1
        if 0 == len(x):
            break
        if idx > 10:
            break
GitHub link: https://github.com/djaney/ml-studies/blob/master/07_rnn.py
Edit: setting return_sequences=True on the LSTM fixed it.
What are you trying to predict? If it is a sequence-to-sequence model, then return_sequences=True is the right way to go.
The reason for the error is that your target is 3-dimensional (batchsize, sequence_length, features), while the LSTM layer only outputs (batchsize, features) for the last time step of the sequence when return_sequences=False.
So depending on your application, you have to change the shape of your targets or set return_sequences=True, as you already did.
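For concreteness, a minimal sketch of the questioner's create_model with only that change applied (everything else as in the question):

def create_model():
    model = Sequential()
    # return_sequences=True emits an output for every time step, so the
    # model's output shape (BATCHSIZE, SEQLEN, ALPHASIZE) matches the 3D targets
    model.add(LSTM(INTERNALSIZE, input_shape=(SEQLEN, ALPHASIZE), return_sequences=True))
    model.add(Dense(ALPHASIZE))  # Dense maps the last axis, time step by time step
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=LEARNING_RATE))
    return model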
