I have written a custom Dataset and DataLoader for a PyTorch CNN project. Here is the relevant code for the dataset
class MyDataset(Dataset):
    def __init__(self):
        pass

    def __len__(self):
        return COUNT

    def __getitem__(self, idx):
        x, y = X[idx], Y[idx]
        x = image_augment(x)  # custom func to resize image to 32x32
        return x, y
The shape of each training x is [4, 32, 32, 3].
And here is my Net code, taken directly from this PyTorch example.
class Net(nn.Module):
    def __init__(self, nc):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, nc)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
When I try to train this net on my data from my DataLoader, I get the error: Given groups=1, weight of size [6, 3, 5, 5], expected input[4, 32, 32, 3] to have 3 channels, but got 200 channels instead. It seems to me the issue is the shape of the data coming from my DataLoader, so I tried x.view(4, 3, 32, 32), but then I got an error saying I couldn't use Conv2d on a ByteTensor. I'm a little lost here and would really appreciate any help. Thanks!
I got it eventually. I had to use x = x.view(x.shape[0], 3, self.img_height, self.img_width).type('torch.FloatTensor'), for example. That changes the shape from [4, 32, 32, 3] to [4, 3, 32, 32].
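For what it's worth, a minimal sketch of that conversion (with illustrative dummy data, not the original dataset): view only reinterprets memory, so if the goal is to move the channel axis of an NHWC batch, permute is the usual tool, and float() takes care of the ByteTensor complaint.

    import torch

    x = torch.randint(0, 256, (4, 32, 32, 3), dtype=torch.uint8)  # NHWC ByteTensor batch, as in the question
    x = x.permute(0, 3, 1, 2).contiguous().float()                # NCHW FloatTensor, shape [4, 3, 32, 32]
    print(x.shape)                                                # torch.Size([4, 3, 32, 32])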
This is the architecture, based on a research paper.
class NBV_Net(nn.Module):
    def __init__(self, dropout_prob):
        super(NBV_Net, self).__init__()
        # dropout_prob = 0.0 # 1 - 0.7

        # Four 3D convolutional layers
        self.conv1 = nn.Conv3d(1, 16, 3, stride=1, padding=1)
        self.pool1 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv2 = nn.Conv3d(16, 32, 3, stride=1, padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv3 = nn.Conv3d(32, 64, 3, stride=1, padding=1)
        self.conv3_drop = nn.Dropout(dropout_prob)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv4 = nn.Conv3d(64, 64, 3, stride=1, padding=1)
        self.conv4_drop = nn.Dropout(dropout_prob)

        # Five fully connected layers
        self.fc1 = nn.Linear(4096, 1500)
        self.fc1_drop = nn.Dropout(dropout_prob)
        self.fc2 = nn.Linear(1500, 500)
        self.fc2_drop = nn.Dropout(dropout_prob)
        self.fc3 = nn.Linear(500, 100)
        self.fc3_drop = nn.Dropout(dropout_prob)
        self.fc4 = nn.Linear(100, 50)
        self.fc4_drop = nn.Dropout(dropout_prob)
        self.fc5 = nn.Linear(50, 3)

    def forward(self, x):
        ## feedforward behavior of NBV-net
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))
        x = self(F.relu(self.conv4(x)))
        # Flatten
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_drop(x)
        x = F.relu(self.fc3(x))
        x = self.fc3_drop(x)
        x = F.relu(self.fc4(x))
        x = self.fc4_drop(x)
        x = F.tanh(self.fc5(x))
        return x
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3, 3], expected input[250, 64, 4, 4, 4] to have 1 channels, but got 64 channels instead
But this code gives the RuntimeError above. There are similar errors out there, but I could not understand what groups=1 and the other dimensions mentioned actually mean. Any idea about the background of this error?
The input shape for nn.Conv3d(1,16, 3, stride=1, padding=1) is (batch, channels, depth, height, width).
You defined the channel size as 1, but your input tensor has 64 channels.
self.conv1 = nn.Conv3d(64, 16, 3, stride=1, padding=1) will resolve your error.
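As an aside (not part of the original answer), the 64-channel tensor only reaches conv1 because forward contains x = self(F.relu(self.conv4(x))), which feeds conv4's 64-channel output back through the whole network recursively. If the dropout layer that is defined but never used was the intended call, a sketch of that one line would be:

    # hypothetical replacement for the conv4 line in forward(), keeping conv1 at 1 input channel
    x = self.conv4_drop(F.relu(self.conv4(x)))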
I'm working on an image classification network and have a problem with the right input and output sizes in the forward() function. I have no idea how to solve this, because the sizes seem to match to me. The error comes from this line:
x = F.relu(self.fc1(x)), but I can't figure it out.
Can anyone please help me with this problem?
That's my code:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=2)
        self.conv2 = nn.Conv2d(8, 12, kernel_size=2)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=2)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=2)
        self.fc1 = nn.Linear(5400, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        print(f'1. {x.size()}')
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        print(f'2. {x.size()}')
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        print(f'3. {x.size()}')
        x = self.conv3(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        print(f'4. {x.size()}')
        x = self.conv4(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        print(f'5. {x.size()}')
        x = x.view(-1, x.size(0))
        print(f'6. {x.size()}')
        x = F.relu(self.fc1(x))
        print(f'7. {x.size()}')
        x = self.fc2(x)
        print(f'8. {x.size()}')
        return torch.sigmoid(x)
That's the print output:
1. torch.Size([64, 3, 256, 256])
2. torch.Size([64, 8, 127, 127])
3. torch.Size([64, 12, 63, 63])
4. torch.Size([64, 18, 31, 31])
5. torch.Size([64, 24, 15, 15])
6. torch.Size([5400, 64])
I think changing

    x = x.view(-1, x.size(0))

to

    x = x.view(-1, 5400)

will solve your problem. You can see in print 6:

    6. torch.Size([5400, 64])

that the batch size 64 ends up on axis 1 instead of axis 0. The fully connected layer expects 5400 features per sample, so this change should fix it: you don't know the batch size in advance, but you do know that the input to the fully connected layer is 5400.
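As a quick check (a minimal sketch with a dummy tensor, not from the original post), flattening the shape printed at step 5 both ways shows why the axes end up swapped:

    import torch

    x = torch.randn(64, 24, 15, 15)        # shape after the last pooling step (print 5)
    print(x.view(-1, x.size(0)).shape)     # torch.Size([5400, 64]) -- batch ends up on axis 1
    print(x.view(-1, 5400).shape)          # torch.Size([64, 5400]) -- what fc1 expects
    print(x.view(x.size(0), -1).shape)     # torch.Size([64, 5400]) -- equivalent, without hard-coding 5400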
There is a class where everything is set up for the 32x32 image format, taken from here.
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)  # here I changed the image channels from 3 to 1
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 64, 5)
        self.fc1 = nn.Linear(64 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 22)  # here I changed the number of output neurons from 10 to 22

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
How do I change all of this for a resolution of 96x96 with 1 channel (grayscale)?
At resolution 32x32, the output after the second conv + pool block is shaped (1, 64, 5, 5). If the input is at resolution 96x96 instead, it will be (1, 64, 21, 21). This means fc1 needs 64 * 21 * 21 = 28224 input neurons.

>>> self.fc1 = nn.Linear(64 * 21 * 21, 120)

Alternatively, you can use nn.LazyLinear, which will infer this number for you based on the first inference.

>>> self.fc1 = nn.LazyLinear(120)
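Putting it together, since conv1 already takes 1 input channel, only fc1 needs to change; a quick sanity check on a dummy grayscale batch (a sketch, assuming the class above with the updated fc1) would be:

    net = Net()                          # Net with self.fc1 = nn.Linear(64 * 21 * 21, 120)
    dummy = torch.randn(1, 1, 96, 96)    # one 96x96 grayscale image
    print(net(dummy).shape)              # expected: torch.Size([1, 22])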
I have two tensors of different sizes to put through the network.
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = TC(C(a)), TC(C(b))
The results are

    a_out -> torch.Size([1, 1, 99])   # What I want is [1, 1, 100]
    b_out -> torch.Size([1, 1, 101])
Is there any method to handle this problem?
I need your help.
Thanks
This is expected behaviour as per the documentation. With kernel_size=1 and stride=2, Conv1d maps a length-L input to floor((L - 1) / 2) + 1 samples and ConvTranspose1d maps length L back to (L - 1) * 2 + 1, so 100 -> 50 -> 99, whereas 101 -> 51 -> 101. Padding can be applied when an even input length is detected to get back the same length as the input.
Something like this
class PadEven(nn.Module):
    def __init__(self, conv, deconv, pad_value=0, padding=(0, 1)):
        super().__init__()
        self.conv = conv
        self.deconv = deconv
        self.pad = nn.ConstantPad1d(padding=padding, value=pad_value)

    def forward(self, x):
        nd = x.size(-1)
        x = self.deconv(self.conv(x))
        if nd % 2 == 0:
            x = self.pad(x)
        return x
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
P = PadEven(C, TC)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = P(a), P(b)
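With this wrapper both inputs should come back at their original lengths (expected output, assuming the code above):

    print(a_out.shape, b_out.shape)    # torch.Size([1, 1, 100]) torch.Size([1, 1, 101])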
I am trying to get the input and output information of a network. When debugging, I got this error: RuntimeError: shape '[-1, 400]' is invalid for input of size 384. I tried different values, but can't find the correct one. Is there a way to solve this issue? Thanks.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

input_shape = (3, 21, 21)
dummy_input = torch.randn(6, *input_shape)
graph = torch.jit._get_trace_graph(model, args=dummy_input, _force_outplace=False, _return_inputs_states=False)
Error message:
RuntimeError: shape '[-1, 400]' is invalid for input of size 384
With a (3, 21, 21) input, the tensor after the convolutional layers has shape [6, 16, 2, 2] (21 -> 17 after conv1, 8 after pooling, 4 after conv2, 2 after pooling), so you cannot reshape it to 16*5*5 before feeding it to the linear layers. You should change your network to the one given below if you want to keep the same filter sizes as the original in the convolutional layers.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 2 * 2, 120)  # changed the size
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 2 * 2)  # changed the size
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
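If you would rather not hard-code the flattened size for each input resolution, one alternative (a sketch, not part of the original answer) is to flatten per sample and let nn.LazyLinear infer in_features on the first forward pass, similar to what the answers above use:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.LazyLinear(120)    # in_features inferred at the first call
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = torch.flatten(x, 1)          # flatten everything except the batch dimension
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model = Net()
    print(model(torch.randn(6, 3, 21, 21)).shape)    # torch.Size([6, 10])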