I want to implement these three layers in a single class, but I face this problem
ValueError: out_channels must be divisible by groups
I don't understand where is the mistake?!
THis is the code:
class Block(nn.Module):
def __init__(self, in_channels, out_channels, exp=1, stride=1, type=''):
super().__init__()
self.t = type
self.stride = stride
self.inc, self.outc = in_channels, out_channels
self.exp = exp
self.blockc = nn.Sequential(
nn.Conv2d(self.inc, self.inc* self.exp, kernel_size=1),
nn.ReLU(),
nn.Conv2d(self.inc * self.exp, self.outc, kernel_size=3, groups= self.inc * self.exp, stride= self.stride),
nn.ReLU(),
nn.Conv2d(self.outc, self.outc * self.exp, kernel_size=1),
nn.Linear(self.outc * self.exp, self.outc * self.exp))
def forward(self, x):
out = self.blockc(x)
if self.t == 'A':
out = torch.sum(out,x)
return out
and here is the layers consists of depthwise convolution and conv 1x1 also there is a param exp which means out_channels = exp* in_channles
Here is a possible implementation, you will have to adjust the channels and padding for your needs:
class BType(Enum):
A = 0
B = 1
C = 2
class Block(nn.Module):
def __init__(self, c_in: int, c_out: int, btype: BType) -> nn.Module:
super().__init__()
self.btype = btype
if btype == BType.A:
assert c_in == c_out
self.c1 = nn.Sequential(
nn.Conv2d(c_in, c_in, kernel_size=1),
nn.ReLU())
self.c2 = nn.Sequential(
nn.Conv2d(c_in, c_in, kernel_size=3, groups=c_in,
stride=2 if btype == BType.C else 1,
padding=2 if btype == BType.C else 1),
nn.ReLU())
self.c3 = nn.Conv2d(c_in, c_out, kernel_size=1)
def forward(self, x: torch.Tensor) -> torch.Tensor:
out = self.c1(x)
out = self.c2(out)
out = self.c3(out)
if self.btype == BType.A:
out += x
return out
Here is a test with all three block types:
block A:
>>> block = Block(3, 3, BType.A)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 3, 10, 10])
block B:
>>> block = Block(3, 10, BType.B)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 10, 10, 10])
block C:
>>> block = Block(3, 10, BType.C)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 10, 6, 6])
Related
I got this error RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[8, 1, 256, 256] to have 3 channels, but got 1 channels instead. This is my code
I haven't found the cause for this. Can anyone help me to figure out the problems? Thank you
"'"
"'"
class UNet(nn.Module):
def __init__(self, in_channels=3, features=[32,64,128,512], num_classes=3):
super().__init__()
self.down = []
self.pool = nn.MaxPool2d(2,2)
for i in range(len(features)):
if i == 0:
self.down.append(
nn.Sequential(
DoubleConv(in_channels, features[i])
)
)
else:
self.down.append(
nn.Sequential(
DoubleConv(features[i-1], features[i])
)
)
self.down = nn.ModuleList(self.down)
self.lower = DoubleConv(features[-1], features[-1]*2)
self.up = []
self.dbc = []
features.reverse()
for i in range(len(features)):
if i == 0:
self.up.append(
nn.Sequential(
nn.ConvTranspose2d(features[0]*2,features[i], 2, 2),
nn.ReLU()
)
)
self.dbc.append(DoubleConv(features[i]*2, features[i]))
else:
self.up.append(
nn.Sequential(
nn.ConvTranspose2d(features[i-1], features[i],2,2),
nn.ReLU(),
)
)
self.dbc.append(DoubleConv(features[i]*2, features[i]))
self.up = nn.ModuleList(self.up)
self.classifier = nn.Conv2d(features[-1], num_classes, 3, 1, 1)
def forward(self, x):
x1 = self
x_down = []
for i, layer in enumerate(self.down):
x_down.append(layer(x))
x = self.pool(x_down[-1])
print(x.shape)
x_lower = self.lower(x)
print(x_lower.shape)
x_down.reverse()
x_up = []
for i, (layer_up, layer_dbc) in enumerate(zip(self.up, self.dbc)):
if i == 0:
temp_x = layer_up(x_lower)
print(temp_x.shape)
temp_x = torch.cat((temp_x, x_down[i]), dim=1)
x_up.append(layer_dbc(temp_x))
else:
temp_x = layer_up(x_up[-1])
temp_x = torch.cat((temp_x, x_down[i]), dim=1)
x_up.append(layer_dbc(temp_x))
# print(x_up[-1].shape)
x_classifier = self.classifier(x_up[-1])
print(x_classifier.shape)
return x_classifier
I tried to check the model with the input
model = UNet()
x = torch.randn(3,3,256,256)
output = model(x)
and work
This is the architecture based on a research paper.
class NBV_Net(nn.Module):
def __init__(self, dropout_prob):
super(NBV_Net, self).__init__()
#dropout_prob = 0.0 # 1 - 0.7
# Four 3D convolutional layers
self.conv1 = nn.Conv3d(1,16, 3, stride=1, padding=1)
self.pool1 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv2 = nn.Conv3d(16, 32, 3, stride=1, padding=1)
self.pool2 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv3 = nn.Conv3d(32, 64, 3, stride=1, padding=1)
self.conv3_drop = nn.Dropout(dropout_prob)
self.pool3 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv4 = nn.Conv3d(64, 64, 3, stride=1, padding=1)
self.conv4_drop = nn.Dropout(dropout_prob)
# Five fully connected layers
self.fc1 = nn.Linear(4096, 1500)
self.fc1_drop = nn.Dropout(dropout_prob)
self.fc2 = nn.Linear(1500, 500)
self.fc2_drop = nn.Dropout(dropout_prob)
self.fc3 = nn.Linear(500, 100)
self.fc3_drop = nn.Dropout(dropout_prob)
self.fc4 = nn.Linear(100, 50)
self.fc4_drop = nn.Dropout(dropout_prob)
self.fc5 = nn.Linear(50, 3)
def forward(self, x):
## feedforward behavior of NBV-net
x = self.pool1(F.relu(self.conv1(x)))
x = self.pool2(F.relu(self.conv2(x)))
x = self.pool3(F.relu(self.conv3(x)))
x = self(F.relu(self.conv4(x)))
# Aplanar
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc1_drop(x)
x = F.relu(self.fc2(x))
x = self.fc2_drop(x)
x = F.relu(self.fc3(x))
x = self.fc3_drop(x)
x = F.relu(self.fc4(x))
x = self.fc4_drop(x)
x = F.tanh(self.fc5(x))
return x
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3, 3], expected input[250, 64, 4, 4, 4] to have 1 channels, but got 64 channels instead
But this code gives the Runtime Error. Similar errors are there but I could not understand what Group 1 and other dimensions mentioned exactly mean , any idea about the background of this error ?
The input shape for nn.Conv3d(1,16, 3, stride=1, padding=1) is (batch, channels, depth, height, width).
You define that the channel size is 1 but your input tensor has 64 channels.
self.conv1 = nn.Conv3d(64,16, 3, stride=1, padding=1) will resolve you error
I have two different size tensors to put in the network.
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = TC(C(a)), TC(C(b))
The results are
a_out = torch.size([1, 1, 99]) # What I want is [1, 1, 100]
b_out = torch.size([1, 1, 101])
Is there any method to handle this problem?
I need your help.
Thanks
It is expected behaviour as per documentation. May be padding can be used when even input length is detected to get same length as input.
Something like this
class PadEven(nn.Module):
def __init__(self, conv, deconv, pad_value=0, padding=(0, 1)):
super().__init__()
self.conv = conv
self.deconv = deconv
self.pad = nn.ConstantPad1d(padding=padding, value=pad_value)
def forward(self, x):
nd = x.size(-1)
x = self.deconv(self.conv(x))
if nd % 2 == 0:
x = self.pad(x)
return x
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
P = PadEven(C, TC)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = P(a), P(b)
I have written a custom Dataset and DataLoader for a PyTorch CNN project. Here is the relevant code for the dataset
class MyDataset(Dataset):
def __init__(self):
pass
def __len__(self):
return COUNT
def __getitem__(self, idx):
x, y = X[idx], Y[idx]
x = image_augment(x) # custom func to resize image to 32x32
return x, y
The shape of each training x is [4, 32, 32, 3].
And here is my Net code, taken directly from this PyTorch example.
class Net(nn.Module):
def __init__(self, nc):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, nc)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
When I try to train this net on my data from my DataLoader, I get the error statement Given groups=1, weight of size [6, 3, 5, 5], expected input[4, 32, 32, 3] to have 3 channels, but got 200 channels instead. It seems to me my issue is with the shape of my data coming from my DataLoader using x.view(4, 3, 32, 32), but then I got an error saying I couldn't use Conv2D on a ByteTensor. I'm a little lost here and would really appreciate any help. Thanks!
I got it eventually. Had to x = x.view(x.shape[0], 3, self.img_height, self.img_width).type('torch.FloatTensor'). for example. This would make that swap from [4, 32, 32, 3] to [4, 3, 32, 32].
I want to try some toy examples in pytorch, but the training loss does not decrease in the training.
Some info is provided here:
The model is vgg16, consisted of 13 conv layers and 3 dense layers.
The data is cifar100 in pytorch.
I choose cross entropy as the loss function.
The code is as follows
# encoding: utf-8
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision
import numpy as np
class VGG16(torch.nn.Module):
def __init__(self, n_classes):
super(VGG16, self).__init__()
# construct model
self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
self.fc6 = nn.Linear(512, 512)
self.fc7 = nn.Linear(512, 512)
self.fc8 = nn.Linear(512, n_classes)
def forward(self, x):
x = F.relu(self.conv1_1(x))
x = F.relu(self.conv1_2(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv2_1(x))
x = F.relu(self.conv2_2(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv3_1(x))
x = F.relu(self.conv3_2(x))
x = F.relu(self.conv3_3(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv4_1(x))
x = F.relu(self.conv4_2(x))
x = F.relu(self.conv4_3(x))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv5_1(x))
x = F.relu(self.conv5_2(x))
x = F.relu(self.conv5_3(x))
x = F.max_pool2d(x, (2, 2))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))
x = self.fc8(x)
return x
def num_flat_features(self, x):
size = x.size()[1:]
num_features = 1
for s in size:
num_features *= s
return num_features
if __name__ == '__main__':
BATCH_SIZE = 128
LOG_INTERVAL = 5
# data
transform = transforms.Compose([
transforms.ToTensor()
])
trainset = torchvision.datasets.CIFAR100(
root='./data',
train=True,
download=True,
transform=transform
)
testset = torchvision.datasets.CIFAR100(
root='./data',
train=False,
download=True,
transform=transform
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
# model
vgg16 = VGG16(100)
vgg16.cuda()
# optimizer
optimizer = optim.SGD(vgg16.parameters(), lr=0.01)
# loss
criterion = nn.CrossEntropyLoss()
print('———— Train Start —————')
for epoch in range(20):
running_loss = 0.
for step, (batch_x, batch_y) in enumerate(trainloader):
batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
#
optimizer.zero_grad()
output = vgg16(batch_x)
loss = criterion(output, batch_y)
loss.backward()
optimizer.step()
running_loss += loss.item()
if step % LOG_INTERVAL == 0:
print('[%d, %4d] loss: %.4f' % (epoch, step, running_loss / LOG_INTERVAL))
running_loss = 0.
def test():
print('———— Test Start ————')
correct = 0
total = 0
#
with torch.no_grad():
for test_x, test_y in testloader:
images, labels = test_x.cuda(), test_y.cuda()
output = vgg16(images)
_, predicted = torch.max(output.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print('Accuracy of the network is: %.4f %%' % accuracy)
print('———— Test Finish ————')
test()
print('———— Train Finish —————')
The loss stays around 4.6060 and never decrease. I have tried different learning rate but does not work.
I have noticed that you are not using Batch normalization in between your convolution layers. I have added batch normalization layers and it seems to work. Following is the modified code:
class VGG16(torch.nn.Module):
def __init__(self, n_classes):
super(VGG16, self).__init__()
# construct model
self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
self.conv11_bn = nn.BatchNorm2d(64)
self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
self.conv12_bn = nn.BatchNorm2d(64)
self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
self.conv21_bn = nn.BatchNorm2d(128)
self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
self.conv22_bn = nn.BatchNorm2d(128)
self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
self.conv31_bn = nn.BatchNorm2d(256)
self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
self.conv32_bn = nn.BatchNorm2d(256)
self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
self.conv33_bn = nn.BatchNorm2d(256)
self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
self.conv41_bn = nn.BatchNorm2d(512)
self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv42_bn = nn.BatchNorm2d(512)
self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv43_bn = nn.BatchNorm2d(512)
self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
self.conv51_bn = nn.BatchNorm2d(512)
self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
self.conv52_bn = nn.BatchNorm2d(512)
self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
self.conv53_bn = nn.BatchNorm2d(512)
self.fc6 = nn.Linear(512, 512)
self.fc7 = nn.Linear(512, 512)
self.fc8 = nn.Linear(512, n_classes)
def forward(self, x):
x = F.relu(self.conv11_bn(self.conv1_1(x)))
x = F.relu(self.conv12_bn(self.conv1_2(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv22_bn(self.conv2_1(x)))
x = F.relu(self.conv21_bn(self.conv2_2(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv31_bn(self.conv3_1(x)))
x = F.relu(self.conv32_bn(self.conv3_2(x)))
x = F.relu(self.conv33_bn(self.conv3_3(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv41_bn(self.conv4_1(x)))
x = F.relu(self.conv42_bn(self.conv4_2(x)))
x = F.relu(self.conv43_bn(self.conv4_3(x)))
x = F.max_pool2d(x, (2, 2))
x = F.relu(self.conv51_bn(self.conv5_1(x)))
x = F.relu(self.conv52_bn(self.conv5_2(x)))
x = F.relu(self.conv53_bn(self.conv5_3(x)))
x = F.max_pool2d(x, (2, 2))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc6(x))
x = F.relu(self.fc7(x))
x = self.fc8(x)
return x
However, a more elegant version of the same could be found here