I have been using the UNet architecture for image segmentation, but I am seeing an unusually long runtime for the first epoch (about 2 hours), after which the runtime drops to roughly 30 minutes per epoch. I don't know whether the problem lies inside the model; any advice is highly appreciated.
Here is my code:
import torch
import torch.nn as nn

class UNet(nn.Module):
    def __init__(self, num_classes):
        super(UNet, self).__init__()
        self.num_classes = num_classes
        self.contracting_11 = self.conv_block(in_channels=3, out_channels=64)
        self.contracting_12 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_21 = self.conv_block(in_channels=64, out_channels=128)
        self.contracting_22 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_31 = self.conv_block(in_channels=128, out_channels=256)
        self.contracting_32 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_41 = self.conv_block(in_channels=256, out_channels=512)
        self.contracting_42 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.middle = self.conv_block(in_channels=512, out_channels=1024)
        self.expansive_11 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_12 = self.conv_block(in_channels=1024, out_channels=512)
        self.expansive_21 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_22 = self.conv_block(in_channels=512, out_channels=256)
        self.expansive_31 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_32 = self.conv_block(in_channels=256, out_channels=128)
        self.expansive_41 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_42 = self.conv_block(in_channels=128, out_channels=64)
        self.output = nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=3, stride=1, padding=1)

    def conv_block(self, in_channels, out_channels):
        block = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=out_channels),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=out_channels)
        )
        return block

    def forward(self, X):
        contracting_11_out = self.contracting_11(X)                    # [-1, 64, 256, 256]
        contracting_12_out = self.contracting_12(contracting_11_out)  # [-1, 64, 128, 128]
        contracting_21_out = self.contracting_21(contracting_12_out)  # [-1, 128, 128, 128]
        contracting_22_out = self.contracting_22(contracting_21_out)  # [-1, 128, 64, 64]
        contracting_31_out = self.contracting_31(contracting_22_out)  # [-1, 256, 64, 64]
        contracting_32_out = self.contracting_32(contracting_31_out)  # [-1, 256, 32, 32]
        contracting_41_out = self.contracting_41(contracting_32_out)  # [-1, 512, 32, 32]
        contracting_42_out = self.contracting_42(contracting_41_out)  # [-1, 512, 16, 16]
        middle_out = self.middle(contracting_42_out)                  # [-1, 1024, 16, 16]
        expansive_11_out = self.expansive_11(middle_out)              # [-1, 512, 32, 32]
        expansive_12_out = self.expansive_12(torch.cat((expansive_11_out, contracting_41_out), dim=1))  # [-1, 1024, 32, 32] -> [-1, 512, 32, 32]
        expansive_21_out = self.expansive_21(expansive_12_out)        # [-1, 256, 64, 64]
        expansive_22_out = self.expansive_22(torch.cat((expansive_21_out, contracting_31_out), dim=1))  # [-1, 512, 64, 64] -> [-1, 256, 64, 64]
        expansive_31_out = self.expansive_31(expansive_22_out)        # [-1, 128, 128, 128]
        expansive_32_out = self.expansive_32(torch.cat((expansive_31_out, contracting_21_out), dim=1))  # [-1, 256, 128, 128] -> [-1, 128, 128, 128]
        expansive_41_out = self.expansive_41(expansive_32_out)        # [-1, 64, 256, 256]
        expansive_42_out = self.expansive_42(torch.cat((expansive_41_out, contracting_11_out), dim=1))  # [-1, 128, 256, 256] -> [-1, 64, 256, 256]
        output_out = self.output(expansive_42_out)                    # [-1, num_classes, 256, 256]
        out = torch.sigmoid(output_out)
        return out
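For reference, here is a quick shape sanity check (a minimal sketch of my own, assuming 256x256 RGB inputs to match the shape comments in forward, and an arbitrary num_classes of 10):

# Hypothetical sanity check, not part of the training code.
model = UNet(num_classes=10)            # 10 classes is an arbitrary example value
dummy = torch.randn(2, 3, 256, 256)     # batch of 2 RGB images, 256x256
with torch.no_grad():
    out = model(dummy)
print(out.shape)  # expected: torch.Size([2, 10, 256, 256])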
This is my first question, so please forgive me if I've missed something.
I'm trying to create a convolutional autoencoder in PyTorch 1.7.0, but I'm having difficulty designing the model so that the output size equals the input size. I'm currently working on the MNIST dataset, with an input tensor of size 1*1*28*28, and the output currently comes out as 1*1*29*29.
Can someone please help me identify the problem? (Please note that I'll incorporate the learnings afterwards.)
import torch
import torch.nn as nn

class autoencoder(nn.Module):
    def __init__(self, hidden_node_count):
        super(autoencoder, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=2, padding=2)
        self.conv2 = nn.Conv2d(32, 32, 5, stride=2)  # , padding=2)
        self.pool = nn.MaxPool2d(hidden_node_count, hidden_node_count)
        self.t_conv1 = nn.ConvTranspose2d(32, 32, 5, stride=2)  # , padding=2)
        self.t_conv2 = nn.ConvTranspose2d(32, 32, 5, stride=2)  # , padding=2)
        self.t_conv3 = nn.ConvTranspose2d(32, 1, 5, stride=2)  # , padding=2)
        self.relu = nn.ReLU(True)
        self.tanh = nn.Tanh()

    def forward(self, x):
        print(x.size(), "input")
        x = self.conv1(x)
        x = self.relu(x)
        print(x.size(), "conv1")
        x = self.conv2(x)
        print(x.size(), "conv2")
        x = self.pool(x)
        print(x.size(), "pool")
        x = self.t_conv1(x)
        x = self.relu(x)
        print(x.size(), "deconv1")
        x = self.t_conv2(x)
        x = self.relu(x)
        print(x.size(), "deconv2")
        x = self.t_conv3(x)
        x = self.tanh(x)
        print(x.size(), "deconv3")
        return x
Its stdout is:
torch.Size([1, 1, 28, 28]) input
torch.Size([1, 32, 14, 14]) conv1
torch.Size([1, 32, 5, 5]) conv2
torch.Size([1, 32, 1, 1]) pool
torch.Size([1, 32, 5, 5]) deconv1
torch.Size([1, 32, 13, 13]) deconv2
torch.Size([1, 1, 29, 29]) deconv3
torch.Size([1, 1, 29, 29])
torch.Size([1, 1, 28, 28])
According to the documentation for ConvTranspose2d, the formula to compute the output size is:
H_out = (H_in − 1) × stride[0] − 2 × padding[0] + dilation[0] × (kernel_size[0] − 1) + output_padding[0] + 1
In your case, H_in = 13, stride = 2, padding = 0, dilation = 1, kernel_size = 5, output_padding = 0, which gives H_out = 29. Your output tensor is exactly as it should be!
If you want an output of 28, add some padding. With padding=1, you will get an output of size (1, 1, 27, 27), because the output size of a ConvTranspose2d is ambiguous (read the doc). Therefore, you need to add some output padding as well:
conv = nn.ConvTranspose2d(32, 1, 5, stride=2, padding=1, output_padding=1)
conv(torch.randn(1, 32, 13, 13)).size()
>>> torch.Size([1, 1, 28, 28])
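As an illustration (my own sketch, not part of the original answer), the formula can be wrapped in a small helper to experiment with different paddings:

def convtranspose2d_out(h_in, kernel_size, stride=1, padding=0, output_padding=0, dilation=1):
    """Output height/width of nn.ConvTranspose2d, per the formula in the PyTorch docs."""
    return (h_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1

print(convtranspose2d_out(13, 5, stride=2))                               # 29
print(convtranspose2d_out(13, 5, stride=2, padding=1))                    # 27
print(convtranspose2d_out(13, 5, stride=2, padding=1, output_padding=1))  # 28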
I am using PyTorch 1.7 and Python 3.8 with the CIFAR-10 dataset. I am trying to create a block with: conv -> conv -> pool -> fc. The fully connected layer (fc) has 256 neurons. The code for this is as follows:
# Testing-
conv1 = nn.Conv2d(
    in_channels=3, out_channels=64,
    kernel_size=3, stride=1,
    padding=1, bias=True
)
conv2 = nn.Conv2d(
    in_channels=64, out_channels=64,
    kernel_size=3, stride=1,
    padding=1, bias=True
)
pool = nn.MaxPool2d(
    kernel_size=2, stride=2
)
fc1 = nn.Linear(
    in_features=64 * 16 * 16, out_features=256,
    bias=True
)
images.shape
# torch.Size([32, 3, 32, 32])
x = conv1(images)
x.shape
# torch.Size([32, 64, 32, 32])
x = conv2(x)
x.shape
# torch.Size([32, 64, 32, 32])
x = pool(x)
x.shape
# torch.Size([32, 64, 16, 16])
# This line of code gives error-
x = fc1(x)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32768x16 and 16384x256)
What is going wrong?
You are nearly there! As you noticed, nn.MaxPool2d returns a tensor of shape (32, 64, 16, 16), which is not what nn.Linear expects as input: a 2D tensor of shape (batch, in_features). You need to reshape it to (batch, 64*16*16).
I would recommend using an nn.Flatten layer rather than reshaping yourself. It behaves like x.view(x.size(0), -1) but is clearer. By default it preserves the first (batch) dimension and flattens the rest:
conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)
flatten = nn.Flatten()
fc1 = nn.Linear(in_features=64*16*16, out_features=256)
x = conv1(images)
x = conv2(x)
x = pool(x)
x = flatten(x)
x = fc1(x)
Alternatively, you could use the functional alternative torch.flatten, where you will have to provide the start_dim as 1: x = torch.flatten(x, start_dim=1).
When you're done debugging, you could assemble your layers with nn.Sequential:
model = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
nn.Linear(in_features=64*16*16, out_features=256)
)
x = model(images)
You need to flatten the output of the nn.MaxPool2d layer before giving it to the nn.Linear layer.
Try using x = x.view(x.size(0), -1) before the fc layer to flatten the tensor.
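For example (my own minimal sketch, reusing the names and shapes from the question):

x = pool(x)                 # torch.Size([32, 64, 16, 16])
x = x.view(x.size(0), -1)   # torch.Size([32, 16384]), since 64 * 16 * 16 = 16384
x = fc1(x)                  # torch.Size([32, 256])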
I am trying to implement the following CNN.
This is my implementation:
import gym
import torch as th
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Net(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256):
        super(Net, self).__init__(observation_space, features_dim)
        n_input_channels = observation_space.shape[0]
        print("Observation space shape:"+str(observation_space.shape))
        print("Number of channels:" + str(n_input_channels))
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=7),
            nn.Sigmoid()
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        print("Observation shape:"+str(observations[0].shape))
        return self.cnn(observations)
When I try to run the code that uses this CNN, I get the following log:
Observation space shape:(3, 6, 7)
Number of channels:3
Observation shape:torch.Size([3, 6, 7])
Traceback (most recent call last):
  File "/Users/joe/Documents/JUPYTER/ConnectX/training3.py", line 250, in <module>
    learner.learn(total_timesteps=iterations, callback=eval_callback)
...
RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[4, 32, 6, 7] to have 3 channels, but got 32 channels instead
What is the problem here? How can I solve it?
The in_channels of a conv layer should be equal to the out_channels of the previous layer. In your case, the in_channels of the 2nd and 3rd conv layers don't have the correct values. They should be as below:
self.cnn = nn.Sequential(
    nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    ...
)
Also, you should check in_features of the 1st Linear layer. It depends on the input shape and should be equal to last_conv_out_channels * last_conv_output_height * last_conv_output_width.
For example, for an input of torch.randn(1, 3, 256, 256), the last conv layer's output shape would be [1, 32, 64, 64]; in that case, the 1st Linear layer should be:
nn.Linear(in_features=32*64*64,out_features=64)
---- Update after the comment:
The output shape of a conv layer is calculated with the formula in the Conv2d documentation (see the "Shape:" section). Using input = torch.randn(1, 3, 256, 256) as input to the network, here are the outputs of each conv layer (I skipped the ReLUs since they don't change the shape):
conv1: (1, 3, 256, 256) -> (1, 32, 256, 256)
conv2: (1, 32, 256, 256) -> (1, 32, 128, 128)
conv3: (1, 32, 128, 128) -> (1, 32, 64, 64)
So how did last_conv_output_height and last_conv_output_width become 64? The last conv layer is defined as follows:
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
Data is processed as (num_samples, num_channels, height, width) in PyTorch, and the default value for dilation is 1, as stated in the Conv2d doc. So, for the last conv layer, H_in is 128, padding[0] is 1, dilation[0] is 1, kernel_size[0] is 3 and stride[0] is 2. Therefore, the height of its output becomes:
H_out = ⌊(128 + 2 * 1 - 1 * (3 - 1) - 1) / 2⌋ + 1
H_out = 64
Since square-size kernels and equal-size stride, padding and dilation are used, W_out also becomes 64 for the last conv layer.
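To make that concrete, here is a small helper (my own sketch, not from the original answer) that evaluates the same formula:

import math

def conv2d_out(h_in, kernel_size, stride=1, padding=0, dilation=1):
    """Output height/width of nn.Conv2d, per the formula in the PyTorch docs."""
    return math.floor((h_in + 2 * padding - dilation * (kernel_size - 1) - 1) / stride) + 1

h = 256
h = conv2d_out(h, kernel_size=3, stride=1, padding=1)  # conv1: 256
h = conv2d_out(h, kernel_size=3, stride=2, padding=1)  # conv2: 128
h = conv2d_out(h, kernel_size=3, stride=2, padding=1)  # conv3: 64
print(h)  # 64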
I think the easiest way to compute in_features for the 1st Linear layer would be to run the model on an input of the desired size, up to that layer. An example for your architecture:
inp = torch.randn(1, 3, 256, 256)
arch = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
)
outp = arch(inp)
print('outp.shape:', outp.shape)
This prints,
outp.shape: torch.Size([1, 32, 64, 64])
Finally, last_conv_out_channels is out_channels of the last conv layer. The last conv layer in your architecture is nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1). Here out_channels is the 2nd parameter, so last_conv_out_channels is 32.
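For completeness, here is one possible corrected sketch (my own illustration, assuming the stable-baselines3 BaseFeaturesExtractor setup from the question). It fixes the in_channels values and sizes the Linear layer by running a dummy observation through the conv stack, as described above; for the (3, 6, 7) observations in the log this evaluates to 32 * 2 * 2 = 128:

import gym
import torch as th
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

class Net(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 7):
        # Note: in stable-baselines3, features_dim should equal the number of
        # features produced by forward(); 7 here matches the last Linear layer.
        super(Net, self).__init__(observation_space, features_dim)
        n_input_channels = observation_space.shape[0]
        conv = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
        )
        # Size the first Linear layer from a dummy forward pass, as suggested above.
        with th.no_grad():
            n_flatten = conv(th.as_tensor(observation_space.sample()[None]).float()).shape[1]
        self.cnn = nn.Sequential(
            conv,
            nn.Linear(in_features=n_flatten, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=7),
            nn.Sigmoid(),
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.cnn(observations)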
def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )
How can I deal with this error? I think the error is related to self.fc, but I can't see how to fix it.
The output from self.conv(x) is of shape torch.Size([32, 64, 2, 2]): 32*64*2*2 = 8192 (this is equivalent to self.conv_out_size). The input to a fully connected layer is expected to be a single-dimensional vector, i.e. you need to flatten it before passing it to the fully connected layer in the forward function,
i.e.
class Network(nn.Module):
    ...
    def forward(self, x):
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        conv_out = conv_out.view(-1, 32*64*2*2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x
output
torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])
EDIT:
I think you're using the self._get_conv_out function incorrectly.
It should be
def _get_conv_out(self, shape):
    output = self.conv(torch.zeros(1, *shape))  # not (32, *size)
    return int(numpy.prod(output.size()))
then, in the forward pass, you can use
conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)
For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
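Putting the pieces together, a complete version might look like the sketch below (my own illustration; the conv stack is the one from the question, while the fc sizes, a hidden width of 128 and 2 outputs, are assumptions). Because _get_conv_out sizes the fc layer from a dummy forward pass, it adapts automatically to whatever input resolution you use:

import numpy
import torch
import torch.nn as nn

class Network(nn.Module):
    def __init__(self, input_shape=(1, 110, 110), n_outputs=2):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
            nn.BatchNorm2d(64),
        )
        conv_out_size = self._get_conv_out(input_shape)  # computed with batch size 1
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 128),  # hidden width 128 is an arbitrary choice
            nn.ReLU(),
            nn.Linear(128, n_outputs),
        )

    def _get_conv_out(self, shape):
        output = self.conv(torch.zeros(1, *shape))
        return int(numpy.prod(output.size()))

    def forward(self, x):
        conv_out = self.conv(x)
        conv_out = conv_out.view(conv_out.size(0), -1)  # flatten per sample
        return self.fc(conv_out)

net = Network()
print(net(torch.randn(32, 1, 110, 110)).shape)  # torch.Size([32, 2])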
I had the same problem; however, I solved it by using a batch size of 32 and an image tensor size of [3, 32, 32], together with the following model configuration. I am using a ResNet9 (a 9-layer ResNet) and need 4 outputs.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out
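As a quick shape check, here is my own sketch of the same layer stack built on plain nn.Sequential (the residual blocks and dropout are omitted since they don't change the shape, and ImageClassificationBase is assumed to be an nn.Module subclass from the poster's notebook):

import torch
import torch.nn as nn

check = nn.Sequential(
    conv_block(3, 64),                  # 32x32, 64 channels
    conv_block(64, 128, pool=True),     # 32x32 -> 16x16
    conv_block(128, 256, pool=True),    # 16x16 -> 8x8
    conv_block(256, 512, pool=True),    # 8x8 -> 4x4
    nn.MaxPool2d(4),                    # 4x4 -> 1x1
    nn.Flatten(),                       # 512 features
    nn.Linear(512, 4),                  # 4 output classes
)
print(check(torch.randn(32, 3, 32, 32)).shape)  # torch.Size([32, 4])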
I want to remove the decoder portion of the autoencoder and put an FC (fully connected) classifier in its place.
In addition, the encoder part should keep its pre-learned weights and not be trained.
self.encoder = nn.Sequential(
    nn.Conv2d(1, 16, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 8, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=4, stride=1),
)
self.decoder = nn.Sequential(
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
    nn.Conv2d(8, 16, 3),
    nn.ReLU(True),
    nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2),
    nn.Conv2d(16, 1, 3, padding=1)
)

def forward(self, x):
    if self.training:
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    else:
        x = classifier(x)
        return x
Is this possible? Any help would be appreciated.
One easy and clean solution would be to define a stand-alone network as your decoder, then replace the decoder attribute of your model with this new network after pre-training is over. Easy example below:
class sillyExample(torch.nn.Module):
    def __init__(self):
        super(sillyExample, self).__init__()
        self.encoder = torch.nn.Linear(5, 5)
        self.decoder = torch.nn.Linear(5, 10)

    def forward(self, x):
        return self.decoder(self.encoder(x))

test = sillyExample()
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 10])

test.decoder = torch.nn.Linear(5, 20)  # replace the decoder
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 20])
Just make sure to re-initialize your optimizers with the updated model (or anything else that might be referencing the model's parameters).
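Applied to your case, the same idea extends to freezing the encoder so it keeps its pre-learned weights (a sketch of mine continuing the example above; the classifier head size of 3 is an arbitrary illustration):

# Swap in a classifier head and freeze the encoder.
test.decoder = torch.nn.Linear(5, 3)      # hypothetical classifier head
for p in test.encoder.parameters():
    p.requires_grad = False               # encoder keeps its pre-learned weights

# Re-create the optimizer so it only tracks the trainable (classifier) parameters.
optimizer = torch.optim.Adam(
    (p for p in test.parameters() if p.requires_grad), lr=1e-3
)

test(torch.rand(30, 5)).shape
Out: torch.Size([30, 3])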