I am trying to create a Deep convolutional autoencoder that the decoder architecture be the same as generator of DCGAN. But I am getting the error while running the model.
you can find whole of my code here:
https://colab.research.google.com/drive/1uO6eMBp4DmgEOaXY4iAuG9FpkYAasfG7?usp=sharing
part of my code here:
class Autoencoder(nn.Module):
def __init__(self):
super(Autoencoder, self).__init__()
self.encoder = nn.Sequential( # like the Composition layer you built
nn.Conv2d(1, 16, 3, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(16, 16 * 2, 3, 2, 1, bias=False),
nn.BatchNorm2d(16 * 2),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(16 * 2, 16 * 4, 3, 2, 1, bias=False),
nn.BatchNorm2d(16 * 4),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(16 * 4, 16 * 8, 3, 2, 1, bias=False),
nn.BatchNorm2d(16 * 8),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(16 * 8, 16 * 16, 3),
nn.Sigmoid()
)
self.decoder = nn.Sequential(
nn.ConvTranspose2d( 16 * 16, 16 * 8, 3),
nn.BatchNorm2d(64 * 8),
nn.ReLU(True),
nn.ConvTranspose2d(16 * 8, 16 * 4, 3, 2, 1, output_padding=1),
nn.BatchNorm2d(16 * 4),
nn.ReLU(True),
nn.ConvTranspose2d(16 * 4, 16 * 2, 3, 2, 1, output_padding=1),
nn.BatchNorm2d(16 * 2),
nn.ReLU(True),
nn.ConvTranspose2d(16 * 2, 16, 3, 2, 1, output_padding=1),
nn.BatchNorm2d(16),
nn.ReLU(True),
nn.ConvTranspose2d( 16, 1, 3, 2, 1, output_padding=1),
nn.Tanh()
)
def forward(self, x):
x = self.encoder(x)
x = self.decoder(x)
return x
The full error I am getting is:
RuntimeError Traceback (most recent call last)
<ipython-input-8-d32e488e5d56> in <module>
1 model = Autoencoder()
2 max_epochs = 20
----> 3 outputs = train(model, num_epochs=max_epochs)
7 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
452 _pair(0), self.dilation, self.groups)
453 return F.conv2d(input, weight, bias, self.stride,
--> 454 self.padding, self.dilation, self.groups)
455
456 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Calculated padded input size per channel: (2 x 2). Kernel size: (3 x 3). Kernel size can't be greater than actual input size
It tries to use kernel_size=3 (3x3). You have to lower the kernel size to 1 (1x1) or 2 (2x2), or modify your model somewhere before this layer.
Related
This is the architecture based on a research paper.
class NBV_Net(nn.Module):
def __init__(self, dropout_prob):
super(NBV_Net, self).__init__()
#dropout_prob = 0.0 # 1 - 0.7
# Four 3D convolutional layers
self.conv1 = nn.Conv3d(1,16, 3, stride=1, padding=1)
self.pool1 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv2 = nn.Conv3d(16, 32, 3, stride=1, padding=1)
self.pool2 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv3 = nn.Conv3d(32, 64, 3, stride=1, padding=1)
self.conv3_drop = nn.Dropout(dropout_prob)
self.pool3 = nn.MaxPool3d(kernel_size=(2,2,2), stride = (2,2,2))
self.conv4 = nn.Conv3d(64, 64, 3, stride=1, padding=1)
self.conv4_drop = nn.Dropout(dropout_prob)
# Five fully connected layers
self.fc1 = nn.Linear(4096, 1500)
self.fc1_drop = nn.Dropout(dropout_prob)
self.fc2 = nn.Linear(1500, 500)
self.fc2_drop = nn.Dropout(dropout_prob)
self.fc3 = nn.Linear(500, 100)
self.fc3_drop = nn.Dropout(dropout_prob)
self.fc4 = nn.Linear(100, 50)
self.fc4_drop = nn.Dropout(dropout_prob)
self.fc5 = nn.Linear(50, 3)
def forward(self, x):
## feedforward behavior of NBV-net
x = self.pool1(F.relu(self.conv1(x)))
x = self.pool2(F.relu(self.conv2(x)))
x = self.pool3(F.relu(self.conv3(x)))
x = self(F.relu(self.conv4(x)))
# Aplanar
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc1_drop(x)
x = F.relu(self.fc2(x))
x = self.fc2_drop(x)
x = F.relu(self.fc3(x))
x = self.fc3_drop(x)
x = F.relu(self.fc4(x))
x = self.fc4_drop(x)
x = F.tanh(self.fc5(x))
return x
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3, 3], expected input[250, 64, 4, 4, 4] to have 1 channels, but got 64 channels instead
But this code gives the Runtime Error. Similar errors are there but I could not understand what Group 1 and other dimensions mentioned exactly mean , any idea about the background of this error ?
The input shape for nn.Conv3d(1,16, 3, stride=1, padding=1) is (batch, channels, depth, height, width).
You define that the channel size is 1 but your input tensor has 64 channels.
self.conv1 = nn.Conv3d(64,16, 3, stride=1, padding=1) will resolve you error
I have created this neural net:
class _netD(nn.Module):
def __init__(self, num_classes=1, nc=1, ndf=64):
super(_netD, self).__init__()
self.num_classes = num_classes
# nc is number of channels
# num_classes is number of classes
# ndf is the number of output channel at the first layer
self.main = nn.Sequential(
# input is (nc) x 28 x 28
# conv2D(in_channels, out_channels, kernelsize, stride, padding)
nn.Conv2d(nc, ndf , 4, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 14 x 14
nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 7 x 7
nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 3 x 3
nn.Conv2d(ndf * 4, ndf * 8, 3, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 8),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 2 x 2
nn.Conv2d(ndf * 8, num_classes, 2, 1, 0, bias=False),
# out size = batch x num_classes x 1 x 1
)
if self.num_classes == 1:
self.main.add_module('prob', nn.Sigmoid())
# output = probability
else:
pass
# output = scores
def forward(self, input):
output = self.main(input)
return output.view(input.size(0), self.num_classes).squeeze(1)
I want to loop through the different layers and apply a weight initialization depending on the type of layer. I am trying to do the following:
D = _netD()
for name, param in D.named_parameters():
if type(param) == nn.Conv2d:
param.weight.normal_(...)
But that is not working. Can you please help me?
Thanks
type(param) will only return the actual datatype called a parameter for any type of weight or data in the model. Because named_parameters() doesn't return anything useful in the name either when used on an nn.sequential-based model, you need to look at the modules to see which layers are specifically related to the nn.Conv2d class using isinstance as such:
for layer in D.modules():
if isinstance(layer, nn.Conv2d):
layer.weight.data.normal_(...)
Or, the way that is recommended by Soumith Chintala himself, actually just loop through your main module itself:
for L,layer in D.main:
if isisntance(layer,nn.Conv2d):
layer.weight.data.normal_(..)
I actually prefer the first because you don't have to specify the exact nn.sequential module itself, and will search all possible modules in the model, but either one should do the job for you.
This is my first question, so please forgive if I've missed adding something.
I'm trying to create a Convolutional Autoencoder in Pytorch 1.7.0, yet am having difficulty in designing the model so that the output size is equal to the input size. I'm currently working on the MNIST dataset, with the input tensor size being 1128*28 and currently, the output is 1*1*29*29...
Can someone please help me identify the problem? *Please note that I'll incorporate the learnings afterwards.
class autoencoder(nn.Module):
def __init__(self, hidden_node_count):
super(autoencoder, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 5, stride=2, padding=2)
self.conv2 = nn.Conv2d(32,32, 5, stride=2)#, padding=2)
self.pool = nn.MaxPool2d(hidden_node_count, hidden_node_count)
self.t_conv1 = nn.ConvTranspose2d(32, 32, 5, stride=2)#, padding=2)
self.t_conv2 = nn.ConvTranspose2d(32, 32, 5, stride=2)#, padding=2)
self.t_conv3 = nn.ConvTranspose2d(32, 1, 5, stride=2)#, padding=2)
self.relu = nn.ReLU(True)
self.tanh = nn.Tanh()
def forward(self, x):
print(x.size(), "input")
x = self.conv1(x)
x = self.relu(x)
print(x.size(), "conv1")
x = self.conv2(x)
print(x.size(), "conv2")
x = self.pool(x)
print(x.size(), "pool")
x = self.t_conv1(x)
x = self.relu(x)
print(x.size(), "deconv1")
x = self.t_conv2(x)
x = self.relu(x)
print(x.size(), "deconv2")
x = self.t_conv3(x)
x = self.tanh(x)
print(x.size(), "deconv3")
return x
With its STDOUT being ->
torch.Size([1, 1, 28, 28]) input
torch.Size([1, 32, 14, 14]) conv1
torch.Size([1, 32, 5, 5]) conv2
torch.Size([1, 32, 1, 1]) pool
torch.Size([1, 32, 5, 5]) deconv1
torch.Size([1, 32, 13, 13]) deconv2
torch.Size([1, 1, 29, 29]) deconv3
torch.Size([1, 1, 29, 29])
torch.Size([1, 1, 28, 28])
according to the documentation for ConvTranspose2d, here is the formula to compute the output size :
Hout=(Hin−1)×stride[0]−2×padding[0]+dilation[0]×(kernel_size[0]−1)+output_padding[0]+1
In your case, Hin=13, padding=0, dilation=1, kernel_size=5, output_padding=0, which gives Hout=29. Your output tensor is as it should be !
If you want to have an output of 28, add some padding. With padding=1, you will get an output of size (1,32,27,27), because the output size of a ConvTranpose2d is ambiguous (read the doc). Therefore, you need to add some output padding as well :
conv = nn.ConvTranspose2d(32, 1, 5, stride= 2, padding=1, output_padding=1)
conv(randn(1,32,13,13)).size()
>>> (1, 1, 28, 28)
I am trying to implement such CNN.
This is my implementation:
class Net(BaseFeaturesExtractor):
def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256):
super(Net, self).__init__(observation_space, features_dim)
n_input_channels = observation_space.shape[0]
print("Observation space shape:"+str(observation_space.shape))
print("Number of channels:" + str(n_input_channels))
self.cnn = nn.Sequential(
nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.Flatten(),
nn.Linear(in_features=128,out_features=64),
nn.ReLU(),
nn.Linear(in_features=64,out_features=7),
nn.Sigmoid()
)
def forward(self, observations: th.Tensor) -> th.Tensor:
print("Observation shape:"+str(observations[0].shape))
return self.cnn(observations)
When I tried to run the code which uses this CNN, I am getting following log:
Observation space shape:(3, 6, 7)
Number of channels:3
Observation shape:torch.Size([3, 6, 7])
Traceback (most recent call last): File "/Users/joe/Documents/JUPYTER/ConnectX/training3.py", line 250, in <module>
learner.learn(total_timesteps=iterations, callback=eval_callback)
...
RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[4, 32, 6, 7] to have 3 channels, but got 32 channels instead
What is the problem here? How can I solve it?
in_channels of a conv layer should be equal to out_channels of the previous layer. In your case, in_channels of the 2nd and 3rd conv layers don't have the correct values. They should be like below,
self.cnn = nn.Sequential(
nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
...
)
Also, you should check in_features of the 1st Linear layer. It depends on the input shape and should be equal to last_conv_out_channels * last_conv_output_height * last_conv_output_width.
For example, for an input=torch.randn(1, 3, 256, 256) last conv layer's output shape would be ([1, 32, 64, 64]), in that case the 1st Linear layer should be,
nn.Linear(in_features=32*64*64,out_features=64)
---- Update after the comment:
Output shape of a conv layer is calculated through the formula here (see under "Shape:" section). Using input = torch.randn(1, 3, 256, 256) as input to the network, here are outputs of each conv layer (I skipped the ReLUs since they don't change the shape),
conv1: (1, 3, 256, 256) -> (1, 32, 256, 256)
conv2: (1, 32, 256, 256) -> (1, 32, 128, 128)
conv3: (1, 32, 128, 128) -> (1, 32, 64, 64)
So how did last_conv_output_height and last_conv_output_width became 64 ? The last conv layer is defined as follows,
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
Data is processed as (num_samples, num_channels, height, width) in PyTorch and the default value for dilation is stated as 1 in the conv2d doc. So, for the last conv layer, H_in is 128, padding[0] is 1, dilation[0] is 1, kernel_size[0] is 3 and stride[0] is 2. Therefore, height of its output becomes,
H_out = ⌊(128 + 2 * 1 - 1 * (3 - 1) - 1) / 2⌋ + 1
H_out = 64
Since square-size kernels and equal-size stride, padding and dilation are used, W_out also becomes 64 for the last conv layer.
I think the easiest way to compute in_features for the 1st Linear layer would be run the model for the desired size input until that layer. An example for your architecture,
inp = torch.randn(1, 3, 256, 256)
arch = nn.Sequential(
nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
)
outp = arch(inp)
print('outp.shape:', outp.shape)
This prints,
outp.shape: torch.Size([1, 32, 64, 64])
Finally, last_conv_out_channels is out_channels of the last conv layer. The last conv layer in your architecture is nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1). Here out_channels is the 2nd parameter, so last_conv_out_channels is 32.
I want to remove the decoder portion of the Autoencoder.
and I want to put FC in the removed part.
In addition, the encoder parts will not train with pre-learned weights.
self.encoder = nn.Sequential(
nn.Conv2d(1, 16, 3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(16, 8, 3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(8, 8, 3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=4, stride=1),
)
self.decoder = nn.Sequential(
nn.Conv2d(8, 8, 3, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
nn.Conv2d(8, 8, 3, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
nn.Conv2d(8, 16, 3),
nn.ReLU(True),
nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2),
nn.Conv2d(16, 1, 3, padding=1)
)
def forward(self, x):
if self.training :
x = self.encoder(x)
x = self.decoder(x)
return x
else:
x = classifier(x)
return x
is this possible?
help me...
One easy and clean solution would be to define a stand-alone network as your decoder, then replace the decoder attribute of your model with this new network after pre-training is over. Easy example below:
class sillyExample(torch.nn.Module):
def __init__(self):
super(sillyExample, self).__init__()
self.encoder = torch.nn.Linear(5, 5)
self.decoder = torch.nn.Linear(5, 10)
def forward(self, x):
return self.decoder(self.encoder(x))
test = sillyExample()
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 10])
test.decoder = torch.nn.Linear(5, 20) # replace the decoder
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 20])
Just make sure to re-initialize your optimizers with the updated model (or anything else that might be referencing the model's parameters).