I'm trying to mimic Keras's TimeDistributed layer in PyTorch. Please see the model below:
import torch
import torch.nn as nn

class GRULinear(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, batch_first=False):
        super().__init__()
        # batch_first must be passed by keyword: passed positionally it is
        # taken as nn.GRU's `bias` argument instead.
        self.gru = nn.GRU(input_size, hidden_size, num_layers,
                          batch_first=batch_first)
        self.fc = nn.Sequential(nn.ReLU(True),
                                nn.Linear(hidden_size, hidden_size),
                                nn.ReLU(True))

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out)
        return out
class CNN_GRU(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(CNN_GRU, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(input_dim, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            nn.Flatten()
        )
        self.gru_linear = GRULinear(16000, output_dim, 2, True)

    def forward(self, state):
        features = []
        for i in range(state.shape[1]):
            features.append(self.feature_extractor(state[0][i]).unsqueeze(1))
        features = torch.flatten(torch.cat(features, dim=1), start_dim=1)
        features = torch.reshape(features, shape=(1,) + features.shape)
        outs = torch.flatten(self.gru_linear(features))
        return outs
Test (using torchsummary, i.e. from torchsummary import summary):
model = CNN_GRU(3,64).to('cuda')
summary(model, input_size=(125, 1, 3 , 16, 16))
Output snippet:
================================================================
Total params: 59,758,160
Trainable params: 59,758,160
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.37
Forward/backward pass size (MB): 34.15
Params size (MB): 227.96
Estimated Total Size (MB): 262.48
----------------------------------------------------------------
Full output:
https://justpaste.it/31r4v
I've tested the model above with torchsummary, and it reports far more trainable parameters than the equivalent Keras TimeDistributed model built with the same hyperparameters. I don't understand what I'm doing wrong. What is the proper way to mimic TimeDistributed? If there is another way to do this, I'd be grateful for some pointers.
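A note on the numbers first: the inflated count looks like a torchsummary counting artifact rather than real extra weights. The convolutional stack holds 478,032 parameters, and because forward() invokes self.feature_extractor once per timestep, torchsummary's hooks count those weights 125 times: 478,032 × 125 + 4,160 for the Linear inside GRULinear gives exactly the 59,758,160 reported above (the GRU's roughly 3.1M weights are apparently not counted at all, since torchsummary only looks for plain .weight/.bias attributes, which nn.GRU does not expose). As for the layer itself, the usual way to mimic TimeDistributed is to fold the time axis into the batch axis, run the wrapped module once, and unfold again. A minimal sketch (the TimeDistributed wrapper name is my own, and it assumes a (batch, time, ...) layout):

import torch
import torch.nn as nn

class TimeDistributed(nn.Module):
    """Apply `module` independently to every timestep of a (B, T, ...) input."""
    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, x):
        b, t = x.shape[0], x.shape[1]
        # Merge batch and time, run the module once, then split them again.
        out = self.module(x.reshape(b * t, *x.shape[2:]))
        return out.reshape(b, t, *out.shape[1:])

Wrapping the feature extractor this way would turn a (batch, 125, 3, 16, 16) clip into a (batch, 125, 128) sequence for the GRU, which is what Keras's TimeDistributed(cnn) followed by a GRU sees, rather than one flattened 16,000-dimensional vector.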
Related
Using count_ops to calculate FLOPs for a neural network model gives me the error mentioned in the title. I have changed a pretrained model (resnet18) by reassigning its layers. My goal is to calculate the FLOPs for each edited model (and to make sure the model actually passes inputs).
import math
import torch.nn as nn
from torchvision.models import resnet18

net = resnet18()
phi = math.pow(math.sqrt(2), 7)  # width multiplier, about 11.31
# net.conv1 = nn.Conv2d(math.ceil(phi*3), math.ceil(phi*64), kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
net.conv1 = nn.Conv2d(math.ceil(3), math.ceil(phi*64), kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
net.bn1 = nn.BatchNorm2d(math.ceil(phi*64), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
net.relu = nn.ReLU(inplace=True)
net.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
net.layer1[0].conv1 = nn.Conv2d(math.ceil(phi*64), math.ceil(phi*64), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
.
.
net.layer2[0].conv1 = nn.Conv2d(math.ceil(phi*64), math.ceil(phi*128), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
.
.
net.layer3[0].conv1 = nn.Conv2d(math.ceil(phi*128), math.ceil(phi*256), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
.
.
net.layer4[0].conv1 = nn.Conv2d(math.ceil(phi*256), math.ceil(phi*512), kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
.
.
# net.avgpool = Identity()
net.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
nn.fc = nn.Linear(in_features=math.ceil(phi*512), out_features=1000, bias=True)
The edited model works when I use model.named_parameters(). I am thinking that I messed up by directly assigning new values to the layers (I am not sure of the order of parameters for the layers; printing my model seems fine, but for the love of god not a single input passes through the model). I want to know where I messed up in the syntax, because of which my inputs are not passing through the model.
ip = torch.rand(1,3,224,224).to(dev)
count_ops(net, ip) # Count the number of FLOPs
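Assuming count_ops comes from the pthflops package, it simply runs a forward pass under the hood, so the failure is a shape mismatch somewhere in the edited network rather than a problem with count_ops itself. Two things stand out in the code above: widening a ResNet stage means widening every layer in each of its blocks (conv2, both BatchNorms, and the downsample projection, not just conv1), and the last line assigns to nn.fc instead of net.fc, so the original Linear(512, 1000) head is never replaced and then receives the widened feature count. A small hook-based sketch like the following (my own helper, not part of pthflops) prints each leaf layer's output shape and makes the first ill-fitting layer obvious:

import torch

def trace_shapes(model, x):
    """Print each leaf module's output shape during one forward pass; the
    last line printed before the exception names the layer that still ran."""
    handles = []
    for name, m in model.named_modules():
        if len(list(m.children())) == 0:  # hook leaf modules only
            handles.append(m.register_forward_hook(
                lambda mod, inp, out, name=name: print(name, tuple(out.shape))))
    try:
        model(x)
    finally:
        for h in handles:
            h.remove()

trace_shapes(net, torch.rand(1, 3, 224, 224))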
I am working on visualizing the feature maps of my vision transformer, but I am unable to do so. When I print model.children() it shows convolution layers, yet my if statement never matches.
list(model.children())
Output
[OverlapPatchEmbed(
  (proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
  (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
),
OverlapPatchEmbed(
  (proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
),
OverlapPatchEmbed(
  (proj): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
),
OverlapPatchEmbed(
  (proj): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
),
ModuleList(
  (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
  (1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), paddin...
I want to access the Conv2d layers and visualize their feature maps, but I am unable to do so: type(model_children[i]) == Conv2d is never True and I have no idea why.
import torch.nn as nn
from torch.nn import Conv2d

model_children = list(model.children())
# counter to keep count of the conv layers
counter = 0
model_weights = []
conv_layers = []
# append all the conv layers and their respective weights to the lists
for i in range(len(model_children)):
    if type(model_children[i]) == Conv2d:
        counter += 1
        model_weights.append(model_children[i].weight)
        conv_layers.append(model_children[i])
    elif type(model_children[i]) == nn.Sequential:
        for j in range(len(model_children[i])):
            for child in model_children[i][j].children():
                if type(child) == nn.Conv2d:
                    counter += 1
                    model_weights.append(child.weight)
                    conv_layers.append(child)
print(f"Total convolution layers: {counter}")
print(conv_layers)
Actually, model_children[i] does not have a weight attribute for these entries. Inside OverlapPatchEmbed, the proj layer is the Conv2d, and it is the Conv2d that has the weight attribute. You can correct it as below:
if model_children[i] == model.patch_embed1:
    counter += 1
    weigh = model_children[i].proj
    model_weights.append(weigh.weight)
    conv_layers.append(model_children[i].proj)
elif model_children[i] == model.patch_embed2:
    counter += 1
    weigh = model_children[i].proj
    model_weights.append(weigh.weight)
    conv_layers.append(model_children[i].proj)
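A more general alternative (a sketch that avoids hard-coding attribute names like patch_embed1): iterate over model.modules(), which recurses into every submodule, so Conv2d layers nested inside an OverlapPatchEmbed (as proj) or inside a ModuleList are collected without special-casing:

import torch.nn as nn

conv_layers, model_weights = [], []
for module in model.modules():  # recursive, unlike model.children()
    if isinstance(module, nn.Conv2d):
        conv_layers.append(module)
        model_weights.append(module.weight)
print(f"Total convolution layers: {len(conv_layers)}")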
I’m training a U-Net (model below) on 10 different datasets.
When I train using L1 loss I receive no errors; however, for one of the datasets, when using MSE loss the network outputs tensor([nan, ...]) and cannot train after roughly the 5th-10th epoch. I have tried gradient clipping with arbitrary values of 0.5 and 1; neither worked. I've also checked that none of the inputs contain NaN values.
Any help would be appreciated
Thank you
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(config.numinputs, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(64))
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(64))
        self.layer3 = nn.Sequential(
            nn.MaxPool2d(2, stride=2, padding=0))
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(128))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(128))
        self.layer6 = nn.Sequential(
            nn.MaxPool2d(2, stride=2, padding=0))
        self.layer7 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(256))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(256))
        self.layer9 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(256))
        self.layer10 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.layer11 = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(128))
        self.layer12 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(128))
        self.layer13 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(128))
        self.layer14 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.layer15 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(64))
        self.layer16 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(64))
        self.layer17 = nn.Sequential(
            nn.Conv2d(64, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(1))
        self.layer18 = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.layer9(x)
        x = self.layer10(x)
        x = self.layer11(x)
        x = self.layer12(x)
        x = self.layer13(x)
        x = self.layer14(x)
        x = self.layer15(x)
        x = self.layer16(x)
        x = self.layer17(x)
        x = x.view(1, -1)
        x = self.layer18(x)
        x = x.reshape(1, 512, 512)
        return x
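Before changing anything, one way to narrow the problem down is to find out where the NaNs first appear. A sketch of a training step with anomaly detection and a NaN check (loader, criterion and optimizer stand in for your own objects):

import torch

# Flag the backward op that first produces a NaN (slow; for debugging only).
torch.autograd.set_detect_anomaly(True)

for inputs, targets in loader:
    preds = model(inputs)
    loss = criterion(preds, targets)
    if torch.isnan(loss):
        print("NaN loss; prediction range:", preds.min().item(), preds.max().item())
        break
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()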
To my knowledge, neither MSE nor L1 loss is well suited to training a U-Net, mainly because this model performs semantic segmentation and outputs a probability distribution.
Loss functions such as categorical/binary cross-entropy, or focal loss if you have class imbalance, will definitely give you better results!
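A sketch of what that change could look like here, assuming binary masks: drop the final ReLU/BatchNorm/Softmax so that layer17's convolution emits raw per-pixel logits, and move the normalization into the loss (inputs and targets are placeholders):

import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()   # binary masks, 1 output channel
# criterion = nn.CrossEntropyLoss()  # multi-class masks, C output channels

logits = model(inputs)               # raw scores, no softmax in forward()
loss = criterion(logits, targets)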
I'm trying to use VGG16 with transfer learning, but getting errors:
import torch.nn as nn
import torchvision
import torchinfo

model = torchvision.models.vgg16(pretrained=True)
print(model)

for param in model.parameters():
    param.requires_grad = False

input_size = model.classifier[0].in_features
model.classifier[0] = nn.Sequential(
    nn.Linear(input_size, 128), nn.ReLU(),
    nn.Linear(128, 2))

torchinfo.summary(model, (64, 3, 224, 224))
VGG16:
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
Error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, **kwargs)
260 if isinstance(x, (list, tuple)):
--> 261 _ = model.to(device)(*x, **kwargs)
262 elif isinstance(x, dict):
~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
~/.local/lib/python3.8/site-packages/torchvision/models/vgg.py in forward(self, x)
51 x = torch.flatten(x, 1)
---> 52 x = self.classifier(x)
53 return x
~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1119
-> 1120 result = forward_call(*input, **kwargs)
1121 if _global_forward_hooks or self._forward_hooks:
~/.local/lib/python3.8/site-packages/torch/nn/modules/container.py in forward(self, input)
140 for module in self:
--> 141 input = module(input)
142 return input
~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1119
-> 1120 result = forward_call(*input, **kwargs)
1121 if _global_forward_hooks or self._forward_hooks:
~/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py in forward(self, input)
102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
104
~/.local/lib/python3.8/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
1849
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x2 and 4096x4096)
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_8204/406510959.py in <module>
11 nn.Linear(128, 2))
12
---> 13 torchinfo.summary(model, (64, 3, 224, 224))
~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in summary(model, input_size, input_data, batch_dim, cache_forward_pass, col_names, col_width, depth, device, dtypes, row_settings, verbose, **kwargs)
192 input_data, input_size, batch_dim, device, dtypes
193 )
--> 194 summary_list = forward_pass(
195 model, x, batch_dim, cache_forward_pass, device, **kwargs
196 )
~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, **kwargs)
268 except Exception as e:
269 executed_layers = [layer for layer in summary_list if layer.executed]
--> 270 raise RuntimeError(
271 "Failed to run torchinfo. See above stack traces for more details. "
272 f"Executed layers up to: {executed_layers}"
RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Sequential: 1, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, AdaptiveAvgPool2d: 1, Sequential: 2, Linear: 3, ReLU: 3, Linear: 3, ReLU: 2, Dropout: 2]
I'm using the following package versions:
torch==1.10.0
torchinfo==1.5.3
torchvision==0.11.1
What is wrong? What do I need to change in order to use VGG16 with transfer learning?
If you're trying to change the final classifier, you should replace the whole classifier, not just one of its layers:
model.classifier = nn.Sequential(
    nn.Linear(input_size, 128), nn.ReLU(),
    nn.Linear(128, 2))
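For context: replacing only classifier[0] makes that block output 2 features, which are then fed into classifier[3], the untouched Linear(4096, 4096), hence the "mat1 and mat2 shapes cannot be multiplied (64x2 and 4096x4096)" error in the traceback. Putting the fix together, a sketch (layer sizes taken from the question):

import torch.nn as nn
import torchvision
import torchinfo

model = torchvision.models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False        # freeze the pretrained backbone

# Replace the whole classifier head with a small trainable one.
input_size = model.classifier[0].in_features   # 25088
model.classifier = nn.Sequential(
    nn.Linear(input_size, 128), nn.ReLU(),
    nn.Linear(128, 2))

torchinfo.summary(model, (64, 3, 224, 224))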
from torchvision import models

model = models.vgg16(pretrained=True).features
for i, module in enumerate(model.modules()):
    print('-' * 60)
    print(type(module))
    print(module)
What is 'module' in model.modules()? (modules() returns an iterator over all modules in the network.) Why is torch.nn.modules.container.Sequential returned along with the single layers?
Output:
------------------------------------------------------------
<class 'torch.nn.modules.container.Sequential'>
Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU(inplace=True)
  (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace=True)
  (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (22): ReLU(inplace=True)
  (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (25): ReLU(inplace=True)
  (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (27): ReLU(inplace=True)
  (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (29): ReLU(inplace=True)
  (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.pooling.MaxPool2d'>
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.pooling.MaxPool2d'>
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.pooling.MaxPool2d'>
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.pooling.MaxPool2d'>
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.conv.Conv2d'>
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
------------------------------------------------------------
<class 'torch.nn.modules.activation.ReLU'>
ReLU(inplace=True)
------------------------------------------------------------
<class 'torch.nn.modules.pooling.MaxPool2d'>
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
nn.Module.modules returns all modules contained inside the model, recursively; that includes parent submodules and any children submodules they may have of their own. You can instead use nn.Module.children to get only the direct child submodules:
for child in model.children():
    print(child)
If you want to investigate with recursion you could do:
def print_children(module, i=0):
    # a leaf module: module.modules() yields only the module itself
    if len(list(module.modules())) == 1:
        return print(' ' * i, module)
    for child in module.children():
        print_children(child, i + 2)
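For example, calling it on the VGG16 features from the question:

print_children(model)
# each leaf layer is printed indented two spaces per level of nesting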