PyTorch attach extra connection when building model - pytorch

I have the following Resnet prototype on Pytorch:
Resnet_Classifier(
(activation): ReLU()
(model): Sequential(
(0): Res_Block(
(mod): Sequential(
(0): Conv1d(1, 200, kernel_size=(5,), stride=(1,), padding=same)
(1): ReLU()
(2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(4): ReLU()
(5): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(7): ReLU()
(8): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Conv1d(1, 200, kernel_size=(1,), stride=(1,), padding=same)
)
(1): ReLU()
(2): Flatten(start_dim=1, end_dim=-1)
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=40000, out_features=2, bias=True)
(5): Softmax(dim=1)
)
)
Input sample shape is (1, 200).
It seems to be absolutely okay but, when I try to get graph in tensorboard, I get the following structure:
Somehow my residual block appears to be connected to the Linear layer. Does this connection really correspond to my net structure?
Model definition:
class Res_Block(nn.Module):
    """Residual block: three Conv1d -> activation -> BatchNorm1d stages plus a shortcut.

    The main branch (``self.mod``) and the kernel-size-1 convolution
    (``self.shortcut``) are both applied to the same input, and their outputs
    are summed in ``forward``.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels of every convolution in the block.
        ks: Kernel size of the main-branch convolutions.
        stride: Stride of the main-branch convolutions. NOTE: the PyTorch
            Conv1d docs state that ``padding='same'`` does not support
            strides other than 1.
        activation: Activation module. It is deep-copied for every stage, so
            the stages never share a module instance.
    """

    def __init__(self, in_ch, out_ch, ks, stride, activation):
        super().__init__()
        stages = []
        channels = in_ch
        # Three identical stages; only the first one maps in_ch -> out_ch.
        for _ in range(3):
            stages.append(nn.Conv1d(channels, out_ch, ks, stride, padding='same'))
            stages.append(deepcopy(activation))
            stages.append(nn.BatchNorm1d(out_ch))
            channels = out_ch
        self.mod = nn.Sequential(*stages)
        # The shortcut matches the channel count so both branches can be added.
        self.shortcut = nn.Conv1d(in_ch, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        """Return the sum of the main branch and the shortcut applied to ``X``."""
        return self.mod(X) + self.shortcut(X)
# Assemble the classifier: residual block -> ReLU -> flatten -> dropout -> linear -> softmax.
layers = [
    Res_Block(1, 200, 5, 1, nn.ReLU()),
    nn.ReLU(),
    nn.Flatten(),
    nn.Dropout(0.2),
    # 200 output channels x 200 positions (per the stated (1, 200) input sample shape).
    nn.Linear(200 * 200, 2),
    nn.Softmax(dim=1),
]
R = nn.Sequential(*layers)

The model visualization seems incorrect: the main branch and the skip connection are encapsulated inside your Res_Block definition, so they should not appear outside of the red Res_Block[0] box, but inside it.

I solved the problem by removing nn.Sequential in Res_Block __init__ and adding self.l1, self.l2 ... instead. (I also removed some layers and added maxpool but only after I solved the problem)
class Res_Block(nn.Module):
    """Residual block whose main-branch layers are individual attributes.

    The main branch is ``l1`` (Conv1d) -> ``l2`` (activation) -> ``l3``
    (BatchNorm1d) -> ``l4`` (Conv1d) -> ``l5`` (BatchNorm1d). Its output is
    added to ``shortcut``, a kernel-size-1 convolution of the same input.

    Args:
        in_shape: Number of input channels (passed to Conv1d as in_channels).
        out_ch: Number of output channels of every convolution in the block.
        ks: Kernel size of the main-branch convolutions.
        stride: Stride of the main-branch convolutions. NOTE: the PyTorch
            Conv1d docs state that ``padding='same'`` does not support
            strides other than 1.
        activation: Activation module; a deep copy of it is stored as ``l2``.
    """

    def __init__(self, in_shape, out_ch, ks, stride, activation):
        super().__init__()
        self.l1 = nn.Conv1d(in_shape, out_ch, ks, stride, padding='same')
        self.l2 = deepcopy(activation)
        self.l3 = nn.BatchNorm1d(out_ch)
        self.l4 = nn.Conv1d(out_ch, out_ch, ks, stride, padding='same')
        self.l5 = nn.BatchNorm1d(out_ch)
        self.shortcut = nn.Conv1d(in_shape, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        """Return the sum of the main branch and the shortcut applied to ``X``."""
        main = X
        for layer in (self.l1, self.l2, self.l3, self.l4, self.l5):
            main = layer(main)
        return main + self.shortcut(X)
The corresponding tensorboard structure is
The only remaining question is why that helped me solve the problem.

Related

How to find the name of layers in preloaded torchvision models?

I'm trying to use GradCAM with a Deeplabv3 resnet50 model preloaded from torchvision, but in Captum I need to say the name of the layer (of type nn.module). I can't find any documentation for how this is done, does anyone possibly have any ideas of how to get the name of the final ReLu layer?
Thanks in advance!
You can have a look at its representation and get an idea of where it's located by simply printing it:
>>> model = torchvision.models.segmentation.deeplabv3_resnet50()
>>> model
DeepLabV3(
(backbone): IntermediateLayerGetter(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
...
To get the actual exact name of the layer you can loop over the modules with named_modules and only pick the nn.ReLU layers:
>>> relus = [name for name, module in model.named_modules() if isinstance(module, nn.ReLU)]
>>> relus
['backbone.relu',
'backbone.layer1.0.relu',
'backbone.layer1.1.relu',
'backbone.layer1.2.relu',
'backbone.layer2.0.relu',
'backbone.layer2.1.relu',
'backbone.layer2.2.relu',
'backbone.layer2.3.relu',
'backbone.layer3.0.relu',
'backbone.layer3.1.relu',
'backbone.layer3.2.relu',
'backbone.layer3.3.relu',
'backbone.layer3.4.relu',
'backbone.layer3.5.relu',
'backbone.layer4.0.relu',
'backbone.layer4.1.relu',
'backbone.layer4.2.relu',
'classifier.0.convs.0.2',
'classifier.0.convs.1.2',
'classifier.0.convs.2.2',
'classifier.0.convs.3.2',
'classifier.0.convs.4.3',
'classifier.0.project.2',
'classifier.3']
Then pick the last one:
>>> relus[-1]
'classifier.3'

How to strip a pretrained network and add some layers to it using pytorch lightning?

I am trying to use transfer learning for an image segmentation task, and my plan is to use the first few layers of a pretrained model (VGG16 for example) as an encoder and then will add my own decoder.
So, I can load the model and see the structure by printing it:
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
print(model)
I get like this:
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
.....
.....
.....
I can also access the specific layers with model.layer3 for example. Now, I am struggling with certain things.
1) How to cut the model and take every module from the beginning to the end of any layer (model.layer3 for example)?
2) How to freeze only this stripped part, and keep the newly added modules available for training?
For 1): Initialize the ResNet in your LightningModule and slice it until the part that you need. Then add your own head after that, and define forward in the order that you need. See this example, based on the transfer learning docs:
import torchvision.models as models
class ImagenetTransferLearning(LightningModule):
    """LightningModule that pairs a truncated pretrained ResNet-50 with a new linear head.

    Attributes set in ``__init__``:
        backbone: ``nn.Sequential`` of all children of the pretrained model
            except the last one.
        classifier: ``nn.Linear`` mapping the backbone feature size to the
            number of target classes.

    No ``forward`` is defined here; it has to be added in the order the
    modules should be applied.
    """

    def __init__(self):
        super().__init__()
        # init a pretrained resnet
        backbone_tmp = models.resnet50(pretrained=True)
        # Input width of the original `fc` layer; reused as the new head's input size.
        num_filters = backbone_tmp.fc.in_features
        # Keep every child except the last one (presumably the `fc` classifier — confirm).
        layers = list(backbone_tmp.children())[:-1]
        self.backbone = nn.Sequential(*layers)
        # use the pretrained model to classify cifar-10 (10 image classes)
        num_target_classes = 10
        self.classifier = nn.Linear(num_filters, num_target_classes)
For 2): Pass a BackboneFinetuning callback to your trainer. This requires that your LightningModule has a self.backbone attribute containing the modules that you want to be frozen, as shown on the snippet above. You can also use the BaseFinetuning callback if you need different freeze-unfreeze behavior.
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import BackboneFinetuning
# Callable that ignores its `epoch` argument and always returns 1.5.
multiplicative = lambda epoch: 1.5
# NOTE(review): the positional arguments are presumably
# (unfreeze_backbone_at_epoch, lambda_func) — confirm against the
# BackboneFinetuning documentation for the installed Lightning version.
backbone_finetuning = BackboneFinetuning(200, multiplicative)
# Register the callback with the trainer.
trainer = Trainer(callbacks=[backbone_finetuning])
The following is true for any child module of model, but I will answer your question with model.layer3 here:
model.layer3 will give you the nn.Module associated with layer n°3 of your model. You can call it directly as you would with model
>>> z = model.layer3(torch.rand(16, 128, 10, 10))
>>> z.shape
torch.Size([16, 256, 5, 5])
To freeze the model:
you could put the layer in eval mode, which disables dropout and makes BN layers use the statistics learned during training. This is done with model.layer3.eval()
you must disable training on that layer by toggling the requires_grad flag: model.layer3.requires_grad_(False), this will affect all child parameters.
You can freeze the layers with:
pretrained_model.freeze()
https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.core.lightning.html?highlight=Freeze#pytorch_lightning.core.lightning.LightningModule.freeze

How to load multi-task model but only predict one of the tasks?

Here is the model structure. I combined almost everything together for the convenience of hyper-parameter tuning.
'''
class MultiTaskDNN(nn.Module):
    """Shared fully connected trunk followed by one linear head per task.

    Each hidden layer is applied as Linear -> activation -> (optional)
    BatchNorm1d -> Dropout. The trunk output is then fed to every head.

    Args:
        n_tasks: Number of task heads to create.
        input_dim: Size of the input features.
        output_dim: Output size of every head.
        hidden_dim: Sizes of the hidden layers, in order.
        inits: Per-layer weight initialiser name: 'xavier_normal',
            'kaiming_normal', 'xavier_uniform' or 'kaiming_uniform'. Any
            other value leaves the default ``nn.Linear`` initialisation.
        act_function: Per-layer activation name: 'sigmoid', 'relu' or
            'leaky_relu'. Any other value applies no activation.
        dropouts: Per-layer dropout probability.
        batch_norm: Whether to apply the BatchNorm1d layers in ``forward``
            (the layers are created either way).

    The list defaults are only read, never mutated, so sharing them between
    instances is harmless.
    """

    # Initialiser names accepted in `inits`.
    _INIT_FUNCTIONS = {
        'xavier_normal': nn.init.xavier_normal_,
        'kaiming_normal': nn.init.kaiming_normal_,
        'xavier_uniform': nn.init.xavier_uniform_,
        'kaiming_uniform': nn.init.kaiming_uniform_,
    }

    # Activation names accepted in `act_function`.
    _ACTIVATIONS = {
        'sigmoid': torch.sigmoid,
        'relu': F.relu,
        'leaky_relu': F.leaky_relu,
    }

    def __init__(self, n_tasks,
                 input_dim=1024,
                 output_dim=1,
                 hidden_dim=[1024, 100],
                 inits=['xavier_normal', 'kaiming_uniform'],
                 act_function=['relu', 'leaky_relu'],
                 dropouts=[0.10, 0.25],
                 batch_norm=True):
        super().__init__()
        self.n_tasks = n_tasks
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.act_function = act_function
        self.batch_norm = batch_norm

        current_dim = input_dim
        self.layers = nn.ModuleList()
        self.dropouts = nn.ModuleList()
        self.bns = nn.ModuleList()
        for k, hdim in enumerate(hidden_dim):
            self.layers.append(nn.Linear(current_dim, hdim))
            self.bns.append(nn.BatchNorm1d(hdim, eps=2e-1))
            current_dim = hdim
            # Unknown names fall through and keep the default initialisation.
            init_fn = self._INIT_FUNCTIONS.get(inits[k])
            if init_fn is not None:
                init_fn(self.layers[k].weight)
            self.dropouts.append(nn.Dropout(dropouts[k]))

        # One linear head per task, all fed by the last hidden layer.
        self.heads = nn.ModuleList()
        for _ in range(self.n_tasks):
            self.heads.append(nn.Linear(current_dim, output_dim))

    def forward(self, x):
        """Run the shared trunk and return a list with one output per head."""
        for k, layer in enumerate(self.layers):
            x = layer(x)
            # Unknown names fall through and apply no activation.
            activation = self._ACTIVATIONS.get(self.act_function[k])
            if activation is not None:
                x = activation(x)
            if self.batch_norm:
                x = self.bns[k](x)
            x = self.dropouts[k](x)
        return [head(x) for head in self.heads]
'''
Please also let me know if the structure looks right. After training this multi-task model, which has, say, 10 tasks (heads), I only want to predict task 7, i.e. head No. 7. How should I load the model and do the prediction? Thank you.
model.state_dict()
MultiTaskDNN(
(layers): ModuleList(
(0): Linear(in_features=1024, out_features=128, bias=True)
(1): Linear(in_features=128, out_features=128, bias=True)
)
(dropouts): ModuleList(
(0): Dropout(p=0.25, inplace=False)
(1): Dropout(p=0.25, inplace=False)
)
(bns): ModuleList(
(0): BatchNorm1d(128, eps=0.2, momentum=0.1, affine=True, track_running_stats=True)
(1): BatchNorm1d(128, eps=0.2, momentum=0.1, affine=True, track_running_stats=True)
)
(heads): ModuleList(
(0): Linear(in_features=128, out_features=1, bias=True)
(1): Linear(in_features=128, out_features=1, bias=True)
(2): Linear(in_features=128, out_features=1, bias=True)
(3): Linear(in_features=128, out_features=1, bias=True)
(4): Linear(in_features=128, out_features=1, bias=True)
(5): Linear(in_features=128, out_features=1, bias=True)
(6): Linear(in_features=128, out_features=1, bias=True)
(7): Linear(in_features=128, out_features=1, bias=True)
(8): Linear(in_features=128, out_features=1, bias=True)
(9): Linear(in_features=128, out_features=1, bias=True)
(10): Linear(in_features=128, out_features=1, bias=True)
)
)

How to freeze selected layers of a model in Pytorch?

I am using the mobileNetV2 and I only want to freeze part of the model. I know I can use the following code to freeze the entire model
MobileNet = models.mobilenet_v2(pretrained=True)
# Freeze every parameter of the model.
for param in MobileNet.parameters():
    param.requires_grad = False
but I want everything from (15) onward to remain unfrozen. How can I selectively freeze only the layers before a chosen layer?
(15): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(16): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(17): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(18): ConvBNReLU(
(0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
) ) (classifier): Sequential(
(0): Dropout(p=0.2, inplace=False)
(1): Linear(in_features=1280, out_features=1000, bias=True) ) )
PyTorch's model implementations are well modularized, so just as you do
# Freeze every parameter of the model.
for param in MobileNet.parameters():
    param.requires_grad = False
, you may also do
# Re-enable gradients for the parameters of features[15] only.
for param in MobileNet.features[15].parameters():
    param.requires_grad = True
afterwards to unfreeze parameters in (15).
Loop from 15 to 18 to unfreeze the last several layers.
Just adding this here for completeness. You can also freeze parameters in place without iterating over them with requires_grad_ (API).
For example say you have a RetinaNet and want to just fine-tune on the heads
class RetinaNet(torch.nn.Module):
def __init__(self, ...):
self.backbone = ResNet(...)
self.fpn = FPN(...)
self.box_head = torch.nn.Sequential(...)
self.cls_head = torch.nn.Sequential(...)
Then you could freeze the backbone and FPN like this:
# Getting the model
retinanet = RetinaNet(...)
# Freezing backbone and FPN
retinanet.backbone.requires_grad_(False)
retinanet.fpn.requires_grad_(False)
If you want to define some layers by name and then unfreeze them, I propose a variant of @JVGD's answer:
class RetinaNet(torch.nn.Module):
def __init__(self, ...):
self.backbone = ResNet(...)
self.fpn = FPN(...)
self.box_head = torch.nn.Sequential(...)
self.cls_head = torch.nn.Sequential(...)
# Getting the model
retinanet = RetinaNet(...)
# The param name is f'{module_name}.weight' or f'{module_name}.bias'.
# Some layers, e.g., batch norm, have additional params.
# In some circumstances, e.g., when using DataParallel(),
# the param name is prefixed by 'module.'.
# NOTE(review): parameters inside an nn.Sequential are typically reported with
# an index in the name (e.g. 'cls_head.0.weight'); check the output of
# retinanet.named_parameters() and adjust this list accordingly.
params_to_train = ['cls_head.weight', 'cls_head.bias']
for name, param in retinanet.named_parameters():
    # Set True only for params in the list 'params_to_train'
    param.requires_grad = name in params_to_train
...
The advantage is that you can define all layers to unfreeze in one Iterable.
An optimized answer to the first answer above is to freeze only the first 15 layers [0-14] because the last layers [15-18] are by default unfrozen (param.requires_grad = True).
Therefore, we only need to code this way:
MobileNet = torchvision.models.mobilenet_v2(pretrained=True)
# Freeze feature layers 0-14. The slice end is exclusive, so the end index
# must be 15 for layer 14 to be included; layers 15-18 stay trainable.
for param in MobileNet.features[0:15].parameters():
    param.requires_grad = False

Freezing all the layers but FCN head in FCN_ResNet101 Pytorch

I want to finetune an FCN_ResNet101. I would like to change the last layer as my dataset has a different number of classes. Also, finetune only the FCN head.
For the former, is it enough to only change the num_classes argument when defining the model or I need to use something like this:
model = torchvision.models.segmentation.fcn_resnet101(pretrained=True)
model.classifier=nn.identity()
model.Conv2d = nn.Conv2d(
in_channels=256,
out_channels=nb_classes,
kernel_size=1,
stride=1
)
I took this piece of code from another thread. I am not sure whether it is necessary to use nn.identity(). When I do, the last layer does not change; instead, the last layer of the second-to-last FCN head changes!
And how many layers must be changed so that my FCN head is retrained?
I wrote it this way but I’m mostly confused about FCN_ResNet101 architecture.
model = torchvision.models.segmentation.fcn_resnet101(pretrained=True, progress=True, num_classes=?)
#model.classifier[4] = nn.Identity()
"""
FCNHead(
(0): Conv2d(2048, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Dropout(p=0.1)
(4): Conv2d(512, 21, kernel_size=(1, 1), stride=(1, 1))
), FCNHead(
(0): Conv2d(1024, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): Dropout(p=0.1)
(4): Conv2d(256, 21, kernel_size=(1, 1), stride=(1, 1))
)]
"""
#setting our own number of classes
layer_list = list(model.children())[-5:]
model_small = nn.Sequential(*list(model.children()))[-5:]
for param in model_small.parameters():
param.requires_grad = False
model_small.Conv2d = nn.Conv2d( in_channels=1024,kernel_size=(3,3),stride=(1,1))
model_small.BatchNorm2d = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
model_small.ReLU = nn.ReLU()
model_small.Dropout = nn.Dropout(p=0.1)
model_small.Conv2d = nn.Conv2d(
in_channels=256,
out_channels=nb_classes,
kernel_size=1,
stride=1
)
model = model_small.to(device)
Any guidance is very much appreciated!

Resources