Pytorch Architecture Failure - pytorch

I am trying to build my own ResNet architecture.
I have the following ResNet architecture:
ResNetClassifier(
(feature_extractor): Sequential(
(0): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(3): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(4): AvgPool2d(kernel_size=2, stride=2, padding=0)
(5): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(6): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
(classifier): Sequential(
(0): Linear(in_features=518400, out_features=100, bias=True)
(1): LeakyReLU(negative_slope=0.01)
(2): Linear(in_features=100, out_features=100, bias=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Linear(in_features=100, out_features=10, bias=True)
)
)
When sending a tensor of size torch.Size([1, 3, 100, 100])
with the following code
net = cnn.ResNetClassifier(
in_size=(3,100,100), out_classes=10, channels=[32, 64]*3,
pool_every=4, hidden_dims=[100]*2,
activation_type='lrelu', activation_params=dict(negative_slope=0.01),
pooling_type='avg', pooling_params=dict(kernel_size=2),
batchnorm=True, dropout=0.1,
)
print(net)
torch.manual_seed(seed)
test_image = torch.randint(low=0, high=256, size=(3, 100, 100), dtype=torch.float).unsqueeze(0)
test_out = net(test_image)
print('out =', test_out)
it fails with the following error:
"RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[1, 64, 100, 100] to have 3 channels, but got 64 channels instead"
Any clue to solve it will be appreciated!
BTW, what is the recommended way to debug such network errors?

Related

How to change the first conv layer in the resnet 18?

I have a dataset with 20 classes, and I'd like to use a pretrained model with a bit of modification.
I know that if we want to change the last linear layer of ResNet18 to categorize 20 classes (instead of 1000), we could write the following:
resnet.fc = nn.Linear(512, 20)  # the class is nn.Linear (capital L): 512 input features -> 20 classes
But I don't know how to access any of the other layers, such as the second convolution in a BasicBlock.
When I call resnet.layer1 it returns:
Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
But how do I grab and change (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)?
You can access the layer (conv2) in block (0) of layer1 as follows:
from torchvision import datasets, transforms, models
resnet = models.resnet18(pretrained=True)
print(resnet.layer1[0].conv2)
Output:
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
bias=False)

How to Extract the feature vectors and save them in Densenet121?

I'm trying to extract the feature vectors of my dataset (X-ray images) from a DenseNet121 CNN that was trained for classification using PyTorch. I want to extract the feature vectors from one of the intermediate layers.
model.eval() -->
DataParallel(
(module): DenseNet121(
(densenet121): DenseNet(
(features): Sequential(
(conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu0): ReLU(inplace=True)
(pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(denseblock1): _DenseBlock(
(denselayer1): _DenseLayer(
(norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer2): _DenseLayer(
(norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer3): _DenseLayer(
(norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer4): _DenseLayer(
(norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer5): _DenseLayer(
(norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer6): _DenseLayer(
(norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
)
(transition1): _Transition(
(norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(denseblock2): _DenseBlock(
(denselayer1): _DenseLayer(
(norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer2): _DenseLayer(
(norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer3): _DenseLayer(
(norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer4): _DenseLayer(
(norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer5): _DenseLayer(
(norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer6): _DenseLayer(
(norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer7): _DenseLayer(
(norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer8): _DenseLayer(
(norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer9): _DenseLayer(
(norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer10): _DenseLayer(
(norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer11): _DenseLayer(
(norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer12): _DenseLayer(
(norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
)
(transition2): _Transition(
(norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
I think I have to do some work in the following block of code but I need help to do that.
class DenseNet121(nn.Module):
    """Pretrained torchvision DenseNet-121 with a replaced classifier.

    The stock classifier is swapped for a linear layer followed by a
    sigmoid. The wrapped network is stored as ``self.densenet121``.

    Args:
        out_size: Number of output units of the replacement classifier.
    """

    def __init__(self, out_size):
        super().__init__()
        backbone = torchvision.models.densenet121(pretrained=True)
        # Reuse the input width of the stock classifier so the new head
        # plugs into the existing feature extractor unchanged.
        in_features = backbone.classifier.in_features
        backbone.classifier = nn.Sequential(
            nn.Linear(in_features, out_size),
            nn.Sigmoid(),
        )
        self.densenet121 = backbone

    def forward(self, x):
        """Return the output of the wrapped network for input ``x``."""
        return self.densenet121(x)
I want to get the feature vectors and then save them in order to use them later on as an input for another function.
Thank you.
You probably want to use something like a forward hook. It is basically a function call you can register which is executed when the forward of this specific module is called. So you can register the forward hook at the points in your model where you want to log the input and/or output and write the feature vector into a file or whatever.
To figure out how to address the correct layer, look at the model description you posted and walk down the module tree. For example, if you want to see the input and output of denseblock1.denselayer2.conv1, it should be something along these lines:
model.densenet121.features.denseblock1.denselayer2.conv1
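There is no guarantee that this will work as written, so it is best to experiment a bit in a debugger. Since the printout shows the model wrapped in DataParallel, you will probably have to go through model.module first, and you may also need to access elements of a Sequential by index with the [] operator.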

require_grad = True in pytorch model despite changing require_grad = false for all parameters

I am trying to extract features from a pretrained model in PyTorch and then use the features for further training.
I have imported the model and set requires_grad to False for all parameters as follows:
import torchvision.models as models
vgg_model = models.vgg19_bn(pretrained=True)
for param in vgg_model.parameters():
param.requires_grad = False
Now I define my model, which extracts the features and then trains the other layers, as follows:
class VGGModel(nn.Module):
    """Small classification head on top of the first child of ``vgg_model``.

    Relies on a module-level ``vgg_model`` being defined before an instance
    is created. Its first child module is stored as ``self.inception`` and
    is followed by a 5x5 convolution (512 -> 128 channels), dropout and a
    linear layer with 5 outputs.
    """

    def __init__(self):
        """Build the layers.

        Intended input image size per the original note: (227, 227).
        """
        super().__init__()
        # Take the backbone from ``vgg_model`` (the network whose parameters
        # were frozen), not from ``model``: that name is only bound after
        # this class has been instantiated.
        self.inception = next(vgg_model.children())
        self.conv1 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=5)
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(128, 5)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        x = self.inception(x)
        x = F.relu(x)
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=3)
        # fc1 expects 128 features, so the pooled map must flatten to
        # 128 values per sample (i.e. be 1x1 spatially).
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc1(x)
        return F.log_softmax(x, dim=1)
But when I check requires_grad for the model, it appears that the VGG layers still require gradients as well.
model = VGGModel().to(device)
model.requires_grad_
output:
<bound method Module.requires_grad_ of VGGModel(
(inception): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU(inplace=True)
(13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): ReLU(inplace=True)
(17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): ReLU(inplace=True)
(20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(22): ReLU(inplace=True)
(23): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(24): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(25): ReLU(inplace=True)
(26): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(27): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(29): ReLU(inplace=True)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(32): ReLU(inplace=True)
(33): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(34): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(35): ReLU(inplace=True)
(36): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(37): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(38): ReLU(inplace=True)
(39): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU(inplace=True)
(43): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(44): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(45): ReLU(inplace=True)
(46): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(47): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(48): ReLU(inplace=True)
(49): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(50): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(51): ReLU(inplace=True)
(52): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv1): Conv2d(512, 128, kernel_size=(5, 5), stride=(1, 1))
(dropout): Dropout(p=0.4, inplace=False)
(fc1): Linear(in_features=128, out_features=5, bias=True)
)>
How do I prevent pretrained model from training again?
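You should call the method (with parentheses and an argument; writing model.requires_grad_ without calling it only displays the bound method, as in your output):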
model.requires_grad_(False)
You probably want to freeze only part of the network, though. In your case you should then replace the fc1 attribute with a fresh layer:
model.fc1 = torch.nn.Linear(128, num_classes)
Where num_classes is the number of classes you have (you should at least unfreeze the last linear layer).

Apply hooks on inner layers of ResNet

The pytorch official implementation of resnet results in the following model:
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(shortcut): Sequential()
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(shortcut): Sequential()
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(shortcut): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(shortcut): Sequential()
)
)
### Skipping layers 3 and 4
(linear): Linear(in_features=512, out_features=10, bias=True)
)
I tried applying hook to the conv1 in the first BasicBlock of layer2 using
handle = model.layer2[0][0].register_forward_hook(batchout_pre_hook)
but got the following error :
TypeError: 'BasicBlock' object does not support indexing
I am able to apply a hook to the BasicBlock using handle = model.layer2[0].register_forward_hook(batchout_pre_hook), but I cannot apply a hook to the modules inside the BasicBlock.
For attaching a hook to conv1 in layer2's 0th block, you need to use
handle = model.layer2[0].conv1.register_forward_hook(batchout_pre_hook)
This is because inside the 0th block, the modules are named as conv1, bn1, etc. and are not a list to be accessed via an index.

How to move data_parallel model to a specific cuda device?

I currently need to use a pretrained model by setting it on a specific cuda device. The pretrained model is defined as below:
DataParallel(
(module): MobileFaceNet(
(conv1): Conv_block(
(conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=64)
)
(conv2_dw): Conv_block(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=64)
)
(conv_23): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_3): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_34): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_4): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(4): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_45): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(conv_dw): Conv_block(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=512, bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(project): Linear_block(
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_5): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_6_sep): Conv_block(
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(conv_6_dw): Linear_block(
(conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), groups=512, bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_6_flatten): Flatten()
(linear): Linear(in_features=512, out_features=512, bias=False)
(bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
If I move the model to a device in the usual way,
model.to(device)
with device set to cuda:1, the forward pass raises an error:
model(imgs)
RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cuda:1
I think this is because the model was previously trained using PyTorch's data-parallel utilities (torch.nn.DataParallel).
How can I properly move the model to the specific device I want?
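You should first unwrap the underlying network from the DataParallel wrapper. As the error message says, the wrapper expects the parameters and buffers to be on device_ids[0] (here cuda:0), so moving the wrapped model to cuda:1 fails on the forward pass. The original network is available as the wrapper's .module attribute.
Assuming your DataParallel instance is named model, you could do: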
device = torch.device("cuda:1")
module = model.module.to(device)

Resources