Related
I have a dataset with 20 classes, and I'd like to use a pretrained model with a bit of modification.
I know that if we want to change the last linear layer of ResNet18 to categorize 20 classes (instead of 1000), we could write the following:
resnet.fc = nn.Linear(512, 20)
But I don't know how to access any other layer, like the second convolution in a BasicBlock.
When I call resnet.layer1 it returns:
Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
But how can I grab and change (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)?
You can access the (conv2) layer inside block (0) of layer1 as follows:
from torchvision import datasets, transforms, models
resnet = models.resnet18(pretrained=True)
print(resnet.layer1[0].conv2)
Output:
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
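To actually replace that layer, assign a new module to the same attribute path. A minimal sketch (the 5x5 kernel is just an example; the in/out channels must stay 64 so the following BatchNorm and the residual addition still match):
import torch.nn as nn

# Swap the second conv of the first BasicBlock in layer1.
# padding=2 keeps the spatial size unchanged, like the original 3x3 with padding=1.
resnet.layer1[0].conv2 = nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2, bias=False)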
I am trying to build my own ResNet architecture.
I have the following ResNet architecture:
ResNetClassifier(
(feature_extractor): Sequential(
(0): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(1): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(2): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(3): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(4): AvgPool2d(kernel_size=2, stride=2, padding=0)
(5): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(6): ResidualBlock(
(main_path): Sequential(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Dropout2d(p=0.1, inplace=False)
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Dropout2d(p=0.1, inplace=False)
(6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): LeakyReLU(negative_slope=0.01)
(8): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): Dropout2d(p=0.1, inplace=False)
(10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): LeakyReLU(negative_slope=0.01)
(12): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): Dropout2d(p=0.1, inplace=False)
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): LeakyReLU(negative_slope=0.01)
(16): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): Dropout2d(p=0.1, inplace=False)
(18): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): LeakyReLU(negative_slope=0.01)
(20): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(shortcut_path): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
(classifier): Sequential(
(0): Linear(in_features=518400, out_features=100, bias=True)
(1): LeakyReLU(negative_slope=0.01)
(2): Linear(in_features=100, out_features=100, bias=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Linear(in_features=100, out_features=10, bias=True)
)
)
When sending a tensor of size torch.Size([1, 3, 100, 100])
with the following code:
net = cnn.ResNetClassifier(
    in_size=(3, 100, 100), out_classes=10, channels=[32, 64]*3,
    pool_every=4, hidden_dims=[100]*2,
    activation_type='lrelu', activation_params=dict(negative_slope=0.01),
    pooling_type='avg', pooling_params=dict(kernel_size=2),
    batchnorm=True, dropout=0.1,
)
print(net)
torch.manual_seed(seed)
test_image = torch.randint(low=0, high=256, size=(3, 100, 100), dtype=torch.float).unsqueeze(0)
test_out = net(test_image)
print('out =', test_out)
it fails with the following error:
"RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[1, 64, 100, 100] to have 3 channels, but got 64 channels instead"
Any clue to solving it would be appreciated!
BTW, what is the recommended way to debug such network errors?
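One way to debug shape errors like this is to print every layer's input/output shapes with forward hooks. A minimal sketch, assuming the net and test_image objects built above:
def print_shapes(module, inputs, output):
    # Hooks fire after a successful forward, so the last line printed
    # before the failure points just before the offending layer.
    print(module.__class__.__name__, tuple(inputs[0].shape), '->', tuple(output.shape))

# Register on leaf modules only, to avoid duplicate prints from containers.
handles = [m.register_forward_hook(print_shapes)
           for m in net.modules() if len(list(m.children())) == 0]
try:
    net(test_image)
except RuntimeError as e:
    print('failed:', e)
finally:
    for h in handles:
        h.remove()  # always unregister the hooks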
I'm trying to extract feature vectors for my dataset (x-ray images) from a DenseNet121 CNN trained for classification in PyTorch. I want to extract the feature vectors from one of the intermediate layers.
model.eval() -->
DataParallel(
(module): DenseNet121(
(densenet121): DenseNet(
(features): Sequential(
(conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu0): ReLU(inplace=True)
(pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(denseblock1): _DenseBlock(
(denselayer1): _DenseLayer(
(norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer2): _DenseLayer(
(norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer3): _DenseLayer(
(norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer4): _DenseLayer(
(norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer5): _DenseLayer(
(norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer6): _DenseLayer(
(norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
)
(transition1): _Transition(
(norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(denseblock2): _DenseBlock(
(denselayer1): _DenseLayer(
(norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer2): _DenseLayer(
(norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer3): _DenseLayer(
(norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer4): _DenseLayer(
(norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer5): _DenseLayer(
(norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer6): _DenseLayer(
(norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer7): _DenseLayer(
(norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer8): _DenseLayer(
(norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer9): _DenseLayer(
(norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer10): _DenseLayer(
(norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer11): _DenseLayer(
(norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(denselayer12): _DenseLayer(
(norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
)
(transition2): _Transition(
(norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
I think I have to do some work in the following block of code but I need help to do that.
import torch.nn as nn
import torchvision

class DenseNet121(nn.Module):
    def __init__(self, out_size):
        super(DenseNet121, self).__init__()
        self.densenet121 = torchvision.models.densenet121(pretrained=True)
        num_ftrs = self.densenet121.classifier.in_features
        # Replace the original 1000-class head with a task-specific one.
        self.densenet121.classifier = nn.Sequential(
            nn.Linear(num_ftrs, out_size),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.densenet121(x)
        return x
I want to get the feature vectors and then save them in order to use them later on as an input for another function.
Thank you.
You probably want to use something like a forward hook. It is basically a callback you can register that is executed when the forward pass of that specific module is called. So you can register the forward hook at the points in your model where you want to log the input and/or output, and write the feature vector to a file or wherever you need it.
To find the correct layer, look at the description you posted and walk down the tree. So if you want to see the input and output of denseblock1.denselayer2.conv1, it should be something along these lines:
model.densenet121.features.denseblock1.denselayer2.conv1
No guarantee that it will work, and it is best to experiment a bit in a debugger. You may also need to access elements of a Sequential via an index with the [] operator.
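A minimal sketch of that idea, assuming the DenseNet121 wrapper from the question (unwrap DataParallel via model.module first if needed; images stands in for a batch from your dataset):
import torch

features = {}  # captured activations, keyed by layer name

def save_features(module, inputs, output):
    # Detach so the stored tensor does not keep the autograd graph alive.
    features['denseblock1.denselayer2.conv1'] = output.detach().cpu()

target = model.densenet121.features.denseblock1.denselayer2.conv1
handle = target.register_forward_hook(save_features)

model.eval()
with torch.no_grad():
    model(images)  # the forward pass triggers the hook

torch.save(features, 'features.pt')  # reuse later as input to another function
handle.remove()  # unregister when done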
I am trying to extract features from a pretrained model in PyTorch and then use the features for further training.
I have imported the model and set requires_grad to False for all parameters as follows:
import torchvision.models as models

vgg_model = models.vgg19_bn(pretrained=True)
for param in vgg_model.parameters():
    param.requires_grad = False
Now I defined my model, which extracts the features and then trains the other layers, as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VGGModel(nn.Module):
    def __init__(self):
        '''Input Image Size: (227, 227)'''
        super(VGGModel, self).__init__()
        # Use the frozen VGG feature extractor defined above.
        self.inception = list(vgg_model.children())[0]
        self.conv1 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=5)
        self.dropout = nn.Dropout(0.4)
        self.fc1 = nn.Linear(128, 5)

    def forward(self, x):
        x = self.inception(x)
        x = F.relu(x)
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=3)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc1(x)
        x = F.log_softmax(x, dim=1)
        return x
But when I check requires_grad for the model, it shows that the VGG layers require gradients as well.
model = VGGModel().to(device)
model.requires_grad_
output:
<bound method Module.requires_grad_ of VGGModel(
(inception): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU(inplace=True)
(13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): ReLU(inplace=True)
(17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): ReLU(inplace=True)
(20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(22): ReLU(inplace=True)
(23): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(24): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(25): ReLU(inplace=True)
(26): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(27): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(29): ReLU(inplace=True)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(32): ReLU(inplace=True)
(33): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(34): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(35): ReLU(inplace=True)
(36): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(37): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(38): ReLU(inplace=True)
(39): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU(inplace=True)
(43): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(44): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(45): ReLU(inplace=True)
(46): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(47): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(48): ReLU(inplace=True)
(49): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(50): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(51): ReLU(inplace=True)
(52): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv1): Conv2d(512, 128, kernel_size=(5, 5), stride=(1, 1))
(dropout): Dropout(p=0.4, inplace=False)
(fc1): Linear(in_features=128, out_features=5, bias=True)
)>
How do I prevent the pretrained model from training again?
You should run the method:
model.requires_grad_(False)
You probably want to freeze only part of the network, though; in your case, you should change the fc1 attribute:
model.fc1 = torch.nn.Linear(128, num_classes)
where num_classes is the number of classes you have (you should at least unfreeze the last linear layer).
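Putting it together, a minimal sketch assuming the VGGModel above (num_classes is a placeholder for your class count):
import torch

model = VGGModel()
model.requires_grad_(False)  # freeze everything, in place
model.fc1 = torch.nn.Linear(128, num_classes)  # a fresh layer defaults to requires_grad=True

# Verify which parameters will actually be trained
for name, p in model.named_parameters():
    if p.requires_grad:
        print(name)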
I currently need to use a pretrained model, placing it on a specific CUDA device. The pretrained model is defined as below:
DataParallel(
(module): MobileFaceNet(
(conv1): Conv_block(
(conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=64)
)
(conv2_dw): Conv_block(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=64)
)
(conv_23): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_3): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(conv_dw): Conv_block(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=128)
)
(project): Linear_block(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_34): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_4): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(4): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_45): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(conv_dw): Conv_block(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=512, bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(project): Linear_block(
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(conv_5): Residual(
(model): Sequential(
(0): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Depth_Wise(
(conv): Conv_block(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(conv_dw): Conv_block(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=256)
)
(project): Linear_block(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(conv_6_sep): Conv_block(
(conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(prelu): PReLU(num_parameters=512)
)
(conv_6_dw): Linear_block(
(conv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), groups=512, bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_6_flatten): Flatten()
(linear): Linear(in_features=512, out_features=512, bias=False)
(bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
If I conventionally call
model.to(device)
with device set to cuda:1, it raises an error when forwarding:
model(imgs)
RuntimeError: module must have its parameters and buffers on device cuda:0 (device_ids[0]) but found one of them on device: cuda:1
I think this is because the model was previously trained with PyTorch's data-parallel utilities.
How can I properly move the model to the specific device I want?
You should get the neural network out of DataParallel first.
Assuming your DataParallel is named model, you could do:
device = torch.device("cuda:1")
module = model.module.to(device)
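Remember to move the inputs to the same device before the forward pass; a short sketch, assuming imgs is your input batch:
out = module(imgs.to(device))  # parameters and inputs must share the device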