PyTorch: The size of tensor a (24) must match the size of tensor b (48) at non-singleton dimension 3
The code below works fine and generates proper results.
import torch
import torch.nn as nn
import torch.nn.functional as F

from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        self.conv = nn.Conv2d(
            3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        self.conv1 = nn.Conv2d(
            32, 512, kernel_size=1, stride=1, padding=0, bias=False)
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.conv2 = nn.Conv2d(
            32, 3, kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2

        return x, hidden1, hidden2, hidden3, hidden4
Now I have changed self.conv to use a layer from a pretrained ResNet. After this change it shows a tensor mismatch error during training. Everything else is the same; I only added the lines marked with ** in the code below.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from modules import ConvLSTMCell, Sign


class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        # self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):
        x = self.conv(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class Binarizer(nn.Module):
    def __init__(self):
        super(Binarizer, self).__init__()
        self.conv = nn.Conv2d(512, 32, kernel_size=1, bias=False)
        self.sign = Sign()

    def forward(self, input):
        feat = self.conv(input)
        x = F.tanh(feat)
        return self.sign(x)


class DecoderCell(nn.Module):
    def __init__(self):
        super(DecoderCell, self).__init__()

        **resConv = models.resnet50(pretrained=True)
        resConv.layer4 = nn.Conv2d(32, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv1 = resConv.layer4**
        self.rnn1 = ConvLSTMCell(
            512,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvLSTMCell(
            128,
            512,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvLSTMCell(
            128,
            256,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        self.rnn4 = ConvLSTMCell(
            64,
            128,
            kernel_size=3,
            stride=1,
            padding=1,
            hidden_kernel_size=3,
            bias=False)
        **resConv2 = models.resnet50(pretrained=True)
        resConv2.layer4 = nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = resConv2.layer4**

    def forward(self, input, hidden1, hidden2, hidden3, hidden4):
        x = self.conv1(input)

        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]
        x = F.pixel_shuffle(x, 2)

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]
        x = F.pixel_shuffle(x, 2)

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]
        x = F.pixel_shuffle(x, 2)

        hidden4 = self.rnn4(x, hidden4)
        x = hidden4[0]
        x = F.pixel_shuffle(x, 2)

        x = F.tanh(self.conv2(x)) / 2

        return x, hidden1, hidden2, hidden3, hidden4
You are doing it the wrong way. Some explanation:

resConv = models.resnet50(pretrained=True)  # you are reading a model

Now you are replacing a layer in that model with a newly initialized layer. Secondly, layer4 in resnet50 is a sequential block containing multiple layers; use print(resConv) to see the exact layers in the model.

resConv.layer4 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)

So here you are using a brand-new, untrained layer:

self.conv = resConv.layer4
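To see this concretely, here is a quick check (a minimal sketch; the commented values reflect torchvision's resnet50 as I know it):

import torchvision.models as models

resConv = models.resnet50(pretrained=True)
print(type(resConv.layer4))  # <class 'torch.nn.modules.container.Sequential'>
print(len(resConv.layer4))   # 3 -- layer4 stacks three Bottleneck blocks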
As per your query regarding usage of a pretrained layer, you should do it like this:

resConv = models.resnet50(pretrained=True)
print(resConv)  # see the layer which you want to use

self.conv = resConv.conv1  # replace conv1 with that layer
# note: conv1 is the name of the first conv layer in resnet
To add to this, I would also recommend acquiring and adding this layer (or the weights and biases) outside of the object initialization. Something like:

enc = EncoderCell()
resnet50 = models.resnet50(pretrained=True)

and then either

enc.conv = resnet50.conv1

or, more ideally,

enc.conv.load_state_dict(resnet50.conv1.state_dict())

The reason being, calling state_dict() on an nn.Module creates a clone of the parameters (weights and biases in this case) which can be loaded via the nn.Module.load_state_dict() method, as long as the two instances of nn.Module share the same shapes. So you get the pretrained weights, and they are completely detached from the pretrained model. Then you can get rid of the pretrained model, since it could be rather large in memory:

del resnet50
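As a minimal sketch of this clone-then-load pattern between two independent modules of matching shape (the layer sizes here are illustrative, not from the original code):

import torch.nn as nn

src = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
dst = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)

# state_dict() returns a copy of src's parameters; after loading,
# dst holds the same weights but keeps no reference back to src
dst.load_state_dict(src.state_dict())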
I submitted a potential improvement to the other answer, but to address the errors you are getting I am answering here as well. If the code runs before your edits, and the layer you are trying to change has the same shape as the previous one, then my guess is that it may have to do with the computational graph that is formed from creating the resnet50 object. I would recommend the approach I mentioned in my edit to the other answer, but I will state it here again (note: this assumes you keep the code as it was originally):
# instantiate your encoder (repeat these steps with the decoder as well)
enc = EncoderCell()

# get the pretrained model
resnet = models.resnet50(pretrained=True)

# load the state dict into the regular conv layer
# (conv1 is a single conv layer, so its state-dict keys match enc.conv;
# load_state_dict additionally requires the parameter shapes to match)
enc.conv.load_state_dict(resnet.conv1.state_dict())
This should load the pretrained weights and biases from the resnet50 model into your conv layer, and the same can be done for the decoder conv layers, as long as they all share the same shape.
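Since load_state_dict only succeeds when the keys and parameter shapes line up, a quick sanity check before loading can save a confusing stack trace (purely illustrative):

# compare destination and source parameter shapes before loading
src_sd = resnet.conv1.state_dict()
dst_sd = enc.conv.state_dict()
for key in dst_sd:
    src_shape = tuple(src_sd[key].shape) if key in src_sd else "missing"
    print(key, tuple(dst_sd[key].shape), src_shape)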
To do more testing with your mismatch error, I would recommend either using a debugger or print statements in the forward() method of the models to see the shape of the tensor after each layer is applied, like so:
def forward(self, input, hidden1, hidden2, hidden3, hidden4):
    print(input.size())
    x = self.conv1(input)
    print(x.size())

    hidden1 = self.rnn1(x, hidden1)
    x = hidden1[0]
    x = F.pixel_shuffle(x, 2)

    hidden2 = self.rnn2(x, hidden2)
    x = hidden2[0]
    x = F.pixel_shuffle(x, 2)

    hidden3 = self.rnn3(x, hidden3)
    x = hidden3[0]
    x = F.pixel_shuffle(x, 2)

    hidden4 = self.rnn4(x, hidden4)
    x = hidden4[0]
    x = F.pixel_shuffle(x, 2)

    x = F.tanh(self.conv2(x)) / 2

    return x, hidden1, hidden2, hidden3, hidden4
and of course you can put the print statements wherever else you like in the forward method. I would also highly recommend a debugger; PyCharm makes this quite easy, and also makes it easy to see the state of variables in scientific mode beside the Python console it gives. It might be worth looking up how to calculate the size of variables after they pass through certain layers, like convolutional layers. This is well understood, and formulas exist to calculate the size of each dimension based on the initial size, the filter size, the stride width, and the padding; see the sketch below.
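For a standard 2D convolution, each output side length is floor((n + 2p - k) / s) + 1, where n is the input side, k the kernel size, s the stride, and p the padding. A minimal helper (the function name is mine, for illustration):

def conv2d_out_size(in_size, kernel_size, stride=1, padding=0):
    # floor((n + 2p - k) / s) + 1, applied per spatial dimension
    return (in_size + 2 * padding - kernel_size) // stride + 1

# e.g. the encoder's first conv (kernel 3, stride 2, padding 1) halves each side:
assert conv2d_out_size(32, 3, stride=2, padding=1) == 16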
Related
linear layer in pytorch not able to forward using torch-summary
from torch import nn
from torchsummary import summary


class CNNNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # 4 conv blocks / flatten / linear / softmax
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(64 * 8 * 11, 5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return predictions


if __name__ == "__main__":
    cnn = CNNNetwork()
    cnn.to(device=dml)
    summary(cnn, (1, 11, 8), device=dml)

In this code, logits = self.linear(x) is causing a problem when generating the summary: there are no error messages, only the output shown in a screenshot (not reproduced here). I want to make a summary of the model and later train an audio classifier where the MFCC size is (30, 281). Also, could you explain how the input size for the linear layer is properly calculated?
conv2d() received an invalid combination of arguments
After the resnet convolution, I want to further compress the 256 channels down to 20. I directly wrote a layer at the end, but after forward propagation this layer raises an error and I don't know why.

def forward(self, x):
    x = self.conv1(x)
    dif_residual1 = self.downsample1(x)
    x = self.layer1_1(x)
    x = x + dif_residual1
    residual = x
    x = self.layer1_2(x)
    x = x + residual
    residual = x
    x = self.layer1_3(x)
    x = x + residual
    if self.out_channel != 256:
        x = self.layer2
    filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
    x = F.conv2d(x, weight=filters, padding=0)

The dimension of x before the if is x = {Tensor: (1, 256, 117, 240)}, but after the if statement is executed it becomes the Sequential module itself (as the debugger screenshot in the original post showed).

The error I get is this:

x = F.conv2d(feature, weight=filters, padding=0)
TypeError: conv2d() received an invalid combination of arguments - got (Sequential, weight=Tensor, padding=int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)

I then encountered a new problem:

File "D:\software\Anaconda\envs\torch1.10\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 117, 240]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My code:

class VGG(nn.Module):
    def __init__(self, in_channel, out_channel=None, init_weights=True, device='gpu', batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        if device == 'gpu':
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")
        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        if out_channel == None:
            self.out_channel = 256
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )
        if init_weights:
            self._init_weight()

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
        x = F.conv2d(x, weight=filters, padding=0)
        return x, feature


out_channel = 20
model = VGG(in_channel=12, out_channel=out_channel, init_weights=True, batch_size=batch_size)

for epoch in range(start_epoch + 1, epochs):
    # train
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs, feature = model(images.to(device))
        outputs = tonser_nolmal(outputs)
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1, epochs, loss)
    checkpoint = {
        "net": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch
    }
    torch.save(checkpoint, save_path + "/model-{}.pth".format(epoch))

    # validate
    model.eval()
    count_acc = 0.0
    count_mae = 0.0
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs, _ = model(val_images.to(device))
            # outputs = F.normalize(outputs, dim=3)
            outputs = tonser_nolmal(outputs)
            loss = loss_function(outputs, val_labels.to(device))
            count_acc = count_acc + loss.item()
            mae = Evaluation().MAE(outputs, val_labels.to(device))
            count_mae = count_mae + mae.item()
The error is likely to be caused by the following variable assignment:

if self.out_channel != 256:
    x = self.layer2

which can be easily fixed by changing it to

x = self.layer2(x)

Update: as the OP updated the code, I did some tests. There were several things I found problematic:

1) self._init_weight was not provided, so I commented it out.

2) filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device): the filter weight should have a shape of (c_out, c_in, kernel_size, kernel_size), but batch_size appeared in the position of out_channels.

3) The role of filters in the forward was not clear to me. If you want to reduce the out_channels further from 256 to 20, then initializing your model with VGG(..., out_channel=20) is sufficient; basically, self.conv3_3 would do the job.

On my end, I modified the code a little bit and it ran successfully:

import sys
import torch
import torch.nn as nn
from tqdm import tqdm
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import torch.nn.functional as F

dataset = [torch.randn(12, 64, 64) for _ in range(1000)]
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)


class VGG(nn.Module):
    def __init__(self, in_channel, out_channel=None, init_weights=True, device='cpu', batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        self.device = device
        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.out_channel3 = out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                      padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.out_channel = out_channel
        if out_channel == None:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1,
                          padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )
        # The implementation of _init_weight is not found
        # if init_weights:
        #     self._init_weight()

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        if x.shape[1] == 256:  # self.out_channel is None
            filters = torch.ones(20, self.out_channel3, 1, 1).to(self.device)
            x = F.conv2d(x, weight=filters, padding=0)
        return x, feature


out_channel = 20
device = "cuda:0" if torch.cuda.is_available() else "cpu"

model = VGG(in_channel=12, out_channel=None, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)

model = VGG(in_channel=12, out_channel=20, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)

Outputs:

torch.Size([1, 20, 64, 64])
torch.Size([1, 20, 64, 64])
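As a side note on the weight-shape convention mentioned above, here is a minimal check of F.conv2d's expected layout (shapes chosen to mirror the 256-to-20 reduction; this is illustrative only):

import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 8, 8)
w = torch.ones(20, 256, 1, 1)   # (c_out, c_in, kH, kW) -- not (batch, c_in, ...)
y = F.conv2d(x, weight=w)       # sums the 256 input channels into each of 20 outputs
assert y.shape == (1, 20, 8, 8)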
`*** RuntimeError: mat1 dim 1 must match mat2 dim 0` whenever I run model(images)
def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )

How can I deal with this error? I think the error is with self.fc, but I can't say how to fix it.
The output from self.conv(x) is of shape torch.Size([32, 64, 2, 2]): 32 * 64 * 2 * 2 = 8192 (this is equivalent to self.conv_out_size). A fully connected layer expects a single-dimension vector as input, i.e. you need to flatten it in the forward function before passing it to the fully connected layer:

class Network():
    ...
    def forward(self, x):
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        conv_out = conv_out.view(-1, 32 * 64 * 2 * 2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x

output:

torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])

EDIT: I think you're using the self._get_conv_out function wrong. It should be

def _get_conv_out(self, shape):
    output = self.conv(torch.zeros(1, *shape))  # not (32, *size)
    return int(numpy.prod(output.size()))

then, in the forward pass, you can use

conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)

For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
I had the same problem; however, I solved it by using a batch of 32 and a tensor size of [3, 32, 32] for my images, with the following configuration of my model. I am using a ResNet with 9 CNN layers and looking for 4 outputs.

transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ReLU(inplace=True)]
    if pool:
        layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out
How to re-use old weights in a slightly modified model?
I have a CNN network built like this for a particular task:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x

The model is stored using the torch built-in method like this:

net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)

state = {
    'state_dict': net.state_dict(),
    'opt': optimizer.state_dict()
}
torch.save(state, 'model.pt')

I have added a single layer to the network while the rest of the model was kept the same:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (newly added)
        self.conv14 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))  # (newly added)
        x = F.relu(self.conv14(x))
        x = F.softmax(x, 2)
        return x

Since the other conv layers are kept the same, is there any way I can re-use the saved model to load the weights into conv11, conv12 and conv14, instead of starting to train from the beginning?
Assume you trained the following model and now you make a minor modification to it (like adding a layer) and want to use your trained weights:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x

net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)

You save the model (and the optimizer state) with:

state = {'state_dict': net.state_dict(),
         'opt': optimizer.state_dict()
        }
torch.save(state, 'state.pt')

Your new model is (note that corresponding layers keep the same name, so you don't turn conv13 into conv14):

class NewNet(nn.Module):
    def __init__(self):
        super(NewNet, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.convnew = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (newly added)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.convnew(x))  # (newly added)
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x

Now you can load your state.pt file:

state = torch.load('state.pt')

state is a dict; state['opt'] contains all the parameters that you had for your optimizer. For example, state['opt']['param_groups'][0]['lr'] gives 0.001.

Assuming corresponding layers kept the same name, you can recover your parameters and initialize the appropriate layers with:

net = NewNet()
for name, param in net.named_parameters():
    if name in state['state_dict'].keys():
        param = param.data
        param.copy_(state['state_dict'][name])
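If it helps, the same partial reuse can also be done with load_state_dict's strict=False flag (a sketch under the same assumption that reused layers keep their names):

net = NewNet()
# strict=False loads every key that matches (conv11, conv12, conv13)
# and, on recent PyTorch versions, returns the rest instead of raising
result = net.load_state_dict(state['state_dict'], strict=False)
print(result.missing_keys)  # the freshly initialized convnew parameters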
How do the print and view functions work in PyTorch?
This is a convolutional neural network which I found on the web:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(500, 50)
        self.fc2 = nn.Linear(50, 64)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 500)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

and its summary:

print(net)

Net(
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5)
  (fc1): Linear(in_features=500, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=64, bias=True)
)

What does x.view do? Is it similar to the Flatten function in Keras? My other query is about how PyTorch prints the summary of a model: even though the model uses two dropouts, nn.Dropout2d() and F.dropout, when printing the model we can see only one, (conv2_drop): Dropout2d(p=0.5). Why? And the last question: why doesn't PyTorch print the F.max_pool2d layer?
1) x.view can do more than just flatten: it will keep the same data while reshaping the dimensions. So using x.view(batch_size, -1) is equivalent to Flatten.

2) In the __repr__ function of nn.Module, the elements that are printed are the modules in self._modules.items(), which are its children. F.dropout and F.max_pool2d are functions, not children of nn.Module; thus they are not layers and will not be printed. For pooling and dropout, however, there is a module in torch.nn, which you already used for the first dropout.
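To make point 1 concrete, a tiny sketch (the shapes are arbitrary examples):

import torch

x = torch.randn(4, 20, 5, 5)   # (batch, channels, height, width)
flat = x.view(4, -1)           # same data, reshaped to torch.Size([4, 500])
assert flat.shape == (4, 500)

# view is not limited to flattening; any shape with the same element count works
reshaped = x.view(4, 100, 5)   # torch.Size([4, 100, 5])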