Why is my tensor input wrong? - pytorch

Here is my CNN class.
class SimpleCnn(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.layer1 = nn.Sequential(  # 224*224
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(  # 112*112
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(  # 56*56
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(  # 28*28
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(  # 14*14
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.out1 = nn.Linear(512*7*7, 4096)  # 7*7
        self.out2 = nn.Linear(4096, n_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(1, -1)
        x = self.out1(x)
        logits = self.out2(x)
        return logits
And it raises this error:
RuntimeError: Given groups=1, weight of size [512, 256, 3, 3], expected input[64, 128, 56, 56] to have 256 channels, but got 128 channels instead.
I've seen other errors of this type, but I can't find where I'm wrong here.
Thank you for your answer.

In your code, self.layer3 is first defined but then overwritten (a copy-paste error, I assume?). The error is thrown because the redefinition of layer3 assumes its input has 256 channels, but the output of self.layer2 only has 128 channels.
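For concreteness, here is a sketch of the fixed class with the duplicated block renamed so that nothing is overwritten. Note it also swaps x.view(1, -1) for a batch-size-safe flatten, since hard-coding the first dimension to 1 would break for any batch size larger than one:

class SimpleCnn(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.layer1 = nn.Sequential(  # 224*224 -> 112*112
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(  # 112*112 -> 56*56
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(  # 56*56 -> 28*28
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer4 = nn.Sequential(  # 28*28 -> 14*14 (was the second "layer3")
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer5 = nn.Sequential(  # 14*14 -> 7*7 (was "layer4")
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.out1 = nn.Linear(512*7*7, 4096)
        self.out2 = nn.Linear(4096, n_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(x.size(0), -1)  # flatten per sample instead of x.view(1, -1)
        x = self.out1(x)
        logits = self.out2(x)
        return logits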

Related

Why can't I convert YOLOv1 weights to the PyTorch model?

I have written an exact YOLOv1 model and am trying to load the weights of the Extraction model pre-trained on ImageNet: https://pjreddie.com/darknet/imagenet
I have a separate class for the Extraction model and load the ImageNet weights into it from a binary weights file, but the very last layer comes up one weight short. If I print the size of the buffer and the space required for the weights, I see a difference of 1.
required size: 23455400
buffer size: 23455399
Error on the last layer:
torch.from_numpy(buf[start:start + num_w]).reshape(conv_layer.weight.data.shape))
RuntimeError: shape '[1000, 1024, 1, 1]' is invalid for input of size 1023999
Where could the problem be?
I wrote the Extraction model and am trying to load the weights into it.
My model with the load function:
class Extraction(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(negative_slope=0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(negative_slope=0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(negative_slope=0.1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(negative_slope=0.1)
        )
        self.classifier = nn.Sequential(
            nn.Conv2d(in_channels=1024, out_channels=1000, kernel_size=1, stride=1, padding=0, bias=True),
            nn.LeakyReLU(negative_slope=0.1),
            nn.AvgPool2d(kernel_size=13, stride=13),
            nn.Flatten()
        )

    def forward(self, x):
        x = self.conv_block(x)
        return self.classifier(x)

    def load_weights(self, weightfile):
        with open(weightfile, 'rb') as fp:
            header = np.fromfile(fp, count=5, dtype=np.int32)
            buf = np.fromfile(fp, dtype=np.float32)
        start = 0
        # load weights to convolution layers
        for num_layer, layer in enumerate(self.conv_block):
            if start >= buf.size:
                break
            if isinstance(layer, nn.modules.conv.Conv2d):
                conv_layer = self.conv_block[num_layer]
                if num_layer + 1 != len(self.conv_block):
                    if isinstance(self.conv_block[num_layer + 1], nn.modules.BatchNorm2d):
                        batch_norm_layer = self.conv_block[num_layer + 1]
                        start = load_conv_batch_norm(buf, start, conv_layer, batch_norm_layer)
                    else:
                        start = load_conv(buf, start, conv_layer)
        # load weights to output layer
        conv_layer = self.classifier[0]
        start = load_conv(buf, start, conv_layer)
        print("start: ", start)
        print("buf size:", buf.size)
And my helper functions:
def load_conv_batch_norm(buf, start, conv_layer, batch_norm_layer):
    num_w = conv_layer.weight.numel()
    num_b = batch_norm_layer.bias.numel()
    batch_norm_layer.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start += num_b
    batch_norm_layer.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start += num_b
    batch_norm_layer.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]))
    start += num_b
    batch_norm_layer.running_var.copy_(torch.from_numpy(buf[start:start + num_b]))
    start += num_b
    conv_layer.weight.data.copy_(
        torch.from_numpy(buf[start:start + num_w]).reshape(conv_layer.weight.data.shape))
    start += num_w
    return start

def load_conv(buf, start, conv_layer):
    num_w = conv_layer.weight.numel()
    num_b = conv_layer.bias.numel()
    conv_layer.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start += num_b
    conv_layer.weight.data.copy_(
        torch.from_numpy(buf[start:start + num_w]).reshape(conv_layer.weight.data.shape))
    start += num_w
    return start
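One detail worth checking (an assumption, not confirmed in the question): darknet .weights files begin with a version-dependent header. Older files store 4 int32 values (major, minor, revision, seen), while newer files (where major*10 + minor >= 2) store 3 int32 values followed by a 64-bit "seen" counter, i.e. five int32-sized slots. Reading count=5 on an old-format file would consume one extra 4-byte value from the weight stream, which matches the off-by-one float seen here. A sketch of a version-aware header read:

import numpy as np

def read_darknet_header(fp):
    # major, minor, revision are always 3 x int32
    major, minor, revision = np.fromfile(fp, count=3, dtype=np.int32)
    if major * 10 + minor >= 2:
        # newer format: "seen" is a 64-bit counter (8 bytes)
        seen = np.fromfile(fp, count=1, dtype=np.int64)[0]
    else:
        # older format: "seen" is a single int32 (4 bytes)
        seen = np.fromfile(fp, count=1, dtype=np.int32)[0]
    return major, minor, revision, seen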

conv2d() received an invalid combination of arguments

After the ResNet-style convolutions, I want to further compress the 256 channels down to 20. I added a layer directly at the end, but during forward propagation this layer raises an error, and I don't know why.
def forward(self, x):
    x = self.conv1(x)
    dif_residual1 = self.downsample1(x)
    x = self.layer1_1(x)
    x = x + dif_residual1
    residual = x
    x = self.layer1_2(x)
    x = x + residual
    residual = x
    x = self.layer1_3(x)
    x = x + residual
    if self.out_channel != 256:
        x = self.layer2
        filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
        x = F.conv2d(x, weight=filters, padding=0)
The shape of x before the if statement is:
x = {Tensor:(1,256,117,240)}
But after the if statement executes, x is no longer a tensor, as the error below shows.
The error I get is this:
x = F.conv2d(feature, weight=filters, padding=0)
TypeError: conv2d() received an invalid combination of arguments - got (Sequential, weight=Tensor, padding=int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
I then encountered a new problem:
File "D:\software\Anaconda\envs\torch1.10\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 117, 240]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My code:
class VGG(nn.Module):
    def __init__(self, in_channel, out_channel=None, init_weights=True, device='gpu', batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        if device == 'gpu':
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")
        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        if out_channel == None:
            self.out_channel = 256
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
                          padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )
        if init_weights:
            self._init_weight()

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
        x = F.conv2d(x, weight=filters, padding=0)
        return x, feature

out_channel = 20
model = VGG(in_channel=12, out_channel=out_channel, init_weights=True, batch_size=batch_size)

for epoch in range(start_epoch + 1, epochs):
    # train
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for step, data in enumerate(train_bar):
        images, labels = data
        optimizer.zero_grad()
        outputs, feature = model(images.to(device))
        outputs = tonser_nolmal(outputs)
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1, epochs, loss)
    checkpoint = {
        "net": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch
    }
    torch.save(checkpoint, save_path + "/model-{}.pth".format(epoch))
    # validate
    model.eval()
    count_acc = 0.0
    count_mae = 0.0
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_data in val_bar:
            val_images, val_labels = val_data
            outputs, _ = model(val_images.to(device))
            # outputs = F.normalize(outputs, dim=3)
            outputs = tonser_nolmal(outputs)
            loss = loss_function(outputs, val_labels.to(device))
            count_acc = count_acc + loss.item()
            mae = Evaluation().MAE(outputs, val_labels.to(device))
            count_mae = count_mae + mae.item()
The error is likely to be caused by the following variable assignment:
    if self.out_channel != 256:
        x = self.layer2
which can be easily fixed by changing it to
        x = self.layer2(x)
Update:
As the OP updated the code, I did some tests. There were several things I found problematic:
1. self._init_weight was not provided, so I commented it out.
2. filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device): the filter weight should have a shape of (c_out, c_in, kernel_size, kernel_size), but batch_size appeared in the position of out_channels (see the quick shape check after this list).
3. The role of filters in the forward pass was not clear to me. If you want to reduce the channels further from 256 to 20, then initializing your model with VGG(..., out_channel=20) is sufficient; self.conv3_3 would already do the job.
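For reference, a quick standalone shape check of the weight layout F.conv2d expects (the sizes here are chosen for illustration):

import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 8, 8)   # (batch, c_in, H, W)
w = torch.ones(20, 256, 1, 1)   # (c_out, c_in, kH, kW)
print(F.conv2d(x, w).shape)     # torch.Size([1, 20, 8, 8])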
On my end, I modified the code a little bit and it ran successfully:
import sys
import torch
import torch.nn as nn
from tqdm import tqdm
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import torch.nn.functional as F

dataset = [torch.randn(12, 64, 64) for _ in range(1000)]
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)

class VGG(nn.Module):
    def __init__(self, in_channel, out_channel=None, init_weights=True, device='cpu', batch_size=1):
        super(VGG, self).__init__()
        self.batch_size = batch_size
        self.out_channel = out_channel
        self.device = device
        modes = 'reflect'
        out_channel1 = 64
        self.conv1_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        self.conv1_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel1),
            nn.LeakyReLU()
        )
        out_channel2 = 128
        self.conv2_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel2),
            nn.LeakyReLU()
        )
        self.out_channel3 = out_channel3 = 256
        self.conv3_1 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
            nn.BatchNorm2d(out_channel3),
            nn.LeakyReLU()
        )
        self.out_channel = out_channel
        if out_channel == None:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
                          padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU()
            )
        else:
            self.conv3_3 = nn.Sequential(
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel3),
                nn.LeakyReLU(),
                nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
                nn.BatchNorm2d(out_channel),
                nn.LeakyReLU()
            )
        # The implementation of _init_weight is not found
        # if init_weights:
        #     self._init_weight()

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.conv1_2(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)
        x = self.conv3_3(x)
        feature = x
        if x.shape[1] == 256:  # self.out_channel is None
            filters = torch.ones(20, self.out_channel3, 1, 1).to(self.device)
            x = F.conv2d(x, weight=filters, padding=0)
        return x, feature

out_channel = 20
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = VGG(in_channel=12, out_channel=None, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
model = VGG(in_channel=12, out_channel=20, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
Outputs:
torch.Size([1, 20, 64, 64])
torch.Size([1, 20, 64, 64])

Unusually high runtime for first epoch in UNet

I have been using the UNet architecture for image segmentation, but I see an unusually high runtime for the first epoch (2 hrs), which drops to 30 mins in subsequent epochs. I don't know where the problem lies in the model; any advice is highly appreciated.
Here is my code:
class UNet(nn.Module):
    def __init__(self, num_classes):
        super(UNet, self).__init__()
        self.num_classes = num_classes
        self.contracting_11 = self.conv_block(in_channels=3, out_channels=64)
        self.contracting_12 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_21 = self.conv_block(in_channels=64, out_channels=128)
        self.contracting_22 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_31 = self.conv_block(in_channels=128, out_channels=256)
        self.contracting_32 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.contracting_41 = self.conv_block(in_channels=256, out_channels=512)
        self.contracting_42 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.middle = self.conv_block(in_channels=512, out_channels=1024)
        self.expansive_11 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_12 = self.conv_block(in_channels=1024, out_channels=512)
        self.expansive_21 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_22 = self.conv_block(in_channels=512, out_channels=256)
        self.expansive_31 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_32 = self.conv_block(in_channels=256, out_channels=128)
        self.expansive_41 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.expansive_42 = self.conv_block(in_channels=128, out_channels=64)
        self.output = nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=3, stride=1, padding=1)

    def conv_block(self, in_channels, out_channels):
        block = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=out_channels),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=out_channels)
        )
        return block

    def forward(self, X):
        contracting_11_out = self.contracting_11(X)
        contracting_12_out = self.contracting_12(contracting_11_out)
        contracting_21_out = self.contracting_21(contracting_12_out)
        contracting_22_out = self.contracting_22(contracting_21_out)
        contracting_31_out = self.contracting_31(contracting_22_out)
        contracting_32_out = self.contracting_32(contracting_31_out)  # [-1, 256, 32, 32]
        contracting_41_out = self.contracting_41(contracting_32_out)  # [-1, 512, 32, 32]
        contracting_42_out = self.contracting_42(contracting_41_out)  # [-1, 512, 16, 16]
        middle_out = self.middle(contracting_42_out)  # [-1, 1024, 16, 16]
        expansive_11_out = self.expansive_11(middle_out)  # [-1, 512, 32, 32]
        expansive_12_out = self.expansive_12(torch.cat((expansive_11_out, contracting_41_out), dim=1))  # [-1, 1024, 32, 32] -> [-1, 512, 32, 32]
        expansive_21_out = self.expansive_21(expansive_12_out)  # [-1, 256, 64, 64]
        expansive_22_out = self.expansive_22(torch.cat((expansive_21_out, contracting_31_out), dim=1))  # [-1, 512, 64, 64] -> [-1, 256, 64, 64]
        expansive_31_out = self.expansive_31(expansive_22_out)  # [-1, 128, 128, 128]
        expansive_32_out = self.expansive_32(torch.cat((expansive_31_out, contracting_21_out), dim=1))  # [-1, 256, 128, 128] -> [-1, 128, 128, 128]
        expansive_41_out = self.expansive_41(expansive_32_out)  # [-1, 64, 256, 256]
        expansive_42_out = self.expansive_42(torch.cat((expansive_41_out, contracting_11_out), dim=1))  # [-1, 128, 256, 256] -> [-1, 64, 256, 256]
        output_out = self.output(expansive_42_out)  # [-1, num_classes, 256, 256]
        out = torch.sigmoid(output_out)
        return out
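No answer is posted for this question here, but one hedged diagnostic may help narrow it down: if the extra time in epoch one comes from the input pipeline (cold OS disk cache, on-the-fly decoding or augmentation) rather than the model itself, timing the data loading and the compute separately will show it. A sketch (profile_epoch and all of its parameters are assumed names, not from the question):

import time
import torch

def profile_epoch(model, loader, criterion, optimizer, device):
    # Splits one training epoch into data-loading time vs. compute time.
    # If data_time dominates epoch 1 and shrinks afterwards, the slowdown
    # is in the input pipeline, not the network.
    data_time = compute_time = 0.0
    end = time.time()
    for images, masks in loader:
        data_time += time.time() - end
        t0 = time.time()
        loss = criterion(model(images.to(device)), masks.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # CUDA kernels are async; sync before timing
        compute_time += time.time() - t0
        end = time.time()
    print("data: {:.1f}s  compute: {:.1f}s".format(data_time, compute_time))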

`*** RuntimeError: mat1 dim 1 must match mat2 dim 0` whenever I run model(images)

def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(64),
    )
How can I deal with this error? I think the error is with self.fc, but I can't say how to fix it.
The output from self.conv(x) has shape torch.Size([32, 64, 2, 2]): 32*64*2*2 = 8192 (this is equivalent to self.conv_out_size). A fully connected layer expects a single-dimension vector per sample, i.e. you need to flatten the conv output before passing it to the fully connected layer in the forward function.
i.e.
class Network():
    ...
    def forward(self, x):
        ...
        conv_out = self.conv(x)
        print(conv_out.shape)
        conv_out = conv_out.view(-1, 32*64*2*2)
        print(conv_out.shape)
        x = self.fc(conv_out)
        return x
output
torch.Size([32, 64, 2, 2])
torch.Size([1, 8192])
EDIT:
I think you're using the self._get_conv_out function wrong.
It should be
def _get_conv_out(self, shape):
    output = self.conv(torch.zeros(1, *shape))  # not (32, *shape)
    return int(numpy.prod(output.size()))
then, in the forward pass, you can use
conv_out = self.conv(x)
# flatten the output of conv layers
conv_out = conv_out.view(conv_out.size(0), -1)
x = self.fc(conv_out)
For an input of (32, 1, 110, 110), the output should be torch.Size([32, 2]).
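Putting the answer's pieces together, a minimal runnable sketch (the class name, input shape, and fc head are assumptions; the question does not show them):

import numpy
import torch
import torch.nn as nn

class Network(nn.Module):
    def __init__(self, input_shape=(1, 110, 110), n_outputs=2):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=5, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, bias=False),
            nn.BatchNorm2d(64),
        )
        self.fc = nn.Linear(self._get_conv_out(input_shape), n_outputs)

    def _get_conv_out(self, shape):
        # size the fc layer by pushing one dummy sample through the conv stack
        output = self.conv(torch.zeros(1, *shape))
        return int(numpy.prod(output.size()))

    def forward(self, x):
        conv_out = self.conv(x).view(x.size(0), -1)  # flatten per sample
        return self.fc(conv_out)

model = Network()
print(model(torch.randn(32, 1, 110, 110)).shape)  # torch.Size([32, 2])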
I had the same problem; I solved it by using a batch size of 32 and a tensor size of [3, 32, 32] for my images, with the following configuration for my model. I am using a ResNet with 9 conv layers (ResNet9) and 4 outputs.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])
def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
              nn.BatchNorm2d(out_channels),
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)
class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out
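For a quick sanity check of this configuration (ImageClassificationBase is not shown in the answer; nn.Module works as a stand-in base class):

model = ResNet9(in_channels=3, num_classes=4)
out = model(torch.randn(32, 3, 32, 32))  # a batch of 32 images of size [3, 32, 32]
print(out.shape)  # torch.Size([32, 4])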

How can I remove a layer in PyTorch?

I want to remove the decoder portion of the autoencoder and put an FC classifier in the removed part.
In addition, the encoder should keep its pre-learned weights and not be trained further.
self.encoder = nn.Sequential(
    nn.Conv2d(1, 16, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 8, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.MaxPool2d(kernel_size=4, stride=1),
)
self.decoder = nn.Sequential(
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
    nn.Conv2d(8, 8, 3, padding=1),
    nn.ReLU(True),
    nn.ConvTranspose2d(8, 8, kernel_size=2, stride=2),
    nn.Conv2d(8, 16, 3),
    nn.ReLU(True),
    nn.ConvTranspose2d(16, 16, kernel_size=2, stride=2),
    nn.Conv2d(16, 1, 3, padding=1)
)

def forward(self, x):
    if self.training:
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    else:
        x = classifier(x)
        return x
Is this possible?
Help me...
One easy and clean solution would be to define a stand-alone network as your decoder, then replace the decoder attribute of your model with this new network after pre-training is over. Easy example below:
class sillyExample(torch.nn.Module):
    def __init__(self):
        super(sillyExample, self).__init__()
        self.encoder = torch.nn.Linear(5, 5)
        self.decoder = torch.nn.Linear(5, 10)

    def forward(self, x):
        return self.decoder(self.encoder(x))

test = sillyExample()
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 10])

test.decoder = torch.nn.Linear(5, 20)  # replace the decoder
test(torch.rand(30, 5)).shape
Out: torch.Size([30, 20])
Just make sure to re-initialize your optimizers with the updated model (or anything else that might be referencing the model's parameters).
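Applied to the autoencoder in the question, a minimal sketch (model, n_classes, and the Linear in_features are assumptions; in_features depends on your input resolution, since the encoder ends with 8 output channels):

for p in model.encoder.parameters():
    p.requires_grad = False  # keep the pre-learned encoder weights fixed

model.decoder = torch.nn.Sequential(  # replace the decoder with an FC head
    torch.nn.Flatten(),
    torch.nn.Linear(8 * 4 * 4, n_classes),  # 8 channels * spatial dims (assumed)
)

# re-create the optimizer so it only tracks the remaining trainable parameters
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=1e-3
)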
