After resnet convolution, I want to further compress the 256 dimensions to 20 dimensions. I directly wrote a layer in the back, but after forward propagation, there is an error in this layer, I don't know why?
def forward(self, x):
x = self.conv1(x)
dif_residual1 = self.downsample1(x)
x = self.layer1_1(x)
x =x + dif_residual1
residual = x
x = self.layer1_2(x)
x = x + residual
residual = x
x = self.layer1_3(x)
x = x + residual
if self.out_channel != 256:
x = self.layer2
filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
x = F.conv2d(x, weight=filters, padding=0)
The dimension of x before I do if is:
x = {Tensor:(1,256,117,240)}
But after the if statement is executed, it becomes what the picture shows。
The error I get is this:
x = F.conv2d(feature, weight=filters, padding=0)
TypeError: conv2d() received an invalid combination of arguments - got (Sequential, weight=Tensor, padding=int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
Encounter a new problem:
File "D:\software\Anaconda\envs\torch1.10\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 117, 240]], which is output 0 of AddBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My code:
class VGG(nn.Module):
def __init__(self, in_channel, out_channel=None, init_weights=True, device='gpu',batch_size=1):
super(VGG, self).__init__()
self.batch_size = batch_size
self.out_channel = out_channel
if device == 'gpu':
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
else:
self.device = torch.device("cpu")
modes = 'reflect'
out_channel1 = 64
self.conv1_1 = nn.Sequential(
nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel1),
nn.LeakyReLU()
)
self.conv1_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel1),
nn.LeakyReLU()
)
out_channel2 = 128
self.conv2_1 = nn.Sequential(
nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel2),
nn.LeakyReLU()
)
self.conv2_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel2),
nn.LeakyReLU()
)
out_channel3 = 256
self.conv3_1 = nn.Sequential(
nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
self.conv3_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
if out_channel == None:
self.out_channel = 256
self.conv3_3 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
else:
self.conv3_3 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU(),
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel),
nn.LeakyReLU()
)
if init_weights:
self._init_weight()
def forward(self, x):
x = self.conv1_1(x)
x = self.conv1_2(x)
x = self.conv2_1(x)
x = self.conv2_2(x)
x = self.conv3_1(x)
x = self.conv3_2(x)
x = self.conv3_3(x)
feature = x
filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device)
x = F.conv2d(x, weight = filters, padding = 0)
return x,feature
out_channel = 20
model = VGG(in_channel=12, out_channel=out_channel, init_weights=True, batch_size=batch_size)
for epoch in range(start_epoch+1,epochs):
# train
model.train()
running_loss = 0.0
train_bar = tqdm(train_loader, file=sys.stdout)
for step, data in enumerate(train_bar):
images, labels = data
optimizer.zero_grad()
outputs,feature = model(images.to(device))
outputs = tonser_nolmal(outputs)
loss = loss_function(outputs, labels.to(device))
loss.backward()
optimizer.step()
running_loss += loss.item()
train_bar.desc = "train epoch[{}/{}] loss:{:.6f}".format(epoch + 1,
epochs,
loss)
checkpoint = {
"net": model.state_dict(),
"optimizer": optimizer.state_dict(),
"epoch": epoch
}
torch.save(checkpoint, save_path + "/model-{}.pth".format(epoch))
# validate
model.eval()
count_acc = 0.0
count_mae = 0.0
with torch.no_grad():
val_bar = tqdm(validate_loader, file=sys.stdout)
for val_data in val_bar:
val_images, val_labels = val_data
outputs,_ = model(val_images.to(device))
# outputs = F.normalize(outputs,dim=3)
outputs = tonser_nolmal(outputs)
loss = loss_function(outputs, val_labels.to(device))
count_acc = count_acc + loss.item()
mae = Evaluation().MAE(outputs, val_labels.to(device))
count_mae = count_mae + mae.item()
The error is likely to be caused by the following variable assignment:
if self.out_channel != 256:
x = self.layer2
which can be easily fixed by changing it to
x = self.layer2(x)
Update:
As OP updated his code, I did some test. There were several things which I found problematic:
self._init_weight was not provided, so I commented it out;
filters = torch.ones(self.batch_size, self.out_channel, 1, 1).detach().requires_grad_(False).to(self.device). The filter weight should have a shape of (c_out, c_in, kernel_size, kernel_size). However, batch_size appeared in the position of out_channels.
The role of filter in the forward was not clear to me. If you wanted to reduce the out_channels further from 256 to 20, then initializing your model with VGG(..., out_channel=20) is sufficient. Basically, self.conv3_3 would do the job.
On my end, I modified the code a little bit and it ran successfully:
import sys
import torch
import torch.nn as nn
from tqdm import tqdm
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import torch.nn.functional as F
dataset = [torch.randn(12, 64, 64) for _ in range(1000)]
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
class VGG(nn.Module):
def __init__(self, in_channel, out_channel=None, init_weights=True, device='cpu', batch_size=1):
super(VGG, self).__init__()
self.batch_size = batch_size
self.out_channel = out_channel
self.device = device
modes = 'reflect'
out_channel1 = 64
self.conv1_1 = nn.Sequential(
nn.Conv2d(in_channels=in_channel, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel1),
nn.LeakyReLU()
)
self.conv1_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel1, out_channels=out_channel1, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel1),
nn.LeakyReLU()
)
out_channel2 = 128
self.conv2_1 = nn.Sequential(
nn.Conv2d(in_channels=out_channel1, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel2),
nn.LeakyReLU()
)
self.conv2_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel2, out_channels=out_channel2, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel2),
nn.LeakyReLU()
)
self.out_channel3 = out_channel3 = 256
self.conv3_1 = nn.Sequential(
nn.Conv2d(in_channels=out_channel2, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
self.conv3_2 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode = modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
self.out_channel = out_channel
if out_channel == None:
self.conv3_3 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1,
padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU()
)
else:
self.conv3_3 = nn.Sequential(
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel3, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel3),
nn.LeakyReLU(),
nn.Conv2d(in_channels=out_channel3, out_channels=out_channel, kernel_size=3, stride=1, padding=1, padding_mode=modes, bias=False),
nn.BatchNorm2d(out_channel),
nn.LeakyReLU()
)
# The implementation of _init_weight is not found
# if init_weights:
# self._init_weight()
def forward(self, x):
x = self.conv1_1(x)
x = self.conv1_2(x)
x = self.conv2_1(x)
x = self.conv2_2(x)
x = self.conv3_1(x)
x = self.conv3_2(x)
x = self.conv3_3(x)
feature = x
if x.shape[1] == 256: # self.out_channel is None
filters = torch.ones(20, self.out_channel3, 1, 1).to(self.device)
x = F.conv2d(x, weight = filters, padding = 0)
return x, feature
out_channel = 20
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = VGG(in_channel=12, out_channel=None, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
model = VGG(in_channel=12, out_channel=20, init_weights=True, device=device, batch_size=1)
model.to(device)
print(model(next(iter(train_loader)).to(device))[0].shape)
Outputs:
torch.Size([1, 20, 64, 64])
torch.Size([1, 20, 64, 64])
I need to classify cherry, strawberry, and tomato with 3600 training images and 900 testing images. However, my model performed poorly and overfitted. I tried weight_decay to avoid overfiting but the model gave error around shape not fitting. My training accuracy is 85% and my testing accuracy is 60%.
This is roughly my training data, they are all around 300x300 pixels
Transformation:
train_transform = transforms.Compose([
transforms.RandomRotation(10),
transforms.RandomHorizontalFlip(),
transforms.Resize(224),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
transforms.Resize(224),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225])
])
Model:
Batch_size = 100
epoch = 8
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNNmodel.parameters(), lr=0.001)
class ConvolutionalNetwork(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 6, 3, 1)
self.conv2 = nn.Conv2d(6, 16, 3, 1)
self.fc1 = nn.Linear(54*54*16, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 3)
def forward(self, X):
X = F.relu(self.conv1(X))
X = F.max_pool2d(X, 2, 2) #kernel size 2, stride 2, padding tbc
X = F.relu(self.conv2(X))
X = F.max_pool2d(X, 2, 2)
X = X.view(-1, 54*54*16)
X = F.relu(self.fc1(X))
X = F.relu(self.fc2(X))
X = self.fc3(X)
return F.log_softmax(X, dim=1)
The most egregious mistake in your approach is that you are training a NN from scratch instead of fine-tuning a model already trained on ImageNet.
Check out https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
I have my training dataset as below, where X_train is 3D with 3 channels
Shape of X_Train: (708, 256, 3)
Shape of Y_Train: (708, 4)
Then I convert them into a tensor and input into the dataloader:
X_train=torch.from_numpy(X_data)
y_train=torch.from_numpy(y_data)
training_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(training_dataset, batch_size=50, shuffle=False)
However when training the model, I get the following error:
RuntimeError: Given groups=1, weight of size 24 3 5, expected input[708, 256, 3] to have 3 channels, but got 256 channels instead
I suppose this is due to the position of the channel? In Tensorflow, the channel position is at the end, but in PyTorch the format is "Batch Size x Channel x Height x Width"? So how do I swap the positions in the x_train tensor to match the expected format in the dataloader?
class TwoLayerNet(torch.nn.Module):
def __init__(self):
super(TwoLayerNet,self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(3, 3*8, kernel_size=5, stride=1),
nn.Sigmoid(),
nn.AvgPool1d(kernel_size=2, stride=0))
self.conv2 = nn.Sequential(
nn.Conv1d(3*8, 12, kernel_size=5, stride=1),
nn.Sigmoid(),
nn.AvgPool1d(kernel_size=2, stride = 0))
#self.drop_out = nn.Dropout()
self.fc1 = nn.Linear(708, 732)
self.fc2 = nn.Linear(732, 4)
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
out = out.reshape(out.size(0), -1)
out = self.drop_out(out)
out = self.fc1(out)
out = self.fc2(out)
return out
Use permute.
X_train = torch.rand(708, 256, 3)
X_train = X_train.permute(2, 0, 1)
X_train.shape
# => torch.Size([3, 708, 256])
I try to run the following programe for images classification problem in Pytorch:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001
TRAIN_DATA_PATH = "train/"
TEST_DATA_PATH = "test/"
TRANSFORM_IMG = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225] )
])
train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_dataset = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
def __init__(self, num_classes=10):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
self.fc = nn.Linear(7 * 7 * 32, num_classes)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
return out
model = ConvNet(num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i + 1) % 100 == 0:
print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Test the model
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model/model.ckpt')
But I get a RuntimeError:
Traceback (most recent call last):
RuntimeError: Given groups=1, weight of size 16 1 5 5, expected input[100, 3, 256, 256] to have 1 channels, but got 3 channels instead
Someone could help to fix the bug? Thanks a lot.
Reference related:
https://discuss.pytorch.org/t/given-groups-1-weight-16-1-5-5-so-expected-input-100-3-64-64-to-have-1-channels-but-got-3-channels-instead/28831/17
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead
Your input layer self.layer1 starts with a 2d convolution nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2). This conv layer expects an input with two spatial dimensions and one channel, and outputs a tesnor with the same spatial dimensions and 16 channels.
However, your input has three channels and not one (RGB image instead of gray level image).
Make sure your net and data are in synch.