I have my training dataset as below, where X_train is 3D with 3 channels
Shape of X_Train: (708, 256, 3)
Shape of Y_Train: (708, 4)
Then I convert them into tensors and feed them into a DataLoader:
X_train=torch.from_numpy(X_data)
y_train=torch.from_numpy(y_data)
training_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(training_dataset, batch_size=50, shuffle=False)
However, when training the model, I get the following error:
RuntimeError: Given groups=1, weight of size 24 3 5, expected input[708, 256, 3] to have 3 channels, but got 256 channels instead
I suppose this is due to the position of the channel dimension? In TensorFlow the channels come last, but in PyTorch the format is "Batch Size x Channels x Height x Width". So how do I swap the positions in the X_train tensor to match the format expected by the DataLoader?
class TwoLayerNet(torch.nn.Module):
    def __init__(self):
        super(TwoLayerNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(3, 3*8, kernel_size=5, stride=1),
            nn.Sigmoid(),
            nn.AvgPool1d(kernel_size=2, stride=0))
        self.conv2 = nn.Sequential(
            nn.Conv1d(3*8, 12, kernel_size=5, stride=1),
            nn.Sigmoid(),
            nn.AvgPool1d(kernel_size=2, stride=0))
        #self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(708, 732)
        self.fc2 = nn.Linear(732, 4)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
Use permute to move the channel dimension into second position (batch x channels x length):
X_train = torch.rand(708, 256, 3)
X_train = X_train.permute(0, 2, 1)
X_train.shape
# => torch.Size([708, 3, 256])
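Applied to the data from the question, a minimal sketch (assuming X_data and y_data are the NumPy arrays with shapes (708, 256, 3) and (708, 4)) looks like this:
import torch
import torch.nn as nn

X_train = torch.from_numpy(X_data).float()          # (708, 256, 3)
X_train = X_train.permute(0, 2, 1)                  # (708, 3, 256): batch x channels x length
y_train = torch.from_numpy(y_data).float()

training_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(training_dataset, batch_size=50, shuffle=False)

xb, yb = next(iter(train_loader))
print(xb.shape)                                      # torch.Size([50, 3, 256])
print(nn.Conv1d(3, 3*8, kernel_size=5)(xb).shape)    # torch.Size([50, 24, 252])
The permute has to happen before the tensors go into the TensorDataset, so every batch the DataLoader yields already has the channel dimension where Conv1d expects it.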
I am unable to find the error. The input is 32x32 gray-scale images:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,    # gray-scale images
                out_channels=16,
                kernel_size=5,    # 5x5 convolutional kernel
                stride=1,         # no. of pixels the kernel moves at a time
                padding=2,        # to preserve the size of the input image
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer
        self.out = nn.Linear(32*7*7, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

cnn = CNN()
cnn
Your linear layer expects an input of size 32x7x7. Given that your conv1 and conv2 layers perform max pooling with stride=2, that means your network is configured for an input size of 28x28 (the usual MNIST input size) and not 32x32 as you expect.
Moreover, considering the values in your error message (64x2304), I assume you are working with batch_size=64, but your images are NOT 32x32, but rather 32x?? (slightly larger than 32), resulting in a feature map of 32x8x9 after the pooling.
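If you do want the network to accept 32x32 inputs, one way to avoid hard-coding the flattened size is to push a dummy tensor through the convolutional part and read the size off the result. A minimal sketch, assuming single-channel 32x32 images and the conv layers from the question:
import torch
import torch.nn as nn

conv = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(16, 32, 5, 1, 2),
    nn.ReLU(),
    nn.MaxPool2d(2),
)

with torch.no_grad():
    n_features = conv(torch.zeros(1, 1, 32, 32)).numel()

print(n_features)                # 32 * 8 * 8 = 2048 for a 32x32 input
out = nn.Linear(n_features, 3)   # instead of nn.Linear(32*7*7, 3)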
I know my images have only 1 channel, so the first conv layer is (1, 16, 3, 1), but I have no idea why I get this error.
Here is my code (I post only the relevant part).
org_x = train_csv.drop(['id', 'digit', 'letter'], axis=1).values
org_x = org_x.reshape(-1, 28, 28, 1)
org_x = org_x/255
org_x = np.array(org_x)
org_x = org_x.reshape(-1, 1, 28, 28)
org_x = torch.Tensor(org_x).float()
x_test = test_csv.drop(['id','letter'], axis=1).values
x_test = x_test.reshape(-1, 28, 28, 1)
x_test = x_test/255
x_test = np.array(x_test)
x_test = x_test.reshape(-1, 1, 28, 28)
x_test = torch.Tensor(x_test).float()
y = train_csv['digit']
y = list(y)
print(len(y))
org_y = np.zeros([len(y), 1])
for i in range(len(y)):
    org_y[i] = y[i]
org_y = np.array(org_y)
org_y = torch.Tensor(org_y).float()
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(
    org_x, org_y, test_size=0.2, random_state=42)
I checked the x_train shape is [1638, 1, 28, 28] and the x_valid shape is [410, 1, 28, 28].
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, ), (0.5, ))])
class kmnistDataset(data.Dataset):
    def __init__(self, images, labels, transforms=None):
        self.x = images
        self.y = labels
        self.transforms = transforms

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data = np.asarray(self.x[idx][0:]).astype(np.uint8)

        if self.transforms:
            data = self.transforms(data)

        if self.y is not None:
            return (data, self.y[idx])
        else:
            return data
train_data = kmnistDataset(x_train, y_train, transforms=transform)
valid_data = kmnistDataset(x_valid, y_valid, transforms=transform)
# dataloaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=16, shuffle = False)
And here is my model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)

        unit = 64 * 14 * 14
        self.fc1 = nn.Linear(unit, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = x.view(-1, 128 * 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
model = Net()
print(model)
Lastly,
n_epochs = 30
valid_loss_min = np.Inf

for epoch in range(1, n_epochs+1):
    train_loss = 0
    valid_loss = 0

    ###################
    # train the model #
    ###################
    model.train()
    for data in train_loader:
        inputs, labels = data[0], data[1]
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    #####################
    # validate the model#
    #####################
    model.eval()
    for data in valid_loader:
        inputs, labels = data[0], data[1]
        output = model(inputs)
        loss = criterion(output, labels)
        valid_loss += loss.item()*data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    valid_loss = valid_loss / len(valid_loader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
When I run it, I get this error message:
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead
To be specific,
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-14-b8783819421f> in <module>
14 inputs, labels = data[0], data[1]
15 optimizer.zero_grad()
---> 16 output = model(inputs)
17 loss = criterion(output, labels)
18 loss.backward()
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-12-500e34c49306> in forward(self, x)
26
27 def forward(self, x):
---> 28 x = self.pool(F.relu(self.bn1(self.conv1(x))))
29 x = F.relu(self.conv2(x))
30 x = F.relu(self.conv3(x))
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
421
422 def forward(self, input: Tensor) -> Tensor:
--> 423 return self._conv_forward(input, self.weight)
424
425 class Conv3d(_ConvNd):
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
418 _pair(0), self.dilation, self.groups)
419 return F.conv2d(input, weight, self.bias, self.stride,
--> 420 self.padding, self.dilation, self.groups)
421
422 def forward(self, input: Tensor) -> Tensor:
RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead
I tried a small demo with your code, and it works fine once the flatten is changed to x = x.view(-1, 64*14*14) and the input has shape torch.Size([1, 1, 28, 28]):
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)

        unit = 64 * 14 * 14
        self.fc1 = nn.Linear(unit, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        #print(x.shape)
        x = x.view(-1, 64*14*14)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Net()
print(model)

data = torch.rand((1, 1, 28, 28))
pred = model(data)
And if I give my data tensor as data = torch.rand((1, 3, 28, 28)), I get your error, i.e. RuntimeError: Given groups=1, weight of size [16, 1, 3, 3], expected input[16, 3, 1, 28] to have 1 channels, but got 3 channels instead.
So please check the channel dimension of your data just before passing it to your model, i.e. here (highlighted by ** **):
for data in train_loader:
    inputs, labels = data[0], data[1]
    optimizer.zero_grad()
    **print(inputs.shape)**
    output = model(inputs)
    loss = criterion(output, labels)
    loss.backward()
    optimizer.step()
    train_loss += loss.item()*data.size(0)
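You can also check the shape of a single sample straight from the dataset, before the DataLoader batches it. A minimal sketch, using the dataset names from the question:
sample, label = train_data[0]
print(sample.shape)   # the model's first layer, nn.Conv2d(1, 16, 3, padding=1), expects torch.Size([1, 28, 28])
print(label.shape)
If that shape is not [1, 28, 28], the unexpected channel dimension is being introduced inside the Dataset or its transforms rather than by the model.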
I think the problem is with the BatchNorm layer ==> self.bn1 = nn.BatchNorm2d(16).
The parameter of this layer should be the number of channels of its input. If you look at your last conv layer conv3, it produces a feature map with 64 channels, so when you feed that feature map into your BatchNorm, it should be 64 as well. So you can simply do the following:
self.bn1 = nn.BatchNorm2d(64)
I am using PyTorch 1.7 and Python 3.8 with CIFAR-10 dataset. I am trying to create a block with: conv -> conv -> pool -> fc. Fully connected layer (fc) has 256 neurons. The code for this is as follows:
# Testing-
conv1 = nn.Conv2d(
    in_channels=3, out_channels=64,
    kernel_size=3, stride=1,
    padding=1, bias=True
)

conv2 = nn.Conv2d(
    in_channels=64, out_channels=64,
    kernel_size=3, stride=1,
    padding=1, bias=True
)

pool = nn.MaxPool2d(
    kernel_size=2, stride=2
)

fc1 = nn.Linear(
    in_features=64 * 16 * 16, out_features=256,
    bias=True
)
images.shape
# torch.Size([32, 3, 32, 32])
x = conv1(images)
x.shape
# torch.Size([32, 64, 32, 32])
x = conv2(x)
x.shape
# torch.Size([32, 64, 32, 32])
x = pool(x)
x.shape
# torch.Size([32, 64, 16, 16])
# This line of code gives error-
x = fc1(x)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32768x16 and
16384x256)
What is going wrong?
You are nearly there! As you will have noticed, nn.MaxPool2d returns a tensor of shape (32, 64, 16, 16), which is incompatible with nn.Linear's expected input: a 2D tensor of shape (batch, in_features). You need to reshape it to (batch, 64*16*16).
I would recommend using an nn.Flatten layer rather than reshaping yourself. It acts like x.view(x.size(0), -1) but is clearer. By default it preserves the first dimension:
conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(kernel_size=2, stride=2)
flatten = nn.Flatten()
fc1 = nn.Linear(in_features=64*16*16, out_features=256)
x = conv1(images)
x = conv2(x)
x = pool(x)
x = flatten(x)
x = fc1(x)
Alternatively, you could use the functional form torch.flatten, where you have to provide start_dim=1: x = torch.flatten(x, start_dim=1).
When you're done debugging, you could assemble your layers with nn.Sequential:
model = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
nn.Linear(in_features=64*16*16, out_features=256)
)
x = model(images)
You need to flatten the output of the nn.MaxPool2d layer before giving it to the nn.Linear layer.
Try using x = x.view(x.size(0), -1) before the fc layer to flatten the tensor.
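In context, a minimal sketch (the shapes follow the ones printed in the question):
x = pool(x)                  # torch.Size([32, 64, 16, 16])
x = x.view(x.size(0), -1)    # torch.Size([32, 16384]): keep the batch dim, flatten the rest
x = fc1(x)                   # torch.Size([32, 256])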
I am building a CNN to do image classification on the EMNIST dataset.
To do so, I have the following datasets:
import scipy.io

emnist = scipy.io.loadmat(DIRECTORY + '/emnist-letters.mat')
data = emnist['dataset']

X_train = data['train'][0, 0]['images'][0, 0]
X_train = X_train.reshape((-1, 28, 28), order='F')
y_train = data['train'][0, 0]['labels'][0, 0]

X_test = data['test'][0, 0]['images'][0, 0]
X_test = X_test.reshape((-1, 28, 28), order='F')
y_test = data['test'][0, 0]['labels'][0, 0]
With shape:
X_train = (124800, 28, 28)
y_train = (124800, 1)
X_test = (20800, 28, 28)
y_test = (20800, 1)
Note that the pictures are grayscale, so the colors are represented with only one number.
Which I prepare further as follows:
train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
My model looks as follows:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.cnn_layers = Sequential(
            # Defining a 2D convolution layer
            Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(4),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Defining another 2D convolution layer
            Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(4),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
        )

        self.linear_layers = Sequential(
            Linear(4 * 7 * 7, 10)
        )

    # Defining the forward pass
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
model = CNNModel()
The code below is part of the code that I use to train my model:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)

        # Forward pass to get output/logits
        outputs = model(images)
However, when executing my code, I get the following error:
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [4, 1, 3, 3], but got 3-dimensional input of size [100, 28, 28] instead
So a 4D input is expected while my input is 3D. What can I do so that the model accepts my 3D input?
A similar question is asked here, however I don't see how I can translate it to my code.
The convolution expects the input to have size [batch_size, channels, height, width], but your images have size [batch_size, height, width], the channel dimension is missing. Greyscale is represented with a single channel and you have correctly set the in_channels of the first convolutions to 1, but your images don't have the matching dimension.
You can easily add the singular dimension with torch.unsqueeze.
Also, please don't use Variable, it was deprecated with PyTorch 0.4.0, which was released over 2 years ago, and all of its functionality has been merged into the tensors.
for i, (images, labels) in enumerate(train_loader):
    # Add a single channel dimension
    # From: [batch_size, height, width]
    # To: [batch_size, 1, height, width]
    images = images.unsqueeze(1)

    # Forward pass to get output/logits
    outputs = model(images)
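Alternatively, you could add the channel dimension once when building the dataset, so the training loop stays untouched. A minimal sketch, assuming X_train and y_train are the NumPy arrays from the question:
train_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(X_train).float().unsqueeze(1),  # (124800, 28, 28) -> (124800, 1, 28, 28)
    torch.from_numpy(y_train))
Either way, the tensors reaching the model must have shape [batch_size, 1, 28, 28].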
I am trying to run the following program for an image classification problem in PyTorch:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001
TRAIN_DATA_PATH = "train/"
TEST_DATA_PATH = "test/"
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_dataset = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
model = ConvNet(num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model/model.ckpt')
But I get a RuntimeError:
Traceback (most recent call last):
RuntimeError: Given groups=1, weight of size 16 1 5 5, expected input[100, 3, 256, 256] to have 1 channels, but got 3 channels instead
Could someone help fix the bug? Thanks a lot.
Related references:
https://discuss.pytorch.org/t/given-groups-1-weight-16-1-5-5-so-expected-input-100-3-64-64-to-have-1-channels-but-got-3-channels-instead/28831/17
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead
Your input layer self.layer1 starts with a 2D convolution, nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2). This conv layer expects an input with two spatial dimensions and one channel, and outputs a tensor with the same spatial dimensions and 16 channels.
However, your input has three channels, not one (an RGB image instead of a gray-level image).
Make sure your net and data are in sync.
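If you want to keep the single-channel architecture, one option is to convert the images to grayscale inside the transform pipeline. A minimal sketch, assuming you keep the 256x256 resize from the question (the single-value Normalize statistics here are just a placeholder):
TRANSFORM_IMG = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.Grayscale(num_output_channels=1),   # RGB -> 1 channel, applied before ToTensor
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])    # placeholder single-channel stats
])
Alternatively, keep the RGB input and change the first conv layer to nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2). In either case, note that with 256x256 inputs the two stride-2 poolings leave a 64x64 feature map, so self.fc would need in_features = 64 * 64 * 32 rather than 7 * 7 * 32.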