How to solve "RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]"? - pytorch

I got this error while training DeepCrack. I searched for a solution several times but could not find one. How can I overcome the target size issue? At first I ran into an error because the model returns a tuple of tensors, which I worked around by stacking them into a single tensor. Now I don't understand the size mismatch, and I don't know what output size the CNN actually produces. Can anyone please help me?
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from import random_split
import matplotlib.pyplot as plt
import time
import os
import copy
cudnn.benchmark = True
plt.ion() # interactive mode
from random import *
from tqdm.notebook import tqdm, trange
from time import sleep
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from warnings import filterwarnings
# functions to show an image
def imshow(img):
    """Display an image tensor with matplotlib.

    Args:
        img: image tensor in channels-first layout; it is transposed to
            channels-last below before being handed to matplotlib.
    """
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    # Move axis 0 (channels) to the end, which is the layout plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
## codes for data augmentation
# NOTE(review): the ToTensor()/Normalize( wrappers and the closing brackets
# were missing from this listing; they are reconstructed here from the
# dangling mean=/std= keyword arguments -- confirm against the original post.
train_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  ## tamim: image will move left and right
    transforms.RandomVerticalFlip(p=0.5),  ## tamim: image will come to eye vertically
    transforms.RandomRotation(degrees=(.5, 5)),  ## very small rotation of the cracks
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    ),
])
# The test pipeline applies the same random flips/rotation as the training
# pipeline, exactly as in the listing.
test_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  ## tamim: image will move left and right
    transforms.RandomVerticalFlip(p=0.5),  ## tamim: image will come to eye vertically
    transforms.RandomRotation(degrees=(.5, 5)),  ## very small rotation of the cracks
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    ),
])
## Load data
from torchvision.datasets import ImageFolder
data = ImageFolder('../Data/Data_Structure(Annotated)', transform=train_trans , )
test_folder= ImageFolder("../Data/DATA_iPhone_13_Pro_Max", transform=test_trans, )
batch_size = 6
num_classes = 4
learning_rate = 0.01
num_epochs = 10
print("Follwing classes are there : \n",data.classes)
classes = ('Alligator Cracks', 'Delamination', 'Longitudinal Cracks', 'Transverse Cracks')
##Splitting Data and Prepare Batches:
## Source:
val_size = 127 ## Tamim:30% data for validation ##
train_size = len(data) - val_size
train_loader,val_loader = random_split(data,[train_size,val_size]) ## To randomly split the images into training and testing, PyTorch provides random_split()
print(f"Length of Train Data : {len(train_loader)}") ## changed the folder names
print(f"Length of Validation Data : {len(val_loader)}")
# Splitting train and validation data on batches
train_loader =, shuffle=True, batch_size=batch_size) ## defined train data & val data
val_loader =, shuffle=True, batch_size=batch_size)
test_loader =, shuffle=False, batch_size=batch_size)
# visualize images of a single batch
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
# model = models.vgg19(pretrained=True)
# print(model)
from torch import nn
import torch
import torch.nn.functional as F
def Conv3X3(in_, out):
    """Return a 3x3 ``Conv2d`` with padding 1, so height and width are preserved.

    Args:
        in_: number of input channels.
        out: number of output channels.
    """
    return torch.nn.Conv2d(in_, out, 3, padding=1)
class ConvRelu(nn.Module):
    """A ``Conv3X3`` convolution followed by an in-place ReLU."""

    def __init__(self, in_, out):
        """Create the conv/activation pair.

        Args:
            in_: number of input channels.
            out: number of output channels.
        """
        # nn.Module has to be initialised before submodules are assigned;
        # without this call the assignments below raise AttributeError.
        super().__init__()
        self.conv = Conv3X3(in_, out)
        self.activation = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, then the ReLU, and return the result."""
        x = self.conv(x)
        x = self.activation(x)
        return x
class Down(nn.Module):
    """Encoder stage: run a sub-network, then 2x2 max-pool keeping the argmax indices."""

    def __init__(self, nn):
        """Wrap the sub-network applied before pooling.

        Args:
            nn: module applied to the input before pooling. The parameter
                name shadows the ``torch.nn`` alias inside this method,
                which is why the pool is built via ``torch.nn`` explicitly.
        """
        # Required before any submodule assignment on an nn.Module.
        super().__init__()
        self.nn = nn
        self.maxpool_with_argmax = torch.nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)

    def forward(self, inputs):
        """Return ``(pooled, pre_pool, indices, pre_pool_size)``.

        ``indices`` and ``pre_pool_size`` are what a later max-unpooling
        step needs to restore the pre-pool resolution.
        """
        down = self.nn(inputs)
        unpooled_shape = down.size()
        outputs, indices = self.maxpool_with_argmax(down)
        return outputs, down, indices, unpooled_shape
class Up(nn.Module):
    """Decoder stage: max-unpool with stored indices, then run a sub-network."""

    def __init__(self, nn):
        """Wrap the sub-network applied after unpooling.

        Args:
            nn: module applied to the unpooled tensor. The parameter name
                shadows the ``torch.nn`` alias inside this method.
        """
        # Required before any submodule assignment on an nn.Module.
        super().__init__()
        self.nn = nn
        # forward() calls self.unpool; 2x2/stride-2 mirrors the pooling
        # configured in Down so the stored indices line up.
        self.unpool = torch.nn.MaxUnpool2d(kernel_size=2, stride=2)

    def forward(self, inputs, indices, output_shape):
        """Unpool ``inputs`` to ``output_shape`` using ``indices``, then apply the sub-network.

        Args:
            inputs: pooled tensor to up-sample.
            indices: argmax indices returned by the matching max-pool.
            output_shape: size of the tensor before it was pooled.
        """
        outputs = self.unpool(inputs, indices=indices, output_size=output_shape)
        outputs = self.nn(outputs)
        return outputs
class Fuse(nn.Module):
    """Fuse an encoder and a decoder feature map into a one-channel map."""

    def __init__(self, nn, scale):
        """Store the fusion sub-network and the up-sampling factor.

        Args:
            nn: module applied to the concatenated, up-sampled features; its
                output is fed to a 64-input-channel convolution.
            scale: ``scale_factor`` passed to ``F.interpolate``.
        """
        # Required before any submodule assignment on an nn.Module.
        super().__init__()
        self.nn = nn
        self.scale = scale
        self.conv = Conv3X3(64, 1)

    def forward(self, down_inp, up_inp):
        """Concatenate on the channel axis, up-sample, fuse and project to one channel."""
        # dim=1 is the channel axis, so the two inputs must agree on every
        # other dimension.
        outputs = torch.cat([down_inp, up_inp], 1)
        outputs = F.interpolate(outputs, scale_factor=self.scale, mode='bilinear')
        outputs = self.nn(outputs)
        return self.conv(outputs)
class DeepCrack(nn.Module):
    """Encoder/decoder network with five scales, each fused into a one-channel side output."""

    def __init__(self, num_classes=1000):
        """Build the five Down stages, five Up stages, five Fuse heads and the final conv.

        Args:
            num_classes: accepted but not used anywhere in this class.
        """
        super(DeepCrack, self).__init__()

        # NOTE(review): the layer lists of down1-down3 were missing from this
        # listing. They are reconstructed from the channel counts the other
        # stages require (down4 consumes 256 channels, fuse2 expects 128 + 64,
        # fuse1 expects 64 + 64, the sample input has 3 channels) and mirror
        # the layer counts of up1-up3 -- confirm against the original post.
        self.down1 = Down(torch.nn.Sequential(
            ConvRelu(3, 64),
            ConvRelu(64, 64),
        ))
        self.down2 = Down(torch.nn.Sequential(
            ConvRelu(64, 128),
            ConvRelu(128, 128),
        ))
        self.down3 = Down(torch.nn.Sequential(
            ConvRelu(128, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 256),
        ))
        self.down4 = Down(torch.nn.Sequential(
            ConvRelu(256, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.down5 = Down(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))

        self.up1 = Up(torch.nn.Sequential(
            ConvRelu(64, 64),
            ConvRelu(64, 64),
        ))
        self.up2 = Up(torch.nn.Sequential(
            ConvRelu(128, 128),
            ConvRelu(128, 64),
        ))
        self.up3 = Up(torch.nn.Sequential(
            ConvRelu(256, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 128),
        ))
        self.up4 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 256),
        ))
        self.up5 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))

        # Each head takes the concatenation of one encoder map and one decoder
        # map; `scale` brings the result back to the input resolution.
        self.fuse5 = Fuse(ConvRelu(512 + 512, 64), scale=16)
        self.fuse4 = Fuse(ConvRelu(512 + 256, 64), scale=8)
        self.fuse3 = Fuse(ConvRelu(256 + 128, 64), scale=4)
        self.fuse2 = Fuse(ConvRelu(128 + 64, 64), scale=2)
        self.fuse1 = Fuse(ConvRelu(64 + 64, 64), scale=1)

        # Five one-channel side outputs are concatenated and reduced to one channel.
        # NOTE(review): the attribute name `final` was lost in this listing and
        # is a reconstruction -- confirm.
        self.final = Conv3X3(5, 1)

    def forward(self, inputs):
        """Return ``(output, fuse5, fuse4, fuse3, fuse2, fuse1)``.

        ``output`` is the fused one-channel prediction; the remaining five
        tensors are the per-scale one-channel side outputs.
        """
        # encoder part
        out, down1, indices_1, unpool_shape1 = self.down1(inputs)
        out, down2, indices_2, unpool_shape2 = self.down2(out)
        out, down3, indices_3, unpool_shape3 = self.down3(out)
        out, down4, indices_4, unpool_shape4 = self.down4(out)
        out, down5, indices_5, unpool_shape5 = self.down5(out)

        # decoder part
        up5 = self.up5(out, indices=indices_5, output_shape=unpool_shape5)
        up4 = self.up4(up5, indices=indices_4, output_shape=unpool_shape4)
        up3 = self.up3(up4, indices=indices_3, output_shape=unpool_shape3)
        up2 = self.up2(up3, indices=indices_2, output_shape=unpool_shape2)
        up1 = self.up1(up2, indices=indices_1, output_shape=unpool_shape1)

        fuse5 = self.fuse5(down_inp=down5, up_inp=up5)
        fuse4 = self.fuse4(down_inp=down4, up_inp=up4)
        fuse3 = self.fuse3(down_inp=down3, up_inp=up3)
        fuse2 = self.fuse2(down_inp=down2, up_inp=up2)
        fuse1 = self.fuse1(down_inp=down1, up_inp=up1)

        output = self.final(torch.cat([fuse5, fuse4, fuse3, fuse2, fuse1], 1))

        return output, fuse5, fuse4, fuse3, fuse2, fuse1
if __name__ == '__main__':
inp = torch.randn((1,3,512,512))
model = DeepCrack()
out = model(inp)
# Training script from the question. It is kept as posted because it is the
# code that produces the RuntimeError shown in the traceback that follows.
model = DeepCrack()
# specify loss function
criterion = nn.CrossEntropyLoss()
# specify optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# number of epochs to train the model
n_epochs = 10
for epoch in range(1, n_epochs+1):
# monitor training loss
train_loss = 0.0
# train the model #
for data in train_loader:
# _ stands in for labels, here
# no need to flatten images
images, _ = data
# clear the gradients of all optimized variables
# NOTE(review): no optimizer.zero_grad() call follows the comment above in
# this listing -- presumably lost; confirm against the original post.
# forward pass: compute predicted outputs by passing inputs to the model
# DeepCrack.forward returns a tuple of six tensors, not a single tensor.
outputs = model(images)
# Stacking on dim=0 puts the six returned tensors along a new leading axis,
# in front of the batch axis.
outputs = torch.stack(outputs, dim=0, out=None) ## Tamim: converted the tuple of tensors to one.
outputs = outputs ## Changed shape
print(outputs.shape) ## Tamim: printed the target tensor shape to see
print(outputs) ## Tamim: printed the target tensors
# calculate the loss
# The input batch `images` is passed as the target here; this is the call
# the traceback points at.
loss = criterion(outputs, images)
# backward pass: compute gradient of the loss with respect to model parameters
# perform a single optimization step (parameter update)
# NOTE(review): no loss.backward() / optimizer.step() calls follow the two
# comments above in this listing -- presumably lost; confirm.
# update running training loss
train_loss += loss.item()*images.size(0)
# print avg training statistics
train_loss = train_loss/len(train_loader)
# NOTE(review): the print call below is truncated in this listing.
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
Traceback (most recent call last):
File "", line 324, in <module>
loss = criterion(outputs, images)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/", line 1150, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/", line 2846, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]


Why is my autoencoder not learning the FMNIST dataset?

I am using a simple autoencoder to learn images from the FashionMnist dataset. I have preprocessed the dataset by grayscaling and normalizing it. I did not make the network too deep, to prevent it from creating a direct mapping.
Here's my PyTorch code -
import torch
import torchvision as tv
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import nn
import os
from torchviz import make_dot
transforms = tv.transforms.Compose([tv.transforms.Grayscale(num_output_channels=1)])
trainset = tv.datasets.FashionMNIST(root='./data', train=True,
download=True, transform=transforms)
PATH = './ae.pth'
data =
data = data/255
# print(
plt.imshow([0], cmap = 'gray')
class NeuralNetwork(nn.Module):
    """Fully connected autoencoder: 784 -> 512 -> 30 -> 512 -> 784."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        # NOTE(review): only the Linear layers are present in this listing;
        # any activation layers between them may have been lost -- confirm
        # against the original post.
        self.encode = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.Linear(512, 30),
        )
        self.decode = nn.Sequential(
            nn.Linear(30, 512),
            nn.Linear(512, 28*28),
        )

    def forward(self, x):
        """Flatten ``x``, encode it to 30 features and decode back to 784 features."""
        x = self.flatten(x)
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded
print("Loading data on cpu")
device = torch.device('cpu')
model = NeuralNetwork()
model.load_state_dict(torch.load(PATH, map_location=device))
device = "cuda" if torch.cuda.is_available() else "cpu"
data =
print(f"Using device = {device}")
model = NeuralNetwork().to(device)
# print(model)
lossFn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
for epoch in range(1000):
print("Epoch = ", epoch)
outputs = model(data)
loss = lossFn(outputs, data.reshape(-1, 784))
optimizer.step(), PATH)
data ="cpu")
model ="cpu")
pred = model(data)
pred = pred.reshape(-1, 28, 28)
# print(pred.shape)
plt.imshow(pred.detach().numpy()[0], cmap = 'gray')
For testing, I am inputting the following image -
However, I get this as output -
I had an intuition that there was an issue with your loss function. When working with images, distance-based losses such as L1 or L2 losses work really well, as you are essentially measuring how far-away your predictions are from the ground-truth images. This was what I had observed as well, as the loss wasn't converging with BCE and it was rather oscillating.
I rewrote the entire thing and replaced BCE loss with MSE Loss and in just 50 epochs, the loss has gone down considerably, and it is still going down.
Here is the prediction after just 50 epochs -
The ground-truth image is -
I believe that you can get the loss down much more if you train for longer.
Here is the full code. I used a dataloader for batchifying and processing the data.
I also changed the transformations so that the resulting data is a torch tensor.
import torch
import torchvision as tv
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import nn
from import DataLoader
transforms = tv.transforms.Compose([
trainset = tv.datasets.FashionMNIST(root='./data', train=True,
download=True, transform=transforms)
loader = DataLoader(trainset, batch_size=32, num_workers=1, shuffle=True)
class NeuralNetwork(nn.Module):
    """Fully connected autoencoder: 784 -> 512 -> 30 -> 512 -> 784."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        # NOTE(review): only the Linear layers are present in this listing;
        # any activation layers between them may have been lost -- confirm
        # against the original answer.
        self.encode = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.Linear(512, 30),
        )
        self.decode = nn.Sequential(
            nn.Linear(30, 512),
            nn.Linear(512, 28*28),
        )

    def forward(self, x):
        """Flatten ``x``, encode it to 30 features and decode back to 784 features."""
        x = self.flatten(x)
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded
model = NeuralNetwork().to(device)
lossFn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-2)
epochs = 50
for epoch in range(epochs):
for images, labels in loader:
images, labels =,
outputs = model(images)
loss = lossFn(outputs, images.reshape(-1, 28*28))
print(f'Loss : {loss.item()}')
print(f'Epochs done : {epoch}')
Here is some inference code -
# infer on some test data
testset = tv.datasets.FashionMNIST(root='./data', train=False,
download=False, transform=transforms)
testloader = DataLoader(testset, shuffle=False, batch_size=32, num_workers=1)
test_images, test_labels = next(iter(testloader))
test_images =
predictions = model(test_images)
prediction = predictions[0]
prediction = prediction.view(1, 28, 28)
prediction = prediction.detach().cpu().numpy()
prediction = prediction.transpose(1, 2, 0)
# plot the prediction
plt.imshow(prediction, cmap = 'gray')
# plot the actual image
test_image = test_images[0]
test_image = test_image.detach().cpu().numpy()
test_image = test_image.transpose(1, 2, 0)
plt.imshow(test_image, cmap='gray')
This is the loss going down --
Epochs done : 39
Loss : 0.04641226679086685
Epochs done : 40
Loss : 0.04445071145892143
Epochs done : 41
Loss : 0.05033266171813011
Epochs done : 42
Loss : 0.04813298210501671
Epochs done : 43
Loss : 0.0474831722676754
Epochs done : 44
Loss : 0.044186390936374664
Epochs done : 45
Loss : 0.049083154648542404
Epochs done : 46
Loss : 0.04645842686295509
Epochs done : 47
Loss : 0.04586248844861984
Epochs done : 48
Loss : 0.0467853844165802
Epochs done : 49

Function AddmmBackward returned an invalid gradient

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
class NeuralNetwork(nn.Module):
    """Two conv/pool stages followed by three linear layers producing 3 logits."""

    def __init__(self):
        # nn.Module has to be initialised before submodules are assigned.
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 * 5 * 5 is the flattened size for a 1x32x32 input
        # (32 -> 28 -> 14 -> 10 -> 5); other input sizes need a different
        # in_features value. Left as posted because it is the subject of
        # the question.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 3)

    def forward(self, x):
        """Return the 3-way logits for a batch of single-channel images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = NeuralNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
from torchvision import datasets, transforms
from import DataLoader, random_split
def UploadData(path, train):
    """Build shuffled DataLoaders for the train, validation and test image folders.

    Args:
        path: dataset root; ``/train``, ``/validation`` and ``/test`` are
            appended to it and each is read with ``ImageFolder``.
        train: accepted for interface compatibility; not used by this function.

    Returns:
        Tuple ``(trainloader, validloader, testloader)``; every loader uses a
        batch size of 32 and ``shuffle=True``.
    """
    # set up transforms for train and test datasets
    # Every pipeline ends with a 224x224 single-channel tensor.
    train_transforms = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    valid_transforms = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    test_transforms = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    # set up datasets from Image Folders
    train_dataset = datasets.ImageFolder(path + '/train', transform=train_transforms)
    valid_dataset = datasets.ImageFolder(path + '/validation', transform=valid_transforms)
    test_dataset = datasets.ImageFolder(path + '/test', transform=test_transforms)

    # set up dataloaders with batch size of 32
    # NOTE(review): the loader constructor was truncated in this listing;
    # torch.utils.data.DataLoader over the matching dataset is a
    # reconstruction -- confirm against the original post.
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=32, shuffle=True)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

    return trainloader, validloader, testloader
trainloader, validloader, testloader = UploadData("/home/lns/research/dataset", True)
epochs = 5
min_valid_loss = np.inf
for e in range(epochs):
train_loss = 0.0
for data, labels in trainloader:
# Transfer Data to GPU if available
if torch.cuda.is_available():
print("using GPU for data")
data, labels = data.cuda(), labels.cuda()
# Clear the gradients
# Forward Pass
target = net(data)
# Find the Loss
loss = criterion(target,labels)
# Calculate gradients
# Update Weights
# Calculate Loss
train_loss += loss.item()
valid_loss = 0.0
model.eval() # Optional when not using Model Specific layer
for data, labels in validloader:
# Transfer Data to GPU if available
if torch.cuda.is_available():
print("using GPU for data")
data, labels = data.cuda(), labels.cuda()
# Forward Pass
target = net(data)
# Find the Loss
loss = criterion(target,labels)
# Calculate Loss
valid_loss += loss.item()
print('Epoch ',e+1, '\t\t Training Loss: ',train_loss / len(trainloader),' \t\t Validation Loss: ',valid_loss / len(validloader))
if min_valid_loss > valid_loss:
print("Validation Loss Decreased(",min_valid_loss,"--->",valid_loss,") \t Saving The Model")
min_valid_loss = valid_loss
# Saving State Dict, '/home/lns/research/MODEL.pth')
After searching a lot, I am asking for help. Can someone help me
understand why this error is occurring in backward propagation?
I followed the PyTorch CNN tutorial and a GeeksforGeeks tutorial.
The dataset consists of X-ray images converted to grayscale and resized to 255.
Is my neural network wrong, or is the data not processed correctly?
This is a size mismatch between the output of your CNN and the number of input features of your first fully-connected layer. Because of missing padding, the number of elements when flattened is 16*4*4, i.e. 256 (and not 16*5*5):
self.fc1 = nn.Linear(256, 120)
Once modified, the model will run correctly:
>>> model = NeuralNetwork()
>>> model(torch.rand(1, 1, 28, 28)).shape
torch.Size([1, 3])
Alternatively, you can use an nn.LazyLinear, which will deduce the in_features argument during the very first forward pass based on its input shape.
self.fc1 = nn.LazyLinear(120)

How can the tensor matmul shape error be removed?

I am playing with the PyTorch ZOO AAE. I replaced the MNIST dataset with my own. However, I am getting an error which says: mat1 dim 1 must match mat2 dim 0, even though I am resizing the image in my dataloader. Here is my code. I am using ImageFolder to read my custom dataset. I made sure I am using opt.shape and opt.batch_size. There is also a resize on the dataloader.
import argparse
import os
import numpy as np
import math
import itertools
import torchvision.transforms as transforms
from torchvision.utils import save_image
from import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch
os.makedirs("images", exist_ok=True)
parser = argparse.ArgumentParser()
parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=64, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--latent_dim", type=int, default=10, help="dimensionality of the latent code")
parser.add_argument("--img_size", type=int, default=32, help="size of each image dimension")
parser.add_argument("--channels", type=int, default=1, help="number of image channels")
parser.add_argument("--sample_interval", type=int, default=400, help="interval between image sampling")
opt = parser.parse_args()
img_shape = (opt.channels, opt.img_size, opt.img_size)
cuda = True if torch.cuda.is_available() else False
def reparameterization(mu, logvar):
    """Draw a latent sample ``mu + eps * exp(logvar / 2)`` with ``eps ~ N(0, 1)``.

    Args:
        mu: tensor of means; its first dimension sets the number of samples.
        logvar: tensor of log-variances, combined element-wise with ``mu``.

    Returns:
        The sampled latent tensor; the noise has ``opt.latent_dim`` columns.
    """
    std = torch.exp(logvar / 2)
    # The noise is drawn with NumPy and wrapped in the module-level Tensor type.
    sampled_z = Variable(Tensor(np.random.normal(0, 1, (mu.size(0), opt.latent_dim))))
    z = sampled_z * std + mu
    return z
class Encoder(nn.Module):
    """MLP encoder that maps a flattened image to a sampled latent code."""

    def __init__(self):
        super(Encoder, self).__init__()

        # NOTE(review): `np.prod(img_shape)` and the `self.mu` attribute were
        # garbled in this listing and are reconstructed from the parallel
        # `self.logvar` layer and the use of `img_shape` in the decoder --
        # confirm against the original post.
        self.model = nn.Sequential(
            # The first layer consumes every element of one image of shape img_shape.
            nn.Linear(int(np.prod(img_shape)), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.LeakyReLU(0.2, inplace=True),
        )

        self.mu = nn.Linear(512, opt.latent_dim)
        self.logvar = nn.Linear(512, opt.latent_dim)

    def forward(self, img):
        """Flatten ``img`` per sample, predict mean/log-variance and return a sample."""
        img_flat = img.view(img.shape[0], -1)
        x = self.model(img_flat)
        mu = self.mu(x)
        logvar = self.logvar(x)
        z = reparameterization(mu, logvar)
        return z
class Decoder(nn.Module):
    """MLP decoder that maps a latent code back to an image of shape ``img_shape``."""

    def __init__(self):
        super(Decoder, self).__init__()

        # NOTE(review): `np.prod(img_shape)` and the closing bracket were
        # garbled in this listing; a trailing activation layer may also have
        # been lost -- confirm against the original post.
        self.model = nn.Sequential(
            nn.Linear(opt.latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, int(np.prod(img_shape))),
        )

    def forward(self, z):
        """Decode ``z`` and reshape each sample to ``img_shape``."""
        img_flat = self.model(z)
        img = img_flat.view(img_flat.shape[0], *img_shape)
        return img
class Discriminator(nn.Module):
    """MLP that scores a latent code with a single output value."""

    def __init__(self):
        super(Discriminator, self).__init__()

        # NOTE(review): the script pairs this output with BCELoss, which
        # needs values in [0, 1], yet no sigmoid is present in this listing
        # -- it may have been lost; confirm against the original post.
        self.model = nn.Sequential(
            nn.Linear(opt.latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
        )

    def forward(self, z):
        """Return the validity score for each latent code in ``z``."""
        validity = self.model(z)
        return validity
# Use binary cross-entropy loss
adversarial_loss = torch.nn.BCELoss()
pixelwise_loss = torch.nn.L1Loss()
# Initialize generator and discriminator
encoder = Encoder()
decoder = Decoder()
discriminator = Discriminator()
if cuda:
from torchvision.datasets import ImageFolder
# Configure data loader
os.makedirs("../../data/mnist", exist_ok=True)
import torchvision
import PIL
dataloader =
ImageFolder('./content', transform=transforms.Compose([
transforms.Normalize([0.5], [0.5])]))
# Optimizers
optimizer_G = torch.optim.Adam(
itertools.chain(encoder.parameters(), decoder.parameters()),, betas=(opt.b1, opt.b2)
optimizer_D = torch.optim.Adam(discriminator.parameters(),, betas=(opt.b1, opt.b2))
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
def sample_image(n_row, batches_done):
    """Saves a grid of generated digits

    Args:
        n_row: grid side length; ``n_row ** 2`` latent codes are decoded.
        batches_done: number used to name the output file
            ``images/<batches_done>.png``.
    """
    # Sample noise
    z = Variable(Tensor(np.random.normal(0, 1, (n_row ** 2, opt.latent_dim))))
    gen_imgs = decoder(z)
    # NOTE(review): the first argument was lost in this listing;
    # `gen_imgs.data` is a reconstruction -- confirm against the original post.
    save_image(gen_imgs.data, "images/%d.png" % batches_done, nrow=n_row, normalize=True)
# ----------
# Training
# ----------
for epoch in range(opt.n_epochs):
for i, (imgs, _) in enumerate(dataloader):
# Adversarial ground truths
valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(Tensor))
# -----------------
# Train Generator
# -----------------
encoded_imgs = encoder(real_imgs)
decoded_imgs = decoder(encoded_imgs)
# Loss measures generator's ability to fool the discriminator
g_loss = 0.001 * adversarial_loss(discriminator(encoded_imgs), valid) + 0.999 * pixelwise_loss(
decoded_imgs, real_imgs
# ---------------------
# Train Discriminator
# ---------------------
# Sample noise as discriminator ground truth
z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))
# Measure discriminator's ability to classify real from generated samples
real_loss = adversarial_loss(discriminator(z), valid)
fake_loss = adversarial_loss(discriminator(encoded_imgs.detach()), fake)
d_loss = 0.5 * (real_loss + fake_loss)
"[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
% (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
batches_done = epoch * len(dataloader) + i
if batches_done % opt.sample_interval == 0:
sample_image(n_row=10, batches_done=batches_done)
It's because you are loading 3 channels. You have to use a single channel.
Add transforms.Grayscale() to the transform pipeline as well, as I have done below:
dataloader =
ImageFolder('./content', transform=transforms.Compose([transforms.Grayscale(),
transforms.Normalize([0.5], [0.5])]))

RuntimeError: value cannot be converted to type uint8_t without overflow: -0.192746

I am new to Pytorch and am aiming to do an image classification task using a CNN based on the EMNIST dataset.
I read my data in as follows:
emnist = + '/emnist-letters.mat')
data = emnist ['dataset']
X_train = data ['train'][0, 0]['images'][0, 0]
X_train = X_train.reshape((-1,28,28), order='F')
y_train = data ['train'][0, 0]['labels'][0, 0]
X_test = data ['test'][0, 0]['images'][0, 0]
X_test = X_test.reshape((-1,28,28), order = 'F')
y_test = data ['test'][0, 0]['labels'][0, 0]
train_dataset =, torch.from_numpy(y_train))
test_dataset =, torch.from_numpy(y_test))
batch_size = 128
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
train_loader =,
test_loader =,
Then, I found the following configurations (that I still have to adjust to fit to my data):
class CNNModel(nn.Module):
    """Two conv/ReLU/max-pool stages followed by a single linear readout of 10 logits."""

    def __init__(self):
        super(CNNModel, self).__init__()

        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()

        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()

        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Fully connected 1 (readout)
        # 32 * 4 * 4 is the flattened size for a 28x28 input:
        # 28 -> 24 (conv) -> 12 (pool) -> 8 (conv) -> 4 (pool).
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Return class logits for a batch shaped ``(batch, 1, height, width)``."""
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)

        # Max pool 1
        out = self.maxpool1(out)

        # Convolution 2
        out = self.cnn2(out)
        out = self.relu2(out)

        # Max pool 2
        out = self.maxpool2(out)

        # Flatten everything except the batch dimension:
        # (batch, 32, 4, 4) -> (batch, 32*4*4) for 28x28 inputs.
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)

        return out
model = CNNModel()
criterion = nn.CrossEntropyLoss()
To train the model, I use the following code:
iter = 0
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Add a single channel dimension
# From: [batch_size, height, width]
# To: [batch_size, 1, height, width]
images = images.unsqueeze(1)
# Forward pass to get output/logits
outputs = model(images)
# Clear gradients w.r.t. parameters
# Forward pass to get output/logits
outputs = model(images)
# Calculate Loss: softmax --> cross entropy loss
loss = criterion(outputs, labels)
# Getting gradients w.r.t. parameters
# Updating parameters
iter += 1
if iter % 500 == 0:
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images = images.unsqueeze(1)
# Forward pass only to get logits/output
outputs = model(images)
# Get predictions from the maximum value
_, predicted = torch.max(, 1)
# Total number of labels
total += labels.size(0)
correct += (predicted == labels).sum()
accuracy = 100 * correct / total
# Print Loss
print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter,[0], accuracy))
However, when I run this, I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-27-1fbdd53d1194> in <module>()
13 # Forward pass to get output/logits
---> 14 outputs = model(images)
16 # Clear gradients w.r.t. parameters
4 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in _conv_forward(self, input, weight)
348 _pair(0), self.dilation, self.groups)
349 return F.conv2d(input, weight, self.bias, self.stride,
--> 350 self.padding, self.dilation, self.groups)
352 def forward(self, input):
RuntimeError: value cannot be converted to type uint8_t without overflow: -0.0510302
I found this question already and think that the solution might work for me as well. However, I don't understand where in my code I can implement this.
What can I do to overcome this problem?
I have used the following import statements:
import scipy .io
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import os
from PIL import Image
from PIL import ImageOps
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from import DataLoader
from torchvision.transforms import ToTensor
from torch.nn import Sequential
from torch.nn import Conv2d
from torch.nn import BatchNorm2d
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Linear
What fixed my problem was replacing out = self.cnn1(x) with out = self.cnn1(x.float())

AttributeError: 'builtin_function_or_method' object has no attribute 'requires_grad'

I'm getting this error when training on the MNIST data; the CSV files are from Kaggle. Can someone show me where I went wrong? Here is my code. The version of PyTorch is 0.4.0.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
import as data
import torchvision
import matplotlib.pyplot as plt
# Training Parameters
EPOCH = 20
BATCH_size = 15
LR = 0.001
img_row, img_col = 28, 28
# Networks structure
class CNN(nn.Module):
    """Two convolutional stages followed by a two-layer classifier with 10 outputs."""

    def __init__(self):
        super(CNN, self).__init__()
        # NOTE(review): this listing lost the first Conv2d( wrapper, every
        # closing bracket and whatever layers sat between the convolutions.
        # One MaxPool2d(2) per stage is reconstructed because the classifier
        # expects 64*7*7 features while the script uses 28x28 images
        # (28 -> 14 -> 7); activation or dropout layers may also have been
        # present -- confirm against the original post.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1, out_channels=32,
                kernel_size=5, stride=1, padding=2
            ),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.MaxPool2d(2),
        )
        self.out = nn.Sequential(
            nn.Linear(64*7*7, 512),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        """Return the 10 class scores for a batch shaped ``(batch, 1, 28, 28)``."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output
# Torch Dataset
class Torch_Dataset(data.Dataset):
    """Dataset over a CSV file holding one flattened image per row.

    In train mode the CSV has a ``label`` column followed by 784 pixel
    columns; otherwise every column is read as a pixel.
    """

    def __init__(self, root_dir, csvfile, img_rows, img_cols, train=True, transform=None):
        """Load the whole CSV into memory as tensors.

        Args:
            root_dir: stored on the instance; not used for reading.
            csvfile: path handed to ``pd.read_csv``.
            img_rows: image height used when reshaping each row.
            img_cols: image width used when reshaping each row.
            train: when true, also load the ``label`` column as targets.
            transform: stored on the instance; not applied by this class.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.train = train
        # The two modes only differ in which columns hold pixels; the
        # conversion to a float tensor is shared. The listing had lost the
        # `else:` that separates them.
        if self.train:
            y_data0 = pd.read_csv(csvfile, header=0, usecols=['label'])
            self.y_data = torch.from_numpy(np.array(y_data0))
            x_data0 = pd.read_csv(csvfile, header=0, usecols=list(range(1, 785)))
        else:
            x_data0 = pd.read_csv(csvfile, header=0)
        x_data1 = np.array(x_data0)
        x_data1 = x_data1.reshape(x_data1.shape[0], 1, img_rows, img_cols)
        x_data1 = x_data1.astype('float32')
        x_data1 /= 255  # scale 0..255 pixel values to 0..1
        self.x_data = torch.from_numpy(x_data1)

    def __len__(self):
        """Return the number of images."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``(img, target)``; ``target`` is ``None`` when not in train mode."""
        if self.train:
            img, target = self.x_data[idx], self.y_data[idx]
        else:
            img = self.x_data[idx]
            target = None
        return img, target
train = Torch_Dataset(
root_dir='./', # root
csvfile='train.csv', # filename
img_rows=img_row, # image rows
img_cols=img_col, # image cols
train=True # train or test
# DataLoader
loader = data.DataLoader(
dataset=train, # torch dataset format
batch_size=BATCH_size, # mini batch size
shuffle=True, # shuffle the data
# train the data
# Training script from the question, kept as posted because it is the code
# that produces the AttributeError shown in the traceback that follows.
cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_f = nn.CrossEntropyLoss()
for epoch in range(EPOCH):
for step, (x, y) in enumerate(loader):
b_x = Variable(x)
b_y = Variable(y)
# `squeeze` is referenced here without being called, so b_y is rebound to
# the method object instead of a tensor; the loss call below then fails
# when it looks up `requires_grad` on it.
b_y = b_y.squeeze
output = cnn(b_x)
loss = loss_f(output, b_y)
Traceback (most recent call last):
File "C:/Users/Bryan Zoe/PycharmProjects/MNIST_TEST/PyTorch/", line 118, in
loss = loss_f(output, b_y)
File "C:\Users\Bryan Zoe\Anaconda3\lib\site-packages\torch\nn\modules\", line 491, in __ call __
result = self.forward(*input, **kwargs)
File "C:\Users\Bryan Zoe\Anaconda3\lib\site-packages\torch\nn\modules\", line 757, in forward
File "C:\Users\Bryan Zoe\Anaconda3\lib\site-packages\torch\nn\modules\", line 11, in _assert_no_grad
assert not tensor.requires_grad, \
AttributeError: 'builtin_function_or_method' object has no attribute 'requires_grad'
You are not calling the squeeze method. This should work:
b_y = b_y.squeeze()
