How can I solve input-output unmatched issue and using proper class numbers issue in encoder-decoder based CNN using pytorch? - pytorch

I'm really facing a problem with the unmatched input and output dimensions. This CNN is taken from geek for geeks where the input size was 28 * 28. Every tutorial are just on the MNIST dataset, what happens if my dataset is different with a different number of class? But my input size is 224 * 224 and my class number is 4. So, How can I prepare an encoder-decoder CNN to fit in my class numbers, particularly when I'm using PyTorch? I think I need to understand how to balance input size and channels with output size and channels. What is the basis of this? If anyone could help me, it would be much more appreciated.
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# License: BSD
# Author: Sasank Chilamkurthy
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from import random_split
import matplotlib.pyplot as plt
import time
import os
import copy
cudnn.benchmark = True
plt.ion() # interactive mode
from random import *
from tqdm.notebook import tqdm, trange
from time import sleep
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from warnings import filterwarnings
# functions to show an image
def imshow(img):
img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
## codes for data augmentation
train_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
test_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
## Load data
from torchvision.datasets import ImageFolder
data = ImageFolder("D:\\SPRING 2023\\MS_Thesis\\MASTER'S THESIS\\EdmCrack600\\Data_Structure(Annotated)_training", transform=train_trans , )
test_folder= ImageFolder("D:\\SPRING 2023\\MS_Thesis\\MASTER'S THESIS\\Thesis results\\Data\\DATA_iPhone_13_Pro_Max", transform=test_trans, )
# constants
num_classes = 4
print("Follwing classes are there : \n",data.classes)
print("data length:", len(data))
classes = ('Alligator Cracks', 'Delamination', 'Longitudinal Cracks', 'Transverse Cracks')
##Splitting Data and Prepare Batches:
## Source:
val_size = 127 ## Tamim:30% data for validation ##
train_size = len(data) - val_size
## To randomly split the images into training and testing, PyTorch provides random_split()
train_data, val_data = random_split(data,[train_size,val_size])
print(f"Length of Train Data : {len(train_data)}") ## changed the folder names
print(f"Length of Validation Data : {len(val_data)}")
# Splitting train and validation data on batches
train_loader =, shuffle=True, batch_size=batch_size) ## defined train data & val data
val_loader =, shuffle=True, batch_size=batch_size)
test_loader =, shuffle=False, batch_size=batch_size)
# visualize images of a single batch
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
# define device
torch.cuda.is_available = lambda : False
device = torch.device('cpu')
## Model
class Autoencoder(nn.Module):
def __init__(self):
super(Autoencoder, self).__init__()
# encoder
self.enc1 = nn.Linear(in_features=224, out_features=224)
self.enc2 = nn.Linear(in_features=256, out_features=128)
self.enc3 = nn.Linear(in_features=128, out_features=64)
self.enc4 = nn.Linear(in_features=64, out_features=32)
self.enc5 = nn.Linear(in_features=32, out_features=16)
# decoder
self.dec1 = nn.Linear(in_features=16, out_features=32)
self.dec2 = nn.Linear(in_features=32, out_features=64)
self.dec3 = nn.Linear(in_features=64, out_features=128)
self.dec4 = nn.Linear(in_features=128, out_features=256)
self.dec5 = nn.Linear(in_features=256, out_features=784)
def forward(self, x):
x = F.relu(self.enc1(x))
x = F.relu(self.enc2(x))
x = F.relu(self.enc3(x))
x = F.relu(self.enc4(x))
x = F.relu(self.enc5(x))
x = F.relu(self.dec1(x))
x = F.relu(self.dec2(x))
x = F.relu(self.dec3(x))
x = F.relu(self.dec4(x))
x = F.relu(self.dec5(x))
return x
net = Autoencoder()
def make_dir():
image_dir = 'FashionMNIST_Images'
if not os.path.exists(image_dir):
def save_decoded_image(img, epoch):
img = img.view(img.size(0), 1, 224, 224)
save_image(img, './crack.png'.format(epoch))
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
def train(net, train_loader, NUM_EPOCHS):
train_loss = []
for epoch in range(NUM_EPOCHS):
running_loss = 0.0
for data in train_loader:
img, _ = data
img =
img = img.view(img.size(0), -1)
outputs = net(img)
loss = criterion(outputs, img)
running_loss += loss.item()
loss = running_loss / len(trainloader)
print('Epoch {} of {}, Train Loss: {:.3f}'.format(
epoch+1, NUM_EPOCHS, loss))
if epoch % 5 == 0:
save_decoded_image(outputs.cpu().data, epoch)
return train_loss
def test_image_reconstruction(net, test_loader):
for batch in testloader:
img, _ = batch
img =
img = img.view(img.size(0), -1)
outputs = net(img)
outputs = outputs.view(outputs.size(0), 1, 28, 28).cpu().data
save_image(outputs, 'crack_reconstruction.png')
# load the neural network onto the device
# train the network
train_loss = train(net, train_loader, NUM_EPOCHS)
plt.title('Train Loss')
RuntimeError Traceback (most recent call last)
Cell In[9], line 4
2 make_dir()
3 # train the network
----> 4 train_loss = train(net, train_loader, NUM_EPOCHS)
5 plt.figure()
6 plt.plot(train_loss)
Cell In[7], line 10, in train(net, train_loader, NUM_EPOCHS)
8 img = img.view(img.size(0), -1)
9 optimizer.zero_grad()
---> 10 outputs = net(img)
11 loss = criterion(outputs, img)
12 loss.backward()
File ~\anaconda3\envs\DeepCrack\lib\site-packages\torch\nn\modules\, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
Cell In[4], line 18, in Autoencoder.forward(self, x)
17 def forward(self, x):
---> 18 x = F.relu(self.enc1(x))
19 x = F.relu(self.enc2(x))
20 x = F.relu(self.enc3(x))
File ~\anaconda3\envs\DeepCrack\lib\site-packages\torch\nn\modules\, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
File ~\anaconda3\envs\DeepCrack\lib\site-packages\torch\nn\modules\, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x150528 and 224x224)


How to solve "RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]"?

I got this error in DeepCrack. I tried to solve it with several searches but I didn't find any solutions. How can I overcome the target size issue? Firstly, I faced tensor tuple issues, but I applied a stack that solve the tuple problem. Now, I don't understand the size fitting. I don't what is actual size here of CNN is. Can anyone please help me?
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from import random_split
import matplotlib.pyplot as plt
import time
import os
import copy
cudnn.benchmark = True
plt.ion() # interactive mode
from random import *
from tqdm.notebook import tqdm, trange
from time import sleep
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from warnings import filterwarnings
# functions to show an image
def imshow(img):
img = img / 2 + 0.5 # unnormalize
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
## codes for data augmentation
train_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
test_trans = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(p=0.5), ## tamim: image will move left and right
transforms.RandomVerticalFlip(p=0.5), ## tamim: image will come to eye vertically
transforms.RandomRotation(degrees=(.5, 5)), ## very small rotation of the cracks
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]
## Load data
from torchvision.datasets import ImageFolder
data = ImageFolder('../Data/Data_Structure(Annotated)', transform=train_trans , )
test_folder= ImageFolder("../Data/DATA_iPhone_13_Pro_Max", transform=test_trans, )
batch_size = 6
num_classes = 4
learning_rate = 0.01
num_epochs = 10
print("Follwing classes are there : \n",data.classes)
classes = ('Alligator Cracks', 'Delamination', 'Longitudinal Cracks', 'Transverse Cracks')
##Splitting Data and Prepare Batches:
## Source:
val_size = 127 ## Tamim:30% data for validation ##
train_size = len(data) - val_size
train_loader,val_loader = random_split(data,[train_size,val_size]) ## To randomly split the images into training and testing, PyTorch provides random_split()
print(f"Length of Train Data : {len(train_loader)}") ## changed the folder names
print(f"Length of Validation Data : {len(val_loader)}")
# Splitting train and validation data on batches
train_loader =, shuffle=True, batch_size=batch_size) ## defined train data & val data
val_loader =, shuffle=True, batch_size=batch_size)
test_loader =, shuffle=False, batch_size=batch_size)
# visualize images of a single batch
dataiter = iter(train_loader)
images, labels = next(dataiter)
# show images
# print labels
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
# model = models.vgg19(pretrained=True)
# print(model)
from torch import nn
import torch
import torch.nn.functional as F
def Conv3X3(in_, out):
return torch.nn.Conv2d(in_, out, 3, padding=1)
class ConvRelu(nn.Module):
def __init__(self, in_, out):
self.conv = Conv3X3(in_, out)
self.activation = torch.nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.activation(x)
return x
class Down(nn.Module):
def __init__(self, nn):
self.nn = nn
self.maxpool_with_argmax = torch.nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)
def forward(self,inputs):
down = self.nn(inputs)
unpooled_shape = down.size()
outputs, indices = self.maxpool_with_argmax(down)
return outputs, down, indices, unpooled_shape
class Up(nn.Module):
def __init__(self, nn):
self.nn = nn
def forward(self,inputs,indices,output_shape):
outputs = self.unpool(inputs, indices=indices, output_size=output_shape)
outputs = self.nn(outputs)
return outputs
class Fuse(nn.Module):
def __init__(self, nn, scale):
self.nn = nn
self.scale = scale
self.conv = Conv3X3(64,1)
def forward(self,down_inp,up_inp):
outputs =[down_inp, up_inp], 1)
outputs = F.interpolate(outputs, scale_factor=self.scale, mode='bilinear')
outputs = self.nn(outputs)
return self.conv(outputs)
class DeepCrack(nn.Module):
def __init__(self, num_classes=1000):
super(DeepCrack, self).__init__()
self.down1 = Down(torch.nn.Sequential(
self.down2 = Down(torch.nn.Sequential(
self.down3 = Down(torch.nn.Sequential(
self.down4 = Down(torch.nn.Sequential(
ConvRelu(256, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
self.down5 = Down(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
self.up1 = Up(torch.nn.Sequential(
ConvRelu(64, 64),
ConvRelu(64, 64),
self.up2 = Up(torch.nn.Sequential(
ConvRelu(128, 128),
ConvRelu(128, 64),
self.up3 = Up(torch.nn.Sequential(
ConvRelu(256, 256),
ConvRelu(256, 256),
ConvRelu(256, 128),
self.up4 = Up(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 256),
self.up5 = Up(torch.nn.Sequential(
ConvRelu(512, 512),
ConvRelu(512, 512),
ConvRelu(512, 512),
self.fuse5 = Fuse(ConvRelu(512 + 512, 64), scale=16)
self.fuse4 = Fuse(ConvRelu(512 + 256, 64), scale=8)
self.fuse3 = Fuse(ConvRelu(256 + 128, 64), scale=4)
self.fuse2 = Fuse(ConvRelu(128 + 64, 64), scale=2)
self.fuse1 = Fuse(ConvRelu(64 + 64, 64), scale=1) = Conv3X3(5,1)
def forward(self,inputs):
# encoder part
out, down1, indices_1, unpool_shape1 = self.down1(inputs)
out, down2, indices_2, unpool_shape2 = self.down2(out)
out, down3, indices_3, unpool_shape3 = self.down3(out)
out, down4, indices_4, unpool_shape4 = self.down4(out)
out, down5, indices_5, unpool_shape5 = self.down5(out)
# decoder part
up5 = self.up5(out, indices=indices_5, output_shape=unpool_shape5)
up4 = self.up4(up5, indices=indices_4, output_shape=unpool_shape4)
up3 = self.up3(up4, indices=indices_3, output_shape=unpool_shape3)
up2 = self.up2(up3, indices=indices_2, output_shape=unpool_shape2)
up1 = self.up1(up2, indices=indices_1, output_shape=unpool_shape1)
fuse5 = self.fuse5(down_inp=down5,up_inp=up5)
fuse4 = self.fuse4(down_inp=down4, up_inp=up4)
fuse3 = self.fuse3(down_inp=down3, up_inp=up3)
fuse2 = self.fuse2(down_inp=down2, up_inp=up2)
fuse1 = self.fuse1(down_inp=down1, up_inp=up1)
output =[fuse5,fuse4,fuse3,fuse2,fuse1],1))
return output, fuse5, fuse4, fuse3, fuse2, fuse1
if __name__ == '__main__':
inp = torch.randn((1,3,512,512))
model = DeepCrack()
out = model(inp)
model = DeepCrack()
# specify loss function
criterion = nn.CrossEntropyLoss()
# specify loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# number of epochs to train the model
n_epochs = 10
for epoch in range(1, n_epochs+1):
# monitor training loss
train_loss = 0.0
# train the model #
for data in train_loader:
# _ stands in for labels, here
# no need to flatten images
images, _ = data
# clear the gradients of all optimized variables
# forward pass: compute predicted outputs by passing inputs to the model
outputs = model(images)
outputs = torch.stack(outputs, dim=0, out=None) ## Tamim: converted the tuple of tensors to one.
outputs = outputs ## Changed shape
print(outputs.shape) ## Tamim: printed the target tensor shape to see
print(outputs) ## Tamim: printed the target tensors
# calculate the loss
loss = criterion(outputs, images)
# backward pass: compute gradient of the loss with respect to model parameters
# perform a single optimization step (parameter update)
# update running training loss
train_loss += loss.item()*images.size(0)
# print avg training statistics
train_loss = train_loss/len(train_loader)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
Traceback (most recent call last):
File "", line 324, in <module>
loss = criterion(outputs, images)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/modules/", line 1150, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/apps/pkg/pytorch/1.10.2/cuda/lib/python3.8/site-packages/torch/nn/", line 2846, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [6, 1, 224, 224], got [6, 3, 224, 224]

RuntimeError: value cannot be converted to type uint8_t without overflow: -0.192746

I am new to Pytorch and am aiming to do an image classification task using a CNN based on the EMNIST dataset.
I read my data in as follows:
emnist = + '/emnist-letters.mat')
data = emnist ['dataset']
X_train = data ['train'][0, 0]['images'][0, 0]
X_train = X_train.reshape((-1,28,28), order='F')
y_train = data ['train'][0, 0]['labels'][0, 0]
X_test = data ['test'][0, 0]['images'][0, 0]
X_test = X_test.reshape((-1,28,28), order = 'F')
y_test = data ['test'][0, 0]['labels'][0, 0]
train_dataset =, torch.from_numpy(y_train))
test_dataset =, torch.from_numpy(y_test))
batch_size = 128
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
train_loader =,
test_loader =,
Then, I found the following configurations (that I still have to adjust to fit to my data):
class CNNModel(nn.Module):
def __init__(self):
super(CNNModel, self).__init__()
# Convolution 1
self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
self.relu1 = nn.ReLU()
# Max pool 1
self.maxpool1 = nn.MaxPool2d(kernel_size=2)
# Convolution 2
self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
self.relu2 = nn.ReLU()
# Max pool 2
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
# Fully connected 1 (readout)
self.fc1 = nn.Linear(32 * 4 * 4, 10)
def forward(self, x):
# Convolution 1
out = self.cnn1(x)
out = self.relu1(out)
# Max pool 1
out = self.maxpool1(out)
# Convolution 2
out = self.cnn2(out)
out = self.relu2(out)
# Max pool 2
out = self.maxpool2(out)
# Resize
# Original size: (100, 32, 7, 7)
# out.size(0): 100
# New out size: (100, 32*7*7)
out = out.view(out.size(0), -1)
# Linear function (readout)
out = self.fc1(out)
return out
model = CNNModel()
criterion = nn.CrossEntropyLoss()
To train the model, I use the following code:
iter = 0
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Add a single channel dimension
# From: [batch_size, height, width]
# To: [batch_size, 1, height, width]
images = images.unsqueeze(1)
# Forward pass to get output/logits
outputs = model(images)
# Clear gradients w.r.t. parameters
# Forward pass to get output/logits
outputs = model(images)
# Calculate Loss: softmax --> cross entropy loss
loss = criterion(outputs, labels)
# Getting gradients w.r.t. parameters
# Updating parameters
iter += 1
if iter % 500 == 0:
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images = images.unsqueeze(1)
# Forward pass only to get logits/output
outputs = model(images)
# Get predictions from the maximum value
_, predicted = torch.max(, 1)
# Total number of labels
total += labels.size(0)
correct += (predicted == labels).sum()
accuracy = 100 * correct / total
# Print Loss
print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter,[0], accuracy))
However, when I run this, I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-27-1fbdd53d1194> in <module>()
13 # Forward pass to get output/logits
---> 14 outputs = model(images)
16 # Clear gradients w.r.t. parameters
4 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in _conv_forward(self, input, weight)
348 _pair(0), self.dilation, self.groups)
349 return F.conv2d(input, weight, self.bias, self.stride,
--> 350 self.padding, self.dilation, self.groups)
352 def forward(self, input):
RuntimeError: value cannot be converted to type uint8_t without overflow: -0.0510302
I found this question already and think that the solution might work for me as well. However, I don't understand where in my code I can implement this.
What can I do to overcome this problem?
I have used the following import statements:
import scipy .io
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import os
from PIL import Image
from PIL import ImageOps
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from import DataLoader
from torchvision.transforms import ToTensor
from torch.nn import Sequential
from torch.nn import Conv2d
from torch.nn import BatchNorm2d
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Linear
What fixed my problem was replacing out = self.cnn1(x) with out = self.cnn1(x.float())

Training 1D CNN in Pytorch

I want to train the model given below. I am developing 1D CNN model in PyTorch. Usually we use dataloaders in PyTorch. But I am not using dataloaders for my implementation. I need guidance on how i can train my model in pytorch.
import torch
import torch.nn as nn
import torch.nn.functional as F
class CharCNN(nn.Module):
def __init__(self,num_labels=11):
super(CharCNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(num_channels, depth_1, kernel_size=kernel_size_1, stride=stride_size),
nn.MaxPool1d(kernel_size=kernel_size_1, stride=stride_size),
self.conv2 = nn.Sequential(
nn.Conv1d(depth_1, depth_2, kernel_size=kernel_size_2, stride=stride_size),
nn.MaxPool1d(kernel_size=kernel_size_2, stride=stride_size),
self.fc1 = nn.Sequential(
nn.Linear(depth_2*kernel_size_2, num_hidden),
self.fc2 = nn.Sequential(
nn.Linear(num_hidden, num_labels),
def forward(self, x):
out = self.conv1(x)
out = self.conv2(out)
# collapse
out = x.view(x.size(0), -1)
# linear layer
out = self.fc1(out)
# output layer
out = self.fc2(out)
#out = self.log_softmax(x,dim=1)
return out
I am training my network like this:
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(),lr=learning_rate)
for e in range(training_epochs):
train_losses = []
for batch in iterate_minibatches(train_x, train_y, batch_size):
x, y = batch
inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
inputs, targets = inputs.cuda(), targets.cuda()
output = model(inputs, batch_size)
loss = criterion(output, targets.long())
val_losses = []
print("Epoch: {}/{}...".format(e+1, training_epochs),
"Train Loss: {:.4f}...".format(np.mean(train_losses)))
But i am getting the following error
TypeError Traceback (most recent call last)
<ipython-input-60-3a3df06ef2f8> in <module>
14 inputs, targets = inputs.cuda(), targets.cuda()
15 opt.zero_grad()
---> 16 output = model(inputs, batch_size)
18 loss = criterion(output, targets.long())
~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\nn\modules\ in __call__(self,
* input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
TypeError: forward() takes 2 positional arguments but 3 were given
Please guide me how i can resolve this issue.
The forward method of your model only takes one argument, but you are calling it with two arguments:
output = model(inputs, batch_size)
It should be:
output = model(inputs)
The time series data uses a 5 element window. The target is a rolling window of 5. The convolution 1d model receives a Sales tensor 3 dimensional structure containing all the sales for a certain duration of time ( The kernel is set at 5 to match the moving window size. input and output are 1. The loss function is calculated over 1000 epochs. The prediction tensor is then converted to a numpy array and displayed comparing it to the actual moving average. I did find iterate_minibatches code but it does not work with time series data because the dimensions are different (32 target vs 36 source)
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
#created a three dimensional tensor
#1. number of samples
#2. number of channels
#3. -1 means infer value from dimension
net = nn.Conv1d(1, 1, 5, bias = False)
optimizer=optim.Adam(net.parameters(), lr=0.01) #l2
X_tensor = torch.Tensor(X).reshape(1, 1, -1)
print("Sales", X_tensor)
y_tensor = torch.Tensor(y).reshape(1, 1, -1)
print("Avg", y_tensor)
ts_tensor = torch.Tensor(X).reshape(1, 1, -1)
kernel = [0.5, 0.5]
kernel_tensor = torch.Tensor(kernel).reshape(1, 1, -1)
print("Kernel", F.conv1d(ts_tensor, kernel_tensor))
for epoch in range(1000):
loss_value = torch.mean((outputs - y_tensor)**2)
running_loss += loss_value.item()
if epoch % 100 == 0:
print('[%d] loss: %.3f' % (epoch, loss_value.item()))
prediction = (net(X_tensor).data).float()
#actual moving average

How to integrate LIME with PyTorch?

Using this mnist image classification model :
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64
train_loader =
test_loader =
class NeuralNet(nn.Module):
def __init__(self):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(28*28, 500)
self.fc2 = nn.Linear(500, 256)
self.fc3 = nn.Linear(256, 2)
def forward(self, x):
x = x.view(-1, 28*28)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
train_loader_subset =
test_loader_subset =
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader_subset):
# Move tensors to the configured device
images = images.reshape(-1, 2).to(device)
labels =
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
if (epoch) % print_progress_every_n_epochs == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
predicted_test = []
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
probs_l = []
predicted_values = []
actual_values = []
labels_l = []
with torch.no_grad():
for images, labels in test_loader_subset:
images =
labels =
outputs = model(images)
_, predicted = torch.max(, 1)
sm = torch.nn.Softmax()
probabilities = sm(outputs)
if (epoch) % 1 == 0:
print('test accuracy : ', 100 * len((np.where(np.array(predicted_values[0])==(np.array(actual_values[0])))[0])) / len(actual_values[0]))
I'm to attempting to integrate 'Local Interpretable Model-Agnostic Explanations for machine learning classifiers' :
It appears PyTorch support is not enabled as it is not mentioned in doc and following tutorial :
With my updated code for PyTorch :
from lime import lime_image
import time
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(images[0].reshape(28,28), model(images[0]), top_labels=5, hide_color=0, num_samples=1000)
Causes error :
/opt/conda/lib/python3.6/site-packages/skimage/color/ in gray2rgb(image, alpha)
830 is_rgb = False
831 is_alpha = False
--> 832 dims = np.squeeze(image).ndim
834 if dims == 3:
AttributeError: 'Tensor' object has no attribute 'ndim'
So appears tensorflow object is expected here ?
How to integrate LIME with PyTorch image classification ?
Here's my solution:
Lime expects an image input of type numpy. This is why you get the attribute error and a solution would be to convert the image (from Tensor) to numpy before passing it to the explainer object. Another solution would be to select a specific image with the test_loader_subset and convert it with img = img.numpy().
Secondly, in order to make LIME work with pytorch (or any other framework), you'll need to specify a batch prediction function which outputs the prediction scores of each class for each image. The name of this function (here I've called it batch_predict) is then passed to explainer.explain_instance(img, batch_predict, ...). The batch_predict needs to loop through all images passed to it, convert them to Tensor, make a prediction and finally return the prediction score list (with numpy values). This is how I got it working.
Note also that the images need to have shape (... ,... ,3) or (... ,... ,1) in order to be properly segmented by the default segmentation algorithm. This means that you might have to use np.transpose(img, (...)). You may specify the segmentation algorithm as well if the results are poor.
Finally you'll need to display the LIME image mask on top of the original image. This snippet shows how this may be done:
from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
img_boundry = mark_boundaries(temp, mask)
This notebook is a good reference:

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and getting the error shown at extreme bottom. I would like to know how to resolve this. thanks
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from import DataLoader
from import SubsetRandomSampler
create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
Creating model for execution
class Model(nn.Module):
def init(self):
self.hidden = nn.Linear(784, 128)
self.output = nn.Linear(128, 10)
def forward(self, x):
x = self.hidden(x)
x = F.sigmoid(x)
x = self.output(x)
return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
for data, target in trainloader:
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
#4. weight optimization
# evaluation part
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
/usr/local/lib/python3.6/dist-packages/torch/nn/ in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
The same is needed to be done in the validation loop.
