Variation between custom convolution and PyTorch conv2d results?

I am trying to build a custom convolution using the method shown in the PyTorch unfold documentation.
The custom convolution function is given below:
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import math
from torch.nn.modules.utils import _pair

class customConv(nn.Module):
    def __init__(self, n_channels, out_channels, kernel_size, dilation=1, padding=0, stride=1, bias=True):
        super(customConv, self).__init__()
        self.kernel_size = _pair(kernel_size)
        self.out_channels = out_channels
        self.dilation = _pair(dilation)
        self.padding = _pair(padding)
        self.stride = _pair(stride)
        self.n_channels = n_channels
        self.weight = Parameter(torch.Tensor(self.out_channels, self.n_channels, self.kernel_size[0], self.kernel_size[1]))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        n = self.n_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input_):
        hout = ((input_.shape[2] + 2 * self.padding[0] - self.dilation[0] * (self.kernel_size[0] - 1) - 1) // self.stride[0]) + 1
        wout = ((input_.shape[3] + 2 * self.padding[1] - self.dilation[1] * (self.kernel_size[1] - 1) - 1) // self.stride[1]) + 1
        inputUnfolded = F.unfold(input_, kernel_size=self.kernel_size, padding=self.padding, dilation=self.dilation, stride=self.stride)
        if self.bias is not None:  # truth-testing a multi-element Parameter would raise an error
            convolvedOutput = (inputUnfolded.transpose(1, 2).matmul(
                self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)) + self.bias.view(-1, 1)
        else:
            convolvedOutput = (inputUnfolded.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2))
        convolutionReconstruction = convolvedOutput.view(input_.shape[0], self.out_channels, hout, wout)
        return convolutionReconstruction
But when I compare it with the PyTorch implementation, I do not get exactly the same values. The code to check for the difference is provided below:
import torch
from torch import nn
from customConvolve import customConv

torch.manual_seed(1)
input = torch.randn(10, 3, 64, 64)
conv1 = nn.Conv2d(input.shape[1], 5, kernel_size=3, dilation=1, padding=1, stride=1, bias=False)
conv1_output = conv1(input)
conv2 = customConv(n_channels=input.shape[1], out_channels=5, kernel_size=3, dilation=1, stride=1, padding=1, bias=False)
conv2_output = conv2(input)
print(torch.equal(conv1.weight.data, conv2.weight.data))
print(torch.equal(conv1_output, conv2_output))
I would like to know why this variation exists and how to resolve it.
Thank you.
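One likely explanation: nn.Conv2d and customConv each draw their own random initial weights, so the weight comparison already prints False and the outputs necessarily differ. A minimal sketch of a fairer comparison, assuming the customConv class above is importable, copies the weights across and compares with a floating-point tolerance, since the unfold-based matmul sums products in a different order than the native kernel:

import torch
from torch import nn
from customConvolve import customConv

torch.manual_seed(1)
x = torch.randn(10, 3, 64, 64)
conv1 = nn.Conv2d(3, 5, kernel_size=3, padding=1, bias=False)
conv2 = customConv(n_channels=3, out_channels=5, kernel_size=3, padding=1, bias=False)
conv2.weight.data.copy_(conv1.weight.data)  # make the parameters identical

print(torch.equal(conv1.weight.data, conv2.weight.data))  # True
print(torch.equal(conv1(x), conv2(x)))                    # may still be False (summation order)
print(torch.allclose(conv1(x), conv2(x), atol=1e-6))      # True up to rounding error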

Related

How can I connect layers between two networks in Python and PyTorch?

For speaker verification from speech, I want to implement the figure below. I have some questions; please guide me.
First, I define a class for the first 4 layers (code model #1). Then I use the ECAPA-TDNN code from GitHub (code model #2, link: https://github.com/TaoRuijie/ECAPA-TDNN ). I don't know how to connect models 1 and 2, or, in other words, how to implement the figure below.
Model #1: the code for the first 4 layers:
import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self, num_class):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(1, 80, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=2))
        self.conv2 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv3 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv4 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=1))
        self.conv5 = nn.Sequential(nn.Conv2d(128, 20, T = flatten),
                                   BatchNorm2d(4),
                                   ReLU(inplace=True),
                                   MaxPool2d(kernel_size=2, stride=2))
Model #2: ECAPA-TDNN from GitHub:
import math, torch, torchaudio
import torch.nn as nn
import torch.nn.functional as F

class ECAPA_TDNN(nn.Module):
    def __init__(self, C):
        super(ECAPA_TDNN, self).__init__()
        self.torchfbank = torch.nn.Sequential(
            PreEmphasis(),
            torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160,
                                                 f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80),
        )
        self.specaug = FbankAug()  # Spec augmentation
        self.conv1 = nn.Conv1d(80, C, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(C)
        self.layer1 = Bottle2neck(C, C, kernel_size=3, dilation=2, scale=8)
        self.layer2 = Bottle2neck(C, C, kernel_size=3, dilation=3, scale=8)
        self.layer3 = Bottle2neck(C, C, kernel_size=3, dilation=4, scale=8)
        # I fixed the shape of the output from MFA layer, that is close to the setting from ECAPA paper.
        self.layer4 = nn.Conv1d(3 * C, 1536, kernel_size=1)
        self.attention = nn.Sequential(
            nn.Conv1d(4608, 256, kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Tanh(),  # I add this layer
            nn.Conv1d(256, 1536, kernel_size=1),
            nn.Softmax(dim=2),
        )
        self.bn5 = nn.BatchNorm1d(3072)
        self.fc6 = nn.Linear(3072, 192)
        self.bn6 = nn.BatchNorm1d(192)

    def forward(self, x, aug):
        with torch.no_grad():
            x = self.torchfbank(x) + 1e-6
            x = x.log()
            x = x - torch.mean(x, dim=-1, keepdim=True)
            if aug == True:
                x = self.specaug(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.bn1(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x + x1)
        x3 = self.layer3(x + x1 + x2)
        x = self.layer4(torch.cat((x1, x2, x3), dim=1))
        x = self.relu(x)
        t = x.size()[-1]
        global_x = torch.cat((x, torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t),
                              torch.sqrt(torch.var(x, dim=2, keepdim=True).clamp(min=1e-4)).repeat(1, 1, t)), dim=1)
        w = self.attention(global_x)
        mu = torch.sum(x * w, dim=2)
        sg = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-4))
        x = torch.cat((mu, sg), 1)
        x = self.bn5(x)
        x = self.fc6(x)
        x = self.bn6(x)
        return x
2. I am using log Mel filterbank energies for feature extraction. My dimension isn't 80. How can I set the dimension to 80? (By batch size? How?)
3. C is constant (128), but how can I calculate T? For one speech signal I used the code below, but for all the speech files in my dataset the second part didn't work. Or is T something else?
# Number of channels
ch = file_info.channels('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(ch)

# Number of frames in one audio file
frame_one_wav = audiosegment.from_file('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(frame_one_wav)

# All frames
all_frame = []
for i in range(number_of_files_train):
    wavs_info = rate_train, sig_train = wavfile.read(wavs_train[i])
    frame_wav = audiosegment.from_file(wavs_info)
    all_frame.append(frame_wav())
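One possible pattern for chaining the two models, given as a rough sketch only: wrap both in a container module and call them in sequence. Note that ECAPA_TDNN.forward() begins by computing a mel spectrogram from raw audio, so feeding it the front-end's conv features would require bypassing that fbank stage; the shapes below are assumptions, not working code from the post.

import torch.nn as nn

class CombinedModel(nn.Module):
    # Hypothetical wrapper: run the front-end (model #1), then ECAPA-TDNN (model #2).
    def __init__(self, front_end, ecapa):
        super(CombinedModel, self).__init__()
        self.front_end = front_end  # e.g. NeuralNetwork(...) from model #1
        self.ecapa = ecapa          # e.g. ECAPA_TDNN(C=128) from model #2

    def forward(self, x, aug=False):
        x = self.front_end(x)       # must produce what the next stage expects,
                                    # e.g. (batch, 80, time) for ECAPA's conv1
        return self.ecapa(x, aug)   # only sensible once the fbank stage is bypassed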

Pytorch Transformer won't train due to tensor sizes

I tried following this tutorial for transformers:
https://www.youtube.com/watch?v=U0s0f995w14
However, when I try to train the code with my own vectors, I get the following error message:
Traceback (most recent call last):
  File "C:\Users\rreichel\Desktop\Smaragd_local\Programming\Scripts\Transformer_selfbuilt.py", line 279, in <module>
    loss = loss_func(outputs, target)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\modules\loss.py", line 1047, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\functional.py", line 2693, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "C:\Users\rreichel\Anaconda3\lib\site-packages\torch\nn\functional.py", line 2397, in nll_loss
    raise ValueError("Expected target size {}, got {}".format(out_size, target.size()))
ValueError: Expected target size (3, 199), got torch.Size([3, 119])
when calculating the loss during training.
The code:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 6 08:13:38 2021
@author: rreichel
"""
import torch
import torch.nn as nn
import pickle
import glob
import os
from SelfbuiltDataset_test import myDataset
import torch.optim as optim


class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads
        assert (self.head_dim * heads == embed_size
                ), "Embedding size needs to be divisible by heads"
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        # Get number of training examples
        N = query.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], \
            query.shape[1]
        # Split the embedding into self.heads different pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        query = query.reshape(N, query_len, self.heads, self.head_dim)
        # (N, value_len, heads, head_dim)
        values = self.values(values)
        # (N, key_len, heads, head_dim)
        keys = self.keys(keys)
        # (N, query_len, heads, heads_dim)
        queries = self.queries(query)
        energy = torch.einsum("nqhd, nkhd -> nhqk", [queries, keys])
        # queries shape: (N, query_len, heads, heads_dim),
        # keys shape: (N, key_len, heads, heads_dim)
        # energy: (N, heads, query_len, key_len)
        # Mask padded indices so their weights become 0
        if mask is not None:
            energy = energy.masked_fill(mask == 0, float("-1e20"))
        # Normalize energy values
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)
        # attention shape: (N, heads, query_len, key_len)
        out = torch.einsum("nhql, nlhd -> nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim)
        # attention shape: (N, heads, query_len, key_len)
        # values shape: (N, value_len, heads, heads_dim)
        # out after matrix multiply: (N, query_len, heads, head_dim), then
        # we reshape and flatten the last two dimensions.
        out = self.fc_out(out)
        return out


class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size))
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        # Add skip connection, run through normalization and finally dropout
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out


class Encoder(nn.Module):
    def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
                 forward_expansion, dropout, max_length):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList([TransformerBlock(embed_size, heads,
                                     dropout=dropout, forward_expansion=forward_expansion)
                                     for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(
            (self.word_embedding(x) +
             self.position_embedding(positions))
        )
        # In the Encoder the query, key, value are all the same, it's in the
        # decoder this will change. This might look a bit odd in this case.
        for layer in self.layers:
            out = layer(out, out, out, mask)
        return out


class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.norm = nn.LayerNorm(embed_size)
        self.attention = SelfAttention(embed_size, heads=heads)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout,
                                                  forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out


class Decoder(nn.Module):
    def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
                 forward_expansion, dropout, device, max_length):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList([DecoderBlock(embed_size, heads,
                                                  forward_expansion, dropout,
                                                  device)
                                     for _ in range(num_layers)])
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout((self.word_embedding(x) +
                          self.position_embedding(positions)))
        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)
        out = self.fc_out(x)
        return out


class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx,
                 trg_pad_idx, embed_size=512, num_layers=6,
                 forward_expansion=4, heads=8, dropout=0, device="cpu",
                 max_length=100):
        super(Transformer, self).__init__()
        self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
                               device, forward_expansion, dropout, max_length)
        self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
                               forward_expansion, dropout, device, max_length)
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        # (N, 1, 1, src_len)
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1,
                                                                     trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out


def nextMultiple(n, x):
    # Round n to the nearest multiple of x
    n = n + x / 2
    n = n - (n % x)
    return int(n)


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    """
    # These are the one-hot encoded sentences (word 1, word 4 etc. as sentence)
    train = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0, 1, 11],
                          [1, 8, 7, 3, 4, 5, 6, 11, 2, 1, 3]]).to(device)
    target = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0, 2, 2],
                           [1, 5, 6, 2, 4, 7, 6, 2, 9, 1]]).to(device)
    max_len = max([len(x) for x in train]) + 1
    """
    # Loading in data
    data = pickle.load(open('Testdaten.pkl', 'rb'))
    tmp = myDataset(data, 'POS')
    # Calculating maximum sentence length (+ 1 because of start tag)
    max_len = max([len(x) for x in tmp.sent_encoded]) + 1
    pad_element = len(tmp.lookup_words)
    # Padding everything out to maximum sentence length
    train_tmp = []
    for sent in tmp.sent_encoded:
        train_tmp.append([pad_element] + sent + [pad_element] * (max_len - len(sent) - 1))
    target_tmp = []
    for sent in tmp.tags_encoded:
        target_tmp.append(sent + [pad_element] * (max_len - len(sent) - 1))
    # Creating tensors for model
    train = torch.squeeze(torch.tensor(train_tmp))
    target = torch.squeeze(torch.tensor(target_tmp))
    #"""
    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = int(torch.max(train)) + 1
    trg_vocab_size = int(torch.max(target)) + 1
    heads = 8
    es = nextMultiple(max(src_vocab_size, trg_vocab_size), heads)
    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx,
                        trg_pad_idx, es, 3, 2, heads, 0.1, device,
                        max_len).to(device)
    # Defining loss function and optimizer
    lr = 0.001
    num_epochs = 2
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    loss_func = nn.CrossEntropyLoss()
    # optimization algorithm
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # train and evaluation
    for cnt in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(train, target)
        # Outputs now are size [3, 119, 119]
        # CrossEntropyLoss doesn't like one-hot encoding, how to deal with this?
        loss = loss_func(outputs, target)
        loss.backward()
        optimizer.step()
    # out = model(train, target)
    # print(out.shape)
I am confused, since the code works with the vectors from the tutorial, but once I try to run the model with my own vocabulary, it produces this strange error. The data is just integer values encoding the corresponding words, e.g. "Hello World" would result in the training vector [1 2].
There are no differences between my data and the data from the tutorial as far as I can see. The tensor types are the same (torch.LongTensor), they are both integer values and in a specified range. The difference is in dimensionality: the tutorial uses vectors with dimension (2, 10), while mine are (3, 199).
Also, I am sorry, but I can't reduce the code any more, since otherwise the error might not be reproducible.
Did anyone encounter this error before?
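For reference, nn.CrossEntropyLoss expects the class dimension second: input (N, C) with target (N), or (N, C, d1, ...) with target (N, d1, ...). The decoder here returns (batch, seq_len, trg_vocab_size) with the class scores in the last dimension, so the loss reads seq_len as the number of classes and derives the wrong expected target size. A minimal sketch of the shape contract, assuming the dimensions from the error message, (3, 119, 199):

import torch
import torch.nn as nn

batch, seq_len, vocab = 3, 119, 199
outputs = torch.randn(batch, seq_len, vocab)        # what the decoder returns
target = torch.randint(0, vocab, (batch, seq_len))  # class indices, not one-hot

loss_func = nn.CrossEntropyLoss()
# loss_func(outputs, target) fails: dim 1 would be treated as the class dim.
loss = loss_func(outputs.permute(0, 2, 1), target)  # (batch, vocab, seq_len)
# equivalently: loss_func(outputs.reshape(-1, vocab), target.reshape(-1))
print(loss.item())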

While running PyTorch I got the error "TypeError: object of type 'CatsAndDogsDataset' has no len()" and I want to know how to fix it

I got an error while running PyTorch.
I am training a ResNet, and I wrote my own custom dataset class for the data. After loading the dataset, the training data and test data were split and used for training. But when I ran it, an error occurred, and I don't know what kind of problem it is.
Below I attach my ResNet.py code and my own dataset code, CustomDataset.py.
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
from customDataset import CatsAndDogsDataset

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
EPOCHS = 3
BATCH_SIZE = 10

dataset = CatsAndDogsDataset(csv_file='cats_dogs.csv', root_dir='cats_dogs_resized', transform=transforms.ToTensor())
train_set, test_set = torch.utils.data.random_split(dataset, [28, 4])
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=True)


class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                                          nn.BatchNorm2d(planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(16, 2, stride=1)
        self.layer2 = self._make_layer(32, 2, stride=2)
        self.layer3 = self._make_layer(64, 2, stride=2)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


model = ResNet().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
print(model)


def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()


def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= length(test_loader.dataset)
    test_accuracy = 100. * correct / length(test_loader.dataset)
    return test_loss, test_accuracy


for epoch in range(1, EPOCHS + 1):
    scheduler.step()
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
        epoch, test_loss, test_accuracy))

import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io


class CatsAndDogsDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __length__(self):
        return length(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        if self.transform:
            image = self.transform(image)
        return (image, ylabel)
When I write the code this way and then run it, I get
TypeError: object of type 'CatsAndDogsDataset' has no len()
and I wonder why it can't have len(). In addition, when I run it on Backend.ai instead of PyCharm, the error content is
Cannot verify that dataset is Sized
if sum(lengths) != len(dataset):
raise ValueError("sum of input lengths does not equal the length of the input dataset!")
Is there a workaround? Please help me.
You need to define the function __len__ for your custom dataset (which you seem to have currently incorrectly defined as __length__).
This documentation provides details. Relevant excerpt:
torch.utils.data.Dataset is an abstract class representing a dataset.
Your custom dataset should inherit Dataset and override the following
methods:
__len__ so that len(dataset) returns the size of the dataset.
__getitem__ to support the indexing such that dataset[i] can be used to get i th sample.
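Concretely, a sketch of the corrected methods for the posted class (this also fixes the length/len and ylabel/y_label typos further down, which would fail next):

def __len__(self):  # was incorrectly named __length__
    return len(self.annotations)  # builtin len(), not "length"

def __getitem__(self, index):
    img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
    image = io.imread(img_path)
    y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
    if self.transform:
        image = self.transform(image)
    return (image, y_label)  # was "ylabel", an undefined name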

How can the tensor matmul shape error be removed?

I am playing with the PyTorch ZOO AAE. I replaced the MNIST dataset with my own. However, I get an error which says: mat1 dim 1 must match mat2 dim 0, even though I am resizing the image in my dataloader. Here is my code. I am using ImageFolder to read my custom dataset, and I made sure I am using opt.img_size and opt.batch_size. There is also a resize in the dataloader.
import argparse
import os
import numpy as np
import math
import itertools
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch

os.makedirs("images", exist_ok=True)

parser = argparse.ArgumentParser()
parser.add_argument("--n_epochs", type=int, default=200, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=64, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--latent_dim", type=int, default=10, help="dimensionality of the latent code")
parser.add_argument("--img_size", type=int, default=32, help="size of each image dimension")
parser.add_argument("--channels", type=int, default=1, help="number of image channels")
parser.add_argument("--sample_interval", type=int, default=400, help="interval between image sampling")
opt = parser.parse_args()
print(opt)

img_shape = (opt.channels, opt.img_size, opt.img_size)
cuda = True if torch.cuda.is_available() else False


def reparameterization(mu, logvar):
    std = torch.exp(logvar / 2)
    sampled_z = Variable(Tensor(np.random.normal(0, 1, (mu.size(0), opt.latent_dim))))
    z = sampled_z * std + mu
    return z


class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(int(np.prod(img_shape)), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.mu = nn.Linear(512, opt.latent_dim)
        self.logvar = nn.Linear(512, opt.latent_dim)

    def forward(self, img):
        img_flat = img.view(img.shape[0], -1)
        x = self.model(img_flat)
        mu = self.mu(x)
        logvar = self.logvar(x)
        z = reparameterization(mu, logvar)
        return z


class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(opt.latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, int(np.prod(img_shape))),
            nn.Tanh(),
        )

    def forward(self, z):
        img_flat = self.model(z)
        img = img_flat.view(img_flat.shape[0], *img_shape)
        return img


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(opt.latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, z):
        validity = self.model(z)
        return validity


# Use binary cross-entropy loss
adversarial_loss = torch.nn.BCELoss()
pixelwise_loss = torch.nn.L1Loss()

# Initialize generator and discriminator
encoder = Encoder()
decoder = Decoder()
discriminator = Discriminator()

if cuda:
    encoder.cuda()
    decoder.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
    pixelwise_loss.cuda()

from torchvision.datasets import ImageFolder

# Configure data loader
os.makedirs("../../data/mnist", exist_ok=True)
import torchvision
import PIL

dataloader = torch.utils.data.DataLoader(
    ImageFolder('./content', transform=transforms.Compose([
        transforms.Resize(opt.img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])])),
    batch_size=opt.batch_size,
    shuffle=True,
)

# Optimizers
optimizer_G = torch.optim.Adam(
    itertools.chain(encoder.parameters(), decoder.parameters()), lr=opt.lr, betas=(opt.b1, opt.b2)
)
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor


def sample_image(n_row, batches_done):
    """Saves a grid of generated digits"""
    # Sample noise
    z = Variable(Tensor(np.random.normal(0, 1, (n_row ** 2, opt.latent_dim))))
    gen_imgs = decoder(z)
    save_image(gen_imgs.data, "images/%d.png" % batches_done, nrow=n_row, normalize=True)


# ----------
#  Training
# ----------
for epoch in range(opt.n_epochs):
    for i, (imgs, _) in enumerate(dataloader):
        # Adversarial ground truths
        valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)
        # Configure input
        real_imgs = Variable(imgs.type(Tensor))

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()
        encoded_imgs = encoder(real_imgs)
        decoded_imgs = decoder(encoded_imgs)
        # Loss measures generator's ability to fool the discriminator
        g_loss = 0.001 * adversarial_loss(discriminator(encoded_imgs), valid) + 0.999 * pixelwise_loss(
            decoded_imgs, real_imgs
        )
        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()
        # Sample noise as discriminator ground truth
        z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))
        # Measure discriminator's ability to classify real from generated samples
        real_loss = adversarial_loss(discriminator(z), valid)
        fake_loss = adversarial_loss(discriminator(encoded_imgs.detach()), fake)
        d_loss = 0.5 * (real_loss + fake_loss)
        d_loss.backward()
        optimizer_D.step()

        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
        )
        batches_done = epoch * len(dataloader) + i
        if batches_done % opt.sample_interval == 0:
            sample_image(n_row=10, batches_done=batches_done)
It's because you are loading 3 channels; you have to use a single channel.
Add the transforms.Grayscale() transform as well, as I have added below:
dataloader = torch.utils.data.DataLoader(
    ImageFolder('./content', transform=transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(opt.img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])])),
    batch_size=opt.batch_size,
    shuffle=True,
)
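An alternative, sketched here as an assumption rather than the answerer's fix: keep all 3 channels and size the model for them instead, since img_shape is built from opt.channels and the first nn.Linear(int(np.prod(img_shape)), 512) inherits it, so running the script with --channels 3 makes the flattened RGB image match. Note too that transforms.Resize(opt.img_size) with a single int only rescales the shorter side; a (height, width) tuple guarantees square tensors:

import torchvision.transforms as transforms

# assuming opt.channels == 3 and opt.img_size == 32 as sketched above
transform = transforms.Compose([
    transforms.Resize((32, 32)),                 # a tuple forces a square 32x32 output
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),  # per-channel stats for RGB
])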

Change input shape dimensions for ResNet model (pytorch)

I want to feed my 3x320x320 pictures into an existing ResNet model. The model actually expects input of size 3x32x32. As I am afraid of losing information, I don't simply want to resize my pictures.
What is the best way to preprocess my images so that they are able to run on the ResNet34?
Should I add additional layers in the forward method of ResNet? If yes, what would be a suitable combination in my case?
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_fitmodule import FitModule
from torch.autograd import Variable
import numpy as np


def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(FitModule):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(FitModule):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):  # add additional layers here?
        x = x.float()
        out = F.relu(self.bn1(self.conv1(x).float()).float())
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])
Thanks plenty!
Regards,
Fabian
If you change your avg_pool operation to nn.AdaptiveAvgPool2d, your model will work for any image size.
However, with your current setup, your 320x320 images would be 40x40 going into the pooling stage, which is a large feature map to pool over. Consider adding more conv layers.
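A minimal demonstration of why the suggestion works, assuming 512-channel feature maps ahead of the classifier as in the code above: nn.AdaptiveAvgPool2d((1, 1)) always emits one value per channel, so the flattened size feeding self.linear stays 512 regardless of the input resolution.

import torch
import torch.nn as nn

pool = nn.AdaptiveAvgPool2d((1, 1))      # output spatial size is always 1x1

feats_32 = torch.randn(2, 512, 4, 4)     # layer4 output for a 32x32 input
feats_320 = torch.randn(2, 512, 40, 40)  # layer4 output for a 320x320 input
print(pool(feats_32).flatten(1).shape)   # torch.Size([2, 512])
print(pool(feats_320).flatten(1).shape)  # torch.Size([2, 512])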
