I'm looking to re-implement in Pytorch the following WGAN-GP model,
taken by this paper.
The original implementation was in tensorflow. Apart from minor issues which require me to modify subtle details, since torch seems not supporting padding='same' for strided convolutions, my implementation is the following:
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.disc = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=32, kernel_size = 3, stride = (1, 1),padding='same'),
self._block(in_channels=32, out_channels=32, kernel_size=3, stride=(2,1), padding=(1,1)),
self._block(in_channels=32, out_channels=64, kernel_size = 3, stride = (1, 1),padding='same'),
self._block(in_channels=64, out_channels=64, kernel_size = 3, stride = (2, 1),padding=(1,1)),
self._block(in_channels=64, out_channels=128, kernel_size = 3, stride = (1, 1),padding='same'),
self._block(in_channels=128, out_channels=128, kernel_size = 3, stride = (2, 1),padding=(1,1)),
self._block(in_channels=128, out_channels=256, kernel_size=5, stride=(2,2),padding=(2,2))
)
self.lin = nn.Linear(256*6*4,1)
#unifies Conv2d leakyrelu and batchnorm
def _block(self, in_channels,
out_channels,
kernel_size, stride, padding):
return nn.Sequential(nn.Conv2d(in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False),
nn.BatchNorm2d(out_channels),
nn.LeakyReLU(0.2)) #bias false as we use batchnorm
def forward(self, x):
x = self.disc(x)
x = x.view(-1,256*6*4)
return self.lin(x)
class Generator(nn.Module):
def __init__(self, z_dim):
super(Generator, self).__init__()
self.z_dim = z_dim
self.lin1 = nn.Linear(z_dim, 6*4*256)
self.gen = nn.Sequential(
self._block(in_channels=256, out_channels=128, kernel_size=(5,4),stride=(2,2),padding=(2,1)),
self._block(in_channels=128, out_channels=128, kernel_size=(4,3), stride=(2,1),padding=(1,1)),
self._block(in_channels=128, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
self._block(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
self._block(in_channels=64, out_channels=64, kernel_size=(3,2), stride=(2,2), padding=(1,4)),
self._block(in_channels=64, out_channels=32, kernel_size=(3,3), stride=(1,1), padding=(1,1)),
self._block(in_channels=32, out_channels=32, kernel_size=3, stride=(2,1),padding=(1,1)),
self._block(in_channels=32, out_channels=1, kernel_size=3, stride=(1,1),padding=(1,1)),
nn.Sigmoid()
)
def _block(self, in_channels, out_channels,kernel_size, stride,padding):
return nn.Sequential(
nn.ConvTranspose2d(in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(), #they use relu in the generator
)
def forward(self, x):
x = x.view(-1, 128)
x = self.lin1(x)
x = x.view(-1,256,6,4)
return self.gen(x)
The inputs (real/fake/) have shape (batch_size, 1, 85, 8) and consist of very sparse one-hot matrices.
Now, with the above models, during the first training batches I have very bad errors for both loss G and loss D
Epoch [0/5] Batch 0/84 Loss D: -34.0230, loss G: 132.8942
Epoch [0/5] Batch 1/84 Loss D: -3080.0264, loss G: 601.3990
Epoch [0/5] Batch 2/84 Loss D: -216907.8125, loss G: 872.5948
Epoch [0/5] Batch 3/84 Loss D: -26314.8633, loss G: 4973.5327
Epoch [0/5] Batch 4/84 Loss D: -1000911.5000, loss G: 6153.7974
Epoch [0/5] Batch 5/84 Loss D: -14484664.0000, loss G: -5013.7808
Epoch [0/5] Batch 6/84 Loss D: -5119665.0000, loss G: -7194.0640
Epoch [0/5] Batch 7/84 Loss D: -25285320.0000, loss G: 20130.0801
Epoch [0/5] Batch 8/84 Loss D: -11411679.0000, loss G: 32655.1016
Epoch [0/5] Batch 9/84 Loss D: -18403266.0000, loss G: 37912.0469
Epoch [0/5] Batch 10/84 Loss D: -6191229.0000, loss G: 33614.3828
Epoch [0/5] Batch 11/84 Loss D: -8119311.0000, loss G: 28472.3496
Epoch [0/5] Batch 12/84 Loss D: -134419216.0000, loss G: 18065.1074
Epoch [0/5] Batch 13/84 Loss D: -123661928.0000, loss G: 71028.8984
Epoch [0/5] Batch 14/84 Loss D: -2723217.0000, loss G: 47931.0195
Epoch [0/5] Batch 15/84 Loss D: -806806.1250, loss G: 41759.3555
Even though these are just the first batches of the first epoch, the losses seem too large to me and I suspect something's wrong with my implementation. Or can be normal to obtain such numbers for the WGAN losses at first batches? I'm asking cause I have no huge experience with such architectures.
If the models look OK, should I upload my training loop for further discussion?
Related
I am trying to train a GAN model on anime face Dataset to generate anime faces. Here's my code-
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as T
import os
import torch
import torch.nn as nn
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
%matplotlib inline
def denorm(img_tensors):
return img_tensors * stats[1][0] + stats[0][0]
def show_images(images, nmax=64):
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_xticks([]); ax.set_yticks([])
ax.imshow(make_grid(denorm(images.detach()[:nmax]), nrow=8).permute(1, 2, 0))
def show_batch(dl, nmax=64):
for images, _ in dl:
show_images(images, nmax)
break
def get_default_device():
"""Pick GPU if available, else CPU"""
if torch.cuda.is_available():
return torch.device('cuda')
else:
return torch.device('cpu')
def to_device(data, device):
"""Move tensor(s) to chosen device"""
if isinstance(data, (list,tuple)):
return [to_device(x, device) for x in data]
return data.to(device, non_blocking=True)
class DeviceDataLoader():
"""Wrap a dataloader to move data to a device"""
def __init__(self, dl, device):
self.dl = dl
self.device = device
def __iter__(self):
"""Yield a batch of data after moving it to device"""
for b in self.dl:
yield to_device(b, self.device)
def __len__(self):
"""Number of batches"""
return len(self.dl)
device = get_default_device()
device
train_dl = DeviceDataLoader(train_dl, device)
discriminator = nn.Sequential(
# in: 3 x 64 x 64
nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.LeakyReLU(0.2, inplace=True),
# out: 64 x 32 x 32
nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.2, inplace=True),
# out: 128 x 16 x 16
nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.2, inplace=True),
# out: 256 x 8 x 8
nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.2, inplace=True),
# out: 512 x 4 x 4
nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
# out: 1 x 1 x 1
nn.Flatten(),
nn.Sigmoid())
discriminator = to_device(discriminator, device)
latent_size = 128
generator = nn.Sequential(
# in: latent_size x 1 x 1
nn.ConvTranspose2d(latent_size, 512, kernel_size=4, stride=1, padding=0, bias=False),
nn.BatchNorm2d(512),
nn.ReLU(True),
# out: 512 x 4 x 4
nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(True),
# out: 256 x 8 x 8
nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(128),
nn.ReLU(True),
# out: 128 x 16 x 16
nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(True),
# out: 64 x 32 x 32
nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
nn.Tanh()
# out: 3 x 64 x 64
)
xb = torch.randn(batch_size, latent_size, 1, 1) # random latent tensors
fake_images = generator(xb)
print(fake_images.shape)
show_images(fake_images)
generator = to_device(generator, device)
def train_discriminator(real_images, opt_d):
# Clear discriminator gradients
opt_d.zero_grad()
# Pass real images through discriminator
real_preds = discriminator(real_images)
real_targets = torch.ones(real_images.size(0), 1, device=device)
real_loss = F.binary_cross_entropy(real_preds, real_targets) # here nn.BCELoss() not working
real_score = torch.mean(real_preds).item()
# Generate fake images
latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
fake_images = generator(latent)
# Pass fake images through discriminator
fake_targets = torch.zeros(fake_images.size(0), 1, device=device)
fake_preds = discriminator(fake_images)
fake_loss = F.binary_cross_entropy(fake_preds, fake_targets) # here nn.BCELoss() not working
fake_score = torch.mean(fake_preds).item()
# Update discriminator weights
loss = real_loss + fake_loss
loss.backward()
opt_d.step()
return loss.item(), real_score, fake_score
def train_generator(opt_g):
# Clear generator gradients
opt_g.zero_grad()
# Generate fake images
latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
fake_images = generator(latent)
# Try to fool the discriminator
preds = discriminator(fake_images)
targets = torch.ones(batch_size, 1, device=device)
loss = F.binary_cross_entropy(preds, targets) # here nn.BCELoss() not working
# Update generator weights
loss.backward()
opt_g.step()
return loss.item()
from torchvision.utils import save_image
sample_dir = 'generated'
os.makedirs(sample_dir, exist_ok=True)
def save_samples(index, latent_tensors, show=True):
fake_images = generator(latent_tensors)
fake_fname = 'generated-images-{0:0=4d}.png'.format(index)
save_image(denorm(fake_images), os.path.join(sample_dir, fake_fname), nrow=8)
print('Saving', fake_fname)
if show:
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_xticks([]); ax.set_yticks([])
ax.imshow(make_grid(fake_images.cpu().detach(), nrow=8).permute(1, 2, 0))
fixed_latent = torch.randn(64, latent_size, 1, 1, device=device)
save_samples(0, fixed_latent)
from tqdm.notebook import tqdm
import torch.nn.functional as F
def fit(epochs, lr, start_idx=1):
torch.cuda.empty_cache()
# Losses & scores
losses_g = []
losses_d = []
real_scores = []
fake_scores = []
# Create optimizers
opt_d = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))
opt_g = torch.optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
for epoch in range(epochs):
for real_images, _ in tqdm(train_dl):
# Train discriminator
loss_d, real_score, fake_score = train_discriminator(real_images, opt_d)
# Train generator
loss_g = train_generator(opt_g)
# Record losses & scores
losses_g.append(loss_g)
losses_d.append(loss_d)
real_scores.append(real_score)
fake_scores.append(fake_score)
# Log losses & scores (last batch)
print("Epoch [{}/{}], loss_g: {:.4f}, loss_d: {:.4f}, real_score: {:.4f}, fake_score: {:.4f}".format(
epoch+1, epochs, loss_g, loss_d, real_score, fake_score))
# Save generated images
save_samples(epoch+start_idx, fixed_latent, show=False)
return losses_g, losses_d, real_scores, fake_scores
lr = 0.0002
epochs = 95
history = fit(epochs, lr)
The above code is working fine but before I was using nn.BCELoss from torch instead of binary_cross_entropy from torch.nn.functional in 'train_generator()' and 'train_discriminator()' methods above and I was getting the following error,
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
I wonder if they both don't perform the same operation. Can you help me to understand the problem?
nn.BCELoss is a class. Unlike nn.functional.binary_cross_entropy, you have to instantiate it first before using it to calculate the loss. In you case,
F.binary_cross_entropy(preds, targets)
is equivalent to
nn.BCELoss()(preds, targets)
I am un able to find error input 32*32 gray images:
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1, # gray-scale images
out_channels=16,
kernel_size=5, # 5x5 convolutional kernel
stride=1, #no. of pixels pass at a time
padding=2, # to preserve size of input image
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
# fully connected layers
self.out = nn.Linear(32*7*7, 3)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
# flatten the output of conv2
x = x.view(x.size(0), -1)
output = self.out(x)
return output
cnn=CNN()
cnn
Your linear layer expects input of size 32x7x7. Given that your conv1 and conv2 layers performs max pooling with stride=2, that means your network is configured for input size of 28x28 (MNIST usual input size) and not 32x32 as you expect.
Moreover, considering the values in your error message (64x2304) I assume you are working with batch_size=64, but your images are NOT 32x32, but rather 32x?? which is slightly larger than 32, resulting with a feature map of 32x8x9 after the pooling.
I am trying to understand why my classifier has a dimension issue. Here is my code:
class convnet(nn.Module):
def __init__(self, num_classes=1000):
super(convnet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(32),
nn.MaxPool2d(kernel_size=2, stride = 2),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(32),
nn.MaxPool2d(kernel_size=2, stride = 2), #stride=2),
nn.Conv2d(32, 64, kernel_size=3, stride=1),
nn.ReLU(inplace=True),
nn.BatchNorm2d(64),
nn.MaxPool2d(kernel_size=2, stride = 2),
)
self.classifier = nn.Sequential(
nn.Linear(576, 128),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
nn.Linear(128, 64),
nn.ReLU(inplace=True),
nn.BatchNorm2d(64),
nn.Linear(64,num_classes),
nn.Softmax(),
)
def forward(self, x):
x = self.features(x)
x = torch.flatten(x,1) #x.view(x.size(0), 256 * 6 * 6)
x = self.classifier(x)
return x
def neuralnet(num_classes,**kwargs):
model = convnet(**kwargs)
return model
So here my issue is: expected 4D input (got 2D input)
I'm quite sure that the error arises from the flatten command, however I don't really understand why as the classifier has fully dense connections. If someone knows where I'm going wrong, that would be very helpful!
Thank you
After flattening, the input to the classifier has 2 dimensions (size: [batch_size, 576]), therefore the output of the first linear layer will also have 2 dimensions (size: [batch_size, 128]). That output is then passed to nn.BatchNorm2d, which requires its input to have 4 dimensions (size: [batch_size, channels, height, width]).
If you want to use batch norm on a 2D input, you need to use nn.BatchNorm1d, which accepts either a 3D input (size: [batch_size, channels, length]) or a 2D input (size: [batch_size, length]).
self.classifier = nn.Sequential(
nn.Linear(576, 128),
nn.BatchNorm1d(128),
nn.ReLU(inplace=True),
nn.Linear(128, 64),
nn.ReLU(inplace=True),
nn.BatchNorm1d(64),
nn.Linear(64,num_classes),
nn.Softmax(),
)
I try to run the following programe for images classification problem in Pytorch:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001
TRAIN_DATA_PATH = "train/"
TEST_DATA_PATH = "test/"
TRANSFORM_IMG = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(256),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225] )
])
train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_dataset = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
def __init__(self, num_classes=10):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
self.layer2 = nn.Sequential(
nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2))
self.fc = nn.Linear(7 * 7 * 32, num_classes)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
return out
model = ConvNet(num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i + 1) % 100 == 0:
print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Test the model
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model/model.ckpt')
But I get a RuntimeError:
Traceback (most recent call last):
RuntimeError: Given groups=1, weight of size 16 1 5 5, expected input[100, 3, 256, 256] to have 1 channels, but got 3 channels instead
Someone could help to fix the bug? Thanks a lot.
Reference related:
https://discuss.pytorch.org/t/given-groups-1-weight-16-1-5-5-so-expected-input-100-3-64-64-to-have-1-channels-but-got-3-channels-instead/28831/17
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[3, 1, 224, 224] to have 3 channels, but got 1 channels instead
Your input layer self.layer1 starts with a 2d convolution nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2). This conv layer expects an input with two spatial dimensions and one channel, and outputs a tesnor with the same spatial dimensions and 16 channels.
However, your input has three channels and not one (RGB image instead of gray level image).
Make sure your net and data are in synch.
I'm training a pytorch neural network on google colab to classify sign langauge alphabets of 29 classes in total.
We've been fixing the code by changing various params but it won't work anyway.
transform = transforms.Compose([
#gray scale
transforms.Grayscale(),
#resize
transforms.Resize((128,128)),
#converting to tensor
transforms.ToTensor(),
#normalize
transforms.Normalize( (0.1307,), (0.3081,)),
])
data_dir = 'data/train/asl_alphabet_train'
#dataset
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
#train & test
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
#splitting
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
trainloader = torch.utils.data.DataLoader(train_dataset , batch_size = 4, shuffle = True )
testloader = torch.utils.data.DataLoader(test_dataset , batch_size = 4, shuffle = False )
#neural net architecture
Net(
(conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fc1): Linear(in_features=32768, out_features=128, bias=True)
(fc2): Linear(in_features=128, out_features=29, bias=True)
(dropout): Dropout(p=0.5)
)
loss_fn = nn.CrossEntropyLoss()
#optimizer
opt = optim.SGD(model.parameters(), lr=0.01)
def train(model, train_loader, optimizer, loss_fn, epoch, device):
#telling pytorch that training mode is on
model.train()
loss_epoch_arr = []
#epochs
for e in range(epoch):
# bach_no, data, target
for batch_idx, (data, target) in enumerate(train_loader):
#moving to GPU
#data, target = data.to(device), target.to(device)
#Making gradints zero
optimizer.zero_grad()
#generating output
output = model(data)
#calculating loss
loss = loss_fn(output, target)
#backward propagation
loss.backward()
#stepping optimizer
optimizer.step()
#printing at each 10th epoch
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
#de-allocating memory
del data,target,output
#torch.cuda.empty_cache()
#appending values
loss_epoch_arr.append(loss.item())
#plotting loss
plt.plot(loss_epoch_arr)
plt.show()
train(model, trainloader , opt, loss_fn, 10, device)
ValueError: Expected input batch_size (1) to match target batch_size
(4).
We're beginners in pytorch and trying to figure out what the problem is.
The most likely cause of this error relates to the value of in_features within the nn.Linear function
You haven't provided your full code for this.
One way to check for this is to add the following lines to you forward function (before x.view:
print('x_shape:',x.shape)
The result will be of the form [a,b,c,d]. in_features value should be equal to b*c*d