While running PyTorch I got the error "TypeError: object of type 'CatsAndDogsDataset' has no len()" and I want to know how to fix it - pytorch

I got an error while running PyTorch.
I'm training a ResNet model, and I wrote my own custom dataset class for it. In ResNet.py the dataset is loaded and split into training and test sets before training. But when I run it, an error occurs and I don't understand what the problem is.
Below I attach my ResNet.py code and my custom dataset code, customDataset.py.
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader  # needed for the DataLoader calls below
from customDataset import CatsAndDogsDataset
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
EPOCHS = 3
BATCH_SIZE = 10
dataset = CatsAndDogsDataset(csv_file = 'cats_dogs.csv', root_dir = 'cats_dogs_resized',transform = transforms.ToTensor())
train_set, test_set = torch.utils.data.random_split(dataset, [28,4])
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=True)
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(16, 2, stride=1)
        self.layer2 = self._make_layer(32, 2, stride=2)
        self.layer3 = self._make_layer(64, 2, stride=2)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
model = ResNet().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1,momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
print(model)
def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= length(test_loader.dataset)
    test_accuracy = 100. * correct / length(test_loader.dataset)
    return test_loss, test_accuracy
for epoch in range(1, EPOCHS + 1):
    scheduler.step()
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
        epoch, test_loss, test_accuracy))
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io
class CatsAndDogsDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __length__(self):
        return length(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        if self.transform:
            image = self.transform(image)
        return (image, ylabel)
When I write the code this way and run it, I get
TypeError: object of type 'CatsAndDogsDataset' has no len()
and I don't understand why len() can't be used on my dataset. In addition, when I run it on Backend.ai instead of PyCharm, the error is
Cannot verify that dataset is Sized
if sum(lengths) != len(dataset):
raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
Is there a workaround? Please help.

You need to define the function __len__ for your custom dataset (which you seem to have currently incorrectly defined as __length__).
This documentation provides details. Relevant excerpt:
torch.utils.data.Dataset is an abstract class representing a dataset.
Your custom dataset should inherit Dataset and override the following
methods:
__len__ so that len(dataset) returns the size of the dataset.
__getitem__ to support the indexing such that dataset[i] can be used to get i th sample.
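Applied to the code in the question, a minimal corrected sketch of the dataset class would look like this (renaming __length__ to __len__ and using the built-in len; note that the last line of __getitem__ should also return y_label, the variable actually defined above it, rather than ylabel):

import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io

class CatsAndDogsDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        # len(dataset) now works; random_split calls this to validate the split sizes
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        if self.transform:
            image = self.transform(image)
        return (image, y_label)

With __len__ defined, torch.utils.data.random_split(dataset, [28, 4]) can verify that the split sizes sum to the length of the dataset, which is exactly the check that was failing on Backend.ai.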

Related

Why is my autoencoder not learning the FMNIST dataset?

I am using a simple autoencoder to learn images from the FashionMNIST dataset. I have preprocessed the dataset by grayscaling and normalizing it. I did not make the network too deep, to prevent it from creating a direct mapping.
Here's my PyTorch code -
import torch
import torchvision as tv
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import nn
import os
from torchviz import make_dot
transforms = tv.transforms.Compose([tv.transforms.Grayscale(num_output_channels=1)])
trainset = tv.datasets.FashionMNIST(root='./data', train=True,
download=True, transform=transforms)
PATH = './ae.pth'
data = trainset.data.float()
data = data/255
# print(trainset.data.shape)
plt.imshow(trainset.data[0], cmap = 'gray')
plt.show()
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.encode = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 30),
            nn.ReLU()
        )
        self.decode = nn.Sequential(
            nn.Linear(30, 512),
            nn.ReLU(),
            nn.Linear(512, 28*28),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.flatten(x)
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded
if(os.path.exists(PATH)):
    print("Loading data on cpu")
    device = torch.device('cpu')
    model = NeuralNetwork()
    model.load_state_dict(torch.load(PATH, map_location=device))
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    data = data.to(device)
    print(f"Using device = {device}")
    model = NeuralNetwork().to(device)
    # print(model)
    lossFn = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
    for epoch in range(1000):
        print("Epoch = ", epoch)
        optimizer.zero_grad()
        outputs = model(data)
        loss = lossFn(outputs, data.reshape(-1, 784))
        loss.backward()
        optimizer.step()
    torch.save(model.state_dict(), PATH)
data = data.to("cpu")
model = model.to("cpu")
pred = model(data)
pred = pred.reshape(-1, 28, 28)
# print(pred.shape)
plt.imshow(pred.detach().numpy()[0], cmap = 'gray')
plt.show()
For testing, I am inputting the following image -
However, I get this as output -
I had an intuition that there was an issue with your loss function. When working with images, distance-based losses such as L1 or L2 loss work really well, since you are essentially measuring how far away your predictions are from the ground-truth images. That matched what I observed here as well: with BCE the loss wasn't converging and was oscillating instead.
I rewrote the whole thing and replaced the BCE loss with MSE loss; in just 50 epochs the loss has gone down considerably, and it is still going down.
Here is the prediction after just 50 epochs -
The ground-truth image is -
I believe that you can get the loss down much more if you train for longer.
Here is the full code. I used a dataloader for batchifying and processing the data.
I also changed the transformations so that the resulting data is a torch tensor.
import torch
import torchvision as tv
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import DataLoader
transforms = tv.transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor()
])
trainset = tv.datasets.FashionMNIST(root='./data', train=True,
                                    download=True, transform=transforms)
loader = DataLoader(trainset, batch_size=32, num_workers=1, shuffle=True)
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.encode = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 30),
            nn.ReLU()
        )
        self.decode = nn.Sequential(
            nn.Linear(30, 512),
            nn.ReLU(),
            nn.Linear(512, 28*28),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.flatten(x)
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded
device = "cuda" if torch.cuda.is_available() else "cpu"  # device was not defined in the original snippet
model = NeuralNetwork().to(device)
lossFn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-2)
epochs = 50
for epoch in range(epochs):
    for images, labels in loader:
        optimizer.zero_grad()
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = lossFn(outputs, images.reshape(-1, 28*28))
        loss.backward()
        optimizer.step()
    print(f'Loss : {loss.item()}')
    print(f'Epochs done : {epoch}')
Here is some inference code -
# infer on some test data
testset = tv.datasets.FashionMNIST(root='./data', train=False,
download=False, transform=transforms)
testloader = DataLoader(testset, shuffle=False, batch_size=32, num_workers=1)
test_images, test_labels = next(iter(testloader))
test_images = test_images.to(device)
predictions = model(test_images)
prediction = predictions[0]
prediction = prediction.view(1, 28, 28)
prediction = prediction.detach().cpu().numpy()
prediction = prediction.transpose(1, 2, 0)
# plot the prediction
plt.imshow(prediction, cmap = 'gray')
plt.show()
# plot the actual image
test_image = test_images[0]
test_image = test_image.detach().cpu().numpy()
test_image = test_image.transpose(1, 2, 0)
plt.imshow(test_image, cmap='gray')
plt.show()
This is the loss going down --
Epochs done : 39
Loss : 0.04641226679086685
Epochs done : 40
Loss : 0.04445071145892143
Epochs done : 41
Loss : 0.05033266171813011
Epochs done : 42
Loss : 0.04813298210501671
Epochs done : 43
Loss : 0.0474831722676754
Epochs done : 44
Loss : 0.044186390936374664
Epochs done : 45
Loss : 0.049083154648542404
Epochs done : 46
Loss : 0.04645842686295509
Epochs done : 47
Loss : 0.04586248844861984
Epochs done : 48
Loss : 0.0467853844165802
Epochs done : 49

RuntimeError: the derivative for ‘target’ is not implemented for Auto Encoder

I am getting the following error: RuntimeError: the derivative for 'target' is not implemented.
I did look at similar posts; however, they are different from my problem. I'm trying to code an autoencoder from scratch. Here's my code -
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch import nn
from torchviz import make_dot
trainset = torchvision.datasets.FashionMNIST(root='./data', train=True,
download=True)
data = trainset.data.float()
# print(trainset.data.shape)
# plt.imshow(trainset.data[9])
# plt.show()
device = "cuda" if torch.cuda.is_available() else "cpu"
data = data.to(device)
print(f"Using device = {device}")
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.encode = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 30),
            nn.ReLU()
        )
        self.decode = nn.Sequential(
            nn.Linear(30, 512),
            nn.ReLU(),
            nn.Linear(512, 28*28),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        encoded = self.encode(x)
        decoded = self.decode(encoded)
        return decoded
model = NeuralNetwork().to(device)
# print(model)
lossFn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(data)
    loss = lossFn(outputs, outputs)
    loss.backward()
    optimizer.step()
model.forward(data)
You're getting this error because you are calling a function that expects its argument not to require gradient computation. More specifically, nn.BCELoss expects the target (2nd argument) not to require gradient.
You can fix this by detaching the argument from the graph:
lossFn(outputs, outputs.detach())
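As a minimal, self-contained illustration of the same failure and fix (the tensors below are made up for demonstration, not taken from the question's model):

import torch
import torch.nn as nn

lossFn = nn.BCELoss()
pred = torch.rand(4, 10, requires_grad=True)    # stands in for the model output
target = torch.rand(4, 10, requires_grad=True)  # a target that (incorrectly) requires grad

# lossFn(pred, target) fails with: RuntimeError: the derivative for 'target' is not implemented
loss = lossFn(pred, target.detach())  # detaching the target avoids the error
loss.backward()

In your case the prediction and the target are literally the same tensor (lossFn(outputs, outputs)), so detaching the second argument removes the gradient requirement on the target side only, while the backward pass through the model output still works.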

How to extract the encoded features after running a PyTorch LSTM autoencoder model?

I am very new to PyTorch and Python in general, and I am now struggling to get the encoded features from my pre-trained LSTM autoencoder which can be seen below:
import torch
import torch.nn as nn
# Building an LSTM autoencoder
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=32):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim1, self.hidden_dim2 = embedding_dim, 4 * embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim1,  # 128
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim1,
            hidden_size=self.hidden_dim2,  # 64
            num_layers=1,
            batch_first=True
        )
        self.rnn3 = nn.LSTM(
            input_size=self.hidden_dim2,
            hidden_size=embedding_dim,  # 32
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (_, _) = self.rnn2(x)
        x, (hidden_n, _) = self.rnn3(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))
class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=32, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim2, self.hidden_dim1, self.n_features = 4 * input_dim, 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim1,
            num_layers=1,
            batch_first=True
        )
        self.rnn3 = nn.LSTM(
            input_size=self.hidden_dim1,
            hidden_size=self.hidden_dim2,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim2, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x, (hidden_n, cell_n) = self.rnn3(x)
        x = x.reshape((self.seq_len, self.hidden_dim2))
        return self.output_layer(x)
class RAE(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=32):
        super(RAE, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim = embedding_dim
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
### TRAINING
def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss(reduction='mean').to(device)  # nn.L1Loss sum
    history = dict(train=[], val=[])
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        # add accuracy
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
    return model.eval(), history
Once I trained my model I followed the advice given by ptrblck here and implemented it as follows:
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook
model.encoder.register_forward_hook(get_activation('encoder'))
x = test_dataset_SR[1] # instead of using his random example I used one example from my training set
x = x.cuda()
output = model(x)
print(activation['encoder'])
but this gives me this error:
2 def get_activation(name):
3 def hook(model, input, output):
----> 4 activation[name] = output.detach()
5 return hook
AttributeError: 'tuple' object has no attribute 'detach'
Can you please help me solve this issue? I want to take these encoded features, store them, and use them as input to another network. I know I could probably train the encoder separately (not sure), but I will need both the encoder and the decoder, so I thought hooks would be my salvation.

ValueError: Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2]))

I'm trying to implement code for sentiment analysis (positive or negative labels) using BERT, and I want to add a BiLSTM layer to see if I can increase the accuracy of the pretrained model from HuggingFace. I have the code below and a few questions:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig
from torch import cuda
import re
import torch.nn as nn
device = 'cuda' if cuda.is_available() else 'cpu'
MAX_LEN = 200
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 1e-05 #5e-5, 3e-5 or 2e-5
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.comment_text = dataframe.review
        self.targets = self.data.sentiment
        self.max_len = max_len

    def __len__(self):
        return len(self.comment_text)

    def __getitem__(self, index):
        comment_text = str(self.comment_text[index])
        comment_text = " ".join(comment_text.split())
        inputs = self.tokenizer.encode_plus(comment_text, None, add_special_tokens=True, max_length=self.max_len,
                                            pad_to_max_length=True, return_token_type_ids=True)
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }
train_size = 0.8
train_dataset=df.sample(frac=train_size,random_state=200)
test_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)
print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))
training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)
train_params = {'batch_size': TRAIN_BATCH_SIZE,'shuffle': True,'num_workers': 0}
test_params = {'batch_size': VALID_BATCH_SIZE,'shuffle': True,'num_workers': 0}
training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased', return_dict=False, num_labels=2)
        self.lstm = nn.LSTM(768, 256, batch_first=True, bidirectional=True)
        self.linear = nn.Linear(256*2, 2)

    def forward(self, ids, mask, token_type_ids):
        sequence_output, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        lstm_output, (h, c) = self.lstm(sequence_output)  # extract the 1st token's embeddings
        hidden = torch.cat((lstm_output[:, -1, :256], lstm_output[:, 0, 256:]), dim=-1)
        linear_output = self.linear(lstm_output[:, -1].view(-1, 256*2))
        return linear_output
model = BERTClass()
model.to(device)
print(model)
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)
optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)
def train(epoch):
    model.train()
    for _, data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype=torch.long)
        mask = data['mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)
        outputs = model(ids, mask, token_type_ids)
        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        if _ % 5000 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

for epoch in range(EPOCHS):
    train(epoch)
With the above code I ran into the error: Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2])). I checked online and tried targets = targets.unsqueeze(2), but then I got another error saying the unsqueeze dimension must be in the range [-2, 1]. I also tried to modify the loss function to
def loss_fn(outputs, targets):
    return torch.nn.BCELoss()(outputs, targets)
but I still receive the same error. Can someone advise whether there is a solution to this problem, or what I can do to make this work? Many thanks in advance.

TF 2.0 Error: Gradients does not exist for variables during training using gradienttape

I tried to make a class using the BatchNormalization layer from TF 2.0; however, it gave me an error that gradients do not exist for variables. I tried to use BatchNormalization directly, but it gave me the same error as well. It seems like it is not training the variables related to the batch normalization step.
I tried to use model.trainable_variables instead of model.variables but it didn't work either.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy import ndimage
learning_rate = 0.001
training_epochs = 15
batch_size = 100
tf.random.set_seed(777)
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'minst_cnn_best'
checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.astype(np.float32) /255.
test_images = test_images.astype(np.float32) /255.
print(train_images.shape, test_images.shape)
train_images = np.expand_dims(train_images, axis = -1)
test_images = np.expand_dims(test_images, axis = -1)
print(train_images.shape, test_images.shape)
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)
train_dataset = tf.data.Dataset.from_tensor_slices((train_images,
train_labels)).shuffle(buffer_size = 100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images,
test_labels)).batch(batch_size)
class ConvBNRelu(tf.keras.Model):
    def __init__(self, filters, kernel_size=3, strides=1, padding='SAME'):
        super(ConvBNRelu, self).__init__()
        self.conv = keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                                        padding=padding, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=False):
        layer = self.conv(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer
class DenseBNRelu(tf.keras.Model):
    def __init__(self, units):
        super(DenseBNRelu, self).__init__()
        self.dense = keras.layers.Dense(units=units, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=False):
        layer = self.dense(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer
class MNISTModel(tf.keras.Model):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.conv1 = ConvBNRelu(filters=32, kernel_size=[3, 3], padding='SAME')
        self.pool1 = keras.layers.MaxPool2D(padding='SAME')
        self.conv2 = ConvBNRelu(filters=64, kernel_size=[3, 3], padding='SAME')
        self.pool2 = keras.layers.MaxPool2D(padding='SAME')
        self.conv3 = ConvBNRelu(filters=128, kernel_size=[3, 3], padding='SAME')
        self.pool3 = keras.layers.MaxPool2D(padding='SAME')
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = DenseBNRelu(units=256)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10, kernel_initializer='glorot_normal')

    def call(self, inputs, training=False):
        net = self.conv1(inputs)
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        net = self.drop4(net)
        net = self.dense5(net)
        return net
models = []
num_models = 5
for m in range(num_models):
    models.append(MNISTModel())
def loss_fn(model, images, labels):
    logits = model(images, training=True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=labels))
    return loss

def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    return tape.gradient(loss, model.variables)
def evaluate(models, images, labels):
    predictions = np.zeros_like(labels)
    for model in models:
        logits = model(images, training=False)
        predictions += logits
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy
optimizer = keras.optimizers.Adam(learning_rate = learning_rate)
checkpoints = []
for m in range(num_models):
    checkpoints.append(tf.train.Checkpoint(cnn=models[m]))
for epoch in range(training_epochs):
    avg_loss = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.
    train_step = 0
    test_step = 0
    for images, labels in train_dataset:
        for model in models:
            grads = grad(model, images, labels)
            optimizer.apply_gradients(zip(grads, model.variables))
            loss = loss_fn(model, images, labels)
            avg_loss += loss / num_models
        acc = evaluate(models, images, labels)
        avg_train_acc += acc
        train_step += 1
    avg_loss = avg_loss / train_step
    avg_train_acc = avg_train_acc / train_step
    for images, labels in test_dataset:
        acc = evaluate(models, images, labels)
        avg_test_acc += acc
        test_step += 1
    avg_test_acc = avg_test_acc / test_step
    print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss),
          'train accuracy = ', '{:.4f}'.format(avg_train_acc),
          'test accuracy = ', '{:.4f}'.format(avg_test_acc))
    for idx, checkpoint in enumerate(checkpoints):
        checkpoint.save(file_prefix=checkpoint_prefix+'-{}'.format(idx))
print('Learning Finished!')
W0727 20:27:05.344142 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model/conv_bn_relu/batch_normalization/moving_mean:0', 'mnist_model/conv_bn_relu/batch_normalization/moving_variance:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_mean:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_variance:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_mean:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_variance:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_mean:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_variance:0'] when minimizing the loss.
W0727 20:27:05.407717 140332288718656 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:460: BaseResourceVariable.constraint (from tensorflow.python.ops.resource_variable_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Apply a constraint manually following the optimizer update step.
W0727 20:27:05.499249 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_mean:0', 'mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_variance:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_mean:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_variance:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_mean:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_variance:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_mean:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_variance:0'] when minimizing the loss.
...
You're computing the gradient of the loss with respect to the model.variables: this collection contains not only the trainable variables (the model weights) but also the non-trainable variables like the moving mean and variance computed by the batch normalization layer.
You have to compute the gradient with respect to the trainable_variables. In short, change the lines
return tape.gradient(loss, model.variables)
and
optimizer.apply_gradients(zip(grads, model.variables))
to
return tape.gradient(loss, model.trainable_variables)
and
optimizer.apply_gradients(zip(grads, model.trainable_variables))
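Put together, a sketch of the corrected gradient function and update step in the context of the question's code (only these two lines change):

def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    # differentiate only w.r.t. the trainable weights, not the BN moving statistics
    return tape.gradient(loss, model.trainable_variables)

# inside the training loop
grads = grad(model, images, labels)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

The moving mean and variance are not trained by the optimizer at all; they are maintained by the BatchNormalization layer itself during forward passes in training mode, so excluding them from the gradient computation does not stop them from being updated.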
