This is my network; I have 2 CNN layers and 1 FC layer.
I've already trained CNN filters for particular domain, so I'm trying to train the FC layer only.
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(in_channels = 1, out_channels = 1, kernel_size = (2,2), stride = 1, padding = 1, bias = False),
nn.AvgPool2d(kernel_size = (2,2), stride = (2,2)),
)
self.layer2 = nn.Sequential(
nn.Conv2d(in_channels = 1, out_channels = 1, kernel_size = (2,2), stride = 1, padding = 1, bias = False),
nn.AvgPool2d(kernel_size = (2,2), stride = (2,2)),
)
self.fc = nn.Linear(1200, 1)
self.dropout = nn.Dropout(p = 0.10)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
out1 = self.layer1(x)
out2 = self.layer2(out1)
out3 = out2.reshape(out2.size(0), -1)
output = self.sigmoid(self.dropout((self.fc(out3))))
return output
And I am trying to assign pre-trained values of parameters in this network.
params = list(Net().parameters())
params
Output:
[Parameter containing:
tensor([[[[ 0.2240, 0.2135],
[-0.2901, 0.4827]]]], requires_grad=True),
Parameter containing:
tensor([[[[-0.0363, -0.2801],
[ 0.0853, -0.0217]]]], requires_grad=True),
Parameter containing:
tensor([[-0.0155, -0.0073, -0.0065, ..., 0.0012, -0.0213, -0.0287]],
requires_grad=True),
Parameter containing:
tensor([-0.0010], requires_grad=True)]
params[0][0][0][0][0] = -0.2454
RuntimeError: a view of a leaf Variable that requires grad is being used in an in-place operation.
Is there any specific way to assign values to network's parameters?
Parameters by default have requires_grad=True, as you can see from the print of params. Runtime error points to that, meaning you can only modify your parameters in-place when they don't need to calculate gradients. One easy way for that is to use no_grad():
with torch.no_grad():
params[0][0][0][0][0] = -0.2454
This will let you modify your parameters in-place. You can also change whole parameter at once without using no_grad. For example:
net = Net()
weight = net.layer1[0].weight # Weights in the first convolution layer
# Detach and create a numpy copy, do some modifications on it
weight = weight.detach().cpu().numpy()
weight[0,0,0,:] = 0.0
# Now replace the whole weight tensor
net.layer1[0].weight = torch.nn.Parameter(torch.from_numpy(weight))
print(list(net.parameters()))
[Parameter containing:
tensor([[[[ 0.0000, 0.0000],
[-0.0865, 0.1675]]]], requires_grad=True), Parameter containing:
tensor([[[[-1.4364e-01, 2.9724e-01],
[ 3.0464e-04, -4.9807e-01]]]], requires_grad=True), Parameter containing:
tensor([[-0.0283, 0.0109, -0.0077, ..., -0.0016, -0.0108, -0.0179]],
requires_grad=True), Parameter containing:
tensor([-0.0004], requires_grad=True)]
Related
For a simple TF2 Object detection CNN architecture defined using Keras's functional API as follows:
input_ = Input(shape = (144, 144, 3), name = 'image')
# name - An optional name string for the Input layer. Should be unique in
# a model (do not reuse the same name twice). It will be autogenerated if it isn't provided.
# Here 'image' is the Python3 dict's key used to map the data to one of the layer in the model.
x = input_
# Define a conv block-
x = Conv2D(filters = 64, kernel_size = 3, activation = 'relu')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size = 2)(x)
x = Flatten()(x) # flatten the last pooling layer's output volume
x = Dense(256, activation='relu')(x)
# We are using a data generator which yields dictionaries. Using 'name' argument makes it
# possible to map the correct data generator's output to the appropriate layer
class_out = Dense(units = 9, activation = 'softmax', name = 'class_out')(x) # classification output
box_out = Dense(units = 2, activation = 'linear', name = 'box_out')(x) # regression output
# Define the CNN model-
model = tf.keras.models.Model(input_, [class_out, box_out]) # since we have 2 outputs, we use a list
I am attempting to define it using Model sub-classing as:
class OD(Model):
def __init__(self):
super(OD, self).__init__()
self.conv1 = Conv2D(filters = 64, kernel_size = 3, activation = None)
self.bn = BatchNormalization()
self.pool = MaxPool2D(pool_size = 2)
self.flatten = Flatten()
self.dense = Dense(256, activation = None)
self.class_out = Dense(units = 9, activation = None, name = 'class_out')
self.box_out = Dense(units = 2, activation = 'linear', name = 'box_out')
def call(self, x):
x = tf.nn.relu(self.bn(self.conv1(x)))
x = self.pool(x)
x = self.flatten(x)
x = tf.nn.relu(self.dense(x))
x = [tf.nn.softmax(self.class_out(x)), self.box_out(x)]
return x
A batch of training data is obtained as:
example, label = next(data_generator(batch_size = 32))
example.keys()
# dict_keys(['image'])
image = example['image']
image.shape
# (32, 144, 144, 3)
label.keys()
# dict_keys(['class_out', 'box_out'])
label['class_out'].shape, label['box_out'].shape
# ((32, 9), (32, 2))
Is my Model sub-classing architecture equivalent to Keras's functional API?
I have created a mutli-class classification neural network. Training, and validation iterators where created with BigBucketIterator method with fields {'text_normalized_tweet':TEXT, 'label': LABEL}
TEXT = a tweet
LABEL = a float number (with 3 values: 0,1,2)
Below I execute a dummy example of my neural network:
import torch.nn as nn
class MultiClassClassifer(nn.Module):
#define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
#Constructor
super(MultiClassClassifer, self).__init__()
#embedding layer
self.embedding = nn.Embedding(vocab_size, embedding_dim)
#dense layer
self.hiddenLayer = nn.Linear(embedding_dim, hidden_dim)
#Batch normalization layer
self.batchnorm = nn.BatchNorm1d(hidden_dim)
#output layer
self.output = nn.Linear(hidden_dim, output_dim)
#activation layer
self.act = nn.Softmax(dim=1) #2d-tensor
#initialize weights of embedding layer
self.init_weights()
def init_weights(self):
initrange = 1.0
self.embedding.weight.data.uniform_(-initrange, initrange)
def forward(self, text, text_lengths):
embedded = self.embedding(text)
#packed sequence
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
tensor, batch_size = packed_embedded[0], packed_embedded[1]
hidden_1 = self.batchnorm(self.hiddenLayer(tensor))
return self.act(self.output(hidden_1))
Instantiate the model
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 64
OUTPUT_DIM = 3
model = MultiClassClassifer(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
When I call
text, text_lengths = batch.text_normalized_tweet
predictions = model(text, text_lengths).squeeze()
loss = criterion(predictions, batch.label)
it returns,
ValueError: Expected input batch_size (416) to match target batch_size (32).
model(text, text_lengths).squeeze() = torch.Size([416, 3])
batch.label = torch.Size([32])
I can see that the two objects have different sizes, but I have no clue how to fix this?
You may find the Google Colab notebook here
Shapes of each in, out tensor of my forward() method:
torch.Size([32, 10, 100]) #self.embedding(text)
torch.Size([320, 100]) #nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
torch.Size([320, 64]) #self.batchnorm(self.hiddenLayer(tensor))
torch.Size([320, 3]) #self.act(self.output(hidden_1))
You shouldn't be using the squeeze function after the forward pass, that doesn't make sense.
After removing the squeeze function, as you see, the shape of your final output is [320,3] whereas it is expecting [32,3]. One way to fix this is to average out the embeddings you obtain for each word after the self.Embedding function like shown below:
def forward(self, text, text_lengths):
embedded = self.embedding(text)
embedded = torch.mean(embedded, dim=1, keepdim=True)
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
tensor, batch_size = packed_embedded[0], packed_embedded[1]
hidden_1 = self.batchnorm(self.hiddenLayer(tensor))
return self.act(self.output(hidden_1))
I'm getting stuck on a function that is supposed to predict the label of a single image. I need to do this on a single image because I want to build a web app, where the user can upload an image and can get its prediction.
My CNN is the following with the base for the model :
class ImageClassificationBase(nn.Module):
def training_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
return loss
def validation_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
acc = accuracy(out, labels) # Calculate accuracy
return {'val_loss': loss.detach(), 'val_acc': acc}
def validation_epoch_end(self, outputs):
batch_losses = [x['val_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean() # Combine losses
batch_accs = [x['val_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean() # Combine accuracies
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
epoch, result['train_loss'], result['val_loss'], result['val_acc']))
and the model itself:
class BrainTumorClassification(ImageClassificationBase):
def __init__(self):
super().__init__()
self.network = nn.Sequential(
nn.Conv2d(3, 32, kernel_size = 3, padding = 1),
nn.ReLU(),
nn.Conv2d(32,64, kernel_size = 3, stride = 1, padding = 1),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
nn.ReLU(),
nn.Conv2d(128 ,128, kernel_size = 3, stride = 1, padding = 1),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1),
nn.ReLU(),
nn.Conv2d(256,256, kernel_size = 3, stride = 1, padding = 1),
nn.ReLU(),
nn.MaxPool2d(2,2),
nn.Flatten(),
nn.Linear(82944,1024),
nn.ReLU(),
nn.Linear(1024, 512),
nn.ReLU(),
nn.Linear(512,6))
def forward(self, xb):
return self.network(xb)
The function I'm trying to implement for testing a single image is the following:
from torch.autograd import Variable
transformer = transforms.Compose([
transforms.Resize((150,150)), transforms.ToTensor()])
def classify(image_path,image_transforms, classes):
image = Image.open(image_path)
image_tensor = image_transforms(image).float()
image_tensor = image_tensor.unsqueeze_(0)
input = Variable(image_tensor)
output = model(input)
index = output.data.numpy().argmax()
pred = classes[index]
return pred
I'm getting an error:
`pred=classes[index]` index out of range
I should mention that classes has 4 elements : ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor'].
A few points to note:
Don't forget to load your trained network on your initialized model.
Variable has been deprecated, you should not use it. Gradients are tracked on tensors that have the requires_grad flag on. Here you are only inferring so you can actually use the torch.no_grad context to avoid retaining parameter activations. This will increase inference speed.
torch.Tensor.unsqueeze_, you don't have to reassign the result as the input itself is modified by the function. As a general note, all torch.Tensor functions with a _ suffix are in-place operators.
Most of all, you mentioned only having 4 classes, yet your last fully connected layer outputs 6 logits. In this case, you need to change this to 4.
Here is a possible modification:
transformer = transforms.Compose([transforms.Resize((150,150)),
transforms.ToTensor()])
#torch.no_grad()
def classify(image_path,image_transforms, classes):
image = Image.open(image_path)
image_tensor = image_transforms(image)
image_tensor.unsqueeze_(0)
output = model(image_tensor)
index = output.data.numpy().argmax()
pred = classes[index]
return pred
I'm trying to implement a Bayesian Convolutional Neural Network using Pytorch on Python 3.7. I mainly orient myself on Shridhar's implementation. When running my CNN with normalized and MNIST data, the KL Divergence is NaN after a couple of iterations. I already implemented linear layers the same way and they worked perfectly fine.
I normalized the data as follows:
train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])), batch_size=BATCH_SIZE, shuffle=True, **LOADER_KWARGS)
eval_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])), batch_size=EVAL_BATCH_SIZE, shuffle=False, **LOADER_KWARGS)
My implementation of the Conv-Layer looks as follows:
class BayesianConv2d(nn.Module):
def __init__(self, in_channels, out_channels, prior_sigma, kernel_size, stride=1, padding=0, dilation=1, groups=1):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.normal = torch.distributions.Normal(0,1)
# conv-parameters
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
# Weight parameters
self.weight_mu = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size).uniform_(0, 0.1))
self.weight_rho = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size).uniform_(-3,0.1))
self.weight_sigma = 0
self.weight = 0
# Bias parameters
self.bias_mu = nn.Parameter(torch.Tensor(out_channels).uniform_(0, 0.1))
self.bias_rho = nn.Parameter(torch.Tensor(out_channels).uniform_(-3,0.1))
self.bias_sigma = 0
self.bias = 0
# prior
self.prior_sigma = prior_sigma
def forward(self, input, sample=False, calculate_log_probs=False):
# compute sigma out of rho: sigma = log(1+e^rho)
self.weight_sigma = torch.log1p(torch.exp(self.weight_rho))
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
# sampling process -> use local reparameterization trick
activations_mu = F.conv2d(input.to(DEVICE), self.weight_mu, self.bias_mu, self.stride, self.padding, self.dilation, self.groups)
activations_sigma = torch.sqrt(1e-16 + F.conv2d((input**2).to(DEVICE), self.weight_sigma**2, self.bias_sigma**2, self.stride, self.padding, self.dilation, self.groups))
activation_epsilon = Variable(self.weight_mu.data.new(activations_sigma.size()).normal_(mean=0, std=1))
outputs = activations_mu + activations_sigma * activation_epsilon
if self.training or calculate_log_probs:
self.kl_div = 0.5 * ((2 * torch.log(self.prior_sigma / self.weight_sigma) - 1 + (self.weight_sigma / self.prior_sigma).pow(2) + ((0 - self.weight_mu) / self.prior_sigma).pow(2)).sum() \
+ (2 * torch.log(0.1 / self.bias_sigma) - 1 + (self.bias_sigma / 0.1).pow(2) + ((0 - self.bias_mu) / 0.1).pow(2)).sum())
return outputs
The implementation of the corresponding Conv-Net looks as follows:
class BayesianConvNetwork(nn.Module):
# Set up network by definining layers
def __init__(self):
super().__init__()
self.conv1 = layers.BayesianConv2d(1, 24, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.conv2 = layers.BayesianConv2d(24, 48, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.conv3 = layers.BayesianConv2d(48, 64, prior_sigma=0.1, kernel_size = (5,5), padding=2)
self.pool3 = nn.MaxPool2d(kernel_size=3,stride=2, padding=1)
self.fcl1 = layers.BayesianLinearWithLocalReparamTrick(4*4*64, 256, prior_sigma=0.1)
self.fcl2 = layers.BayesianLinearWithLocalReparamTrick(256, 10, prior_sigma=0.1)
# define forward function by assigning corresponding activation functions to layers
def forward(self, x, sample=False):
x = F.relu(self.conv1(x, sample))
x = self.pool1(x)
x = F.relu(self.conv2(x, sample))
x = self.pool2(x)
x = F.relu(self.conv3(x, sample))
x = self.pool3(x)
x = x.view(-1, 4*4*64)
x = F.relu(self.fcl1(x, sample))
x = F.log_softmax(self.fcl2(x, sample), dim=1)
return x
# summing up KL-divergences to obtain overall KL-divergence-value
def total_kl_div(self):
return (self.conv1.kl_div + self.conv2.kl_div + self.conv3.kl_div + self.fcl1.kl_div + self.fcl2.kl_div)
# sampling prediction: perform prediction for each of the "different networks" that result from the weight distributions
def sample_elbo(self, input, target, batch_idx, nmbr_batches, samples=SAMPLES):
outputs = torch.zeros(samples, target.shape[0], CLASSES).to(DEVICE)
kl_divs = torch.zeros(samples).to(DEVICE)
for i in range(samples): # sample through networks
outputs[i] = self(input, sample=True) # perform prediction
kl_divs[i] = self.total_kl_div() # calculate total kl_div of the network
kl_div = kl_divs.mean() # compute mean kl_div from all samples
negative_log_likelihood = F.nll_loss(outputs.mean(0), target, size_average=False)
loss = kl_weighting * kl_div + negative_log_likelihood
return loss
Has anyone faced the same issue or knows how to solve it?
Many thanks in advance!
I figured out that it appears to be an issue with the SGD-optimizer. Using Adam as optimizer solved the problem though I don't know the reason for that. If anyone has an answer on why it works with Adam but not with SGD, feel free to comment.
I'm writing a U-net CNN in keras, and trying to use fit_generator for training. In order for this to work, I used a generator script, that could feed the images and labels for my network (simple fit function is working but I want to train a big dataset which cannot fit into the memory).
My problem is that in the model summary, it says correctly that, the output layer has a shape: (None, 288, 512, 4)
https://i.imgur.com/69xG8pO.jpg
but when I try actual training I get this error:
https://i.imgur.com/j7H6sHX.jpg
I don't get why keras wants (288, 512, 1) when in the summary it expects (288, 512, 4)
I tried it with my own unet code, and copied a working code from github also, but both of them has the exact same problem which leads me to believe that my generator script is the weak link. Below is the code I used (the image and label array functions used here were already working when I used them with "fit" in a previous CNN):
def generator(img_path, label_path, batch_size, height, width, num_classes):
input_pairs = get_pairs(img_path, label_path) # rewrite if param name changes
random.shuffle(input_pairs)
iterate_pairs = itertools.cycle(input_pairs)
while True:
X = []
Y = []
for _ in range(batch_size):
im, lab = next(iterate_pairs)
appended_im = next(iter(im))
appended_lab = next(iter(lab))
X.append(input_image_array(appended_im, width, height))
Y.append(input_label_array(appended_lab, width, height, num_classes, palette))
yield (np.array(X), np.array(Y))
I tried the generator out and the provided batches has the shapes of (for batch size of 15):
(15, 288, 512, 3)
(15, 288, 512, 4)
So I really do not know what could be the problem here.
EDIT: Here is the model code I used:
def conv_block(input_tensor, n_filter, kernel=(3, 3), padding='same', initializer="he_normal"):
x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(input_tensor)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
return x
def deconv_block(input_tensor, residual, n_filter, kernel=(3, 3), strides=(2, 2), padding='same'):
y = Conv2DTranspose(n_filter, kernel, strides, padding)(input_tensor)
y = concatenate([y, residual], axis=3)
y = conv_block(y, n_filter)
return y
# NETWORK - n_classes is the desired number of classes, filters are fixed
def Unet(input_height, input_width, n_classes=4, filters=64):
# Downsampling
input_layer = Input(shape=(input_height, input_width, 3), name='input')
conv_1 = conv_block(input_layer, filters)
conv_1_out = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = conv_block(conv_1_out, filters*2)
conv_2_out = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = conv_block(conv_2_out, filters*4)
conv_3_out = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4 = conv_block(conv_3_out, filters*8)
conv_4_out = MaxPooling2D(pool_size=(2, 2))(conv_4)
conv_4_drop = Dropout(0.5)(conv_4_out)
conv_5 = conv_block(conv_4_drop, filters*16)
conv_5_drop = Dropout(0.5)(conv_5)
# Upsampling
deconv_1 = deconv_block(conv_5_drop, conv_4, filters*8)
deconv_1_drop = Dropout(0.5)(deconv_1)
deconv_2 = deconv_block(deconv_1_drop, conv_3, filters*4)
deconv_2_drop = Dropout(0.5)(deconv_2)
deconv_3 = deconv_block(deconv_2_drop, conv_2, filters*2)
deconv_3 = deconv_block(deconv_3, conv_1, filters)
# Output - mapping each 64-component feature vector to number of classes
output = Conv2D(n_classes, (1, 1))(deconv_3)
output = BatchNormalization()(output)
output = Activation("softmax")(output)
# embed into functional API
model = Model(inputs=input_layer, outputs=output, name="Unet")
return model
Change your loss to categorical_crossentropy.
When using the sparse_categorical_crossentropy loss, your targets
should be integer targets.