Why does the same EfficientNet model return different predictions - pytorch

!pip install efficientnet_pytorch -q
import torch
import torch.nn as nn
import efficientnet_pytorch as efn
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
model = efn.EfficientNet.from_name('efficientnet-b0')
model = model.to(device)
img = torch.ones((2, 3, 680, 680))*0.5
img = img.to(device)
preds1 = model(img)
preds2 = model(img)
preds3 = model(img)
print(preds1[0][0])
print(preds2[0][0])
print(preds3[0][0])
del model, img, preds1, preds2, preds3
And preds1, preds2, and preds3 are all different. I am confused: the model is the same and the input is the same, so why are the predictions different?
tensor(0.2599, grad_fn=<SelectBackward>)
tensor(0.1364, grad_fn=<SelectBackward>)
tensor(0.1263, grad_fn=<SelectBackward>)

Wow, solved! I checked the efficientnet-pytorch source code and realized I had to switch the model to eval() mode.
Now the predictions are the same!
model = efn.EfficientNet.from_name('efficientnet-b0')
model = model.to(device)
_ = model.eval() ## hey, look here!!
img = torch.ones((2, 3, 680, 680))*0.5
img = img.to(device)
preds1 = model(img)
preds2 = model(img)
preds3 = model(img)
print(preds1[0][0])
print(preds2[0][0])
print(preds3[0][0])
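For context, the run-to-run differences come from the dropout and drop-connect layers that EfficientNet keeps active in training mode; model.eval() switches them (and batch norm) to their deterministic inference behaviour. A minimal sketch of the same effect with a plain dropout layer (illustrative only, not part of the original code):
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 4)
drop.train()             # training mode: elements are randomly zeroed and rescaled
print(drop(x), drop(x))  # two different outputs
drop.eval()              # eval mode: dropout becomes the identity
print(drop(x), drop(x))  # identical outputs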

Related

Sentence classification (SST2 data) with BertForSequenceClassification of huggingface

I'm working on sentence classification with pretrained BERT.
I used the BertForSequenceClassification class, but the model doesn't seem to be training.
Maybe I'm misreading the output, but I can't figure out what the right output should look like.
Can someone tell me what's wrong with this code?
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from torchtext.datasets import SST2
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel, BertForSequenceClassification
LR = 0.0005
EPOCHS = 5
BATCH_SIZE = 128
device = torch.device("cuda:" + "0" if torch.cuda.is_available() else "cpu")
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = BertForSequenceClassification.from_pretrained(model_name)
max_input_length = 128
model = base_model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = AdamW(model.parameters(), lr=LR)
train_datapipe = SST2(split="train")
valid_datapipe = SST2(split="dev")
def collate_batch(batch):
    ids, types, masks, label_list = [], [], [], []
    for text, label in batch:
        tokenized = tokenizer(text,
                              padding="max_length", max_length=max_input_length,
                              truncation=True, return_tensors="pt")
        ids.append(tokenized['input_ids'])
        types.append(tokenized['token_type_ids'])
        masks.append(tokenized['attention_mask'])
        label_list.append(label)
    input_data = {
        "input_ids": torch.squeeze(torch.stack(ids)).to(device),
        "token_type_ids": torch.squeeze(torch.stack(types)).to(device),
        "attention_mask": torch.squeeze(torch.stack(masks)).to(device)
    }
    label_list = torch.tensor(label_list, dtype=torch.int64)
    return input_data, label_list
train_dataloader = DataLoader(train_datapipe, shuffle=True, batch_size=BATCH_SIZE, collate_fn=collate_batch)
valid_dataloader = DataLoader(valid_datapipe, batch_size=BATCH_SIZE, collate_fn=collate_batch)
# print("total instances: ", len(train_dataloader))
for epoch in range(EPOCHS):
    model.train()
    train_loss = []
    all_labels = []
    all_outs = []
    for i, (input_data, label) in enumerate(tqdm(train_dataloader)):
        model.zero_grad()
        output = model(**input_data).logits
        label = label.to(device)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
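For reference on what the right output looks like: BertForSequenceClassification returns logits of shape (batch_size, num_labels), and it also computes the cross-entropy loss internally when labels are passed. A small sanity check along these lines (a sketch using the tokenizer and model defined above; the example sentences are made up):
batch = tokenizer(["a great movie", "a terrible movie"],
                  padding=True, truncation=True, return_tensors="pt").to(device)
labels = torch.tensor([1, 0]).to(device)
out = model(**batch, labels=labels)
print(out.logits.shape)  # torch.Size([2, 2])
print(out.loss)          # scalar cross-entropy loss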

Getting InvalidArgumentError when using tf.image.resize_bilinear in Keras in a multi-GPU environment

I use tf.image.resize_bilinear in a segmentation network, but it seems this function is not supported by the multi-GPU model. The following code shows a simplified situation (it can be run directly):
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1'
from keras.backend.tensorflow_backend import set_session
from keras import backend as K
from keras.utils import multi_gpu_model
from keras.applications.mobilenet_v2 import preprocess_input
import tensorflow as tf
import numpy as np
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
set_session(sess)
batch = 4
num_classes = 2
size = 128
K.clear_session()
def _GetRandomImg():
    shape = (batch, size, size, 3)
    img = np.random.randint(low=0, high=256, size=shape)
    return preprocess_input(img)
def _GetRandomLabel():
    shape = (batch, size, size, num_classes)
    label = np.random.randint(low=0, high=num_classes, size=shape)
    label = np.exp(label)
    label = label / np.sum(label, axis=-1, keepdims=True)
    return label
def DataGen():
    while True:
        x = _GetRandomImg()
        y = _GetRandomLabel()
        yield x, y
from keras.layers import Input, Conv2D, Lambda
from keras import Model
def GetModel():
    inputs = Input(shape=(size, size, 3))
    f = lambda x: tf.image.resize_bilinear(inputs, (size, size), align_corners=True)
    x = Lambda(f, output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model
gen = DataGen()
with tf.device('/cpu:0'):
    model = GetModel()
model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy', optimizer='sgd')
result = model.fit_generator(gen, epochs=2, verbose=1, steps_per_epoch=100)
It works fine in a single-GPU environment, but in a multi-GPU environment I get the following error:
InvalidArgumentError: Incompatible shapes: [3,128,128,2] vs. [6,128,128,2]
[[{{node loss/conv2d_1_loss/categorical_crossentropy/mul}}]]
[[{{node training/SGD/gradients/conv2d_1_1/concat_grad/Slice_1}}]]
The problem is solved. If a TensorFlow function is used inside a custom Lambda layer, you need to explicitly call set_shape() on the result:
def MyResizeBilinear(x, height, width):
    rows, cols = 1, 2
    original_shape = K.int_shape(x)
    new_shape = tf.constant(np.array([height, width], dtype='int32'))
    x = tf.image.resize_bilinear(x, new_shape, align_corners=True)
    new_height = None if original_shape[rows] is None else height
    new_width = None if original_shape[cols] is None else width
    output_shape = (None, new_height, new_width, None)
    x.set_shape(output_shape)
    return x
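For completeness, a sketch of how the helper above might be wired into the model from the question (GetModelFixed is an illustrative name; size and num_classes are the variables defined earlier):
def GetModelFixed():
    inputs = Input(shape=(size, size, 3))
    # wrap the helper so the Lambda layer receives only the tensor argument
    x = Lambda(lambda t: MyResizeBilinear(t, size, size),
               output_shape=(size, size, 3))(inputs)
    outputs = Conv2D(num_classes, kernel_size=3, padding='same')(x)
    return Model(inputs=[inputs], outputs=[outputs])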

TF 2.0 Error: Gradients does not exist for variables during training using GradientTape

I tried to make a class using the batch normalization layer from TF 2.0, but it gave me an error saying gradients do not exist for certain variables. I tried using batch normalization directly, but it gave me the same error. It seems like the variables related to the batch normalization step are not being trained.
I tried to use model.trainable_variables instead of model.variables but it didn't work either.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy import ndimage
learning_rate = 0.001
training_epochs = 15
batch_size = 100
tf.random.set_seed(777)
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'minst_cnn_best'
checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.astype(np.float32) /255.
test_images = test_images.astype(np.float32) /255.
print(train_images.shape, test_images.shape)
train_images = np.expand_dims(train_images, axis = -1)
test_images = np.expand_dims(test_images, axis = -1)
print(train_images.shape, test_images.shape)
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)
train_dataset = tf.data.Dataset.from_tensor_slices((train_images,
train_labels)).shuffle(buffer_size = 100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images,
test_labels)).batch(batch_size)
class ConvBNRelu(tf.keras.Model):
    def __init__(self, filters, kernel_size=3, strides=1, padding='SAME'):
        super(ConvBNRelu, self).__init__()
        self.conv = keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                                        padding=padding, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()
    def call(self, inputs, training=False):
        layer = self.conv(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer
class DenseBNRelu(tf.keras.Model):
    def __init__(self, units):
        super(DenseBNRelu, self).__init__()
        self.dense = keras.layers.Dense(units=units, kernel_initializer='glorot_normal')
        self.batchnorm = tf.keras.layers.BatchNormalization()
    def call(self, inputs, training=False):
        layer = self.dense(inputs)
        layer = self.batchnorm(layer)
        layer = tf.nn.relu(layer)
        return layer
class MNISTModel(tf.keras.Model):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.conv1 = ConvBNRelu(filters=32, kernel_size=[3, 3], padding='SAME')
        self.pool1 = keras.layers.MaxPool2D(padding='SAME')
        self.conv2 = ConvBNRelu(filters=64, kernel_size=[3, 3], padding='SAME')
        self.pool2 = keras.layers.MaxPool2D(padding='SAME')
        self.conv3 = ConvBNRelu(filters=128, kernel_size=[3, 3], padding='SAME')
        self.pool3 = keras.layers.MaxPool2D(padding='SAME')
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = DenseBNRelu(units=256)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10, kernel_initializer='glorot_normal')
    def call(self, inputs, training=False):
        net = self.conv1(inputs)
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        net = self.drop4(net)
        net = self.dense5(net)
        return net
models = []
num_models = 5
for m in range(num_models):
    models.append(MNISTModel())
def loss_fn(model, images, labels):
    logits = model(images, training=True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                                  labels=labels))
    return loss
def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    return tape.gradient(loss, model.variables)
def evaluate(models, images, labels):
    predictions = np.zeros_like(labels)
    for model in models:
        logits = model(images, training=False)
        predictions += logits
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy
optimizer = keras.optimizers.Adam(learning_rate = learning_rate)
checkpoints = []
for m in range(num_models):
    checkpoints.append(tf.train.Checkpoint(cnn=models[m]))
for epoch in range(training_epochs):
    avg_loss = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.
    train_step = 0
    test_step = 0
    for images, labels in train_dataset:
        for model in models:
            grads = grad(model, images, labels)
            optimizer.apply_gradients(zip(grads, model.variables))
            loss = loss_fn(model, images, labels)
            avg_loss += loss / num_models
        acc = evaluate(models, images, labels)
        avg_train_acc += acc
        train_step += 1
    avg_loss = avg_loss / train_step
    avg_train_acc = avg_train_acc / train_step
    for images, labels in test_dataset:
        acc = evaluate(models, images, labels)
        avg_test_acc += acc
        test_step += 1
    avg_test_acc = avg_test_acc / test_step
    print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss),
          'train accuracy = ', '{:.4f}'.format(avg_train_acc),
          'test accuracy = ', '{:.4f}'.format(avg_test_acc))
    for idx, checkpoint in enumerate(checkpoints):
        checkpoint.save(file_prefix=checkpoint_prefix+'-{}'.format(idx))
print('Learning Finished!')
W0727 20:27:05.344142 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model/conv_bn_relu/batch_normalization/moving_mean:0', 'mnist_model/conv_bn_relu/batch_normalization/moving_variance:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_mean:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_variance:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_mean:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_variance:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_mean:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_variance:0'] when minimizing the loss.
W0727 20:27:05.407717 140332288718656 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:460: BaseResourceVariable.constraint (from tensorflow.python.ops.resource_variable_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Apply a constraint manually following the optimizer update step.
W0727 20:27:05.499249 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_mean:0', 'mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_variance:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_mean:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_variance:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_mean:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_variance:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_mean:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_variance:0'] when minimizing the loss.
...
You're computing the gradient of the loss with respect to the model.variables: this collection contains not only the trainable variables (the model weights) but also the non-trainable variables like the moving mean and variance computed by the batch normalization layer.
You have to compute the gradient with respect to the trainable_variables. In short, change the lines
return tape.gradient(loss, model.variables)
and
optimizer.apply_gradients(zip(grads, model.variables))
to
return tape.gradient(loss, model.trainable_variables)
and
optimizer.apply_gradients(zip(grads, model.trainable_variables))
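Applied to the code above, the corrected helpers would look roughly like this (a sketch, not the poster's exact code):
def grad(model, images, labels):
    # differentiate only w.r.t. the trainable weights; the BatchNorm moving
    # mean/variance are updated by the layer itself, not by the optimizer
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    return tape.gradient(loss, model.trainable_variables)

# inside the training loop
grads = grad(model, images, labels)
optimizer.apply_gradients(zip(grads, model.trainable_variables))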

How to integrate LIME with PyTorch?

Using this mnist image classification model :
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
    os.mkdir(root)
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64
train_loader = torch.utils.data.DataLoader(
dataset=train_set,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(
dataset=test_set,
batch_size=batch_size,
shuffle=True)
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 2)
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
print(len(values_0_or_1))
print(len(values_0_or_1_testset))
train_loader_subset = torch.utils.data.DataLoader(
dataset=values_0_or_1,
batch_size=batch_size,
shuffle=True)
test_loader_subset = torch.utils.data.DataLoader(
dataset=values_0_or_1_testset,
batch_size=batch_size,
shuffle=False)
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader_subset):
        # Move tensors to the configured device
        images = images.reshape(-1, 2).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch) % print_progress_every_n_epochs == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    predicted_test = []
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
    probs_l = []
    predicted_values = []
    actual_values = []
    labels_l = []
    with torch.no_grad():
        for images, labels in test_loader_subset:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            predicted_test.append(predicted.cpu().numpy())
            sm = torch.nn.Softmax()
            probabilities = sm(outputs)
            probs_l.append(probabilities)
            labels_l.append(labels.cpu().numpy())
        predicted_values.append(np.concatenate(predicted_test).ravel())
        actual_values.append(np.concatenate(labels_l).ravel())
    if (epoch) % 1 == 0:
        print('test accuracy : ', 100 * len((np.where(np.array(predicted_values[0])==(np.array(actual_values[0])))[0])) / len(actual_values[0]))
I'm attempting to integrate 'Local Interpretable Model-Agnostic Explanations for machine learning classifiers': https://marcotcr.github.io/lime/
It appears PyTorch support is not provided, since it is not mentioned in the docs or in the following tutorial:
https://marcotcr.github.io/lime/tutorials/Tutorial%20-%20images.html
With my updated code for PyTorch:
from lime import lime_image
import time
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(images[0].reshape(28,28), model(images[0]), top_labels=5, hide_color=0, num_samples=1000)
Causes error :
/opt/conda/lib/python3.6/site-packages/skimage/color/colorconv.py in gray2rgb(image, alpha)
830 is_rgb = False
831 is_alpha = False
--> 832 dims = np.squeeze(image).ndim
833
834 if dims == 3:
AttributeError: 'Tensor' object has no attribute 'ndim'
So it appears a TensorFlow object is expected here?
How can I integrate LIME with PyTorch image classification?
Here's my solution:
LIME expects the image input to be a numpy array. This is why you get the attribute error; one solution is to convert the image from a Tensor to numpy before passing it to the explainer object. Another option is to select a specific image from test_loader_subset and convert it with img = img.numpy().
Secondly, in order to make LIME work with pytorch (or any other framework), you'll need to specify a batch prediction function which outputs the prediction scores of each class for each image. The name of this function (here I've called it batch_predict) is then passed to explainer.explain_instance(img, batch_predict, ...). The batch_predict needs to loop through all images passed to it, convert them to Tensor, make a prediction and finally return the prediction score list (with numpy values). This is how I got it working.
Note also that the images need to have shape (... ,... ,3) or (... ,... ,1) in order to be properly segmented by the default segmentation algorithm. This means that you might have to use np.transpose(img, (...)). You may specify the segmentation algorithm as well if the results are poor.
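As an illustration, a batch_predict along these lines works with the MNIST model above (the function body and the exact preprocessing are assumptions; adjust them to match your own pipeline):
def batch_predict(images):
    # LIME hands over a numpy batch of shape (N, H, W, 3); take one channel
    # and rebuild the (N, 1, 28, 28) tensor layout the model expects
    model.eval()
    batch = torch.stack([torch.from_numpy(img[..., 0]).float().unsqueeze(0)
                         for img in images])
    with torch.no_grad():
        logits = model(batch.to(device))
        probs = F.softmax(logits, dim=1)
    return probs.cpu().numpy()

img = images[0].cpu().numpy().squeeze()   # a single test image as a numpy array
explanation = explainer.explain_instance(img, batch_predict,
                                         top_labels=2, hide_color=0,
                                         num_samples=1000)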
Finally you'll need to display the LIME image mask on top of the original image. This snippet shows how this may be done:
from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False)
img_boundry = mark_boundaries(temp, mask)
plt.imshow(img_boundry)
plt.show()
This notebook is a good reference:
https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20images%20-%20Pytorch.ipynb

Keras single Image prediction not working with correct dims

I want to predict single images with the functional API (Keras version 2.2.2, TensorFlow backend v1.7). I load my model:
# Loading Model
base_model = VGG16(include_top = False, weights=None,
input_shape=(224,224,3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.7)(x)
x = Dense(1020, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.load_weights("model.h5")
Then I load an image, transform it into the input format, and try to predict:
from keras.preprocessing.image import img_to_array, load_img
img = load_img("data/my_image.png") # this is a PIL image
array = img_to_array(img) # this is a Numpy array with shape
arrayresized = cv2.resize(array, (244,244))*1./255
inputarray = np.expand_dims(arrayresized, axis=0)
# Predicting
prediction = model.predict(inputarray, batch_size = 1)
Then I get this error back:
ValueError: Error when checking input: expected input_4 to have shape
(224, 224, 3) but got array with shape (244, 244, 3)
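For reference, the traceback points at the mismatch itself: the network is built for 224x224 input, but the image is resized to 244x244. A sketch of the preprocessing with matching dimensions (assuming the same model object as above):
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array, load_img

img = load_img("data/my_image.png")
array = img_to_array(img)
# resize to the 224x224 spatial size the model was built with (224, not 244)
arrayresized = cv2.resize(array, (224, 224)) * 1. / 255
inputarray = np.expand_dims(arrayresized, axis=0)   # shape (1, 224, 224, 3)
prediction = model.predict(inputarray, batch_size=1)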
