Why are there no negative values after BatchNorm2d in PyTorch?

According to the BatchNorm formula, the output should contain negative values, but after BatchNorm2d I see only positive values and zeros.
I use a hook to get the output of "layer4.1.bn2" in ResNet.
import torch
import torchvision
from PIL import Image
from torchvision import transforms

class HookTool:
    def __init__(self):
        self.fea = None

    def hook_fun(self, module, fea_in, fea_out):
        self.fea = fea_out

def get_feas_by_hook(model):
    print('begin hooks...')
    fea_hooks = []
    for n, m in model.named_modules():
        if isinstance(m, torch.nn.BatchNorm2d) and n == 'layer4.1.bn2':
            cur_hook = HookTool()
            m.register_forward_hook(cur_hook.hook_fun)
            fea_hooks.append(cur_hook)
    return fea_hooks

model = torchvision.models.resnet18(pretrained=True)

transform = transforms.Compose([
    # transforms.CenterCrop(224),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

fea_hooks = get_feas_by_hook(model)

img = Image.open("ILSVRC2012_val_00002138.JPEG")
input = transform(img)
input = input.reshape(1, 3, 224, 224)
print(input.shape)

output = model(input)

print('The number of hooks is:', len(fea_hooks))
print('The shape of the first Conv2D feature is:', fea_hooks[0].fea.shape)
When I inspect fea_hooks, there are no negative numbers.
Why is that?
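A likely explanation (worth verifying against the torchvision source): in ResNet18's BasicBlock the output of bn2 is modified in place afterwards, first by the residual addition (out += identity) and then by the final nn.ReLU(inplace=True). The hook stores a reference to that same tensor, so by the time you read self.fea it already holds the post-ReLU values. Cloning at hook time should preserve the raw BatchNorm output, negatives included:

class HookTool:
    def __init__(self):
        self.fea = None

    def hook_fun(self, module, fea_in, fea_out):
        # Copy the value at hook time, before the later in-place
        # ops (out += identity; ReLU(inplace=True)) can mutate it.
        self.fea = fea_out.detach().clone()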

Related

Use bounding box model in webcam

I have a model trained using the following code:
https://jovian.ml/aakanksha-ns/road-signs-bounding-box-prediction/v/2?utm_source=embed
Then I saved the model using the following code:
torch.save(model.state_dict(), 'checkpoint.pth')
How can I use the saved model with a webcam? I wrote the code below, but it only works for a classification-only model, not for the combined bounding-box and classification model.
from PIL import Image
import time

# Load the saved model
# Create a neural net class
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
import PIL
import cv2
from torchvision import models

class BB_model(nn.Module):
    def __init__(self):
        super(BB_model, self).__init__()
        resnet = models.resnet34(pretrained=True)
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        x = self.features1(x)
        x = self.features2(x)
        x = F.relu(x)
        x = nn.AdaptiveAvgPool2d((1, 1))(x)
        x = x.view(x.shape[0], -1)
        return self.classifier(x), self.bb(x)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # Assigning the device which will do the calculation
model = BB_model()
model.load_state_dict(torch.load("checkpoint_bb.pth"))
model = model.to(device)
model.eval()

video = cv2.VideoCapture(0)

# used to record the time when we processed the last frame
prev_frame_time = 0
# used to record the time at which we processed the current frame
new_frame_time = 0

encoder = {0: "Standing"}

# Preprocess the input frame
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(128, 128)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

def argmax(prediction):
    prediction = prediction.cpu()
    prediction = prediction.detach().numpy()
    top_1 = np.argmax(prediction, axis=1)
    score = np.amax(prediction)
    score = '{:.6f}'.format(score)
    prediction = top_1[0]
    result = encoder[prediction]
    return result, score

def preprocess(image):
    image = PIL.Image.fromarray(image)  # Webcam frames are numpy arrays,
                                        # therefore transform back to a PIL image
    print(image)
    image = data_transforms(image)
    image = image.float()
    # image = Variable(image, requires_grad=True)
    image = image.unsqueeze(0)  # I don't know for sure, but the ResNet model seems to only
                                # accept 4-D tensors, so we need to add a batch dimension
    return image

while True:
    _, frame = video.read()
    image = frame[100:450, 150:570]
    image_data = preprocess(image)
    print(image_data)
    prediction = model(image_data)
    result, score = argmax(prediction)
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(frame,
                f"{result}",
                (50, 50),
                font, 1,
                (0, 0, 255),
                2,
                cv2.LINE_4)
    # time when we finish processing this frame
    new_frame_time = time.time()
    fps = 1 / (new_frame_time - prev_frame_time)
    prev_frame_time = new_frame_time
    cv2.putText(frame,
                f"FPS: {round(fps, 1)}",
                (50, 80),
                font, 1,
                (255, 255, 0),
                2,
                cv2.LINE_4)
    cv2.imshow("Capturing", frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

video.release()
cv2.destroyAllWindows()
When I run the above code, I receive the error:
prediction = prediction.cpu()
AttributeError: 'tuple' object has no attribute 'cpu'
Since my code was built for standard classification and not for bounding boxes, the argmax function does not work, and I need help changing it. Thank you.
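Since BB_model.forward returns a tuple (classification logits, bounding box), a minimal fix is to unpack the tuple and pass only the classification head to argmax. A sketch of the change inside the loop; note that how the four bb values map back onto the frame is an assumption that depends on the coordinate convention used during training:

class_pred, bb_pred = model(image_data)   # forward() returns a tuple
result, score = argmax(class_pred)        # classification head only
bb = bb_pred[0].detach().cpu().numpy()    # four raw bounding-box values
# Assumed convention: coordinates in the 128x128 network input space,
# so scale back to the cropped region before drawing, e.g.
# y_scale = image.shape[0] / 128, x_scale = image.shape[1] / 128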

make multiple parallel predictions on tensorflow model

I want to make multiple predictions.
I have trained a segmentation model (images and masks). You can find the model here.
The images have dimensions (32, 32, 3), the masks (32, 32).
What I do at inference time is:
Load the images array (tiles) with dim (62500, 32, 32, 3). You can find it here.
Create a tensorflow dataset from this array.
Then predict on each image, like:
masks = []
for k, element in enumerate(the_image_array):
    the_img = np.asarray(np.expand_dims(element, 0))[-1, -1, :, :]
    pred = model.predict(the_img[np.newaxis, :, :, :])[0]
    mask = tf.where(pred > 0.5, 255, 0)
    masks.append(mask)
Now, I want to do these predictions in parallel.
So, I tried:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import load_model
from itertools import chain
from tensorflow.keras import backend as K
import multiprocessing
from multiprocessing import Pool

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
multiprocessing.set_start_method('spawn', force=True)

model = load_model('./model.h5', custom_objects={"K": K})

def resize_and_rescale(image):
    image = tf.image.resize(image,
                            (32, 32),
                            preserve_aspect_ratio=True)
    image /= 255.0
    return image

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def _apply_df(data):
    img = np.asarray(np.expand_dims(data, 0))[-1, -1, :, :]
    print(img.shape)
    pred = model.predict(img[np.newaxis, :, :, :], verbose=2)[0]
    # pred = model.predict(data)[0]
    mask = tf.where(pred[:, :, -1] > 0.5, 255, 0)
    return mask

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(np.ceil(len(data) / workers)))
    pool.close()
    return list(result)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    tiles = np.load('tiles.npy')
    print(len(tiles))
    print(tiles[0].shape)
    prep = after_prepare(tiles)
    print(len(prep))
    masks = apply_by_multiprocessing(prep, workers=4)
    masks_flatten = list(chain.from_iterable(masks))
    print(len(masks_flatten), masks_flatten[0].shape)
    return masks_flatten

if __name__ == "__main__":
    masks_flatten = main()
len(masks_flatten) is 128 and the shape of each element is (32,).
I would expect len=62500, with every element (mask) of shape (32, 32).
--- UPDATE ---
So, I want something like this:
def _apply_df(data):
    results = []
    for el in data:
        pred = model.predict(el[np.newaxis, :, :, :], verbose=2)[0]
        mask = tf.where(pred[:, :, -1] > 0.5, 255, 0)
        results.append(mask)
    return results
but without using the loop, doing the predictions in parallel.
Your approach is not incorrect, but even inside a single worker, it's better to let the TensorFlow/NumPy vectorization do its job instead of writing an explicit for loop:
def _apply_df(data):
    pred = model.predict(data)
    mask = tf.where(pred.squeeze(axis=-1) > 0.5, 255, 0)
    return mask
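With this version each worker calls model.predict once on its whole batch of shape (batch, 32, 32, 3), so prediction is vectorized across the batch and the per-image expand_dims/np.newaxis juggling disappears. The returned mask has shape (batch, 32, 32), and chaining the per-worker results back together yields the expected 62500 masks.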
This is the complete code:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import load_model
from itertools import chain
from tensorflow.keras import backend as K
import multiprocessing
from multiprocessing import Pool

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
multiprocessing.set_start_method("spawn", force=True)

model = load_model("./model.h5", custom_objects={"K": K})

def resize_and_rescale(image):
    image = tf.image.resize(image, (32, 32), preserve_aspect_ratio=True)
    image /= 255.0
    return image

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def _apply_df(data):
    pred = model.predict(data)
    mask = tf.where(pred.squeeze(axis=-1) > 0.5, 255, 0)
    return mask

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(np.ceil(len(data) / workers)))
    pool.close()
    return list(result)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    tiles = np.load("tiles.npy")
    prep = after_prepare(tiles)
    masks = apply_by_multiprocessing(prep, workers=4)
    masks_flatten = list(chain.from_iterable(masks))
    print(len(masks_flatten), masks_flatten[0].shape)  # 62500 (32, 32)
    return masks_flatten

if __name__ == "__main__":
    masks_flatten = main()
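As a small usage note (assuming each element is a (32, 32) mask of 0/255 values, as printed above), the flat list can be stacked into a single array, e.g. for saving to disk:

import numpy as np

# Stack the 62500 individual (32, 32) masks into one (62500, 32, 32) array.
all_masks = np.stack([np.asarray(m, dtype=np.uint8) for m in masks_flatten])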

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` when using custom callback to plot conv layer feature maps

I'm trying to implement a custom callback to get the feature maps of each Conv2D layer in the network plotted in TensorBoard.
When I run the code in Example 1 I get the following error:
<ipython-input-44-b691dabedd05> in on_epoch_end(self, epoch, logs)
28
29 # 3) Build partial model
---> 30 partial_model = keras.Model(
31 inputs=self.model.model.input,
32 outputs=output_layers
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.base_layer.Layer object at 0x000002773C631CA0>
which seems as if it can't build the partial model, which is strange, because it succeeds when run separately from the main thread.
Here is an example that illustrates the issue:
Example 1
import os
import io
import datetime as dt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt

'''
You can adjust the verbosity of the logs which are being printed by TensorFlow
by changing the value of TF_CPP_MIN_LOG_LEVEL:
0 = all messages are logged (default behavior)
1 = INFO messages are not printed
2 = INFO and WARNING messages are not printed
3 = INFO, WARNING, and ERROR messages are not printed
'''
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
DEBUG = False

class ConvModel(keras.Model):
    def __init__(self, input_shape):
        super().__init__()
        self.input_image_shape = input_shape
        self.model = keras.Sequential([
            layers.Input(shape=input_shape),
            layers.Conv2D(32, 3),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPool2D(),
            layers.Conv2D(64, 5),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.MaxPool2D(),
            layers.Conv2D(128, 3, kernel_regularizer=keras.regularizers.l2(0.01)),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.Flatten(),
            layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
            layers.Dropout(0.5),
            layers.Dense(10)
        ])

    def call(self, inputs):
        return self.model(inputs)

def find_sub_string(string: str, sub_string: str):
    return True if string.find(sub_string) > -1 else False

def get_file_type(file_name: str):
    file_type = None
    if isinstance(file_name, str):
        dot_idx = file_name.find('.')
        if dot_idx > -1:
            file_type = file_name[dot_idx + 1:]
    return file_type

def get_image_from_figure(figure):
    buffer = io.BytesIO()
    plt.savefig(buffer, format='png')
    plt.close(figure)
    buffer.seek(0)
    image = tf.image.decode_png(buffer.getvalue(), channels=4)
    image = tf.expand_dims(image, 0)
    return image

class ConvLayerVis(keras.callbacks.Callback):
    def __init__(self, X, figure_configs: dict, log_dir: str, log_interval: int):
        super().__init__()
        self.X_test = X
        n_dims = len(self.X_test.shape)
        assert 2 < n_dims < 5, f'The number of dimensions of the test image should be less than 5 and greater than 2, but the current shape is {self.X_test.shape}'
        # In case the image is not represented as a 4-D tensor - add a dimension to the left for the batch
        if len(self.X_test.shape) < 4:
            self.X_test = np.reshape(self.X_test, (1,) + self.X_test.shape)
        self.file_writer = tf.summary.create_file_writer(log_dir)
        self.figure_configs = figure_configs
        self.log_interval = log_interval

    def on_training_begin(self, logs=None):
        pass

    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.log_interval == 0:
            # 1) Get the layers
            output_layer_tuples = [(idx, layer) for idx, layer in enumerate(self.model.model.layers) if find_sub_string(layer.name, 'conv2d') or find_sub_string(layer.name, 'max_pooling2d')]
            output_layers = [layer_tuple[1] for layer_tuple in output_layer_tuples]
            # 2) Get the layer names
            conv_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Conv 2D ') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'conv2d')]
            max_pool_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Max Pooling 2D') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'max_pooling2d')]
            layer_name_tuples = (conv_layer_name_tuples + max_pool_layer_name_tuples)
            layer_name_tuples.sort(key=lambda x: x[0])
            layer_names = [layer_name_tuple[1] for layer_name_tuple in layer_name_tuples]
            # 3) Build partial model
            partial_model = keras.Model(
                inputs=self.model.model.input,
                outputs=output_layers
            )
            # 4) Get the feature maps
            feature_maps = partial_model.predict(self.X_test)
            # 5) Plot
            rows, cols = self.figure_configs.get('rows'), self.figure_configs.get('cols')
            for feature_map, layer_name in zip(feature_maps, layer_names):
                fig, ax = plt.subplots(rows, cols, figsize=self.figure_configs.get('figsize'))
                for row in range(rows):
                    for col in range(cols):
                        ax[row][col].imshow(feature_map[0, :, :, row + col], cmap=self.figure_configs.get('cmap'))
                fig.suptitle(f'{layer_name}')
                with self.file_writer.as_default():
                    tf.summary.image(f'{layer_name} Feature Maps', get_image_from_figure(figure=fig), step=epoch)

if __name__ == '__main__':
    print(tf.config.list_physical_devices('GPU'))
    # Load the data
    (X, y), (X_test, y_test) = cifar10.load_data()
    X, X_test = X.astype(np.float32) / 255.0, X_test.astype(np.float32) / 255.0
    n, w, h, c = X.shape[0], X.shape[1], X.shape[2], X.shape[3]
    n_test, w_test, h_test, c_test = X_test.shape[0], X_test.shape[1], X_test.shape[2], X_test.shape[3]
    print(f'''
Dataset Stats:
    Number of train images: {n}
    Dimensions:
        > Train:
            width = {w}, height = {h}, channels = {c}
        > Test:
            width = {w_test}, height = {h_test}, channels = {c_test}
''')
    # Model with keras.Sequential
    model = ConvModel(input_shape=(w, h, c))
    model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(learning_rate=3e-4), metrics=['accuracy'])
    log_dir = f'./logs/{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
    callbacks = [
        keras.callbacks.TensorBoard(
            log_dir=log_dir,
            write_images=True
        ),
        ConvLayerVis(
            X=X[0],
            figure_configs=dict(rows=5, cols=5, figsize=(35, 35), cmap='gray'),
            log_dir=f'{log_dir}/train',
            log_interval=3
        )
    ]
    model.fit(
        X,
        y,
        batch_size=64,
        epochs=15,
        callbacks=callbacks
    )
Thanks in advance for any help regarding this issue.
Just figured out the problem: I should have collected the output attribute of each layer instead of the Layer object itself:
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]
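In the callback's on_epoch_end this amounts to (same names as in Example 1):

# 1) Collect the symbolic output tensors, not the Layer objects
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]

# 3) Build the partial model from those tensors
partial_model = keras.Model(
    inputs=self.model.model.input,
    outputs=output_layers
)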

How to apply image transform to a list of images and maintain the right dimensions?

I'm using the Omniglot dataset, which is a set of 19,280 images, each of which is 105 x 105 (grayscale).
I defined a custom Dataset class with the following transform:
class OmniglotDataset(Dataset):
    def __init__(self, X, transform=None):
        self.X = X
        self.transform = transform

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img = self.X[idx]
        if self.transform:
            img = self.transform(img)
        return img

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
X_train.shape
(19280, 105, 105)
train_dataset = OmniglotDataset(X_train, transform=img_transform)
When I index a single image, it returns the right dimensions:
train_dataset[0].shape
torch.Size([1, 105, 105])
But when I index several images, it returns the dimensions in the wrong order (I expect 3 x 105 x 105):
train_dataset[[1,2,3]].shape
torch.Size([105, 3, 105])
You get this error because you apply a transformation meant for a single image to a stack of three: self.X[[1, 2, 3]] has shape (3, 105, 105), and ToTensor interprets that array as a single 3 x 105 image with 105 channels, moving the channel axis to the front, which gives (105, 3, 105).
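A minimal sketch reproducing the effect (ToTensor converts an H x W x C array to C x H x W):

import numpy as np
from torchvision import transforms

stack = np.zeros((3, 105, 105), dtype=np.float32)  # three stacked 105 x 105 images
print(transforms.ToTensor()(stack).shape)          # torch.Size([105, 3, 105])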
A more convenient way to get a batch of any size is to use a DataLoader:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

omniglot = datasets.Omniglot(root='./data', background=True, download=True, transform=img_transform)
data_loader = DataLoader(omniglot, shuffle=False, batch_size=8)

for image_batch, labels in data_loader:
    # image_batch now contains the first eight samples
    print(image_batch.shape)  # torch.Size([8, 1, 105, 105])
    break
If you really need to get images in arbitrary order:
from operator import itemgetter

indexes = [1, 3, 5]
selected_samples = itemgetter(*indexes)(omniglot)
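Alternatively, a sketch using torch.utils.data.Subset: wrapping the subset in a DataLoader returns the selected images already collated into a single tensor:

from torch.utils.data import DataLoader, Subset

indexes = [1, 3, 5]
subset = Subset(omniglot, indexes)                    # dataset restricted to these indices
loader = DataLoader(subset, batch_size=len(indexes))  # one batch with all selected samples

images, labels = next(iter(loader))
print(images.shape)  # torch.Size([3, 1, 105, 105])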

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 newsgroups dataset available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data, then turned the resulting sparse matrix into a numpy array before feeding it. After that, when running the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
it gave an error saying that 'inputs_' should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
to expand the dimensions. But when I do that, I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)
    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)
    return features, labels

def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]

def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()

def train_test():
    features, labels = pre_process()

    # Defining hyperparameters
    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003

    print(lstm_size)
    print(batch_size)
    print(epochs)

    # --------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.float32, [None, None], name="inputs")
        # labels_ = tf.placeholder(dtype=tf.int32)
        labels_ = tf.placeholder(tf.int32, [None, None], name="labels")

        # getting dynamic batch size according to the input tensor size
        # dynamic_batch_size = tf.shape(inputs_)[0]

        # output_keep_prob is the dropout added to the RNN's outputs; the dropout
        # will have no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        inputs_ = tf.expand_dims(inputs_, 0)
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)

        # hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)
        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        saver = tf.train.Saver()

    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])

        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}

            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)
            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ", y)

            pred = np.array(predictions)
            print(pred)
            print(y)

            if not ((pred == y).all()):
                wrongPred += 1

            if ii % 27 == 0:
                plot_error(errorplot, ii, wrongPred)

            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
            print("Train loss: {:.3f}".format(loss))
            iteration += 1

        saver.save(sess, "checkpoints/sentiment.ckpt")

    errorRate = wrongPred / len(labels)
    print("ERROR RATE: ", errorRate)

if __name__ == '__main__':
    train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because the depth of the input, i.e. the size of its last dimension, is not statically known: the placeholder is declared as [None, None], and after tf.expand_dims(inputs_, 0) the last dimension is still None, which tf.nn.dynamic_rnn cannot use to build the cell weights. I got the script working like this:
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1, None], name="inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
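A quick sanity check (a small sketch; the shapes follow from the two lines above):

# [1, None] expanded at axis 2 becomes [1, None, 1]:
# batch = 1, time steps = number of TF-IDF features, depth = 1 (statically known).
print(inputs_withextradim.get_shape().as_list())  # [1, None, 1]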
