Local binary pattern transform - PyTorch

Here is my local binary pattern function:
def lbp(x):
    imgUMat = np.float32(x)
    gray = cv2.cvtColor(imgUMat, cv2.COLOR_RGB2GRAY)
    radius = 2
    n_points = 8 * radius
    METHOD = 'uniform'
    lbp = local_binary_pattern(gray, n_points, radius, METHOD)
    lbp = torch.from_numpy(lbp).long()
    return lbp
Here I call the lbp function:
input_img = plt.imread(trn_fnames[31])
x = lbp(input_img)
When I check x.shape, it is:
torch.Size([600, 600])
Sounds good!
But my problem is that when I use transforms.Lambda(lbp) in my transform pipeline, the output image is torch.Size([600]):
tfms = transforms.Compose([
    transforms.Lambda(lbp)])
train_ds = datasets.ImageFolder(trn_dir, transform = tfms)
(train_ds[0][0][0]).shape
torch.Size([600])!!! >>>> my problem
I need torch.Size([600, 600])
I also tried different ways, such as this:
tfms = transforms.Compose([
    transforms.Lambda(lbp),
    transforms.ToPILImage(),
    transforms.Resize((sz, sz))])
And I got this error:
TypeError: pic should be Tensor or ndarray. Got <class 'torch.Tensor'>.
I also added
transforms.ToTensor()])
but I still get the same error:
TypeError: pic should be Tensor or ndarray. Got <class 'torch.Tensor'>.
I'd appreciate your comments, please!
Thank you.

def lbp_transform(x):
    radius = 2
    n_points = 8 * radius
    METHOD = 'uniform'
    imgUMat = np.float32(x)
    gray = cv2.cvtColor(imgUMat, cv2.COLOR_RGB2GRAY)
    lbp = local_binary_pattern(gray, n_points, radius, METHOD)
    lbp = torch.from_numpy(lbp).float()
    return lbp
x = np.random.randn(600, 600, 3)
out = lbp_transform(x)
print(out.shape)
> torch.Size([600, 600])
tfms = transforms.Compose([
    transforms.Lambda(lbp_transform),
    transforms.ToPILImage(),
    transforms.Resize((300, 300)),
    transforms.ToTensor()
])
out = tfms(x)
print(out.shape)
> torch.Size([1, 300, 300])
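As a side note (this is my reading of where the torch.Size([600]) came from, not something stated above): ImageFolder returns (image, label) tuples, so train_ds[0][0] is already the whole transformed tensor, and the extra [0] in train_ds[0][0][0] just picks out its first row. A quick sketch of the indexing, assuming the tfms defined above:
train_ds = datasets.ImageFolder(trn_dir, transform=tfms)
img, label = train_ds[0]   # img is the full transformed tensor, e.g. torch.Size([1, 300, 300])
print(img.shape)
print(img[0].shape)        # indexing once more drops a dimension, e.g. torch.Size([300, 300])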

Related

“Concatenate layer” problem when doing GRAD-CAM. How to overcome this in my custom functional model?

I am having problems with Grad-CAM. I would be grateful if anyone could help. My code is here:
https://www.kaggle.com/mervearmagan/gradcamproblem
Sorry, I couldn't fix the error I got:
ValueError: Input 0 is incompatible with layer model_1: expected shape=(None, 512, 512, 3), found shape=(512, 512, 3)
img = tf.keras.layers.Input(shape = IMG_SHAPE)
gender = tf.keras.layers.Input(shape=(1,))
base_model = tf.keras.applications.InceptionV3(input_shape = IMG_SHAPE, include_top = False, weights = 'imagenet')
cnn_vec=base_model(img)
cnn_vec = tf.keras.layers.GlobalAveragePooling2D()(cnn_vec)
cnn_vec = tf.keras.layers.Dropout(0.20)(cnn_vec)
gender_vec = tf.keras.layers.Dense(32,activation = 'relu')(gender)
features = tf.keras.layers.Concatenate(axis=-1)([cnn_vec,gender_vec])
dense_layer = tf.keras.layers.Dense(256,activation = 'relu')(features)
dense_layer = tf.keras.layers.Dropout(0.1)(dense_layer)
dense_layer = tf.keras.layers.Dense(128,activation = 'relu')(dense_layer)
dense_layer = tf.keras.layers.Dropout(0.1)(dense_layer)
dense_layer = tf.keras.layers.Dense(64,activation = 'relu')(dense_layer)
output_layer = tf.keras.layers.Dense(1, activation = 'linear')(dense_layer)
model = tf.keras.Model(inputs=[img, gender], outputs=output_layer)
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names):
    last_conv_layer = model.get_layer(last_conv_layer_name)
    last_conv_layer_model = tf.keras.Model(model.inputs, last_conv_layer.output)
    classifier_input = tf.keras.layers.Input(shape=last_conv_layer.output.shape)
    #classifier_input = tf.keras.layers.Input(shape=last_conv_layer.output.shape[1:])
    x = classifier_input
    for layer_name in classifier_layer_names:
        x = model.get_layer(layer_name)(x)
    classifier_model = tf.keras.Model(classifier_input, x)
    with tf.GradientTape() as tape:
        last_conv_layer_output = last_conv_layer_model(img_array)
        #last_conv_layer_model(img_array)
        tape.watch(last_conv_layer_output)
        preds = classifier_model(last_conv_layer_output)
        top_pred_index = tf.argmax(preds[0])
        top_class_channel = preds[:, top_pred_index]
    grads = tape.gradient(top_class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    last_conv_layer_output = last_conv_layer_output.numpy()[0]
    pooled_grads = pooled_grads.numpy()
    for i in range(pooled_grads.shape[-1]):
        last_conv_layer_output[:, :, i] *= pooled_grads[i]
    heatmap = np.mean(last_conv_layer_output, axis=-1)
    heatmap = np.maximum(heatmap, 0) / np.max(heatmap)
    return heatmap
last_conv_layer_name = 'global_average_pooling2d'
classifier_layer_names = ['dense_4']
img = get_input('4360.png' )
inputgender=tf.ones((1,1))
image=tf.reshape(img,(1,512,512,3))
heatmap = make_gradcam_heatmap([image,inputgender], model, last_conv_layer_name, classifier_layer_names)
When running the model, remember to test it using inputs of the form:
model([tf.ones((1, 512, 512, 3)), tf.ones((1, 1))])
...for the case where you input one image and one gender to the network. The first "1" in each tensor is the batch dimension. That kind of input should give a result that looks OK at this stage. Go through your code and check this "stage" first, and then go forward in your program.
Here is a handy way to convert an image in numpy array format to a tensor with an extra dimension, compatible with the neural network input:
# Advice on how to convert an image to the tensor format...
import tensorflow as tf
import numpy as np
# Download the image... suppose it has size 512x512x3, e.g. loaded using PIL or whatever suitable library...
# image = Image.open('smile_or_not.png')
# Convert the image to numpy... here we simulate it because no real image was loaded...
image_np = np.random.rand(512, 512, 3)
# Let's see its shape...
print("Size of input image:", image_np.shape)
# And convert it to a tensor of shape (1, height, width, 3)
in_tensor_format = tf.reshape(image_np, (1, 512, 512, 3))
print("...has a shape of: ", in_tensor_format.shape, "...when converted to tensor")

Hyperparameter optimization in pytorch (currently with sklearn GridSearchCV)

I am using this (link) PyTorch tutorial and wish to add grid search functionality to it, via sklearn.model_selection.GridSearchCV (link), in order to optimize the hyperparameters. I struggle to understand what X and Y in gs.fit(x, y) should be; per the documentation (link), x and y are supposed to have the following structure, but I have trouble figuring out how to get these out of the code. The output of the PennFudanDataset class returns img and target in a form that does not align with the X, Y I need.
Are n_samples, n_features within the following block of code or in the tutorial’s block regarding the model?
fit(X, y=None, *, groups=None, **fit_params)[source]
    Run fit with all sets of parameters.
    Parameters:
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and n_features is the number of features.
        y : array-like of shape (n_samples, n_output) or (n_samples,), default=None
            Target relative to X for classification or regression; None for unsupervised learning.
Is there something else we could use instead that is easier to implement for this particular tutorial? I've read about Ray Tune (link), Optuna (link), etc., but they seem more complex than what I need. I am currently also looking into scipy.optimize.brute (link), which seems simpler.
PennFudanDataset class:
import os
import numpy as np
import torch
from PIL import Image

class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
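For reference, here is a minimal sketch (with made-up toy data, not the PennFudanDataset above) of the flat X/y layout that GridSearchCV.fit expects; whether a detection-style img/target pair can be flattened into this form is exactly the open question:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Toy data: 100 samples, 5 features each, and one integer label per sample
X = np.random.randn(100, 5)        # shape (n_samples, n_features)
y = np.random.randint(0, 2, 100)   # shape (n_samples,)

gs = GridSearchCV(SVC(), {'C': [0.1, 1.0, 10.0]}, cv=3)
gs.fit(X, y)                       # the structure the documentation describes
print(gs.best_params_)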

numpy condition function for 2-D data

I have a synthetic dataset consisting of features (X) and labels (y) which is used for KMeans clustering using Python 3.8 and sklearn 0.22.2 and numpy 1.19.
X.shape, y.shape
# ((100, 2), (100,))
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
After training KMeans on X, I want to replace the unique (continuous) values of X with the cluster centers (discrete) obtained from KMeans.
for i in range(3):
    print("cluster number {0} has center = {1}".format(i + 1, kmeans.cluster_centers_[i, :]))
'''
cluster number 1 has center = [-0.7869159 1.14173859]
cluster number 2 has center = [ 1.28010442 -1.04663318]
cluster number 3 has center = [-0.54654735 0.0054752 ]
'''
set(kmeans.labels_)
# {0, 1, 2}
One way I have of doing it is:
X[np.where(clustered_labels == 0)] = val[0,:]
X[np.where(clustered_labels == 1)] = val[1,:]
X[np.where(clustered_labels == 2)] = val[2,:]
Can I do it using np.select()?
cond = [clustered_labels == i for i in range(3)]
val = kmeans.cluster_centers_[:,:]
But on executing the code:
np.select(cond, val)
I get the following error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
in
----> 1 np.select(cond, val)

<__array_function__ internals> in select(*args, **kwargs)

~/.local/lib/python3.8/site-packages/numpy/lib/function_base.py in select(condlist, choicelist, default)
    693         result_shape = condlist[0].shape
    694     else:
--> 695         result_shape = np.broadcast_arrays(condlist[0], choicelist[0])[0].shape
    696
    697     result = np.full(result_shape, choicelist[-1], dtype)

<__array_function__ internals> in broadcast_arrays(*args, **kwargs)

~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in broadcast_arrays(subok, *args)
    256     args = [np.array(_m, copy=False, subok=subok) for _m in args]
    257
--> 258     shape = _broadcast_shape(*args)
    259
    260     if all(array.shape == shape for array in args):

~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in _broadcast_shape(*args)
    187     # use the old-iterator because np.nditer does not handle size 0 arrays
    188     # consistently
--> 189     b = np.broadcast(*args[:32])
    190     # unfortunately, it cannot handle 32 or more arguments directly
    191     for pos in range(32, len(args), 31):

ValueError: shape mismatch: objects cannot be broadcast to a single shape
Suggestions?
Thanks!
A somewhat cleaner way to do it (but very similar to yours) would be the following. Here's a simple example:
from sklearn.cluster import KMeans
import numpy as np
x1 = np.random.normal(0, 2, 100)
y1 = np.random.normal(0, 1, 100)
label1 = np.ones(100)
d1 = np.column_stack([x1, y1, label1])
x2 = np.random.normal(3, 1, 100)
y2 = np.random.normal(1, 2, 100)
label2 = np.ones(100) * 2
d2 = np.column_stack([x2, y2, label2])
x3 = np.random.normal(-3, 0.5, 100)
y3 = np.random.normal(0.5, 0.25, 100)
label3 = np.ones(100) * 3
d3 = np.column_stack([x3, y3, label3])
D = np.row_stack([d1, d2, d3])
np.random.shuffle(D)
X = D[:, :2]
y = D[:, 2]
print(f'X.shape = {X.shape}, y.shape = {y.shape}')
# X.shape = (300, 2), y.shape = (300,)
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
preds = kmeans.predict(X)
X[preds==0] = kmeans.cluster_centers_[0]
X[preds==1] = kmeans.cluster_centers_[1]
X[preds==2] = kmeans.cluster_centers_[2]
Yet another option is np.copyto with a broadcast mask instead of the plain assignment. (Note that np.put indexes into the flattened array with integer indices, so passing a boolean mask such as preds == 0 to it would not select rows the way the assignment above does.)
np.copyto(X, kmeans.cluster_centers_[0], where=(preds == 0)[:, None])
np.copyto(X, kmeans.cluster_centers_[1], where=(preds == 1)[:, None])
np.copyto(X, kmeans.cluster_centers_[2], where=(preds == 2)[:, None])
Frankly, I don't see a way to accomplish the task by means of the np.select function, and I guess the way you do it is the best way, based on this answer.
Cheers.

Integrating two models into a single model ( Object classification and Gender classification & emotion recognition)

I have been working on an ML project in which we want to build an offline application, so we are not using APIs for this project; instead we use two models: one for object classification and the other for gender classification & emotion recognition. Now I have a problem with integrating the two models into one. Both models are in OpenCV.
Code for deep_learning_object_detection
# USAGE
# python deep_learning_object_detection.py --image images/example_01.jpg \
#   --prototxt MobileNetSSD_deploy.prototxt.txt --model MobileNetSSD_deploy.caffemodel

# import the necessary packages
import numpy as np
import argparse
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
    help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
    help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# load the input image and construct an input blob for the image
# by resizing to a fixed 300x300 pixels and then normalizing it
# (note: normalization is done via the authors of the MobileNet SSD
# implementation)
image = cv2.imread(args["image"])
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)

# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()

# loop over the detections
for i in np.arange(0, detections.shape[2]):
    # extract the confidence (i.e., probability) associated with the
    # prediction
    confidence = detections[0, 0, i, 2]
    # filter out weak detections by ensuring the `confidence` is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        # extract the index of the class label from the `detections`,
        # then compute the (x, y)-coordinates of the bounding box for
        # the object
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        # display the prediction
        label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
        print("[INFO] {}".format(label))
        cv2.rectangle(image, (startX, startY), (endX, endY),
            COLORS[idx], 2)
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(image, label, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)
Code for Gender and emotion recognition
import sys
import argparse
import cv2
from keras.models import load_model
import numpy as np

from utils.datasets import get_labels
from utils.inference import detect_faces
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.inference import load_image
from utils.preprocessor import preprocess_input

# parameters for loading data and images
image_path = sys.argv[1]
detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5'
emotion_labels = get_labels('fer2013')
gender_labels = get_labels('imdb')
font = cv2.FONT_HERSHEY_SIMPLEX

# hyper-parameters for bounding boxes shape
gender_offsets = (30, 60)
gender_offsets = (10, 10)
emotion_offsets = (20, 40)
emotion_offsets = (0, 0)

# loading models
face_detection = load_detection_model(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
gender_classifier = load_model(gender_model_path, compile=False)

# getting input model shapes for inference
emotion_target_size = emotion_classifier.input_shape[1:3]
gender_target_size = gender_classifier.input_shape[1:3]

# loading images
rgb_image = load_image(image_path, grayscale=False)
gray_image = load_image(image_path, grayscale=True)
gray_image = np.squeeze(gray_image)
gray_image = gray_image.astype('uint8')

faces = detect_faces(face_detection, gray_image)
for face_coordinates in faces:
    x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets)
    rgb_face = rgb_image[y1:y2, x1:x2]
    x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
    gray_face = gray_image[y1:y2, x1:x2]
    try:
        rgb_face = cv2.resize(rgb_face, (gender_target_size))
        gray_face = cv2.resize(gray_face, (emotion_target_size))
    except:
        continue
    rgb_face = preprocess_input(rgb_face, False)
    rgb_face = np.expand_dims(rgb_face, 0)
    gender_prediction = gender_classifier.predict(rgb_face)
    gender_label_arg = np.argmax(gender_prediction)
    gender_text = gender_labels[gender_label_arg]
    gray_face = preprocess_input(gray_face, True)
    gray_face = np.expand_dims(gray_face, 0)
    gray_face = np.expand_dims(gray_face, -1)
    emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
    emotion_text = emotion_labels[emotion_label_arg]
    if gender_text == gender_labels[0]:
        color = (0, 0, 255)
    else:
        color = (255, 0, 0)
    draw_bounding_box(face_coordinates, rgb_image, color)
    draw_text(face_coordinates, rgb_image, gender_text, color, 0, -20, 1, 2)
    draw_text(face_coordinates, rgb_image, emotion_text, color, 0, -50, 1, 2)

bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
cv2.imwrite('../images/predicted_test_image.png', bgr_image)
How can I integrate these two models into a single model?
Thanks in advance.

Speed up predictions for Object Detection

I am struggling to get good FPS for my predictions. I am running them on a Tesla K80 and I'd like to speed up inference by at least a factor of 20. Here is my code:
def load_detection_graph(PATH_TO_CKPT):
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    return detection_graph

def load_image_into_numpy_array(image):
    '''
    convert image to numpy arrays
    '''
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)

def run_inference_for_single_image(image, graph, filename):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})
            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['filename'] = filename
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

def predict_image(TEST_IMAGE_PATHS, PATH_TO_CKPT, category_index, save_path):
    detection_graph = load_detection_graph(PATH_TO_CKPT)
    prediction_dict = defaultdict()
    start_time = time.time()
    for image_path in TEST_IMAGE_PATHS:
        toc = time.time()
        filename = image_path
        image = Image.open(image_path)
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np, detection_graph, filename)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=1)
        prediction_dict[filename] = output_dict
        plt.figure(figsize=(8, 6), dpi=100)
        plt.imshow(image_np)
        plt.savefig(save_path + '{}'.format(filename))
        tic = time.time()
        print('{0} saved in {1:.2f}sec'.format(filename, tic - toc))
    end_time = time.time()
    print('{0:.2f}min to predict all images'.format((end_time - start_time) / 60))
    with open('../predictions/predictions.pickle', 'wb') as f:
        pickle.dump(prediction_dict, f)
    return prediction_dict
Right now I am getting about 1.8 sec per detection. That includes saving the image and drawing bounding boxes. I do not need to save the image or draw bounding boxes; I just need the output_dict. Any advice on how to speed this up?
Session creation is the most costly operation; don't re-create it every time, try to re-use the session object.
Check this: run_inference_for_single_image(image, graph) - Tensorflow, object detection
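To illustrate that advice, here is a rough sketch (my restructuring, not code from the original answer) of building the graph and session once and reusing them across images; it assumes the detection graph exposes the usual named tensors as in the code above:
# Build the graph, session, and tensor handles once, outside the image loop.
detection_graph = load_detection_graph(PATH_TO_CKPT)
with detection_graph.as_default():
    sess = tf.Session(graph=detection_graph)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    tensor_dict = {
        key: detection_graph.get_tensor_by_name(key + ':0')
        for key in ['num_detections', 'detection_boxes',
                    'detection_scores', 'detection_classes']
    }

def run_inference(sess, tensor_dict, image_tensor, image_np):
    # Only sess.run happens per image; no graph or session rebuilding.
    return sess.run(tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(image_np, 0)})

# inside the loop over TEST_IMAGE_PATHS:
# output_dict = run_inference(sess, tensor_dict, image_tensor, image_np)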
I observed that using skimage.io.imread() or cv2.imread() is pretty fast at loading images. These functions load images directly as numpy arrays, so you can skip "image = Image.open(image_path)" and "image_np = load_image_into_numpy_array(image)". Just make sure "image_tensor" in sess.run gets the correct dimensions.
Also, skimage or OpenCV are faster than matplotlib for saving images.
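A small sketch of that loading path (note that cv2.imread returns BGR order, so a colour conversion is assumed here if the detector expects RGB):
import cv2
import numpy as np

image_np = cv2.imread(image_path)                      # uint8 numpy array, BGR order
image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)   # convert to RGB for the detector
image_np_expanded = np.expand_dims(image_np, axis=0)   # shape (1, H, W, 3) for image_tensor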
