Comparing maxVal's from OpenCV matchTemplate for dynamic template matching - python-3.x

I am trying to do some dynamic template matching with cv2.matchTemplate. The template is:
and the image I am matching it to is:
I am adjusting the size and angle of the template and then grabbing maxVal and maxLoc from cv2.matchTemplate and storing them in a dict with maxVal as the keys. I'm then grabbing max(data.keys()) as the "best" match from result = cv2.matchTemplate(image, resized_template, cv2.TM_CCORR_NORMED). When I loop through all of the keys and plot them on the image, there are some that find the mickey head much better than max(data.keys()).
This is the region for "best" match:
#Find best match from scores
best_match = max(scores.keys())
#Output image
image_copy = color_image.copy()
#Create rectangle around best match
cv2.rectangle(image_copy, (data[best_match][1][0], data[best_match][1][1]), (data[best_match][1][0] + data[best_match][0][0], data[best_match][1][1] + data[best_match][0][1]), (255, 255, 255), 2)
plt.imshow(image_copy)
But this region appears to locate it much more accurately:
image_copy = color_image.copy()
for n in list(data.keys())[69:70:1]:
#Create rectangle around best match
cv2.rectangle(image_copy, (data[n][1][0], data[n][1][1]), (data[n][1][0] + data[n][0][0], data[n][1][1] + data[n][0][1]), (255, 255, 255), 2)
cv2.imwrite('output_1.jpg', image_copy)
plt.imshow(image_copy)
Am I miss interpreting what the maxVal from cv2.matchTemplate actually represents? If so, how can I select the actual "best" region based on some value or criteria? Here's the full code:
#Import packages
import numpy as np
import matplotlib.pyplot as plt
import cv2
import imutils
import os
#Import template
template = cv2.imread(templates_dir + '\\' + os.listdir(templates_dir)[3])
gray_template = cv2.cvtColor(template, cv2.COLOR_RGB2GRAY)
#Import image
image = cv2.imread(images_dir + '\\' + os.listdir(images_dir)[5])
color_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
#Image dims
image_h = image.shape[1]
image_w = image.shape[0]
#Processing loop scaling the template
#Scores dict
scores = {}
#Data dict of structure (maxVal: (template dims, locs, angle))
data = {}
#Edge images
template = cv2.Canny(gray_template, 80, 200)
image = cv2.Canny(gray_image, 80, 200)
#Loop through different template scales
for scale in np.linspace(0.2, 1.5, 15):
#Loop through different template rotations
for angle in np.linspace(0, 360, 25)[:-1]:
#Rotate template
template_rotated = imutils.rotate(template, angle)
#Resize template
resized_template = imutils.resize(template_rotated, width = int(template_rotated.shape[0] * scale))
#Dims of resized template
template_h = resized_template.shape[1]
template_w = resized_template.shape[0]
#Break from loop if the template becomes bigger than the image
if template_h > image_h or template_w > image_w:
break
#Run template through image
result = cv2.matchTemplate(image, resized_template, cv2.TM_CCORR_NORMED)
#Get matching score and location
(_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)
#Add correlation value and result to scores
scores[str(maxVal)] = result
#Add info to data
data[str(maxVal)] = ((template_h, template_w), (maxLoc[0], maxLoc[1]), angle)
#Find best match from scores
best_match = max(scores.keys())
#Output image
image_copy = color_image.copy()
#Create rectangle around best match
cv2.rectangle(image_copy, (data[best_match][1][0], data[best_match][1][1]), (data[best_match][1][0] + data[best_match][0][0], data[best_match][1][1] + data[best_match][0][1]), (0, 0, 255), 2)
plt.imshow(image_copy)

Related

Specific bounding box color

Can someone help me to modify this existing code to use different color for the bounding box i want to detect?
For example: If a person detect bounding box will be red and if animals or pets detect will be green and other object would be blue, been exploring for a week still no luck for modifying it if anyone can explain or help would be much appreciated. Thanks!
import os
import argparse
import cv2
import numpy as np
import sys
import glob
import importlib.util
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in', required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite', default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt', default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects', default=0.5)
parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir', default=None)
parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.', default=None)
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection', action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
use_TPU = args.edgetpu
IM_NAME = args.image
IM_DIR = args.imagedir
if (IM_NAME and IM_DIR):
print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
sys.exit()
if (not IM_NAME and not IM_DIR):
IM_NAME = 'test1.jpg'
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
if use_TPU:
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
CWD_PATH = os.getcwd()
if IM_DIR:
PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR)
images = glob.glob(PATH_TO_IMAGES + '/*')
elif IM_NAME:
PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME)
images = glob.glob(PATH_TO_IMAGES)
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
del(labels[0])
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT, experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
for image_path in images:
image = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
imH, imW, _ = image.shape
image_resized = cv2.resize(image_rgb, (width, height))
input_data = np.expand_dims(image_resized, axis=0)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
cv2.imshow('Object detector', image)
if cv2.waitKey(0) == ord('q'):
break
cv2.destroyAllWindows()
Basically what you want to do is make a dict where the key is the class and the value is a color in the same format that is here.
cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
Replace (10, 255, 0) with something like color_dict[classes[i]] and then you will be able to get a different color for each class.

Drawing bounding rectangle around the tumor cv2

I am working on a project which predicts that the MRI has tumor or not, now the next step is to draw a bounding rectangle around the tumor. I was able to extract the tumor from the MRI, now I want to get the opposite corners of the rectangle to bound the tumor in original figure.
EDIT:
For some of the MRI images the I cannot separate the the tumor from MRI, calculated the threshold using OTSU method seperately but its not working properly.
Thank you !
Computing threshold:
path=r"ImageProc\Y54.jpg"
img = cv.imread(path,0)
blur = cv.GaussianBlur(img,(5,5),0)
# find normalized_histogram, and its cumulative distribution function
hist = cv.calcHist([blur],[0],None,[256],[0,256])
hist_norm = hist.ravel()/hist.sum()
Q = hist_norm.cumsum()
bins = np.arange(256)
fn_min = np.inf
thresh = -1
for i in range(1,256):
p1,p2 = np.hsplit(hist_norm,[i]) # probabilities
q1,q2 = Q[i],Q[255]-Q[i] # cum sum of classes
if q1 < 1.e-6 or q2 < 1.e-6:
continue
b1,b2 = np.hsplit(bins,[i]) # weights
# finding means and variances
m1,m2 = np.sum(p1*b1)/q1, np.sum(p2*b2)/q2
v1,v2 = np.sum(((b1-m1)**2)*p1)/q1,np.sum(((b2-m2)**2)*p2)/q2
# calculates the minimization function
fn = v1*q1 + v2*q2
if fn < fn_min:
fn_min = fn
thresh = i
# find otsu's threshold value with OpenCV function
ret, otsu = cv.threshold(blur,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
print( "{} {}".format(thresh,ret) )
My progress so for through the code is:
import cv2
import matplotlib.pyplot as plt
def show_image(title, image):
cv2.imshow(title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()
def show_image_plt(title, image, cmap = None):
plt.figure(title)
plt.imshow(image,cmap=cmap)
plt.axis('off')
plt.show()
def cvt_image_colorspace(image, colorspace = cv2.COLOR_BGR2GRAY):
return cv2.cvtColor(image, colorspace)
def median_filtering(image, kernel_size=3):
'''
:param image: grayscale image
:param kernel_size: kernel size should be odd number
:return: blurred image
'''
return cv2.medianBlur(image, kernel_size)
def apply_threshold(image, **kwargs):
'''
:param image: image object
:param kwargs: threshold parameters - dictionary
:return:
'''
threshold_method = kwargs['threshold_method']
max_value = kwargs['pixel_value']
threshold_flag = kwargs.get('threshold_flag', None)
if threshold_flag is not None:
ret, thresh1 = cv2.adaptiveThreshold(image, max_value, threshold_method,cv2.THRESH_BINARY,
kwargs['block_size'], kwargs['const'])
else:
ret, thresh1 = cv2.threshold(image, kwargs['threshold'], max_value, threshold_method)
return thresh1
def sobel_filter(img,x,y,kernel_size = 3):
return cv2.Sobel(img, cv2.CV_8U, x,y, ksize=kernel_size)
path=r"Imageproc\Y54.jpg"
image = cv2.imread(path, 1)
show_image('Original image', image)
#Step one - grayscale the image
grayscale_img = cvt_image_colorspace(image)
#show_image('Grayscaled image', grayscale_img)
#Step two - filter out image
median_filtered = median_filtering(grayscale_img,5)
#show_image('Median filtered', median_filtered)
#testing threshold function
bin_image = apply_threshold(median_filtered, **{"threshold" : 93,
"pixel_value" : 255,
"threshold_method" : cv2.THRESH_BINARY})
otsu_image = apply_threshold(median_filtered, **{"threshold" : 93,
"pixel_value" : 255,
"threshold_method" : cv2.THRESH_BINARY +
cv2.THRESH_OTSU})
#Step 3a - apply Sobel filter
img_sobelx = sobel_filter(median_filtered, 1, 0)
img_sobely = sobel_filter(median_filtered, 0, 1)
# Adding mask to the image
img_sobel = img_sobelx + img_sobely+grayscale_img
#show_image('Sobel filter applied', img_sobel)
#Step 4 - apply threshold
# Set threshold and maxValue
threshold = 160
maxValue = 255
# Threshold the pixel values
thresh = apply_threshold(img_sobel, **{"threshold" : 93,
"pixel_value" : 255,
"threshold_method" : cv2.THRESH_BINARY})
#show_image("Thresholded", thresh)
#Step 3b - apply erosion + dilation
#apply erosion and dilation to show only the part of the image having more intensity - tumor region
#that we want to extract
kernel=cv2.getStructuringElement(cv2.MORPH_RECT,(9,9))
erosion = cv2.morphologyEx(median_filtered, cv2.MORPH_ERODE, kernel)
#show_image('Eroded image', erosion)
dilation = cv2.morphologyEx(erosion, cv2.MORPH_DILATE, kernel)
#show_image('Dilatated image', dilation)
#Step 4 - apply thresholding
threshold = 160
maxValue = 255
# apply thresholding
new_thresholding = apply_threshold(dilation, **{"threshold" : 93,
"pixel_value" : 255,
"threshold_method" : cv2.THRESH_BINARY})
show_image('Threshold image after erosion + dilation', new_thresholding)
The output image for given MRI is:
I think the best way is to know where pixels are not black
pts = np.argwhere(new_thresholding>0)
y1,x1 = pts.min(axis=0)
y2,x2 = pts.max(axis=0)
new_thresholding_rect= cv2.rectangle(new_thresholding,(x1,y1),(x2,y2),(255,0,0),2)
show_image('Threshold image after erosion + dilation + Rectangle',new_thresholding_rect)

Detection of small object - aphids on plants

I'm currently trying to create a detector of aphids (green and rose) on plants but only using "classic" image processing technique (no neural network).
Here are an image I'm working on:
'aphids.jpg'
I'm working on a code (see below). If you apply it on the image you should have the plants alone. My problem is that I want to isolate the aphids that can be seen on the plants. There are a lot of them but I just want to detect the biggest or the more obvious.
On the code there is an "edges_detect" function I'm currently working on. One of the problem I have is that I can detect some of the aphids as contour but it will also take simple lines...
I tried to drop those line using the hierarchy of contour but it seems those line have inner contour so I can't easily delete them.
I also tried the adjust_gamma and contrast, but it doesn't give that much result.
I'm looking for more ideas. What would you try ?
Thank you in advance !
Here is the code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
def adjust_gamma(image, gamma=1.0):
# build a lookup table mapping the pixel values [0, 255] to
# their adjusted gamma values
invGamma = 1.0 / gamma
table = np.array([((i / 255.0) ** invGamma) * 255
for i in np.arange(0, 256)]).astype("uint8")
# apply gamma correction using the lookup table
return cv2.LUT(image, table)
def adjust_contrast(image,alpha=1.0,beta=0):
new = np.zeros(image.shape,image.dtype)
for y in range(image.shape[0]):
for x in range(image.shape[1]):
for c in range(image.shape[2]):
new[y,x,c] = np.clip(alpha*image[y,x,c]+beta,0,255)
return(new)
def img_process(img):
(h1, w1) = img.shape[:2]
center = (w1 / 2, h1 / 2)
blur = cv2.GaussianBlur(img.copy(),(5,5),0)
hsv = cv2.cvtColor(blur,cv2.COLOR_BGR2HSV)
#image = img.copy()
#Boundaries to separate plants from the image
l_bound = np.array([20,0,0])
h_bound = np.array([90,250,170])#green
mask = cv2.inRange(hsv,l_bound,h_bound)
res = cv2.bitwise_and(img,img,mask=mask)
#Find contour plants
cnt,_ = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
sort_cnt = sorted(cnt,key=cv2.contourArea,reverse=True)
cnt = [sort_cnt[i] for i in range(len(sort_cnt)) if cv2.contourArea(sort_cnt[i])>300]
cv2.drawContours(res, cnt, -1, (0,255,0), -1)
#Inverse mask to have only the plant in the image
mask2 = cv2.inRange(res,np.array([0,0,0]),np.array([250,250,250]))
mask2 = cv2.bitwise_not(mask2)
res2 = cv2.bitwise_and(img,img,mask=mask2)
#Augment bright/contrast
res2=res2*1.45
res2=res2.astype('uint8')
#Crop
res2 = res2[:-50,int(center[0]-300):int(center[0]+550)]
return res2
def edge_detec(img):
(h1, w1) = img.shape[:2]
center = (w1 / 2, h1 / 2)
blur = cv2.GaussianBlur(img.copy(),(5,5),0)
gray = cv2.cvtColor(blur,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray,30,70,apertureSize = 3)
edges = edges[:-50,int(center[0]-300):int(center[0]+550)]
#kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
#edges = cv2.morphologyEx(edges, cv2.MORPH_GRADIENT, kernel)
cnt,hierarchy = cv2.findContours(edges,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnt = sorted(cnt,key=cv2.contourArea,reverse=True)
listArea = list(map(cv2.contourArea,cnt))
sort_cnt = [x for x in cnt if cv2.contourArea(x)>10]
cv2.drawContours(edges, sort_cnt, -1, (0,255,0), -1)
return edges,center,img
### Debut programme
img = cv2.imread('051.jpg')
while True:
##Put processing function here
img_mod = img_process(img)
cv2.imshow('img',img_mod)
if cv2.waitKey(1) & 0xFF == 27:
break
cv2.destroyAllWindows()

Undefined name problem in camera calibration

I am using the same code that is provided by the OpenCv tutorial, it was working few weeks ago, today I was trying to run it is says that gray name is not defined!! can some one find me the error?
import numpy as np
#import matplotlib.pyplot as plt
import cv2
import glob
import os
def draw(img, corners, imgpts):
corner = tuple(corners[0].ravel())
img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)
img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)
img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)
return img
# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((7*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:7].T.reshape(-1,2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
img_dir = "C:\\Hungary\\Biblography\\Rotating Solitary Wave\\My Work\\Final Work\\Experiment1111 \\Camera Calibration\\Image Processing\\chess"
data_path = os.path.join(img_dir,'*bmp')
images = glob.glob(data_path)
for fname in images:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv2.findChessboardCorners(gray, (7,7),None)
# If found, add object points, image points (after refining them)
if ret == True:
objpoints.append(objp)
corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
imgpoints.append(corners2)
# Draw and display the corners
img = cv2.drawChessboardCorners(img, (7,7), corners2,ret)
cv2.imshow('img',img)
cv2.waitKey(500)
cv2.destroyAllWindows()
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape [::-1],None,None)
print('Rotation Vector, or the Angles For Each Photo: ', rvecs, '\n')
R = cv2.Rodrigues(rvecs[0])
print('The Rotation Matrix is: ', R)
print('Translation Vector: ', tvecs, '\n')
print(mtx, '\n')
print('Distortion Coefficients ', dist, '\n')
img = cv2.imread('00000274.bmp')
h, w = img.shape[:2]
newcameramtx, roi=cv2.getOptimalNewCameraMatrix(mtx,dist,(w,h),1,(w,h))
print('Camera Matrix', newcameramtx, '\n')
# undistort
dst = cv2.undistort(img, mtx, dist) #, None, newcameramtx)
p = np.ones_like(dst)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
# undistort
mapx,mapy = cv2.initUndistortRectifyMap(mtx,dist,None,newcameramtx,(w,h),5)
dst = cv2.remap(img,mapx,mapy,cv2.INTER_LINEAR)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
mean_error = 0
for i in range(len(objpoints)):
imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
error = cv2.norm(imgpoints[i],imgpoints2, cv2.NORM_L2)/len(imgpoints2)
mean_error += error
print("total error: ", mean_error/len(objpoints))
If you read the opencv document you will find that I did little changes on the code and it was working but today it is raising this error about the gray name is not defined!
Check your path once, and see if images is an empty list. In that case, for loop will not be executed where the gray variable is defined.

Template Matching: efficient way to create mask for minMaxLoc?

Template matching in OpenCV is great. And you can pass a mask to cv2.minMaxLoc so that you only search (sort of) in part of the image for the template you want. You can also use a mask at the matchTemplate operation, but this only masks the template.
I want to find a template and I want to be assured that this template is within some other region of my image.
Calculating the mask for minMaxLoc seems kind of heavy. That is, calculating an accurate mask feels heavy. If you calculate a mask the easy way, it ignores the size of the template.
Examples are in order. My input images are show below. They're a bit contrived. I want to find the candy bar, but only if it's completely inside the white circle of the clock face.
clock1
clock2
template
In clock1, the candy bar is inside the circular clock face and it's a "PASS". But in clock2, the candy bar is only partially inside the face and I want it to be a "FAIL". Here's a code sample for doing it the easy way. I use cv.HoughCircles to find the clock face.
import numpy as np
import cv2
img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('template.png')
t_h, t_w = template.shape[0:2] # template height and width
# find circle in gray image using Hough transform
circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
minDist = 150, param1 = 50, param2 = 70,
minRadius = 131, maxRadius = 200)
i = circles[0,0]
x0 = i[0]
y0 = i[1]
r = i[2]
# display circle on color image
cv2.circle(img,(x0, y0), r,(0,255,0),2)
# do the template match
result = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
# finally, here is the part that gets tricky. we want to find highest
# rated match inside circle and we'd like to use minMaxLoc
# make mask by drawing circle on zero array
mask = np.zeros(result.shape, dtype = np.uint8) # minMaxLoc will throw
# error w/o np.uint8
cv2.circle(mask, (x0, y0), r, color = 1, thickness = -1)
# call minMaxLoc
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result, mask = mask)
# draw found rectangle on img
if max_val > 0.4: # use 0.4 as threshold for finding candy bar
cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
cv2.imwrite('output.jpg', img)
output using clock1
output using clock2
finds candy bar even
though part of it is outside circle
So to properly make a mask, I use a bunch of NumPy operations. I make four separate masks (one for each corner of the template bounding box) and then AND them together. I'm not aware of any convenience functions in OpenCV that would do the mask for me. I'm a little nervous that all of the array operations will be expensive. Is there a better way to do this?
h, w = result.shape[0:2]
# make arrays that hold x,y coords
grid = np.indices((h, w))
x = grid[1]
y = grid[0]
top_left_mask = np.hypot(x - x0, y - y0) - r < 0
top_right_mask = np.hypot(x + t_w - x0, y - y0) - r < 0
bot_left_mask = np.hypot(x - x0, y + t_h - y0) - r < 0
bot_right_mask = np.hypot(x + t_w - x0, y + t_h - y0) - r < 0
mask = np.logical_and.reduce((top_left_mask, top_right_mask,
bot_left_mask, bot_right_mask))
mask = mask.astype(np.uint8)
cv2.imwrite('mask.png', mask*255)
Here's what the "fancy" mask looks like:
Seems about right. It cannot be circular because of the template shape. If I run clock2.jpg with this mask I get:
It works. No candy bars are identified. But I wish I could do it in fewer lines of code...
EDIT:
I've done some profiling. I ran 100 cycles of the "easy" way and the "accurate" way and calculated frames per second (fps):
easy way: 12.7 fps
accurate way: 7.8 fps
so there is some price to pay for making the mask with NumPy. These tests were done on a relatively powerful workstation. It could get uglier on more modest hardware...
Method 1: 'mask' image before cv2.matchTemplate
Just for kicks, I tried to make my own mask of the image that I pass to cv2.matchTemplate to see what kind of performance I can achieve. To be clear, this isn't a proper mask -- I set all of the pixels to ignore to one color (black or white). This is to get around the fact only TM_SQDIFF and TM_CORR_NORMED support a proper mask.
#Alexander Reynolds makes a very good point in the comments that some care must be taken if the template image (the thing we're trying to find) has lots of black or lots of white. For many problems, we will know a priori what the template looks like and we can specify a white background or black background.
I use cv2.multiply, which seems to be faster than numpy.multiply. cv2.multiply has the added advantage that it automatically clips the results to the range 0 to 255.
import numpy as np
import cv2
import time
img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('target.jpg')
t_h, t_w = template.shape[0:2] # template height and width
mask_background = 'WHITE'
start_time = time.time()
for i in range(100): # do 100 cycles for timing
# find circle in gray image using Hough transform
circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
minDist = 150, param1 = 50, param2 = 70,
minRadius = 131, maxRadius = 200)
i = circles[0,0]
x0 = i[0]
y0 = i[1]
r = i[2]
# display circle on color image
cv2.circle(img,(x0, y0), r,(0,255,0),2)
if mask_background == 'BLACK': # black = 0, white = 255 on grayscale
mask = np.zeros(img.shape, dtype = np.uint8)
elif mask_background == 'WHITE':
mask = 255*np.ones(img.shape, dtype = np.uint8)
cv2.circle(mask, (x0, y0), r, color = (1,1,1), thickness = -1)
img2 = cv2.multiply(img, mask) # element wise multiplication
# values > 255 are truncated at 255
# do the template match
result = cv2.matchTemplate(img2, template, cv2.TM_CCOEFF_NORMED)
# call minMaxLoc
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
# draw found rectangle on img
if max_val > 0.4:
cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
fps = 100/(time.time()-start_time)
print('fps ', fps)
cv2.imwrite('output.jpg', img)
Profiling results:
BLACK background 12.3 fps
WHITE background 12.1 fps
Using this method has very little performance hit relative to 12.7 fps in original question. However, it has the drawback that it will still find templates that still stick over the edge a little bit. Depending on the exact nature of the problem, this may be acceptable in many applications.
Method 2: use cv2.boxFilter to create mask for minMaxLoc
In this technique, we start with a circular mask (as in OP), but then modify it with cv2.boxFilter. We change the anchor from default center of kernel to the top left corner (0, 0)
import numpy as np
import cv2
import time
img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('target.jpg')
t_h, t_w = template.shape[0:2] # template height and width
print('t_h, t_w ', t_h, ' ', t_w)
start_time = time.time()
for i in range(100):
# find circle in gray image using Hough transform
circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
minDist = 150, param1 = 50, param2 = 70,
minRadius = 131, maxRadius = 200)
i = circles[0,0]
x0 = i[0]
y0 = i[1]
r = i[2]
# display circle on color image
cv2.circle(img,(x0, y0), r,(0,255,0),2)
# do the template match
result = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
# finally, here is the part that gets tricky. we want to find highest
# rated match inside circle and we'd like to use minMaxLoc
# start to make mask by drawing circle on zero array
mask = np.zeros(result.shape, dtype = np.float)
cv2.circle(mask, (x0, y0), r, color = 1, thickness = -1)
mask = cv2.boxFilter(mask,
ddepth = -1,
ksize = (t_w, t_h),
anchor = (0,0),
normalize = True,
borderType = cv2.BORDER_ISOLATED)
# mask now contains values from zero to 1. we want to make anything
# less than 1 equal to zero
_, mask = cv2.threshold(mask, thresh = 0.9999,
maxval = 1.0, type = cv2.THRESH_BINARY)
mask = mask.astype(np.uint8)
# call minMaxLoc
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result, mask = mask)
# draw found rectangle on img
if max_val > 0.4:
cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
fps = 100/(time.time()-start_time)
print('fps ', fps)
cv2.imwrite('output.jpg', img)
This code gives a mask identical to OP, but at 11.89 fps. This technique gives us more accuracy with slightly more performance hit than Method 1.

Resources