I am using TensorFlow Lite for object detection, specifically Google's SSDLite-MobileNet-v2 model. Along with object detection, I am also performing color detection of the detected object using OpenCV. For that, I am following this YouTube tutorial:
Simple color detection
In the tutorial, the frame is first converted to HSV; then the HSV value of the pixel at the center of the object's bounding box is read and the color is estimated from it.
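For context, this is a minimal sketch of that center-pixel sampling (the function name and box variables here are illustrative, not taken from the tutorial):
## Illustrative sketch: read the HSV value of the pixel at the center of a
## bounding box (xmin, ymin, xmax, ymax) of a BGR frame.
def center_pixel_hsv(frame, xmin, ymin, xmax, ymax):
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    cx = xmin + (xmax - xmin) // 2
    cy = ymin + (ymax - ymin) // 2
    hue, sat, val = hsv[cy, cx]   # image is indexed as [row, col]
    return hue, sat, val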
For object detection, I'm following this GitHub project:
https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi
The code for object and color detection:
class VideoStream:
    """Camera object that controls video streaming from the Picamera"""
    def __init__(self, resolution=(640,480), framerate=30):
        ## Initialize the PiCamera and the camera image stream
        self.stream = cv2.VideoCapture(0)
        ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        ret = self.stream.set(3, resolution[0])
        ret = self.stream.set(4, resolution[1])

        ## Read first frame from the stream
        (self.grabbed, self.frame) = self.stream.read()

        ## Variable to control when the camera is stopped
        self.stopped = False

    def start(self):
        ## Start the thread that reads frames from the video stream
        Thread(target=self.update, args=()).start()
        return self

    def update(self):
        ## Keep looping indefinitely until the thread is stopped
        while True:
            ## If the camera is stopped, stop the thread
            if self.stopped:
                ## Close camera resources
                self.stream.release()
                return
            ## Otherwise, grab the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

    def read(self):
        ## Return the most recent frame
        return self.frame

    def stop(self):
        ## Indicate that the camera and thread should be stopped
        self.stopped = True
## Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.5)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
default='1280x720')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu
## Import TensorFlow libraries
## If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
## If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    if use_TPU:
        from tflite_runtime.interpreter import load_delegate
else:
    from tensorflow.lite.python.interpreter import Interpreter
    if use_TPU:
        from tensorflow.lite.python.interpreter import load_delegate

## If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
    # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
    if (GRAPH_NAME == 'detect.tflite'):
        GRAPH_NAME = 'edgetpu.tflite'
## Get path to current working directory
CWD_PATH = os.getcwd()
## Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
## Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
## Load the label map
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]

## Have to do a weird fix for label map if using the COCO "starter model" from
## https://www.tensorflow.org/lite/models/object_detection/overview
## First label is '???', which has to be removed.
if labels[0] == '???':
    del(labels[0])

## Load the Tensorflow Lite model.
## If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
## Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
## Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
## Initialize video stream
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)
while True:
    ## Start timer (for calculating frame rate)
    t1 = cv2.getTickCount()

    ## Grab frame from video stream
    frame1 = videostream.read()

    ## Acquire frame and resize to expected shape [1xHxWx3]
    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    ## Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    ## Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    ## Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]    ## Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  ## Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]   ## Confidence of detected objects
    ##num = interpreter.get_tensor(output_details[3]['index'])[0]    # Total number of detected

    ## Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            ## Get bounding box coordinates and draw box
            ## Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
            ## print(ymin,xmin,ymax,xmax)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            ## Draw label
            object_name = labels[int(classes[i])]
            print(object_name)

            ## colour detection
            hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            height, width = (ymax - ymin), (xmax - xmin)
            cx = int(width / 2)
            cy = int(height / 2)
            ## Pick pixel value
            pixel_center = hsv_frame[cy + ymin, cx + xmin]
            hue_value = pixel_center[0]
            sat_value = pixel_center[1]
            val_value = pixel_center[2]
            print(hue_value, sat_value, val_value)
            color = "Undefined"
            if hue_value > 166 and val_value < 95:
                color = "black"
            elif hue_value < 5:
                color = "RED"
            elif hue_value < 22:
                color = "ORANGE"
            elif hue_value < 33:
                color = "YELLOW"
            elif hue_value < 67:
                color = "GREEN"
            elif hue_value < 117:
                color = "BLUE"
            elif hue_value < 144:
                color = "VIOLET"
            elif hue_value < 160:
                color = "PINK"
            else:
                color = "RED"
            print(color)

            label = '%s: %d%%' % (object_name, int(scores[i]*100))  ## Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  ## Get font size
            label_ymin = max(ymin, labelSize[1] + 10)  ## Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)  ## Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  ## Draw label text

    ## Draw framerate in corner of frame
    cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

    ## All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)

    ## Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

## Clean up
cv2.destroyAllWindows()
videostream.stop()
I am new to TensorFlow Lite. When I run this code it works for the first iteration of the while loop and prints the object and its color (cup and ORANGE), but then I get this error:
cup
20 215 197
ORANGE
Traceback (most recent call last):
File "D:\download\TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi-master\TFLite_detection_webcam.py", line 188,
in <module>
interpreter.set_tensor(input_details[0]['index'],input_data)
File "C:\Users\admin\AppData\Local\Programs\Python\Python39\lib\site-packages\tensorflow\lite\python\interpreter.py", line 705, in set_tensor
self._interpreter.SetTensor(tensor_index, value)
ValueError: Cannot set tensor: Dimension mismatch. Got 198 but expected 300 for dimension 1 of input 175.
But if I remove the color detection code (shown below) completely from the while loop, it runs without any error and returns the object name.
Color detection code in the while loop:
## colour detection
hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
height, width = (ymax - ymin), (xmax - xmin)
cx = int(width / 2)
cy = int(height / 2)
## Pick pixel value
pixel_center = hsv_frame[cy + ymin, cx + xmin]
hue_value = pixel_center[0]
sat_value = pixel_center[1]
val_value = pixel_center[2]
print(hue_value, sat_value, val_value)
color = "Undefined"
if hue_value > 166 and val_value < 95:
    color = "black"
elif hue_value < 5:
    color = "RED"
elif hue_value < 22:
    color = "ORANGE"
elif hue_value < 33:
    color = "YELLOW"
elif hue_value < 67:
    color = "GREEN"
elif hue_value < 117:
    color = "BLUE"
elif hue_value < 144:
    color = "VIOLET"
elif hue_value < 160:
    color = "PINK"
else:
    color = "RED"
print(color)
How do I solve this problem?
The error says that the input tensor dimensions are wrong.
The cause is that your color detection code overwrites the variables width and height, which hold the model's expected input size, with the dimensions of the detected bounding box. On the next iteration the frame is resized to the wrong shape, so set_tensor fails.
Solution: rename the width and height variables in your color detection code.
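For example, the color detection block could read as follows (a sketch; box_width and box_height are illustrative names that no longer clash with the model's input size):
            ## colour detection (with renamed box dimensions)
            hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            box_height, box_width = (ymax - ymin), (xmax - xmin)
            cx = int(box_width / 2)
            cy = int(box_height / 2)
            ## Pick pixel value at the center of the bounding box;
            ## width and height still hold the model's expected input size
            pixel_center = hsv_frame[cy + ymin, cx + xmin]
            hue_value, sat_value, val_value = pixel_center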
I am using Matterport Mask R-CNN as my model and I'm trying to build my dataset for training. After much deliberation over the problem below, I think what I'm actually asking is: how do I add more than one class (+ BG)?
I get the following AssertionError:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-21-c20768952b65> in <module>()
15
16 # display image with masks and bounding boxes
---> 17 display_instances(image, bbox, masks, class_ids/4, train_set.class_names)
/usr/local/lib/python3.6/dist-packages/mask_rcnn-2.1-py3.6.egg/mrcnn/visualize.py in display_instances(image, boxes, masks, class_ids, class_names, scores, title, figsize, ax, show_mask, show_bbox, colors, captions)
103 print("\n*** No instances to display *** \n")
104 else:
--> 105 assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
106
107 # If no axis is passed, create one and automatically call show()
AssertionError:
The problem appears to come from masks.shape[-1] == class_ids.shape[0] evaluating to False, which should not be the case.
I have now traced it back to masks.shape[-1] being 4 times class_ids.shape[0], and I think this may have something to do with having 4 classes in the data. Unfortunately, I haven't worked out how to solve this problem.
# load the masks for an image
def load_mask(self, image_id):
    # get details of image
    info = self.image_info[image_id]
    # define box file location
    path = info['annotation']
    # load XML
    boxes, w, h = self.extract_boxes(path)
    # create one array for all masks, each on a different channel
    masks = zeros([h, w, len(boxes)], dtype='uint8')
    # create masks
    class_ids = list()
    for i in range(len(boxes)):
        box = boxes[i]
        row_s, row_e = box[1], box[3]
        col_s, col_e = box[0], box[2]
        masks[row_s:row_e, col_s:col_e, i] = 1
        class_ids.append(self.class_names.index('Resistor'))
        class_ids.append(self.class_names.index('LED'))
        class_ids.append(self.class_names.index('Capacitor'))
        class_ids.append(self.class_names.index('Diode'))
    return masks, asarray(class_ids, dtype='int32')
# load the masks and the class ids
mask, class_ids = train_set.load_mask(image_id)
print(mask, "and", class_ids)
# display image with masks and bounding boxes
display_instances(image, bbox, mask, class_ids, train_set.class_names)
There are a couple of modifications you need to make to add multiple classes:
1) In load_dataset, register each class with self.add_class(), and then modify the last line (add_image) to pass class_ids for the number of classes you have.
# load the dataset definitions
def load_dataset(self, dataset_dir, is_train=True):
    # define one class
    self.add_class("dataset", 1, "car")
    self.add_class("dataset", 2, "rider")
    # define data locations
    images_dir = dataset_dir + '/images_mod/'
    annotations_dir = dataset_dir + '/annots_mod/'
    # find all images
    for filename in listdir(images_dir):
        # extract image id
        image_id = filename[:-4]
        # skip all images after 150 if we are building the train set
        if is_train and int(image_id) >= 3000:
            continue
        # skip all images before 150 if we are building the test/val set
        if not is_train and int(image_id) < 3000:
            continue
        img_path = images_dir + filename
        ann_path = annotations_dir + image_id + '.xml'
        # add to dataset
        self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path, class_ids=[0,1,2])
2) Now, modify extract_boxes to iterate over each object element and read both its name and its bounding-box coordinates. If you have 2 classes and your XML files contain exactly those classes, you do not need the if statement when appending coordinates to boxes. But if you want to use fewer classes than are present in the XML files, you do need the if statement; otherwise every box will be turned into a mask.
# extract bounding boxes from an annotation file
def extract_boxes(self, filename):
    # load and parse the file
    tree = ElementTree.parse(filename)
    # get the root of the document
    root = tree.getroot()
    # extract each bounding box
    boxes = list()
    for box in root.findall('.//object'):
        name = box.find('name').text
        xmin = int(box.find('./bndbox/xmin').text)
        ymin = int(box.find('./bndbox/ymin').text)
        xmax = int(box.find('./bndbox/xmax').text)
        ymax = int(box.find('./bndbox/ymax').text)
        coors = [xmin, ymin, xmax, ymax, name]
        if name == 'car' or name == 'rider':
            boxes.append(coors)
    # extract image dimensions
    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)
    return boxes, width, height
3) Finally, in load_mask, add an if-else statement so that each box's mask value and class id are assigned according to its class name.
# load the masks for an image
def load_mask(self, image_id):
    # get details of image
    info = self.image_info[image_id]
    # define box file location
    path = info['annotation']
    # load XML
    boxes, w, h = self.extract_boxes(path)
    # create one array for all masks, each on a different channel
    masks = zeros([h, w, len(boxes)], dtype='uint8')
    # create masks
    class_ids = list()
    for i in range(len(boxes)):
        box = boxes[i]
        row_s, row_e = box[1], box[3]
        col_s, col_e = box[0], box[2]
        if (box[4] == 'car'):
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index('car'))
        else:
            masks[row_s:row_e, col_s:col_e, i] = 2
            class_ids.append(self.class_names.index('rider'))
    return masks, asarray(class_ids, dtype='int32')
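As a quick sanity check of load_mask (an illustrative snippet reusing the question's train_set and image_id), you can confirm that exactly one class id is produced per mask channel, which is the assertion that originally failed:
# one class id per mask channel is what display_instances asserts
mask, class_ids = train_set.load_mask(image_id)
assert mask.shape[-1] == class_ids.shape[0]
print(mask.shape, class_ids)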
In my case, I need only 2 of the many classes available in the XML files, and using the above code I got the expected result.
If you want to train multiple classes you can use the following code.
1) In load_dataset, register each class with self.add_class(), and then modify the last line (add_image) to pass class_ids for the number of classes you have.
# define classes
self.add_class("dataset", 1, "class1name")
self.add_class("dataset", 2, "class2name")
# define data locations
images_dir = dataset_dir + '/images/'
annotations_dir = dataset_dir + '/annots/'
# find all images
for filename in listdir(images_dir):
    # extract image id
    image_id = filename[:-4]
    # skip bad images
    if image_id in ['00090']:
        continue
    # skip all images after 150 if we are building the train set
    if is_train and int(image_id) >= 150:
        continue
    # skip all images before 150 if we are building the test/val set
    if not is_train and int(image_id) < 150:
        continue
    img_path = images_dir + filename
    ann_path = annotations_dir + image_id + '.xml'
    # add to dataset
    self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path, class_ids=[0,1,2])
2) You don't need to modify anything in the function below:
def extract_boxes(self, filename):
    # load and parse the file
    tree = ElementTree.parse(filename)
    # get the root of the document
    root = tree.getroot()
    # extract each bounding box
    boxes = list()
    for box in root.findall('.//bndbox'):
        xmin = int(box.find('xmin').text)
        ymin = int(box.find('ymin').text)
        xmax = int(box.find('xmax').text)
        ymax = int(box.find('ymax').text)
        coors = [xmin, ymin, xmax, ymax]
        boxes.append(coors)
    # extract image dimensions
    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)
    return boxes, width, height
3) In the function below, "if i == 0" refers to the first bounding box. For multiple bounding boxes (i.e. multiple classes), use i == 1, i == 2, and so on.
# load the masks for an image
def load_mask(self, image_id):
    # get details of image
    info = self.image_info[image_id]
    # define box file location
    path = info['annotation']
    # load XML
    boxes, w, h = self.extract_boxes(path)
    # create one array for all masks, each on a different channel
    masks = zeros([h, w, len(boxes)], dtype='uint8')
    # create masks
    class_ids = list()
    for i in range(len(boxes)):
        box = boxes[i]
        row_s, row_e = box[1], box[3]
        col_s, col_e = box[0], box[2]
        # print()
        if i == 0:
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index('class1name'))
        else:
            masks[row_s:row_e, col_s:col_e, i] = 2
            class_ids.append(self.class_names.index('class2name'))
    # return boxes[0], masks, asarray(class_ids, dtype='int32') to check the points
    return masks, asarray(class_ids, dtype='int32')
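For completeness, here is a rough sketch of how these methods are usually wired into a matterport Dataset subclass before training (the class name and dataset directory below are placeholders, not part of the answers above):
from mrcnn.utils import Dataset

class MyDataset(Dataset):
    # paste load_dataset, extract_boxes and load_mask from above into this class
    pass

# build and prepare the training split
train_set = MyDataset()
train_set.load_dataset('dataset_dir', is_train=True)
train_set.prepare()
print('Images: %d, classes: %s' % (len(train_set.image_ids), train_set.class_names))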
I am currently working on a program that detects and matches fingerprints as part of a fingerprint sensor. After processing the image, I obtain key points using Harris corner detection. Then, using the ORB feature extractor, I obtain descriptors in the form of an array.
The problem is that the number of key points I get for two different images of the same fingerprint is different. Hence, the descriptor arrays obtained are also of different sizes.
I have used Hamming distances to measure the difference between the descriptor arrays of two images, and hence the difference between the fingerprints themselves. However, because of the different array sizes, I'm finding it difficult to set a threshold that works for all fingerprints.
def get_descriptors(img):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    img = clahe.apply(img)
    img = image_enhance.image_enhance(img)  # for image-processing
    img = numpy.array(img, dtype=numpy.uint8)
    # Threshold
    ret, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    # Normalize to 0 and 1 range
    img[img == 255] = 1
    # Thinning
    skeleton = skeletonize(img)
    skeleton = numpy.array(skeleton, dtype=numpy.uint8)
    skeleton = removedot(skeleton)
    # Harris corners
    harris_corners = cv2.cornerHarris(img, 3, 3, 0.04)
    harris_normalized = cv2.normalize(harris_corners, 0, 255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32FC1)
    threshold_harris = 125
    # Extract keypoints
    keypoints = []
    for x in range(0, harris_normalized.shape[0]):
        for y in range(0, harris_normalized.shape[1]):
            if harris_normalized[x][y] > threshold_harris:
                keypoints.append(cv2.KeyPoint(y, x, 1))
    # Define descriptor
    orb = cv2.ORB_create()
    # Compute descriptors
    _, des = orb.compute(img, keypoints)
    return (keypoints, des)
def main():
    img1 = cv2.imread("C:/Users/Nimesh Shahdadpuri/Desktop/DMRC Intern/database/106_1.tif", cv2.IMREAD_GRAYSCALE)
    kp1, des1 = get_descriptors(img1)
    #print (des1)
    #print (des1.shape)
    img2 = cv2.imread("C:/Users/Nimesh Shahdadpuri/Desktop/DMRC Intern/database/106_2.tif", cv2.IMREAD_GRAYSCALE)
    kp2, des2 = get_descriptors(img2)
    #print (des2)
    # Matching between descriptors
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des1, des2)
    matches = sorted(matches, key=lambda match: match.distance)
    #print (len(matches))
    # Plot keypoints
    img4 = cv2.drawKeypoints(img1, kp1, outImage=None)
    img5 = cv2.drawKeypoints(img2, kp2, outImage=None)
    #f, axarr = plt.subplots(1,2)
    print("First Fingerprint")
    #axarr[0].imshow(img4)
    plt.imshow(img4)
    plt.show()
    print("Second Fingerprint")
    #axarr[1].imshow(img5)
    plt.imshow(img5)
    plt.show()
    # Plot matches
    img3 = cv2.drawMatches(img1, kp1, img2, kp2, matches, flags=2, outImg=None)
    print("All the matching points and the corresponding distances")
    plt.imshow(img3)
    plt.show()
    # Calculate score
    score = 0
    for match in matches:
        score += match.distance
    score_threshold = 40
    matchper = score/len(matches)
    print(matchper)
    if matchper < score_threshold:
        print("Fingerprint matches.")
    else:
        print("Fingerprint does not match.")
I am looking for an efficient way to define a general threshold for all fingerprints. I would also appreciate suggestions for an alternative approach to defining and matching the key points.
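For reference, the scoring already used in main() reduces to the average Hamming distance over the cross-checked matches, which at least sidesteps the different descriptor-array sizes; isolated as a function (a sketch reusing the question's threshold of 40) it looks like this:
def average_match_distance(des1, des2):
    # lower average distance means the two fingerprints are more alike
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = bf.match(des1, des2)
    if not matches:
        return float('inf')
    return sum(m.distance for m in matches) / len(matches)

# same decision rule as in main(): accept if the average distance is below 40
print("match" if average_match_distance(des1, des2) < 40 else "no match")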
I'm trying to remove numbers lying inside the circular part of an image; the numbers are black and the background varies between red, yellow, blue, and green.
I am using OpenCV to remove those numbers. I build a mask that extracts the numbers from the image and then try to remove them with cv2.inpaint.
For my further analysis I need a clean image, but my current approach gives a distorted image and the numbers are not completely removed.
I tried changing the threshold value; lowering it misses the numbers in the dark-shaded areas such as the green and red regions.
import cv2
img = cv2.imread('scan_1.jpg')
mask = cv2.threshold(img,50,255,cv2.THRESH_BINARY_INV)[1][:,:,0]
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
dst = cv2.inpaint(img, mask, 5, cv2.INPAINT_TELEA)
cv2.imshow('dst',dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite('ost_1.jpg',dst)
Input images: a) scan_1.jpg
b) scan_2.jpg
Output images: a) ost_1.jpg
b) ost_2.jpg
Expected image: the circles can be ignored, but something similar to this is required.
Here is my attempt; a better/easier solution might be possible if you do not care about preserving the text outside of the circle.
import cv2
import numpy as np
# connectivity method used for finding connected components, 4 vs 8
CONNECTIVITY = 4
# HSV threshold for finding black pixels
H_THRESHOLD = 179
S_THRESHOLD = 255
V_THRESHOLD = 150
# read image
img = cv2.imread("a1.jpg")
img_height = img.shape[0]
img_width = img.shape[1]
# save a copy for creating resulting image
result = img.copy()
# convert image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# found the circle in the image
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.7, minDist= 100, param1 = 48, param2 = 100, minRadius=70, maxRadius=100)
# draw found circle, for visual only
circle_output = img.copy()
# check if we found exactly 1 circle
num_circles = len(circles)
print("Number of found circles:{}".format(num_circles))
if (num_circles != 1):
    print("invalid number of circles found ({}), should be 1".format(num_circles))
    exit(0)
# save center position and radius of found circle
circle_x = 0
circle_y = 0
circle_radius = 0
if circles is not None:
    # convert the (x, y) coordinates and radius of the circles to integers
    circles = np.round(circles[0, :]).astype("int")
    for (x, y, radius) in circles:
        circle_x, circle_y, circle_radius = (x, y, radius)
        cv2.circle(circle_output, (circle_x, circle_y), circle_radius, (255, 0, 0), 4)
        print("circle center:({},{}), radius:{}".format(x, y, radius))
# keep a median filtered version of image, will be used later
median_filtered = cv2.medianBlur(img, 21)
# Convert BGR to HSV
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# define range of black color in HSV
lower_val = np.array([0,0,0])
upper_val = np.array([H_THRESHOLD,S_THRESHOLD,V_THRESHOLD])
# Threshold the HSV image to get only black colors
mask = cv2.inRange(hsv, lower_val, upper_val)
# find connected components
components = cv2.connectedComponentsWithStats(mask, CONNECTIVITY, cv2.CV_32S)
# apply median filtering to found components
#centers = components[3]
num_components = components[0]
print("Number of found connected components:{}".format(num_components))
labels = components[1]
stats = components[2]
for i in range(1, num_components):
    left = stats[i, cv2.CC_STAT_LEFT] - 10
    top = stats[i, cv2.CC_STAT_TOP] - 10
    width = stats[i, cv2.CC_STAT_WIDTH] + 10
    height = stats[i, cv2.CC_STAT_HEIGHT] + 10
    # iterate each pixel and replace them if
    # they are inside circle
    for row in range(top, top+height+1):
        for col in range(left, left+width+1):
            dx = col - circle_x
            dy = row - circle_y
            if (dx*dx + dy*dy <= circle_radius * circle_radius):
                result[row, col] = median_filtered[row, col]
# smooth the image, may be necessary?
#result = cv2.blur(result, (3,3))
# display image(s)
cv2.imshow("img", img)
cv2.imshow("gray", gray)
cv2.imshow("found circle:", circle_output)
cv2.imshow("mask", mask)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result for a1:
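As a side note on the pixel-replacement loop above, a simpler variant (not exactly the same behaviour as the component-box loop) replaces every pixel that is both inside the circle and flagged black by the HSV mask, using a NumPy boolean mask instead of nested Python loops:
# boolean mask of pixels inside the detected circle
ys, xs = np.mgrid[0:img_height, 0:img_width]
inside_circle = (xs - circle_x) ** 2 + (ys - circle_y) ** 2 <= circle_radius ** 2

# copy median-filtered pixels wherever the black-text mask fires inside the circle
replace = inside_circle & (mask > 0)
result[replace] = median_filtered[replace]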
I'm trying to write code that identifies the eyes of parakeets. So far, I have managed to use code that identifies the circle edges and got great results at a certain threshold, but I can't save the result image.
I've tried using imwrite('result.png', clone) to save the result at the end of the code, but when I run it I get TypeError: Expected cv::UMat for argument 'img'.
I also need the clone image to be colored, but I have no idea where to start.
import cv2
import numpy as np
import imutils
def nothing(x):
    pass

# Load an image
img = cv2.imread('sample.png')

# Resize the image
if img.shape[1] > 600:
    img = imutils.resize(img, width=600)
# Create a window
cv2.namedWindow('Treshed')
# create trackbars for treshold change
cv2.createTrackbar('Treshold','Treshed',0,255,nothing)
while(1):
    # Clone original image to not overlap drawings
    clone = img.copy()
    # Convert to gray
    gray = cv2.cvtColor(clone, cv2.COLOR_BGR2GRAY)
    # get current positions of four trackbars
    r = cv2.getTrackbarPos('Treshold', 'Treshed')
    # Thresholding the gray image
    ret, gray_threshed = cv2.threshold(gray, r, 255, cv2.THRESH_BINARY)
    # Blur an image
    bilateral_filtered_image = cv2.bilateralFilter(gray_threshed, 5, 175, 175)
    # Detect edges
    edge_detected_image = cv2.Canny(bilateral_filtered_image, 75, 200)
    # Find contours
    contours, _ = cv2.findContours(edge_detected_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contour_list = []
    for contour in contours:
        # approximate for circles
        approx = cv2.approxPolyDP(contour, 0.01*cv2.arcLength(contour, True), True)
        area = cv2.contourArea(contour)
        if ((len(approx) > 8) & (area > 30)):
            contour_list.append(contour)
    # Draw contours on the original image
    cv2.drawContours(clone, contour_list, -1, (255,0,0), 2)
    # there is an outer boundary and inner boundary for each edge, so contours double
    print('Number of found circles: {}'.format(int(len(contour_list)/2)))
    # Displaying the results
    cv2.imshow('Objects Detected', clone)
    cv2.imshow("Treshed", gray_threshed)
    # ESC to break
    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        break
# close all open windows
cv2.destroyAllWindows()
I just tried this modification and it works flawlessly.
    # ESC to break
    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        cv2.imwrite('result.png', clone)
        break
There's either a misconception about when/where to call imwrite, or something is off with your Python/OpenCV version. I ran it in PyCharm using Python 3.6.8 and opencv-python 4.0.0.21.
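If the same TypeError ever shows up on another setup, one cheap defensive step (a sketch, not needed on the versions above) is to force the image into a contiguous uint8 NumPy array right before saving:
import numpy as np

    k = cv2.waitKey(1) & 0xFF
    if k == 27:
        # imwrite expects a plain NumPy array; this is a no-op if clone already is one
        out = np.ascontiguousarray(clone, dtype=np.uint8)
        cv2.imwrite('result.png', out)
        break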