Bounding box detection is off - python-3.x

Hi -
I am playing around with tensor flows object detection for raspberry pi. https://github.com/tensorflow/examples/blob/master/lite/examples/object_detection/raspberry_pi/README.md
My problem is, Tensorflow detects the object correctly. However, incorrectly gives coordinates to the detected object. It's driving me bananas. I can't figure it out.
PFB:
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
'--model', help='File path of .tflite file.', required=True)
parser.add_argument(
'--labels', help='File path of labels file.', required=True)
parser.add_argument(
'--threshold',
help='Score threshold for detected objects.',
required=False,
type=float,
default=0.4)
args = parser.parse_args()
np.set_printoptions(threshold=sys.maxsize)
labels = load_labels(args.labels)
interpreter = Interpreter(args.model)
interpreter.allocate_tensors()
_, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']
with picamera.PiCamera(
resolution=(CAMERA_WIDTH, CAMERA_HEIGHT), framerate=30) as camera:
camera.rotation = 180
camera.start_preview()
try:
print("works!!!")
stream = io.BytesIO()
annotator = Annotator(camera)
count = 0
for _ in camera.capture_continuous(
stream, format='jpeg', use_video_port=True):
stream.seek(0)
image = Image.open(stream).convert('RGB').resize(
(input_width, input_height), Image.ANTIALIAS)
#image = image.rotate(180, PIL.Image.NEAREST, expand = 1)
start_time = time.monotonic()
results = detect_objects(interpreter, image, args.threshold)
elapsed_ms = (time.monotonic() - start_time) * 1000
open_cv_image = np.array(image)
distance = us.get_distance()
print(distance)
print("*********************NUM PY DATA **********************")
print(results)
#print(type(image))
filename = "geeks"
filename += str(count)
filename += ".png"
annotator.clear()
print("Annotating!!")
print(annotator)
annotate_objects(annotator,results,labels,open_cv_image, count)
annotator.text([5, 0], '%.1fms' % (elapsed_ms))
annotator.update()
print(annotator)
count += 1
stream.seek(0)
stream.truncate()
finally:
camera.stop_preview()
if __name__ == '__main__':
main()
def annotate_objects(annotator, results, labels, npcv, count):
"""Draws the bounding box and label for each object in the results."""
window_name = 'Image'
image = npcv
filename="image"
filename+=str(count)
filename+=".jpg"
for obj in results:
# Convert the bounding box figures from relative coordinates
# to absolute coordinates based on the original resolution
ymin, xmin, ymax, xmax = obj['bounding_box']
xmin = int(xmin * CAMERA_WIDTH)
xmax = int(xmax * CAMERA_WIDTH)
ymin = int(ymin * CAMERA_HEIGHT)
ymax = int(ymax * CAMERA_HEIGHT)
if obj['score'] >= 0.60:
start_point = (xmin,ymin)
end_point = (xmax,ymax)
color = (255, 0, 0)
thickness = 20
image = cv2.rectangle(image, start_point, end_point, color, thickness)
font = cv2.FONT_HERSHEY_SIMPLEX
org = (xmin, ymin)
fontScale = 0.5
color = (255, 0, 0)
text = labels[obj['class_id']]
thickness = 2
image = cv2.putText(image, text , org, font,
fontScale, color, thickness, cv2.LINE_AA)
annotator.bounding_box([xmin, ymin, xmax, ymax])
annotator.text([xmin, ymin],
'%s\n%.2f' % (labels[obj['class_id']], obj['score']))
print(labels[obj['class_id']], obj['score'])
print(xmin, ymin, xmax, ymax)
cv2.imwrite(filename, image)

I can't see what's wrong off hand, but it seems like something with the coordinates systems since the lower-right corner of the box is out of frame, or maybe the resizing. I'd start by looking at these values - can you print these out:
CAMERA_WIDTH, CAMERA_HEIGHT
input_height, input_width
ymin, xmin, ymax, xmax
start_point #(xmin,ymin)
end_point #(xmax,ymax)
And then display the model input image:
image = Image.open(stream).convert('RGB').resize(
(input_width, input_height), Image.ANTIALIAS)
And then display the image you are using to draw the bounding box:
open_cv_image

Related

using openCV to open the webcam and take picture with it every five seconds

I tried to use the webcam and take pictures every 5 seconds via openCV but the cam itself didn't work and kept causing the error...
I also tried changing the integer in the cv2.VideoCapture() to -1 and 1 but still that didn't work.
This is the form of the error: "[ WARN:0] global /io/opencv/modules/videoio/src/cap_v4l.cpp (802)
open VIDEOIO ERROR: V4L: can't open camera by index 0
Traceback (most recent call last):
File "webcam_detect.py", line 176, in
raise IOError("Cannot open webcam")
OSError: Cannot open webcam"
import colorsys
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
import time
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import image_preporcess
class YOLO(object):
_defaults = {
#"model_path": 'logs/trained_weights_final.h5',
"model_path": 'model_data/yolo_weights.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"text_size" : 1,
}
#classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = image_preporcess(np.copy(image), tuple(reversed(self.model_image_size)))
image_data = boxed_image
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.shape[0], image.shape[1]],#[image.size[1], image.size[0]],
K.learning_phase(): 0
})
#print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
thickness = (image.shape[0] + image.shape[1]) // 600
fontScale=1
ObjectsList = []
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
#label = '{}'.format(predicted_class)
scores = '{:.2f}'.format(score)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.shape[0], np.floor(bottom + 0.5).astype('int32'))
right = min(image.shape[1], np.floor(right + 0.5).astype('int32'))
mid_h = (bottom-top)/2+top
mid_v = (right-left)/2+left
# put object rectangle
cv2.rectangle(image, (left, top), (right, bottom), self.colors[c], thickness)
# get text size
(test_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, thickness/self.text_size, 1)
# put text rectangle
cv2.rectangle(image, (left, top), (left + test_width, top - text_height - baseline), self.colors[c], thickness=cv2.FILLED)
# put text above rectangle
cv2.putText(image, label, (left, top-2), cv2.FONT_HERSHEY_SIMPLEX, thickness/self.text_size, (0, 0, 0), 1)
# add everything to list
ObjectsList.append([top, left, bottom, right, mid_v, mid_h, label, scores])
return image, ObjectsList
def close_session(self):
self.sess.close()
def detect_img(self, image):
#image = cv2.imread(image, cv2.IMREAD_COLOR)
original_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
original_image_color = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
r_image, ObjectsList = self.detect_image(original_image_color)
return r_image, ObjectsList
if __name__=="__main__":
yolo = YOLO()
# set start time to current time
#start_time = time.time()
# displays the frame rate every 2 second
#display_time = 2
# Set primarry FPS to 0
#fps = 0
# we create the video capture object cap
cap = cv2.VideoCapture(0)
if not cap.isOpened():
raise IOError("Cannot open webcam")
cap.set(3, 640)
cap.set(4,480)
img_counter=0
frame_set=[]
start_time=time.time()
#if not cap.isOpened():
#raise IOError("We cannot open webcam")
#while True:
#ret, frame = cap.read()
# resize our captured frame if we need
#frame = cv2.resize(frame, None, fx=1.0, fy=1.0,
interpolation=cv2.INTER_AREA)
# detect object on our frame
#r_image, ObjectsList = yolo.detect_img(frame)
# show us frame with detection
#cv2.imshow("Web cam input", r_image)
#if cv2.waitKey(25) & 0xFF == ord("q"):
#cv2.destroyAllWindows()
#break
# calculate FPS
#fps += 1
#TIME = time.time() - start_time
#if TIME > display_time:
#print("FPS:", fps / TIME)
#fps = 0
#start_time = time.time()
while True:
ret, frame = cap.read()
frame = cv2.resize(frame, None, fx=1.0, fy=1.0,
interpolation=cv2.INTER_AREA)
r_image, ObjectsList=yolo.detect_img(frame)
cv2.imshow('Web cam input', r_image)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if time.time() - start_time >= 5: #<---- Check if 5 sec passed
img_name = "opencv_frame_{}.png".format(img_counter)
cv2.imwrite(img_name, frame)
print("{} written!".format(img_counter))
start_time = time.time()
img_counter += 1
cap.release()
cv2.destroyAllWindows()
yolo.close_session()

Python OpenCv2 place image over face found

I am loading several images will go over my face and I am having difficulty getting the image to go over the square for face created. I have looked at a many resources , but for some reason I am receiving an error when attempting to follow their method.
Every time I do so , I receive an error
ValueError: could not broadcast input array from shape (334,334,3) into shape (234,234,3)
I think the images might be too large, however I tried to resize them to see if they will fit to no avail.
here is my code:
import cv2
import sys
import logging as log
import datetime as dt
from time import sleep
import os
import random
from timeit import default_timer as timer
cascPath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascPath)
#log.basicConfig(filename='webcam.log',level=log.INFO)
video_capture = cv2.VideoCapture(0)
anterior = 0
#s_img = cv2.imread("my.jpg")
increment = 0
for filename in os.listdir("Faces/"):
if filename.endswith(".png"):
FullFile = (os.path.join("Faces/", filename))
#ret, frame = video_capture.read()
frame = cv2.imread(FullFile, cv2.IMREAD_UNCHANGED)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale( gray,scaleFactor=1.1, minNeighbors=5, minSize=(30, 30) )
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
for (x, y, w, h) in faces:
roi_color = frame[y:( y ) + ( h ), x:x + w]
status = cv2.imwrite('export/faces_detected'+ str( increment ) +'.png', roi_color)
increment = increment + 1
else:
continue
masks = []
for filename in os.listdir("export/"):
if filename.endswith(".png"):
FullFile = (os.path.join("export/", filename))
s_img = cv2.imread(FullFile)
masks.append(s_img)
Start = timer()
End = timer()
MasksSize = len(masks)
nrand = random.randint(0, MasksSize -1 )
increment = 0
while True:
if not video_capture.isOpened():
print('Unable to load camera.')
sleep(5)
pass
# Capture frame-by-frame
ret, frame = video_capture.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=5,
minSize=(30, 30)
)
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
# Draw a rectangle around the faces
for (x, y, w, h) in faces:
if (End - Start) > 3:
Start = timer()
End = timer()
nrand = random.randint(0, MasksSize -1 )
# -75 and +20 added to fit my face
cv2.rectangle(frame, (x, y - 75), (x+w, y+h+20), (0, 255, 0), 2)
s_img = masks[nrand]
increment = increment + 1
#maskresize = cv2.resize(s_img, (150, 150))
#frame[y:y+s_img.shape[0] , x:x+s_img.shape[1]] = s_img # problem occurs here with
# ValueError: could not broadcast input array from shape (334,334,3) into shape (234,234,3)
# I assume I am inserting somethign too big?
End = timer()
if anterior != len(faces):
anterior = len(faces)
#log.info("faces: "+str(len(faces))+" at "+str(dt.datetime.now()))
# Display the resulting frame
cv2.imshow('Video', frame)
#cv2.imshow('Video', cartoon)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Display the resulting frame
cv2.imshow('Video', frame)
# When everything is done, release the capture
video_capture.release()
cv2.destroyAllWindows()
In the following line,
frame[y:y+s_img.shape[0] , x:x+s_img.shape[1]] = s_img
you are trying to attempt to assign s_img to frame[y:y+s_img.shape[0] , x:x+s_img.shape[1]] which are of different shapes.
You can check the shapes of the two by printing the shape (it will be the same as the shapes mentioned in the error).
Try reshaping s_img to the same shape and then try to assign.
Refer to this link:https://www.geeksforgeeks.org/image-resizing-using-opencv-python/
I used this function to resize the image to scale.
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
# initialize the dimensions of the image to be resized and
# grab the image size
dim = None
(h, w) = image.shape[:2]
# if both the width and height are None, then return the
# original image
if width is None and height is None:
return image
# check to see if the width is None
if width is None:
# calculate the ratio of the height and construct the
# dimensions
r = height / float(h)
dim = (int(w * r), height)
# otherwise, the height is None
else:
# calculate the ratio of the width and construct the
# dimensions
r = width / float(w)
dim = (width, int(h * r))
# resize the image
resized = cv2.resize(image, dim, interpolation = inter)
# return the resized image
return resized
Then later on called
r= image_resize(s_img, height = h, width=w)
frame[y:y+r.shape[0] , x:x+r.shape[1]] = r
Answer taken from here too:
Resize an image without distortion OpenCV

Undefined name problem in camera calibration

I am using the same code that is provided by the OpenCv tutorial, it was working few weeks ago, today I was trying to run it is says that gray name is not defined!! can some one find me the error?
import numpy as np
#import matplotlib.pyplot as plt
import cv2
import glob
import os
def draw(img, corners, imgpts):
corner = tuple(corners[0].ravel())
img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)
img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)
img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)
return img
# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((7*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:7].T.reshape(-1,2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
img_dir = "C:\\Hungary\\Biblography\\Rotating Solitary Wave\\My Work\\Final Work\\Experiment1111 \\Camera Calibration\\Image Processing\\chess"
data_path = os.path.join(img_dir,'*bmp')
images = glob.glob(data_path)
for fname in images:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv2.findChessboardCorners(gray, (7,7),None)
# If found, add object points, image points (after refining them)
if ret == True:
objpoints.append(objp)
corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
imgpoints.append(corners2)
# Draw and display the corners
img = cv2.drawChessboardCorners(img, (7,7), corners2,ret)
cv2.imshow('img',img)
cv2.waitKey(500)
cv2.destroyAllWindows()
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape [::-1],None,None)
print('Rotation Vector, or the Angles For Each Photo: ', rvecs, '\n')
R = cv2.Rodrigues(rvecs[0])
print('The Rotation Matrix is: ', R)
print('Translation Vector: ', tvecs, '\n')
print(mtx, '\n')
print('Distortion Coefficients ', dist, '\n')
img = cv2.imread('00000274.bmp')
h, w = img.shape[:2]
newcameramtx, roi=cv2.getOptimalNewCameraMatrix(mtx,dist,(w,h),1,(w,h))
print('Camera Matrix', newcameramtx, '\n')
# undistort
dst = cv2.undistort(img, mtx, dist) #, None, newcameramtx)
p = np.ones_like(dst)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
# undistort
mapx,mapy = cv2.initUndistortRectifyMap(mtx,dist,None,newcameramtx,(w,h),5)
dst = cv2.remap(img,mapx,mapy,cv2.INTER_LINEAR)
# crop the image
x,y,w,h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png',dst)
mean_error = 0
for i in range(len(objpoints)):
imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
error = cv2.norm(imgpoints[i],imgpoints2, cv2.NORM_L2)/len(imgpoints2)
mean_error += error
print("total error: ", mean_error/len(objpoints))
If you read the opencv document you will find that I did little changes on the code and it was working but today it is raising this error about the gray name is not defined!
Check your path once, and see if images is an empty list. In that case, for loop will not be executed where the gray variable is defined.

Resizing image and its bounding box

I have an image with bounding box in it, and I want to resize the image.
img = cv2.imread("img.jpg",3)
x_ = img.shape[0]
y_ = img.shape[1]
img = cv2.resize(img,(416,416));
Now I want to calculate the scale factor:
x_scale = ( 416 / x_)
y_scale = ( 416 / y_ )
And draw an image, this is the code for the original bounding box:
( 128, 25, 447, 375 ) = ( xmin,ymin,xmax,ymax)
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
However using this I get:
While the original is:
I don't see any flag in this logic, what's wrong?
Whole code:
imageToPredict = cv2.imread("img.jpg",3)
print(imageToPredict.shape)
x_ = imageToPredict.shape[0]
y_ = imageToPredict.shape[1]
x_scale = 416/x_
y_scale = 416/y_
print(x_scale,y_scale)
img = cv2.resize(imageToPredict,(416,416));
img = np.array(img);
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
Box.drawBox([[1,0, x,y,xmax,ymax]],img)
and drawbox
def drawBox(boxes, image):
for i in range (0, len(boxes)):
cv2.rectangle(image,(boxes[i][2],boxes[i][3]),(boxes[i][4],boxes[i][5]),(0,0,120),3)
cv2.imshow("img",image)
cv2.waitKey(0)
cv2.destroyAllWindows()
The image and the data for the bounding box are loaded separately. I am drawing the bounding box inside the image. The image does not contain the box itself.
I believe there are two issues:
You should swap x_ and y_ because shape[0] is actually y-dimension and shape[1] is the x-dimension
You should use the same coordinates on the original and scaled image. On your original image the rectangle is (160, 35) - (555, 470) rather than (128,25) - (447,375) that you use in the code.
If I use the following code:
import cv2
import numpy as np
def drawBox(boxes, image):
for i in range(0, len(boxes)):
# changed color and width to make it visible
cv2.rectangle(image, (boxes[i][2], boxes[i][3]), (boxes[i][4], boxes[i][5]), (255, 0, 0), 1)
cv2.imshow("img", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
def cvTest():
# imageToPredict = cv2.imread("img.jpg", 3)
imageToPredict = cv2.imread("49466033\\img.png ", 3)
print(imageToPredict.shape)
# Note: flipped comparing to your original code!
# x_ = imageToPredict.shape[0]
# y_ = imageToPredict.shape[1]
y_ = imageToPredict.shape[0]
x_ = imageToPredict.shape[1]
targetSize = 416
x_scale = targetSize / x_
y_scale = targetSize / y_
print(x_scale, y_scale)
img = cv2.resize(imageToPredict, (targetSize, targetSize));
print(img.shape)
img = np.array(img);
# original frame as named values
(origLeft, origTop, origRight, origBottom) = (160, 35, 555, 470)
x = int(np.round(origLeft * x_scale))
y = int(np.round(origTop * y_scale))
xmax = int(np.round(origRight * x_scale))
ymax = int(np.round(origBottom * y_scale))
# Box.drawBox([[1, 0, x, y, xmax, ymax]], img)
drawBox([[1, 0, x, y, xmax, ymax]], img)
cvTest()
and use your "original" image as "49466033\img.png",
I get the following image
And as you can see my thinner blue line lies exactly inside your original red line and it stays there whatever targetSize you chose (so the scaling actually works correctly).
Another way of doing this is to use CHITRA
image = Chitra(img_path, box, label)
# Chitra can rescale your bounding box automatically based on the new image size.
image.resize_image_with_bbox((224, 224))
print('rescaled bbox:', image.bounding_boxes)
plt.imshow(image.draw_boxes())
https://chitra.readthedocs.io/en/latest/
pip install chitra
I encountered an issue with bounding box coordinates in Angular when using TensorFlow.js and MobileNet-v2 for prediction. The coordinates were based on the resolution of the video frame.
but I was displaying the video on a canvas with a fixed height and width. I resolved the issue by dividing the coordinates by the ratio of the original video resolution to the resolution of the canvas.
const x = prediction.bbox[0] / (this.Owidth / 300);
const y = prediction.bbox[1] / (this.Oheight / 300);
const width = prediction.bbox[2] / (this.Owidth / 300);
const height = prediction.bbox[3] / (this.Oheight / 300);
// Draw the bounding box.
ctx.strokeStyle = '#99ff00';
ctx.lineWidth = 2;
ctx.strokeRect(x, y, width, height);
this.Owidth & this.Oheight are original resolution of video. it is obtained by.
this.video.addEventListener(
'loadedmetadata',
(e: any) => {
this.Owidth = this.video.videoWidth;
this.Oheight = this.video.videoHeight;
console.log(this.Owidth, this.Oheight, ' pixels ');
},
false
);
300 X 300 is my static canvas width and height.
you can use the resize_dataset_pascalvoc
it's easy to use python3 main.py -p <IMAGES_&_XML_PATH> --output <IMAGES_&_XML> --new_x <NEW_X_SIZE> --new_y <NEW_X_SIZE> --save_box_images <FLAG>"
It resize all your dataset and rewrite new annotations files to resized images

Reading a Meter using OpenCV

I am trying to read values on my electricity meter LCD display using opencv, From my picture I am able to find meter using HoughCircles method, I am able to find LCD display on meter using contours, the lcd display isn't so clear so again I search for contours to extract digits from display. Now I am unable to read values on the display using tesseract or ssocr, how can i read the values on LCD display. I just started using opencv (Beginner), don't know the right way to go from here and if my approach is correct, would appreciate any help. Below is my code snippet and the meter images links are in comments.
def process_image(path, index):
img = cv2.imread(path)
img = cv2.resize(img,(0,0),fx=2.0,fy=2.0)
height, width, depth = img.shape
print("\n---------------------------------------------\n")
print("In Process Image Path is %s height is %d Width is %d depth is %d" %(path, height, width, depth))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 15)
circles = cv2.HoughCircles(blur, cv2.HOUGH_GRADIENT,1.2,100)
# ensure at least one circles is found, which is our meter
if circles is not None:
circles = np.uint16(np.around(circles))
print("Meter Found")
for i in circles[0]:
CenterX = i[0]
CenterY = i[1]
Radius = i[2]
circle_img = np.zeros((height, width), np.uint8)
cv2.circle(circle_img, (CenterX, CenterY), Radius, 1, thickness=-1)
masked_data = cv2.bitwise_and(img, img, mask=circle_img)
output = masked_data.copy()
cv2.circle(output, (i[0], i[1]), i[2], (0, 255, 0), 2)
cv2.circle(output, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imwrite("output_" + str(index) + ".jpg", output)
break
gray = cv2.cvtColor(output, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray,(5,5),1)
edged = cv2.Canny(blurred, 5,10,200)
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
contour_list = []
# loop over the contours
for c in cnts:
# approximate the contour
peri = 0.02 * cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c,peri, True)
# if the contour has four vertices, then we have found
# the meter display
if len(approx) == 4:
contour_list.append(c)
cv2.contourArea(c)
displayCnt = approx
break
warped = four_point_transform(gray, displayCnt.reshape(4, 2))
output = four_point_transform(output, displayCnt.reshape(4, 2))
thresh = cv2.adaptiveThreshold(warped, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY, 31, 2)
cnts = cv2.findContours(thresh.copy(), cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
digitCnts = []
# loop over the digit area candidates
for c in cnts:
# compute the bounding box of the contour
(x, y, w, h) = cv2.boundingRect(c)
# if the contour is sufficiently large, it must be a digit
if (w > 5 and w < 100) and (h >= 15 and h <= 150) :
digitCnts.append(c)
# sort the contours from left-to-right, then initialize the
# actual digits themselves
digitCnts = contours.sort_contours(digitCnts,method="left-to-right")[0]
mask = np.zeros(thresh.shape, np.uint8)
cv2.drawContours(mask, digitCnts, -80, (255, 255, 255),-1)
mask = cv2.bitwise_not(mask)
mask = cv2.resize(mask, (0, 0), fx=2.0, fy=2.0)
result = os.popen('/usr/local/bin/ssocr --number-digits=-1 -t 10 Mask.jpg')
output = result.read()
print("Output is " + output)
output = output[2:8]
return str(round(float(output) * 0.1, 1))
else:
print("Circle not Found")
print("\n---------------------------------------------\n")
return None

Resources