How to clear numbers from the image using OpenCV? - python-3.x

I'm trying to remove the numbers lying inside the circular part of the image. The numbers are black, and the background varies between red, yellow, blue, and green.
I am using OpenCV to remove those numbers: I build a mask that extracts the numbers from the image and then try to remove them with cv2.inpaint.
For my further analysis I need a clean image, but my current approach gives a distorted image and the numbers are not completely removed.
I tried changing the threshold value; lowering it misses the numbers in the dark shaded areas, such as the green and red regions.
import cv2

img = cv2.imread('scan_1.jpg')
# threshold every channel, then keep the blue channel as the mask of dark (number) pixels
mask = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)[1][:, :, 0]
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
# inpaint the masked pixels from the surrounding colors
dst = cv2.inpaint(img, mask, 5, cv2.INPAINT_TELEA)
cv2.imshow('dst', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite('ost_1.jpg', dst)
Input images: a) scan_1.jpg
b) scan_2.jpg
Output images: a) ost_1.jpg
b) ost_2.jpg
Expected image: the circles can be ignored, but something similar to this is required.

Here is my attempt; a better/easier solution might be possible if you do not care about preserving the text outside of your circle.
import cv2
import numpy as np
# connectivity method used for finding connected components, 4 vs 8
CONNECTIVITY = 4
# HSV threshold for finding black pixels
H_THRESHOLD = 179
S_THRESHOLD = 255
V_THRESHOLD = 150
# read image
img = cv2.imread("a1.jpg")
img_height = img.shape[0]
img_width = img.shape[1]
# save a copy for creating resulting image
result = img.copy()
# convert image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# find the circle in the image
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.7, minDist= 100, param1 = 48, param2 = 100, minRadius=70, maxRadius=100)
# draw found circle, for visual only
circle_output = img.copy()
# check that we found exactly 1 circle
if circles is None:
    print("no circles found, should be 1")
    exit(0)
# convert the (x, y) coordinates and radius of the circles to integers
circles = np.round(circles[0, :]).astype("int")
num_circles = len(circles)
print("Number of found circles:{}".format(num_circles))
if num_circles != 1:
    print("invalid number of circles found ({}), should be 1".format(num_circles))
    exit(0)
# save center position and radius of the found circle
for (x, y, radius) in circles:
    circle_x, circle_y, circle_radius = (x, y, radius)
    cv2.circle(circle_output, (circle_x, circle_y), circle_radius, (255, 0, 0), 4)
    print("circle center:({},{}), radius:{}".format(x, y, radius))
# keep a median filtered version of image, will be used later
median_filtered = cv2.medianBlur(img, 21)
# Convert BGR to HSV
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# define range of black color in HSV
lower_val = np.array([0,0,0])
upper_val = np.array([H_THRESHOLD,S_THRESHOLD,V_THRESHOLD])
# Threshold the HSV image to get only black colors
mask = cv2.inRange(hsv, lower_val, upper_val)
# find connected components
components = cv2.connectedComponentsWithStats(mask, CONNECTIVITY, cv2.CV_32S)
# apply median filtering to found components
#centers = components[3]
num_components = components[0]
print("Number of found connected components:{}".format(num_components))
labels = components[1]
stats = components[2]
for i in range(1, num_components):
    # pad the component's bounding box a little, clamped to the image borders
    left = max(stats[i, cv2.CC_STAT_LEFT] - 10, 0)
    top = max(stats[i, cv2.CC_STAT_TOP] - 10, 0)
    right = min(left + stats[i, cv2.CC_STAT_WIDTH] + 20, img_width - 1)
    bottom = min(top + stats[i, cv2.CC_STAT_HEIGHT] + 20, img_height - 1)
    # iterate over each pixel and replace it with the median-filtered
    # value if it lies inside the circle
    for row in range(top, bottom + 1):
        for col in range(left, right + 1):
            dx = col - circle_x
            dy = row - circle_y
            if dx * dx + dy * dy <= circle_radius * circle_radius:
                result[row, col] = median_filtered[row, col]
# smooth the image, may be necessary?
#result = cv2.blur(result, (3,3))
# display image(s)
cv2.imshow("img", img)
cv2.imshow("gray", gray)
cv2.imshow("found circle:", circle_output)
cv2.imshow("mask", mask)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result for a1:
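As a variant (my sketch, not part of the original answer), the same HSV mask, restricted to the detected circle, could also be fed back into the asker's cv2.inpaint call instead of the per-pixel median replacement, assuming the variables from the code above:

# keep only the mask pixels inside the detected circle, then inpaint them
circle_mask = np.zeros_like(mask)
cv2.circle(circle_mask, (circle_x, circle_y), circle_radius, 255, -1)
inpaint_mask = cv2.bitwise_and(mask, circle_mask)
inpainted = cv2.inpaint(img, inpaint_mask, 5, cv2.INPAINT_TELEA)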

Related

How to reduce noise in contours using Python OpenCV

I am trying to detect the orange beads in the image below.
To detect them, I first cropped the region from the original image and then set low and high HSV bounds to detect orange. This seems to be working fine. Below is the detected image:
Below is the code:
import cv2
import numpy as np

win_name = "Image"
cv2.namedWindow(win_name)
img = cv2.imread('image.png')
orangeImg = img[420:510, 457:953]
hsv = cv2.cvtColor(orangeImg, cv2.COLOR_BGR2HSV)
lower_bound = np.array([0, 80, 80])
upper_bound = np.array([20, 255, 255])
origMask = cv2.inRange(hsv, lower_bound, upper_bound)
contours, hierarchy = cv2.findContours(origMask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for c in contours:
    # only box contours that are big enough
    if cv2.contourArea(c) >= 20.0:
        rectX, rectY, rectWidth, rectHeight = cv2.boundingRect(c)
        color = (0, 0, 255)
        cv2.rectangle(orangeImg, (rectX, rectY), (rectX + rectWidth, rectY + rectHeight), color, 2)
cv2.imshow(win_name, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
In the output image, you can notice that there is still some noise around the created bounding boxes. Is there a better way to reduce the noise? Also, is there a way to count the detected contours in the image?
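One common way to reduce that noise (a hedged sketch, not from the original thread) is to apply a morphological opening to origMask before finding contours; counting is then just the length of the filtered contour list:

import cv2
import numpy as np

# remove small speckles from the binary mask with a morphological opening
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
cleanMask = cv2.morphologyEx(origMask, cv2.MORPH_OPEN, kernel)
contours, _ = cv2.findContours(cleanMask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# count only the contours large enough to be beads
detected = [c for c in contours if cv2.contourArea(c) >= 20.0]
print("Detected contours:", len(detected))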

Fetching text specifically digits from an image OpenCV

I am trying to fetch label 0026 from the attached image:
Initial input image
I tried below code initially to fetch the text:
import cv2
import numpy as np
import pytesseract

BGR = cv2.imread('C:/Users/Choudharyp/CV/11952/01_A_parcel_layer_single_parcel.png')
RGB = cv2.cvtColor(BGR, cv2.COLOR_BGR2RGB)
lower = np.array([175, 125, 45], dtype="uint8")
upper = np.array([255, 255, 255], dtype="uint8")
mask = cv2.inRange(RGB, lower, upper)
img = cv2.bitwise_and(RGB, RGB, mask=mask)
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
gray = 255 - gray
emp = np.full_like(gray, 255)
emp -= gray
emp[emp==0] = 255
emp[emp<100] = 0
gauss = cv2.GaussianBlur(emp, (3,3), 1)
gauss[gauss<220] = 0
text = pytesseract.image_to_string(gauss, config='outputbase digits')
print("Text=>",text)
This did not work, possibly because I need to remove the green lines from the image.
Hence I first wrote the code below to remove the green lines and extract only the black colors in the image (this works fine):
# Imports
import cv2
import numpy as np
import pytesseract
# Read image
imagePath = "C:/Users/Choudharyp/CV/11952/" # insert your own location
inputImage = cv2.imread(imagePath + "01_A_parcel_layer_single_parcel.png")
# Conversion to CMYK (just the K channel):
# Convert to float and divide by 255:
imgFloat = inputImage.astype(np.float64) / 255.
# Calculate channel K:
kChannel = 1 - np.max(imgFloat, axis=2)
# Convert back to uint 8:
kChannel = (255 * kChannel).astype(np.uint8)
# Threshold image:
binaryThresh = 190
_, binaryImage = cv2.threshold(kChannel, binaryThresh, 255, cv2.THRESH_BINARY)
cv2.imshow('Black_LettersOnly', binaryImage)
cv2.waitKey(0)
Output looks like below :
Image with only black label
The label 0026, however, is too small in the image.
Then I used the same code as above to fetch the text from the image, but it still doesn't work. Can someone suggest what else I could do to start fetching the labels from the image?
In binaryImage you get white text on a black background. However, Tesseract is optimized to detect dark text on a bright background, and apparently it works perfectly fine in this case when you simply invert your image:
...
text = pytesseract.image_to_string(255-binaryImage, config='outputbase digits')
print("Text=>", text.strip())
>>> Text=> 0026
However, if you want it even simpler, I'd prefer the HSV color space and threshold the value channel with a one-liner to keep just the non-colored/black pixels:
...
inputImageHSV = cv2.cvtColor(inputImage, cv2.COLOR_BGR2HSV)
binaryImage = (inputImageHSV[..., 2] > 120).astype(np.uint8) * 255
text = pytesseract.image_to_string(binaryImage, config='outputbase digits')
print("Text=>", text.strip())
>>> Text=> 0026
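Since the asker notes the label is quite small, upscaling the binary image before OCR may also help Tesseract; this extra step is my own suggestion, not part of the original answer:

...
# upscale 3x so the small digits are easier for Tesseract to read
binaryImage = cv2.resize(binaryImage, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
text = pytesseract.image_to_string(binaryImage, config='outputbase digits')
print("Text=>", text.strip())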

Display only bounding box region and neglect other part

I have this Haar cascade code which detects a face and draws a bounding box around it. I want to display only the bounding box area of the original image in its original place and black out all the other parts, just like we do in color detection with OpenCV. Is there any way to do so?
cascPath = "haarcascade_frontalface_default.xml"
image = cv2.imread(imagePath)
faceCascade = cv2.CascadeClassifier(cascPath)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
    minSize=(30, 30),
    flags=cv2.CASCADE_SCALE_IMAGE
)
print("Found {0} faces!".format(len(faces)))
for ((x, y, w, h), i) in zip(faces, range(len(faces))):
    a = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    roi_color = image[y:y + h, x:x + w]
I would try to crop the ROI and then paste it into an image that is an all-black rectangle. In Pillow this is very easy to do.
As I don't have face images, it is hard to reproduce your code. I will use some random image, but it should look something like this:
I put in a blue color just to highlight the background, but it's just a matter of changing it to whatever color you want.
from PIL import Image
img = Image.open('watch.jpeg', 'r')
img_w, img_h = img.size
left = img_w/8
top = img_h/8
right = 3 * img_w/8
bottom = 3 * img_h/8
cropped_img = img.crop((left, top, right, bottom))
cropped_img.save("cropped.png")
background = Image.new('RGB', (1440, 900), (0, 0, 255))
bg_w, bg_h = background.size
offset = ((bg_w - img_w) // 2, (bg_h - img_h) // 2)
background.paste(cropped_img, offset)
background.save('out.png')
input image:
output image:
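If instead you want to keep each face in its original position on a black background with OpenCV (what the question asks for), here is a minimal sketch, assuming the image and faces variables from the question's code:

import cv2
import numpy as np

# start from an all-black image of the same size as the input
masked = np.zeros_like(image)
for (x, y, w, h) in faces:
    # copy only the detected face region; everything else stays black
    masked[y:y + h, x:x + w] = image[y:y + h, x:x + w]
cv2.imshow("masked", masked)
cv2.waitKey(0)
cv2.destroyAllWindows()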

Template Matching: efficient way to create mask for minMaxLoc?

Template matching in OpenCV is great. And you can pass a mask to cv2.minMaxLoc so that you only search (sort of) in part of the image for the template you want. You can also use a mask at the matchTemplate operation, but this only masks the template.
I want to find a template and I want to be assured that this template is within some other region of my image.
Calculating the mask for minMaxLoc seems kind of heavy. That is, calculating an accurate mask feels heavy. If you calculate a mask the easy way, it ignores the size of the template.
Examples are in order. My input images are shown below. They're a bit contrived. I want to find the candy bar, but only if it's completely inside the white circle of the clock face.
clock1
clock2
template
In clock1, the candy bar is inside the circular clock face and it's a "PASS". But in clock2, the candy bar is only partially inside the face and I want it to be a "FAIL". Here's a code sample for doing it the easy way. I use cv2.HoughCircles to find the clock face.
import numpy as np
import cv2
img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('template.png')
t_h, t_w = template.shape[0:2] # template height and width
# find circle in gray image using Hough transform
circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
                           minDist = 150, param1 = 50, param2 = 70,
                           minRadius = 131, maxRadius = 200)
i = circles[0,0]
x0 = i[0]
y0 = i[1]
r = i[2]
# display circle on color image
cv2.circle(img,(x0, y0), r,(0,255,0),2)
# do the template match
result = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
# finally, here is the part that gets tricky. we want to find highest
# rated match inside circle and we'd like to use minMaxLoc
# make mask by drawing circle on zero array
mask = np.zeros(result.shape, dtype = np.uint8) # minMaxLoc will throw
# error w/o np.uint8
cv2.circle(mask, (x0, y0), r, color = 1, thickness = -1)
# call minMaxLoc
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result, mask = mask)
# draw found rectangle on img
if max_val > 0.4: # use 0.4 as threshold for finding candy bar
    cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
cv2.imwrite('output.jpg', img)
output using clock1
output using clock2 (finds the candy bar even though part of it is outside the circle)
So to properly make a mask, I use a bunch of NumPy operations. I make four separate masks (one for each corner of the template bounding box) and then AND them together. I'm not aware of any convenience functions in OpenCV that would do the mask for me. I'm a little nervous that all of the array operations will be expensive. Is there a better way to do this?
h, w = result.shape[0:2]
# make arrays that hold x,y coords
grid = np.indices((h, w))
x = grid[1]
y = grid[0]
top_left_mask = np.hypot(x - x0, y - y0) - r < 0
top_right_mask = np.hypot(x + t_w - x0, y - y0) - r < 0
bot_left_mask = np.hypot(x - x0, y + t_h - y0) - r < 0
bot_right_mask = np.hypot(x + t_w - x0, y + t_h - y0) - r < 0
mask = np.logical_and.reduce((top_left_mask, top_right_mask,
                              bot_left_mask, bot_right_mask))
mask = mask.astype(np.uint8)
cv2.imwrite('mask.png', mask*255)
Here's what the "fancy" mask looks like:
Seems about right. It cannot be circular because of the template shape. If I run clock2.jpg with this mask I get:
It works. No candy bars are identified. But I wish I could do it in fewer lines of code...
EDIT:
I've done some profiling. I ran 100 cycles of the "easy" way and the "accurate" way and calculated frames per second (fps):
easy way: 12.7 fps
accurate way: 7.8 fps
So there is some price to pay for making the mask with NumPy. These tests were done on a relatively powerful workstation. It could get uglier on more modest hardware...
Method 1: 'mask' image before cv2.matchTemplate
Just for kicks, I tried to make my own mask of the image that I pass to cv2.matchTemplate to see what kind of performance I can achieve. To be clear, this isn't a proper mask -- I set all of the pixels to ignore to one color (black or white). This is to get around the fact that only TM_SQDIFF and TM_CCORR_NORMED support a proper mask.
@Alexander Reynolds makes a very good point in the comments that some care must be taken if the template image (the thing we're trying to find) has lots of black or lots of white. For many problems, we will know a priori what the template looks like, and we can specify a white background or black background.
I use cv2.multiply, which seems to be faster than numpy.multiply. cv2.multiply has the added advantage that it automatically clips the results to the range 0 to 255.
import numpy as np
import cv2
import time

img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('target.jpg')
t_h, t_w = template.shape[0:2] # template height and width
mask_background = 'WHITE'
start_time = time.time()
for i in range(100): # do 100 cycles for timing
    # find circle in gray image using Hough transform
    circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
                               minDist = 150, param1 = 50, param2 = 70,
                               minRadius = 131, maxRadius = 200)
    i = circles[0, 0]
    x0 = i[0]
    y0 = i[1]
    r = i[2]
    # display circle on color image
    cv2.circle(img, (x0, y0), r, (0, 255, 0), 2)
    if mask_background == 'BLACK': # black = 0, white = 255 on grayscale
        mask = np.zeros(img.shape, dtype = np.uint8)
    elif mask_background == 'WHITE':
        mask = 255 * np.ones(img.shape, dtype = np.uint8)
    cv2.circle(mask, (x0, y0), r, color = (1, 1, 1), thickness = -1)
    img2 = cv2.multiply(img, mask) # element-wise multiplication;
                                   # values > 255 are truncated at 255
    # do the template match
    result = cv2.matchTemplate(img2, template, cv2.TM_CCOEFF_NORMED)
    # call minMaxLoc
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
    # draw found rectangle on img
    if max_val > 0.4:
        cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
fps = 100 / (time.time() - start_time)
print('fps ', fps)
cv2.imwrite('output.jpg', img)
Profiling results:
BLACK background 12.3 fps
WHITE background 12.1 fps
Using this method has very little performance hit relative to the 12.7 fps of the original question. However, it has the drawback that it will still find templates that stick over the edge a little bit. Depending on the exact nature of the problem, this may be acceptable in many applications.
Method 2: use cv2.boxFilter to create mask for minMaxLoc
In this technique, we start with a circular mask (as in the OP), but then modify it with cv2.boxFilter, changing the anchor from the default center of the kernel to the top-left corner (0, 0).
import numpy as np
import cv2
import time

img = cv2.imread('clock1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
template = cv2.imread('target.jpg')
t_h, t_w = template.shape[0:2] # template height and width
print('t_h, t_w ', t_h, ' ', t_w)
start_time = time.time()
for i in range(100):
    # find circle in gray image using Hough transform
    circles = cv2.HoughCircles(gray, method = cv2.HOUGH_GRADIENT, dp = 1,
                               minDist = 150, param1 = 50, param2 = 70,
                               minRadius = 131, maxRadius = 200)
    i = circles[0, 0]
    x0 = i[0]
    y0 = i[1]
    r = i[2]
    # display circle on color image
    cv2.circle(img, (x0, y0), r, (0, 255, 0), 2)
    # do the template match
    result = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
    # finally, here is the part that gets tricky. we want to find the highest
    # rated match inside the circle and we'd like to use minMaxLoc
    # start to make mask by drawing circle on zero array
    mask = np.zeros(result.shape, dtype = np.float32)
    cv2.circle(mask, (x0, y0), r, color = 1, thickness = -1)
    mask = cv2.boxFilter(mask,
                         ddepth = -1,
                         ksize = (t_w, t_h),
                         anchor = (0, 0),
                         normalize = True,
                         borderType = cv2.BORDER_ISOLATED)
    # mask now contains values from zero to 1. we want to make anything
    # less than 1 equal to zero
    _, mask = cv2.threshold(mask, thresh = 0.9999,
                            maxval = 1.0, type = cv2.THRESH_BINARY)
    mask = mask.astype(np.uint8)
    # call minMaxLoc
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result, mask = mask)
    # draw found rectangle on img
    if max_val > 0.4:
        cv2.rectangle(img, max_loc, (max_loc[0]+t_w, max_loc[1]+t_h), (0,255,0), 4)
fps = 100 / (time.time() - start_time)
print('fps ', fps)
cv2.imwrite('output.jpg', img)
This code gives a mask identical to the OP's, but at 11.89 fps. This technique gives more accuracy with a slightly larger performance hit than Method 1.

Resizing image and its bounding box

I have an image with a bounding box in it, and I want to resize the image.
img = cv2.imread("img.jpg",3)
x_ = img.shape[0]
y_ = img.shape[1]
img = cv2.resize(img,(416,416));
Now I want to calculate the scale factor:
x_scale = ( 416 / x_)
y_scale = ( 416 / y_ )
And draw the box on the resized image. This is the original bounding box:
(128, 25, 447, 375) = (xmin, ymin, xmax, ymax)
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
However, using this I get:
While the original is:
I don't see any flaw in this logic; what's wrong?
Whole code:
imageToPredict = cv2.imread("img.jpg",3)
print(imageToPredict.shape)
x_ = imageToPredict.shape[0]
y_ = imageToPredict.shape[1]
x_scale = 416/x_
y_scale = 416/y_
print(x_scale,y_scale)
img = cv2.resize(imageToPredict,(416,416));
img = np.array(img);
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
Box.drawBox([[1,0, x,y,xmax,ymax]],img)
and drawbox
def drawBox(boxes, image):
    for i in range(0, len(boxes)):
        cv2.rectangle(image, (boxes[i][2], boxes[i][3]), (boxes[i][4], boxes[i][5]), (0, 0, 120), 3)
    cv2.imshow("img", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
The image and the data for the bounding box are loaded separately. I am drawing the bounding box inside the image. The image does not contain the box itself.
I believe there are two issues:
You should swap x_ and y_, because shape[0] is actually the y-dimension (height) and shape[1] is the x-dimension (width).
You should use the same coordinates on the original and scaled images. On your original image the rectangle is (160, 35) - (555, 470), rather than the (128, 25) - (447, 375) that you use in the code.
If I use the following code:
import cv2
import numpy as np

def drawBox(boxes, image):
    for i in range(0, len(boxes)):
        # changed color and width to make it visible
        cv2.rectangle(image, (boxes[i][2], boxes[i][3]), (boxes[i][4], boxes[i][5]), (255, 0, 0), 1)
    cv2.imshow("img", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def cvTest():
    # imageToPredict = cv2.imread("img.jpg", 3)
    imageToPredict = cv2.imread("49466033\\img.png", 3)
    print(imageToPredict.shape)
    # Note: flipped comparing to your original code!
    # x_ = imageToPredict.shape[0]
    # y_ = imageToPredict.shape[1]
    y_ = imageToPredict.shape[0]
    x_ = imageToPredict.shape[1]
    targetSize = 416
    x_scale = targetSize / x_
    y_scale = targetSize / y_
    print(x_scale, y_scale)
    img = cv2.resize(imageToPredict, (targetSize, targetSize))
    print(img.shape)
    img = np.array(img)
    # original frame as named values
    (origLeft, origTop, origRight, origBottom) = (160, 35, 555, 470)
    x = int(np.round(origLeft * x_scale))
    y = int(np.round(origTop * y_scale))
    xmax = int(np.round(origRight * x_scale))
    ymax = int(np.round(origBottom * y_scale))
    # Box.drawBox([[1, 0, x, y, xmax, ymax]], img)
    drawBox([[1, 0, x, y, xmax, ymax]], img)

cvTest()
and use your "original" image as "49466033\img.png",
I get the following image
And as you can see, my thinner blue line lies exactly inside your original red line, and it stays there whatever targetSize you choose (so the scaling actually works correctly).
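The same scaling rule can be extracted into a small standalone helper; this is a sketch of the logic above, where the function name and the (640, 480) example size are hypothetical:

def scale_box(box, orig_size, new_size):
    # box is (xmin, ymin, xmax, ymax); sizes are (width, height)
    x_scale = new_size[0] / orig_size[0]
    y_scale = new_size[1] / orig_size[1]
    xmin, ymin, xmax, ymax = box
    return (round(xmin * x_scale), round(ymin * y_scale),
            round(xmax * x_scale), round(ymax * y_scale))

# example: the box from above, with a made-up 640x480 original resized to 416x416
print(scale_box((160, 35, 555, 470), (640, 480), (416, 416)))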
Another way of doing this is to use the Chitra library:
from chitra.image import Chitra

image = Chitra(img_path, box, label)
# Chitra can rescale your bounding box automatically based on the new image size.
image.resize_image_with_bbox((224, 224))
print('rescaled bbox:', image.bounding_boxes)
plt.imshow(image.draw_boxes())
https://chitra.readthedocs.io/en/latest/
pip install chitra
I encountered an issue with bounding box coordinates in Angular when using TensorFlow.js and MobileNet-v2 for prediction. The coordinates were based on the resolution of the video frame, but I was displaying the video on a canvas with a fixed width and height. I resolved the issue by dividing the coordinates by the ratio of the original video resolution to the canvas resolution.
const x = prediction.bbox[0] / (this.Owidth / 300);
const y = prediction.bbox[1] / (this.Oheight / 300);
const width = prediction.bbox[2] / (this.Owidth / 300);
const height = prediction.bbox[3] / (this.Oheight / 300);
// Draw the bounding box.
ctx.strokeStyle = '#99ff00';
ctx.lineWidth = 2;
ctx.strokeRect(x, y, width, height);
this.Owidth and this.Oheight are the original resolution of the video, obtained by:
this.video.addEventListener(
  'loadedmetadata',
  (e: any) => {
    this.Owidth = this.video.videoWidth;
    this.Oheight = this.video.videoHeight;
    console.log(this.Owidth, this.Oheight, ' pixels ');
  },
  false
);
300 x 300 is my static canvas width and height.
You can use resize_dataset_pascalvoc.
It's easy to use: python3 main.py -p <IMAGES_&_XML_PATH> --output <IMAGES_&_XML> --new_x <NEW_X_SIZE> --new_y <NEW_Y_SIZE> --save_box_images <FLAG>
It resizes your whole dataset and rewrites new annotation files for the resized images.
