Only integer scalar arrays can be converted to a scalar index not running under Spyder - python-3.x

I have the following code, which runs well under Visual Studio Code with python 3.9.10, opencv 4.5.5 and numpy 1.22.1.
I would like to migrate this code into the Spyder IDE (version 5, on another notebook) with python 3.8, opencv 4.5.1 and numpy 1.22.2.
In Spyder, I get the error message TypeError: only integer scalar arrays can be converted to a scalar index in the line output_layers = [layer_names[i-1]...] (marked in the code section below).
I have already checked other answers on this site such as
TypeError when indexing a list with a NumPy array: only integer scalar arrays can be converted to a scalar index
which suggests using a list comprehension, but to my understanding I have already implemented this.
What is the reason for the code running correctly in one environment but not in the other?
import cv2
import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]  # <-- this line raises the TypeError in Spyder
    return output_layers

def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(classes[class_id])
    color = COLORS[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

image = cv2.imread('horses.jpg')
Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

classes = None
with open(r'yolov3.txt', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(get_output_layers(net))

class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

for i in indices:
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    draw_prediction(image, class_ids[i], confidences[i], round(x), round(y),
                    round(x + w), round(y + h))

cv2.imshow("object detection", image)
cv2.waitKey()
cv2.imwrite("object-detection.jpg", image)
cv2.destroyAllWindows()

There were subtle, recent API changes w.r.t. handling std::vector in Python (4.5.1 still expects a 2D array, but it's 1D in 4.5.5).
To avoid the whole trouble, please simply use:
output_layers = net.getUnconnectedOutLayersNames()
(like it is done in the sample)
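If you want to keep the index-based lookup instead, a version-agnostic workaround (a sketch, not the official API) is to flatten whatever getUnconnectedOutLayers() returns before indexing:

import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns a 2-D array in older OpenCV builds
    # and a 1-D array in newer ones; flattening handles both layouts
    out_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in out_ids]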

Related

Detection of small object - aphids on plants

I'm currently trying to create a detector for aphids (green and rose) on plants, using only "classic" image processing techniques (no neural network).
Here is an image I'm working on:
'aphids.jpg'
I'm working on the code below. If you apply it to the image, you should get the plants alone. My problem is that I want to isolate the aphids that can be seen on the plants. There are a lot of them, but I just want to detect the biggest or most obvious ones.
In the code there is an "edge_detec" function I'm currently working on. One of the problems I have is that I can detect some of the aphids as contours, but it will also pick up simple lines...
I tried to drop those lines using the contour hierarchy, but it seems those lines have inner contours, so I can't easily delete them.
I also tried adjust_gamma and contrast, but it doesn't help much.
I'm looking for more ideas. What would you try?
Thank you in advance!
Here is the code:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def adjust_gamma(image, gamma=1.0):
    # build a lookup table mapping the pixel values [0, 255] to
    # their adjusted gamma values
    invGamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** invGamma) * 255
                      for i in np.arange(0, 256)]).astype("uint8")
    # apply gamma correction using the lookup table
    return cv2.LUT(image, table)

def adjust_contrast(image, alpha=1.0, beta=0):
    new = np.zeros(image.shape, image.dtype)
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            for c in range(image.shape[2]):
                new[y, x, c] = np.clip(alpha * image[y, x, c] + beta, 0, 255)
    return new

def img_process(img):
    (h1, w1) = img.shape[:2]
    center = (w1 / 2, h1 / 2)
    blur = cv2.GaussianBlur(img.copy(), (5, 5), 0)
    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
    # image = img.copy()
    # Boundaries to separate plants from the image
    l_bound = np.array([20, 0, 0])
    h_bound = np.array([90, 250, 170])  # green
    mask = cv2.inRange(hsv, l_bound, h_bound)
    res = cv2.bitwise_and(img, img, mask=mask)
    # Find plant contours
    cnt, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    sort_cnt = sorted(cnt, key=cv2.contourArea, reverse=True)
    cnt = [sort_cnt[i] for i in range(len(sort_cnt)) if cv2.contourArea(sort_cnt[i]) > 300]
    cv2.drawContours(res, cnt, -1, (0, 255, 0), -1)
    # Invert the mask to have only the plant in the image
    mask2 = cv2.inRange(res, np.array([0, 0, 0]), np.array([250, 250, 250]))
    mask2 = cv2.bitwise_not(mask2)
    res2 = cv2.bitwise_and(img, img, mask=mask2)
    # Increase brightness/contrast
    res2 = res2 * 1.45
    res2 = res2.astype('uint8')
    # Crop
    res2 = res2[:-50, int(center[0] - 300):int(center[0] + 550)]
    return res2

def edge_detec(img):
    (h1, w1) = img.shape[:2]
    center = (w1 / 2, h1 / 2)
    blur = cv2.GaussianBlur(img.copy(), (5, 5), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 30, 70, apertureSize=3)
    edges = edges[:-50, int(center[0] - 300):int(center[0] + 550)]
    # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    # edges = cv2.morphologyEx(edges, cv2.MORPH_GRADIENT, kernel)
    cnt, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnt = sorted(cnt, key=cv2.contourArea, reverse=True)
    listArea = list(map(cv2.contourArea, cnt))
    sort_cnt = [x for x in cnt if cv2.contourArea(x) > 10]
    cv2.drawContours(edges, sort_cnt, -1, (0, 255, 0), -1)
    return edges, center, img

### Start of program
img = cv2.imread('051.jpg')
while True:
    ## Put processing function here
    img_mod = img_process(img)
    cv2.imshow('img', img_mod)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cv2.destroyAllWindows()
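For illustration, using the hierarchy returned by findContours to drop contours that contain inner contours (the approach mentioned above) would look roughly like this; a sketch assuming cv2.RETR_TREE, not a verified fix for this image:

cnt, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# hierarchy[0][i] = [next, previous, first_child, parent];
# keep only contours without an inner contour (first_child == -1)
leaves = [c for c, h in zip(cnt, hierarchy[0]) if h[2] == -1]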

What is the best way to extract text contained within a table in a pdf using python?

I'm constructing a program to extract text from a pdf, put it in a structured format, and send it off to a database. I have roughly 1,400 individual pdfs that all follow a similar format, but nuances in the verbiage and plan designs that the documents summarize make it tricky.
I've played around with a couple different PDF readers in Python, including tabula-py and pdfminer, but none of them are quite getting to what I'd like to do. Tabula reads in all of the text very well; however, it pulls everything exactly as it lies horizontally, ignoring the fact that some of the text is wrapped in a box. For example, if you open up the sample SBC I have attached where it reads "What is the overall deductible?", Tabula will read in "What is the overall $500/Individual or...", missing the fact that the word "deductible" is really part of the first sentence. (Note: the files I'm working with are PDFs, but I've attached a JPEG because I couldn't figure out how to attach a PDF.)
import tabula
df = tabula.read_pdf(filepath, pandas_options={'header': None})
print(df.iloc[0][0])
print(df)
In the end, I'd really like to be able to parse out the text within each box so that I can better identify which values belong to deductible, out-of-pocket limits, copays/coinsurance, etc. I thought possibly some sort of OCR would allow me to recognize which parts of the PDF are contained in the blue rectangles and then pull the string from there, but I really don't know where to start with that. Sample SBC
@jpnadas In this case the code you copied from my answer in this post isn't really suitable, because it addresses the case when a table doesn't have a surrounding grid. That algorithm looks for repeating blocks of text and tries to heuristically find a pattern that resembles a table.
But in this particular case the table does have a grid, and by taking advantage of it we can achieve a much more accurate result.
The strategy is the following:

1. Increase the image gamma to make the grid darker.
2. Get rid of colour and apply Otsu thresholding.
3. Find long vertical and horizontal lines in the image and create a mask from them using the erode and dilate functions.
4. Find the cell blocks in the mask using the findContours function.
5. Find the table objects:
5.1 The rest can be as in the post about finding a table without the grid: find the table structure heuristically.
5.2 An alternative approach could be to use the hierarchy returned by the findContours function. This approach is even more accurate and allows finding multiple tables in a single image.
Having the cell coordinates, it's easy to extract a certain cell image from the original image:
cell_image = image[cell_y:cell_y + cell_h, cell_x:cell_x + cell_w]
Apply OCR to each cell_image.
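A minimal sketch of that OCR step, assuming pytesseract (and the Tesseract binary) is installed; --psm 6 treats the cell as a uniform block of text:

import pytesseract

# (x, y, w, h) of one cell, e.g. taken from the cells found below
cell_image = image[cell_y:cell_y + cell_h, cell_x:cell_x + cell_w]
cell_text = pytesseract.image_to_string(cell_image, config="--psm 6")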
BUT! I consider the OpenCV approach a last resort, for when you're not able to read the PDF's contents: for instance, when a PDF contains a raster image inside.
If it's a vector-based PDF and its contents are readable, it makes more sense to find the table inside the contents and just read the text from it instead of doing heavy 'OCR lifting'.
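For the readable case, a text-extraction sketch with pdfminer.six (one option among several; the file name is a placeholder):

from pdfminer.high_level import extract_text

text = extract_text("summary_of_benefits.pdf")  # placeholder path
print(text)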
Here's the code for reference for more accurate table recognition:
import os
import imutils
import numpy as np
import argparse
import cv2

def gamma_correction(image, gamma=1.0):
    look_up_table = np.empty((1, 256), np.uint8)
    for i in range(256):
        look_up_table[0, i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
    result = cv2.LUT(image, look_up_table)
    return result

def pre_process_image(image):
    # Applying gamma to make the table lines darker
    gamma = gamma_correction(image, 2)
    # Getting rid of color
    gray = cv2.cvtColor(gamma, cv2.COLOR_BGR2GRAY)
    # Then apply Otsu threshold to reveal important areas
    ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # Inverting the thresholded image
    return ~thresh

def get_horizontal_lines_mask(image, horizontal_size=100):
    horizontal = image.copy()
    horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv2.erode(horizontal, horizontal_structure, anchor=(-1, -1), iterations=1)
    horizontal = cv2.dilate(horizontal, horizontal_structure, anchor=(-1, -1), iterations=1)
    return horizontal

def get_vertical_lines_mask(image, vertical_size=100):
    vertical = image.copy()
    vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
    vertical = cv2.erode(vertical, vertical_structure, anchor=(-1, -1), iterations=1)
    vertical = cv2.dilate(vertical, vertical_structure, anchor=(-1, -1), iterations=1)
    return vertical

def make_lines_mask(preprocessed, min_horizontal_line_size=100, min_vertical_line_size=100):
    hor = get_horizontal_lines_mask(preprocessed, min_horizontal_line_size)
    ver = get_vertical_lines_mask(preprocessed, min_vertical_line_size)
    mask = np.zeros((preprocessed.shape[0], preprocessed.shape[1], 1), dtype=np.uint8)
    mask = cv2.bitwise_or(mask, hor)
    mask = cv2.bitwise_or(mask, ver)
    return ~mask

def find_cell_boxes(mask):
    # Looking for the text spot contours
    # OpenCV 3:
    # img, contours, hierarchy = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4:
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    image_width = mask.shape[1]
    # Getting the text bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        w = box[2]
        # Excluding the page box shape but adding smaller boxes
        if w < 0.95 * image_width:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having fewer than min_columns cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_vertical_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True, help="path to images directory")
    args = vars(ap.parse_args())
    in_file = args["image"]
    filename_base = in_file.replace(os.path.splitext(in_file)[1], "")
    img = cv2.imread(in_file)
    pre_processed = pre_process_image(img)
    # Visualizing the pre-processed image
    cv2.imwrite(filename_base + ".pre.png", pre_processed)
    lines_mask = make_lines_mask(pre_processed, min_horizontal_line_size=1800, min_vertical_line_size=500)
    # Visualizing the table lines mask
    cv2.imwrite(filename_base + ".mask.png", lines_mask)
    cell_boxes = find_cell_boxes(lines_mask)
    cells = find_table_in_boxes(cell_boxes)
    # Apply OCR to each cell rect here;
    # the cells array contains cell coordinates in tuples (x, y, w, h)
    hor_lines, ver_lines = build_vertical_lines(cells)
    # Visualize the table lines
    vis = img.copy()
    for line in hor_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    for line in ver_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(filename_base + ".result.png", vis)
Some parameters are hard-coded:
page size threshold - 0.95
min horizontal line size - 1800 px
min vertical line size - 500 px
You can provide them as configurable parameters or make them relative to image size.
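For example, a sketch of deriving them from the image size instead (the fractions below are assumptions you would tune per document):

h, w = img.shape[:2]
lines_mask = make_lines_mask(pre_processed,
                             min_horizontal_line_size=int(0.7 * w),
                             min_vertical_line_size=int(0.3 * h))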
Results: (result images omitted here)
I think that the best way to do what you need is to find and isolate the cells in the file and then apply OCR to each individual cell.
There are a number of solutions on SO for that; I got the code from this answer and played around a little with the parameters to get the output below (not perfect yet, but you can tweak it a little bit yourself).
import os
import cv2

# This only works if there's only one table on a page
# Important parameters:
#  - morph_size
#  - min_text_height_limit
#  - max_text_height_limit
#  - cell_threshold
#  - min_columns

def pre_process_image(img, save_in_file, morph_size=(23, 23)):
    # Get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # Dilate the text to make it into solid spots
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy
    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre

def find_text_boxes(pre, min_text_height_limit=20, max_text_height_limit=120):
    # Looking for the text spot contours
    contours, _ = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # Getting the text bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=100, min_columns=3):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having fewer than min_columns cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    in_file = os.path.join(".", "test.jpg")
    pre_file = os.path.join(".", "pre.png")
    out_file = os.path.join(".", "out.png")
    img = cv2.imread(in_file)
    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)
    # Visualize the result
    vis = img.copy()
    # for box in text_boxes:
    #     (x, y, w, h) = box
    #     cv2.rectangle(vis, (x, y), (x + w - 2, y + h - 2), (0, 255, 0), 1)
    for line in hor_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    for line in ver_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(out_file, vis)

How to perform Earth Mover's Distance instead of DoG for center surround difference on multiscale level in images in python 3.7

I am working on an image processing project where I have to perform a center-surround difference calculation with Earth Mover's Distance (EMD) on a multiscale level, but the problem is that I can't figure out how the center-surround difference works and how I could use EMD for it.
I found the Python function for EMD, but it works with two source image histograms, whereas in my problem I have only one source.
I am generating multiple scales of the image using skimage's pyramid_gaussian function, using the solution provided at this
link: https://gist.github.com/duhaime/211365edaddf7ff89c0a36d9f3f7956c
I tried:
import cv2
import numpy as np
from scipy.misc import imread  # removed in SciPy >= 1.2; this snippet assumes an older SciPy
from scipy.stats import wasserstein_distance
from skimage.transform import resize, pyramid_gaussian

def get_img(path, norm_size=True, norm_exposure=False):
    # height and width are assumed to be defined globally in the original gist
    img = imread(path, flatten=True).astype(int)
    if norm_size:
        img = resize(img, (height, width), anti_aliasing=True, preserve_range=True)
    if norm_exposure:
        img = normalize_exposure(img)
    return img

def get_histogram(img):
    h, w = img.shape
    hist = [0.0] * 256
    for i in range(h):
        for j in range(w):
            hist[img[i, j]] += 1
    return np.array(hist) / (h * w)

def normalize_exposure(img):
    img = img.astype(int)
    hist = get_histogram(img)
    # get the sum of vals accumulated by each position in hist
    cdf = np.array([sum(hist[:i + 1]) for i in range(len(hist))])
    # determine the normalization values for each unit of the cdf
    sk = np.uint8(255 * cdf)
    # normalize each position in the output image
    height, width = img.shape
    normalized = np.zeros_like(img)
    for i in range(0, height):
        for j in range(0, width):
            normalized[i, j] = sk[img[i, j]]
    return normalized.astype(int)

def earth_movers_distance(path_a, path_b):
    img_a = get_img(path_a, norm_exposure=True)
    img_b = get_img(path_b, norm_exposure=True)
    hist_a = get_histogram(img_a)
    hist_b = get_histogram(img_b)
    return wasserstein_distance(hist_a, hist_b)

if __name__ == '__main__':
    image = cv2.imread("images/test3.jpg")
    pyramidlist = []
    dst = []
    for (i, resized) in enumerate(pyramid_gaussian(image, downscale=1.4)):
        if resized.shape[0] < 30 or resized.shape[1] < 30:
            break
        cv2.imshow(f"Layer {i+1}", resized)
        cv2.waitKey(0)
        pyramidlist.append(resized[i])
    print(pyramidlist)
    print(len(pyramidlist))
    cv2.destroyAllWindows()
but I don't know how to use EMD after generating the pyramids, or how to calculate the center-surround difference.
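For what it's worth, a sketch of one possible center-surround comparison: treat a fine pyramid level as the "center", upsample a coarser level as the "surround", and take the EMD between their grey-level histograms. This reuses get_histogram from above, assumes a grayscale pyramid with values in [0, 1], and is an illustration rather than a verified implementation:

from scipy.stats import wasserstein_distance
from skimage.transform import resize

def center_surround_emd(pyramid, c, s):
    # "center" = finer level c; "surround" = coarser level s upsampled to c's shape
    center = (pyramid[c] * 255).astype(int)
    surround = (resize(pyramid[s], pyramid[c].shape, preserve_range=True) * 255).astype(int)
    return wasserstein_distance(get_histogram(center), get_histogram(surround))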

image dilation with python

I'm trying to execute a piece of code I found online, and it gives me the following error.
I'm new to OpenCV, so please help me.
error:

<ipython-input-1-7fe9c579ec14> in image_masking(filepath)
     15     gray = cv2.imread(filepath,0)
     16     edges = cv2.Canny(gray, CANNY_THRESH_1, CANNY_THRESH_2)
---> 17     edges = cv2.dilate(edges,None)
     18     edges = cv2.erode(edges, None)
     19

error: OpenCV(3.4.1) C:\Miniconda3\conda-bld\opencv-suite_1533128839831\work\modules\core\src\matrix.cpp:760: error: (-215) dims <= 2 && step[0] > 0 in function cv::Mat::locateROI
code:
import cv2
import numpy as np

def image_masking(filepath):
    BLUR = 21
    CANNY_THRESH_1 = 100
    CANNY_THRESH_2 = 100
    MASK_DILATE_ITER = 10
    MASK_ERODE_ITER = 10
    MASK_COLOR = (0.0, 0.0, 0.0)  # In BGR format

    gray = cv2.imread(filepath, 0)
    edges = cv2.Canny(gray, CANNY_THRESH_1, CANNY_THRESH_2)
    edges = cv2.dilate(edges, None)
    edges = cv2.erode(edges, None)

    contour_info = []
    _, contours, __ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    for c in contours:
        contour_info.append((c, cv2.isContourConvex(c), cv2.contourArea(c),))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    max_contour = contour_info[0]

    for c in contour_info:
        cv2.fillConvexPoly(mask, c[0], (255))
    mask = cv2.dilate(mask, None, iterations=MASK_DILATE_ITER)
    mask = cv2.erode(mask, None, iterations=MASK_ERODE_ITER)
    mask = cv2.GaussianBlur(mask, (BLUR, BLUR), 0)

    mask_stack = np.dstack([mask] * 3)
    mask_stack = mask_stack.astype('float32') / 255.0
    img = img.astype('float32') / 255.0
    masked = (mask_stack * img) + ((1 - mask_stack) * MASK_COLOR)
    masked = (masked * 255).astype('uint8')

    fileName, fileExtension = filepath.split('.')
    fileName += '-masked.'
    filepath = fileName + fileExtension
    print(filepath)
    cv2.imwrite(filepath, masked)

if __name__ == '__main__':
    filepath = 'C:\\Users\HP\Downloads\test3.jpg'
    image_masking(filepath)
I tried replacing None in the dilate function with a kernel, but it gives me the same error.
The second argument to cv2.dilate and cv2.erode should be the kernel with which you want to perform the dilation/erosion, as shown in the docs: opencv documentation
For example, you can try to do it like that:
kernel = np.ones((3, 3), np.uint8)
edges = cv2.dilate(edges, kernel)
edges = cv2.erode(edges, kernel)
Good luck with further opencv exploration!

How to reduce the number of boxes/regions created in MSER

I have been trying to get fewer boxes with MSER, since I have too many boxes created repeatedly on the same element with very little pixel difference. My code is as below:
## Get mser, and set parameters
_delta = 10
_min_area = 250
_max_area = 800
_max_variation = 10.0
_min_diversity = 30.0
_max_evolution = 10
_area_threshold = 12.0
_min_margin = 2.9
_edge_blur_size = 3
mser = cv2.MSER_create(_delta, _min_area, _max_area, _max_variation,
                       _min_diversity, _max_evolution, _area_threshold,
                       _min_margin, _edge_blur_size)
and then
## Do MSER detection, get the coordinates and bboxes on the original image
gray = cv2.cvtColor(final, cv2.COLOR_BGR2GRAY)
coordinates, bboxes = mser.detectRegions(gray)
After this, I see there are 26K boxes created. Which of the parameters can be tuned to get fewer regions (since they overlap a lot)? Kindly help.
_delta is the most important parameter for reducing the number of boxes. Try raising it to 25. The higher the _delta, the fewer blobs you will get.
_min_area - the smallest blob
_max_area - the largest blob
_min_diversity - raise to reduce the number of overlapping blobs
_max_variation - raise to reduce areas with high variance
For more information
After that, I would check the bboxes to filter out overlapping blobs, as in the code example below.
Code Example
import cv2
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

img = cv2.imread('input_img.png')
iou_th = 0.95

mser = cv2.MSER_create(_delta=10, _min_area=1000,
                       _max_area=int(0.1 * np.pi * (img.shape[0] / 2) ** 2),
                       _max_variation=0.1)
regions, bboxes = mser.detectRegions(img)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]

# Debug plot
img_ = img.copy()
cv2.polylines(img_, hulls, 1, (255, 0, 0), thickness=1)
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(img_)
ax.set_title('MSER with overlapping regions')

size_dict = {k: len(region) for k, region in enumerate(regions)}

# Cull overlapping blobs: connect every pair of boxes whose IoU exceeds iou_th
graph = nx.Graph()
graph.add_nodes_from(range(len(hulls)))
for i in range(len(hulls)):
    for j in range(len(hulls)):
        if i >= j:
            continue
        box_i = bboxes[i]
        box_j = bboxes[j]
        tl_i = box_i[:2]
        tl_j = box_j[:2]
        br_i = tl_i + box_i[2:]
        br_j = tl_j + box_j[2:]
        tl = np.maximum(tl_i, tl_j)
        br = np.minimum(br_i, br_j)
        intersected_rect = br - tl
        intersection = np.prod(intersected_rect) if intersected_rect[0] > 0 and intersected_rect[1] > 0 else 0
        union = np.prod(box_i[2:]) + np.prod(box_j[2:]) - intersection
        iou = intersection / union
        if iou > iou_th:
            graph.add_edge(i, j, iou=iou)

# Make a list of unique regions - pick the smallest region in each cluster.
# Note: nx.connected_component_subgraphs was removed in networkx 2.4;
# building the subgraphs from connected_components does the same thing.
trees = [graph.subgraph(c) for c in nx.connected_components(graph)]
unique_blobs = []
for tree in trees:
    # Choose the smallest region
    smallest_idx = None
    smallest_blob = np.inf
    for node in tree.nodes():
        if size_dict[node] < smallest_blob:
            smallest_blob = size_dict[node]
            smallest_idx = node
    unique_blobs.append(smallest_idx)

hulls = [hulls[k] for k in unique_blobs]
regions = [regions[k] for k in unique_blobs]
bboxes = [bboxes[k] for k in unique_blobs]
size_dict = {k: len(region) for k, region in enumerate(regions)}

# Debug plot
img_ = img.copy()
cv2.polylines(img_, hulls, 1, (255, 0, 0), thickness=1)
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(img_)
ax.set_title('MSER with unique regions')
