Region growing implementation in python, without seeding - python-3.x

I am trying to implement the region growing segmentation algorithm in Python, but I am not allowed to use seed points. My idea so far is this:
Start from the very first pixel, check that its neighbours are within bounds (width and height), then verify that the neighbours are within the threshold (I compute this as the Euclidean distance between the current pixel and the neighbour). To make sure I don't visit the same pixel again, I created a matrix with the same dimensions as the image (width, height) and set all elements to 0 in the beginning. I start from currentArea number 1 and increment it as the algorithm goes. In the end, after obtaining all areas with their respective average colors, I will use a function that writes the new image based on the tuples I have obtained: (averagePixelColor, areaNumber).
This is my code:
from PIL import Image
from scipy.spatial import distance
import statistics
import numpy as np
import sys

sys.setrecursionlimit(10**9)

# SCRIPT: color palette reduction applier script
# SCRIPT: this is the second method. region growing segmentation

# list of red values of pixels
rList = []
# list of green values of pixels
gList = []
# list of blue values of pixels
bList = []
# this matrix will be initially 0, then every region growth
# will be updated to its number
overlayMatrix = []
# starting area number
currentArea = 1

def isValidPixel(x, y, width, height):
    if x < 0 or x >= width:
        return False
    if y < 0 or y >= height:
        return False
    return True

def get_average_color():
    global rList
    global gList
    global bList
    # set values to none
    r = None
    g = None
    b = None
    # get average value for each channel
    if rList != []:
        r = sum(rList)/len(rList)
        g = sum(gList)/len(gList)
        b = sum(bList)/len(bList)
        # make values integers to be used as pixel
        r = int(r)
        g = int(g)
        b = int(b)
    # return values
    return (r, g, b)

def add_pixel_to_lists(pixel):
    global rList
    global gList
    global bList
    rList.append(pixel[0])
    gList.append(pixel[1])
    bList.append(pixel[2])

def region_growing(x, y, pixel, img, currentArea):
    global overlayMatrix
    global rList
    global gList
    global bList
    # get width, height
    width, height = img.size
    # set this pixel to be visited on current area
    overlayMatrix[x][y] = currentArea
    # set a list for all possible neighbours
    neighbouringPixels = [(x-1, y), (x-1, y-1), (x-1, y+1), (x, y-1), (x, y+1), (x+1, y), (x+1, y-1), (x+1, y+1)]
    # filter to get only valid neighbours
    validPixels = [x for x in neighbouringPixels if isValidPixel(x[0], x[1], width, height) == True]
    # filter pixels to be not visited
    notVisitedPixels = [x for x in validPixels if overlayMatrix[x[0]][x[1]] == 0]
    # set a threshold value
    threshold = 5
    # filter to get only pixels in threshold
    thresholdPixels = []
    thresholdPixels = [x for x in notVisitedPixels if distance.euclidean(img.getpixel(x), pixel) < threshold]
    # set the list for pixels to make recursive calls
    toVisitRecursive = []
    # for every pixel that is a valid neighbour, add it to the toVisit list in recursive call
    # and add its rgb values to the lists so an average can be computed
    for pixel in thresholdPixels:
        toVisitRecursive.append(pixel)
        add_pixel_to_lists(img.getpixel(pixel))
    # compute average
    averagePixel = get_average_color()
    # if I still have neighbours that haven't been visited
    # and are within the threshold, get first from list,
    # remove it so we don't do the same again, then apply
    # the algorithm for it
    if toVisitRecursive != []:
        pixel = toVisitRecursive[0]
        toVisitRecursive.remove(pixel)
        region_growing(pixel[0], pixel[1], averagePixel, img, currentArea)
    # finally, return
    return (averagePixel, currentArea+1)

def write_image():
    # this will write to the image based on the list of tuples
    # average color to the area with number X
    pass

def palette_reduction_mt2(input_image):
    # open original image
    img = Image.open(input_image)
    tuple_list = []
    # get width, height
    width, height = img.size
    # make overlay matrix of 0, initial
    global overlayMatrix
    overlayMatrix = np.zeros((width, height), dtype=np.int32)
    # create new image
    newimg = Image.new("RGB", (width, height), "white")
    currentArea = 1
    # iterate through image pixels
    for y in range(0, height-1):
        for x in range(0, width-1):
            global rList
            global gList
            global bList
            # get pixel from edge image
            p = img.getpixel((x, y))
            # apply region growing
            average, currentArea = region_growing(x, y, p, img, currentArea)
            tuple_list.append((average, currentArea-1))
            # reset color lists to compute new average
            rList = []
            gList = []
            bList = []
    print(tuple_list)
    # Save image
    newimg.save("images/reduced_mt1.jpg")
    # return the name of the image
    return "images/reduced_mt1.jpg"
For some reason, when I print the tuples list, I only get a single value: [(0,0,0), 1]
Can anyone please point me in the right direction?
Thank you!

It seems like you are not actually iterating over the toVisitRecursive list.
Maybe you should add a loop like:
for pixel in toVisitRecursive:
    # region_growing...
or change the line "if toVisitRecursive != []:" into a while loop.
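A minimal sketch of what either suggestion could look like, assuming the rest of region_growing stays as posted (the variable names and the visited check below are only illustrative):
for nx, ny in toVisitRecursive:
    # Option 1: recurse into every queued neighbour, not only the first one
    if overlayMatrix[nx][ny] == 0:   # may already have been claimed by an earlier call
        region_growing(nx, ny, averagePixel, img, currentArea)

while toVisitRecursive:
    # Option 2: drain the list with a while loop instead of the single if
    nx, ny = toVisitRecursive.pop(0)
    if overlayMatrix[nx][ny] == 0:
        region_growing(nx, ny, averagePixel, img, currentArea)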

Related

What is the best way to extract text contained within a table in a pdf using python?

I'm constructing a program to extract text from a PDF, put it in a structured format, and send it off to a database. I have roughly 1,400 individual PDFs that all follow a similar format, but nuances in the verbiage and in the plan designs that the documents summarize make it tricky.
I've played around with a couple of different PDF readers in Python, including tabula-py and pdfminer, but none of them quite get to what I'd like to do. Tabula reads in all of the text very well; however, it pulls everything exactly as it lies horizontally, ignoring the fact that some of the text is wrapped in a box. For example, if you open up the sample SBC I have attached where it reads "What is the overall deductible?", Tabula will read in "What is the overall $500/Individual or...", missing the fact that the word "deductible" is really part of the first sentence. (Note: the files I'm working with are PDFs, but I've attached a JPEG because I couldn't figure out how to attach a PDF.)
import tabula

df = tabula.read_pdf(filepath, pandas_options={'header': None})
print(df.iloc[0][0])
print(df)
In the end, I'd really like to be able to parse out the text within each box so that I can better identify which values belong to the deductible, out-of-pocket limits, copays/coinsurance, etc. I thought possibly some sort of OCR would allow me to recognize which parts of the PDF are contained in the blue rectangles and then pull the string from there, but I really don't know where to start with that. Sample SBC
@jpnadas In this case the code you copied from my answer in this post isn't really suitable, because it addresses the case when a table doesn't have a surrounding grid. That algorithm looks for repeating blocks of text and tries to find a pattern that resembles a table heuristically.
But in this particular case the table does have a grid, and by taking advantage of this we can achieve a much more accurate result.
The strategy is the following:
1. Increase the image gamma to make the grid darker.
2. Get rid of colour and apply Otsu thresholding.
3. Find long vertical and horizontal lines in the image and create a mask from them using the erode and dilate functions.
4. Find the cell blocks in the mask using the findContours function.
5. Find the table objects:
5.1 The rest can be done as in the post about finding a table without a grid: find the table structure heuristically.
5.2 An alternative approach could be to use the hierarchy returned by the findContours function. This approach is even more accurate and allows finding multiple tables in a single image.
Having the cell coordinates, it's easy to extract a certain cell's image from the original image:
cell_image = image[cell_y:cell_y + cell_h, cell_x:cell_x + cell_w]
Then apply OCR to each cell_image.
BUT! I consider the OpenCV approach a last resort for when you're not able to read the PDF's contents: for instance, when a PDF contains a raster image inside.
If it's a vector-based PDF and its contents are readable, it makes more sense to find the table inside the contents and just read the text from it instead of doing heavy 'OCR lifting'.
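For example, if the PDF is text-based, something along these lines could read the grid cells directly. This uses pdfplumber, which is not mentioned in the thread, so treat it as one possible choice rather than the recommended tool, and the file name is hypothetical:
import pdfplumber

# read cell text straight from a vector PDF instead of OCR-ing a rendered image
with pdfplumber.open("sample_sbc.pdf") as pdf:   # hypothetical file name
    for table in pdf.pages[0].extract_tables():
        for row in table:
            # each row is a list of cell strings (None for empty cells)
            print([cell.strip() if cell else "" for cell in row])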
Here's the code for reference for more accurate table recognition:
import os
import imutils
import numpy as np
import argparse
import cv2

def gamma_correction(image, gamma=1.0):
    look_up_table = np.empty((1, 256), np.uint8)
    for i in range(256):
        look_up_table[0, i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
    result = cv2.LUT(image, look_up_table)
    return result

def pre_process_image(image):
    # Applying gamma to make the table lines darker
    gamma = gamma_correction(image, 2)
    # Getting rid of color
    gray = cv2.cvtColor(gamma, cv2.COLOR_BGR2GRAY)
    # Then apply Otsu threshold to reveal important areas
    ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # inverting the thresholded image
    return ~thresh

def get_horizontal_lines_mask(image, horizontal_size=100):
    horizontal = image.copy()
    horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
    horizontal = cv2.erode(horizontal, horizontal_structure, anchor=(-1, -1), iterations=1)
    horizontal = cv2.dilate(horizontal, horizontal_structure, anchor=(-1, -1), iterations=1)
    return horizontal

def get_vertical_lines_mask(image, vertical_size=100):
    vertical = image.copy()
    vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
    vertical = cv2.erode(vertical, vertical_structure, anchor=(-1, -1), iterations=1)
    vertical = cv2.dilate(vertical, vertical_structure, anchor=(-1, -1), iterations=1)
    return vertical

def make_lines_mask(preprocessed, min_horizontal_line_size=100, min_vertical_line_size=100):
    hor = get_horizontal_lines_mask(preprocessed, min_horizontal_line_size)
    ver = get_vertical_lines_mask(preprocessed, min_vertical_line_size)
    mask = np.zeros((preprocessed.shape[0], preprocessed.shape[1], 1), dtype=np.uint8)
    mask = cv2.bitwise_or(mask, hor)
    mask = cv2.bitwise_or(mask, ver)
    return ~mask

def find_cell_boxes(mask):
    # Looking for the text spots contours
    # OpenCV 3
    # img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    image_width = mask.shape[1]
    # Getting the texts bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        w = box[2]
        # Excluding the page box shape but adding smaller boxes
        if w < 0.95 * image_width:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having fewer than min_columns columns
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_vertical_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True, help="path to images directory")
    args = vars(ap.parse_args())
    in_file = args["image"]
    filename_base = in_file.replace(os.path.splitext(in_file)[1], "")
    img = cv2.imread(in_file)

    pre_processed = pre_process_image(img)
    # Visualizing pre-processed image
    cv2.imwrite(filename_base + ".pre.png", pre_processed)

    lines_mask = make_lines_mask(pre_processed, min_horizontal_line_size=1800, min_vertical_line_size=500)
    # Visualizing table lines mask
    cv2.imwrite(filename_base + ".mask.png", lines_mask)

    cell_boxes = find_cell_boxes(lines_mask)
    cells = find_table_in_boxes(cell_boxes)
    # apply OCR to each cell rect here
    # the cells array contains cell coordinates in tuples (x, y, w, h)

    hor_lines, ver_lines = build_vertical_lines(cells)

    # Visualize the table lines
    vis = img.copy()
    for line in hor_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    for line in ver_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(filename_base + ".result.png", vis)
Some parameters are hard-coded:
page size threshold - 0.95
min horizontal line size - 1800 px
min vertical line size - 500 px
You can provide them as configurable parameters or make them relative to image size.
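For example, a rough way to make those sizes relative to the image dimensions could be the sketch below; the scale factors are assumptions, not values from the answer:
h, w = img.shape[:2]

# derive the hard-coded pixel sizes from the page dimensions instead
min_horizontal_line_size = int(w * 0.75)   # assumed: a grid line spans most of the page width
min_vertical_line_size = int(h * 0.20)     # assumed: a cell border spans a fair share of the height

lines_mask = make_lines_mask(pre_processed,
                             min_horizontal_line_size=min_horizontal_line_size,
                             min_vertical_line_size=min_vertical_line_size)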
Results:
I think that the best way to do what you need is to find and isolate the cells in the file and then apply OCR to each individual cell.
There are a number of solutions on SO for that. I got the code from this answer and played around a little with the parameters to get the output below (not perfect yet, but you can tweak it a little bit yourself).
import os
import cv2
import imutils

# This only works if there's only one table on a page
# Important parameters:
#  - morph_size
#  - min_text_height_limit
#  - max_text_height_limit
#  - cell_threshold
#  - min_columns

def pre_process_image(img, save_in_file, morph_size=(23, 23)):
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # dilate the text to make it a solid spot
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy
    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre

def find_text_boxes(pre, min_text_height_limit=20, max_text_height_limit=120):
    # Looking for the text spots contours
    contours, _ = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # Getting the texts bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=100, min_columns=3):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having fewer than min_columns columns
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    in_file = os.path.join(".", "test.jpg")
    pre_file = os.path.join(".", "pre.png")
    out_file = os.path.join(".", "out.png")

    img = cv2.imread(os.path.join(in_file))

    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)

    # Visualize the result
    vis = img.copy()
    # for box in text_boxes:
    #     (x, y, w, h) = box
    #     cv2.rectangle(vis, (x, y), (x + w - 2, y + h - 2), (0, 255, 0), 1)
    for line in hor_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    for line in ver_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(out_file, vis)

How to remove an image and its label from a dataset

When I import a dataset of images and the CSV file with the corresponding labels, I use a function to extract every image's label. But now I have to remove some images that do not fit specific criteria. Is there a way to remove the corresponding label as well?
This is the function that is used to load the images and the labels
# imports implied by the function below
import numpy as np
import pandas as pd
from PIL import Image

def imp_img():
    dirname = '/s/desk/img/'
    x = np.zeros((1000, 100, 100), dtype=np.float32)
    for i in range(x.shape[0]):
        img = Image.open(dirname + 'img_%02d.png' % (i))
        img = np.array(img)
        x[i] = img
    path = '/s/desk/labels_classificatio.csv'
    labels = pd.read_csv(path, usecols=["category"], sep=";")
    y = np.array(labels)
    return x, y
This is how they are imported
x, y = imp_img()
x = x/255.0
y = y.reshape(y.shape[0], 1)
x.shape, y.shape
and now I made a for loop to remove the images that are too dark:
c = []
for i in x:
    if np.sum(i) >= 100:
        c.append(i)
c = np.asarray(c)
The problem now is that I have fewer images than I have labels. Is there a way to remove the corresponding label as well?
You're looking for enumerate. It lets you loop over an iterable while maintaining a count. Instead of for i in x we'll do for i, img in enumerate(x), which lets us maintain a loop counter i. This way you can subset the labels corresponding to the images that meet your criteria.
code:
c = []
c_labels = []

for i, img in enumerate(x):
    if np.sum(img) >= 100:
        c.append(img)
        c_labels.append(y[i])

c = np.asarray(c)
c_labels = np.asarray(c_labels)
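Since x and y are NumPy arrays of the same length, another option is a boolean mask, which keeps images and labels aligned without an explicit loop. This is just an alternative sketch, not part of the answer above:
import numpy as np

# one entry per image: True if the image is bright enough to keep
keep = x.sum(axis=(1, 2)) >= 100

# the same mask indexes both arrays, so images and labels stay in sync
c = x[keep]
c_labels = y[keep]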

How to perform Earth Mover's Distance instead of DoG for center-surround difference at multiple scales in images in Python 3.7

I am working on an image processing project where I have to perform a center-surround difference calculation with Earth Mover's Distance (EMD) at multiple scales, but the problem is that I can't figure out how the center-surround difference works and how I could use EMD for it.
I found the Python function for EMD, but it works with two source image histograms, whereas in my problem I have only one source image.
I am generating multiple scales of the image using skimage's pyramid_gaussian function, following the solution provided at this link: https://gist.github.com/duhaime/211365edaddf7ff89c0a36d9f3f7956c
I tried:
# imports implied by the helpers below (imread with flatten=True matches the old scipy.misc API)
import cv2
import numpy as np
from scipy.stats import wasserstein_distance
from skimage.transform import resize, pyramid_gaussian
from scipy.misc import imread

def get_img(path, norm_size=True, norm_exposure=False):
    img = imread(path, flatten=True).astype(int)
    if norm_size:
        img = resize(img, (height, width), anti_aliasing=True, preserve_range=True)
    if norm_exposure:
        img = normalize_exposure(img)
    return img

def get_histogram(img):
    h, w = img.shape
    hist = [0.0] * 256
    for i in range(h):
        for j in range(w):
            hist[img[i, j]] += 1
    return np.array(hist) / (h * w)

def normalize_exposure(img):
    img = img.astype(int)
    hist = get_histogram(img)
    # get the sum of vals accumulated by each position in hist
    cdf = np.array([sum(hist[:i+1]) for i in range(len(hist))])
    # determine the normalization values for each unit of the cdf
    sk = np.uint8(255 * cdf)
    # normalize each position in the output image
    height, width = img.shape
    normalized = np.zeros_like(img)
    for i in range(0, height):
        for j in range(0, width):
            normalized[i, j] = sk[img[i, j]]
    return normalized.astype(int)

def earth_movers_distance(path_a, path_b):
    img_a = get_img(path_a, norm_exposure=True)
    img_b = get_img(path_b, norm_exposure=True)
    hist_a = get_histogram(img_a)
    hist_b = get_histogram(img_b)
    return wasserstein_distance(hist_a, hist_b)

if __name__ == '__main__':
    image = cv2.imread("images/test3.jpg")
    pyramidlist = []
    dst = []
    for (i, resized) in enumerate(pyramid_gaussian(image, downscale=1.4)):
        if resized.shape[0] < 30 or resized.shape[1] < 30:
            break
        cv2.imshow(f"Layer {i+1}", resized)
        cv2.waitKey(0)
        pyramidlist.append(resized[i])
    print(pyramidlist)
    print(len(pyramidlist))
    cv2.destroyAllWindows()
but I don't know how to use EMD after generating the pyramids, or how to calculate the center-surround difference.
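One common reading of "center-surround difference" (the Itti-Koch saliency formulation, which may or may not be what this project requires) is to compare a fine pyramid level (the center) against a coarser level (the surround). With EMD, that comparison can be done between the intensity histograms of the two levels. Below is a rough sketch under that assumption; the downscale factor and level offsets are arbitrary choices, not values from the question:
import numpy as np
from scipy.stats import wasserstein_distance
from skimage.color import rgb2gray
from skimage.transform import pyramid_gaussian

def intensity_histogram(img, bins=256):
    # img is a float image in [0, 1]; return a normalised intensity histogram
    hist, _ = np.histogram(img, bins=bins, range=(0.0, 1.0))
    return hist / hist.sum()

def center_surround_emd(image, deltas=(2, 3), downscale=1.4):
    gray = rgb2gray(image)
    levels = [lvl for lvl in pyramid_gaussian(gray, downscale=downscale)
              if lvl.shape[0] >= 30 and lvl.shape[1] >= 30]
    hists = [intensity_histogram(lvl) for lvl in levels]
    bin_centers = np.arange(256)
    results = {}
    for c in range(len(levels)):
        for delta in deltas:
            s = c + delta
            if s < len(levels):
                # EMD between the center level's and the surround level's histograms
                results[(c, s)] = wasserstein_distance(bin_centers, bin_centers,
                                                       hists[c], hists[s])
    return results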

Text recognition using Tesseract

Hello, I'm trying to recognize text from an image using Tesseract but I am unable to get a result.
I'm using the EAST technique to detect the text. I have one more question: how can I extend the padding of the box? cv2.putText does not work in this case.
original code for text detection: https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp
import cv2
import numpy as np
import argparse
import time
import math
import matplotlib.pyplot as plt
import skimage.io as io
import os
from imutils.object_detection import non_max_suppression
import pytesseract

print(np.__version__)

def decode_predictions(scores, geometry):
    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    boxes = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < args["min_confidence"]:
                continue
            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute the rotated rect for the text prediction bounding box
            offset = (offsetX + (cos * xData1[x]) + (sin * xData2[x]),
                      offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            p1 = (-sin * h + offset[0], -cos * h + offset[1])
            p3 = (-cos * w + offset[0], sin * w + offset[1])
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
            # add the bounding box coordinates and probability score to
            # our respective lists
            boxes.append((center, (w, h), -angle * 180.0 / math.pi))
            confidences.append(float(scoresData[x]))
    return (boxes, confidences)

args = {
    "image": "C:\\Users\\ckunwar\\Test_Images\\licence_plate1\\52.jpg",
    "east": "frozen_east_text_detection.pb",
    "min_confidence": 0.25,
    "nms_thresh": 0.24,
    "width": 480,
    "height": 320,
    "padding": 0.0
}

# load the input image and grab the image dimensions
image = cv2.imread(args["image"])
orig = image.copy()
(H, W) = image.shape[:2]
#print(H, W)

# set the new width and height and then determine the ratio in change
# for both the width and height
(newW, newH) = (args["width"], args["height"])
rW = W / float(newW)
rH = H / float(newH)

# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()

# show timing information on text prediction
print("[INFO] text detection took {:.6f} seconds".format(end - start))

(boxes, confidences) = decode_predictions(scores, geometry)

# apply non-maxima suppression to suppress weak, overlapping bounding boxes
indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, args["min_confidence"], args["nms_thresh"])

results = []
# loop over the bounding boxes
for i in indices:
    # get 4 corners of the rotated rect
    vertices = cv2.boxPoints(boxes[i[0]])
    # scale the bounding box coordinates based on the respective ratios
    for j in [0, 1, 2, 3]:
        vertices[j][0] *= rW
        vertices[j][1] *= rH
    # draw the bounding box on the image
    for j in [0, 1, 2, 3]:
        p1 = (vertices[j][0], vertices[j][1])
        p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
    config = ("-l eng --oem 3 --psm 11")
    text = pytesseract.image_to_string(orig, config=config)
    results.append(((p1, p2), text))

results = sorted(results, key=lambda r: r[0][1])
output = orig.copy()

for ((p1, p2), text) in results:
    print("OCR TEXT")
    print("========")
    print("{}\n".format(text))
    text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
    cv2.line(output, p1, p2, (0, 255, 0), 2)
    #cv2.rectangle(output, p1, p2, (0, 255, 0), 2)
    cv2.putText(output, text, cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 255), 2)

# show the output image
#orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
cv2.imshow("Text Detection", output)
cv2.waitKey(0)
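Regarding the padding question and OCR-ing only the detected region instead of the whole orig image, a rough sketch could look like the helper below. The padding fraction, the axis-aligned crop, and the function name are assumptions, and note that cv2.putText also needs an origin point, which the call above is missing:
import cv2
import numpy as np
import pytesseract

def ocr_padded_box(orig, vertices, pad=0.05, config="-l eng --oem 3 --psm 7"):
    # vertices: the 4 (already rescaled) corners from cv2.boxPoints, shape (4, 2)
    xs, ys = vertices[:, 0], vertices[:, 1]
    h_img, w_img = orig.shape[:2]
    dx = (xs.max() - xs.min()) * pad
    dy = (ys.max() - ys.min()) * pad
    # axis-aligned bounding box of the rotated rect, grown by `pad` on each side
    x1, y1 = int(max(0, xs.min() - dx)), int(max(0, ys.min() - dy))
    x2, y2 = int(min(w_img, xs.max() + dx)), int(min(h_img, ys.max() + dy))
    roi = orig[y1:y2, x1:x2]
    text = pytesseract.image_to_string(roi, config=config)
    # putText needs an origin: draw the recognized text just above the crop
    cv2.putText(orig, text.strip(), (x1, max(10, y1 - 5)),
                cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 255), 2)
    return text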

How to detect an object's shape from images using python3?

I want to write code in Python 3 that detects object shapes in images.
I want to choose a pixel from an object in the given image and find its neighbouring pixels.
If they have the same RGB value, that means they are part of the object.
When a neighbouring pixel's RGB value differs from the original pixel by more than an adjustable amount, the algorithm should stop searching for neighbours. I think this will work unless the background and the object have the same color.
I have found a way to put the pixels with the same color in a rectangle, but this will not help me. I want to save just the shape of the object and put it in a different image.
For example, if I want to start my algorithm from the middle of an object, let's say a black table with a white background, the algorithm will find pixels with the same color in every direction. When the neighbouring pixel's RGB values change by more than 30 units in one direction, the algorithm will stop going in that direction and will start going in another direction, until I have the shape of the table.
I found code in another post that helps me determine regions of pixels with a shared value using PIL.
Thanks!
from collections import defaultdict
from PIL import Image, ImageDraw

def connected_components(edges):
    """
    Given a graph represented by edges (i.e. pairs of nodes), generate its
    connected components as sets of nodes.
    Time complexity is linear with respect to the number of edges.
    """
    neighbors = defaultdict(set)
    for a, b in edges:
        neighbors[a].add(b)
        neighbors[b].add(a)
    seen = set()
    def component(node, neighbors=neighbors, seen=seen, see=seen.add):
        unseen = set([node])
        next_unseen = unseen.pop
        while unseen:
            node = next_unseen()
            see(node)
            unseen |= neighbors[node] - seen
            yield node
    return (set(component(node)) for node in neighbors if node not in seen)

def matching_pixels(image, test):
    """
    Generate all pixel coordinates where pixel satisfies test.
    """
    width, height = image.size
    pixels = image.load()
    for x in range(width):
        for y in range(height):
            if test(pixels[x, y]):
                yield x, y

def make_edges(coordinates):
    """
    Generate all pairs of neighboring pixel coordinates.
    """
    coordinates = set(coordinates)
    for x, y in coordinates:
        if (x - 1, y - 1) in coordinates:
            yield (x, y), (x - 1, y - 1)
        if (x, y - 1) in coordinates:
            yield (x, y), (x, y - 1)
        if (x + 1, y - 1) in coordinates:
            yield (x, y), (x + 1, y - 1)
        if (x - 1, y) in coordinates:
            yield (x, y), (x - 1, y)
        yield (x, y), (x, y)

def boundingbox(coordinates):
    """
    Return the bounding box of all coordinates.
    """
    xs, ys = zip(*coordinates)
    return min(xs), min(ys), max(xs), max(ys)

def disjoint_areas(image, test):
    """
    Return the bounding boxes of all non-consecutive areas
    whose pixels satisfy test.
    """
    for each in connected_components(make_edges(matching_pixels(image, test))):
        yield boundingbox(each)

def is_black_enough(pixel):
    r, g, b = pixel
    return r < 10 and g < 10 and b < 10

if __name__ == '__main__':
    image = Image.open('some_image.jpg')
    draw = ImageDraw.Draw(image)
    for rect in disjoint_areas(image, is_black_enough):
        draw.rectangle(rect, outline=(255, 0, 0))
    image.show()
Try using OpenCV with Python.
With OpenCV you can do advanced image analysis, and there are many tutorials on how to use it, for example:
http://www.pyimagesearch.com/2014/04/21/building-pokedex-python-finding-game-boy-screen-step-4-6/
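As a concrete starting point for the idea described in the question (grow a region from a chosen pixel with an adjustable color tolerance, then copy only that shape to a new image), a minimal OpenCV sketch could use cv2.floodFill. The seed point, the tolerance of 30, and the file names are assumptions based on the question's example:
import cv2
import numpy as np

img = cv2.imread('some_image.jpg')          # assumed input file
h, w = img.shape[:2]
seed = (w // 2, h // 2)                     # e.g. the middle of the table
tol = (30, 30, 30)                          # adjustable per-channel difference

# flood fill writes the grown region into `mask` without modifying the image
mask = np.zeros((h + 2, w + 2), np.uint8)
flags = 8 | cv2.FLOODFILL_MASK_ONLY | (255 << 8)
cv2.floodFill(img, mask, seed, (0, 0, 0), loDiff=tol, upDiff=tol, flags=flags)
region = mask[1:-1, 1:-1]                   # strip the 1-pixel border floodFill requires

# copy only the object's shape onto a blank image
shape_only = np.zeros_like(img)
shape_only[region == 255] = img[region == 255]
cv2.imwrite('shape_only.png', shape_only)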
