Convert own image to MNIST's image - python-3.x

I am new to TensorFlow.
I trained a digit-prediction model using the MNIST training data.
Then I tested the model using my own image.
It does not predict the correct digit.
The problems are :
MNIST images need to be black and white.
The images are size-normalized to fit in a 20x20 pixel box and they are centered in a 28x28 image using the center of mass.
I don't want to use OpenCV
The question is: how do I shift my own handwritten digit image to the center of a 28x28 image? My own image can be any color, and it has to be converted to a black-and-white, MNIST-style image.

from PIL import Image, ImageFilter
def imageprepare(argv):
    """
    Convert an image file into a flat list of 784 MNIST-style pixel values.

    The image is converted to greyscale, scaled so that its longer side is
    20 pixels (aspect ratio preserved), sharpened, and pasted onto a white
    28x28 canvas.  The pasted image is centred by its bounding box, not by
    its centre of mass.

    argv -- path of the image file to load (passed to PIL's Image.open).

    Returns a list of 28 * 28 = 784 floats in [0.0, 1.0], where 0.0 is pure
    white and 1.0 is pure black.  The list is also printed.
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under the name that is valid in old and new releases.
    resample = Image.LANCZOS

    im = Image.open(argv).convert('L')
    width = float(im.size[0])
    height = float(im.size[1])
    newImage = Image.new('L', (28, 28), 255)  # white 28x28 canvas

    if width > height:
        # Width is the longer side: it becomes 20 px and the height is
        # scaled by the same ratio (never less than 1 px).
        nheight = max(1, int(round(20.0 / width * height)))
        img = im.resize((20, nheight), resample).filter(ImageFilter.SHARPEN)
        wtop = int(round((28 - nheight) / 2))  # vertical offset
        newImage.paste(img, (4, wtop))
    else:
        # Height is the longer side (or the image is square): it becomes
        # 20 px and the width is scaled by the same ratio (at least 1 px).
        nwidth = max(1, int(round(20.0 / height * width)))
        img = im.resize((nwidth, 20), resample).filter(ImageFilter.SHARPEN)
        wleft = int(round((28 - nwidth) / 2))  # horizontal offset
        newImage.paste(img, (wleft, 4))

    tv = list(newImage.getdata())  # raw 0..255 grey values, row by row
    # Normalize and invert: 0 is pure white, 1 is pure black.
    tva = [(255 - x) * 1.0 / 255.0 for x in tv]
    print(tva)
    return tva


x=imageprepare('./image.png')#file path here
print(len(x))# mnist IMAGES are 28x28=784 pixels

I would use numpy recipe like this one --
https://www.kaggle.com/c/digit-recognizer/forums/t/6366/normalization-and-centering-of-images-in-mnist
You could probably remap this to pure TensorFlow pipeline, but I'm not sure it's necessary given that it's tiny images.
Also you would get better accuracy if you went the other way -- instead of normalizing your input data, make your network robust to lack of normalization by training on a larger dataset of randomly shifted/rescaled MNIST digits.

Related

Draw or resize plotted quantized image with nearest neighbour scaling

Following this example of K means clustering I want to recreate the same - only I'm very keen for the final image to contain just the quantized colours (+ white background). As it is, the colour bars get smooshed together to create a pixel line of blended colours.
Whilst they look very similar, the image (top half) is what I've got from CV2 it contains 38 colours total.
The lower image only has 10 colours and is what I'm after.
Let's look at a bit of that with 6 times magnification:
I've tried :
# OpenCV and Python K-Means Color Clustering
# build a histogram of clusters and then create a figure
# representing the number of pixels labeled to each color
hist = colour_utils.centroid_histogram(clt)
bar = colour_utils.plot_colors(hist, clt.cluster_centers_)
# resize the bar to 460x345 using nearest-neighbour interpolation, which
# copies existing pixels rather than blending neighbouring colours
bar = cv2.resize(bar, (460, 345), 0, 0, interpolation = cv2.INTER_NEAREST)
However, the resize seems to have no resizing effect or change the scaling type. I don't know what controls the initial image size either.
Confused.
Any ideas?
I recommend you to show the image using cv2.imshow, instead of using matplotlib.
cv2.imshow shows the image "pixel to pixel" by default, while matplotlib.pyplot matches the image dimensions to the size of the axes.
bar_bgr = cv2.cvtColor(bar, cv2.COLOR_RGB2BGR) # Convert RGB to BGR
# cv2.imshow interprets the channels as BGR, hence the conversion above
cv2.imshow('bar', bar_bgr)
# block until a key is pressed, then close the window
cv2.waitKey()
cv2.destroyAllWindows()
In case you want to use matplotlib, take a look at: Display image with a zoom = 1 with Matplotlib imshow() (how to?).
Code used for testing:
# import the necessary packages
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import argparse
#import utils
import cv2
def centroid_histogram(clt):
    """
    Return the fraction of samples assigned to each cluster.

    clt -- a fitted clustering object exposing ``labels_``: one non-negative
           integer cluster index per sample.

    Returns a float array in which entry ``i`` is the share of samples
    labelled ``i``; the entries sum to one.
    """
    labels = np.asarray(clt.labels_)
    # Count per label *index* rather than per distinct label.  Sizing the
    # bins by the number of distinct labels shifts counts into the wrong
    # slot whenever a label id is unused, which would pair the wrong
    # percentage with each centroid downstream.
    hist = np.bincount(labels).astype("float")
    # normalize the histogram, such that it sums to one
    hist /= hist.sum()
    return hist
def plot_colors(hist, centroids, width=300, height=50):
    """
    Draw a horizontal bar whose coloured segments show each cluster's share.

    hist      -- iterable of fractions, one per cluster.
    centroids -- iterable of colour arrays, paired with ``hist`` by position;
                 each is cast to uint8 and used as the fill colour.
    width     -- bar width in pixels (default 300, the original fixed size).
    height    -- bar height in pixels (default 50, the original fixed size).

    Returns a ``(height, width, 3)`` uint8 array.
    """
    bar = np.zeros((height, width, 3), dtype="uint8")
    startX = 0
    # Each segment starts where the previous one ended and is as wide as
    # the cluster's fraction of the total width.
    for (percent, color) in zip(hist, centroids):
        endX = startX + (percent * width)
        cv2.rectangle(bar, (int(startX), 0), (int(endX), height),
                      color.astype("uint8").tolist(), -1)  # -1 = filled
        startX = endX
    return bar
# load the image; cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message
image = cv2.imread('chelsea.png')
if image is None:
    raise FileNotFoundError("could not read 'chelsea.png'")
# convert it from BGR to RGB so that we can display it with matplotlib
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# show our image
plt.figure()
plt.axis("off")
plt.imshow(image)
# reshape the image to be a list of pixels (one RGB triple per row)
image = image.reshape((image.shape[0] * image.shape[1], 3))
# cluster the pixel intensities
clt = KMeans(n_clusters = 5)
clt.fit(image)
# build a histogram of clusters and then create a figure
# representing the number of pixels labeled to each color
hist = centroid_histogram(clt)
bar = plot_colors(hist, clt.cluster_centers_)
# show our color bar (matplotlib display left disabled in favour of
# cv2.imshow below)
#plt.figure()
#plt.axis("off")
#plt.imshow(bar)
#plt.show()
# resize with nearest-neighbour interpolation so no blended colours appear
bar = cv2.resize(bar, (460, 345), 0, 0, interpolation = cv2.INTER_NEAREST)
bar_bgr = cv2.cvtColor(bar, cv2.COLOR_RGB2BGR) # Convert RGB to BGR
cv2.imshow('bar', bar_bgr)
cv2.waitKey()
cv2.destroyAllWindows()

Paste an image to another image at two given co-ordinates with altered opacity using PIL or OpenCV in Python

I have two images with given points, one point each image, that need to be aligned so that the result image is a summation of both images, while image 2 is pasted on image 1 with 40% opacity. I have taken this question into consideration but our case does not exactly match as the image co-ordinate is supplied by user and images can have wide range of sizes.
Image 1:
Image2:
Final result(desired output):
For this I have tried img.paste() function of PIL and replacing values in numpy array of images in cv2, both giving results that are far from desired.
I made two input images with ImageMagick like this:
magick -size 300x400 xc:"rgb(1,204,255)" -fill red -draw "point 280,250" 1.png
magick -size 250x80 xc:"rgb(150,203,0)" -fill red -draw "point 12,25" 2.png
Then ran the following code:
#!/usr/bin/env python3
"""
Overlay one image on another so that a chosen point in each coincides.
"""
from PIL import Image

# Load both inputs, forcing RGB
im1 = Image.open('1.png').convert('RGB')
im2 = Image.open('2.png').convert('RGB')

# Anchor point (x, y) in each image
p1x, p1y = (280, 250)
p2x, p2y = (12, 25)

# Room needed left/top/right/bottom of the shared anchor in the output
pL, pT = max(p1x, p2x), max(p1y, p2y)
pR = max(im1.width - p1x, im2.width - p2x)
pB = max(im1.height - p1y, im2.height - p2y)

# Solid white canvas large enough for both images
canvas_size = (pL + pR, pT + pB)
bg = Image.new('RGB', canvas_size, 'white')
bg.save('DEBUG-bg.png')

# First image goes on fully opaque
bg.paste(im1, (pL - p1x, pT - p1y))
bg.save('DEBUG-bg+im1.png')

# Uniform 40% opacity mask the size of the second image
alpha = Image.new('L', im2.size, int(40 * 255 / 100))
alpha.save('DEBUG-alpha.png')

# Second image goes on through the mask
bg.paste(im2, (pL - p2x, pT - p2y), alpha)
bg.save('result.png')
The result is this:
The lines that save images with names starting "DEBUG-xxx.png" are just for easy debugging and can be removed. I can easily view them all to see what is going on with the code and I can easily delete them all by removing "DEBUG*png".
Without any more details, I will try to answer the question as best as I can and will name all the extra assumptions that I made (and how to handle them if you can't make them).
Since there were no provided images, I created a blue and green image with a black dot as merging coordinate, using the following code:
import numpy as np
from PIL import Image, ImageDraw
def create_image_with_point(name, color, x, y, width=3):
    """
    Create a 400x400 RGB test image with a black square marker and a label.

    name  -- file name the image is saved under.
    color -- RGB triple used to fill the background.
    x, y  -- pixel coordinates of the marker centre.
    width -- half-size of the square marker in pixels.

    Returns the PIL image that was saved.
    """
    image = np.full((400, 400, 3), color, dtype=np.uint8)
    # Clamp the slice starts at 0: a negative start would be counted from
    # the opposite edge and silently produce an empty (invisible) marker
    # for points closer than ``width`` to the top or left border.
    top = max(y - width, 0)
    left = max(x - width, 0)
    image[top:y + width, left:x + width] = (0, 0, 0)
    image = Image.fromarray(image, mode='RGB')
    ImageDraw.Draw(image).text((x - 15, y - 20), 'Point', (0, 0, 0))
    image.save(name)
    return image


blue = create_image_with_point('blue.png', color=(50, 50, 255), x=300, y=100)
green = create_image_with_point('green.png', color=(50, 255, 50), x=50, y=50)
This results in the following images:
Now I will make the assumption that the images do not contain an alpha layer yet (as I created them without). Therefore I will load the image and add an alpha layer to them:
import numpy as np
from PIL import Image
# Load both images and give each a fully opaque (255) alpha channel;
# putalpha modifies the image in place.
blue = Image.open('blue.png')
blue.putalpha(255)
green = Image.open('green.png')
green.putalpha(255)
My following assumption is that you know the merge coordinates beforehand:
# Merge points as (x, y) pixel coordinates, one per image; these are the
# points that should coincide in the combined result.
point_blue = (300, 100)
point_green = (50, 50)
Then you can create an empty image, that can hold both of the images easily:
# 1000x1000 canvas with 4 channels, all zero (black, alpha 0)
new_image = np.zeros((1000, 1000, 4), dtype=np.uint8)
This is a far-fetched assumption if you do not know the image sizes beforehand; in that case you will have to calculate the combined size of the two images.
Then you can place each image's merge point at the center of the newly created image (in my case (500, 500)). For this you use the merging points as offsets. And you can perform alpha blending (in any case: np.uint8(img_1*alpha + img_2*(1-alpha))) to merge the images using different opacity.
Which is in code:
def place_image(image: "Image.Image", point_xy: tuple[int, int], dest: np.ndarray, alpha: float = 1.,
                center: tuple[int, int] = (500, 500)) -> np.ndarray:
    """
    Alpha-blend ``image`` into ``dest`` so that ``point_xy`` lands on ``center``.

    image    -- PIL image or array of shape (height, width, channels); it is
                converted with ``np.asarray`` and must have the same channel
                count as ``dest``.
    point_xy -- (x, y) of the merge point inside ``image``.
    dest     -- uint8 canvas that is modified in place.
    alpha    -- opacity of ``image``: 1 replaces the canvas, 0 leaves it as is.
    center   -- (x, y) on the canvas where the merge point is placed
                (default (500, 500), the original fixed position).

    Returns ``dest`` (the same array object).
    Raises ValueError if the image would not fit entirely inside ``dest``.
    """
    src = np.asarray(image)
    height, width = src.shape[:2]
    # Use the merge point as an offset so it coincides with ``center``.
    offset_x, offset_y = center[0] - point_xy[0], center[1] - point_xy[1]
    # Negative offsets would be read as "from the end" by slicing and an
    # overflow would truncate the region, so reject both explicitly.
    if (offset_x < 0 or offset_y < 0
            or offset_y + height > dest.shape[0]
            or offset_x + width > dest.shape[1]):
        raise ValueError("image does not fit inside dest at the requested position")
    # Blend the affected region of the canvas with the image.
    region = dest[offset_y:offset_y + height, offset_x:offset_x + width]
    blended = (region * (1 - alpha) + src * alpha).astype(np.uint8)
    # Copy the blended pixels back into the canvas.
    dest[offset_y:offset_y + height, offset_x:offset_x + width] = blended
    return dest
# Add the blue image first with alpha 1, i.e. it fully replaces the
# canvas pixels underneath it.
new_image = place_image(blue, point_blue, dest=new_image, alpha=1)
# Add the green image on top with 40% opacity.
new_image = place_image(green, point_green, dest=new_image, alpha=0.4)
# Convert the canvas array back to a PIL image and store it.
image = Image.fromarray(new_image)
image.save('result.png')
The final result will be a bigger image, of the combined images, again you can calculate the correct bounding box, so you don't have these huge areas of 'nothing' sticking out. The final result will look like this:

How to remove the background from a picture in OpenCV python

I am new to computer vision. I would like to ask how I can delete the whole background of this image and keep only the pills untouched. I tried different things, such as changing the background color, but there are still some small edges and noise left.
Alternatively, is it possible to make all of the white background a neutral color, without the line between the circles?
Here is one way in Python/OpenCV. Threshold the image on white. Then apply some morphology to clean it up a bit. Then invert it to make a mask. Then apply the mask to the input. I note that your pills overlap the ring. So this method does not remove the ring.
Input:
import cv2
import numpy as np
# Read image (cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message)
img = cv2.imread('pills.jpg')
if img is None:
    raise FileNotFoundError("could not read 'pills.jpg'")
hh, ww = img.shape[:2]
# threshold on white
# Define lower and upper limits
lower = np.array([200, 200, 200])
upper = np.array([255, 255, 255])
# Create mask that selects the near-white pixels (all channels in 200..255)
thresh = cv2.inRange(img, lower, upper)
# apply morphology (close) with a 20x20 elliptical kernel
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20,20))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# invert morph image so that the non-white regions become the mask
mask = 255 - morph
# apply mask to image
result = cv2.bitwise_and(img, img, mask=mask)
# save results
cv2.imwrite('pills_thresh.jpg', thresh)
cv2.imwrite('pills_morph.jpg', morph)
cv2.imwrite('pills_mask.jpg', mask)
cv2.imwrite('pills_result.jpg', result)
# display results until a key is pressed
cv2.imshow('thresh', thresh)
cv2.imshow('morph', morph)
cv2.imshow('mask', mask)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Morphology cleaned image:
Mask image:
Result:
Here is another way to do that in Python/OpenCV removing the ring. But it will remove parts of the pills that overlap the ring.
Read the input
Threshold on white
Apply morphology close to remove the center strip
Get the contours
Draw the contours as white filled on black background
Get the convex hull of the white filled contours
Fit an ellipse to the convex hull
Print the ellipse shape to make sure it is close to a circle
Draw the convex hull outline in red on the input to check if fits the white region
Draw a circle using the average ellipse radii and center as white filled on black background
Erode the circle a little to avoid leaving a partial white ring
Combine the inverted morph image and the circle image to make a final mask
Apply the final mask to the input
Save the results
import cv2
import numpy as np
# Read image (cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message)
img = cv2.imread('pills.jpg')
if img is None:
    raise FileNotFoundError("could not read 'pills.jpg'")
hh, ww = img.shape[:2]
# threshold on white
# Define lower and upper limits
lower = np.array([200, 200, 200])
upper = np.array([255, 255, 255])
# Create mask that selects the near-white pixels (all channels in 200..255)
thresh = cv2.inRange(img, lower, upper)
# apply morphology (close) with a 20x20 elliptical kernel
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20,20))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get contours; the return value has 2 or 3 items depending on the
# OpenCV version, so pick the contour list accordingly
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# draw white filled contours on black background as mask
mask = np.zeros((hh,ww), dtype=np.uint8)
for cntr in contours:
    cv2.drawContours(mask, [cntr], 0, (255,255,255), -1)
# get convex hull of all thresholded pixels; the transpose makes np.where
# yield (x, y) pairs instead of (row, col)
points = np.column_stack(np.where(thresh.transpose() > 0))
hullpts = cv2.convexHull(points)
((centx,centy), (width,height), angle) = cv2.fitEllipse(hullpts)
print("center x,y:",centx,centy)
print("diameters:",width,height)
print("orientation angle:",angle)
# draw convex hull on a copy of the image (red outline) as a visual check
hull = img.copy()
cv2.polylines(hull, [hullpts], True, (0,0,255), 1)
# create new circle mask from ellipse
circle = np.zeros((hh,ww), dtype=np.uint8)
cx = int(centx)
cy = int(centy)
# width and height are full diameters, so their mean radius is their sum / 4
radius = (width+height)/4
cv2.circle(circle, (cx,cy), int(radius), 255, -1)
# erode circle a bit to avoid a white ring
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6,6))
circle = cv2.morphologyEx(circle, cv2.MORPH_ERODE, kernel)
# combine inverted morph and circle
mask2 = cv2.bitwise_and(255-morph, 255-morph, mask=circle)
# apply mask to image
result = cv2.bitwise_and(img, img, mask=mask2)
# save results
cv2.imwrite('pills_thresh2.jpg', thresh)
cv2.imwrite('pills_morph2.jpg', morph)
cv2.imwrite('pills_mask2.jpg', mask)
cv2.imwrite('pills_hull2.jpg', hull)
cv2.imwrite('pills_circle.jpg', circle)
cv2.imwrite('pills_result2.jpg', result)
# display results until a key is pressed
cv2.imshow('thresh', thresh)
cv2.imshow('morph', morph)
cv2.imshow('mask', mask)
cv2.imshow('hull', hull)
cv2.imshow('circle', circle)
cv2.imshow('mask2', mask2)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Morphology image:
Filled contours image:
Convex hull on input:
Circle image:
Final mask image:
Result:

How do I detect vertical text with OpenCV for extraction

I am new to OpenCV and trying to see if I can find a way to detect vertical text for the image attached.
In this case on row 3 , I would like to get the bounding box around Original Cost and the amount below ($200,000.00).
Similarly I would like to get the bounding box around Amount Existing Liens and the associated amount below. I then would use this data to send to an OCR engine to read text. Traditional OCR engines go line by line and extract and loses the context.
Here is what I have tried so far -
import cv2
import numpy as np
# cv2.imread returns None instead of raising when the file cannot be read
img = cv2.imread('Test3.png')
if img is None:
    raise FileNotFoundError("could not read 'Test3.png'")
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray,100,100,apertureSize = 3)
cv2.imshow('edges',edges)
cv2.waitKey(0)
minLineLength = 20
maxLineGap = 10
lines = cv2.HoughLinesP(edges,1,np.pi/180,15,minLineLength=minLineLength,maxLineGap=maxLineGap)
# HoughLinesP returns None when no segment is found; iterating over it
# would raise a TypeError, so only draw when there is something to draw.
if lines is not None:
    # Flatten to one (x1, y1, x2, y2) row per segment, whatever the
    # nesting of the returned array.
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img,(int(x1),int(y1)),(int(x2),int(y2)),(0,255,0),2)
cv2.imshow('hough',img)
cv2.waitKey(0)
Here is my solution based on Kanan Vyas and Adrian Rosenbrock
It's probably not as "canonical" as you'd wish.
But it seems to work (more or less...) with the image you provided.
Just a word of CAUTION: The code looks within the directory from which it is running, for a folder named "Cropped" where cropped images will be stored. So, don't run it in a directory which already contains a folder named "Cropped" because it deletes everything in this folder at each run. Understood? If you're unsure run it in a separate folder.
The code:
# Import required packages
import cv2
import numpy as np
import pathlib
###################################################################################################################################
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
###################################################################################################################################
def sort_contours(cnts, method="left-to-right"):
    """
    Sort contours by the position of their bounding boxes.

    cnts   -- iterable of contours accepted by cv2.boundingRect.
    method -- "left-to-right" (default), "right-to-left", "top-to-bottom"
              or "bottom-to-top"; any other value sorts left to right.

    Returns ``(contours, boundingBoxes)`` as two tuples in sorted order.
    For empty input both tuples are empty.
    """
    # sort descending for the two reversed directions
    reverse = method in ("right-to-left", "bottom-to-top")
    # index into the (x, y, w, h) box: 1 sorts on y, 0 sorts on x
    i = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    # zip(*...) on an empty sequence yields nothing to unpack, so return
    # the empty result explicitly instead of raising ValueError
    if not boundingBoxes:
        return ((), ())
    # sort contours and boxes together, keyed on the chosen box coordinate
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    # return the sorted contours and bounding boxes
    return (cnts, boundingBoxes)
###################################################################################################################################
# https://medium.com/coinmonks/a-box-detection-algorithm-for-any-image-containing-boxes-756c15d7ed26 (with a few modifications)
###################################################################################################################################
def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    """
    Find the boxes drawn in an image and save each one as its own PNG.

    img_for_box_extraction_path -- path of the image to analyse (read as
                                   greyscale).
    cropped_dir_path            -- prefix prepended to every output file
                                   name, e.g. "./Cropped/"; files are named
                                   "<x>_<y>.png" after the box's top-left
                                   corner.

    Only boxes wider than 50 px and taller than 20 px are saved.
    Raises FileNotFoundError if the input image cannot be read.
    """
    img = cv2.imread(img_for_box_extraction_path, 0)  # 0 = greyscale
    if img is None:
        # cv2.imread signals failure by returning None, not by raising
        raise FileNotFoundError(
            "could not read image: " + str(img_for_box_extraction_path))
    # Binarise with Otsu's threshold and invert
    _, img_bin = cv2.threshold(img, 128, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin
    # Kernel length scales with the image width; keep it at least 1 so
    # images narrower than 200 px do not produce a zero-sized kernel.
    kernel_length = max(1, img.shape[1] // 200)
    # A vertical kernel of (1 x kernel_length) to detect vertical lines.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    # A horizontal kernel of (kernel_length x 1) to detect horizontal lines.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    # A 3 x 3 rectangular kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    # Erode then dilate with the vertical kernel to keep vertical lines
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    # Erode then dilate with the horizontal kernel to keep horizontal lines
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    # Equal weights for combining the two line images
    alpha = 0.5
    beta = 1.0 - alpha
    # Weighted sum of both line images, inverted, eroded and re-binarised
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # For debugging: enable this line to see the detected line image
    #cv2.imwrite("img_final_bin.jpg",img_final_bin)
    # findContours returns 2 or 3 items depending on the OpenCV version;
    # pick the contour list the same way in both cases.
    found = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    if len(contours) == 0:
        return  # nothing detected, nothing to sort or save
    # Sort all the contours from top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")
    for (x, y, w, h) in boundingBoxes:
        # Save only boxes wider than 50 px and taller than 20 px
        if (w > 50 and h > 20):
            new_img = img[y:y+h, x:x+w]
            cv2.imwrite(cropped_dir_path+str(x)+'_'+str(y) + '.png', new_img)
###########################################################################################################################################################
def prepare_cropped_folder(cropped_dir='./Cropped'):
    """
    Make sure the output folder exists and contains no old output files.

    cropped_dir -- folder to prepare (default './Cropped', relative to the
                   current working directory).

    If the folder exists, every regular file in it whose name matches
    '*.*' is deleted; sub-folders are left alone.  Otherwise the folder is
    created.
    """
    p = pathlib.Path(cropped_dir)
    if p.exists():
        # Collect first, then delete, so the directory is not modified
        # while it is being listed.
        files = [x for x in p.glob('*.*') if x.is_file()]
        for f in files:
            f.unlink()
    else:
        p.mkdir()
###########################################################################################################################################################
# MAIN
###########################################################################################################################################################
prepare_cropped_folder()
# Read image from which text needs to be extracted (cv2.imread returns
# None instead of raising when the file cannot be read)
img = cv2.imread("dkesg.png")
if img is None:
    raise FileNotFoundError("could not read 'dkesg.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Performing OTSU threshold, then inverting the result
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
thresh1=255-thresh1
# Sum every row in float64.  Summing the uint8 pixels one by one with the
# builtin sum() can wrap around at 255 under NumPy 2 scalar promotion.
bin_y = thresh1.sum(axis=1, dtype=np.float64)
peak = bin_y.max()
if peak > 0:
    bin_y = bin_y / peak
# Rows whose normalised sum exceeds 0.995 get a black horizontal line
# drawn across the full width of the image.
ry = np.where(bin_y > 0.995)[0]
for row in ry:
    cv2.line(img, (0, int(row)), (thresh1.shape[1], int(row)), (0, 0, 0), 1)
# We need to draw a box around the picture with a white border in order for box_detection to work
cv2.line(img,(0,0),(0,img.shape[0]-1),(255,255,255),2)
cv2.line(img,(img.shape[1]-1,0),(img.shape[1]-1,img.shape[0]-1),(255,255,255),2)
cv2.line(img,(0,0),(img.shape[1]-1,0),(255,255,255),2)
cv2.line(img,(0,img.shape[0]-1),(img.shape[1]-1,img.shape[0]-1),(255,255,255),2)
# thin black frame drawn on top of the white border
cv2.line(img,(0,0),(0,img.shape[0]-1),(0,0,0),1)
cv2.line(img,(img.shape[1]-3,0),(img.shape[1]-3,img.shape[0]-1),(0,0,0),1)
cv2.line(img,(0,0),(img.shape[1]-1,0),(0,0,0),1)
cv2.line(img,(0,img.shape[0]-2),(img.shape[1]-1,img.shape[0]-2),(0,0,0),1)
cv2.imwrite('out.png',img)
box_extraction("out.png", "./Cropped/")
Now... It puts the cropped regions in the Cropped folder. They are named as x_y.png with (x,y) the position on the original image.
Here are two examples of the outputs
and
Now, in a terminal. I used pytesseract on these two images.
The results are the following:
1)
Original Cost
$200,000.00
2)
Amount Existing Liens
$494,215.00
As you can see, pytesseract got the amount wrong in the second case... So, be careful.
Best regards,
Stéphane
I assume the bounding box is fixed (a rectangle that is able to fit "Original Cost" and the amount below it). You can use text detection (OCR) to locate "Original Cost" and "Amount Existing Liens", and then crop the image based on the detected locations for further OCR on the amounts. You can refer to this link for text detection.
Try to divide the image into different cells using the lines in the image.
For example, first divide the input into rows by detecting the horizontal lines. This can be done by using cv.HoughLinesP and checking for each line if the difference between y-coordinate of the begin and end point is smaller than a certain threshold abs(y2 - y1) < 10. If you have a horizontal line, it's a separator for a new row. You can use the y-coordinates of this line to split the input horizontally.
Next, for the row you're interested in, divide the region into columns using the same technique, but now make sure the difference between the x-coordinates of the begin and end point are smaller than a certain threshold, since you're now looking for the vertical lines.
You can now crop the image to different cells using the y-coordinates of the horizontal lines and the x-coordinates of the vertical lines. Pass these cropped regions one by one to the OCR engine and you'll have for each cell the corresponding text.

How do I find corners of a paper when there are printed corners/lines on paper itself?

I'm using openCV in Python to find the corners of a sheet of paper to unwarp it.
# NOTE(review): `images`, `i` and `math` are not defined in this snippet;
# they presumably come from surrounding code that is not shown.
img = cv2.imread(images[i])
# Request at most 4 corners (quality level .01, minimum distance 1000 px)
# from the greyscale image using the Harris detector.
corners = cv2.goodFeaturesToTrack(cv2.cvtColor(img,cv2.COLOR_BGR2GRAY),4,.01,1000,useHarrisDetector=True,k=.04)
corners = np.float32(corners)
print(corners)
# Assumed width/height ratio of the target rectangle.
ratio = 1.6
# Euclidean distance between detected corners 1 and 2, used as the height.
cardH = math.sqrt((corners[2][0][0] - corners[1][0][0]) * (corners[2][0][0] - corners[1][0][0]) + (corners[2][0][1] - corners[1][0][1]) * (
corners[2][0][1] - corners[1][0][1]))
cardW = ratio * cardH;
# Target rectangle anchored at corner 0: top-left, top-right, bottom-right,
# bottom-left.
# NOTE(review): this pairs corners[0..3] with those four positions in that
# order; the detector's output order is not established here — confirm.
pts2 = np.float32(
[[corners[0][0][0], corners[0][0][1]], [corners[0][0][0] + cardW, corners[0][0][1]], [corners[0][0][0] + cardW, corners[0][0][1] + cardH],
[corners[0][0][0], corners[0][0][1] + cardH]])
M = cv2.getPerspectiveTransform(corners, pts2)
offsetSize = 500
# The zero array is only used for its shape, which is passed as the output
# size of the warp below.
transformed = np.zeros((int(cardW + offsetSize), int(cardH + offsetSize)), dtype=np.uint8);
dst = cv2.warpPerspective(img, M, transformed.shape)
Before:
https://imgur.com/a/H7HjFro
After:
https://imgur.com/a/OA6Iscq
As you can see with these images, they're detecting edges inside the paper itself, rather than the corner of the paper. Should I consider using a different algorithm entirely? I'm quite lost.
I've tried increasing the minimum euclidean distance to 1000, but that really didn't do anything.
Please note: this is no one's real information; it is a fake dataset found on Kaggle.
The kaggle dataset can be found https://www.kaggle.com/mcvishnu1/fake-w2-us-tax-form-dataset
Here is one way to do that in Python/OpenCV.
Note that the found corners are listed counter-clockwise from the top-most corner.
Read the input
Convert to gray
Gaussian blur
Otsu threshold
Morphology open/close to clean up the threshold
Get largest contour
Approximate a polygon from the contour
Get the corners
Draw the polygon on the input
Compute side lengths
Compute output corresponding corners
Get perspective transformation matrix from corresponding corner points
Warp the input image according to the matrix
Save the results
Input:
import cv2
import numpy as np
# read image (cv2.imread returns None instead of raising when the file
# cannot be read, so fail early with a clear message)
img = cv2.imread("efile.jpg")
if img is None:
    raise FileNotFoundError("could not read 'efile.jpg'")
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# blur image
blur = cv2.GaussianBlur(gray, (3,3), 0)
# do otsu threshold on gray image
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# apply morphology (close, then open) with a 7x7 kernel
kernel = np.ones((7,7), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
# get largest contour; findContours returns 2 or 3 items depending on the
# OpenCV version, so pick the contour list accordingly
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
area_thresh = 0
big_contour = None
for c in contours:
    area = cv2.contourArea(c)
    if area > area_thresh:
        area_thresh = area
        big_contour = c
if big_contour is None:
    # without this check the first use below fails with a NameError
    raise RuntimeError("no contour with a non-zero area was found")
# draw white filled largest contour on black just as a check to see it got the correct region
page = np.zeros_like(img)
cv2.drawContours(page, [big_contour], 0, (255,255,255), -1)
# get perimeter and approximate a polygon
peri = cv2.arcLength(big_contour, True)
corners = cv2.approxPolyDP(big_contour, 0.04 * peri, True)
# draw polygon on input image from detected corners
polygon = img.copy()
cv2.polylines(polygon, [corners], True, (0,0,255), 1, cv2.LINE_AA)
# Alternate: cv2.drawContours(page,[corners],0,(0,0,255),1)
# print the number of found corners and the corner coordinates
# They seem to be listed counter-clockwise from the top most corner
print(len(corners))
print(corners)
if len(corners) != 4:
    # the indexing below and getPerspectiveTransform need exactly 4 points
    raise RuntimeError("expected 4 corners, found %d" % len(corners))
# for simplicity get average of top/bottom side widths and average of left/right side heights
# note: probably better to get average of horizontal lengths and of vertical lengths
width = 0.5*( (corners[0][0][0] - corners[1][0][0]) + (corners[3][0][0] - corners[2][0][0]) )
height = 0.5*( (corners[2][0][1] - corners[1][0][1]) + (corners[3][0][1] - corners[0][0][1]) )
# np.int0 was removed in NumPy 2.0; int() truncates the same way
width = int(width)
height = int(height)
# reformat input corners to x,y list
icorners = []
for corner in corners:
    pt = [ corner[0][0],corner[0][1] ]
    icorners.append(pt)
icorners = np.float32(icorners)
# get corresponding output corners from width and height, in the same
# order as the input corners (top-right, top-left, bottom-left, bottom-right)
ocorners = [ [width,0], [0,0], [0,height], [width,height] ]
ocorners = np.float32(ocorners)
# get perspective transformation matrix
M = cv2.getPerspectiveTransform(icorners, ocorners)
# do perspective
warped = cv2.warpPerspective(img, M, (width, height))
# write results
cv2.imwrite("efile_thresh.jpg", thresh)
cv2.imwrite("efile_morph.jpg", morph)
cv2.imwrite("efile_polygon.jpg", polygon)
cv2.imwrite("efile_warped.jpg", warped)
# display it
cv2.imshow("efile_thresh", thresh)
cv2.imshow("efile_morph", morph)
cv2.imshow("efile_page", page)
cv2.imshow("efile_polygon", polygon)
cv2.imshow("efile_warped", warped)
cv2.waitKey(0)
Thresholded image:
Morphology cleaned image:
Polygon drawn on input:
Extracted Corners (counterclockwise from top right corner)
4
[[[693 67]]
[[ 23 85]]
[[ 62 924]]
[[698 918]]]
Warped Result:

Resources