My problem is extracting text from a multi-column PDF. Common libraries like PyPDF2 didn't work.
I wrote the code below to try to read it with Pytesseract, but I was also unsuccessful because it mixes the two columns.
My idea now, using this code as a base, is to crop each of the two columns and generate a new image by pasting column 1 and then column 2 below it, so I could read the result with Pytesseract or AWS Textract without problems.
How could I do this with OpenCV? (A sketch of the idea follows the code below.)
import fitz
import cv2
import pytesseract
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
SCANNED_FILE = "decreto_santos.pdf"
zoom_x = 2.0
zoom_y = 2.0
mat = fitz.Matrix(zoom_x, zoom_y)
# Render an image for each page of the PDF and save it.
doc = fitz.open(SCANNED_FILE)
print("Generated pages:")
for page in doc:
    pix = page.get_pixmap(matrix=mat)
    png = 'output/' + SCANNED_FILE.split('/')[-1].split('.')[0] + 'page-%i.png' % page.number
    print(png)
    pix.save(png)
# Load one rendered page to crop
original_image = cv2.imread('output/decreto_santospage-1.png')
# Grayscale image
gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
plt.figure(figsize=(25, 15))
plt.imshow(gray_image, cmap='gray')
plt.show()
# Result:
# Otsu thresholding
ret, threshold_image = cv2.threshold(gray_image, 0,255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
plt.figure(figsize=(25, 15))
plt.imshow(threshold_image, cmap='gray')
plt.show()
# Result:
rectangular_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# Applying dilation on the threshold image
dilated_image = cv2.dilate(threshold_image, rectangular_kernel, iterations = 1)
plt.figure(figsize=(25, 15))
plt.imshow(dilated_image)
plt.show()
# Result:
# Finding contours
contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Creating a copy of the image
copied_image = original_image.copy()
with open("output/recognized-kernel-66-66.txt", "w+") as f:
f.write("")
f.close()
mask = np.zeros(original_image.shape, np.uint8)
# Loop through the identified contours.
# Each rectangular block is cropped and passed on to pytesseract,
# which extracts the text inside the contour; the extracted text
# is then written into a text file.
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    # Crop the text block to give as input to the OCR
    cropped = copied_image[y:y + h, x:x + w]
    with open("output/recognized-kernel-66-66.txt", "a") as f:
        # Apply OCR on the cropped image
        text = pytesseract.image_to_string(cropped, lang='por', config='--oem 1 --psm 1')
        print(text)
        f.write(text)
    masked = cv2.drawContours(mask, [cnt], 0, (255, 255, 255), -1)

plt.figure(figsize=(25, 15))
plt.imshow(masked, cmap='gray')
plt.show()
My base for this code was this post
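A minimal sketch of the crop-and-stack idea, assuming a clean two-column layout split at the horizontal midpoint (the split position is an assumption to tune per document):

import cv2
import numpy as np

# Load one rendered page (path produced by the loop above)
page = cv2.imread('output/decreto_santospage-1.png')
h, w = page.shape[:2]

# Assumed: two equal columns split at the midpoint
mid = w // 2
left_col = page[:, :mid]
right_col = page[:, mid:2 * mid]  # same width as left_col

# Paste column 1 on top of column 2 to get a single-column page
stacked = np.vstack([left_col, right_col])
cv2.imwrite('output/stacked.png', stacked)

# The stacked image can then go through a plain OCR pass, e.g.:
# text = pytesseract.image_to_string(stacked, lang='por', config='--psm 6')

For scanned pages where the gutter is not exactly at the midpoint, the vertical projection of the binarized image (column sums of white pixels) is a common way to locate the gap between the columns.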
Related
I have to crop a lot of images manually, which is not the funniest thing to do, so I thought I'd try to do it using Python.
I can detect the subject and create a mask, but I have no idea how to get the points from the very bottom part and crop based on them.
Any help is appreciated.
import cv2
img = cv2.imread('image5.jpg')
h, w = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.threshold(gray, 192, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imwrite('result5.png', thr)
You can try to find all external contours using cv2.RETR_EXTERNAL and pick the bottom-most point, like this:
import cv2
import numpy as np

im = cv2.imread('images/tennis.jpg')

# Percent of original size
scale_percent = 20
width = int(im.shape[1] * scale_percent / 100)
height = int(im.shape[0] * scale_percent / 100)
dim = (width, height)

# Resize image
im = cv2.resize(im, dim, interpolation=cv2.INTER_AREA)

# Convert to grayscale
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

# Canny edge detection
canny_output = cv2.Canny(gray, 120, 240)

# Find external contours
contours, hierarchy = cv2.findContours(canny_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# cv2.drawContours(im, [contours[0]], 0, (0, 255, 0), 3)  # Uncomment to see which contour OpenCV finds

# Pick the y of the bottom-most point of the first contour and add an offset
# (whatever value you want, this is just for aesthetics)
c = contours[0]
bottommost = tuple(c[c[:, :, 1].argmax()][0])[1] + 5

# Crop the image down to that point
im = im[:bottommost, :]

# Show image
cv2.imshow('image', im)
cv2.waitKey()
Very good thinking, I'd say! Now the implementation:
ys, xs = thr.nonzero()     # nonzero() returns (row, col) indices
max_crop_h = ys.max()      # the lowest row containing a foreground pixel
crop = img[:max_crop_h, :]
numpy has your back!
I tried to apply a gaussian filter to 6 images to denoise them using the following code:
import os
import matplotlib.image as img
import matplotlib.pyplot as plt

def load_data(dir_name='C:/Users/ASUS/Desktop/Self_Learning/Coursera/Deep Learning in Computer Vision/plates'):
    im_list = []
    for f in os.listdir(dir_name):
        fpath = os.path.join(dir_name, f)  # the path of each file in the directory
        im = img.imread(fpath)
        im_list.append(im)
    return im_list

plates = load_data()
# The auxiliary function `visualize()` displays the images given as argument.
def visualize(imgs, format=None):
    plt.figure(figsize=(20, 40))
    for i, img in enumerate(imgs):
        if img.shape[0] == 3:
            img = img.transpose(1, 2, 0)
        plt_idx = i + 1
        plt.subplot(3, 3, plt_idx)
        plt.imshow(img, cmap=format)
    plt.show()

visualize(plates, 'gray')
from scipy import ndimage

def noise_reduction(imgs):
    denoised_list = []
    for i in imgs:
        gauss_filtered = ndimage.gaussian_filter(i, sigma=1.4, truncate=2.0)
        denoised_list.append(gauss_filtered)
    return denoised_list

denoised_img = noise_reduction(plates)
visualize(denoised_img, 'gray')
plates is the list that holds my images and visualize is a function to display them.
The result should have been 6 denoised grayscale images; however, I got blue-ish ones.
Here are my original images (plates):
This is the result after applying the Gaussian filter:
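One plausible cause, offered as an assumption since the images themselves aren't reproduced here: ndimage.gaussian_filter applies the given sigma along every axis of a 3-channel array, so it also blurs across the color channels and shifts the colors. A minimal sketch that smooths only the two spatial axes:

from scipy import ndimage

def noise_reduction(imgs):
    denoised_list = []
    for im in imgs:
        if im.ndim == 3:
            # sigma=0 on the channel axis leaves the colors unmixed
            filtered = ndimage.gaussian_filter(im, sigma=(1.4, 1.4, 0), truncate=2.0)
        else:
            filtered = ndimage.gaussian_filter(im, sigma=1.4, truncate=2.0)
        denoised_list.append(filtered)
    return denoised_list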
After applying the mask to the original image:
import cv2
import dlib
import numpy as np

img = cv2.imread("Aayush.jpg")
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
msk = np.zeros_like(img_gray)

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
faces = detector(img_gray)

for face in faces:
    landmarks = predictor(img_gray, face)
    lp = []
    for n in range(0, 68):
        x = landmarks.part(n).x
        y = landmarks.part(n).y
        lp.append((x, y))
    p = np.array(lp, np.int32)
    # cv2.circle(img, (x, y), 3, (0, 0, 255), -1)
    convexhull = cv2.convexHull(p)
    # cv2.polylines(img, [convexhull], True, (255, 0, 0), 3)
    cv2.fillConvexPoly(msk, convexhull, 255)

img1 = cv2.bitwise_and(img, img, mask=msk)
img1 contains a completely black image with the face cut out of img; I just need the pixel values of the face portion, not the complete image.
Since the original image and mask were not provided in the question itself, I am assuming a simple input image and a mask image with a circular cavity, as shown.
The mask here is a single-channel matrix with a value of 255 in the central cavity. To get the pixel info inside the cavity only, you can use the following numpy operation:
pixel_info = original_image[mask == 255]
# You may need to convert the numpy array to Python list.
pixel_info_list = pixel_info.tolist()
Now you may serialize the list to any format you want (CSV in this case).
Full code:
import cv2
import numpy as np
original_image = cv2.imread("/path/to/lena.png")
mask = np.zeros(original_image.shape[:2], dtype=original_image.dtype)
mask = cv2.circle(mask, (256, 256), 100, [255], -1)
pixel_info = original_image[mask == 255]
pixel_info_list = pixel_info.tolist()
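To follow through on the CSV idea with the standard library (the output filename is just an example):

import csv

# pixel_info_list holds [B, G, R] triples (OpenCV loads images in BGR order)
with open("masked_pixels.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["B", "G", "R"])
    writer.writerows(pixel_info_list)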
I am not able to detect the QR code in a registration certificate image.
# import the necessary packages
import cv2
import imutils
import numpy as np
from pyzbar import pyzbar
image = cv2.imread("myimages/adhar1.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the Scharr gradient magnitude representation of the images
# in both the x and y direction using OpenCV 2.4
ddepth = cv2.cv.CV_32F if imutils.is_cv2() else cv2.CV_32F
gradX = cv2.Sobel(gray, ddepth=ddepth, dx=1, dy=0, ksize=-1)
gradY = cv2.Sobel(gray, ddepth=ddepth, dx=0, dy=1, ksize=-1)
# subtract the y-gradient from the x-gradient
gradient = cv2.subtract(gradX, gradY)
gradient = cv2.convertScaleAbs(gradient)
cv2.imshow("gradient", gradient)
cv2.waitKey()
cv2.destroyAllWindows()
# blur and threshold the image
blurred = cv2.blur(gradient, (3, 3))
cv2.imshow("blurred", blurred)
cv2.waitKey()
cv2.destroyAllWindows()
(_, thresh) = cv2.threshold(blurred, 225, 255, cv2.THRESH_BINARY)
cv2.imshow("thresh", thresh)
cv2.waitKey()
cv2.destroyAllWindows()
# construct a closing kernel and apply it to the thresholded image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 7))
closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# perform a series of erosions and dilations
closed = cv2.erode(closed, None, iterations=4)
closed = cv2.dilate(closed, None, iterations=4)
cv2.imshow("Image22.jpg", closed)
cv2.waitKey()
#find the contours in the thresholded image, then sort the contours
# by their area, keeping only the largest one
cnts = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
c = sorted(cnts, key=cv2.contourArea, reverse=True)[0]
# compute the rotated bounding box of the largest contour
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
box = np.int0(box)
# print(box)
# draw a bounding box around the detected region and display the image
# (this slice assumes a particular ordering of the box corners)
order_points = image[box[1][1]:box[3][1], box[1][0]:box[3][0]]
cv2.imwrite("test.jpg", order_points)
barcode = pyzbar.decode(order_points)
print(barcode)
cv2.drawContours(image, [box], -1, (0, 0, 255), 3)
# cv2.imshow("Image", image)
cv2.imshow("Image1.jpg", image)
barcodes = pyzbar.decode(image)
print(barcodes)
cv2.waitKey(0)
Here I am using OpenCV for barcode detection, but I am not able to find the QR code on the registration certificate (RC). Please suggest a better solution to this problem.
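A simpler route worth trying, assuming OpenCV 4.x where cv2.QRCodeDetector is available: let OpenCV locate and decode the QR code directly instead of reconstructing its region from gradients:

import cv2

image = cv2.imread("myimages/adhar1.jpg")
detector = cv2.QRCodeDetector()
data, points, _ = detector.detectAndDecode(image)
if data:
    print("Decoded:", data)
else:
    # The QR modules may be too small; upscaling sometimes helps.
    big = cv2.resize(image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    data, points, _ = detector.detectAndDecode(big)
    print("Decoded after upscaling:", data)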
I am new to image processing and Python; you might've seen my amateur code on this site in the last couple of days.
I am trying to count the number of trees using aerial images. This is my code:
from PIL import Image
import cv2
import numpy as np
from skimage import io, filters, measure
from scipy import ndimage
img = Image.open("D:\\Texture analysis\\K-2.jpg")
width, height = img.size  # PIL's size is (width, height)
hsvimg = img.convert('HSV')
hsvimg.mode = 'RGB'  # relabel the mode so PIL will save the HSV data as-is
hsvimg.save('newImage2.jpg')
npHSI = np.asarray(hsvimg)  # convert the HSI image to a numpy array
blur = cv2.GaussianBlur(npHSI, (45, 45), 5)
assert isinstance(blur, np.ndarray)  # sanity checks: the blurred
assert len(blur.shape) == 3          # result is a 3-channel
assert blur.shape[2] == 3            # numpy image
hsiBlur = Image.fromarray(blur, 'RGB')
hsiBlur.save('hsiBlur.jpg')  # save the blurred image
## Read
img = cv2.imread("D:\\Texture analysis\\hsiBlur.jpg")
## convert to hsv
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
#Threshold the image and segment the trees
mask = cv2.inRange(hsv, (36, 25, 25), (70, 255,255))
imask = mask>0
green = np.zeros_like(img, np.uint8)
green[imask] = img[imask]
## save
cv2.imwrite("green.png", green)
#Count the number of trees
im = io.imread('green.png', as_grey=True)
val = filters.threshold_otsu(im)
drops = ndimage.binary_fill_holes(im < val)
labels = measure.label(drops)
print(labels.max())
Original image:
HSI image with gaussian filter:
Segmented image:
The last part of the code returns 7, which is wrong; the value should be above 50. How can I properly count the number of green segments in the final segmented image?
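If neighbouring crowns merge into single blobs, labels.max() undercounts. A minimal sketch of a distance-transform watershed that splits touching blobs (min_distance is an assumed crown radius in pixels, to be tuned):

import numpy as np
from scipy import ndimage
from skimage.feature import peak_local_max
from skimage.segmentation import watershed

# 'drops' is the filled binary mask computed above
distance = ndimage.distance_transform_edt(drops)
coords = peak_local_max(distance, min_distance=15)  # one peak per assumed crown
markers = np.zeros(distance.shape, dtype=int)
markers[tuple(coords.T)] = np.arange(1, len(coords) + 1)
labels = watershed(-distance, markers, mask=drops)
print("Tree count estimate:", labels.max())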
EDIT
I converted green.png to binary and applied erosion with a 3x3 kernel, iterated 7 times, to remove the noise.
This is what I did at the end; I followed this stackoverflow link:
## save
cv2.imwrite("green.png", green)

# Convert to grayscale (uint8, as cv2.threshold expects)
gray = np.dot(green[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)
cv2.imwrite("grayScale.jpg", gray)

# Binarize the grayscale image
ret, bin_img = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
cv2.imwrite("bin_img.jpg", bin_img)

# Erode the binary image to remove the noise
kernel = np.ones((3, 3), np.uint8)
erosion = cv2.erode(bin_img, kernel, iterations=7)
cv2.imwrite("erosion.jpg", erosion)

# Count the number of trees
finalImage = cv2.imread('erosion.jpg')
finalImage = cv2.cvtColor(finalImage, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(finalImage, 127, 255, 1)
im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3.x returns three values here
for cnt in contours:
    cv2.drawContours(finalImage, [cnt], 0, (0, 0, 255), 1)
Saurav mentioned in his answer that the size of "contours" will give you the count, but print(contour.size()) gives an error and print(contour) just prints a long 2D array. How can I get the size of contours?
PS: I didn't upload the grayscale, binary, and eroded images because I felt they were already taking too much space; I can still upload them if anyone wants.
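A quick note on the count itself: cv2.findContours returns a plain Python sequence, so its length is the contour count; there is no .size() method on it:

# the number of detected contours
print(len(contours))

# the number of points in a single contour
print(len(contours[0]))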
I've found 52 trees with this script:
from PIL import Image, ImageDraw, ImageFont

image = Image.open('04uX3.jpg')
pixels = image.load()
size = image.size
draw = ImageDraw.Draw(image)
font = ImageFont.truetype('arial', 60)

i = 1
for x in range(0, size[0], 100):
    for y in range(0, size[1], 100):
        if pixels[x, y][1] > 200:  # a bright green channel suggests a tree
            draw.text((x, y), str(i), (255, 0, 0), font=font)
            i += 1

image.save('result.png')
You can see that some trees weren't detected and some non-trees were detected, so this is a very rough calculation.