How to properly configure an Asian Unicode font and text display for FPDF2? - python-3.x

I am trying to generate a PDF file with the Python library "FPDF2" using this code:
from fpdf import FPDF, XPos, YPos
import warnings
warnings.filterwarnings("ignore", message="Core font or font already added")
# PDF title: module-level text that PDF.header() measures and draws in the framed title cell
title = 'INVOICE#'
class PDF(FPDF):
    """FPDF document with a framed, centred title header and a page-number footer.

    The header draws the module-level ``title`` string; body text is rendered
    with a Unicode TrueType font so that non-Latin text can be embedded.
    """

    # Family name and file path of the TrueType font used for chapter text.
    BODY_FONT_FAMILY = 'KhmerOSBattambang'
    BODY_FONT_FILE = 'font/KhmerOSbattambang.ttf'

    def header(self):
        """Draw the title in a framed, filled cell centred horizontally on the page."""
        # Add logo
        # self.image("logo.png", 10, 10, 20)
        # Set font style
        self.set_font('helvetica', 'B', 18)
        # Cell width = rendered title width plus 6 units of padding
        title_width = self.get_string_width(title) + 6
        doc_width = self.w
        # Move the cursor so the cell ends up horizontally centred
        self.set_x((doc_width - title_width) / 2)
        # colors of frame, background, and text
        self.set_draw_color(0, 80, 180)  # blue
        self.set_fill_color(230, 230, 0)  # background color - Yellow
        self.set_text_color(220, 50, 50)  # text color - red
        # Thickness of frame (border)
        self.set_line_width(0.5)
        # Title
        self.cell(title_width, 10, title, new_y=YPos.NEXT, border=1, align='C', fill=True)
        # Line break
        self.ln(10)

    def footer(self):
        """Draw the credit line and the "page/total" counter near the bottom of the page."""
        # Position cursor at 1.5 cm from bottom:
        self.set_y(-15)
        # Setting font: helvetica italic 8
        self.set_font("helvetica", "I", 8)
        self.set_text_color(169, 169, 169)  # text color - gray
        # Printing page number ("{nb}" is the total-pages alias):
        self.cell(0, 10, f"Design by Ezstartup Team. Page {self.page_no()}/{{nb}}", align="C")

    def _ensure_body_font(self):
        """Register the body font once per document instead of once per chapter."""
        if not getattr(self, '_body_font_registered', False):
            self.add_font(self.BODY_FONT_FAMILY, '', self.BODY_FONT_FILE)
            self._body_font_registered = True

    def chapter_body(self, name):
        """Read the UTF-8 text file ``name`` and write its content as a full-width paragraph.

        Args:
            name: Path of the text file to render.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        with open(name, 'rb') as fh:
            txt = fh.read().decode('utf-8')
        # set font
        self._ensure_body_font()
        self.set_font(self.BODY_FONT_FAMILY, size=14)
        self.multi_cell(0, 10, txt)
        # Use this document, not the module-level ``pdf`` object, for the line break
        self.ln()
# Create FPDF object
# orientation: 'P' (portrait) or 'L' (landscape)
# format: 'A3', 'A4' (default), 'A5', 'Letter', 'Legal', or a custom size such as (100, 150)
pdf = PDF(orientation="P", unit="mm", format="A4")
# Get total pages number
pdf.alias_nb_pages()
# Set margin
pdf.set_auto_page_break(auto=True, margin=15)
# Add a page
pdf.add_page()
# Khmer is a complex script: vowel signs and subscript consonants must be
# reordered and combined by a shaping engine, otherwise glyphs are placed one
# by one in the wrong positions. Requires fpdf2 >= 2.7.5 and the "uharfbuzz" package.
pdf.set_text_shaping(True)
# Display text
# Read the content of each file and write it with the Unicode font
pdf.chapter_body('chapter1.txt')
pdf.chapter_body('chapter2.txt')
# w = width, h = height
# The original call passed chr(0x17DF) as a fourth positional argument, which
# lands in the "border" parameter of cell(); it has been dropped.
pdf.cell(40, 10, 'Hello World! សួស្តីលោកអ្នកនាង', align='R')
# PDF file
pdf.output('unicode.pdf')
But the result is not what I expected.
I have attached the generated file. The problem is with the Khmer language. Please advise me how to configure it so that it works and displays correctly.
I want to generate a PDF file with FPDF2 that contains Khmer text (Unicode characters).

Related

Please suggest how can I extract text data from hand-filled character per box type forms using python

hand-filled character per box form
I want to automate a process in which I receive hand-filled, character-per-box forms as images and need to extract the text from them. A box surrounds each letter, and I have to extract all the text from the form image.
You can select contours by size, find the rotated rectangle of the matching contour, and apply the inverse transform to straighten it.
import cv2
import numpy as np
img = cv2.imread('4YAry.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable
    raise FileNotFoundError("could not read image '4YAry.jpg'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to binary image (inverted: pixels at or below 200 become 255, brighter ones become 0)
thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)[1]
# Named constants instead of the magic numbers 1 and 2
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if abs(w - 345) < 10:  # width box is 345 px
        # Rotated bounding rectangle of the row of boxes: ((cx, cy), (width, height), angle)
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        # Map three corners of the rotated rectangle onto an axis-aligned one
        srcTri = np.array([box[1], box[0], box[2]]).astype(np.float32)
        dstTri = np.array([[0, 0], [0, rect[1][1]], [rect[1][0], 0]]).astype(np.float32)
        warp_mat = cv2.getAffineTransform(srcTri, dstTri)
        # np.int0 was removed in NumPy 2.0; plain int() gives the same truncation
        out_size = (int(rect[1][0]), int(rect[1][1]))
        warp_dst = cv2.warpAffine(img, warp_mat, out_size)
        N = 14  # number of character cells in the row
        s = 0.99 * warp_dst.shape[1] / N  # tune rectangle positions
        for i in range(N):
            # Paint a white frame over each cell border so only the handwriting remains
            warp_dst = cv2.rectangle(
                warp_dst,
                (2 + int(i * s), 2),
                (2 + int((i + 1) * s), warp_dst.shape[0] - 3),
                (255, 255, 255),
                2,
            )
        # Written inside the match branch so warp_dst is always defined here
        cv2.imwrite('chars.png', warp_dst)
Using for instance Hough, detect the top and bottom edges and the vertical separations. Validate the separations by checking that they run from top to bottom. The horizontal lines will be more reliable and accurate, you can use their direction for deskewing if necessary.
After doing that, you will have missing separations and false ones. Using some heuristics, try to find the correct pitch and detect the false positives and false negatives. Now you can extract the content of the individual boxes, or erase the edges.
This process cannot be perfect, some characters will be damaged.

font.getsize() seems not to work with "\n" (new lines) inside the parameter

The used font: https://www.fontspace.com/abeezee-font-f30774
# Load the font at size 300 (the path is a placeholder in the original post)
ImageFont.truetype(*path/to/font*, 300)
# NOTE(review): getsize() appears to measure the string as a single line, so the
# line breaks are not reflected in the reported height — confirm against the Pillow docs
font.getsize("1\n\r0\n\r9") # returns: (1080, 280) which is wrong!
# Throwaway 1x1 black RGB image, used here only to obtain an ImageDraw object
image = np.full(shape=(1, 1, 3), fill_value=0, dtype=np.uint8)
image = Image.fromarray(image, mode="RGB")
draw = ImageDraw.Draw(image)
# NOTE(review): getsize()/multiline_textsize() were removed in Pillow 10;
# presumably getbbox()/multiline_textbbox() are the replacements — verify
draw.multiline_textsize(text="1\n\r0\n\r9", font=font, spacing=0) # returns: (180, 837) which is correct
Why are the results different? What am I missing?
So the main error was:
1) For multiline text we should use the multiline functions, for example:
PIL.ImageDraw.ImageDraw.multiline_text(xy, text, fill=None, font=None, anchor=None, spacing=0, align="left", direction=None, features=None, language=None)
In addition .getsize() returned a height that is a little too big. The height that worked for me was:
font.getmask(digit).size[1]
which is the same as:
font.getsize(digit)[1] - font.getoffset(digit)[1]

Increase width/height of image(not resize)

From https://www.pyimagesearch.com/2018/07/19/opencv-tutorial-a-guide-to-learn-opencv/
I'm able to extract the contours and write as files.
For example I've a photo with some scribbled text : "in there".
I've been able to extract the letters as separate files, but I want these letter files to have the same width and height. For example, the widths of "i" and "r" differ. In that case I want to append pixels (any black/white pixels) to the right of the "i" image so that its width becomes the same as that of "r".
How to do it in Python? Just increase the size of photo(not resize)
My code looks something like this:
# find contours (i.e., outlines) of the foreground objects in the
# thresholded image
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
# Normalise the return value, which differs between OpenCV versions
cnts = imutils.grab_contours(cnts)
output = image.copy()
ROI_number = 0
for c in cnts:
    # Crop the axis-aligned bounding box of the contour out of the source image
    x, y, w, h = cv2.boundingRect(c)
    ROI = image[y:y+h, x:x+w]
    file = 'ROI_{}.png'.format(ROI_number)
    cv2.imwrite(file, ROI)
    # Advance the counter so each crop gets its own file instead of
    # overwriting ROI_0.png on every iteration
    ROI_number += 1
[][1
Here are a couple of other ways to do that using Python/OpenCV using cv2.copyMakeBorder() to extend the border to the right by 50 pixels. The first way simply extends the border by replication. The second extends it with the mean (average) blue background color using a mask to get only the blue pixels.
Input:
import cv2
import numpy as np
# Load the picture that should gain extra room on its right-hand side
img = cv2.imread('i.png')

# Background mask (needed for the constant-colour variant only): every pixel
# whose channel values lie inside [lowcolor, highcolor] is treated as background
lowcolor, highcolor = (232,221,163), (252,241,183)
mask = cv2.inRange(img, lowcolor, highcolor)

# Average colour of the masked pixels; cv2.mean yields four values, keep the first three
mean = cv2.mean(img, mask)[0:3]
color = tuple(mean)

# Grow the canvas by 50 pixels on the right (top, bottom, left stay 0):
#   2a repeats the edge pixels, 2b fills with the averaged background colour
extra_right = 50
result2a = cv2.copyMakeBorder(img, 0, 0, 0, extra_right, cv2.BORDER_REPLICATE)
result2b = cv2.copyMakeBorder(img, 0, 0, 0, extra_right, cv2.BORDER_CONSTANT, value=color)

# Show the input, the mask and both variants until a key is pressed
for window_name, picture in (
    ("img", img),
    ("mask", mask),
    ("result2a", result2a),
    ("result2b", result2b),
):
    cv2.imshow(window_name, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Store both variants on disk
for out_path, picture in (
    ("i_extended2a.jpg", result2a),
    ("i_extended2b.jpg", result2b),
):
    cv2.imwrite(out_path, picture)
Replicated Result:
Average Background Color Result:
In Python/OpenCV/Numpy you create a new image of the size and background color you want. Then you use numpy slicing to insert the old image into the new one. For example:
Input:
import cv2
import numpy as np
# Load the picture and remember its dimensions (rows, columns, channels)
img = cv2.imread('i.png')
ht, wd, cc = img.shape

# Target canvas: same height, 50 pixels wider, pre-filled with the chosen colour
ww, hh = wd + 50, ht
color = (242,231,173)
result = np.full((hh, ww, cc), color, dtype=np.uint8)

# Paste the original picture into the canvas with its top-left corner at (xx, yy)
xx, yy = 0, 0
rows = slice(yy, yy + ht)
cols = slice(xx, xx + wd)
result[rows, cols] = img

# Display the padded picture until a key is pressed
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Write the padded picture to disk
cv2.imwrite("i_extended.jpg", result)

How can I create mask images in Python 3 using features.rasterize?

Currently, I use the following piece of code to create mask images (classes = ['tree', 'car', 'bicycle'], polygons is the list of the geometry objects where each geometry object has coordinates field that defines the polygon on the image that is a bounding box for the class object):
def create_mask(self, mask_size, classes, polygons):
    """Render one filled polygon per class onto a palette-mode mask image.

    ``classes[i]`` selects the palette index (via ``self.class_to_color_index``)
    used to fill ``polygons[i]``; the exterior ring of each polygon is drawn.

    Args:
        mask_size: Size passed to ``Image.new`` for the mask.
        classes: Class names, parallel to ``polygons``.
        polygons: Geometry objects exposing ``exterior.coords``.

    Returns:
        The palette ('P' mode) image with ``self.palette`` applied.
    """
    # A fresh 'P' image starts out filled with palette index 0
    # https://pillow.readthedocs.io/en/3.3.x/handbook/concepts.html#modes
    mask = Image.new('P', mask_size)
    mask.putpalette(self.palette)  # palette = [0, 0, 0, 255, 0, 0, ...]
    painter = ImageDraw.Draw(mask)
    for position, label in enumerate(classes):
        palette_slot = self.class_to_color_index[label]
        outline = polygons[position].exterior.coords
        painter.polygon(xy=outline, fill=palette_slot)
    del painter
    return mask
Is there any way to rewrite this piece of code using features.rasterize?

Python - OpenCV - Binarize To Isolate Object Which is Same Color as Background

I need to isolate the cardboard target in the image below and binarize it, so that the target is white and the background black. Normally, this is not a problem, but the background is almost the exact same color as the target.
Attempts:
# LOAD IMAGE
img_filepath = 'real_6.png'
img = cv2.imread(img_filepath)
# Reverse the channel axis so matplotlib shows the colours correctly
rgb_img = img[..., ::-1]
plt.imshow(rgb_img)
plt.title('ORIGINAL')
plt.show()
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# SMOOTH
# 5x5 box kernel with every weight equal to 1/30
blur_kernel = np.ones((5, 5), dtype=np.float32) / 30
blur_img = cv2.filter2D(rgb_img, -1, blur_kernel)

# THRESHOLD
# Keep pixels whose three channel values all fall inside the range below
lower_color_rng = np.array([100, 50, 100])
upper_color_rng = np.array([255, 255, 255])
target_keyholes_img = cv2.inRange(blur_img, lower_color_rng, upper_color_rng)
plt.imshow(target_keyholes_img, cmap='gray')
plt.title('THRESHOLD')
plt.show()
Attempted Image Extraction
How can I use OpenCV in Python 3 to binarize this image?
Original Image

Resources