Centering a rotated image using Reportlab - python-3.x

I'm trying to center a rotated image on Reportlab, but I'm having issues using the correct calculation for the placement.
Here's the current code:
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from PIL import Image as PILImage
import requests
import math
def main(rotation):
# create a new PDF with Reportlab
a4 = (595.275590551181, 841.8897637795275)
c = canvas.Canvas('output.pdf', pagesize=a4)
c.saveState()
# loading the image:
img = requests.get('https://i.stack.imgur.com/dI5Rj.png', stream=True)
img = PILImage.open(img.raw)
width, height = img.size
# We calculate the bouding box of a rotated rectangle
angle_radians = rotation * (math.pi / 180)
bounding_height = abs(width * math.sin(angle_radians)) + abs(height * math.cos(angle_radians))
bounding_width = abs(width * math.cos(angle_radians)) + abs(height * math.sin(angle_radians))
a4_pixels = [x * (100 / 75) for x in a4]
offset_x = (a4_pixels[0] / 2) - (bounding_width / 2)
offset_y = (a4_pixels[1] / 2) - (bounding_height / 2)
c.translate(offset_x, offset_y)
c.rotate(rotation)
c.drawImage(ImageReader(img), 0, 0, width, height, 'auto')
c.restoreState()
c.save()
if __name__ == '__main__':
main(45)
So far, here's what I did:
Calculating the boundaries of a rotated rectangle (since it will be bigger)
Using these to calculate the position of the center of the image (size / 2 - image / 2) for width and height.
Two issues appears that I can't explain:
The "a4" variable is in points, everything else is in pixels. If I change them to pixels for calculating the position (which is logical, using a4_pixels = [x * (100 / 75) for x in a4]). The placement is incorrect for a rotation of 0 degree. If I keep the a4 in points, it works ... ?
If I change the rotation, it breaks even more.
So my final question: How can I calculate the offset_x and offset_y values to ensure it's always centered regardless of the rotation?
Thank you! :)

When you translate the canvas, you are literally moving the origin (0,0) point and all draw operations will be relative to that.
So in the code below, I moved the origin to the middle of the page.
Then I rotated the "page" and drew the image on the "page". No need to rotate the image since its canvas axes have rotated.
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.lib.pagesizes import A4
from PIL import Image as PILImage
import requests
def main(rotation):
c = canvas.Canvas('output.pdf', pagesize=A4)
c.saveState()
# loading the image:
img = requests.get('https://i.stack.imgur.com/dI5Rj.png', stream=True)
img = PILImage.open(img.raw)
# The image dimensions in cm
width, height = img.size
# now move the canvas origin to the middle of the page
c.translate(A4[0] / 2, A4[1] / 2)
# and rotate it
c.rotate(rotation)
# now draw the image relative to the origin
c.drawImage(ImageReader(img), -width/2, -height/2, width, height, 'auto')
c.restoreState()
c.save()
if __name__ == '__main__':
main(45)

Related

Rasterising only selected area of a CAD DXF file

Given a DXF file (2D CAD drawing), is it somehow possible to rasterise only part of it? Preferably in Python's ezdxf. By the part of it, I mean the selected rectangular area, not a single layer.
Background: I'm struggling to rasterise quite a big DXF file with decent DPI in a reasonable time, so I thought that maybe there's a way to speed up the process by parallelising rasterising different parts of the drawing. I'm using ezdxf with matplotlib backend.
This solution renders the DXF file in 4 tiles including filtering the DXF entities outside the rendering area. But the calculation of the bounding boxes is also costly and the entities in the overlapping area are rendered multiple times, this means this solution takes longer as a single-pass rendering. But it shows the concept. The images fit perfect together the space is left to show that this are 4 images:
import matplotlib.pyplot as plt
import random
import ezdxf
from ezdxf.addons.drawing import RenderContext, Frontend
from ezdxf.addons.drawing.matplotlib import MatplotlibBackend
from ezdxf import bbox
from ezdxf.math import BoundingBox2d
COLORS = list(range(1, 7))
DPI = 300
WIDTH = 400
HEIGHT = 200
LEFT = 0
BOTTOM = 0
doc = ezdxf.new()
msp = doc.modelspace()
def random_points(count):
for _ in range(count):
yield WIDTH * random.random(), HEIGHT * random.random()
for s, e in zip(random_points(100), random_points(100)):
msp.add_line(s, e, dxfattribs={"color": random.choice(COLORS)})
# detecting the drawing extents by ezdxf can take along time for big files!
cache = bbox.Cache() # reuse bounding boxes for entity filtering
rect = bbox.extents(msp, cache=cache)
WIDTH = rect.size.x
HEIGHT = rect.size.y
LEFT = rect.extmin.x
BOTTOM = rect.extmin.y
VIEWPORT_X = [LEFT, LEFT + WIDTH / 2, LEFT, LEFT + WIDTH / 2]
VIEWPORT_Y = [BOTTOM, BOTTOM, BOTTOM + HEIGHT / 2, BOTTOM + HEIGHT / 2]
ctx = RenderContext(doc)
for quarter in [0, 1, 2, 3]:
# setup drawing add-on:
fig = plt.figure(dpi=300)
ax = fig.add_axes([0, 0, 1, 1])
out = MatplotlibBackend(ax)
# calculate and set render borders:
left = VIEWPORT_X[quarter]
bottom = VIEWPORT_Y[quarter]
ax.set_xlim(left, left + WIDTH / 2)
ax.set_ylim(bottom, bottom + HEIGHT / 2)
# set entities outside of the rendering area invisible:
# Bounding box calculation can be very costly, especially for deep nested
# block references! If you did the extents calculation and reuse the cache
# you already have paid the price:
render_area = BoundingBox2d(
[(left, bottom), (left + WIDTH / 2, bottom + HEIGHT / 2)])
for entity in msp:
entity_bbox = bbox.extents([entity], cache=cache)
if render_area.intersect(entity_bbox):
entity.dxf.invisible = 0
else:
entity.dxf.invisible = 1
# finalizing invokes auto-scaling!
Frontend(ctx, out).draw_layout(msp, finalize=False)
# set output size in inches
# width = 6 in x 300 dpi = 1800 px
# height = 3 in x 300 dpi = 900 px
fig.set_size_inches(6, 3, forward=True)
filename = f"lines{quarter}.png"
print(f'saving to "{filename}"')
fig.savefig(filename, dpi=300)
plt.close(fig)
The draw_layout() method has an argument filter_func to specify a function which accepts a DXF entity as argument and returns True or False to render or ignore this entity. This would be an alternative to filter the entities outside of the rendering area without altering the DXF content.
UPDATE: a refined example can be found at github

How to translate points on image after cropping it and resizing it?

I am creating a program which allows a user to annotate images with points.
This program allows user to zoom in an image so user can annotate more precisely.
Program zooms in an image doing the following:
Find the center of image
Find minimum and maximum coordinates of new cropped image relative to center
Crop image
Resize the image to original size
For this I have written the following Python code:
import cv2
def zoom_image(original_image, cut_off_percentage, list_of_points):
height, width = original_image.shape[:2]
center_x, center_y = int(width/2), int(height/2)
half_new_width = center_x - int(center_x * cut_off_percentage)
half_new_height = center_y - int(center_y * cut_off_percentage)
min_x, max_x = center_x - half_new_width, center_x + half_new_width
min_y, max_y = center_y - half_new_height, center_y + half_new_height
#I want to include max coordinates in new image, hence +1
cropped = original_image[min_y:max_y+1, min_x:max_x+1]
new_height, new_width = cropped.shape[:2]
resized = cv2.resize(cropped, (width, height))
translate_points(list_of_points, height, width, new_height, new_width, min_x, min_y)
I want to resize the image to original width and height so user always works on same "surface"
regardless of how zoomed image is.
The problem I encounter is how to correctly scale points (annotations) when doing this. My algorithm to do so was following:
Translate points on original image by subtracting min_x from x coordinate and min_y from y coordinate
Calculate constants for scaling x and y coordinates of points
Multiply coordinates by constants
For this I use the following Python code:
import cv2
def translate_points(list_of_points, height, width, new_height, new_width, min_x, min_y):
#Calculate constants for scaling points
scale_x, scale_y = width / new_width, height / new_height
#Translate and scale points
for point in list_of_points:
point.x = (point.x - min_x) * scale_x
point.y = (point.y - min_y) * scale_y
This code doesn't work. If I zoom in once, it is hard to detect the offset of pixels but it happens. If I keep zooming in, it will be much easier to detect the "drift" of points. Here are images to provide examples. On original image (1440x850) I places a point in the middle of blue crosshair. The more I zoom in the image it is easier to see that algorithm doesn't work with bigger cut-ofs.
Original image. Blue crosshair is middle point of an image. Red angles indicate what will be borders after image is zoomed once
Image after zooming in once.
Image after zooming in 5 times. Clearly, green point is no longer in the middle of image
The cut_off_percentage I used is 15% (meaning that I keep 85% of width and height of original image, calculated from the center).
I have also tried the following library: Augmentit python library
Library has functions for cropping images and resizing them together with points. Library also causes the points to drift. This is expected since the code I implemented and library's functions use the same algorithm.
Additionally, I have checked whether this is a rounding problem. It is not. Library rounds the points after multiplying coordinates with scales. Regardless on how they are rounded, points are still off by 4-5 px. This increases the more I zoom in the picture.
EDIT: A more detailed explanation is given here since I didn't understand a given answer.
The following is an image of right human hand.
Image of a hand in my program
Original dimension of this image is 1440 pixels in width and 850 pixels in height. As you can see in this image, I have annotated right wrist at location (756.0, 685.0). To check whether my program works correctly, I have opened this exact image in GIMP and placed a white point at location (756.0, 685.0). The result is following:
Image of a hand in GIMP
Coordinates in program work correctly. Now, if I were to calculate parameters given in first answer according to code given in first answer I get following:
vec = [756, 685]
hh = 425
hw = 720
cov = [720, 425]
These parameters make sense to me. Now I want to zoom the image to scale of 1.15. I crop the image by choosing center point and calculating low and high values which indicate what rectangle of image to keep and what to cut. On the following image you can see what is kept after cutting (everything inside red rectangle).
What is kept when cutting
Lows and highs when cutting are:
xb = [95,1349]
yb = [56,794]
Size of cropped image: 1254 x 738
This cropped image will be resized back to original image. However, when I do that my annotation gets completely wrong coordinates when using parameters described above.
After zoom
This is the code I used to crop, resize and rescale points, based on the first answer:
width, height = image.shape[:2]
center_x, center_y = int(width / 2), int(height / 2)
scale = 1.15
scaled_width = int(center_x / scale)
scaled_height = int(center_y / scale)
xlow = center_x - scaled_width
xhigh = center_x + scaled_width
ylow = center_y - scaled_height
yhigh = center_y + scaled_height
xb = [xlow, xhigh]
yb = [ylow, yhigh]
cropped = image[yb[0]:yb[1], xb[0]:xb[1]]
resized = cv2.resize(cropped, (width, height), cv2.INTER_CUBIC)
#Rescaling poitns
cov = (width / 2, height / 2)
width, height = resized.shape[:2]
hw = width / 2
hh = height / 2
for point in points:
x, y = point.scx, point.scy
x -= xlow
y -= ylow
x -= cov[0] - (hw / scale)
y -= cov[1] - (hh / scale)
x *= scale
y *= scale
x = int(x)
y = int(y)
point.set_coordinates(x, y)
So this really is an integer rounding issue. It's magnified at high zoom levels because being off by 1 pixel at 20x zoom throws you off much further. I tried out two versions of my crop-n-zoom gui. One with int rounding, another without.
You can see that the one with int rounding keeps approaching the correct position as the zoom grows, but as soon as the zoom takes another step, it rebounds back to being wrong. The non-rounded version sticks right up against the mid-lines (denoting the proper position) the whole time.
Note that the resized rectangle (the one drawn on the non-zoomed image) blurs past the midlines. This is because of the resize interpolation from OpenCV. The yellow rectangle that I'm using to check that my points are correctly scaling is redrawn on the zoomed frame so it stays crisp.
With Int Rounding
Without Int Rounding
I have the center-of-view locked to the bottom right corner of the rectangle for this demo.
import cv2
import numpy as np
# clamp value
def clamp(val, low, high):
if val < low:
return low;
if val > high:
return high;
return val;
# bound the center-of-view
def boundCenter(cov, scale, hh, hw):
# scale half res
scaled_hw = int(hw / scale);
scaled_hh = int(hh / scale);
# bound
xlow = scaled_hw;
xhigh = (2*hw) - scaled_hw;
ylow = scaled_hh;
yhigh = (2*hh) - scaled_hh;
cov[0] = clamp(cov[0], xlow, xhigh);
cov[1] = clamp(cov[1], ylow, yhigh);
# do a zoomed view
def zoomView(orig, cov, scale, hh, hw):
# calculate crop
scaled_hh = int(hh / scale);
scaled_hw = int(hw / scale);
xlow = cov[0] - scaled_hw;
xhigh = cov[0] + scaled_hw;
ylow = cov[1] - scaled_hh;
yhigh = cov[1] + scaled_hh;
xb = [xlow, xhigh];
yb = [ylow, yhigh];
# crop and resize
copy = np.copy(orig);
crop = copy[yb[0]:yb[1], xb[0]:xb[1]];
display = cv2.resize(crop, (width, height), cv2.INTER_CUBIC);
return display;
# draw vector shape
def drawVec(img, vec, pos, cov, hh, hw, scale):
con = [];
for point in vec:
# unpack point
x,y = point;
x += pos[0];
y += pos[1];
# here's the int version
# Note: this is the same as xlow and ylow from the above function
# x -= cov[0] - int(hw / scale);
# y -= cov[1] - int(hh / scale);
# rescale point
x -= cov[0] - (hw / scale);
y -= cov[1] - (hh / scale);
x *= scale;
y *= scale;
x = int(x);
y = int(y);
# add
con.append([x,y]);
con = np.array(con);
cv2.drawContours(img, [con], -1, (0,200,200), -1);
# font stuff
font = cv2.FONT_HERSHEY_SIMPLEX;
fontScale = 1;
fontColor = (255, 100, 0);
thickness = 2;
# draw blank
res = (800,1200,3);
blank = np.zeros(res, np.uint8);
print(blank.shape);
# draw a rectangle on the original
cv2.rectangle(blank, (100,100), (400,200), (200,150,0), -1);
# vectored shape
# comparison shape
bshape = [[100,100], [400,100], [400,200], [100,200]];
bpos = [0,0]; # offset
# random shape
vshape = [[148, 89], [245, 179], [299, 67], [326, 171], [385, 222], [291, 235], [291, 340], [229, 267], [89, 358], [151, 251], [57, 167], [167, 164]];
vpos = [100,100]; # offset
# get original image res
height, width = blank.shape[:2];
hh = int(height / 2);
hw = int(width / 2);
# center of view
cov = [600, 400];
camera_spd = 5;
# scale
scale = 1;
scale_step = 0.2;
# loop
done = False;
while not done:
# crop and show image
display = zoomView(blank, cov, scale, hh, hw);
# drawVec(display, vshape, vpos, cov, hh, hw, scale);
drawVec(display, bshape, bpos, cov, hh, hw, scale);
# draw a dot in the middle
cv2.circle(display, (hw, hh), 4, (0,0,255), -1);
# draw center lines
cv2.line(display, (hw,0), (hw,height), (0,0,255), 1);
cv2.line(display, (0,hh), (width,hh), (0,0,255), 1);
# draw zoom text
cv2.putText(display, "Zoom: " + str(scale), (15,40), font,
fontScale, fontColor, thickness, cv2.LINE_AA);
# show
cv2.imshow("Display", display);
key = cv2.waitKey(1);
# check keys
done = key == ord('q');
# Note: if you're actually gonna make a GUI
# use the keyboard module or something else for this
# wasd to move center-of-view
if key == ord('d'):
cov[0] += camera_spd;
if key == ord('a'):
cov[0] -= camera_spd;
if key == ord('w'):
cov[1] -= camera_spd;
if key == ord('s'):
cov[1] += camera_spd;
# z,x to decrease/increase zoom (lower bound is 1.0)
if key == ord('x'):
scale += scale_step;
if key == ord('z'):
scale -= scale_step;
scale = round(scale, 2);
# bound cov
boundCenter(cov, scale, hh, hw);
Edit: Explanation of the drawVec parameters
img: The OpenCV image to be drawn on
vec: A list of [x,y] points
pos: The offset to draw those points at
cov: Center-Of-View, where the middle of our zoomed display is pointed at
hh: Half-Height, the height of "img" divided by 2
hw: Half-Width, the width of "img" divided by 2
I have looked through my code and realized where I was making a mistake which caused points to be offset.
In my program, I have a canvas of specific size. The size of canvas is a constant and is always larger than images being drawn on canvas. When program draws an image on canvas it first resizes that image so it could fit on canvas. The size of resized image is somewhat smaller than size of canvas. Image is usually drawn starting from top left corner of canvas. Since I wanted to always draw image in the center of canvas, I shifted the location from top left corner of canvas to another point. This is what I didn't account when doing image zooming.
def zoom(image, ratio, points, canvas_off_x, canvas_off_y):
width, height = image.shape[:2]
new_width, new_height = int(ratio * width), int(ratio * height)
center_x, center_y = int(new_width / 2), int(new_height / 2)
radius_x, radius_y = int(width / 2), int(height / 2)
min_x, max_x = center_x - radius_x, center_x + radius_x
min_y, max_y = center_y - radius_y, center_y + radius_y
img_resized = cv2.resize(image, (new_width,new_height), interpolation=cv2.INTER_LINEAR)
img_cropped = img_resized[min_y:max_y+1, min_x:max_x+1]
for point in points:
x, y = point.get_original_coordinates()
x -= canvas_off_x
y -= canvas_off_y
x = int((x * ratio) - min_x + canvas_off_x)
y = int((y * ratio) - min_y + canvas_off_y)
point.set_scaled_coordinates(x, y)
In the code below canvas_off_x and canvas_off_y is the location of offset from top left corner of canvas

How to crop a square image from normalized vertices

I'm using this code to identify tops and bottoms of photographs:
( as of now I only have it working for tops. one thing at a time ;) )
def get_file(path):
client = vision.ImageAnnotatorClient()
for images in os.listdir(path):
# # Loads the image into memory
with io.open(images, "rb") as image_file:
content = image_file.read()
image = types.Image(content=content)
objects = client.object_localization(image=image).localized_object_annotations
im = Image.open(images)
width, height = im.size
print("Number of objects found: {}".format(len(objects)))
for object_ in objects:
if object_.name == "Top":
print("Top")
l1 = object_.bounding_poly.normalized_vertices[0].x
l2 = object_.bounding_poly.normalized_vertices[0].y
l3 = object_.bounding_poly.normalized_vertices[2].x
l4 = object_.bounding_poly.normalized_vertices[3].y
left = l1 * width
top = l2 * height
right = l3 * width
bottom = l4 * height
im = im.crop((left, top, right, bottom))
im.save('new_test_cropped.tif', 'tiff')
im.show()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Script to automatically crop images based on google vision predictions of 'tops' and 'bottoms'")
parser.add_argument('--path', help='Include the path to the images folder')
args = parser.parse_args()
get_file(args.path)
The images are opened, clothing is identified, and then the images are cropped and saved to a new file. (granted as of now they are being overwritten within the loop, but I'll fix that later)
What I cant figure out, is how to make the crop a 1:1 ratio. I need to save them out as square-cropped to be put on our website.
I'll be honest, the normalized_vertices make no sense to me. Hence why I'm having trouble.
Starting image:
Output:
Desired Output:
"Normalized" means the coordinates are divided by the width or height of the image, so normalized coordinates [1, 0.5] would indicate all the way (1) across the image and halfway down (0.5).
For a 1:1 aspect ratio you want right - left to be equal to top - bottom. So you want to find out which dimension (width or height) you need to increase, and by how much.
height = abs(top - bottom)
width = abs(right - left)
extrawidth = max(0, height - width)
extraheight = max(0, width - height)
If height > width, we want to increase width but not height. Since height - width > 0, the correct value will go into extrawidth. But because width - height < 0, extraheight will be 0.
Now let's say we want to increase the dimensions of our image symmetrically around the original crop rectangle.
top -= extraheight // 2
bottom += extraheight // 2
left -= extrawidth // 2
right += extrawidth // 2
And finally, do the crop:
im = im.crop((left, top, right, bottom))
For your image, let's say you get left = 93, right = 215, top = 49, and bottom = 205
Before:
After:

How to clear numbers from the image using openCV?

I'm trying to remove numbers which are laying inside the circular part of image, numbers are in black in color and background varies between red,yellow, blue and green.
I am using opencv to remove those numbers. I used a mask which extracts numbers from image, with help of cv2.inpaint tried to remove those numbers from images.
For my further analysis I required to have clear image. But my current approach gives distorted image and numbers are not completely removed.
I tried changing the threshold values, lowering will neglect numbers from dark shaded area such as from green and red.
import cv2
img = cv2.imread('scan_1.jpg')
mask = cv2.threshold(img,50,255,cv2.THRESH_BINARY_INV)[1][:,:,0]
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
dst = cv2.inpaint(img, mask, 5, cv2.INPAINT_TELEA)
cv2.imshow('dst',dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite('ost_1.jpg',dst)
Input images: a) scan_1.jpg
b) scan_2.jpg
Output images: a) ost_1.jpg
b) ost_2.jpg
Expected Image: Circles can ignored, but something similar to it is required.
Here is my attempt, a better/easier solution might be acquired if you do not care about preserving texts outside of your circle.
import cv2
import numpy as np
# connectivity method used for finding connected components, 4 vs 8
CONNECTIVITY = 4
# HSV threshold for finding black pixels
H_THRESHOLD = 179
S_THRESHOLD = 255
V_THRESHOLD = 150
# read image
img = cv2.imread("a1.jpg")
img_height = img.shape[0]
img_width = img.shape[1]
# save a copy for creating resulting image
result = img.copy()
# convert image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# found the circle in the image
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.7, minDist= 100, param1 = 48, param2 = 100, minRadius=70, maxRadius=100)
# draw found circle, for visual only
circle_output = img.copy()
# check if we found exactly 1 circle
num_circles = len(circles)
print("Number of found circles:{}".format(num_circles))
if (num_circles != 1):
print("invalid number of circles found ({}), should be 1".format(num_circles))
exit(0)
# save center position and radius of found circle
circle_x = 0
circle_y = 0
circle_radius = 0
if circles is not None:
# convert the (x, y) coordinates and radius of the circles to integers
circles = np.round(circles[0, :]).astype("int")
for (x, y, radius) in circles:
circle_x, circle_y, circle_radius = (x, y, radius)
cv2.circle(circle_output, (circle_x, circle_y), circle_radius, (255, 0, 0), 4)
print("circle center:({},{}), radius:{}".format(x,y,radius))
# keep a median filtered version of image, will be used later
median_filtered = cv2.medianBlur(img, 21)
# Convert BGR to HSV
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# define range of black color in HSV
lower_val = np.array([0,0,0])
upper_val = np.array([H_THRESHOLD,S_THRESHOLD,V_THRESHOLD])
# Threshold the HSV image to get only black colors
mask = cv2.inRange(hsv, lower_val, upper_val)
# find connected components
components = cv2.connectedComponentsWithStats(mask, CONNECTIVITY, cv2.CV_32S)
# apply median filtering to found components
#centers = components[3]
num_components = components[0]
print("Number of found connected components:{}".format(num_components))
labels = components[1]
stats = components[2]
for i in range(1, num_components):
left = stats[i, cv2.CC_STAT_LEFT] - 10
top = stats[i, cv2.CC_STAT_TOP] - 10
width = stats[i, cv2.CC_STAT_WIDTH] + 10
height = stats[i, cv2.CC_STAT_HEIGHT] + 10
# iterate each pixel and replace them if
#they are inside circle
for row in range(top, top+height+1):
for col in range(left, left+width+1):
dx = col - circle_x
dy = row - circle_y
if (dx*dx + dy*dy <= circle_radius * circle_radius):
result[row, col] = median_filtered[row, col]
# smooth the image, may be necessary?
#result = cv2.blur(result, (3,3))
# display image(s)
cv2.imshow("img", img)
cv2.imshow("gray", gray)
cv2.imshow("found circle:", circle_output)
cv2.imshow("mask", mask)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result for a1:

Resizing image and its bounding box

I have an image with bounding box in it, and I want to resize the image.
img = cv2.imread("img.jpg",3)
x_ = img.shape[0]
y_ = img.shape[1]
img = cv2.resize(img,(416,416));
Now I want to calculate the scale factor:
x_scale = ( 416 / x_)
y_scale = ( 416 / y_ )
And draw an image, this is the code for the original bounding box:
( 128, 25, 447, 375 ) = ( xmin,ymin,xmax,ymax)
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
However using this I get:
While the original is:
I don't see any flag in this logic, what's wrong?
Whole code:
imageToPredict = cv2.imread("img.jpg",3)
print(imageToPredict.shape)
x_ = imageToPredict.shape[0]
y_ = imageToPredict.shape[1]
x_scale = 416/x_
y_scale = 416/y_
print(x_scale,y_scale)
img = cv2.resize(imageToPredict,(416,416));
img = np.array(img);
x = int(np.round(128*x_scale))
y = int(np.round(25*y_scale))
xmax= int(np.round (447*(x_scale)))
ymax= int(np.round(375*y_scale))
Box.drawBox([[1,0, x,y,xmax,ymax]],img)
and drawbox
def drawBox(boxes, image):
for i in range (0, len(boxes)):
cv2.rectangle(image,(boxes[i][2],boxes[i][3]),(boxes[i][4],boxes[i][5]),(0,0,120),3)
cv2.imshow("img",image)
cv2.waitKey(0)
cv2.destroyAllWindows()
The image and the data for the bounding box are loaded separately. I am drawing the bounding box inside the image. The image does not contain the box itself.
I believe there are two issues:
You should swap x_ and y_ because shape[0] is actually y-dimension and shape[1] is the x-dimension
You should use the same coordinates on the original and scaled image. On your original image the rectangle is (160, 35) - (555, 470) rather than (128,25) - (447,375) that you use in the code.
If I use the following code:
import cv2
import numpy as np
def drawBox(boxes, image):
for i in range(0, len(boxes)):
# changed color and width to make it visible
cv2.rectangle(image, (boxes[i][2], boxes[i][3]), (boxes[i][4], boxes[i][5]), (255, 0, 0), 1)
cv2.imshow("img", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
def cvTest():
# imageToPredict = cv2.imread("img.jpg", 3)
imageToPredict = cv2.imread("49466033\\img.png ", 3)
print(imageToPredict.shape)
# Note: flipped comparing to your original code!
# x_ = imageToPredict.shape[0]
# y_ = imageToPredict.shape[1]
y_ = imageToPredict.shape[0]
x_ = imageToPredict.shape[1]
targetSize = 416
x_scale = targetSize / x_
y_scale = targetSize / y_
print(x_scale, y_scale)
img = cv2.resize(imageToPredict, (targetSize, targetSize));
print(img.shape)
img = np.array(img);
# original frame as named values
(origLeft, origTop, origRight, origBottom) = (160, 35, 555, 470)
x = int(np.round(origLeft * x_scale))
y = int(np.round(origTop * y_scale))
xmax = int(np.round(origRight * x_scale))
ymax = int(np.round(origBottom * y_scale))
# Box.drawBox([[1, 0, x, y, xmax, ymax]], img)
drawBox([[1, 0, x, y, xmax, ymax]], img)
cvTest()
and use your "original" image as "49466033\img.png",
I get the following image
And as you can see my thinner blue line lies exactly inside your original red line and it stays there whatever targetSize you chose (so the scaling actually works correctly).
Another way of doing this is to use CHITRA
image = Chitra(img_path, box, label)
# Chitra can rescale your bounding box automatically based on the new image size.
image.resize_image_with_bbox((224, 224))
print('rescaled bbox:', image.bounding_boxes)
plt.imshow(image.draw_boxes())
https://chitra.readthedocs.io/en/latest/
pip install chitra
I encountered an issue with bounding box coordinates in Angular when using TensorFlow.js and MobileNet-v2 for prediction. The coordinates were based on the resolution of the video frame.
but I was displaying the video on a canvas with a fixed height and width. I resolved the issue by dividing the coordinates by the ratio of the original video resolution to the resolution of the canvas.
const x = prediction.bbox[0] / (this.Owidth / 300);
const y = prediction.bbox[1] / (this.Oheight / 300);
const width = prediction.bbox[2] / (this.Owidth / 300);
const height = prediction.bbox[3] / (this.Oheight / 300);
// Draw the bounding box.
ctx.strokeStyle = '#99ff00';
ctx.lineWidth = 2;
ctx.strokeRect(x, y, width, height);
this.Owidth & this.Oheight are original resolution of video. it is obtained by.
this.video.addEventListener(
'loadedmetadata',
(e: any) => {
this.Owidth = this.video.videoWidth;
this.Oheight = this.video.videoHeight;
console.log(this.Owidth, this.Oheight, ' pixels ');
},
false
);
300 X 300 is my static canvas width and height.
you can use the resize_dataset_pascalvoc
it's easy to use python3 main.py -p <IMAGES_&_XML_PATH> --output <IMAGES_&_XML> --new_x <NEW_X_SIZE> --new_y <NEW_X_SIZE> --save_box_images <FLAG>"
It resize all your dataset and rewrite new annotations files to resized images

Resources