I have a set of data points in 3D space and would like to fit a bounding box to them. I know that vtkOBBTree::ComputeOBB can do this for me, but I can't seem to figure out how to visualize the oriented bounding box.
Any help is appreciated!
For an axis-aligned bounding box you can use vtkOutlineFilter: just set the 3D data you want to fit as its input, then create the mapper and the actor and add them to the scene, as you would in a typical VTK pipeline. Here is a working example, in Python:
from vtk import *

# implicit function sampled on a grid, just to have some 3D data to outline
quadric = vtkQuadric()
quadric.SetCoefficients(.5, 1, .2, 0, .1, 0, 0, .2, 0, 0)

sample = vtkSampleFunction()
sample.SetSampleDimensions(50, 50, 50)
sample.SetImplicitFunction(quadric)

# isosurfaces of the sampled function
contour = vtkContourFilter()
contour.SetInputConnection(sample.GetOutputPort())
contour.GenerateValues(5, 0, 1)

contourMapper = vtkPolyDataMapper()
contourMapper.SetInputConnection(contour.GetOutputPort())
contourMapper.SetScalarRange(0, 1.2)

contourActor = vtkActor()
contourActor.SetMapper(contourMapper)

# vtkOutlineFilter generates the axis-aligned bounding box of its input
outline = vtkOutlineFilter()
outline.SetInputConnection(sample.GetOutputPort())

outlineMapper = vtkPolyDataMapper()
outlineMapper.SetInputConnection(outline.GetOutputPort())

outlineActor = vtkActor()
outlineActor.SetMapper(outlineMapper)
outlineActor.GetProperty().SetColor(1, 1, 1)

# standard rendering boilerplate
ren = vtkRenderer()
ren.SetBackground(0.188, 0.373, 0.647)
ren.AddActor(contourActor)
ren.AddActor(outlineActor)

renWin = vtkRenderWindow()
renWin.AddRenderer(ren)
renWin.SetWindowName("IsoSurface")
renWin.SetSize(500, 500)

iren = vtkRenderWindowInteractor()
iren.SetRenderWindow(renWin)

renWin.Render()
iren.Initialize()
iren.Start()
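If you specifically want the oriented box computed by vtkOBBTree::ComputeOBB rather than the axis-aligned outline, you can build the box's edges yourself from the corner point and the three axis vectors it returns. A minimal sketch, assuming mesh is a vtkPolyData holding your data points:

import vtk
import numpy as np

# 'mesh' stands for your vtkPolyData of data points (an assumption)
corner, maxAxis, midAxis, minAxis, size = ([0.0] * 3 for _ in range(5))
vtk.vtkOBBTree().ComputeOBB(mesh, corner, maxAxis, midAxis, minAxis, size)

# the three returned axis vectors span the box from the corner point
c = np.array(corner)
axes = [np.array(maxAxis), np.array(midAxis), np.array(minAxis)]
points = vtk.vtkPoints()
for i in range(8):
    p = c + (i & 1) * axes[0] + ((i >> 1) & 1) * axes[1] + ((i >> 2) & 1) * axes[2]
    points.InsertNextPoint(p)

# the 12 edges of the box, as index pairs into the 8 corner points
lines = vtk.vtkCellArray()
for a, b in [(0, 1), (2, 3), (4, 5), (6, 7), (0, 2), (1, 3), (4, 6), (5, 7),
             (0, 4), (1, 5), (2, 6), (3, 7)]:
    line = vtk.vtkLine()
    line.GetPointIds().SetId(0, a)
    line.GetPointIds().SetId(1, b)
    lines.InsertNextCell(line)

obbPoly = vtk.vtkPolyData()
obbPoly.SetPoints(points)
obbPoly.SetLines(lines)

obbMapper = vtk.vtkPolyDataMapper()
obbMapper.SetInputData(obbPoly)
obbActor = vtk.vtkActor()
obbActor.SetMapper(obbMapper)
# then add obbActor to the renderer as in the example above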
I have a vtkPolyData object that I read with vtkPLYReader and want to align it to a given normalized direction vector, so that its orientation matches that vector.
import numpy as np
import vtk

directionVector = np.array([-0.1134, -0.0695, 0.9911])

plyReader = vtk.vtkPLYReader()
plyReader.SetFileName(filePath)

transform = vtk.vtkTransform()
transform.RotateWXYZ(-90, 0, 0, 1)  # initial rotation

transformFilter = vtk.vtkTransformPolyDataFilter()
transformFilter.SetTransform(transform)
transformFilter.SetInputConnection(plyReader.GetOutputPort())
transformFilter.Update()

mapper = vtk.vtkPolyDataMapper()
mapper.SetInputConnection(transformFilter.GetOutputPort())
mapper.ScalarVisibilityOn()
mapper.Update()

actor = vtk.vtkActor()
actor.SetMapper(mapper)
actor.Modified()

renderer.AddActor(actor)
I know that I should use the Rotate() function from vtkTransform, but I don't know how to compute the alignment.
You can try something like:
import numpy as np
import vtk

newaxis = np.array([-0.1134, -0.0695, 0.9911])  # target direction (the question's directionVector)
pos = np.array([0.0, 0.0, 0.0])  # point to rotate about, e.g. the object's center
rotation = 0                     # optional extra spin around the new axis, in degrees
initaxis = [0, 0, 1]             # the object's current axis

# axis/angle rotation that carries initaxis onto newaxis
crossvec = np.cross(initaxis, newaxis)
angle = np.arccos(np.dot(initaxis, newaxis))

T = vtk.vtkTransform()
T.PostMultiply()
T.Translate(-pos)
if rotation:
    T.RotateWXYZ(rotation, initaxis)
T.RotateWXYZ(np.rad2deg(angle), crossvec)
T.Translate(pos)
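Plugged into the question's pipeline, T then simply replaces the hard-coded transform; a minimal sketch, assuming the rotation center pos should be the mesh center:

plyReader.Update()
pos = np.array(plyReader.GetOutput().GetCenter())  # assumed rotation center

# rebuild T as above with this pos, then:
transformFilter = vtk.vtkTransformPolyDataFilter()
transformFilter.SetTransform(T)
transformFilter.SetInputConnection(plyReader.GetOutputPort())
transformFilter.Update()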
For example, vedo uses the above approach:
from vedo import Cube, show
c0 = Cube(side=2).lw(1)
c1 = c0.clone().alpha(0.2).c('tomato')
c1.orientation([1,1,1], rotation=20).pos([2,2,0])
show(c0, c1, axes=1)
I am using VTK in my project, and I need to convert between display and world coordinates. I found that VTK provides vtkPropPicker and vtkCoordinate, and both of them can convert a display coordinate to a world coordinate. However, the results are different.
The test code is:
import vtk
cone = vtk.vtkConeSource()
cone.SetCenter(150, 150, 0)
cone.SetHeight(100)
cone.SetRadius(50)
cone.Update()
coneMapper = vtk.vtkPolyDataMapper()
coneMapper.SetInputData(cone.GetOutput())
coneMapper.Update()
coneActor = vtk.vtkActor()
coneActor.SetMapper(coneMapper)
ren = vtk.vtkRenderer()
ren.AddActor(coneActor)
ren.SetBackground(0.1, 0.2, 0.4)
renWin = vtk.vtkRenderWindow()
renWin.AddRenderer(ren)
renWin.SetSize(400, 400)
iren = vtk.vtkRenderWindowInteractor()
iren.SetRenderWindow(renWin)
renWin.Render()
X = 100
Y = 100
picker = vtk.vtkPropPicker()
picker.Pick(X, Y, 0, ren)
pickerWorld = picker.GetPickPosition()
print('world point from vtkPropPicker: ', pickerWorld)
coordinate = vtk.vtkCoordinate()
coordinate.SetCoordinateSystemToDisplay()
coordinate.SetValue(X, Y)
coorWorld = coordinate.GetComputedWorldValue(ren)
print('world point from vtkCoordinate: ', coorWorld)
Please have a look at the above code. I print the world point from both vtkPropPicker and vtkCoordinate, and the results are different:
world point from vtkPropPicker: (108.0365506828649, 108.0365506828649, 7.141902959080343)
world point from vtkCoordinate: (119.0534474476644, 119.0534474476644, 89.37313989502613)
Why are they different? And which one is correct?
GetComputedWorldValue seems to be picking some bounding box:
# ...
from vedo import *
show(coneActor,
     Point(pickerWorld, c='green'),
     Point(coorWorld, c='red'),
     axes=1,
     )
although the two points do not lie on the same vertical line (not sure why).
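As a cross-check, vtkWorldPointPicker converts the display position using the z-buffer value at that pixel, so it should land on the rendered surface much like vtkPropPicker does. A minimal sketch, reusing X, Y and ren from the question:

picker2 = vtk.vtkWorldPointPicker()
picker2.Pick(X, Y, 0, ren)
print('world point from vtkWorldPointPicker: ', picker2.GetPickPosition())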
Hand-filled character-per-box form
I want to automate a process in which I receive hand-filled character-per-box forms in image format and need to extract the text from them. A box surrounds each letter, and I have to extract all the text from the form image.
You can select contours by size, find the rotated rectangle, and apply the inverse transform:
import cv2
import numpy as np

img = cv2.imread('4YAry.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# convert to binary image
thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)[1]

contours, hierarchy = cv2.findContours(thresh, 1, 2)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if abs(w - 345) < 10:  # the box width is 345 px
        # fit a rotated rectangle and warp it back upright
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        srcTri = np.array([box[1], box[0], box[2]]).astype(np.float32)
        dstTri = np.array([[0, 0], [0, rect[1][1]], [rect[1][0], 0]]).astype(np.float32)
        warp_mat = cv2.getAffineTransform(srcTri, dstTri)
        warp_dst = cv2.warpAffine(img, warp_mat, (int(rect[1][0]), int(rect[1][1])))

# draw the per-character separations on the deskewed strip
N = 14
s = 0.99 * warp_dst.shape[1] / N  # tune rectangle positions
for i in range(N):
    warp_dst = cv2.rectangle(warp_dst, (2 + int(i * s), 2),
                             (2 + int((i + 1) * s), warp_dst.shape[0] - 3),
                             (255, 255, 255), 2)
cv2.imwrite('chars.png', warp_dst)
Using, for instance, a Hough transform, detect the top and bottom edges and the vertical separations. Validate the separations by checking that they run from top to bottom. The horizontal lines will be more reliable and accurate; you can use their direction for deskewing if necessary.
After doing that, you will have missing separations and false ones. Using some heuristics, try to find the correct pitch and to detect the false positives and false negatives. Then you can extract the content of the individual boxes, or erase the edges.
This process cannot be perfect; some characters will be damaged. A sketch of the line-detection step follows.
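Here is a minimal sketch of the line-detection step with OpenCV's probabilistic Hough transform; the filename and the thresholds are assumptions to tune for your scans:

import cv2
import numpy as np

img = cv2.imread('form.jpg')  # hypothetical input form
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150)

# probabilistic Hough; threshold/minLineLength/maxLineGap need tuning
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80,
                        minLineLength=40, maxLineGap=5)

horizontals, verticals = [], []
if lines is not None:
    for x1, y1, x2, y2 in lines[:, 0]:
        if abs(y2 - y1) < 3:    # nearly horizontal: top/bottom edge candidate
            horizontals.append((x1, y1, x2, y2))
        elif abs(x2 - x1) < 3:  # nearly vertical: separation candidate
            verticals.append((x1, y1, x2, y2))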
Currently, I use the following piece of code to create mask images (classes = ['tree', 'car', 'bicycle']; polygons is a list of geometry objects, each with a coordinates field that defines the polygon bounding the class object on the image):
def create_mask(self, mask_size, classes, polygons):
    # type: (Tuple[int, int], List[str], List[geometry]) -> Image
    # Create a new palette image; the default fill color of Image.new() is black
    # https://pillow.readthedocs.io/en/3.3.x/handbook/concepts.html#modes
    img = Image.new('P', mask_size)
    img.putpalette(self.palette)  # palette = [0, 0, 0, 255, 0, 0, ...]
    draw = ImageDraw.Draw(img)
    for i, class_ in enumerate(classes):
        color_index = self.class_to_color_index[class_]
        draw.polygon(xy=polygons[i].exterior.coords, fill=color_index)
    del draw
    return img
Is there any way to rewrite this piece of code using features.rasterize?
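A sketch of one possible rewrite, assuming the polygons are shapely geometries in pixel coordinates (so rasterize's default identity transform applies) and that a palette image is still wanted at the end:

import numpy as np
from PIL import Image
from rasterio import features

def create_mask(self, mask_size, classes, polygons):
    width, height = mask_size
    # pair each geometry with the palette index to burn into the mask
    shapes = [(polygons[i], self.class_to_color_index[class_])
              for i, class_ in enumerate(classes)]
    mask = features.rasterize(shapes, out_shape=(height, width),
                              fill=0, dtype='uint8')
    img = Image.fromarray(mask, mode='P')
    img.putpalette(self.palette)
    return img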
I am trying to detect logos in invoices. Though I am able to get some results, they are not sufficient to process further. While detecting logos, unwanted text is also detected.
The following is from an actual invoice: [original image]
And these are the results I am getting: [image after operations]
I am using the following code, which I have written:
gray = cv2.imread("Image", 0)
ret, thresh1 = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)

kernel_logo = np.ones((10, 10), np.uint8)
closing_logo = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, kernel_logo, iterations=1)

n = 3
noise_removed_logo = cv2.medianBlur(closing_logo, n)
eroded_logo = cv2.erode(noise_removed_logo, kernel_logo, iterations=8)
dilated_logo = cv2.dilate(eroded_logo, kernel_logo, iterations=3)
Could you please tell me what changes I should make to remove the noise from my document image? I am new to computer vision.
A few more samples: [original document]
The result I am getting: [result after operations on the document]
Hello Mohd Anas Khan.
Your approach to defining the logo is too simple to work. If you want a production-level approach, use some machine learning or deep learning. If you just want a toy, then a simple contour finder with fixed rules should work.
For example, in the following approach I defined "logo" as "the contour with the biggest area". You'll need more rules later, so good luck.
import numpy as np
import cv2

im = cv2.imread('contours_1.jpg')
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY_INV)

# close small gaps so each logo/text blob becomes one contour
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
threshed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, rect_kernel)
cv2.imwrite("contours_1_thres.jpg", threshed)

# OpenCV 3 signature; in OpenCV 4, findContours returns (contours, hierarchy)
im2, contours, hierarchy = cv2.findContours(threshed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

ws = []
hs = []
areas = []
for contour in contours:
    area = cv2.contourArea(contour)
    x, y, w, h = cv2.boundingRect(contour)
    print("w: {}, h: {}, area: {}".format(w, h, area))
    ws.append(w)
    hs.append(h)
    areas.append(area)

# "logo" rule: keep the contour with the biggest area
max_idx = np.argmax(areas)
cv2.drawContours(im, [contours[max_idx]], -1, (0, 255, 0), 3)
# cv2.drawContours(im, contours, -1, (0, 255, 0), 3)
cv2.imwrite("contours_1_test.jpg", im)
The output images are as follows (the detected logo is outlined with a green box).