vtk: how to obtain the image pixel index from a world point - vtk

If I pick a world point from an image, how can I convert the world coordinate to an image index?
import vtk
import numpy as np
from vtk.util.numpy_support import numpy_to_vtk
def numpyToVTK(data, multi_component=False, type='float'):
    """Copy a numpy array into a new ``vtkImageData``.

    Args:
        data: With ``multi_component`` false, a 2D ``(x, y)`` or 3D
            ``(x, y, z)`` scalar array; axis 0 becomes the first VTK
            dimension. With ``multi_component`` true, a 3D
            ``(x, y, components)`` array describing a single 2D slice.
        multi_component: Treat the last axis as per-pixel components instead
            of as the third spatial axis.
        type: ``'float'`` (``VTK_FLOAT``) or ``'char'``
            (``VTK_UNSIGNED_CHAR``). The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        A ``vtkImageData`` whose scalars are a deep copy of ``data``.

    Raises:
        RuntimeError: If ``type`` is not a supported name.
    """
    vtk_types = {'float': vtk.VTK_FLOAT, 'char': vtk.VTK_UNSIGNED_CHAR}
    if type not in vtk_types:
        raise RuntimeError('unknown type')
    data_type = vtk_types[type]

    if not multi_component:
        if data.ndim == 2:
            data = data[:, :, np.newaxis]
        # Flattening the (z, y, x) view in C order makes the first axis of
        # ``data`` the fastest-varying one in the flat scalar array.
        flat_data_array = data.transpose(2, 1, 0).flatten()
        shape = data.shape
    else:
        assert data.ndim == 3, 'only test for 2D RGB'
        # One row per pixel, one column per component. The ndarray method is
        # used because np.reshape's ``newshape`` keyword is deprecated.
        flat_data_array = data.transpose(1, 0, 2).reshape(-1, data.shape[2])
        shape = [data.shape[0], data.shape[1], 1]

    vtk_data = numpy_to_vtk(num_array=flat_data_array, deep=True, array_type=data_type)
    img = vtk.vtkImageData()
    img.GetPointData().SetScalars(vtk_data)
    img.SetDimensions(shape[0], shape[1], shape[2])
    return img
# Actors currently managed by mouseMoveEvent; both stay None until that
# handler assigns them. (A ``global`` statement is a no-op at module level.)
sphereActor = None
textActor = None
def mouseMoveEvent(iren, event):
    """Mark the world point under the cursor with a sphere and a text label.

    Registered as a ``MouseMoveEvent`` observer. Reads the module-level
    renderer ``render`` and replaces the module-level ``sphereActor`` and
    ``textActor`` on every call.

    Args:
        iren: Interactor that fired the event; supplies the cursor position.
        event: Event name passed by VTK (unused).
    """
    global sphereActor, textActor

    x, y = iren.GetEventPosition()
    picker = vtk.vtkWorldPointPicker()
    picker.Pick(x, y, 0, render)
    worldPoint = picker.GetPickPosition()

    # Converting worldPoint to an image index is not done here. vtkImageData
    # offers TransformPhysicalPointToContinuousIndex for that purpose
    # (NOTE(review): availability depends on the VTK version -- confirm).

    sphere = vtk.vtkSphereSource()
    sphere.SetCenter(worldPoint[0], worldPoint[1], worldPoint[2])
    sphere.SetRadius(2)
    sphere.Update()
    sphereMapper = vtk.vtkPolyDataMapper()
    sphereMapper.SetInputData(sphere.GetOutput())

    if sphereActor is not None:
        render.RemoveActor(sphereActor)
    sphereActor = vtk.vtkActor()
    sphereActor.SetMapper(sphereMapper)
    # VTK colour components are in the range 0..1, not 0..255.
    sphereActor.GetProperty().SetColor(1, 0, 0)
    render.AddActor(sphereActor)

    if textActor is not None:
        render.RemoveActor(textActor)
    textActor = vtk.vtkTextActor()
    textActor.SetInput('world coordinate: (%.2f, %.2f, %.2f)'%(worldPoint[0], worldPoint[1], worldPoint[2]))
    textActor.GetTextProperty().SetColor(1, 0, 0)
    textActor.GetTextProperty().SetFontSize(15)
    render.AddActor(textActor)

    # Render once, after both actors were swapped, and through the render
    # window rather than by calling vtkRenderer.Render() directly.
    iren.GetRenderWindow().Render()
# Synthetic 128x128 test image: the value of pixel (i, j) is i + j.
img = np.fromfunction(lambda i, j: i + j, (128, 128))
vtkImg = numpyToVTK(img)

# Show the image in a renderer.
imgActor = vtk.vtkImageActor()
imgActor.SetInputData(vtkImg)
render = vtk.vtkRenderer()
render.AddActor(imgActor)

# Window that owns the renderer; draw once before interaction starts.
renWin = vtk.vtkRenderWindow()
renWin.AddRenderer(render)
renWin.Render()

# Trackball-camera interaction, with the picking callback on mouse moves.
iren = vtk.vtkRenderWindowInteractor()
iren.SetRenderWindow(renWin)
iren.SetInteractorStyle(vtk.vtkInteractorStyleTrackballCamera())
iren.Initialize()
iren.AddObserver('MouseMoveEvent', mouseMoveEvent)
iren.Start()
In the above code, if I don't rotate the image, the world point is (x, y, 0):
This agrees with what I know. For the world point (x, y, z) and the image index (i, j, k), the conversion should be:
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
In the above code, the image is converted from numpy, thus:
directionX = [1, 0, 0]
directionY = [0, 1, 0]
directionZ = [0, 0, 1]
originPoint=[0, 0, 0]
spacingX=1
spacingY=1
spacingZ=1
In this way, x=i, y=j, z=k. Since this image is a 2D image, k should be 0 and z should also be 0.
Then, when I rotate the image, z is no longer 0, as in the following picture.
I don't know why z is -0.24.
It suggests that the following conversion is wrong. How can I obtain the image index from the world point?
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
Any suggestion is appreciated!

vtkImageData has the method TransformPhysicalPointToContinuousIndex for going from world space to image space, and TransformIndexToPhysicalPoint for going the other way.
I don't think the computation you're doing is right, since the direction is a 3x3 rotation matrix.

Related

When I compile YOLOv3 I get warnings

"""YOLO v3 output
"""
import numpy as np
import keras.backend as K
from keras.models import load_model
import os
class YOLO:
    """YOLO v3 detector: runs the Keras model and decodes its raw outputs."""

    def __init__(self, obj_threshold, nms_threshold):
        """Init.

        # Arguments
            obj_threshold: Float, minimum class score for a box to be kept.
            nms_threshold: Float, overlap ratio above which a lower-scored
                box is suppressed.
        """
        self._t1 = obj_threshold
        self._t2 = nms_threshold
        self._yolo = load_model('data/yolo.h5')

    @staticmethod
    def _sigmoid(x):
        """Element-wise logistic function of an ndarray."""
        return 1.0 / (1.0 + np.exp(-x))

    def _process_feats(self, out, anchors, mask):
        """process output features.

        # Arguments
            out: ndarray (1, H, W, A, 4 + 1 + C), output feature map of yolo.
                A raw head of shape (1, H, W, A * (4 + 1 + C)) is also
                accepted and split into A = len(mask) anchors.
            anchors: List, anchors for box.
            mask: List, mask for anchors.

        # Returns
            boxes: ndarray (H, W, A, 4), x,y,w,h for per box.
            box_confidence: ndarray (H, W, A, 1), confidence for per box.
            box_class_probs: ndarray (H, W, A, C), class probs for per box.
        """
        out = np.asarray(out)
        num_anchors = len(mask)
        if out.ndim == 4:
            # Un-reshaped head: move the anchors onto their own axis so the
            # width/height slice broadcasts against the anchor sizes.
            out = out.reshape(out.shape[0], out.shape[1], out.shape[2],
                              num_anchors, -1)
        grid_h, grid_w = int(out.shape[1]), int(out.shape[2])

        anchor_wh = np.asarray([anchors[i] for i in mask], dtype=np.float32)
        anchor_wh = anchor_wh.reshape(1, 1, num_anchors, 2)

        out = out[0]
        box_xy = self._sigmoid(out[..., :2])
        box_wh = np.exp(out[..., 2:4]) * anchor_wh
        box_confidence = self._sigmoid(out[..., 4])
        box_confidence = np.expand_dims(box_confidence, axis=-1)
        box_class_probs = self._sigmoid(out[..., 5:])

        # col[y, x] == x and row[y, x] == y, for any (grid_h, grid_w).
        col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
        # Shape (H, W, 1, 2): broadcasts over the anchor axis.
        grid = np.stack((col, row), axis=-1)[:, :, np.newaxis, :]

        box_xy += grid
        box_xy /= (grid_w, grid_h)
        # 416 is the network input size the anchors are expressed in.
        box_wh /= (416, 416)
        # Turn the box centre into its top-left corner.
        box_xy -= (box_wh / 2.)
        boxes = np.concatenate((box_xy, box_wh), axis=-1)

        return boxes, box_confidence, box_class_probs

    def _filter_boxes(self, boxes, box_confidences, box_class_probs):
        """Filter boxes with object threshold.

        # Arguments
            boxes: ndarray, boxes of objects.
            box_confidences: ndarray, confidences of objects.
            box_class_probs: ndarray, class_probs of objects.

        # Returns
            boxes: ndarray, filtered boxes.
            classes: ndarray, classes for boxes.
            scores: ndarray, scores for boxes.
        """
        box_scores = box_confidences * box_class_probs
        box_classes = np.argmax(box_scores, axis=-1)
        box_class_scores = np.max(box_scores, axis=-1)
        pos = np.where(box_class_scores >= self._t1)

        boxes = boxes[pos]
        classes = box_classes[pos]
        scores = box_class_scores[pos]

        return boxes, classes, scores

    def _nms_boxes(self, boxes, scores):
        """Suppress non-maximal boxes.

        # Arguments
            boxes: ndarray (M, 4), x,y,w,h of objects.
            scores: ndarray (M,), scores of objects.

        # Returns
            keep: ndarray of int, index of effective boxes.
        """
        x = boxes[:, 0]
        y = boxes[:, 1]
        w = boxes[:, 2]
        h = boxes[:, 3]

        areas = w * h
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]

            xx1 = np.maximum(x[i], x[rest])
            yy1 = np.maximum(y[i], y[rest])
            xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
            yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])

            # NOTE(review): the intersection adds 1 per side while ``areas``
            # does not -- kept as in the original, confirm the intent.
            w1 = np.maximum(0.0, xx2 - xx1 + 1)
            h1 = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w1 * h1

            ovr = inter / (areas[i] + areas[rest] - inter)
            inds = np.where(ovr <= self._t2)[0]
            order = order[inds + 1]

        # An explicit integer dtype keeps an empty result usable as an index.
        return np.array(keep, dtype=int)

    def _yolo_out(self, outs, shape):
        """Process output of yolo base net.

        # Argument:
            outs: output of yolo base net.
            shape: shape of original image.

        # Returns:
            boxes: ndarray, boxes of objects.
            classes: ndarray, classes of objects.
            scores: ndarray, scores of objects.
            All three are None when no box survives filtering.
        """
        masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                   [59, 119], [116, 90], [156, 198], [373, 326]]

        boxes, classes, scores = [], [], []

        for out, mask in zip(outs, masks):
            b, c, s = self._process_feats(out, anchors, mask)
            b, c, s = self._filter_boxes(b, c, s)
            boxes.append(b)
            classes.append(c)
            scores.append(s)

        boxes = np.concatenate(boxes)
        classes = np.concatenate(classes)
        scores = np.concatenate(scores)

        # Scale boxes back to original image shape.
        width, height = shape[1], shape[0]
        image_dims = [width, height, width, height]
        boxes = boxes * image_dims

        nboxes, nclasses, nscores = [], [], []
        # Run NMS separately for every class that produced a detection.
        for cls in set(classes):
            inds = np.where(classes == cls)
            b = boxes[inds]
            c = classes[inds]
            s = scores[inds]

            keep = self._nms_boxes(b, s)

            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])

        if not nclasses and not nscores:
            return None, None, None

        boxes = np.concatenate(nboxes)
        classes = np.concatenate(nclasses)
        scores = np.concatenate(nscores)

        return boxes, classes, scores

    def predict(self, image, shape):
        """Detect the objects with yolo.

        # Arguments
            image: ndarray, processed input image.
            shape: shape of original image.

        # Returns
            boxes: ndarray, boxes of objects.
            classes: ndarray, classes of objects.
            scores: ndarray, scores of objects.
        """
        outs = self._yolo.predict(image)
        boxes, classes, scores = self._yolo_out(outs, shape)

        return boxes, classes, scores
This is the YOLO v3 code, and when I run the main program I get this error:
InvalidArgumentError: Incompatible shapes: [13,13,2] vs. [1,1,3,2] [Op:Mul]
Main part is
import cv2
import numpy as np
from yolo_model import YOLO
# Detector with object threshold 0.6 and NMS threshold 0.5.
yolo = YOLO(0.6, 0.5)

# One class name per line.
file = "data/coco_classes.txt"
with open(file) as f:
    class_name = f.readlines()
all_classes = [c.strip() for c in class_name]
print("A")

f = "dog_cat.jpg"
path = "images/"+f
image = cv2.imread(path)
if image is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError("could not read image: {}".format(path))
cv2.imshow("image",image)

# Resize to 416x416, scale to [0, 1] and add a batch axis.
pimage = cv2.resize(image, (416,416))
pimage = np.array(pimage, dtype = "float32")
pimage /= 255.0
pimage = np.expand_dims(pimage, axis = 0)

# yolo
boxes, classes, scores = yolo.predict(pimage, image.shape)

# predict() returns (None, None, None) when nothing was detected.
if boxes is not None:
    for box, score, cl in zip(boxes, scores, classes):
        x,y,w,h = box
        # x grows to the right and y downwards, so (x, y) is the top-left
        # corner; plain ints keep the OpenCV drawing calls happy.
        left = max(0, int(np.floor(x + 0.5)))
        top = max(0, int(np.floor(y + 0.5)))
        right = max(0, int(np.floor(x + w + 0.5)))
        bottom = max(0, int(np.floor(y + h + 0.5)))
        cv2.rectangle(image, (left,top), (right, bottom),(255,0,0),2)
        cv2.putText(image, "{} {}".format(all_classes[cl],score),(left,top-6),cv2.FONT_HERSHEY_SIMPLEX,0.6, (0,0,255),1,cv2.LINE_AA)

cv2.imshow("yolo",image)
# HighGUI windows are only painted while waitKey pumps events.
cv2.waitKey(0)
cv2.destroyAllWindows()
I have a problem at box_wh = K.get_value(K.exp(out[..., 2:4]) * anchors_tensor). Is the multiplication necessary? And what does box_wh do?

Pytorch custom randomcrop for semantic segmentation

I am trying to implement a custom dataset loader. Firstly I resize the images and labels with the same ratio between (0.98, 1,1) then I randomly crop both images and labels with same parameters so that I can feed them into NN. However, I am getting an error from PyTorch functional. Here is my code:
class RandomCrop(object):
    """Crop an image and its mask at the same random location.

    Args:
        size: ``(height, width)`` of the crop.
        padding: Stored on the instance; ``__call__`` does not apply it.
        pad_if_needed: Pad inputs that are smaller than ``size`` before
            cropping.
        fill: Fill value handed to the pad function.
        padding_mode: Padding mode handed to the pad function.
    """

    def __init__(self, size, padding=None, pad_if_needed=True, fill=0, padding_mode='constant'):
        self.size = size
        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    @staticmethod
    def get_params(img, output_size):
        """Pick the crop window for ``img``.

        Args:
            img: Object whose ``size`` attribute is ``(width, height)``.
            output_size: ``(height, width)`` of the crop.

        Returns:
            ``(i, j, h, w)``: top row, left column, height and width of the
            crop. ``(0, 0, h, w)`` when the image already has the target size.
        """
        w, h = img.size
        th, tw = output_size
        if w == tw and h == th:
            return 0, 0, h, w
        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def __call__(self, data):
        """Crop ``data["image"]`` and ``data["mask"]`` with one shared window.

        Args:
            data: Mapping with the keys ``"image"`` and ``"mask"``.

        Returns:
            Dict with the cropped ``"image"`` and ``"mask"``.
        """
        img, mask = data["image"], data["mask"]
        # Use the image transform from torchvision, like the crop calls
        # below; the tensor-only nn.functional.pad cannot handle images.
        pad = transforms.functional.pad

        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            img = pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
            mask = pad(mask, (self.size[1] - mask.size[0], 0), self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            img = pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
            mask = pad(mask, (0, self.size[0] - mask.size[1]), self.fill, self.padding_mode)

        # The same window is applied to both so pixels and labels stay aligned.
        i, j, h, w = self.get_params(img, self.size)
        crop_image = transforms.functional.crop(img, i, j, h, w)
        crop_mask = transforms.functional.crop(mask, i, j, h, w)

        return {"image": crop_image, "mask": crop_mask}
Here is the error:
AttributeError: 'Image' object has no attribute 'dim'
I had mistakenly imported nn.functional.pad instead of transforms.functional.pad. After changing it, everything went smoothly.

OpenCV Python HoughLines Transformation get the rectangle points to crop the original image

Is there any way to get the rectangle points from the HoughLines transformation results and apply them as crop points to the original image to get the cropped image? I have copied the code from the documentation. The idea is to extract the document from an image. Below is the result of the HoughLines transformation; I need the intersection points to crop the image.
"""
#file hough_lines.py
#brief This program demonstrates line finding with the Hough transform
"""
import sys
import math
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
def main(argv=None):
    """Detect lines in an image with the standard and probabilistic Hough transforms.

    Args:
        argv: Optional sequence whose first element is the image path; the
            built-in default path is used when it is missing or empty.

    Returns:
        -1 when the image cannot be loaded, otherwise None.
    """
    default_file = "/Users/apple/Downloads/Unknown-4"
    filename = argv[0] if argv else default_file

    # Loads an image
    src = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
    # Check if image is loaded fine
    if src is None:
        print ('Error opening image!')
        print ('Usage: hough_lines.py [image_name -- default ' + default_file + '] \n')
        return -1

    dst = cv.Canny(src, 50, 200, None, 3)

    # Copy edges to the images that will display the results in BGR
    cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)
    cdstP = np.copy(cdst)

    # Standard transform: each entry is one (rho, theta) pair.
    lines = cv.HoughLines(dst, 1, np.pi / 180, 150, None, 0, 0)
    if lines is not None:
        for rho, theta in lines[:, 0]:
            a = math.cos(theta)
            b = math.sin(theta)
            x0 = a * rho
            y0 = b * rho
            # Walk 1000 px from (x0, y0) in both directions along the line.
            pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
            pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
            cv.line(cdst, pt1, pt2, (0,0,255), 3, cv.LINE_AA)

    # Probabilistic transform: each entry is one segment (x1, y1, x2, y2).
    linesP = cv.HoughLinesP(dst, 1, np.pi / 180, 50, None, 50, 10)
    if linesP is not None:
        for x1, y1, x2, y2 in linesP[:, 0]:
            cv.line(cdstP, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 3, cv.LINE_AA)

    plt.imshow(cdstP)


if __name__ == "__main__":
    main()

Delay in output video stream when using YOLOV3 Detection using OpenCV

This is my code for mask detection using YOLOv3 weights that I trained myself. Whenever I run my program, I experience a delay in the output video of the detection. Here is the code; please have a look.
import cv2
import numpy as np
net = cv2.dnn.readNet("yolov3_custom_final.weights", "yolov3_custom.cfg")
with open("obj.name", "r") as f:
    classes = f.read().splitlines()
cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)

# Neither of these changes between frames, so look them up only once.
output = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN

while True:
    ret, img = cap.read()
    if not ret:
        # No frame was delivered (camera unplugged or stream ended).
        break
    height, width, _ = img.shape

    blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    layers = net.forward(output)

    box = []
    confidences = []
    class_ids = []
    for out in layers:
        for detection in out:
            # Entries from index 5 on are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # The first four entries are scaled by the frame size to get
                # the box centre and extent in pixels.
                centre_x = int(detection[0] * width)
                centre_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(centre_x - w / 2)
                y = int(centre_y - h / 2)
                box.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = np.array(cv2.dnn.NMSBoxes(box, confidences, 0.5, 0.4))
    colors = np.random.uniform(0, 255, size=(len(box), 3))
    for i in indexes.flatten():
        x, y, w, h = box[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label + "I" + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

    cv2.imshow("Final", img)
    if cv2.waitKey(1) & 0xff == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
Can someone please help me with this issue or suggest a way to reduce the lag in my output video stream?
After doing some research, I have found a possible answer to this question. I am running my YOLO model on my local system, which has no GPU. This is what causes the delay in the output: each frame must be fully processed before the next frame is taken.

CV2: Approximating Moments/Single Contour return

I am currently trying to get the contours of a collection of images, but CV2 is unable to return the full contours for various images, as shown below.
Hence, I am looking for a way to either approximate the moments for the list of contours, make CV2 return a single contour when calling the function "findContours(...)", or merge the list of returned contours into a single contour if possible.
My code (updated) currently consists of:
def find_if_close(cnt1, cnt2):
    """Tell whether any point of ``cnt1`` lies within 50 units of ``cnt2``.

    Args:
        cnt1: Array of points; the first axis indexes the points.
        cnt2: Array of points with the same per-point layout as ``cnt1``.

    Returns:
        True if some pair of points is closer than 50, otherwise False
        (including when either array holds no points).
    """
    row1, row2 = cnt1.shape[0], cnt2.shape[0]
    if row1 == 0 or row2 == 0:
        return False

    # One flat coordinate row per point, so a whole contour can be compared
    # against a single point in one vectorised step.
    pts1 = cnt1.reshape(row1, -1)
    pts2 = cnt2.reshape(row2, -1)
    for point in pts1:
        dists = np.linalg.norm(pts2 - point, axis=1)
        if (dists < 50).any():
            return True
    return False
def thresh_callback(thresh, img, gray, blur):
    """Find contours in ``blur``, merge the ones that are close, and show them.

    Args:
        thresh: Lower Canny threshold; the upper one is ``thresh * 2``.
        img: Image the merged contours are drawn on (modified in place).
        gray: Unused.
        blur: Image handed to the Canny edge detector.

    Returns:
        ``(moments, cx, cy, count)``.
        NOTE(review): the original returned these four names without ever
        defining them (NameError). They are defined here per merged contour:
        ``moments`` is a list of ``cv2.moments`` dicts, ``cx`` / ``cy`` are
        lists of integer centroid coordinates (None where ``m00`` is zero) and
        ``count`` is the number of merged contours. Confirm with the caller.
    """
    edges = cv2.Canny(blur, thresh, thresh * 2)
    drawing = np.zeros(img.shape, np.uint8)  # Image to draw the contours

    # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4 returns
    # (contours, hierarchy); the last two items are the same in both.
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    LENGTH = len(contours)
    status = np.zeros((LENGTH, 1))

    # Label the contours so that contours close to each other share a label.
    for i, cnt1 in enumerate(contours):
        for x in range(i + 1, LENGTH):
            if find_if_close(cnt1, contours[x]):
                val = min(status[i], status[x])
                status[x] = status[i] = val
            elif status[x] == status[i]:
                status[x] = i + 1

    # Stack the points of all contours that share a label.
    unified = []
    maximum = int(status.max()) + 1 if LENGTH else 0
    for label in range(maximum):
        pos = np.where(status == label)[0]
        if pos.size != 0:
            # np.vstack needs a real sequence, not a generator.
            cont = np.vstack([contours[k] for k in pos])
            unified.append(cont)

    cv2.drawContours(img, unified, -1, (0, 255, 0), 2)
    cv2.drawContours(drawing, unified, -1, 255, -1)

    cv2.imshow('output', img)
    cv2.imshow('input', drawing)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    moments = [cv2.moments(cont) for cont in unified]
    cx, cy = [], []
    for m in moments:
        if m['m00'] != 0:
            cx.append(int(m['m10'] / m['m00']))
            cy.append(int(m['m01'] / m['m00']))
        else:
            # Zero area: the centroid is undefined.
            cx.append(None)
            cy.append(None)
    count = len(unified)

    return moments, cx, cy, count
def alter_image(img, blur):
    """Threshold ``blur`` at 50, invert it, then erode and dilate once each.

    Args:
        img: Unused.
        blur: Image to binarise.

    Returns:
        The result of the final dilation.
    """
    erode_kernel = np.ones((2, 2), np.uint8)  # 15
    dilate_kernel = np.ones((3, 3), np.uint8)  # 45

    _, binary = cv2.threshold(blur, 50, 255, cv2.THRESH_BINARY)
    inverted = cv2.bitwise_not(binary)
    eroded = cv2.erode(inverted, erode_kernel, iterations=1)
    return cv2.dilate(eroded, dilate_kernel, iterations=1)
# Load image number ``num``, then derive the grey and blurred versions.
imgs = cv2.imread('./images/{}.jpg'.format(num))
img_grey = cv2.cvtColor(imgs, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(img_grey, (5, 5), 0)

# From here on ``imgs`` is the thresholded and morphed image.
imgs = alter_image(imgs, blur)

thresh = 255
max_thresh = 255
moments, cx, cy, count = thresh_callback(thresh, imgs, img_grey, blur)
As shown in the code, I tried altering the image through erosion, dilation and bitwise inversion, but the results were still the same. The original image can be found below.
Image Output (Updated):

Resources