image dilation with python - python-3.x

I'm trying to execute a piece of code I found online, and it gives me the following error.
I'm new to OpenCV, so please help me.
error:
<ipython-input-1-7fe9c579ec14> in image_masking(filepath)
15 gray = cv2.imread(filepath,0)
16 edges = cv2.Canny(gray, CANNY_THRESH_1, CANNY_THRESH_2)
---> 17 edges = cv2.dilate(edges,None)
18 edges = cv2.erode(edges, None)
19
error: OpenCV(3.4.1) C:\Miniconda3\conda-bld\opencv-suite_1533128839831\work\modules\core\src\matrix.cpp:760: error: (-215) dims <= 2 && step[0] > 0 in function cv::Mat::locateROI
code:
import cv2
import numpy as np

def image_masking(filepath):
    BLUR = 21
    CANNY_THRESH_1 = 100
    CANNY_THRESH_2 = 100
    MASK_DILATE_ITER = 10
    MASK_ERODE_ITER = 10
    MASK_COLOR = (0.0, 0.0, 0.0)  # in BGR format

    gray = cv2.imread(filepath, 0)
    edges = cv2.Canny(gray, CANNY_THRESH_1, CANNY_THRESH_2)
    edges = cv2.dilate(edges, None)
    edges = cv2.erode(edges, None)

    contour_info = []
    _, contours, __ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    for c in contours:
        contour_info.append((c, cv2.isContourConvex(c), cv2.contourArea(c)))
    contour_info = sorted(contour_info, key=lambda c: c[2], reverse=True)
    max_contour = contour_info[0]

    mask = np.zeros(edges.shape)  # missing from the snippet as posted; the mask must exist before it is filled
    for c in contour_info:
        cv2.fillConvexPoly(mask, c[0], (255))
    mask = cv2.dilate(mask, None, iterations=MASK_DILATE_ITER)
    mask = cv2.erode(mask, None, iterations=MASK_ERODE_ITER)
    mask = cv2.GaussianBlur(mask, (BLUR, BLUR), 0)

    mask_stack = np.dstack([mask] * 3)
    mask_stack = mask_stack.astype('float32') / 255.0
    img = cv2.imread(filepath)  # also missing as posted; the colour image is needed for the blend below
    img = img.astype('float32') / 255.0
    masked = (mask_stack * img) + ((1 - mask_stack) * MASK_COLOR)
    masked = (masked * 255).astype('uint8')

    fileName, fileExtension = filepath.split('.')
    fileName += '-masked.'
    filepath = fileName + fileExtension
    print(filepath)
    cv2.imwrite(filepath, masked)

if __name__ == '__main__':
    filepath = 'C:\\Users\\HP\\Downloads\\test3.jpg'
    image_masking(filepath)
I tried replacing None in the dilate function with a kernel, but it gives me the same error.

The second argument to cv2.dilate and cv2.erode should be the kernel with which you want to perform the dilation/erosion, as shown in the docs: opencv documentation
For example, you can try to do it like that:
kernel = np.ones((3, 3), np.uint8)
edges = cv2.dilate(edges, kernel)
edges = cv2.erode(edges, kernel)
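If a plain 3x3 box is too crude, cv2.getStructuringElement builds other kernel shapes (a small addition for completeness, not part of the original answer):
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))  # or MORPH_RECT, MORPH_CROSS
edges = cv2.dilate(edges, kernel)
edges = cv2.erode(edges, kernel)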
Good luck with further opencv exploration!

Related

Opencv detect side column and text

I am working on an OpenCV project where I need to detect a names column and any black border present around the ROI. I am quite new to image processing, so I am unable to figure out how to do this.
This is one of the sample images, from which I wish to remove the column on the right (the one containing all the details). Not all images contain this column, though, so I want to detect the column and remove it from the image.
Here is the expected output.
EDIT
Here is the code I have tried (detecting the largest rectangles in the region):
import cv2
from cv2 import dilate
from cv2 import findContours
import imutils
import numpy as np

image_name = 'test2.jpg'
og_plan = cv2.imread('test_images/{}'.format(image_name))
res = og_plan.copy()
img_height, img_width, img_channel = og_plan.shape
img_area = img_width * img_height

if og_plan.shape[0] > 800:
    res = imutils.resize(res, height=720)
    img_height, img_width, img_channel = res.shape
    img_area = img_width * img_height

print(res.shape)
print(img_area)

hsv_plan = cv2.cvtColor(res, cv2.COLOR_BGR2HSV)
grey_plan = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)

blue_min = np.array([14, 100, 76])
blue_max = np.array([130, 255, 255])
bluemask = cv2.inRange(hsv_plan, blue_min, blue_max)
blue_output = cv2.bitwise_and(hsv_plan, hsv_plan, mask=bluemask)
grey_mask = cv2.cvtColor(blue_output, cv2.COLOR_BGR2GRAY)

ret, thresh = cv2.threshold(grey_mask, 100, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
ret2, thresh2 = cv2.threshold(grey_plan, 160, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

kernel = np.ones((3, 3), np.uint8)
dil = dilate(thresh, kernel, iterations=2)
dil_grey = dilate(thresh2, kernel, iterations=2)

cont, hier = findContours(dil, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cont1, hier1 = findContours(dil_grey, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

def max_rect(cntrs):
    ar = {}
    for cnt in cntrs:
        x, y, w, h = cv2.boundingRect(cnt)
        area = w * h
        ar[area] = (x, y, w, h)
    # ar = sorted(ar, key=ar.keys, reverse=True)
    return ar

area_dict = max_rect(cont1)
roi_area = []
for area in area_dict:
    if area >= img_area * 0.1 and area < img_area:
        print(area)
        roi_area.append(area)

plan_no = 1
for a in roi_area:
    plan = area_dict[a]
    # del area_dict[a]
    x, y, w, h = plan
    roi = res[y:y+h, x:x+w]
    print(plan)
    cv2.rectangle(res, (x-5, y-5), (x+w+5, y+h+5), (255, 255, 0), 2)
    cv2.imshow('ROI-{}'.format(image_name), roi)
    cv2.imwrite('./result/{}_plan-{}.png'.format(image_name, plan_no), roi)
    cv2.waitKey(0)
    plan_no += 1

'''plan1 = area_dict[max(area_dict)]
del area_dict[max(area_dict)]
plan2 = area_dict[max(area_dict)]
x,y,w,h = plan1
x1,y1,w1,h1 = plan2
roi1 = res[y:y+h, x:x+w]
roi2 = res[y1:y1+h1, x1:x1+w1]
print(plan1, plan2)
cv2.rectangle(res, (x-5,y-5), (x+w+5, y+h+5), (255,255,0), 2)
cv2.rectangle(res, (x1-5,y1-5), (x1+w1+5, y1+h1+20), (255,255,0), 2)'''
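No answer is recorded for this question here, but given the max_rect helper above, one plausible follow-up (an editor's sketch with assumed tolerances, not a tested solution) is to look for a tall bounding box hugging the right border and crop everything to its left:
def remove_right_column(img, rects, edge_tol=10):
    # rects: the {area: (x, y, w, h)} dict produced by max_rect above
    h, w = img.shape[:2]
    # scan boxes from largest to smallest
    for (x, y, rw, rh) in sorted(rects.values(), key=lambda r: r[2] * r[3], reverse=True):
        if x + rw >= w - edge_tol and rh >= 0.8 * h:  # tall box touching the right edge
            return img[:, :x]  # keep only what lies left of the column
    return img  # no side column detected; image unchanged
It would be called, for instance, as res = remove_right_column(res, area_dict).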

Only integer scalar arrays can be converted to a scalar index not running under Spyder

I have the following code, which runs well under Visual Studio Code with Python 3.9.10, OpenCV 4.5.5 and NumPy 1.22.1.
I would like to migrate this code to the Spyder IDE (version 5, another notebook) with Python 3.8, OpenCV 4.5.1 and NumPy 1.22.2.
In Spyder, I get the error message TypeError: only integer scalar arrays can be converted to a scalar index in the line output_layers = [layer_names[i-1]...] (marked in the code section below).
I have already checked other answers on this site, such as
TypeError when indexing a list with a NumPy array: only integer scalar arrays can be converted to a scalar index
which suggests a list comprehension, but to my understanding I have already implemented this.
What is the reason for the code running correctly in one environment but not in the other?
import cv2
import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]  # <-- marked line: TypeError raised here
    return output_layers

def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(classes[class_id])
    color = COLORS[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x-10, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

image = cv2.imread('horses.jpg')
Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

classes = None
with open(r'yolov3.txt', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(get_output_layers(net))

class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

for i in indices:
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))

cv2.imshow("object detection", image)
cv2.waitKey()
cv2.imwrite("object-detection.jpg", image)
cv2.destroyAllWindows()
There were subtle, recent API changes w.r.t. handling std::vector in Python
(4.5.1 still expects a 2D array, but it's 1D in 4.5.5).
To avoid the whole trouble, simply use:
output_layers = net.getUnconnectedOutLayersNames()
(as is done in the sample)
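If you do need the numeric indices themselves, a version-tolerant variant (an editor's sketch, not from the original answer) is to flatten whatever shape getUnconnectedOutLayers returns:
import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # 2-D array in the older bindings, 1-D in the newer ones; flatten handles both
    ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(i) - 1] for i in ids]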

3D-reconstruction using structure-from-motion

I want to do 3D reconstruction using a structure-from-motion algorithm, with OpenCV in Python. But somehow the obtained point cloud breaks into two halves. My input images are:
Image 1
Image 2
Image 3.
I match every two consecutive images, i.e. image1 with image2 and image2 with image3. I tried different feature detectors such as SIFT, KAZE and SURF. From the matched points I compute the essential matrix. I obtained the camera intrinsics via OpenCV's camera calibration; they are stored in the variables 'mtx' and 'dist' in the code below.
import os
import sys
import cv2
import numpy as np
# rotationMatrixToEulerAngles is the asker's own helper, defined elsewhere

file = os.listdir('Path_to _images')
file.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
path = os.path.join(os.getcwd(), 'Path_to_images/')

for i in range(0, len(file) - 1):
    if i == len(file) - 1:
        break
    path1 = cv2.imread(path + file[i], 0)
    path1 = cv2.equalizeHist(path1)
    path2 = cv2.imread(path + file[i+1], 0)
    path2 = cv2.equalizeHist(path2)

    # Feature Detection #
    sift = cv2.xfeatures2d.SIFT_create()
    kp1, des1 = sift.detectAndCompute(path1, None)
    kp2, des2 = sift.detectAndCompute(path2, None)

    # Feature Matching #
    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)

    good = []
    pts1 = []
    pts2 = []
    for j, (m, n) in enumerate(matches):
        if m.distance < 0.8 * n.distance:
            good.append(m)
            pts2.append(kp2[m.trainIdx].pt)
            pts1.append(kp1[m.queryIdx].pt)

    pts1 = np.int32(pts1)
    pts2 = np.int32(pts2)
    pts1 = np.array([pts1], dtype=np.float32)
    pts2 = np.array([pts2], dtype=np.float32)

    # UNDISTORTING POINTS #
    pts1_norm = cv2.undistortPoints(pts1, mtx, dist)
    pts2_norm = cv2.undistortPoints(pts2, mtx, dist)

    # COMPUTE FUNDAMENTAL MATRIX #
    F, mask = cv2.findFundamentalMat(pts1_norm, pts2_norm, cv2.FM_LMEDS)

    # COMPUTE ESSENTIAL MATRIX #
    E, mask = cv2.findEssentialMat(pts1_norm, pts2_norm, focal=55.474, pp=(33.516, 16.630), method=cv2.FM_LMEDS, prob=0.999, threshold=3.0)

    # POSE RECOVERY #
    points, R, t, mask = cv2.recoverPose(E, pts1_norm, pts2_norm)
    anglesBetweenImages = rotationMatrixToEulerAngles(R)
    sys.stdout = open('path_to_folder/angles.txt', 'a')
    print(anglesBetweenImages)

    # COMPOSE PROJECTION MATRIX OF R, t #
    matrix_1 = np.hstack((R, t))
    matrix_2 = np.hstack((np.eye(3, 3), np.zeros((3, 1))))
    projMat_1 = np.dot(mtx, matrix_1)
    projMat_2 = np.dot(mtx, matrix_2)

    # TRIANGULATE POINTS #
    point_4d_hom = cv2.triangulatePoints(projMat_1[:3], projMat_2[:3], pts1[:2].T, pts2[:2].T)

    # HOMOGENIZE THE 4D RESULT TO 3D #
    point_4d = point_4d_hom
    point_3d = point_4d[:3, :].T  # Obtains 3D points
    np.savetxt('/path_to_folder/' + file[i] + '.txt', point_3d)
After cv2.triangulatePoints, I expected to obtain one point cloud, but the result has two surfaces, as shown in the image below.
Result 1.
I would really appreciate it if anyone could tell me what is going wrong with my algorithm. Thanks!
You need to do this iteratively, like this:
cv::Mat pointsMat1(2, 1, CV_64F);
cv::Mat pointsMat2(2, 1, CV_64F);
int size0 = m_history.getHistorySize();
for (int i = 0; i < size0; i++) {
    cv::Point pt1 = m_history.getOriginalPoint(0, i);
    cv::Point pt2 = m_history.getOriginalPoint(1, i);
    pointsMat1.at<double>(0, 0) = pt1.x;
    pointsMat1.at<double>(1, 0) = pt1.y;
    pointsMat2.at<double>(0, 0) = pt2.x;
    pointsMat2.at<double>(1, 0) = pt2.y;
    cv::Mat pnts3D(4, 1, CV_64F);
    cv::triangulatePoints(m_projectionMat1, m_projectionMat2, pointsMat1, pointsMat2, pnts3D);
}
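For reference, a rough Python equivalent of that loop (an editor's sketch reusing projMat_1, projMat_2, pts1 and pts2 from the question; note the division by the fourth homogeneous coordinate, which the question's code skips):
point_3d = []
for p1, p2 in zip(pts1.reshape(-1, 2), pts2.reshape(-1, 2)):
    pt1 = np.float64(p1).reshape(2, 1)  # one correspondence at a time
    pt2 = np.float64(p2).reshape(2, 1)
    pnts4d = cv2.triangulatePoints(projMat_1, projMat_2, pt1, pt2)
    point_3d.append((pnts4d[:3] / pnts4d[3]).ravel())  # homogeneous -> Euclidean
point_3d = np.array(point_3d)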

samples.cols == var_count && samples.type() == 5 in function 'cv::ml::SVMImpl::predict' error on svm.predict method

I'm creating an object classifier in OpenCV Python using an SVM. The training dataset consists of 200 positive and 200 negative images. For the positive images, I cropped the target object from each of the 200 images and resized the crops to (64,128) for the HOG calculation. For the negative images, I first created an image pyramid, then applied a 64x128 sliding window, and then calculated the HOG for the positive images as well as for all windows of the negative images, with labels 1 and 0. I trained the SVM model on the HOG features.
I get the error "cv2.error: OpenCV(3.4.2) C:\projects\opencv-python\opencv\modules\ml\src\svm.cpp:2010: error: (-215:Assertion failed) samples.cols == var_count && samples.type() == 5 in function 'cv::ml::SVMImpl::predict'" when I call the predict function via res = svm.predict(samples[0]).ravel().
import cv2
import os
import time
import numpy as np
import imutils

positive_path = 'C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\ROI images'
negative_path = 'C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\Negative images'

def pyramid(img):  # create image pyramid
    minSize = (30, 30)
    imgarr = []
    while True:
        scale = 2
        imgarr.append(img)
        w = int(img.shape[1] / scale)
        img = imutils.resize(img, width=w)
        if img.shape[0] < minSize[1] or img.shape[1] < minSize[0]:
            break
    return imgarr

def sliding_window(image, stepSize, windowSize):  # sliding window for negative images
    sliding = []
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            sliding.append((x, y, image[y:y + windowSize[1], x:x + windowSize[0]]))
    return sliding

def get_hog():
    winSize = (64, 128)
    blockSize = (16, 16)
    blockStride = (16, 16)
    cellSize = (8, 8)
    nbins = 9
    derivAperture = 1
    winSigma = 4.
    histogramNormType = 0
    L2HysThreshold = 0.2
    gammaCorrection = 0
    nlevels = 64
    signedGradient = True
    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins, derivAperture, winSigma, histogramNormType, L2HysThreshold, gammaCorrection, nlevels, signedGradient)
    return hog

samples = []
labels = []
sam = []
hog = get_hog()

for filename in os.listdir(positive_path):
    img = cv2.imread(os.path.join(positive_path, filename), 0)  # grayscale (flag 0)
    img = cv2.resize(img, (64, 128))
    img = np.array(img)
    hist = hog.compute(img)
    hist = cv2.normalize(hist, None)
    sam.append(img)
    samples.append(hist)
    labels.append(1)

i = 0
for filename in os.listdir(negative_path):
    img = cv2.imread(os.path.join(negative_path, filename), 0)
    (winW, winH) = (64, 128)
    pyr = pyramid(img)
    for resized in pyr:
        sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
        for (x, y, window) in sliding:
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            hist = hog.compute(window)
            hist = cv2.normalize(hist, None)
            sam.append(window)
            samples.append(hist)
            labels.append(0)
    print(i)
    i = i + 1

samples = np.array(samples, dtype=np.float32)
labels = np.array(labels, dtype=int)
samples = np.squeeze(samples)
print(len(samples))
print(samples.shape)

rand = np.random.RandomState(10)
shuffle = rand.permutation(len(samples))
samples = samples[shuffle]  # the snippet as posted applied the permutation to samples twice, misaligning the labels
labels = labels[shuffle]

svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setC(2.67)
svm.setGamma(5.383)
svm_params = dict(kernel_type=cv2.ml.SVM_LINEAR,
                  svm_type=cv2.ml.SVM_C_SVC,
                  C=2.67, gamma=5.383)

svm.train(samples, cv2.ml.ROW_SAMPLE, labels)
print("trained")
res = svm.predict(samples[0]).ravel()  # <-- raises the (-215) assertion
print(res)

cap = cv2.VideoCapture(0)
while True:
    ret, img = cap.read()
    img = cv2.resize(img, (400, 400))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    (winW, winH) = (64, 128)
    pyr = pyramid(img)
    for resized in pyr:
        sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
        for (x, y, window) in sliding:
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            hist = hog.compute(window)
            hist = cv2.normalize(hist, None)
            hist = np.reshape(hist, (1, hist.shape[0]))
            res = svm.predict(hist)[1].ravel()
            if res == 1:
                print("found")
    cv2.imshow('img', img)
    cv2.waitKey(10)
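No answer is recorded for this question here, but the assertion itself points at the fix (an editor's note): samples.type() == 5 is CV_32F, and samples.cols == var_count means predict wants a 2-D float32 matrix with one descriptor per row, while samples[0] is 1-D. Reshaping it, as the webcam loop above already does, satisfies the check:
sample = samples[0].reshape(1, -1)    # one row, var_count columns, float32
res = svm.predict(sample)[1].ravel()  # predict returns (retval, results)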

OpenCV3 matrix transform on KeyPoint fails

I'm using Python 3.5.2 and OpenCV 3.1.0. I'm trying to warp some keypoints from a query image with a transformation matrix I generated with cv2.getAffineTransform() (see the code below). Whatever I try to pass to the transform function, it always throws this error:
cv2.error: D:\opencv\sources\modules\core\src\matmul.cpp:1947: error: (-215) scn == m.cols || scn + 1 == m.cols in function cv::transform
How do I have to pass the keypoints to make cv2.transform() work?
import cv2
import numpy as np
import random
queryImage_path = r"C:\tmp\query.jpg"  # raw strings, so \t is not read as a tab escape
trainImage_path = r"C:\tmp\train.jpg"
queryImage = cv2.imread(queryImage_path, cv2.IMREAD_COLOR)
trainImage = cv2.imread(trainImage_path, cv2.IMREAD_COLOR)
surf = cv2.xfeatures2d.SURF_create()
queryImage_keypoints = surf.detect(queryImage,None)
trainImage_keypoints = surf.detect(trainImage, None)
queryImage_keypoints, queryImage_descriptors = surf.compute(queryImage, queryImage_keypoints)
trainImage_keypoints, trainImage_descriptors = surf.compute(trainImage, trainImage_keypoints)
bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
matches = bf.match(queryImage_descriptors, trainImage_descriptors)
# get three random match indices which are not the same
match_index_a = random.randint(0, len(matches) - 1)
match_index_b = random.randint(0, len(matches) - 1)
match_index_c = random.randint(0, len(matches) - 1)
# get Keypoints from match indices
# queryImage- keypoints
queryImage_keypoint_a = queryImage_keypoints[matches[match_index_a].queryIdx]
queryImage_keypoint_b = queryImage_keypoints[matches[match_index_b].queryIdx]
queryImage_keypoint_c = queryImage_keypoints[matches[match_index_c].queryIdx]
# trainImage-keypoints
trainImage_keypoint_a = trainImage_keypoints[matches[match_index_a].trainIdx]
trainImage_keypoint_b = trainImage_keypoints[matches[match_index_b].trainIdx]
trainImage_keypoint_c = trainImage_keypoints[matches[match_index_c].trainIdx]
# get affine transformation matrix from these 6 keypoints
trainImage_points = np.float32([[trainImage_keypoint_a.pt[0], trainImage_keypoint_a.pt[1]],
[trainImage_keypoint_b.pt[0], trainImage_keypoint_b.pt[1]],
[trainImage_keypoint_c.pt[0], trainImage_keypoint_c.pt[1]]])
queryImage_points = np.float32([[queryImage_keypoint_a.pt[0], queryImage_keypoint_a.pt[1]],
[queryImage_keypoint_b.pt[0], queryImage_keypoint_b.pt[1]],
[queryImage_keypoint_c.pt[0], queryImage_keypoint_c.pt[1]]])
# get transformation matrix for current points
currentMatrix = cv2.getAffineTransform(queryImage_points, trainImage_points)
queryImage_keypoint = queryImage_keypoints[matches[0].queryIdx]
keypoint_asArray = np.array([[queryImage_keypoint.pt[0]], [queryImage_keypoint.pt[1]], [1]])
#queryImage_warped_keypoint = currentMatrix.dot(keypoint_asArray)
queryImage_warped_keypoint = cv2.transform(keypoint_asArray,currentMatrix)
Use
keypoint_asArray = np.array([[[queryImage_keypoint.pt[0], queryImage_keypoint.pt[1], 1]]])
instead of
keypoint_asArray = np.array([[queryImage_keypoint.pt[0]], [queryImage_keypoint.pt[1]], [1]])
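Why this works (editor's note): cv2.transform reads the last axis of the input array as channels, and the (-215) check scn == m.cols || scn + 1 == m.cols demands that the channel count equal the matrix's column count (3 for a 2x3 affine matrix) or fall one short, in which case the homogeneous 1 is appended implicitly. So the plain 2-channel form is accepted as well:
# shape (1, 1, 2): scn + 1 == m.cols, the trailing 1 is implicit
keypoint_as2ch = np.array([[[queryImage_keypoint.pt[0], queryImage_keypoint.pt[1]]]])
queryImage_warped_keypoint = cv2.transform(keypoint_as2ch, currentMatrix)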
