Is there any way to get the rectangle corner points from the HoughLines transformation results and use them to crop the original image? I have copied the code from the documentation. The idea is to extract a document from an image. Below is the result of the HoughLines transformation; I need the intersection points of the detected lines so I can crop the image.
"""
#file hough_lines.py
#brief This program demonstrates line finding with the Hough transform
"""
import sys
import math
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
def main(argv=[]):
default_file = "/Users/apple/Downloads/Unknown-4"
filename = argv[0] if len(argv) > 0 else default_file
# Loads an image
src = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
# Check if image is loaded fine
if src is None:
print ('Error opening image!')
print ('Usage: hough_lines.py [image_name -- default ' + default_file + '] \n')
return -1
dst = cv.Canny(src, 50, 200, None, 3)
# Copy edges to the images that will display the results in BGR
cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)
cdstP = np.copy(cdst)
lines = cv.HoughLines(dst, 1, np.pi / 180, 150, None, 0, 0)
if lines is not None:
for i in range(0, len(lines)):
rho = lines[i][0][0]
theta = lines[i][0][1]
a = math.cos(theta)
b = math.sin(theta)
x0 = a * rho
y0 = b * rho
pt1 = (int(x0 + 1000*(-b)), int(y0 + 1000*(a)))
pt2 = (int(x0 - 1000*(-b)), int(y0 - 1000*(a)))
cv.line(cdst, pt1, pt2, (0,0,255), 3, cv.LINE_AA)
linesP = cv.HoughLinesP(dst, 1, np.pi / 180, 50, None, 50, 10)
if linesP is not None:
for i in range(0, len(linesP)):
l = linesP[i][0]
cv.line(cdstP, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv.LINE_AA)
#cv.imshow("Source", src)
#plt.imshow(src)
plt.imshow(cdstP)
#plt.imshow(cdstP)
if __name__ == "__main__":
main()
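For reference: each line returned by cv.HoughLines is a (rho, theta) pair satisfying x*cos(theta) + y*sin(theta) = rho, so the intersection of any two non-parallel lines is the solution of a 2x2 linear system. A minimal sketch of that calculation (the intersection helper and the cropping snippet are illustrative, not from the original post):

import numpy as np

def intersection(line1, line2):
    # Each line is a (rho, theta) pair: x*cos(theta) + y*sin(theta) = rho
    rho1, theta1 = line1
    rho2, theta2 = line2
    A = np.array([[np.cos(theta1), np.sin(theta1)],
                  [np.cos(theta2), np.sin(theta2)]])
    b = np.array([rho1, rho2])
    if abs(np.linalg.det(A)) < 1e-10:
        return None  # lines are (near-)parallel
    x, y = np.linalg.solve(A, b)
    return int(round(x)), int(round(y))

# Intersect every pair of detected lines, then crop the original image
# to the bounding box of the intersection points:
# points = []
# for i in range(len(lines)):
#     for j in range(i + 1, len(lines)):
#         p = intersection(lines[i][0], lines[j][0])
#         if p is not None:
#             points.append(p)
# xs, ys = zip(*points)
# cropped = src[min(ys):max(ys), min(xs):max(xs)]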
This is my mask-detection code using YOLOv3 weights that I trained myself. Whenever I run the program, I experience a delay in the output detection video. Please have a look at the code.
import cv2
import numpy as np

net = cv2.dnn.readNet("yolov3_custom_final.weights", "yolov3_custom.cfg")
with open("obj.name", "r") as f:
    classes = f.read().splitlines()

cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
while True:
    ret, img = cap.read()
    if not ret:
        break
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    output = net.getUnconnectedOutLayersNames()
    layers = net.forward(output)

    boxes = []
    confidences = []
    class_ids = []
    for out in layers:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Detections are normalized; scale back to the image size
                centre_x = int(detection[0] * width)
                centre_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(centre_x - w / 2)
                y = int(centre_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression to drop overlapping boxes
    indexes = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4))
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    for i in indexes.flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

    cv2.imshow("Final", img)
    if cv2.waitKey(1) & 0xff == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
Can someone please help me with this issue or suggest a way to reduce the lag in my output video stream?
Having done some research in the meantime, I have found a possible answer to this question. I am running my YOLO model on a local system with no GPU, and this is the factor causing the delay in the output: each frame must be fully processed before the next frame is grabbed.
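If your OpenCV build has CUDA support (version 4.2 or later), you can also ask the DNN module to run inference on the GPU; a sketch, assuming a CUDA-enabled build:

net = cv2.dnn.readNet("yolov3_custom_final.weights", "yolov3_custom.cfg")
# Both calls require OpenCV >= 4.2 compiled with CUDA support
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

On CPU only, reducing the input size in blobFromImage (e.g. from (416, 416) to (320, 320)) or switching to a tiny YOLOv3 variant trades some accuracy for a noticeably higher frame rate.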
I am trying to use object detection for digit detection.
I found the SVHN dataset.
Speed is important in my project, so I decided to apply a YOLO approach.
However, all the tutorials and explanations on using YOLOv3 expect me to be using a dataset made from the Google Open Images API or labeled manually with a tool such as labelImg.
I, however, have a premade dataset with annotations in the PASCAL VOC format (which can be found here: https://github.com/penny4860/svhn-voc-annotation-format). Because of this I have no labels.txt or classes.txt file, since I did no labeling myself.
I am rather at a loss on where to get started.
Any help would be appreciated.
You can follow the code below to convert from PASCAL VOC to the YOLO format (one .txt file per image, where each line holds class_id x_center y_center width height, all normalized to the image size).
import glob
import os
import xml.etree.ElementTree as ET
from os import getcwd

dirs = ['train', 'val']
classes = ['person', 'car']

def getImagesInDir(dir_path):
    image_list = []
    for filename in glob.glob(dir_path + '/*.jpg'):
        image_list.append(filename)
    return image_list

def convert(size, box):
    # Convert a VOC box (xmin, xmax, ymin, ymax) in pixels to a YOLO box
    # (x_center, y_center, width, height) normalized by the image size
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)

def convert_annotation(dir_path, output_path, image_path):
    basename = os.path.basename(image_path)
    basename_no_ext = os.path.splitext(basename)[0]
    in_file = open(dir_path + '/' + basename_no_ext + '.xml')
    out_file = open(output_path + basename_no_ext + '.txt', 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

cwd = getcwd()
for dir_path in dirs:
    full_dir_path = cwd + '/' + dir_path
    output_path = full_dir_path + '/yolo/'
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    image_paths = getImagesInDir(full_dir_path)
    list_file = open(full_dir_path + '.txt', 'w')
    for image_path in image_paths:
        list_file.write(image_path + '\n')
        convert_annotation(full_dir_path, output_path, image_path)
    list_file.close()
    print("Finished processing: " + dir_path)
I am using the same code that is provided by the OpenCV tutorial. It was working a few weeks ago, but today when I try to run it, it says that the name gray is not defined! Can someone find the error for me?
import numpy as np
import cv2
import glob
import os

def draw(img, corners, imgpts):
    corner = tuple(corners[0].ravel())
    img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255, 0, 0), 5)
    img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0, 255, 0), 5)
    img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0, 0, 255), 5)
    return img

# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,6,0)
objp = np.zeros((7*7, 3), np.float32)
objp[:, :2] = np.mgrid[0:7, 0:7].T.reshape(-1, 2)

# Arrays to store object points and image points from all the images.
objpoints = []  # 3d points in real world space
imgpoints = []  # 2d points in image plane

img_dir = "C:\\Hungary\\Biblography\\Rotating Solitary Wave\\My Work\\Final Work\\Experiment1111 \\Camera Calibration\\Image Processing\\chess"
data_path = os.path.join(img_dir, '*bmp')
images = glob.glob(data_path)

for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Find the chess board corners
    ret, corners = cv2.findChessboardCorners(gray, (7, 7), None)
    # If found, add object points, image points (after refining them)
    if ret == True:
        objpoints.append(objp)
        corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)
        # Draw and display the corners
        img = cv2.drawChessboardCorners(img, (7, 7), corners2, ret)
        cv2.imshow('img', img)
        cv2.waitKey(500)

cv2.destroyAllWindows()

ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print('Rotation Vector, or the Angles For Each Photo: ', rvecs, '\n')
R = cv2.Rodrigues(rvecs[0])
print('The Rotation Matrix is: ', R)
print('Translation Vector: ', tvecs, '\n')
print(mtx, '\n')
print('Distortion Coefficients ', dist, '\n')

img = cv2.imread('00000274.bmp')
h, w = img.shape[:2]
newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
print('Camera Matrix', newcameramtx, '\n')

# undistort
dst = cv2.undistort(img, mtx, dist)  # , None, newcameramtx)
# crop the image
x, y, w, h = roi
dst = dst[y:y+h, x:x+w]

# undistort using remapping
mapx, mapy = cv2.initUndistortRectifyMap(mtx, dist, None, newcameramtx, (w, h), 5)
dst = cv2.remap(img, mapx, mapy, cv2.INTER_LINEAR)
# crop the image
x, y, w, h = roi
dst = dst[y:y+h, x:x+w]
cv2.imwrite('calibresult.png', dst)

# Re-projection error
mean_error = 0
for i in range(len(objpoints)):
    imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
    error = cv2.norm(imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
    mean_error += error
print("total error: ", mean_error / len(objpoints))
If you read the OpenCV documentation you will see that I made only small changes to the code, and it was working; but today it raises this error that the name gray is not defined!
Check your path, and see whether images is an empty list. In that case the for loop is never executed, and that loop is where the gray variable is defined.
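A quick way to confirm this is to fail fast when the glob matches nothing; as a sketch:

images = glob.glob(data_path)
print('Matched {} calibration images'.format(len(images)))
if not images:
    raise FileNotFoundError('No .bmp images found under: ' + data_path)

Also note that the directory name 'Experiment1111 ' in img_dir ends with a space, which is a likely reason the pattern no longer matches.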
I am working on a yawn-detection project, using dlib and OpenCV to detect the face and its landmarks in a video.
I want to measure how far open the eyes and mouth are.
This is what I have done so far:
import sys
import dlib
import cv2
import time

if len(sys.argv) != 3:
    print("Usage: python script.py <shape_predictor_path> <faces_folder_path>")
    exit()

predictor_path = sys.argv[1]
faces_folder_path = sys.argv[2]

vidcap = cv2.VideoCapture('video.avi')
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
win = dlib.image_window()

while vidcap.isOpened():
    success, image = vidcap.read()
    if not success:
        break
    win.clear_overlay()
    win.set_image(image)
    # Ask the detector to find the bounding boxes of each face. The 1 in the
    # second argument indicates that we should upsample the image 1 time. This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(image, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
        # Get the landmarks/parts for the face in box d.
        shape = predictor(image, d)
        print("Part 0: {}, Part 1: {}, Part 2: {} ...".format(shape.part(0), shape.part(1), shape.part(2)))
        # Draw the face landmarks on the screen.
        win.add_overlay(shape)
    win.add_overlay(dets)
    time.sleep(0.01)

cv2.destroyAllWindows()
vidcap.release()
Please help me with how to measure how far open the eyes and mouth are.
Working from the standard dlib 68-point facial landmark figure (the eyes are points 36-47 and the inner mouth points 60-67):
import Paths
import globals
from globals import ClassifierFiles
import numpy as np
import cv2
import time
import dlib
import math
import eyeCoordinates
import mouthCoordinates
from globals import Threshold
from globals import yawnFolder
import os
import openface

VIDEO_PATHS = []

def readVideo(video):
    global no, yes
    video_capture = cv2.VideoCapture(video)
    detector = dlib.get_frontal_face_detector()  # Face detector
    predictor = dlib.shape_predictor(ClassifierFiles.shapePredicter)  # Landmark identifier
    face_aligner = openface.AlignDlib(ClassifierFiles.shapePredicter)
    u = 0
    while True:
        ret, frame = video_capture.read()
        if frame is not None:  # comparing an array to None with != is an error
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            # clahe_image = clahe.apply(gray)
            detections = detector(frame, 1)  # Detect the faces in the image
            for k, d in enumerate(detections):  # For each detected face
                shape = predictor(frame, d)  # Get coordinates
                vec = np.empty([68, 2], dtype=int)
                coor = []
                for i in range(1, 68):  # 68 landmark points per face; coor[i] holds point i+1
                    # cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 1, (0,0,255), thickness=1)
                    coor.append([shape.part(i).x, shape.part(i).y])
                    vec[i][0] = shape.part(i).x
                    vec[i][1] = shape.part(i).y
                # Right-eye and left-eye openings
                rightEye = eyeCoordinates.distanceRightEye(coor)
                leftEye = eyeCoordinates.distanceLeftEye(coor)
                eyes = (rightEye + leftEye) / 2
                # Mouth opening
                mouth = mouthCoordinates.distanceBetweenMouth(coor)
                print(eyes, mouth)  # average eye opening, mouth opening
                break  # only the first detected face is used per frame
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == '__main__':
    VIDEO_PATHS = Paths.videosPaths()
    readVideo('v.avi')  # test video of faces
eyeCoordinates file:
import distanceFormulaCalculator

def distanceRightEye(c):
    # Landmarks 36-41 outline the right eye; coor is offset by one
    # (coor[i] holds landmark i+1), hence the index shift below.
    eR_36 = c[35]
    eR_37 = c[36]
    eR_38 = c[37]
    eR_39 = c[38]
    eR_40 = c[39]
    eR_41 = c[40]
    x1 = distanceFormulaCalculator.distanceFormula(eR_37, eR_41)
    x2 = distanceFormulaCalculator.distanceFormula(eR_38, eR_40)
    return (x1 + x2) / 2

def distanceLeftEye(c):
    # Landmarks 42-47 outline the left eye
    eL_42 = c[41]
    eL_43 = c[42]
    eL_44 = c[43]
    eL_45 = c[44]
    eL_46 = c[45]
    eL_47 = c[46]
    x1 = distanceFormulaCalculator.distanceFormula(eL_43, eL_47)
    x2 = distanceFormulaCalculator.distanceFormula(eL_44, eL_46)
    return (x1 + x2) / 2

def eyePoints():
    return [36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
mouthCoordinates file:
import distanceFormulaCalculator

def distanceBetweenMouth(c):
    # Landmarks 60-67 outline the inner mouth (again offset by one in coor)
    m_60 = c[59]
    m_61 = c[60]
    m_62 = c[61]
    m_63 = c[62]
    m_64 = c[63]
    m_65 = c[64]
    m_66 = c[65]
    m_67 = c[66]
    x1 = distanceFormulaCalculator.distanceFormula(m_61, m_67)
    x2 = distanceFormulaCalculator.distanceFormula(m_62, m_66)
    x3 = distanceFormulaCalculator.distanceFormula(m_63, m_65)
    return (x1 + x2 + x3) / 3

def mouthPoints():
    return [60, 61, 62, 63, 64, 65, 66, 67]
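The distanceFormulaCalculator module is not shown in the answer; a minimal sketch, assuming distanceFormula is the plain Euclidean distance between two (x, y) points:

# distanceFormulaCalculator.py (hypothetical reconstruction)
import math

def distanceFormula(p1, p2):
    # Euclidean distance between two (x, y) landmark coordinates
    return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)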