I am working on a project of yawn detection, i am using dlib and opencv to detect the face and landmark on a video.
I want to get the length of eyes and mouth.
this is what i have done till now
import sys
import os
import dlib
import glob
from skimage import io
import cv2
import time
if len(sys.argv) != 3:
print("")
exit()
predictor_path = sys.argv[1]
faces_folder_path = sys.argv[2]
vidcap = cv2.VideoCapture('video.avi')
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)
win = dlib.image_window()
while vidcap.isOpened():
success, image = vidcap.read()
if success:
win.clear_overlay()
win.set_image(image)
# Ask the detector to find the bounding boxes of each face. The 1 in the
# second argument indicates that we should upsample the image 1 time. This
# will make everything bigger and allow us to detect more faces.
dets = detector(image, 1)
print("Number of faces detected: {}".format(len(dets)))
for k, d in enumerate(dets):
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
k, d.left(), d.top(), d.right(), d.bottom()))
# Get the landmarks/parts for the face in box d.
shape = predictor(image, d)
print(shape)
print("Part 0: {}, Part 1: {},Part 2: {} ...".format(shape.part(0),shape.part(1),shape.part(2)))
# Draw the face landmarks on the screen.
win.add_overlay(shape)
win.add_overlay(dets)
time.sleep(0.01)
cv2.destroyAllWindows()
vidcap.release()
please help me how to get the length of open eyes and mouth.
From this figure
import Paths
import globals
from globals import ClassifierFiles
import numpy as np
import cv2
import time
import dlib
import math
import eyeCoordinates
import mouthCoordinates
from globals import Threshold
from globals import yawnFolder
import os
import openface
VIDEO_PATHS = []
readVideo('v.avi')#test video of faces
def readVideo(video):
global no,yes
video_capture = cv2.VideoCapture(video)
detector = dlib.get_frontal_face_detector() #Face detector
predictor = dlib.shape_predictor(ClassifierFiles.shapePredicter) #Landmark identifier
face_aligner = openface.AlignDlib(ClassifierFiles.shapePredicter)
u = 0
while True:
ret, frame = video_capture.read()
if frame != None:
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# clahe_image = clahe.apply(gray)
detections = detector(frame, 1) #Detect the faces in the image
for k,d in enumerate(detections): #For each detected face
shape = predictor(frame, d) #Get coordinates
vec = np.empty([68, 2], dtype = int)
coor = []
for i in range(1,68): #There are 68 landmark points on each face
#cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 1, (0,0,255), thickness=1)
coor.append([shape.part(i).x, shape.part(i).y])
vec[i][0] = shape.part(i).x
vec[i][1] = shape.part(i).y
#RightEye and LeftEye coordinates
rightEye = eyeCoordinates.distanceRightEye(coor)
leftEye = eyeCoordinates.distanceLeftEye(coor)
eyes = (rightEye + leftEye)/2
#Mouth coordinates
mouth = mouthCoordinates.distanceBetweenMouth(coor)
print(eyes,mouth)
#prints both eyes average distance
#prints mouth distance
break
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if __name__ == '__main__':
VIDEO_PATHS = Paths.videosPaths()
init()
eyeCoordinates File
import distanceFormulaCalculator
def distanceRightEye(c):
eR_36,eR_37,eR_38,eR_39,eR_40,eR_41 = 0,0,0,0,0,0
eR_36 = c[35]
eR_37 = c[36]
eR_38 = c[37]
eR_39 = c[38]
eR_40 = c[39]
eR_41 = c[40]
x1 = distanceFormulaCalculator.distanceFormula(eR_37,eR_41)
x2 = distanceFormulaCalculator.distanceFormula(eR_38,eR_40)
return ((x1+x2)/2)
def distanceLeftEye(c):
eL_42,eL_43,eL_44,eL_45,eL_46,eL_47 = 0,0,0,0,0,0
eL_42 = c[41]
eL_43 = c[42]
eL_44 = c[43]
eL_45 = c[44]
eL_46 = c[45]
eL_47 = c[46]
x1 = distanceFormulaCalculator.distanceFormula(eL_43,eL_47)
x2 = distanceFormulaCalculator.distanceFormula(eL_44,eL_46)
return ((x1+x2)/2)
def eyePoints():
return [36,37,38,39,40,41,42,43,44,45,46,47]
Mouth Coordinates File
import distanceFormulaCalculator
def distanceBetweenMouth(c):
m_60,m_61,m_62,m_63,m_64,m_65,m_66,m_67 = 0,0,0,0,0,0,0,0
m_60 = c[59]
m_61 = c[60]
m_62 = c[61]
m_63 = c[62]
m_64 = c[63]
m_65 = c[64]
m_66 = c[65]
m_67 = c[66]
x1 = distanceFormulaCalculator.distanceFormula(m_61,m_67)
x2 = distanceFormulaCalculator.distanceFormula(m_62,m_66)
x3 = distanceFormulaCalculator.distanceFormula(m_63,m_65)
return ((x1+x2+x3)/3)
def mouthPoints():
return [60,61,62,63,64,65,66,67]
Related
I have an image of a scanner and i wanted to segment the background but the background is not homogeneous. how could i do that?
My Input is a cloud points
I used the code below to convert to a depth image
import numpy as np
import cv2
import open3d as o3d
import sys
def convertToDepth():
argv = sys.argv[1]
argv2 = sys.argv[2]
pcd = o3d.io.read_point_cloud(argv)
p = np.asarray(pcd.points)
points = np.copy(p)
points = np.asarray(points, np.int16)
max_x = np.max(points[:,0])
min_x = np.min(points[:,0])
max_y = np.max(points[:,1])
min_y = np.min(points[:,1])
img = np.zeros(((max_x - min_x)+1,(max_y-min_y)+1), np.uint8)
img[img == 0] = 255
max_z = np.max(points[:,2])
min_z = np.min(points[:,2])
for p in points:
img[(p[0] - min_x),(p[1]-min_y)] = min(img[(p[0] - min_x),(p[1]-min_y)], int((p[2] - min_z)* (256/((max_z - min_z)+1))))
cv2.imwrite(argv2,img)
convertToDepth()
this is my depth image
I have tried this code to obtain the seeds
import cv2
import numpy as np
img = cv2.imread('amostra.png')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray ,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imwrite('thresh.png',thresh)
and obtained this
How can i get the seeds with an non-homogeneous background?
UPDATE:
I tried this code:
import cv2
import numpy as np
import sys
path_img = sys.argv[1]
img = cv2.imread(path_img)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
for i in range(0,255):
thresh = gray.copy()
thresh[:] = 0
thresh[(gray == i)] = 255
num_labels, labels_im = cv2.connectedComponents(thresh)
areaMaior = 100
maskMaior = np.zeros((img.shape[0],img.shape[1]),np.uint8)
for j in range(1,num_labels):
mask = np.zeros((img.shape[0],img.shape[1]),np.uint8)
mask[labels_im==j] = 255
pixels = cv2.countNonZero(mask)
if(pixels > areaMaior):
maskMaior = mask
areaMaior = pixels
#cv2.imwrite('segment/areaimg'+str(i)+'.png', maskMaior)
gray[(maskMaior==255)] = 255
cv2.imwrite('gray12.png',gray)
And obtained this
I tried to use the webcam and take pictures every 5 seconds via openCV but the cam itself didn't work and kept causing the error...
I also tried changing the integer in the cv2.VideoCapture() to -1 and 1 but still that didn't work.
This is the form of the error: "[ WARN:0] global /io/opencv/modules/videoio/src/cap_v4l.cpp (802)
open VIDEOIO ERROR: V4L: can't open camera by index 0
Traceback (most recent call last):
File "webcam_detect.py", line 176, in
raise IOError("Cannot open webcam")
OSError: Cannot open webcam"
import colorsys
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
import time
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import image_preporcess
class YOLO(object):
_defaults = {
#"model_path": 'logs/trained_weights_final.h5',
"model_path": 'model_data/yolo_weights.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"text_size" : 1,
}
#classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
def __init__(self, **kwargs):
self.__dict__.update(self._defaults) # set up default values
self.__dict__.update(kwargs) # and update with user overrides
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.sess = K.get_session()
self.boxes, self.scores, self.classes = self.generate()
def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def _get_anchors(self):
anchors_path = os.path.expanduser(self.anchors_path)
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
# Load model, or construct model and load weights.
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
is_tiny_version = num_anchors==6 # default setting
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
'Mismatch between model and given anchor and class sizes'
print('{} model, anchors, and classes loaded.'.format(model_path))
# Generate colors for drawing bounding boxes.
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes
def detect_image(self, image):
if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = image_preporcess(np.copy(image), tuple(reversed(self.model_image_size)))
image_data = boxed_image
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.shape[0], image.shape[1]],#[image.size[1], image.size[0]],
K.learning_phase(): 0
})
#print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
thickness = (image.shape[0] + image.shape[1]) // 600
fontScale=1
ObjectsList = []
for i, c in reversed(list(enumerate(out_classes))):
predicted_class = self.class_names[c]
box = out_boxes[i]
score = out_scores[i]
label = '{} {:.2f}'.format(predicted_class, score)
#label = '{}'.format(predicted_class)
scores = '{:.2f}'.format(score)
top, left, bottom, right = box
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
bottom = min(image.shape[0], np.floor(bottom + 0.5).astype('int32'))
right = min(image.shape[1], np.floor(right + 0.5).astype('int32'))
mid_h = (bottom-top)/2+top
mid_v = (right-left)/2+left
# put object rectangle
cv2.rectangle(image, (left, top), (right, bottom), self.colors[c], thickness)
# get text size
(test_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, thickness/self.text_size, 1)
# put text rectangle
cv2.rectangle(image, (left, top), (left + test_width, top - text_height - baseline), self.colors[c], thickness=cv2.FILLED)
# put text above rectangle
cv2.putText(image, label, (left, top-2), cv2.FONT_HERSHEY_SIMPLEX, thickness/self.text_size, (0, 0, 0), 1)
# add everything to list
ObjectsList.append([top, left, bottom, right, mid_v, mid_h, label, scores])
return image, ObjectsList
def close_session(self):
self.sess.close()
def detect_img(self, image):
#image = cv2.imread(image, cv2.IMREAD_COLOR)
original_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
original_image_color = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
r_image, ObjectsList = self.detect_image(original_image_color)
return r_image, ObjectsList
if __name__=="__main__":
yolo = YOLO()
# set start time to current time
#start_time = time.time()
# displays the frame rate every 2 second
#display_time = 2
# Set primarry FPS to 0
#fps = 0
# we create the video capture object cap
cap = cv2.VideoCapture(0)
if not cap.isOpened():
raise IOError("Cannot open webcam")
cap.set(3, 640)
cap.set(4,480)
img_counter=0
frame_set=[]
start_time=time.time()
#if not cap.isOpened():
#raise IOError("We cannot open webcam")
#while True:
#ret, frame = cap.read()
# resize our captured frame if we need
#frame = cv2.resize(frame, None, fx=1.0, fy=1.0,
interpolation=cv2.INTER_AREA)
# detect object on our frame
#r_image, ObjectsList = yolo.detect_img(frame)
# show us frame with detection
#cv2.imshow("Web cam input", r_image)
#if cv2.waitKey(25) & 0xFF == ord("q"):
#cv2.destroyAllWindows()
#break
# calculate FPS
#fps += 1
#TIME = time.time() - start_time
#if TIME > display_time:
#print("FPS:", fps / TIME)
#fps = 0
#start_time = time.time()
while True:
ret, frame = cap.read()
frame = cv2.resize(frame, None, fx=1.0, fy=1.0,
interpolation=cv2.INTER_AREA)
r_image, ObjectsList=yolo.detect_img(frame)
cv2.imshow('Web cam input', r_image)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if time.time() - start_time >= 5: #<---- Check if 5 sec passed
img_name = "opencv_frame_{}.png".format(img_counter)
cv2.imwrite(img_name, frame)
print("{} written!".format(img_counter))
start_time = time.time()
img_counter += 1
cap.release()
cv2.destroyAllWindows()
yolo.close_session()
I am trying to display an image over another image at a particular co-ordinates. I have detected the aruco markers using the webcam and I want to display another image over the aruco marker. The aruco marker can be moved and the overlaying image should move along with the marker.
There is various draw functions and to input text into the image. I have tried image overlay and image homography.
I can obtain the co-ordinates for the corners.
Is there any function to insert the image at those co-ordinates?
import cv2
import cv2.aruco as aruco
import glob
markerLength = 0.25
cap = cv2.VideoCapture(0)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
objp = np.zeros((6*7,3), np.float32)
objp[:,:2] = np.mgrid[0:7,0:6].T.reshape(-1,2)
objpoints = []
imgpoints = []
images = glob.glob('calib_images/*.jpg')
for fname in images:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret, corners = cv2.findChessboardCorners(gray, (7,6),None)
if ret == True:
objpoints.append(objp)
corners2 = cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
imgpoints.append(corners2)
img = cv2.drawChessboardCorners(img, (7,6), corners2,ret)
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1],None,None)
calibrationFile = "calibrationFileName.xml"
calibrationParams = cv2.FileStorage(calibrationFile, cv2.FILE_STORAGE_READ)
camera_matrix = calibrationParams.getNode("cameraMatrix").mat()
dist_coeffs = calibrationParams.getNode("distCoeffs").mat()
while(True):
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
arucoParameters = aruco.DetectorParameters_create()
corners, ids, rejectedImgPoints = aruco.detectMarkers(gray, aruco_dict, parameters=arucoParameters)
if np.all(ids != None):
rvec, tvec, _ = aruco.estimatePoseSingleMarkers(corners, markerLength, mtx, dist)
axis = aruco.drawAxis(frame, mtx, dist, rvec, tvec, 0.3)
print(ids)
display = aruco.drawDetectedMarkers(axis, corners)
display = np.array(display)
else:
display = frame
cv2.imshow('Display',display)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()```
To replace a part of image
import cv2
import numpy as np
img1 = cv2.imread('Desert.jpg')
img2 = cv2.imread('Penguins.jpg')
img3 = img1.copy()
# replace values at coordinates (100, 100) to (399, 399) of img3 with region of img2
img3[100:400,100:400,:] = img2[100:400,100:400,:]
cv2.imshow('Result1', img3)
To alpha blend two images
alpha = 0.5
img3 = np.uint8(img1*alpha + img2*(1-alpha))
cv2.imshow('Result2', img3)
#user8190410's answer works fine. Just to give a complete answer, in order to alpha blend two images with different size at a particular position, you can do the following:
alpha= 0.7
img1_mod = img1.copy()
img1_mod[:pos_x,:pos_y,:] = img1[:pos_x,:pos_y,:]*alpha + img2*(1-alpha)
cv2.imshow('Image1Mod', img1_mod)
Actually, I found that image homography can be used to do it.
Here is the updated code.
import numpy as np
import cv2
import cv2.aruco as aruco
cap = cv2.VideoCapture(0)
while(True):
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
arucoParameters = aruco.DetectorParameters_create()
corners, ids, rejectedImgPoints = aruco.detectMarkers(gray, aruco_dict, parameters=arucoParameters)
if np.all(ids != None):
display = aruco.drawDetectedMarkers(frame, corners)
x1 = (corners[0][0][0][0], corners[0][0][0][1])
x2 = (corners[0][0][1][0], corners[0][0][1][1])
x3 = (corners[0][0][2][0], corners[0][0][2][1])
x4 = (corners[0][0][3][0], corners[0][0][3][1])
im_dst = frame
im_src = cv2.imread("mask.jpg")
size = im_src.shape
pts_dst = np.array([x1,x2,x3,x4])
pts_src = np.array(
[
[0,0],
[size[1] - 1, 0],
[size[1] - 1, size[0] -1],
[0, size[0] - 1 ]
],dtype=float
);
h, status = cv2.findHomography(pts_src, pts_dst)
temp = cv2.warpPerspective(im_src, h, (im_dst.shape[1],im_dst.shape[0]))
cv2.fillConvexPoly(im_dst, pts_dst.astype(int), 0, 16);
im_dst = im_dst + temp
cv2.imshow('Display',im_dst)
else:
display = frame
cv2.imshow('Display',display)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
I'm creating a object classifier in opencv python using svm. Training dataset is of 200 positive and 200 negative images. For positive images first took 200 images and cropped target object from images and resized them to (64,128) size for HOG calculation. Then for negative images, First created Pyramid of images then applied sliding window of 64X128 and then calculated HOG for positive as well all windows of negative images with labels 1 and 0. Trained svm model on hog features.
I am getting error "cv2.error: OpenCV(3.4.2) C:\projects\opencv-python\opencv\modules\ml\src\svm.cpp:2010: error: (-215:Assertion failed) samples.cols == var_count && samples.type() == 5 in function 'cv::ml::SVMImpl::predict' " when i called predict function using res = svm.predict(samples[0]).ravel() method.
import cv2
import os
import time
import numpy as np
import imutils
positive_path='C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\ROI images'
negative_path='C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\Negative images'
def pyramid(img): #Create image Pyramid
minSize=(30, 30)
imgarr = []
while True:
scale = 2
imgarr.append(img)
w = int(img.shape[1] / scale)
img = imutils.resize(img, width=w)
if img.shape[0] < minSize[1] or img.shape[1] < minSize[0]:
break
return imgarr
def sliding_window(image, stepSize, windowSize): #Sliding window for negative images
sliding = []
for y in range(0, image.shape[0], stepSize):
for x in range(0, image.shape[1], stepSize):
sliding.append((x, y, image[y:y + windowSize[1], x:x + windowSize[0]]))
return sliding
def get_hog() :
winSize = (64,128)
blockSize = (16,16)
blockStride = (16,16)
cellSize = (8,8)
nbins = 9
derivAperture = 1
winSigma = 4.
histogramNormType = 0
L2HysThreshold = 0.2
gammaCorrection = 0
nlevels = 64
signedGradient = True
hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma,histogramNormType,L2HysThreshold,gammaCorrection,nlevels, signedGradient)
return hog
samples = []
labels = []
sam = []
hog = get_hog()
for filename in os.listdir(positive_path):
img = cv2.imread(os.path.join(positive_path,filename),0) #RGB image
img = cv2.resize(img,(64,128))
img = np.array(img)
hist = hog.compute(img)
hist = cv2.normalize(hist,None)
sam.append(img)
samples.append(hist)
labels.append(1)
i=0
for filename in os.listdir(negative_path):
img = cv2.imread(os.path.join(negative_path,filename),0)
(winW, winH) = (64,128)
pyr = pyramid(img)
for resized in pyr:
sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
for (x, y, window) in sliding:
if window.shape[0] != winH or window.shape[1] != winW:
continue
hist = hog.compute(window)
hist = cv2.normalize(hist,None)
sam.append(window)
samples.append(hist)
labels.append(0)
print(i)
i=i+1
samples = np.array(samples,dtype=np.float32)
labels = np.array(labels,dtype=int)
samples = np.squeeze(samples)
print(len(samples))
print(samples.shape)
rand = np.random.RandomState(10)
shuffle = rand.permutation(len(samples))
sam = samples[shuffle]
samples = sam[shuffle]
labels = labels[shuffle]
svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setC(2.67)
svm.setGamma(5.383)
svm_params = dict( kernel_type = cv2.ml.SVM_LINEAR,
svm_type = cv2.ml.SVM_C_SVC,
C=2.67, gamma=5.383 )
svm.train(samples,cv2.ml.ROW_SAMPLE,labels)
print("trained")
res = svm.predict(samples[0]).ravel()
print(res)
cap = cv2.VideoCapture(0)
while True:
ret, img = cap.read()
img=cv2.resize(img,(400,400))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(winW, winH) = (64,128)
pyr = pyramid(img)
for resized in pyr:
sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
for (x, y, window) in sliding:
if window.shape[0] != winH or window.shape[1] != winW:
continue
hist = hog.compute(window)
hist = cv2.normalize(hist,None)
hist = np.reshape(hist,(1,hist.shape[0]))
res = svm.predict(hist)[1].ravel()
if res == 1:
print("found")
cv2.imshow('img',img)
cv2.waitKey(10)
I've tried to add the GUI (tkinter) into my script, but to no avail. If anyone can help me, i would be so grateful. I'm using Python 3.6 and i think the latest opencv?
I have only started programming 2 weeks ago. So, kinda new into all this. Basically, I want to create a window that just pick the image from my folder and then process it through the script so that whenever i want to use another image, i don't have to change the script. I hope that make sense..
this is the script that i took from Chris Dahms from youtube, and managed to change it to what I want.
import cv2
import numpy as np
import os
import DetectChars
import DetectPlates
import PossiblePlate
SCALAR_BLACK = (0.0, 0.0, 0.0)
SCALAR_WHITE = (255.0, 255.0, 255.0)
SCALAR_YELLOW = (0.0, 255.0, 255.0)
SCALAR_GREEN = (0.0, 255.0, 0.0)
SCALAR_CYAN = (255.0, 255.0, 0.0)
showSteps = False
def main():
blnKNNTrainingSuccessful = DetectChars.loadKNNDataAndTrainKNN()
if blnKNNTrainingSuccessful == False:
print ("\nerror: KNN training was not successful\n")
return
imgOriginalScene = cv2.imread("CAR/Malaysia/22.jpg")
if imgOriginalScene is None:
print ("\nerror: image not read from file \n\n")
os.system("pause")
return
if imgOriginalScene is None:
print ("\nerror: image not read from file \n\n")
os.system("pause")
return
listOfPossiblePlates = DetectPlates.detectPlatesInScene(imgOriginalScene)
listOfPossiblePlates = DetectChars.detectCharsInPlates(listOfPossiblePlates)
cv2.imshow("imgOriginalScene", imgOriginalScene)
if len(listOfPossiblePlates) == 0:
print ("\nno license plates were detected\n")
else:
listOfPossiblePlates.sort(key = lambda possiblePlate: len(possiblePlate.strChars), reverse = True)
licPlate = listOfPossiblePlates[0]
cv2.imshow("Image Plate", licPlate.imgPlate)
cv2.imshow("Image Threshold", licPlate.imgThresh)
if len(licPlate.strChars) == 0:
print ("\nno characters were detected\n\n")
return
drawRedRectangleAroundPlate(imgOriginalScene, licPlate)
print ("\nlicense plate read from image = " + licPlate.strChars + "\n")
print ("----------------------------------------")
writeLicensePlateCharsOnImage(imgOriginalScene, licPlate)
cv2.imshow("imgOriginalScene", imgOriginalScene)
cv2.imwrite("imgOriginalScene.png", imgOriginalScene)
cv2.waitKey(0)
return
def drawRedRectangleAroundPlate(imgOriginalScene, licPlate):
p2fRectPoints = cv2.boxPoints(licPlate.rrLocationOfPlateInScene)
cv2.line(imgOriginalScene, tuple(p2fRectPoints[0]), tuple(p2fRectPoints[1]), SCALAR_RED, 2)
cv2.line(imgOriginalScene, tuple(p2fRectPoints[1]), tuple(p2fRectPoints[2]), SCALAR_RED, 2)
cv2.line(imgOriginalScene, tuple(p2fRectPoints[2]), tuple(p2fRectPoints[3]), SCALAR_RED, 2)
cv2.line(imgOriginalScene, tuple(p2fRectPoints[3]), tuple(p2fRectPoints[0]), SCALAR_RED, 2)
def writeLicensePlateCharsOnImage(imgOriginalScene, licPlate):
ptCenterOfTextAreaX = 0
ptCenterOfTextAreaY = 0
ptLowerLeftTextOriginX = 0
ptLowerLeftTextOriginY = 0
sceneHeight, sceneWidth, sceneNumChannels = imgOriginalScene.shape
plateHeight, plateWidth, plateNumChannels = licPlate.imgPlate.shape
intFontFace = cv2.FONT_HERSHEY_SIMPLEX
fltFontScale = float(plateHeight) / 30.0
intFontThickness = int(round(fltFontScale * 2))
textSize, baseline = cv2.getTextSize(licPlate.strChars, intFontFace, fltFontScale, intFontThickness)
( (intPlateCenterX, intPlateCenterY), (intPlateWidth, intPlateHeight), fltCorrectionAngleInDeg ) = licPlate.rrLocationOfPlateInScene
intPlateCenterX = int(intPlateCenterX)
intPlateCenterY = int(intPlateCenterY)
ptCenterOfTextAreaX = int(intPlateCenterX)
if intPlateCenterY < (sceneHeight * 0.75):
ptCenterOfTextAreaY = int(round(intPlateCenterY)) + int(round(plateHeight * 1.6))
else:
ptCenterOfTextAreaY = int(round(intPlateCenterY)) - int(round(plateHeight * 1.6))
textSizeWidth, textSizeHeight = textSize
ptLowerLeftTextOriginX = int(ptCenterOfTextAreaX - (textSizeWidth / 2))
ptLowerLeftTextOriginY = int(ptCenterOfTextAreaY + (textSizeHeight / 2))
cv2.putText(imgOriginalScene, licPlate.strChars, (ptLowerLeftTextOriginX, ptLowerLeftTextOriginY), intFontFace, fltFontScale, SCALAR_CYAN, intFontThickness)
if __name__ == "__main__":
main()
cv2.waitKey()
cv2.destroyAllWindows()
Pre-processing stage
# Preprocess.py
import numpy as np
import math
# module level variables ##########################################################################
GAUSSIAN_SMOOTH_FILTER_SIZE = (5, 5)
ADAPTIVE_THRESH_BLOCK_SIZE = 19
ADAPTIVE_THRESH_WEIGHT = 9
def preprocess(imgOriginal):
imgGrayscale = extractValue(imgOriginal)
imgMaxContrastGrayscale = maximizeContrast(imgGrayscale)
height, width = imgGrayscale.shape
grayscaled = cv2.cvtColor(imgOriginal,cv2.COLOR_BGR2GRAY)
imgBlurred = np.zeros((height, width, 1), np.uint8)
imgBlurred, otsu = cv2.threshold(grayscaled,125,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
imgThresh = cv2.medianBlur(otsu,5)
return imgGrayscale, imgThresh
# end function
def extractValue(imgOriginal):
height, width, numChannels = imgOriginal.shape
imgHSV = np.zeros((height, width, 3), np.uint8)
imgHSV = cv2.cvtColor(imgOriginal, cv2.COLOR_BGR2HSV)
imgHue, imgSaturation, imgValue = cv2.split(imgHSV)
return imgValue
# end function
def maximizeContrast(imgGrayscale):
height, width = imgGrayscale.shape
imgTopHat = np.zeros((height, width, 1), np.uint8)
imgBlackHat = np.zeros((height, width, 1), np.uint8)
structuringElement = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
imgTopHat = cv2.morphologyEx(imgGrayscale, cv2.MORPH_TOPHAT, structuringElement)
imgBlackHat = cv2.morphologyEx(imgGrayscale, cv2.MORPH_BLACKHAT, structuringElement)
imgGrayscalePlusTopHat = cv2.add(imgGrayscale, imgTopHat)
imgGrayscalePlusTopHatMinusBlackHat = cv2.subtract(imgGrayscalePlusTopHat, imgBlackHat)
return imgGrayscalePlusTopHatMinusBlackHat
# end function
If all you are wanting is a window to select a file then this should work.
import Tkinter
from Tkinter import *
import tkSimpleDialog
from tkFileDialog import askopenfilename
master = Tk()
master.withdraw()
my_file = askopenfilename()
mainloop()
i recommend Gtk3 for your GUI.
here's a simple Gtk window with button:
#!/usr/bin/env python3
import gi
gi.require_version( 'Gtk', '3.0' )
from gi.repository import Gtk
class Window( Gtk.Window ):
def __init__( self ):
Gtk.Window.__init__( self )
self.connect( 'destroy', lambda q: Gtk.main_quit() )
button = Gtk.Button( "Gtk.Button" )
button.connect( "clicked", self.on_button_clicked )
grid = Gtk.Grid( )
grid.attach( button, 0, 0, 1, 1 )
self.add( grid )
self.show_all()
def on_button_clicked( self, button ):
print( "Gtk.Button was clicked" )
w = Window()
Gtk.main()