I have a list of points that almost form a straight line (but they are not perfectly aligned on it). I want to create a line that best describes those points.
For example, for points:
points = [(150, 250),(180, 220), (200, 195), (225, 180), (250, 150), (275, 115), (300, 100)]
I want to create a line similar to this:
The problem is that sometimes there are points that are very far from that line (outliers). I want to ignore those outliers while creating the line:
How can I create this line?
P.S. This is the code (for Colab) to generate the points:
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
img = np.zeros([400,500,3],dtype=np.uint8)
points = [(150, 250),(180, 225), (200, 200), (225, 100), (250, 150), (275, 115), (300, 100)]
#points = [(150, 250),(180, 220), (200, 195), (225, 180), (250, 150), (275, 115), (300, 100)]
for idx, p in enumerate(points):
    img = cv2.circle(img, p, radius=0, color=(0, 0, 255), thickness=10)
    text_x, text_y = p
    p = round(text_x - 20), round(text_y + 5)
    img = cv2.putText(img=img, text=str(idx), fontFace=cv2.FONT_HERSHEY_SCRIPT_COMPLEX, org=p, fontScale=0.5, color=(0, 255, 0))
image = cv2.line(img, points[0], points[-1], (255, 0, 255), 1)
cv2_imshow(img)
In my code, I generate the line between the first and last elements of the list of points, so of course if the last point is an outlier, the whole line is disrupted:
Thanks to Christoph Rackwitz's answer, I followed sklearn's documentation for RANSAC and created a simple script to compute the RANSAC fit (of course it still needs to be polished):
import numpy as np
from matplotlib import pyplot as plt
from sklearn import linear_model, datasets
"""
Add points:
"""
points = [(150, 250),(175, 225), (200, 200), (225, 175), (250, 150), (275, 115), (300, 150)]
Y = []
X = []
for x, y in points:
    Y.append(y)
    X.append(x)
Y = np.array(Y)
X = np.array(X)
lr = linear_model.LinearRegression()
lr.fit(X.reshape(-1, 1), Y)
# Robustly fit linear model with RANSAC algorithm
ransac = linear_model.RANSACRegressor()
ransac.fit(X.reshape(-1, 1), Y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
# Predict data of estimated models
line_X = np.arange(X.min(), X.max())[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)
# Compare estimated coefficients
print("Estimated coefficients (linear regression, RANSAC):")
print(lr.coef_, ransac.estimator_.coef_)
lw = 2
plt.gca().invert_yaxis() # Mirror points
plt.scatter(
X[inlier_mask], Y[inlier_mask], color="yellowgreen", marker=".", label="Inliers"
)
plt.scatter(
X[outlier_mask], Y[outlier_mask], color="gold", marker=".", label="Outliers"
)
plt.plot(line_X, line_y, color="navy", linewidth=lw, label="Linear regressor")
plt.plot(
line_X,
line_y_ransac,
color="cornflowerblue",
linewidth=lw,
label="RANSAC regressor",
)
plt.legend(loc="lower right")
plt.xlabel("Input")
plt.ylabel("Response")
plt.show()
And I got the following image (which looks great):
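If you need the fitted line back on the original OpenCV canvas rather than in a matplotlib figure, a minimal sketch (building on the points list and the fitted ransac model from the script above; the 400x500 canvas is the one from the question) could look like this:
import cv2
img = np.zeros([400, 500, 3], dtype=np.uint8)
for p in points:
    img = cv2.circle(img, p, radius=0, color=(0, 0, 255), thickness=10)
# evaluate the RANSAC model at the leftmost and rightmost x values
x_left, x_right = int(X.min()), int(X.max())
y_left, y_right = ransac.predict(np.array([[x_left], [x_right]]))
img = cv2.line(img, (x_left, int(round(y_left))),
               (x_right, int(round(y_right))), (255, 0, 255), 1)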
I tried to mask an image by its color using OpenCV.
import cv2
import numpy as np
import matplotlib.pyplot as plt
After importing the libraries, I load the image:
img = cv2.imread('gmaps.jpg')
image = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
plt.imshow(image);
Convert the image to HSV:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
plt.imshow(hsv);
Masking process
low_orange = np.array([44, 6, 100])
high_orange = np.array([44, 24, 99])
masking = cv2.inRange(hsv,low_orange, high_orange)
plt.imshow(masking);
The result isn't what I expected.
Image :
Result :
EDIT: I want to mask only the building. Instead, the result masks the whole frame.
Using my answer from here, I managed to extract the right values for you.
Code:
frame = cv2.imread("Xv6gx.png")
blurred_frame = cv2.GaussianBlur(frame, (5, 5), 0)
hsv = cv2.cvtColor(blurred_frame, cv2.COLOR_BGR2HSV)
lower = np.array([4, 0, 7])
upper = np.array([87, 240, 255])
mask = cv2.inRange(hsv, lower, upper)
contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for contour in contours:
    area = cv2.contourArea(contour)
    if area > 5000:
        # -- Draw Option 1 --
        cv2.drawContours(frame, contour, -1, (0, 255, 0), 3)
        # -- Draw Option 2 --
        # rect = cv2.boundingRect(contour)
        # x, y, w, h = rect
        # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imshow("Mask", mask)
cv2.imshow("Frame", frame)
cv2.waitKey(0)
Final Results:
I wouldn't expect the low Value (100) to exceed the high Value (99).
Also, OpenCV uses a range of 0..180 for Hue rather than 0..360, so you likely need to divide your 44 by 2.
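For example, a corrected call might look like the sketch below; the exact numbers are only illustrative and depend on the colour you actually want to keep:
import cv2
import numpy as np
img = cv2.imread('gmaps.jpg')  # image from the question
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# illustrative bounds: OpenCV Hue runs 0..180 (so ~44 deg becomes ~22),
# and every low channel value must be <= the corresponding high value
low_orange = np.array([12, 40, 100])
high_orange = np.array([32, 255, 255])
masking = cv2.inRange(hsv, low_orange, high_orange)
result = cv2.bitwise_and(img, img, mask=masking)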
I need to resample regularly gridded (lon-lat) data to a new grid with a lower resolution and a different origin. I thought I'd use pyresample.
Problem: I get an obviously wrong spatial location of my results after resampling.
In the following example, I construct a simple 2D array on a spatial grid (defined in sourcegrid, a pyresample AreaDefinition object) together with a mask, and resample it to another grid, targetgrid. The spatial information is lost somewhere in the process and I can't figure out where... any idea?
import numpy as np
from pyresample.geometry import AreaDefinition
from pyresample.kd_tree import resample_nearest
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
# Source data
lonmin = -10; lonmax = 10.; latmin=40.; latmax=60.; nlon = 300; nlat = 250
lon = np.linspace(lonmin, lonmax, nlon); lat = np.linspace(latmin, latmax, nlat)
dlon = lon[1] - lon[0]; dlat = lat[1] - lat[0]
lon2d, lat2d = np.meshgrid(lon, lat)
sourcedata = np.cos(np.deg2rad(lat2d)*100) + np.sin(np.deg2rad(lon2d)*100)
# Introduce a polygon as mask
xpol = [frac*(nlon-1) for frac in (0, 0.5, 0.4, 0.6, 0.9, 0., 0)]
ypol = [frac*(nlat-1) for frac in (0, 0.4, 0.6, 0.5, 1., 1., 0)]
polygon = [xy for xy in zip(xpol, ypol)]
img = Image.new('L', (nlon, nlat), 0)
ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
mask = np.array(img)
xpol = [lon[int(x)] for x in xpol]; ypol = [lat[int(y)] for y in ypol] # translate in lon-lat for plot
sourcedata = np.ma.masked_where(mask, sourcedata)
# Define source and target areas
sourceextent = [lonmin-dlon/2, latmin-dlat/2, lonmax+dlon/2, latmax+dlat/2] # [xmin, ymin, xmax, ymax]
sourceextentforplot = [sourceextent[i] for i in (0,2,1,3)] # [xmin, xmax, ymin, ymax]
targetextent = [lonmin-dlon/2 + 0.12*(lonmax-lonmin), latmin-dlat/2 + 0.24*(latmax-latmin),
lonmin-dlon/2 + 0.78*(lonmax-lonmin), latmin-dlat/2 + 0.91*(latmax-latmin)]
targetextentforplot = [targetextent[i] for i in (0,2,1,3)]
sourcegrid = AreaDefinition(area_id='Grd1', description='Source Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=nlon, height=nlat, area_extent=sourceextent)
# Lower resolution, different origin
targetgrid = AreaDefinition(area_id='Grd2', description='Target Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=123, height=97, area_extent=targetextent)
# Resample sourcedata to newdata
newdata = resample_nearest(sourcegrid, sourcedata, targetgrid, fill_value=None, radius_of_influence=50000)
# Plot
def doplt(ax, data, extent):
    ax.coastlines(resolution='50m', color='gray', alpha=1., linewidth=2.)
    ax.gridlines(draw_labels=True)
    ax.imshow(data, origin='lower', transform=ccrs.PlateCarree(), extent=extent)
    ax.plot(xpol, ypol, 'k--', transform=ccrs.PlateCarree())
    ax.plot([targetextentforplot[x] for x in (0, 1, 1, 0, 0)], [targetextentforplot[y] for y in (2, 2, 3, 3, 2)],
            'r--', lw=3, transform=ccrs.PlateCarree())
    ax.set_extent([-12, 12, 38, 62])
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5,10), subplot_kw={'projection': ccrs.PlateCarree()})
doplt(ax1, sourcedata, extent=sourceextentforplot)
ax1.set_title('Source data, target area in red')
doplt(ax2, newdata, extent=targetextentforplot)
ax2.set_title('New data, with wrong spatial ref (or plotting?)')
plt.show()
Note: other suggestions to do the resampling operation than pyresample, ideally with example, are welcome.
So the problem is that you're assuming row 0 is the bottom of the image, but as shown in this example, pyresample uses row 0 as the top. I modified your example to tweak the polygon latitudes as well as using origin='upper' to plot with imshow:
import numpy as np
from pyresample.geometry import AreaDefinition
from pyresample.kd_tree import resample_nearest
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
# Source data
lonmin = -10; lonmax = 10.; latmin=40.; latmax=60.; nlon = 300; nlat = 250
lon = np.linspace(lonmin, lonmax, nlon); lat = np.linspace(latmin, latmax, nlat)
dlon = lon[1] - lon[0]; dlat = lat[1] - lat[0]
lon2d, lat2d = np.meshgrid(lon, lat)
sourcedata = np.hypot(lon2d, lat2d - 50) * (np.cos(np.deg2rad(lat2d)*100) + np.sin(np.deg2rad(lon2d)*100))
# Introduce a polygon as mask
xpol = [frac*(nlon-1) for frac in (0, 0.5, 0.4, 0.6, 0.9, 0., 0)]
ypol = [frac*(nlat-1) for frac in (0, 0.4, 0.6, 0.5, 1., 1., 0)]
polygon = [xy for xy in zip(xpol, ypol)]
img = Image.new('L', (nlon, nlat), 0)
ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
mask = np.array(img)
xpol = [lon[int(x)] for x in xpol]; ypol = [lat[nlat - 1 - int(y)] for y in ypol] # translate in lon-lat for plot
sourcedata = np.ma.masked_where(mask, sourcedata)
# Define source and target areas
sourceextent = [lonmin-dlon/2, latmin-dlat/2, lonmax+dlon/2, latmax+dlat/2] # [xmin, ymin, xmax, ymax]
sourceextentforplot = [sourceextent[i] for i in (0,2,1,3)] # [xmin, xmax, ymin, ymax]
targetextent = [lonmin-dlon/2 + 0.12*(lonmax-lonmin), latmin-dlat/2 + 0.24*(latmax-latmin),
lonmin-dlon/2 + 0.78*(lonmax-lonmin), latmin-dlat/2 + 0.91*(latmax-latmin)]
targetextentforplot = [targetextent[i] for i in (0,2,1,3)]
sourcegrid = AreaDefinition(area_id='Grd1', description='Source Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=nlon, height=nlat, area_extent=sourceextent)
# Lower resolution, different origin
targetgrid = AreaDefinition(area_id='Grd2', description='Target Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=123, height=97, area_extent=targetextent)
# Resample sourcedata to newdata
newdata = resample_nearest(sourcegrid, sourcedata, targetgrid, fill_value=None, radius_of_influence=50000)
# Plot
def doplt(ax, data, extent):
    ax.coastlines(resolution='50m', color='gray', alpha=1., linewidth=2.)
    ax.gridlines(draw_labels=True)
    ax.imshow(data, transform=ccrs.PlateCarree(), extent=extent, norm=plt.Normalize(0, 20), origin='upper')
    ax.plot(xpol, ypol, 'k--', transform=ccrs.PlateCarree())
    ax.plot([targetextentforplot[x] for x in (0, 1, 1, 0, 0)], [targetextentforplot[y] for y in (2, 2, 3, 3, 2)],
            'r--', lw=3, transform=ccrs.PlateCarree())
    ax.set_extent([-12, 12, 38, 62])
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5,10), subplot_kw={'projection': ccrs.PlateCarree()})
doplt(ax1, sourcedata, extent=sourceextentforplot)
ax1.set_title('Source data, target area in red')
doplt(ax2, newdata, extent=targetextentforplot)
ax2.set_title('New data, with wrong spatial ref (or plotting?)');
That gives:
I found it helpful to use an image with more variation to be able to line it up better with the source data.
I found out that using a SwathDefinition instead of an AreaDefinition (see doc) solved the problem.
Defining sourcegrid and targetgrid as follows in the original code gives good results:
from pyresample.geometry import SwathDefinition

sourcegrid = SwathDefinition(lons=lon2d, lats=lat2d)
lon2dtarget, lat2dtarget = np.meshgrid(np.linspace(targetextent[0], targetextent[2], 123),
                                       np.linspace(targetextent[1], targetextent[3], 97))
targetgrid = SwathDefinition(lons=lon2dtarget, lats=lat2dtarget)
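For completeness, a minimal end-to-end sketch of the SwathDefinition approach (the source lon/lat ranges are the ones from the question; the target grid values are just illustrative):
import numpy as np
from pyresample.geometry import SwathDefinition
from pyresample.kd_tree import resample_nearest
# source grid and data (same ranges as in the question)
lon = np.linspace(-10., 10., 300)
lat = np.linspace(40., 60., 250)
lon2d, lat2d = np.meshgrid(lon, lat)
sourcedata = np.cos(np.deg2rad(lat2d) * 100) + np.sin(np.deg2rad(lon2d) * 100)
sourcegrid = SwathDefinition(lons=lon2d, lats=lat2d)
# coarser target grid with a different origin (illustrative extent)
lon2dtarget, lat2dtarget = np.meshgrid(np.linspace(-7.5, 5.5, 123),
                                       np.linspace(44.8, 58.2, 97))
targetgrid = SwathDefinition(lons=lon2dtarget, lats=lat2dtarget)
newdata = resample_nearest(sourcegrid, sourcedata, targetgrid,
                           fill_value=None, radius_of_influence=50000)
print(newdata.shape)  # (97, 123)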
I got this code from PyImageSearch and tried to run it, but when I run the file I get the errors below. Can anyone tell me what is wrong here? I have installed all the needed packages and libraries, and all conda packages are up to date. Please have a look at the error section and see if you can figure out what is wrong.
# USAGE
# python detect_blinks.py --shape-predictor shape_predictor_68_face_landmarks.dat --video blink_detection_demo.mp4
# python detect_blinks.py --shape-predictor shape_predictor_68_face_landmarks.dat
# import the necessary packages
from scipy.spatial import distance as dist
from imutils.video import FileVideoStream
from imutils.video import VideoStream
from imutils import face_utils
import numpy as np
import argparse
import imutils
import time
import dlib
import cv2
def eye_aspect_ratio(eye):
    # compute the euclidean distances between the two sets of
    # vertical eye landmarks (x, y)-coordinates
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    # compute the euclidean distance between the horizontal
    # eye landmark (x, y)-coordinates
    C = dist.euclidean(eye[0], eye[3])
    # compute the eye aspect ratio
    ear = (A + B) / (2.0 * C)
    # return the eye aspect ratio
    return ear
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", required=True,
help="path to facial landmark predictor")
ap.add_argument("-v", "--video", type=str, default="",
help="path to input video file")
args = vars(ap.parse_args())
# define two constants, one for the eye aspect ratio to indicate
# blink and then a second constant for the number of consecutive
# frames the eye must be below the threshold
EYE_AR_THRESH = 0.3
EYE_AR_CONSEC_FRAMES = 3
# initialize the frame counters and the total number of blinks
COUNTER = 0
TOTAL = 0
# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])
# grab the indexes of the facial landmarks for the left and
# right eye, respectively
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
# start the video stream thread
print("[INFO] starting video stream thread...")
vs = FileVideoStream(args["video"]).start()
fileStream = True
# vs = VideoStream(src=0).start()
# vs = VideoStream(usePiCamera=True).start()
# fileStream = False
time.sleep(1.0)
# loop over frames from the video stream
while True:
    # if this is a file video stream, then we need to check if
    # there any more frames left in the buffer to process
    if fileStream and not vs.more():
        break
    # grab the frame from the threaded video file stream, resize
    # it, and convert it to grayscale
    # channels)
    frame = vs.read()
    frame = imutils.resize(frame, width=450)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # detect faces in the grayscale frame
    rects = detector(gray, 0)
    # loop over the face detections
    for rect in rects:
        # determine the facial landmarks for the face region, then
        # convert the facial landmark (x, y)-coordinates to a NumPy
        # array
        shape = predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)
        # extract the left and right eye coordinates, then use the
        # coordinates to compute the eye aspect ratio for both eyes
        leftEye = shape[lStart:lEnd]
        rightEye = shape[rStart:rEnd]
        leftEAR = eye_aspect_ratio(leftEye)
        rightEAR = eye_aspect_ratio(rightEye)
        # average the eye aspect ratio together for both eyes
        ear = (leftEAR + rightEAR) / 2.0
        # compute the convex hull for the left and right eye, then
        # visualize each of the eyes
        leftEyeHull = cv2.convexHull(leftEye)
        rightEyeHull = cv2.convexHull(rightEye)
        cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
        cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)
        # check to see if the eye aspect ratio is below the blink
        # threshold, and if so, increment the blink frame counter
        if ear < EYE_AR_THRESH:
            COUNTER += 1
        # otherwise, the eye aspect ratio is not below the blink
        # threshold
        else:
            # if the eyes were closed for a sufficient number of
            # then increment the total number of blinks
            if COUNTER >= EYE_AR_CONSEC_FRAMES:
                TOTAL += 1
            # reset the eye frame counter
            COUNTER = 0
        # draw the total number of blinks on the frame along with
        # the computed eye aspect ratio for the frame
        cv2.putText(frame, "Blinks: {}".format(TOTAL), (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    # show the frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
The errors are:
usage: detect_blinks.py [-h] -p SHAPE_PREDICTOR [-v VIDEO]
detect_blinks.py: error: the following arguments are required: -p/--shape-predictor
An exception has occurred, use %tb to see the full traceback.
SystemExit: 2
%tb
Traceback (most recent call last):
File "<ipython-input-6-55db51806586>", line 1, in <module>
runfile('C:/Users/Rayhan/Downloads/Compressed/blink-detection/blink-detection/detect_blinks.py', wdir='C:/Users/Rayhan/Downloads/Compressed/blink-detection/blink-detection')
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Rayhan/Downloads/Compressed/blink-detection/blink-detection/detect_blinks.py", line 39, in <module>
args = vars(ap.parse_args())
File "C:\ProgramData\Anaconda3\lib\argparse.py", line 1734, in parse_args
args, argv = self.parse_known_args(args, namespace)
File "C:\ProgramData\Anaconda3\lib\argparse.py", line 1766, in parse_known_args
namespace, args = self._parse_known_args(args, namespace)
File "C:\ProgramData\Anaconda3\lib\argparse.py", line 2001, in _parse_known_args
', '.join(required_actions))
File "C:\ProgramData\Anaconda3\lib\argparse.py", line 2393, in error
self.exit(2, _('%(prog)s: error: %(message)s\n') % args)
File "C:\ProgramData\Anaconda3\lib\argparse.py", line 2380, in exit
_sys.exit(status)
SystemExit: 2
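For what it's worth, the SystemExit: 2 just means argparse aborted because the required --shape-predictor argument was not supplied. When the script is launched from Spyder/IPython rather than from a shell, one workaround (a sketch; the file names are the ones from the USAGE comment and may need adjusting) is to pass the arguments to parse_args() explicitly:
# hand the arguments to argparse directly instead of relying on the command line
args = vars(ap.parse_args([
    "--shape-predictor", "shape_predictor_68_face_landmarks.dat",
    "--video", "blink_detection_demo.mp4",
]))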
This is my code, which works:
import numpy as np
import cv2
import dlib
from scipy.spatial import distance as dist
PREDICTOR_PATH = "/home/erp-next/Downloads/shape_predictor_68_face_landmarks.dat"
# FULL_POINTS = list(range(0, 68))
# FACE_POINTS = list(range(17, 68))
# JAWLINE_POINTS = list(range(0, 17))
# RIGHT_EYEBROW_POINTS = list(range(17, 22))
# LEFT_EYEBROW_POINTS = list(range(22, 27))
# NOSE_POINTS = list(range(27, 36))
RIGHT_EYE_POINTS = list(range(36, 42))
LEFT_EYE_POINTS = list(range(42, 48))
# MOUTH_OUTLINE_POINTS = list(range(48, 61))
# MOUTH_INNER_POINTS = list(range(61, 68))
EYE_AR_THRESH = 0.2
EYE_AR_CONSEC_FRAMES = 2
frame_c=0
COUNTER_LEFT = 0
TOTAL_LEFT = 0
COUNTER_RIGHT = 0
TOTAL_RIGHT = 0
def eye_aspect_ratio(eye):
    # compute the euclidean distances between the two sets of
    # vertical eye landmarks (x, y)-coordinates
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    # compute the euclidean distance between the horizontal
    # eye landmark (x, y)-coordinates
    C = dist.euclidean(eye[0], eye[3])
    # compute the eye aspect ratio
    ear = (A + B) / (2.0 * C)
    # return the eye aspect ratio
    return ear
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# Start capturing the WebCam
video_capture = cv2.VideoCapture(0)
while True:
    print(frame_c)
    frame_c += 1
    ret, frame = video_capture.read()
    if ret:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)
        for rect in rects:
            x = rect.left()
            y = rect.top()
            # x1 = rect.right()
            # y1 = rect.bottom()
            landmarks = np.matrix([[p.x, p.y] for p in predictor(frame, rect).parts()])
            left_eye = landmarks[LEFT_EYE_POINTS]
            right_eye = landmarks[RIGHT_EYE_POINTS]
            left_eye_hull = cv2.convexHull(left_eye)
            right_eye_hull = cv2.convexHull(right_eye)
            cv2.drawContours(frame, [left_eye_hull], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [right_eye_hull], -1, (0, 255, 0), 1)
            ear_left = eye_aspect_ratio(left_eye)
            ear_right = eye_aspect_ratio(right_eye)
            cv2.putText(frame, "E.A.R. Left : {:.2f}".format(ear_left), (300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
            cv2.putText(frame, "E.A.R. Right: {:.2f}".format(ear_right), (300, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
            if ear_left < EYE_AR_THRESH:
                COUNTER_LEFT += 1
            else:
                if COUNTER_LEFT >= EYE_AR_CONSEC_FRAMES:
                    TOTAL_LEFT += 1
                    print("Left eye winked")
                COUNTER_LEFT = 0
            if ear_right < EYE_AR_THRESH:
                COUNTER_RIGHT += 1
            else:
                if COUNTER_RIGHT >= EYE_AR_CONSEC_FRAMES:
                    TOTAL_RIGHT += 1
                    print("Right eye winked")
                COUNTER_RIGHT = 0
    cv2.putText(frame, "Wink Left : {}".format(TOTAL_LEFT), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    cv2.putText(frame, "Wink Right: {}".format(TOTAL_RIGHT), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    cv2.imshow("Faces found", frame)
    ch = 0xFF & cv2.waitKey(1)
    if ch == ord('q'):
        break
cv2.destroyAllWindows()
The above code uses the "shape_predictor_68_face_landmarks.dat" model, which plots 68 predefined landmark points on the face.
Using those points it tracks the eyes, and with the Euclidean-distance-based eye aspect ratio it checks whether the eye has blinked or not.
Try this.
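If you want to sanity-check the eye-aspect-ratio idea in isolation, here is a tiny self-contained sketch with made-up landmark coordinates (the numbers are purely illustrative):
import numpy as np
from scipy.spatial import distance as dist

def eye_aspect_ratio(eye):
    A = dist.euclidean(eye[1], eye[5])  # first vertical pair
    B = dist.euclidean(eye[2], eye[4])  # second vertical pair
    C = dist.euclidean(eye[0], eye[3])  # horizontal pair
    return (A + B) / (2.0 * C)

# fabricated 6-point eye contours: wide open vs. nearly closed
open_eye = np.array([(0, 0), (3, 3), (7, 3), (10, 0), (7, -3), (3, -3)])
closed_eye = np.array([(0, 0), (3, 0.5), (7, 0.5), (10, 0), (7, -0.5), (3, -0.5)])
print(eye_aspect_ratio(open_eye))    # 0.6 -> well above EYE_AR_THRESH = 0.2
print(eye_aspect_ratio(closed_eye))  # 0.1 -> below the threshold, i.e. eye closed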
I have been working on an ML project in which we want to build an offline application, so we are not using APIs for this project; instead we have two models, one for object classification and the other for gender classification and emotion recognition. Now I have a problem with integrating the two models into one. Both models are in OpenCV.
Code for deep_learning_object_detection
# USAGE
# python deep_learning_object_detection.py --image images/example_01.jpg \
#--prototxt MobileNetSSD_deploy.prototxt.txt --model MobileNetSSD_deploy.caffemodel
# import the necessary packages
import numpy as np
import argparse
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# load the input image and construct an input blob for the image
# by resizing to a fixed 300x300 pixels and then normalizing it
# (note: normalization is done via the authors of the MobileNet SSD
# implementation)
image = cv2.imread(args["image"])
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)
# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()
# loop over the detections
for i in np.arange(0, detections.shape[2]):
    # extract the confidence (i.e., probability) associated with the
    # prediction
    confidence = detections[0, 0, i, 2]
    # filter out weak detections by ensuring the `confidence` is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        # extract the index of the class label from the `detections`,
        # then compute the (x, y)-coordinates of the bounding box for
        # the object
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        # display the prediction
        label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
        print("[INFO] {}".format(label))
        cv2.rectangle(image, (startX, startY), (endX, endY),
                      COLORS[idx], 2)
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(image, label, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)
Code for Gender and emotion recognition
import sys
import argparse
import cv2
from keras.models import load_model
import numpy as np
from utils.datasets import get_labels
from utils.inference import detect_faces
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.inference import load_image
from utils.preprocessor import preprocess_input
# parameters for loading data and images
image_path = sys.argv[1]
detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5'
emotion_labels = get_labels('fer2013')
gender_labels = get_labels('imdb')
font = cv2.FONT_HERSHEY_SIMPLEX
# hyper-parameters for bounding boxes shape
gender_offsets = (30, 60)
gender_offsets = (10, 10)
emotion_offsets = (20, 40)
emotion_offsets = (0, 0)
# loading models
face_detection = load_detection_model(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
gender_classifier = load_model(gender_model_path, compile=False)
# getting input model shapes for inference
emotion_target_size = emotion_classifier.input_shape[1:3]
gender_target_size = gender_classifier.input_shape[1:3]
# loading images
rgb_image = load_image(image_path, grayscale=False)
gray_image = load_image(image_path, grayscale=True)
gray_image = np.squeeze(gray_image)
gray_image = gray_image.astype('uint8')
faces = detect_faces(face_detection, gray_image)
for face_coordinates in faces:
    x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets)
    rgb_face = rgb_image[y1:y2, x1:x2]
    x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
    gray_face = gray_image[y1:y2, x1:x2]
    try:
        rgb_face = cv2.resize(rgb_face, (gender_target_size))
        gray_face = cv2.resize(gray_face, (emotion_target_size))
    except:
        continue
    rgb_face = preprocess_input(rgb_face, False)
    rgb_face = np.expand_dims(rgb_face, 0)
    gender_prediction = gender_classifier.predict(rgb_face)
    gender_label_arg = np.argmax(gender_prediction)
    gender_text = gender_labels[gender_label_arg]
    gray_face = preprocess_input(gray_face, True)
    gray_face = np.expand_dims(gray_face, 0)
    gray_face = np.expand_dims(gray_face, -1)
    emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
    emotion_text = emotion_labels[emotion_label_arg]
    if gender_text == gender_labels[0]:
        color = (0, 0, 255)
    else:
        color = (255, 0, 0)
    draw_bounding_box(face_coordinates, rgb_image, color)
    draw_text(face_coordinates, rgb_image, gender_text, color, 0, -20, 1, 2)
    draw_text(face_coordinates, rgb_image, emotion_text, color, 0, -50, 1, 2)
bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
cv2.imwrite('../images/predicted_test_image.png', bgr_image)
How can I integrate these two models into one?
Thanks in advance.
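Not a full answer, but one common way to combine two standalone scripts like these is to refactor each pipeline into a function that takes an image and returns its detections, and then call both from a single entry point. A rough structural sketch only (the function bodies are placeholders standing in for the two scripts above, and the image path is the one from the first script's usage line):
import cv2

def detect_objects(image):
    # ...MobileNet SSD code from the first script, refactored to return
    # a list of (label, confidence, box) tuples instead of drawing/showing...
    return []

def analyze_faces(image):
    # ...gender/emotion code from the second script, refactored to return
    # a list of (gender, emotion, face_coordinates) tuples...
    return []

def process(image_path):
    image = cv2.imread(image_path)
    objects = detect_objects(image)
    faces = analyze_faces(image)
    # draw/annotate once, using both sets of results
    return objects, faces

if __name__ == "__main__":
    print(process("images/example_01.jpg"))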
This question already has answers here:
Animate points with labels with matplotlib
(3 answers)
Closed 5 years ago.
I made this animation using matplotlib and it is working properly; however, I need to add some animated labels that move with their corresponding points.
The first label should refer to the intersection point between the circle and the horizontal line from the centre of the ellipse, and the other text label should sit in the middle of the inclined line, annotating its length.
I tried some ideas but nothing worked properly. Any ideas?
screenshot
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
fig = plt.figure()
ax = fig.add_subplot(111, autoscale_on=True, xlim=(-6, 6), ylim=(-7, 17))
# ax.grid()
line, = ax.plot([], [], 'k', lw=1)
line2, = ax.plot([], [], 'k--', lw=1)
a,b = 3,2
x,y = list(),list()
x1 =np.array([item/10 for item in range(-30,31)])
y1 = np.sqrt(b**2 * (1-(x1**2 / a**2)))
x =list(x1)+[-item for item in list(x1)]
y =list(y1)+[-item for item in list(y1)]
plt.plot(x, y, 'k:')
plt.plot((0,0), (0,15), 'k--')
ax.annotate('$A$', xy=(0,15), xytext=(-10, 10),color='b',
textcoords='offset points')
ax.annotate('$O$', xy=(0,-1), xytext=(-10, 10),color='b',ha='center',
textcoords='offset points')
ax.annotate('$4a$', xy=(0,7), xytext=(-10, 10),color='b',ha='center',
textcoords='offset points', family='sans serif')
def animate(i):
    thisx = [0, x[i]]
    thisy = [15, y[i]]
    xx = [x[i], 0]
    yy = [y[i], 0]
    line.set_data(thisx, thisy)
    line2.set_data(xx, yy)
    return line, line2
ani = animation.FuncAnimation(fig, animate, np.arange(0, len(x)), interval=20, blit=False)
ax.annotate('$P$', xy=(3,0), xytext=(0, 0),color='b',ha='center',
textcoords='offset points', family='sans serif', style='italic')
plt.show()
# ani.save('circular_motion.mp4', fps=20)
#
plt.close()
You can alter the annotation properties in the same way that you alter the line properties. Just store the Annotation object that is returned from the ax.annotate call and then update its position in the animate function. Note that the set_position function does not work properly, as was also noted here, therefore you have to use the xy attribute.
Furthermore, I noticed that your animation runs faster when your y values are close to zero. You can fix that (if it needs fixing) by defining a vector of angles and computing the xy coordinates from that. I took the liberty of altering your code to show what I mean.
About the length of the inclined line, I annotated it here as L, as you don't state what the length of the distance OP is, but I guess that you can fill that in yourself.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
fig = plt.figure()
ax = fig.add_subplot(111, autoscale_on=True, xlim=(-6, 6), ylim=(-7, 17))
# ax.grid()
line, = ax.plot([], [], 'k', lw=1)
line2, = ax.plot([], [], 'k--', lw=1)
a,b = 3,2
x,y = list(),list()
z = 15
phi = np.linspace(0,2*np.pi,100)
x = a * np.cos(phi)
y = -b * np.sin(phi)
plt.plot(x, y, 'k:')
plt.plot((0,0), (0,z), 'k--')
ax.annotate('$A$', xy=(0,15), xytext=(-10, 10),color='b',
textcoords='offset points')
ax.annotate('$O$', xy=(0,-1), xytext=(-10, 10),color='b',ha='center',
textcoords='offset points')
ax.annotate('$4a$', xy=(0,7), xytext=(-10, 10),color='b',ha='center',
textcoords='offset points', family='sans serif')
def animate(i):
    thisx = [0, x[i]]
    thisy = [z, y[i]]
    xx = [x[i], 0]
    yy = [y[i], 0]
    line.set_data(thisx, thisy)
    line2.set_data(xx, yy)
    P.xy = (x[i]*1.05, y[i])
    L.xy = ((x[i]/2)*1.05, z/2 + y[i]/2)
    return line, line2, P, L
ani = animation.FuncAnimation(fig, animate, np.arange(0, len(x)), interval=20, blit=False)
P = ax.annotate('$P$', xy=(a,0), xytext=(0, 0),color='b',ha='center',
textcoords='offset points', family='sans serif', style='italic')
L = ax.annotate('$L$', xy=(a/2,z/2), xytext=(0, 0),color='b',ha='center',
textcoords='offset points', family='sans serif', style='italic')
plt.show()
# ani.save('circular_motion.mp4', fps=20)
#
plt.close()
Hope this helps.