I have a face recognition project that works fine with a single camera.
I now want to run it on two cameras simultaneously.
This is my code for one camera; I have no idea how to extend it to two cameras.
import face_recognition
import cv2
import numpy as np

video_capture = cv2.VideoCapture('rtsp://admin:11111#192.168.1.13:554/mode=real&idc=1&ids=2')

farid_image = face_recognition.load_image_file("farid.jpg")
farid_face_encoding = face_recognition.face_encodings(farid_image)[0]

# Load a second sample picture and learn how to recognize it.
roice_image = face_recognition.load_image_file("roice.jpg")
roice_face_encoding = face_recognition.face_encodings(roice_image)[0]

known_face_encodings = [
    farid_face_encoding,
    roice_face_encoding
]
known_face_names = [
    "farid",
    "roice"
]

while True:
    ret, frame = video_capture.read()
    rgb_frame = frame[:, :, ::-1]
    face_locations = face_recognition.face_locations(rgb_frame)
    face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
    # Loop through each face in this frame of video
    for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
        # See if the face is a match for the known face(s)
        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
        name = "Unknown"
        # Calculate face distances
        face_distance = face_recognition.face_distance(known_face_encodings, face_encoding)
        # If a match was found in known_face_encodings, use the nearest one
        if True in matches:
            # first_match_index = matches.index(True)
            # Sort by distance and take the nearest
            name = known_face_names[np.argsort(face_distance)[0]]
        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
        # Draw a label with a name below the face
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
    # Display the resulting image
    cv2.imshow('Video', frame)
    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()
I can simply add more cameras with cv2.VideoCapture(), but how do I change the face_recognition part so that it works with two cameras?
You could try making this multithreaded: one thread per camera, each doing its own facial recognition on the frames it sees.
The threads would act independently on their respective streams, but you can then combine the results from both threads for improved detection and/or recognition.
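A minimal sketch of that idea, assuming the known_face_encodings / known_face_names lists from your code above and placeholder URLs for the two streams (not a tested, definitive implementation):

import threading
import cv2
import face_recognition

latest_frames = {}  # window name -> most recent annotated frame from each thread

def recognize_stream(source, window_name):
    # Each thread owns its own capture and runs the same per-frame logic
    video_capture = cv2.VideoCapture(source)
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        rgb_frame = frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_frame)
        face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
        for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            label = "Unknown"
            if True in matches:
                label = known_face_names[matches.index(True)]
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            cv2.putText(frame, label, (left + 6, bottom - 6),
                        cv2.FONT_HERSHEY_DUPLEX, 1.0, (255, 255, 255), 1)
        latest_frames[window_name] = frame
    video_capture.release()

# One thread per camera; the RTSP URLs below are placeholders for your two streams
threads = [
    threading.Thread(target=recognize_stream, args=('rtsp://camera-1-url', 'Camera 1'), daemon=True),
    threading.Thread(target=recognize_stream, args=('rtsp://camera-2-url', 'Camera 2'), daemon=True),
]
for t in threads:
    t.start()

# Display from the main thread, since OpenCV's HighGUI is not reliably thread-safe
while True:
    for window, frame in list(latest_frames.items()):
        cv2.imshow(window, frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()

The recognition work happens independently per stream; only the display is funneled through the main thread, and you could just as easily collect the per-thread results there to cross-check detections between cameras.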
So I'm trying out code from this website: https://towardsdatascience.com/real-time-age-gender-and-emotion-prediction-from-webcam-with-keras-and-opencv-bde6220d60a. I'm only interested in the real-time emotion prediction part, and I use the emotion prediction model provided by the author. After following the setup and cutting out the code I don't need (all of the code is provided in the link), I'm left with this:
import cv2
from PIL import Image
import numpy as np
from mtcnn import MTCNN
import pickle

# load face detector
detector = MTCNN()

# load the model
emotion_model = pickle.load(open('emotion-model-final.pkl', 'rb'))

def rgb2gray(rgb):
    r, g, b = rgb[:,:,0], rgb[:,:,1], rgb[:,:,2]
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray

def detect_face(img):
    mt_res = detector.detect_faces(img)
    return_res = []
    for face in mt_res:
        x, y, width, height = face['box']
        center = [x+(width/2), y+(height/2)]
        max_border = max(width, height)
        # center alignment
        left = max(int(center[0]-(max_border/2)), 0)
        right = max(int(center[0]+(max_border/2)), 0)
        top = max(int(center[1]-(max_border/2)), 0)
        bottom = max(int(center[1]+(max_border/2)), 0)
        # crop the face
        center_img_k = img[top:top+max_border,
                           left:left+max_border, :]
        center_img = np.array(Image.fromarray(center_img_k).resize([224, 224]))
        # convert to grey scale then predict using the emotion model
        grey_img = np.array(Image.fromarray(center_img_k).resize([48, 48]))
        emotion_preds = emotion_model.predict(rgb2gray(grey_img).reshape(1, 48, 48, 1))
        # output to cv2
        return_res.append([top, right, bottom, left, emotion_preds])
    return return_res
# Get a reference to webcam
video_capture = cv2.VideoCapture(0)

emotion_dict = {
    0: 'Surprise',
    1: 'Happy',
    2: 'Disgust',
    3: 'Anger',
    4: 'Sadness',
    5: 'Fear',
    6: 'Contempt'
}

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()
    # Convert the image from BGR color (which OpenCV uses) to RGB color
    rgb_frame = frame[:, :, ::-1]
    # Find all the faces in the current frame of video
    face_locations = detect_face(rgb_frame)
    # Display the results
    for top, right, bottom, left, emotion_preds in face_locations:
        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
        cv2.putText(frame, 'Emotion: {}({:.3f})'.format(emotion_dict[np.argmax(emotion_preds)], np.max(emotion_preds)), (left, top-40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36,255,12), 1)
    # Display the resulting image
    cv2.imshow('Video', frame)
    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()
Running the code doesn't work for me because I get the following error message:
File "C:\Users\<my name>\source\repos\webcamtest\webcamtest\webcamtest.py", line 43, in detect_face
emotion_preds = emotion_model.predict(rgb2gray(grey_img).reshape(1, 48, 48, 1))
File "C:\Users\<my name>\miniconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\<my name>\miniconda3\lib\site-packages\keras\engine\training.py", line 3555, in _check_call_args
fullargspec = self._call_spec.full_argspec
AttributeError: 'Model' object has no attribute '_call_spec'
So the error seems to lead into keras\engine\training.py, which I haven't touched other than installing it. I'm using Keras 2.11.0, TensorFlow 2.11.0 and mtcnn 0.1.1. What could be the problem?
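One possibility worth checking (an assumption on my part, since I can't see how emotion-model-final.pkl was produced): a pickled Keras model is tied to the Keras version that wrote it, and unpickling it under Keras 2.11 can restore an object whose internal attributes (such as _call_spec) no longer match the installed code. A more robust route is Keras' own save/load mechanism; a rough sketch of that idea:

# Step 1: in an environment where the pickle still loads (i.e. the Keras version
# the model was created with), re-save it in Keras' own format instead of pickle:
import pickle
old_model = pickle.load(open('emotion-model-final.pkl', 'rb'))
old_model.save('emotion-model-final.h5')

# Step 2: in your current Keras/TensorFlow 2.11 environment, load it with:
from tensorflow import keras
emotion_model = keras.models.load_model('emotion-model-final.h5')
# then call emotion_model.predict(...) exactly as in detect_face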
I am trying to measure the velocity of a water spray.
The code can be seen here:
import cv2
import numpy as np
# Get a VideoCapture object from video and store it in vs
vc = cv2.VideoCapture('spray.avi')
# Read first frame
ret, first_frame = vc.read()
# Scale and resize image
resize_dim = 600
max_dim = max(first_frame.shape)
scale = resize_dim / max_dim
first_frame = cv2.resize(first_frame, None, fx=scale, fy=scale)
# Convert to gray scale
prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
# Create mask
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
out = cv2.VideoWriter('video.mp4', -1, 1, (600, 600))
while (vc.isOpened()):
    # Read a frame from video
    ret, frame = vc.read()
    # Convert the new frame to gray scale and resize it
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, None, fx=scale, fy=scale)
    # Calculate dense optical flow by the Farneback method
    # https://docs.opencv.org/3.0-beta/modules/video/doc/motion_analysis_and_object_tracking.html#calcopticalflowfarneback
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, pyr_scale=0.5, levels=5, winsize=11, iterations=5,
                                        poly_n=5, poly_sigma=1.1, flags=0)
    # Compute the magnitude and angle of the 2D vectors
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # Set image hue according to the optical flow direction
    mask[..., 0] = angle * 180 / np.pi / 2
    # Set image value according to the optical flow magnitude (normalized)
    mask[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    # Convert HSV to RGB (BGR) color representation
    rgb = cv2.cvtColor(mask, cv2.COLOR_HSV2BGR)
    # Resize frame size to match dimensions
    frame = cv2.resize(frame, None, fx=scale, fy=scale)
    # Open a new window and display the output frame
    dense_flow = cv2.addWeighted(frame, 1, rgb, 2, 0)
    cv2.imshow("Dense optical flow", dense_flow)
    #cv2.imshow("Initial video", vc )
    out.write(dense_flow)
    # Update previous frame
    prev_gray = gray
    # Frames are read at intervals of 10 milliseconds. The program breaks out of the while loop when the user presses the 'q' key
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

# The following frees up resources and closes all windows
vc.release()
cv2.destroyAllWindows()
I can produce the video of the dense optical flow; that works fine (see image here).
But I want to find the actual numbers of the velocity profile.
Are they already somewhere within the code and I just haven't understood it yet, or are more additions needed to obtain them?
Thank you!!
(The code is from here: https://github.com/IRailean/Dense-Optical-Flow/blob/master/OpticalFlow.py)
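For what it's worth, the numbers are essentially already there in flow: each element flow[y, x] is the displacement of that pixel between consecutive frames, in pixels, so the magnitude computed above is a speed in pixels per frame. Turning that into a physical velocity needs the frame rate and a pixel-to-length calibration, which are not in the code. A rough sketch under those assumptions (fps and mm_per_pixel below are placeholders you would have to measure for your own video):

import numpy as np

fps = 20.0            # placeholder: frame rate of spray.avi
mm_per_pixel = 0.1    # placeholder: spatial calibration of the resized frames

# flow[..., 0] and flow[..., 1] are per-pixel displacements (in pixels) between
# the previous and the current frame, so magnitude is speed in pixels/frame
speed_px_per_frame = magnitude                     # from cv2.cartToPolar above
speed_mm_per_s = speed_px_per_frame * fps * mm_per_pixel

# Example "velocity profile": mean speed across each image row
profile = speed_mm_per_s.mean(axis=1)
print("max speed (mm/s):", speed_mm_per_s.max())
np.savetxt("velocity_profile.csv", profile, delimiter=",")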
So I am using darkflow to detect objects (hats) in a video. It detects people wearing hats and draws a bounding box around each hat in the video. Now I want to save the top-left and bottom-right coordinates of the detected bounding boxes to a txt or csv file for further processing. I wrote the code in opencv-python; I can display the video and it draws the bounding boxes successfully, but I don't know how to save the coordinates of the boxes. Any idea how to do it?
I am using Mark Jay's code for my purpose:
# import libraries
import cv2
from darkflow.net.build import TFNet
import numpy as np
import time

# load model, weights and threshold
option = {
    'model': 'cfg/yolo-5c.cfg',
    'load': 'bin/yolo.weights',
    'threshold': 0.15,
    'gpu': 1.0
}
tfnet = TFNet(option)

# open video file
capture = cv2.VideoCapture('videofile_1080_20fps.avi')
colors = [tuple(255 * np.random.rand(3)) for i in range(5)]

# read video file and set parameters for object detection
while (capture.isOpened()):
    stime = time.time()
    ret, frame = capture.read()
    if ret:
        results = tfnet.return_predict(frame)
        for color, result in zip(colors, results):
            tl = (result['topleft']['x'], result['topleft']['y'])          # top left coordinate
            br = (result['bottomright']['x'], result['bottomright']['y'])  # bottom right coordinate
            label = result['label']                                        # label
            frame = cv2.rectangle(frame, tl, br, color, 7)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX,
                                1, (0, 0, 0), 2)
        cv2.imshow('frame', frame)
        print('FPS {:.1f}'.format(1 / (time.time() - stime)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        capture.release()
        cv2.destroyAllWindows()
        break
So as you can see, I can display the video and detect the objects, with bounding boxes drawn around them. Now my goal is to save the pixel coordinates of these bounding boxes, just the top-left and bottom-right corners. Any ideas, guys?
The 'result' dictionary in your code already contains the coordinates. Make a list, append the values from each result to it, and then, in your 'else' branch (when the video ends), write the list out to a .txt or .csv file.
Cheers!
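A minimal sketch of that idea using the csv module (the file name and column layout are arbitrary, and it assumes the loop from your code above):

import csv

box_coordinates = []   # collected over the whole video

# inside the `if ret:` block, after return_predict:
for result in results:
    box_coordinates.append([
        result['label'],
        result['topleft']['x'], result['topleft']['y'],
        result['bottomright']['x'], result['bottomright']['y'],
    ])

# inside the `else:` block, before breaking out of the loop:
with open('bounding_boxes.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['label', 'tl_x', 'tl_y', 'br_x', 'br_y'])
    writer.writerows(box_coordinates)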
I'm using the face_recognition library for Python and I'm trying to change the KNN algorithm so that it runs with OpenCV in real time. For that I "merged" two other algorithms provided by the library author (algorithm1, algorithm2).
(Edited: now it shows the frame until it detects a face, then it crashes.)
What I tried so far:
import numpy as np
import cv2
import face_recognition
import pickle

def predict(frame, knn_clf=None, model_path=None, distance_threshold=0.6):
    """
    Recognizes faces in the given image using a trained KNN classifier

    :param knn_clf: (optional) a knn classifier object. if not specified, model_path must be specified.
    :param model_path: (optional) path to a pickled knn classifier. if not specified, knn_clf must be specified.
    :param distance_threshold: (optional) distance threshold for face classification. the larger it is, the more chance
           of mis-classifying an unknown person as a known one.
    :return: a list of names and face locations for the recognized faces in the image: [(name, bounding box), ...].
        For faces of unrecognized persons, the name 'unknown' will be returned.
    """
    if knn_clf is None and model_path is None:
        raise Exception("Must supply knn classifier either through knn_clf or model_path")

    # Load a trained KNN model (if one was passed in)
    if knn_clf is None:
        with open(model_path, 'rb') as f:
            knn_clf = pickle.load(f)

    # Find face locations in the frame
    X_face_locations = face_recognition.face_locations(frame)

    # If no faces are found in the image, return an empty result.
    if len(X_face_locations) == 0:
        return []

    # Find encodings for faces in the frame
    faces_encodings = face_recognition.face_encodings(frame, known_face_locations=X_face_locations)

    # Use the KNN model to find the best matches for the test face
    closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=1)
    are_matches = [closest_distances[0][i][0] <= distance_threshold for i in range(len(X_face_locations))]

    # Predict classes and remove classifications that aren't within the threshold
    return [(pred, loc) if rec else ("unknown", loc) for pred, loc, rec in zip(knn_clf.predict(faces_encodings), X_face_locations, are_matches)]

def show_labels_on_webcam(RGBFrame, predictions):
    """
    Shows the face recognition results visually.

    :param RGBFrame: the frame to draw on
    :param predictions: results of the predict function
    :return:
    """
    frame = RGBFrame
    for name, (top, right, bottom, left) in predictions:
        # Scale back up face locations since the frame we detected in was scaled to 1/4 size
        top *= 4
        right *= 4
        bottom *= 4
        left *= 4
        # Draw a box around the face
        print (frame.shape)
        print (frame.dtype)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
        # Draw a label with a name below the face
        cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
        font = cv2.FONT_HERSHEY_DUPLEX
        cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

    # Display the resulting image
    cv2.imshow('Video', frame)

# Get a reference to webcam #0 (the default one)
video_capture = cv2.VideoCapture(0)

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()
    # Resize frame of video to 1/4 size for faster face recognition processing
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    rgb_small_frame = small_frame[:, :, ::-1]
    predictions = predict(rgb_small_frame, model_path="trained_knn_model_1.clf")
    # Display results overlaid on webcam video
    print (rgb_small_frame.shape)
    print (rgb_small_frame.dtype)
    show_labels_on_webcam(rgb_small_frame, predictions)
    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()
The error I'm getting:
Traceback (most recent call last):
File "withOpenCV.py", line 91, in <module>
show_labels_on_webcam(rgb_small_frame, predictions)
File "withOpenCV.py", line 62, in show_labels_on_webcam
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
TypeError: Layout of the output array img is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)
If you have any suggestions or see what I'm missing, please let me know! Thanks in advance!
I solved the error by changing show_labels_on_webcam(rgb_small_frame, predictions) to show_labels_on_webcam(frame, predictions). Thanks to #api55 for the hint!
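For anyone hitting the same thing, my understanding (an interpretation, not taken from the library docs) is that rgb_small_frame = small_frame[:, :, ::-1] is a reversed, non-contiguous view, which OpenCV's drawing functions cannot write into, hence the cv::Mat layout error; frame is a plain contiguous array, and it is also the full-size image, which matches the ×4 scaling inside show_labels_on_webcam. A sketch of the corrected loop body, assuming the imports and functions from the code above:

while True:
    ret, frame = video_capture.read()
    # Detect on a quarter-size RGB copy for speed (made contiguous to be safe)...
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    rgb_small_frame = np.ascontiguousarray(small_frame[:, :, ::-1])
    predictions = predict(rgb_small_frame, model_path="trained_knn_model_1.clf")
    # ...but draw on the original full-size BGR frame, which is contiguous and
    # matches the x4 scaling done inside show_labels_on_webcam
    show_labels_on_webcam(frame, predictions)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break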
I am seriously struggling here. I'm using OpenCV (cv2) and Python 3. tracking multiple objects of the same color is exactly the question I'm asking, but the pages are out of date, the links don't work anymore, and I can't find anything else online about it. I can track multiple colors (a red object, a green object, a blue object, etc.), but I cannot for the life of me figure out how to track two red objects.
# import the necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
    help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
    help="max buffer size")
args = vars(ap.parse_args())

# define the lower and upper boundaries of the "green"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)
pts = deque(maxlen=args["buffer"])

# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
    camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
    camera = cv2.VideoCapture(args["video"])

# keep looping
while True:
    # grab the current frame
    (grabbed, frame) = camera.read()

    # if we are viewing a video and we did not grab a frame,
    # then we have reached the end of the video
    if args.get("video") and not grabbed:
        break

    # resize the frame, blur it, and convert it to the HSV
    # color space
    frame = imutils.resize(frame, width=600)
    # blurred = cv2.GaussianBlur(frame, (11, 11), 0)
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # construct a mask for the color "green", then perform
    # a series of dilations and erosions to remove any small
    # blobs left in the mask
    mask = cv2.inRange(hsv, greenLower, greenUpper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    # find contours in the mask and initialize the current
    # (x, y) center of the ball
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE)[-2]
    center = None

    # only proceed if at least one contour was found
    if len(cnts) > 0:
        # find the largest contour in the mask, then use
        # it to compute the minimum enclosing circle and
        # centroid
        c = max(cnts, key=cv2.contourArea)
I figured that in the line just above, which reads "c = max(cnts, key=cv2.contourArea)", I could simply find the second-largest contour and use that one as well, but once again I couldn't find anything online about how to do this (see the sketch after the code below).
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))

        # only proceed if the radius meets a minimum size
        if radius > 10:
            # draw the circle and centroid on the frame,
            # then update the list of tracked points
            cv2.circle(frame, (int(x), int(y)), int(radius),
                (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)

    # update the points queue
    pts.appendleft(center)
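A sketch of that "second largest" idea (untested, and it assumes the imports and loop from the code above): instead of taking only the single largest contour, sort the contours by area and loop over the two biggest blobs. The pts_by_object name at the end is hypothetical, just to indicate that each tracked object would need its own deque of points.

# only proceed if at least one contour was found
if len(cnts) > 0:
    # sort contours by area, largest first, and keep up to the two biggest blobs
    largest_two = sorted(cnts, key=cv2.contourArea, reverse=True)[:2]
    for c in largest_two:
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        if M["m00"] == 0:
            continue  # skip degenerate contours
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
        # only proceed if the radius meets a minimum size
        if radius > 10:
            cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)
            # you would likely want a separate deque per tracked object here,
            # e.g. pts_by_object[i].appendleft(center)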