I am trying to write code for predicting age, gender and emotion using a CNN, but I keep getting the above error.
I have a function that takes the webcam input and passes the detected frame to another function, which is what raises the error.
Here is the code for face detection:
def detect_face(img):
    mt_res = detector.detect_faces(img)
    return_res = []
    for face in mt_res:
        x, y, width, height = face['box']
        center = [x+(width/2), y+(height/2)]
        max_border = max(width, height)

        # center alignment
        left = max(int(center[0]-(max_border/2)), 0)
        right = max(int(center[0]+(max_border/2)), 0)
        top = max(int(center[1]-(max_border/2)), 0)
        bottom = max(int(center[1]+(max_border/2)), 0)

        # crop the face
        center_img_k = img[top:top+max_border, left:left+max_border, :]
        center_img = np.array(Image.fromarray(center_img_k).resize([224, 224]))
        print(center_img)

        # create predictions
        sex_preds = sex_model.predict(center_img.reshape(1, 224, 224, 3))[0][0]
        age_preds = age_model.predict(center_img.reshape(1, 224, 224, 3))[0][0]

        # convert to grey scale then predict using the emotion model
        grey_img = np.array(Image.fromarray(center_img_k).resize([48, 48]))
        emotion_preds = emotion_model.predict(rgb2gray(grey_img).reshape(1, 48, 48, 1))

        # output to the cv2
        return_res.append([top, right, bottom, left, sex_preds, age_preds, emotion_preds])
    return return_res
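For reference, this is how detect_face can be called on a single still image to reproduce the problem outside the webcam loop (a minimal sketch, assuming detector, sex_model, age_model and emotion_model are already loaded, and using a hypothetical test.jpg containing a face):
import cv2

img_bgr = cv2.imread('test.jpg')
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # contiguous RGB copy of the image
for top, right, bottom, left, sex_p, age_p, emotion_p in detect_face(img_rgb):
    print(top, right, bottom, left, sex_p, age_p, emotion_p)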
Here is the code for the webcam input:
# Get a reference to webcam
video_capture = cv2.VideoCapture(0)

emotion_dict = {
    0: 'Surprise',
    1: 'Happy',
    2: 'Disgust',
    3: 'Anger',
    4: 'Sadness',
    5: 'Fear',
    6: 'Contempt'
}

while True:
    # Grab a single frame of video
    ret, frame = video_capture.read()

    # Convert the image from BGR color (which OpenCV uses) to RGB color
    rgb_frame = frame[:, :, ::-1]

    # Find all the faces in the current frame of video
    face_locations = detect_face(rgb_frame)

    # Display the results
    for top, right, bottom, left, sex_preds, age_preds, emotion_preds in face_locations:
        # Draw a box around the face
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

        sex_text = 'Female' if sex_preds > 0.5 else 'Male'
        cv2.putText(frame, 'Sex: {}({:.3f})'.format(sex_text, sex_preds), (left, top-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)
        cv2.putText(frame, 'Age: {:.3f}'.format(age_preds), (left, top-25), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)
        cv2.putText(frame, 'Emotion: {}({:.3f})'.format(emotion_dict[np.argmax(emotion_preds)], np.max(emotion_preds)), (left, top-40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)

    # Display the resulting image
    cv2.imshow('Video', frame)

    # Hit 'q' on the keyboard to quit!
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release handle to the webcam
video_capture.release()
cv2.destroyAllWindows()
Versions:
TensorFlow & Keras == 2.10.0
Compositing a PNG onto an MP4 video creates a black border around the edge.
This is using moviepy 1.0.0.
The code below reproduces the MP4 with the attached red text PNG.
import numpy as np
import moviepy.editor as mped

def composite_txtpng_on_colour():
    bg_color = mped.ColorClip(size=[400, 300], color=np.array([0, 255, 0]).astype(np.uint8),
                              duration=2).set_position((0, 0))
    text_png_postition = [5, 5]
    text_png = mped.ImageClip("./txtpng.png", duration=3).set_position((text_png_postition))

    canvas_size = bg_color.size
    stacked_clips = mped.CompositeVideoClip([bg_color, text_png], size=canvas_size).set_duration(2)
    stacked_clips.write_videofile('text_with_black_border_video.mp4', fps=24)

composite_txtpng_on_colour()
The result is an MP4 that can be played in VLC player. A screenshot of the black edge can be seen below:
Any suggestions for removing the black borders would be much appreciated.
Update: it looks like moviepy does a blit instead of alpha compositing.
def blit(im1, im2, pos=None, mask=None, ismask=False):
    """ Blit an image over another. Blits ``im1`` on ``im2`` as position ``pos=(x,y)``, using the
    ``mask`` if provided. If ``im1`` and ``im2`` are mask pictures
    (2D float arrays) then ``ismask`` must be ``True``.
    """
    if pos is None:
        pos = [0, 0]

    # xp1,yp1,xp2,yp2 = blit area on im2
    # x1,y1,x2,y2 = area of im1 to blit on im2
    xp, yp = pos
    x1 = max(0, -xp)
    y1 = max(0, -yp)
    h1, w1 = im1.shape[:2]
    h2, w2 = im2.shape[:2]
    xp2 = min(w2, xp + w1)
    yp2 = min(h2, yp + h1)
    x2 = min(w1, w2 - xp)
    y2 = min(h1, h2 - yp)
    xp1 = max(0, xp)
    yp1 = max(0, yp)

    if (xp1 >= xp2) or (yp1 >= yp2):
        return im2

    blitted = im1[y1:y2, x1:x2]

    new_im2 = +im2

    if mask is None:
        new_im2[yp1:yp2, xp1:xp2] = blitted
    else:
        mask = mask[y1:y2, x1:x2]
        if len(im1.shape) == 3:
            mask = np.dstack(3 * [mask])
        blit_region = new_im2[yp1:yp2, xp1:xp2]
        new_im2[yp1:yp2, xp1:xp2] = (1.0 * mask * blitted + (1.0 - mask) * blit_region)

    return new_im2.astype('uint8') if (not ismask) else new_im2
and so, Rotem is right.
new_im2[yp1:yp2, xp1:xp2] = (1.0 * mask * blitted + (1.0 - mask) * blit_region)
is equivalent to
(alpha * img_rgb + (1.0 - alpha) * bg)
with mask playing the role of alpha, blitted of img_rgb and blit_region of bg. This is how moviepy composites, and this is why we see black at the edges.
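A tiny numpy sketch (with made-up pixel values) shows what that formula does to a partially transparent edge pixel of the red text:
import numpy as np

img_rgb = np.array([255.0, 0.0, 0.0])   # pure red foreground pixel
bg      = np.array([0.0, 255.0, 0.0])   # green background pixel
alpha   = 0.4                           # partial transparency at the glyph edge

print(alpha * img_rgb + (1.0 - alpha) * bg)  # [102. 153.   0.] -- a dark, muddy tone
The blended edge pixels are much darker than either the text or the background, and the compression and chroma subsampling discussed below smear them into a visible dark border.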
The main issue is the YUV 4:2:0 color subsampling, but it is also a result of compression artifacts and the imperfect red text image.
The image imperfection is only in the alpha channel.
There are alpha (transparency) values that are neither 255 nor 0 (semi-transparent pixels) around the text.
The following code sample corrects it and shows the difference (using OpenCV):
orig_img = cv2.imread('txtpng.png', cv2.IMREAD_UNCHANGED)
img = orig_img.copy()
img[(img != 255) & (img != 0)] = 255 # Keep only two values: 0 and 255
cv2.imwrite('txtpng2.png', img) # Write img to txtpng2.png
cv2.imshow('alpha diff', cv2.absdiff(orig_img[:,:,3], img[:,:,3])*100) # Show the difference in alpha channels
cv2.waitKey()
cv2.destroyAllWindows()
The above code keeps only two values: 0 and 255 in img.
Result ('alpha diff'):
As you can see, there are differences.
Setting codec parameters:
For reference I created an uncompressed AVI video file:
# Save uncompressed AVI as reference
stacked_clips.write_videofile('text_with_black_border_video.avi', fps=24, codec='rawvideo', ffmpeg_params=['-pix_fmt', 'bgr24'])
I also tried to select H.264 codec with yuv444p pixel format, but for some reason it's not working.
I have selected H.265 codec instead.
Using ffmpeg_params, I also set '-crf', '10' for almost lossless video compression:
stacked_clips.write_videofile('text_with_black_border_video.mp4', fps=24, codec='libx265', ffmpeg_params=['-pix_fmt', 'yuv444p', '-crf', '10'])
Here is the complete code sample:
import numpy as np
import moviepy.editor as mped
import cv2
orig_img = cv2.imread('txtpng.png', cv2.IMREAD_UNCHANGED)
img = orig_img.copy()
img[(img != 255) & (img != 0)] = 255 # Keep only two values: 0 and 255
cv2.imwrite('txtpng2.png', img)
cv2.imshow('img', img)
cv2.imshow('alpha diff', cv2.absdiff(orig_img[:,:,3], img[:,:,3])*100) # Show the difference in alpha channels
cv2.waitKey()
cv2.destroyAllWindows()
def composite_txtpng_on_colour():
    bg_color = mped.ColorClip(size=[400, 300], color=np.array([0, 255, 0]).astype(np.uint8),
                              duration=2).set_position((0, 0))
    text_png_postition = [5, 5]
    text_png = mped.ImageClip('txtpng2.png', duration=3).set_position((text_png_postition))

    canvas_size = bg_color.size
    stacked_clips = mped.CompositeVideoClip([bg_color, text_png], size=canvas_size).set_duration(2)
    stacked_clips.write_videofile('text_with_black_border_video.mp4', fps=24, codec='libx265', ffmpeg_params=['-pix_fmt', 'yuv444p', '-crf', '10'])

    # Save uncompressed AVI as reference
    stacked_clips.write_videofile('text_with_black_border_video.avi', fps=24, codec='rawvideo', ffmpeg_params=['-pix_fmt', 'bgr24'])

composite_txtpng_on_colour()
Result (text_with_black_border_video.mp4):
Reference (uncompressed text_with_black_border_video.avi):
Magnified part:
Note:
I am using moviepy version 1.0.3
I figured out why the H.264 codec is not working with the yuv444p pixel format.
We can add '-report' to the list of ffmpeg_params:
stacked_clips.write_videofile('text_with_black_border_video.mp4', fps=24, codec='libx264', ffmpeg_params=['-pix_fmt', 'yuv444p', '-crf', '10', '-report'])
The logged report begins with:
... \\lib\\site-packages\\imageio_ffmpeg\\binaries\\ffmpeg-win64-v4.2.2.exe" -y -loglevel error -f rawvideo -vcodec rawvideo -s 400x300 -pix_fmt rgb24 -r 24.00 -an -i - -vcodec libx264 -preset medium -pix_fmt yuv444p -crf 10 -report -pix_fmt yuv420p text_with_black_border_video.mp4
The log shows that moviepy added a -pix_fmt yuv420p argument.
The pix_fmt argument is therefore given twice: -pix_fmt yuv444p and -pix_fmt yuv420p.
The yuv420p argument "wins", because FFmpeg uses the last occurrence.
Update:
A way to keep the edges slightly blurred (instead of thresholding the alpha channel):
The edge color is not black, but dark green.
The edge color is a result of alpha compositing in RGB color space.
I suppose the compositing is performed using the simple formula:
dst_img = alpha*img_rgb + (1-alpha)*bg
where alpha = img[:, :, 3]/255 (the alpha channel scaled to [0, 1]).
When applying the above formula, we get the following image:
The text edge color is dark green.
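For reference, the straight BGR compositing that produces this dark green edge is the same as the LAB code sample below, just without the color-space conversion (composed_bgr.png is an assumed output name):
import cv2
import numpy as np

img = cv2.imread('txtpng.png', cv2.IMREAD_UNCHANGED)
img2 = np.zeros((300, 400, 4), np.uint8)
img2[(300-img.shape[0])//2:(300+img.shape[0])//2, (400-img.shape[1])//2:(400+img.shape[1])//2, :] = img
alpha = img2[:, :, 3].astype(np.float64)/255  # Convert alpha to range [0, 1]
alpha = np.dstack((alpha, alpha, alpha))      # Duplicate alpha to 3 channels
img2 = img2[:, :, 0:3]                        # Only BGR without alpha
bg = np.full_like(img2, (0, 255, 0))          # Green background
composed_bgr = (img2.astype(np.float64)*alpha + bg.astype(np.float64)*(1-alpha)).astype(np.uint8)
cv2.imwrite('composed_bgr.png', composed_bgr)  # Store the dark-edged result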
Suggested solution:
Apply alpha compositing in LAB color space.
Advantage:
Unlike the RGB and CMYK color models, CIELAB is designed to approximate human vision.
Linear operations in LAB color space are perceived as (approximately) linear by human vision.
Here is a code sample for alpha compositing in LAB color space:
img = cv2.imread('txtpng.png', cv2.IMREAD_UNCHANGED)
img2 = np.zeros((300, 400, 4), np.uint8)
img2[(300-img.shape[0])//2:(300+img.shape[0])//2, (400-img.shape[1])//2:(400+img.shape[1])//2, :] = img
alpha = img2[:, :, 3].astype(np.float64)/255 # Convert alpha to range [0, 1]
alpha = np.dstack((alpha, alpha, alpha)) # Duplicate alpha to 3 channels
img2 = img2[:, :, 0:3] # Only BGR without alpha
bg = np.full_like(img2, (0, 255, 0)) # Green background
bg = cv2.cvtColor(bg, cv2.COLOR_BGR2LAB)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2LAB)
composed_img = (img2.astype(np.float64)*alpha + bg.astype(np.float64)*(1-alpha)).astype(np.uint8)
composed_img = cv2.cvtColor(composed_img, cv2.COLOR_LAB2BGR)
cv2.imwrite('composed_img.png', composed_img) # Store the result
Result:
The text edge color looks better.
Note:
I couldn't find any papers about alpha compositing in LAB color space (though I didn't look very hard).
Don't use on_color to create a background color for your CompositeVideoClip, or else the edges will be black. Use this helper function to generate a color clip instead:
# imports assumed by the helper: PIL for Image, numpy, and moviepy.editor as mpy
import numpy
from PIL import Image
import moviepy.editor as mpy

def get_color_video_clip(color, size, duration):
    image = Image.new("RGB", size, color)
    image_array = numpy.array(image)
    return mpy.ImageClip(image_array, duration=duration)
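A short usage sketch, reusing the sizes and file name from the question (the output file name is arbitrary):
bg_color = get_color_video_clip((0, 255, 0), (400, 300), 2)
text_png = mpy.ImageClip("./txtpng.png", duration=2).set_position((5, 5))
stacked_clips = mpy.CompositeVideoClip([bg_color, text_png], size=bg_color.size).set_duration(2)
stacked_clips.write_videofile("composited.mp4", fps=24)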
Also, as a sanity check, play the video back in different media players. On Mac, IINA made edges gray for me, while QuickTime Player had no such issues.
I am loading several images that will go over my face, and I am having difficulty getting the image to go over the square created for the face. I have looked at many resources, but for some reason I receive an error when attempting to follow their method.
Every time I do so, I receive an error:
ValueError: could not broadcast input array from shape (334,334,3) into shape (234,234,3)
I think the images might be too large; however, I tried to resize them to see if they would fit, to no avail.
Here is my code:
import cv2
import sys
import logging as log
import datetime as dt
from time import sleep
import os
import random
from timeit import default_timer as timer

cascPath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascPath)
#log.basicConfig(filename='webcam.log',level=log.INFO)

video_capture = cv2.VideoCapture(0)
anterior = 0
#s_img = cv2.imread("my.jpg")

increment = 0

for filename in os.listdir("Faces/"):
    if filename.endswith(".png"):
        FullFile = (os.path.join("Faces/", filename))
        #ret, frame = video_capture.read()
        frame = cv2.imread(FullFile, cv2.IMREAD_UNCHANGED)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
        for (x, y, w, h) in faces:
            roi_color = frame[y:y + h, x:x + w]
            status = cv2.imwrite('export/faces_detected' + str(increment) + '.png', roi_color)
            increment = increment + 1
    else:
        continue

masks = []
for filename in os.listdir("export/"):
    if filename.endswith(".png"):
        FullFile = (os.path.join("export/", filename))
        s_img = cv2.imread(FullFile)
        masks.append(s_img)

Start = timer()
End = timer()
MasksSize = len(masks)
nrand = random.randint(0, MasksSize - 1)
increment = 0

while True:
    if not video_capture.isOpened():
        print('Unable to load camera.')
        sleep(5)
        pass

    # Capture frame-by-frame
    ret, frame = video_capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30)
    )
    edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)

    # Draw a rectangle around the faces
    for (x, y, w, h) in faces:
        if (End - Start) > 3:
            Start = timer()
            End = timer()
            nrand = random.randint(0, MasksSize - 1)
        # -75 and +20 added to fit my face
        cv2.rectangle(frame, (x, y - 75), (x + w, y + h + 20), (0, 255, 0), 2)
        s_img = masks[nrand]
        increment = increment + 1
        #maskresize = cv2.resize(s_img, (150, 150))
        #frame[y:y+s_img.shape[0], x:x+s_img.shape[1]] = s_img  # problem occurs here with
        # ValueError: could not broadcast input array from shape (334,334,3) into shape (234,234,3)
        # I assume I am inserting something too big?
        End = timer()

    if anterior != len(faces):
        anterior = len(faces)
        #log.info("faces: "+str(len(faces))+" at "+str(dt.datetime.now()))

    # Display the resulting frame
    cv2.imshow('Video', frame)
    #cv2.imshow('Video', cartoon)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # Display the resulting frame
    cv2.imshow('Video', frame)

# When everything is done, release the capture
video_capture.release()
cv2.destroyAllWindows()
In the following line,
frame[y:y+s_img.shape[0], x:x+s_img.shape[1]] = s_img
you are attempting to assign s_img to frame[y:y+s_img.shape[0], x:x+s_img.shape[1]], which have different shapes.
You can check the shapes of the two by printing them (they will be the same as the shapes mentioned in the error).
Try resizing s_img to the same shape and then assign it.
Refer to this link: https://www.geeksforgeeks.org/image-resizing-using-opencv-python/
I used this function to resize the image to scale.
def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)

    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)

    # return the resized image
    return resized
Then later on I called:
r = image_resize(s_img, height=h, width=w)
frame[y:y+r.shape[0], x:x+r.shape[1]] = r
Answer taken from here too:
Resize an image without distortion OpenCV
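Note that image_resize preserves the aspect ratio, so r may still not match the face rectangle exactly in one dimension. If an exact fit is needed, a plain cv2.resize to the rectangle size guarantees that the assignment broadcasts (a sketch using the same variable names as above):
r = cv2.resize(s_img, (w, h), interpolation=cv2.INTER_AREA)  # force the overlay to exactly w x h
frame[y:y+h, x:x+w] = r  # shapes now match, as long as the rectangle lies inside the frame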
I am capturing frames in real time from several cameras. The output of each camera is displayed in its own window with cv2.imshow. What I want to achieve now is for all the windows to be concatenated into a grid/matrix; for example, with 4 cameras, the windows should be shown concatenated in a 2x2 grid.
Below I attach the code I have, which lets me capture the frames from the different cameras, but I have not been able to do the above.
cap = []
ret = []
frame = []
final = ""
i = 0

cap.append(cv2.VideoCapture(0))
cap.append(cv2.VideoCapture(1))
cap.append(cv2.VideoCapture(2))

number_cameras = len(cap)

# I initialize values
for x in range(number_cameras):
    ret.append(x)
    frame.append(x)

while(True):
    # I capture frame by frame from each camera
    ret[i], frame[i] = cap[i].read()

    if i == number_cameras - 1:
        final = cv2.hconcat([cv2.resize(frame[x], (400, 400)) for x in range(number_cameras)])
        cv2.namedWindow('frame')
        cv2.moveWindow('frame', 0, 0)
        # I show the concatenated outputs
        cv2.imshow('frame', final)
        i = 0
    else:
        i = i + 1

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# I release the cameras
for x in range(number_cameras):
    cap[x].release()

cv2.destroyAllWindows()
You can construct an output frame out of 4 frames by resizing each frame to half its width and height. Then create an empty frame and fill each quadrant according to the resized width and height.
Since I don't currently have 4 cameras, I am simulating this using a single camera.
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

while(True):
    ret, frame = cap.read()

    # find half of the width
    w = int(frame.shape[1] * 50 / 100)
    # find half of the height
    h = int(frame.shape[0] * 50 / 100)
    dim = (w, h)

    # resize the frame to half its size
    frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)

    # create an empty frame
    output = np.zeros((h*2, w*2, 3), np.uint8)

    # place a frame at the top left
    output[0:h, 0:w] = frame
    # place a frame at the top right
    output[0:h, w:w*2] = frame
    # place a frame at the bottom right
    output[h:h*2, w:w*2] = frame
    # place a frame at the bottom left
    output[h:h*2, 0:w] = frame

    cv2.imshow("Output", output)

    key = cv2.waitKey(1) & 0xFF
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
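Alternatively, staying closer to the hconcat call in the question, the 2x2 grid can be assembled with cv2.hconcat and cv2.vconcat once each camera frame has been resized to a common size (a sketch assuming a list named frames holding four equally sized images):
top_row = cv2.hconcat([frames[0], frames[1]])      # first two cameras side by side
bottom_row = cv2.hconcat([frames[2], frames[3]])   # last two cameras side by side
grid = cv2.vconcat([top_row, bottom_row])          # stack the two rows into a 2x2 grid
cv2.imshow('frame', grid)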
I am trying to display an image over another image at particular coordinates. I have detected the aruco markers using the webcam and I want to display another image over the aruco marker. The aruco marker can be moved, and the overlaying image should move along with the marker.
There are various draw functions and ways to put text onto the image. I have tried image overlay and image homography.
I can obtain the coordinates of the corners.
Is there any function to insert the image at those coordinates?
import numpy as np
import cv2
import cv2.aruco as aruco
import glob

markerLength = 0.25
cap = cv2.VideoCapture(0)

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

objp = np.zeros((6*7, 3), np.float32)
objp[:, :2] = np.mgrid[0:7, 0:6].T.reshape(-1, 2)

objpoints = []
imgpoints = []

images = glob.glob('calib_images/*.jpg')

for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (7, 6), None)
    if ret == True:
        objpoints.append(objp)
        corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)
        img = cv2.drawChessboardCorners(img, (7, 6), corners2, ret)

ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)

calibrationFile = "calibrationFileName.xml"
calibrationParams = cv2.FileStorage(calibrationFile, cv2.FILE_STORAGE_READ)
camera_matrix = calibrationParams.getNode("cameraMatrix").mat()
dist_coeffs = calibrationParams.getNode("distCoeffs").mat()

while(True):
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
    arucoParameters = aruco.DetectorParameters_create()
    corners, ids, rejectedImgPoints = aruco.detectMarkers(gray, aruco_dict, parameters=arucoParameters)
    if np.all(ids != None):
        rvec, tvec, _ = aruco.estimatePoseSingleMarkers(corners, markerLength, mtx, dist)
        axis = aruco.drawAxis(frame, mtx, dist, rvec, tvec, 0.3)
        print(ids)
        display = aruco.drawDetectedMarkers(axis, corners)
        display = np.array(display)
    else:
        display = frame
    cv2.imshow('Display', display)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
To replace a part of an image:
import cv2
import numpy as np
img1 = cv2.imread('Desert.jpg')
img2 = cv2.imread('Penguins.jpg')
img3 = img1.copy()
# replace values at coordinates (100, 100) to (399, 399) of img3 with region of img2
img3[100:400,100:400,:] = img2[100:400,100:400,:]
cv2.imshow('Result1', img3)
To alpha blend two images (of the same size):
alpha = 0.5
img3 = np.uint8(img1*alpha + img2*(1-alpha))
cv2.imshow('Result2', img3)
@user8190410's answer works fine. Just to give a complete answer, in order to alpha blend two images of different sizes at a particular position, you can do the following:
alpha= 0.7
img1_mod = img1.copy()
img1_mod[:pos_x,:pos_y,:] = img1[:pos_x,:pos_y,:]*alpha + img2*(1-alpha)
cv2.imshow('Image1Mod', img1_mod)
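If the blend should happen at an arbitrary top-left corner (pos_x, pos_y) with a smaller img2, the following sketch (my own, with hypothetical position values) makes the blended region explicit:
h, w = img2.shape[:2]
blended = img1.copy()
roi = blended[pos_y:pos_y+h, pos_x:pos_x+w, :]                               # region of img1 under the overlay
blended[pos_y:pos_y+h, pos_x:pos_x+w, :] = np.uint8(img2*alpha + roi*(1-alpha))
cv2.imshow('BlendedAtPos', blended)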
Actually, I found that image homography can be used to do it.
Here is the updated code.
import numpy as np
import cv2
import cv2.aruco as aruco

cap = cv2.VideoCapture(0)

while(True):
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    aruco_dict = aruco.Dictionary_get(aruco.DICT_6X6_250)
    arucoParameters = aruco.DetectorParameters_create()
    corners, ids, rejectedImgPoints = aruco.detectMarkers(gray, aruco_dict, parameters=arucoParameters)

    if np.all(ids != None):
        display = aruco.drawDetectedMarkers(frame, corners)

        # the four corners of the first detected marker
        x1 = (corners[0][0][0][0], corners[0][0][0][1])
        x2 = (corners[0][0][1][0], corners[0][0][1][1])
        x3 = (corners[0][0][2][0], corners[0][0][2][1])
        x4 = (corners[0][0][3][0], corners[0][0][3][1])

        im_dst = frame
        im_src = cv2.imread("mask.jpg")
        size = im_src.shape

        pts_dst = np.array([x1, x2, x3, x4])
        pts_src = np.array(
            [
                [0, 0],
                [size[1] - 1, 0],
                [size[1] - 1, size[0] - 1],
                [0, size[0] - 1]
            ], dtype=float
        )

        # warp the source image onto the marker and paste it into the frame
        h, status = cv2.findHomography(pts_src, pts_dst)
        temp = cv2.warpPerspective(im_src, h, (im_dst.shape[1], im_dst.shape[0]))
        cv2.fillConvexPoly(im_dst, pts_dst.astype(int), 0, 16)  # black out the marker area
        im_dst = im_dst + temp                                  # add the warped image into the hole

        cv2.imshow('Display', im_dst)
    else:
        display = frame
        cv2.imshow('Display', display)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()