I want to make a screen recorder that shows the cursor in each frame of the video. To do this, I paste an image of the cursor onto each screenshot, but when I finish recording, the video produces an error when I try to play it.
import cv2
import os
import pyautogui
from PIL import Image
import numpy as np
import keyboard

screen_size = pyautogui.size()
FPS = 11.0

def recorder(screen_size, FPS):
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    video = cv2.VideoWriter("VIDEO.avi", fourcc, FPS, (screen_size))
    start = input("RECORD ")
    if start == "y":
        ruta_actual = os.getcwd()
        print(f"ruta actual: {ruta_actual}")
        while True:
            if keyboard.is_pressed("p"):
                break
            # CURSOR POSITION
            screenshot_img = pyautogui.screenshot()
            x, y = pyautogui.position()
            cursor_img = Image.open(f"{ruta_actual}\\resources\\cursor_state0.png").convert("RGBA")
            complete_img = Image.new('RGBA', (screen_size), (0, 0, 0, 0))
            complete_img.paste(screenshot_img, (0, 0))
            complete_img.paste(cursor_img, (x, y), cursor_img)
            data = np.array(complete_img)
            video.write(data)
        video.release()

if __name__ == "__main__":
    grabar = recorder(screen_size, FPS)
You are writing frames in RGBA format to cv2.VideoWriter, while the expected format is BGR.
Replace data = np.array(complete_img) with:
data = cv2.cvtColor(np.array(complete_img), cv2.COLOR_RGBA2BGR)
On my machine I am not getting any error when playing the video (I get a black video instead), so there might be a second problem.
If it still doesn't work, try replacing the codec with cv2.VideoWriter_fourcc(*"MJPG").
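Putting both suggestions together, a rough sketch of the capture loop might look like this (it reuses screen_size, FPS and the cursor image exactly as set up in the question, loading the cursor once outside the loop):

cursor_path = os.path.join(os.getcwd(), "resources", "cursor_state0.png")
cursor_img = Image.open(cursor_path).convert("RGBA")    # load once instead of every frame

fourcc = cv2.VideoWriter_fourcc(*"MJPG")                # fallback codec suggested above
video = cv2.VideoWriter("VIDEO.avi", fourcc, FPS, screen_size)

while not keyboard.is_pressed("p"):
    screenshot_img = pyautogui.screenshot()
    x, y = pyautogui.position()
    complete_img = Image.new('RGBA', screen_size, (0, 0, 0, 0))
    complete_img.paste(screenshot_img, (0, 0))
    complete_img.paste(cursor_img, (x, y), cursor_img)
    # convert RGBA (Pillow) to BGR (what VideoWriter expects) before writing
    frame = cv2.cvtColor(np.array(complete_img), cv2.COLOR_RGBA2BGR)
    video.write(frame)

video.release()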
This is my first time posting a question, so pardon any mistakes. I'm trying to write a script that does face recognition and saves the video file at the same time, and I'm running into what I think are latency issues. When there isn't a face to detect, it saves the video file fine. When there is a face, though, it seems to get only every other frame. I feel like that is because the computation for finding the face prevents it from saving the next frame. Is there a way around this? Maybe threading or multiprocessing?
import face_recognition as fr
import os
import face_recognition
import numpy as np
import cv2

def get_encoded_faces():
    encoded = {}
    for dirpath, dnames, fnames in os.walk("./faces"):
        for f in fnames:
            if f.endswith(".jpg") or f.endswith(".png"):
                face = fr.load_image_file("faces/" + f)
                encoding = fr.face_encodings(face)[0]
                encoded[f.split(".")[0]] = encoding
    return encoded

def unknown_image_encoded(img):
    face = fr.load_image_file("faces/" + img)
    encoding = fr.face_encodings(face)[0]
    return encoding

faces = get_encoded_faces()
faces_encoded = list(faces.values())
known_face_names = list(faces.keys())

def FindFace(img):
    face_locations = face_recognition.face_locations(img)
    unknown_face_encodings = face_recognition.face_encodings(img, face_locations)
    face_names = []
    for face_encoding in unknown_face_encodings:
        matches = face_recognition.compare_faces(faces_encoded, face_encoding)
        name = "Unknown"
        face_distances = face_recognition.face_distance(faces_encoded, face_encoding)
        best_match_index = np.argmin(face_distances)
        if matches[best_match_index]:
            name = known_face_names[best_match_index]
        face_names.append(name)
    #cv2.imwrite('final_image.png', img)

video_capture = cv2.VideoCapture(1)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 20.0, (640, 480))

if not video_capture.isOpened():
    raise Exception("Could not open video device")

while(video_capture.isOpened()):
    ret, frame = video_capture.read()
    out.write(frame)
    #cv2.imshow('Video', frame)
    FindFace(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

video_capture.release()
cv2.destroyAllWindows()
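This is just a sketch of the threading idea mentioned in the question, not a verified fix: write every frame to the file immediately and hand recognition work to a background thread through a bounded queue (it reuses the question's FindFace and camera index 1, and assumes it is acceptable to skip recognition on some frames when the worker is busy):

import queue
import threading

import cv2

frame_queue = queue.Queue(maxsize=10)

def recognition_worker():
    # Runs FindFace on frames in the background so the capture loop stays fast.
    while True:
        frame = frame_queue.get()
        if frame is None:          # sentinel: stop the worker
            break
        FindFace(frame)

worker = threading.Thread(target=recognition_worker, daemon=True)
worker.start()

video_capture = cv2.VideoCapture(1)
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'MJPG'), 20.0, (640, 480))

while video_capture.isOpened():
    ret, frame = video_capture.read()
    if not ret:
        break
    out.write(frame)               # write every frame immediately
    if not frame_queue.full():     # drop recognition work, never video frames
        frame_queue.put(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

frame_queue.put(None)
video_capture.release()
out.release()
cv2.destroyAllWindows()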
I want to get the depth and RGB video streams from a Kinect (version 1).
I'm using the Python 3 version of pykinect, not CPython.
I have found some examples, but the pykinect documentation is nearly nonexistent and I don't want to use pygame.
On Linux with freenect I did:
rgb_stream = freenect.sync_get_video()[0]
rgb_stream = rgb_stream[:, :, ::-1]
rgb_image = cv.cvtColor(rgb_stream, cv.COLOR_BGR2RGB)
depth_stream = freenect.sync_get_depth()[0]
depth_stream = np.uint8(depth_stream)
depth_image = cv.cvtColor(depth_stream, cv.COLOR_GRAY2RGB)
However, I'm using pykinect on Windows, and I want to get the depth and RGB streams in a similar fashion, then process them with OpenCV and display them with Qt.
Here is an example code I found:
from pykinect import nui
import numpy
import cv2

def video_handler_function(frame):
    video = numpy.empty((480, 640, 4), numpy.uint8)
    frame.image.copy_bits(video.ctypes.data)
    cv2.imshow('KINECT Video Stream', video)

kinect = nui.Runtime()
kinect.video_frame_ready += video_handler_function
kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)

cv2.namedWindow('KINECT Video Stream', cv2.WINDOW_AUTOSIZE)

while True:
    key = cv2.waitKey(1)
    if key == 27:
        break

kinect.close()
cv2.destroyAllWindows()
What is video_handler_function? What is the purpose of kinect.video_frame_ready += video_handler_function?
I tried kinect.depth_stream.open(nui.ImageStreamType.Depth, 2, nui.ImageResolution.Resolution320x240, nui.ImageType.Depth) to get the depth image with some modifications to the handler function but couldn't make it work.
from pykinect import nui
import numpy
import cv2

kinect = nui.Runtime()
kinect.skeleton_engine.enabled = True

def getColorImage(frame):
    height, width = frame.image.height, frame.image.width  # get width and height of the image
    rgb = numpy.empty((height, width, 4), numpy.uint8)
    frame.image.copy_bits(rgb.ctypes.data)  # copy the bits of the image to the array
    cv2.imshow('KINECT Video Stream', rgb)  # display the image

def getDepthImage(frame):
    height, width = frame.image.height, frame.image.width  # get frame height and width
    depth = numpy.empty((height, width, 1), numpy.uint8)
    arr2d = (depth >> 3) & 4095
    arr2d >>= 4
    frame.image.copy_bits(arr2d.ctypes.data)
    cv2.imshow('KINECT depth Stream', arr2d)

def frame_ready(frame):
    for skeleton in frame.SkeletonData:
        if skeleton.eTrackingState == nui.SkeletonTrackingState.TRACKED:
            print(skeleton.Position.x, skeleton.Position.y, skeleton.Position.z, skeleton.Position.w)

def main():
    while True:
        kinect.video_frame_ready += getColorImage
        kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
        cv2.namedWindow('KINECT Video Stream', cv2.WINDOW_AUTOSIZE)
        kinect.depth_frame_ready += getDepthImage
        kinect.depth_stream.open(nui.ImageStreamType.Depth, 2, nui.ImageResolution.Resolution320x240, nui.ImageType.Depth)
        cv2.namedWindow('KINECT depth Stream', cv2.WINDOW_AUTOSIZE)
        kinect.skeleton_frame_ready += frame_ready
        if cv2.waitKey(0) == 27:
            cv2.destroyAllWindows()
            kinect.close()
            break

if __name__ == '__main__':
    main()
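One thing that stands out in getDepthImage above is the order of operations: the array is shifted and masked before copy_bits() has filled it with frame data. A hedged sketch of the reordered handler, under the assumption that each depth sample is 16 bits wide, would be:

def getDepthImage(frame):
    # Sketch only, not a verified fix: copy the raw bits first, then unpack.
    height, width = frame.image.height, frame.image.width
    depth = numpy.empty((height, width, 1), numpy.uint16)      # assumed 16-bit samples
    frame.image.copy_bits(depth.ctypes.data)                   # fill the array before manipulating it
    arr2d = (((depth >> 3) & 4095) >> 4).astype(numpy.uint8)   # same unpacking as the attempt, scaled to 8 bit
    cv2.imshow('KINECT depth Stream', arr2d)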
I am wondering about the behavior of the BackgroundSubtractor classes of OpenCV in Python. According to the official documentation, all subclasses should have apply and getBackgroundImage.
But if I adapt the Python example to show the background instead of the foreground, it works for:
createBackgroundSubtractorGSOC
createBackgroundSubtractorCNT
createBackgroundSubtractorKNN
createBackgroundSubtractorLSBP
createBackgroundSubtractorMOG2
But for two of them it doesn't work:
createBackgroundSubtractorMOG
createBackgroundSubtractorGMG
Is this behavior intended, or did something go wrong?
My code:
import numpy as np
import cv2

cap = cv2.VideoCapture('vtest.avi')

fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

#works
#fgbg = cv2.bgsegm.createBackgroundSubtractorGSOC()
#fgbg = cv2.bgsegm.createBackgroundSubtractorCNT()
#fgbg = cv2.createBackgroundSubtractorKNN()
#fgbg = cv2.bgsegm.createBackgroundSubtractorLSBP()
#fgbg = cv2.createBackgroundSubtractorMOG2(100, 16, 0)

#error
#fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
#fgbg = cv2.bgsegm.createBackgroundSubtractorGMG()

while(1):
    ret, frame = cap.read()
    height = np.size(frame, 0)
    width = np.size(frame, 1)
    blank_image = np.zeros((height, width, 3), np.uint8)
    fgmask = fgbg.apply(frame)
    bgmask = fgbg.getBackgroundImage()
    cv2.imshow('frame', bgmask)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break

cap.release()
cv2.destroyAllWindows()
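I can't say whether it is intended, but you can probe each factory at runtime instead of assuming every subclass implements getBackgroundImage. A small sketch (the dummy frame and the cv2.error catch are my assumptions, not taken from the docs):

import numpy as np
import cv2

frame = np.zeros((240, 320, 3), np.uint8)   # dummy frame, just to initialise each model

for make in (cv2.bgsegm.createBackgroundSubtractorMOG,
             cv2.bgsegm.createBackgroundSubtractorGMG,
             cv2.createBackgroundSubtractorKNN,
             cv2.createBackgroundSubtractorMOG2):
    fgbg = make()
    fgbg.apply(frame)
    try:
        fgbg.getBackgroundImage()
        print(make.__name__, "provides getBackgroundImage")
    except cv2.error as err:
        print(make.__name__, "failed:", err)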
I have a simple question: while streaming an audio signal from the audio jack in Python using the pyaudio library, how can I keep streaming the audio signal until I choose to stop the program?
Example: the way we capture webcam frames endlessly inside an infinite while loop.
For example, in this code (taken from a link) that records the stream for just 5 seconds, what would be the modification that serves my purpose?
import pyaudio
import wave
import numpy as np

CHUNK = 44100
FORMAT = pyaudio.paInt32
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    audio_data = np.fromstring(data, dtype=np.int32)
    print(data)
    print(audio_data)
    frames.append(data)

print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()
Also, the code given on this link (Handling audio data using callback mode) records it for only 4-5 seconds.
I would be really grateful if someone could help me with this!
Well, meanwhile I figured out a solution:
import pyaudio
import numpy as np
import pylab
import time
import sys
import matplotlib.pyplot as plt

RATE = 44100
CHUNK = int(RATE/20)  # RATE / number of updates per second

def soundplot(stream):
    t1 = time.time()
    #use np.frombuffer if you face an error at this line
    data = np.fromstring(stream.read(CHUNK), dtype=np.int16)
    print(data)

if __name__ == "__main__":
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    for i in range(sys.maxsize**10):
        soundplot(stream)
    stream.stop_stream()
    stream.close()
    p.terminate()
And this post here will help you in a simple and concrete way.
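A minimal variation on the same idea, as a sketch assuming the default input device: loop until Ctrl+C instead of counting iterations, then save whatever was captured.

import wave

import pyaudio

RATE = 44100
CHUNK = int(RATE / 20)
FORMAT = pyaudio.paInt16

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK)

frames = []
try:
    while True:                      # keep streaming until the user interrupts
        frames.append(stream.read(CHUNK))
except KeyboardInterrupt:            # Ctrl+C stops the recording
    pass

stream.stop_stream()
stream.close()

# save everything captured so far
wf = wave.open("output.wav", "wb")
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b"".join(frames))
wf.close()

p.terminate()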
Hello, this is my code, which records audio and video separately and lets you pause both; I hope it helps you.
import cv2
import numpy as np
from datetime import datetime
import keyboard
import pyaudio
import wave
import sys

flagrecord = True
#chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
ropen = True
chunk = int(RATE/20)

def show_webcam(flagrecord):
    cam = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    frame_width = int(cam.get(3))
    frame_height = int(cam.get(4))
    FONT = cv2.FONT_HERSHEY_PLAIN
    filename = datetime.now().strftime("%Y-%m-%d_%H.%M.%S") + ".avi"
    filenamea = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=chunk)
    out = cv2.VideoWriter(filename, fourcc, 20, (frame_width, frame_height))
    all = []
    aux = []
    stream.start_stream()
    flagaudio = False
    while True:
        ret_val, img = cam.read()
        title = datetime.now().strftime("%Y-%m-%d*%H:%M:%S")
        if flagrecord:
            img = cv2.flip(img, 1)
            cv2.putText(img, "REC", (40,40), FONT, 3, (0,0,255), 3)
            cv2.circle(img, (20,20), 10, (0,0,255), -1)
            cv2.rectangle(img, (30,430), (600,480), (0,0,0), -1)
            cv2.putText(img, title, (40,470), FONT, 3, (255,255,255), 2)
            cv2.imshow('Grabacion de Audiencias', img)
            data = stream.read(chunk)
            aux.append(data)
            out.write(img)
        else:
            img = cv2.flip(img, 1)
            cv2.putText(img, "PAUSE", (40,40), FONT, 3, (255,0,0), 3)
            cv2.circle(img, (20,20), 10, (255,0,0), -1)
            cv2.rectangle(img, (50,430), (570,480), (0,0,0), -1)
            cv2.putText(img, "Audiencias En Pausa", (60,470), FONT, 3, (255,0,0), 2)
            cv2.imshow('Grabacion de Audiencias', img)
            if flagaudio:
                all += aux
                del aux[:]
                data = 0
                stream.stop_stream()
            else:
                pass
        q = cv2.waitKey(1)
        if q == 27:
            break
        if q == ord('p'):
            flagrecord = False
            flagaudio = True
        if q == ord('c'):
            flagrecord = True
            flagaudio = False
            stream.start_stream()
        if q == ord('q'):
            break
    cam.release()
    out.release()
    cv2.destroyAllWindows()
    stream.close()
    p.terminate()
    all += aux
    data = b''.join(all)  # the recorded chunks are bytes, so join them as bytes
    wf = wave.open(filenamea, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

def main():
    show_webcam(flagrecord)  # fixed: the original passed mirror=True, which does not match the signature

if __name__ == '__main__':
    main()
Here is the code for image thresholding. I am getting the error at line 22, which is:
ret,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
In this code I want to capture image frames from the video camera and then perform various kinds of thresholding operations on the captured frames.
I have stored image frames at various instances of time. My objective is to segment the moving objects in the video, hence I am applying a thresholding operation.
Does anybody have any idea how to do it?
Thanks in advance.
import cv2
import numpy as np
import time
from matplotlib import pyplot as plt
import sys

cam = cv2.VideoCapture(0)

while(cam.isOpened()):
    ret, frame = cam.read()  #Keep on capturing the frames continuously
    while (ret == True):
        #img = cv2.imread('/home/shrikrishna/Detection&Tracking/OpenCV-Tutorial',6)
        cv2.imwrite('At time'+ str(time.clock()) + '.jpg', frame)
        img2 = cv2.imread('At time'+ str(time.clock()) + '.jpg',6)
        t = str(time.clock())
        cv2.imshow('Orignal',frame)
        k = cv2.waitKey(0) & 0xffff
        if(k==27):
            #img = cv2.imread('At time'+ str(time.clock()) + '.jpg',6)
            break
        if(k==ord('q')):
            sys.exit(0)
            break

    #cv2.imwrite('At time'+ t + '.jpg', frame)
    img = cv2.imread('At time'+ t + '.jpg',6)
    ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
    ret,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
    ret,thresh3 = cv2.threshold(img,127,255,cv2.THRESH_TRUNC)
    ret,thresh4 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO)
    ret,thresh5 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)
    titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV']
    images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
    for i in xrange(6):
        plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')
        plt.title(titles[i])
        plt.xticks([]),plt.yticks([])
    plt.show()
    cv2.waitKey(0)

cv2.destroyAllWindows()
In the following line, you read in an image as a colour image (based on the second parameter, flags).
img = cv2.imread('At time'+ t + '.jpg',6)
This means that img contains 3 channels, which in Python is represented by a 3-dimensional array.
You immediately use this image as the source for thresholding:
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
According to the documentation, the first parameter to threshold() is:
src – input array (single-channel, 8-bit or 32-bit floating point).
That means you need a single channel image, e.g. a grayscale image:
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh1 = cv2.threshold(img_gray,127,255,cv2.THRESH_BINARY)
# ...
Another option would be to just read the image as grayscale in first place:
img_gray = cv2.imread('At time'+ t + '.jpg',0)
# ...
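As a side note (my suggestion, not part of the fix above), the imwrite/imread round trip is unnecessary; the frame captured from the camera can be converted and thresholded directly:

# threshold the frame straight from the camera, no temporary file needed
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)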