Continuously streaming an audio signal in real time, indefinitely, in Python - python-3.x

I have a simple question: while streaming an audio signal from the audio jack in Python with the pyaudio library, how can I keep streaming the audio signal until I choose to "stop" the program?
Example: the way we capture webcam frames indefinitely inside an infinite while loop.
For example, in this code (taken from link), which records the stream for just 5 seconds, what modification would serve my purpose?
import pyaudio
import wave
import numpy as np

CHUNK = 44100
FORMAT = pyaudio.paInt32
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    # np.fromstring is deprecated for raw bytes; np.frombuffer is the safe spelling
    audio_data = np.frombuffer(data, dtype=np.int32)
    print(data)
    print(audio_data)
    frames.append(data)
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

# write the recorded frames to the WAV file named above
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
Also, the code given at this link (Handling audio data using callback mode) records for only 4-5 seconds.
I would be really grateful if someone could help me with this!
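A minimal sketch of the modification being asked for (my own illustration, reusing the setup from the code above): replace the bounded for loop with an open-ended loop and stop with Ctrl+C; the frames captured so far can then be written out exactly as before.

print("* recording (press Ctrl+C to stop)")
frames = []
try:
    while True:                      # no fixed duration: run until interrupted
        frames.append(stream.read(CHUNK))
except KeyboardInterrupt:
    print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()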

Well, meanwhile I figured out a solution:
import pyaudio
import numpy as np
import sys

RATE = 44100
CHUNK = int(RATE / 20)  # RATE / number of updates per second

def soundplot(stream):
    # use np.frombuffer; np.fromstring is deprecated for raw bytes
    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
    print(data)

if __name__ == "__main__":
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    for i in range(sys.maxsize**10):  # astronomically large bound: effectively forever
        soundplot(stream)
    stream.stop_stream()
    stream.close()
    p.terminate()
And this post here will help you in a simple and concrete way.
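For the callback mode mentioned in the question, a minimal sketch of my own (following the standard PyAudio callback API) that keeps capturing until you stop it: the callback returns paContinue every time, so the stream never ends on its own.

import time
import pyaudio

RATE = 44100
CHUNK = int(RATE / 20)
frames = []

def callback(in_data, frame_count, time_info, status):
    frames.append(in_data)                 # stash the captured chunk
    return (in_data, pyaudio.paContinue)   # keep the stream running

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                frames_per_buffer=CHUNK, stream_callback=callback)
stream.start_stream()
try:
    while stream.is_active():              # main thread just waits
        time.sleep(0.1)
except KeyboardInterrupt:
    pass
stream.stop_stream()
stream.close()
p.terminate()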

Hello, this is my code, which records audio and video separately and can pause both. I hope it helps you.
import cv2
import numpy as np
from datetime import datetime
import pyaudio
import wave

flagrecord = True
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
chunk = int(RATE / 20)

def show_webcam(flagrecord):
    cam = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    frame_width = int(cam.get(3))
    frame_height = int(cam.get(4))
    FONT = cv2.FONT_HERSHEY_PLAIN
    filename = datetime.now().strftime("%Y-%m-%d_%H.%M.%S") + ".avi"
    filenamea = datetime.now().strftime("%Y-%m-%d_%H.%M.%S") + ".wav"
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=chunk)
    out = cv2.VideoWriter(filename, fourcc, 20, (frame_width, frame_height))
    audio_frames = []
    aux = []
    stream.start_stream()
    flagaudio = False
    while True:
        ret_val, img = cam.read()
        title = datetime.now().strftime("%Y-%m-%d*%H:%M:%S")
        if flagrecord:
            img = cv2.flip(img, 1)
            cv2.putText(img, "REC", (40, 40), FONT, 3, (0, 0, 255), 3)
            cv2.circle(img, (20, 20), 10, (0, 0, 255), -1)
            cv2.rectangle(img, (30, 430), (600, 480), (0, 0, 0), -1)
            cv2.putText(img, title, (40, 470), FONT, 3, (255, 255, 255), 2)
            cv2.imshow('Hearing Recording', img)
            data = stream.read(chunk)
            aux.append(data)
            out.write(img)
        else:
            img = cv2.flip(img, 1)
            cv2.putText(img, "PAUSE", (40, 40), FONT, 3, (255, 0, 0), 3)
            cv2.circle(img, (20, 20), 10, (255, 0, 0), -1)
            cv2.rectangle(img, (50, 430), (570, 480), (0, 0, 0), -1)
            cv2.putText(img, "Hearings Paused", (60, 470), FONT, 3, (255, 0, 0), 2)
            cv2.imshow('Hearing Recording', img)
            if flagaudio:
                audio_frames += aux
                del aux[:]
                stream.stop_stream()
                flagaudio = False  # stop the stream only once per pause
        q = cv2.waitKey(1)
        if q == 27:
            break
        if q == ord('p'):
            flagrecord = False
            flagaudio = True
        if q == ord('c'):
            flagrecord = True
            flagaudio = False
            stream.start_stream()
        if q == ord('q'):
            break
    cam.release()
    out.release()
    cv2.destroyAllWindows()
    stream.close()
    p.terminate()
    audio_frames += aux
    data = b''.join(audio_frames)  # audio chunks are bytes, so join with b''
    wf = wave.open(filenamea, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

def main():
    show_webcam(flagrecord)

if __name__ == '__main__':
    main()
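Since the audio and video end up in separate files, you still need to mux them afterwards. A minimal sketch of one way to do that with the ffmpeg command line (this assumes ffmpeg is installed and on PATH; the timestamped filenames are placeholders for whatever names the script generated):

import subprocess

# mux the .avi video and .wav audio produced above into one playable file
subprocess.run([
    "ffmpeg", "-y",
    "-i", "2018-01-01_12.00.00.avi",   # video written by cv2.VideoWriter
    "-i", "2018-01-01_12.00.00.wav",   # audio written by the wave module
    "-c:v", "copy", "-c:a", "aac",
    "combined.mp4",
], check=True)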

Related

How can I transcribe audio in real time with pyaudio

I'm having a hard time implementing real-time audio transcription from a microphone using the pyaudio library. I've developed my own speech recognition model with PyTorch. The model gives wrong predictions most of the time. This is the sample code:
import torch
import pyaudio
import numpy as np
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

frames_per_buffer = 3200
audio_format = pyaudio.paInt16   # renamed so it doesn't shadow the builtin format()
channel = 1
rate = 16000

p = pyaudio.PyAudio()

def record_microphone():
    stream = p.open(rate=rate, channels=channel, format=audio_format,
                    input=True, frames_per_buffer=frames_per_buffer)
    frames = []
    seconds = 1
    for i in range(0, int(rate / frames_per_buffer * seconds)):
        data = stream.read(frames_per_buffer)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    return np.frombuffer(b''.join(frames), dtype=np.int16)

def terminate():
    p.terminate()

def predict():
    audio = record_microphone()
    input_values = processor(torch.FloatTensor(audio), sampling_rate=16_000,
                             return_tensors="pt", padding="longest").input_values
    with torch.no_grad():   # inference only, no gradients needed
        logits = model(input_values).logits
    pred = torch.argmax(logits, dim=-1)
    pred_word = processor.batch_decode(pred)[0]
    print(pred_word.lower() + " ", end="")
    return pred_word

if __name__ == "__main__":   # note: the original compared the string "__name__"
    try:
        predict()
    finally:
        terminate()
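To make this run continuously rather than for a single one-second window, a minimal sketch of my own, reusing the predict() and terminate() helpers above, is to loop until interrupted:

if __name__ == "__main__":
    try:
        while True:      # transcribe one-second windows until Ctrl+C
            predict()
    except KeyboardInterrupt:
        pass
    finally:
        terminate()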

Python cv2: Video does not play

I want to make a screen recorder in which the cursor appears in each frame of the video. To do this I paste the image of the cursor into the screenshot, but when I finish recording, the video produces an error when I play it.
import cv2
import os
import pyautogui
from PIL import Image
import numpy as np
import keyboard

screen_size = pyautogui.size()
FPS = 11.0

def recorder(screen_size, FPS):
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    video = cv2.VideoWriter("VIDEO.avi", fourcc, FPS, screen_size)
    start = input("RECORD ")
    if start == "y":
        ruta_actual = os.getcwd()
        print(f"current path: {ruta_actual}")
        while True:
            if keyboard.is_pressed("p"):
                break
            # CURSOR POSITION
            screenshot_img = pyautogui.screenshot()
            x, y = pyautogui.position()
            cursor_img = Image.open(f"{ruta_actual}\\resources\\cursor_state0.png").convert("RGBA")
            complete_img = Image.new('RGBA', screen_size, (0, 0, 0, 0))
            complete_img.paste(screenshot_img, (0, 0))
            complete_img.paste(cursor_img, (x, y), cursor_img)
            data = np.array(complete_img)
            video.write(data)
        video.release()

if __name__ == "__main__":
    recorder(screen_size, FPS)
You are writing RGBA-format images to cv2.VideoWriter, while the expected format is BGR.
Replace data = np.array(complete_img) with:
data = cv2.cvtColor(np.array(complete_img), cv2.COLOR_RGBA2BGR)
On my machine I don't get any error playing the video (I get a black video instead), so there might be a second problem.
If it still doesn't work, try replacing the codec with cv2.VideoWriter_fourcc(*"MJPG").

pyaudio callback called only once

I am trying to use pyaudio with the callback option, and I want to yield the data instead of reading it from a file. When I use the callback option, the callback gets called only once.
There is another question with the same problem, but it doesn't have an answer. I have made a minimal reproducible example. The code works when blocking is used.
import time
import numpy as np
import scipy.signal
import pyaudio

sample_rate = 44100
max_amp = 2**15 - 1
f0 = 500
duration = 1
f1 = 3000

x = np.arange(0, duration, 1 / sample_rate)
y_float = max_amp * scipy.signal.chirp(x, f0, duration, f1)
y = y_float.astype(np.int16)
data = y.tobytes()   # tostring() is deprecated; tobytes() is the modern spelling

def create_data_generator(data):
    periodsize = 1000
    for i in range(int(len(data) / periodsize)):
        chunk = data[periodsize * i:periodsize * (i + 1)]
        yield chunk

data_generator = create_data_generator(data)

def callback(in_data, frame_count, time_info, status):
    data = next(data_generator)
    return (data, pyaudio.paContinue)

# -------- blocking ------------------
# periodsize = 1000
# p = pyaudio.PyAudio()
# stream = p.open(format=pyaudio.paInt16,
#                 channels=1,
#                 rate=sample_rate,
#                 output=True)
# start = time.time()
# for i in range(int(len(data) / periodsize)):
#     chunk, status = callback(0, 0, 0, 0)
#     stream.write(chunk)
# time.sleep(duration - (time.time() - start))
# stream.stop_stream()
# stream.close()
# p.terminate()

# -------- callback ------------------
periodsize = 1000
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=sample_rate,
                output=True,
                stream_callback=callback)

# start the stream (4)
stream.start_stream()

# wait for stream to finish (5)
while stream.is_active():
    time.sleep(0.1)

# stop stream (6)
stream.stop_stream()
stream.close()

# close PyAudio (7)
p.terminate()
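For what it's worth, the likely culprit (my own diagnosis, based on the documented PyAudio callback contract) is that the callback must return exactly frame_count frames; if it returns fewer, PortAudio treats that buffer as the last one and stops the stream. Here the generator yields 1000 bytes, i.e. 500 int16 frames, while the stream's default buffer is 1024 frames, so the stream finishes after the first callback. A minimal sketch that slices the buffer to the requested size and ends cleanly instead of exhausting a generator:

offset = 0

def callback(in_data, frame_count, time_info, status):
    global offset
    nbytes = frame_count * 2            # int16 mono: 2 bytes per frame
    chunk = data[offset:offset + nbytes]
    offset += nbytes
    if len(chunk) < nbytes:             # short buffer => this is the final one
        return (chunk, pyaudio.paComplete)
    return (chunk, pyaudio.paContinue)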

Python face_recognition and saving file with cv2

This is my first time posting a question, so pardon any mistakes. I'm trying to write a script that does face recognition and saves the video file at the same time, and I'm running into what I think are latency issues. When there isn't a face to detect, it saves the video file fine. When there is a face, though, it seems to get only every other frame. I think that is because it's doing the computations for finding the face, which prevents it from saving the next frame. Is there a way around this? Maybe threading or multiprocessing?
import face_recognition as fr
import os
import face_recognition
import numpy as np
import cv2

def get_encoded_faces():
    encoded = {}
    for dirpath, dnames, fnames in os.walk("./faces"):
        for f in fnames:
            if f.endswith(".jpg") or f.endswith(".png"):
                face = fr.load_image_file("faces/" + f)
                encoding = fr.face_encodings(face)[0]
                encoded[f.split(".")[0]] = encoding
    return encoded

def unknown_image_encoded(img):
    face = fr.load_image_file("faces/" + img)
    encoding = fr.face_encodings(face)[0]
    return encoding

faces = get_encoded_faces()
faces_encoded = list(faces.values())
known_face_names = list(faces.keys())

def FindFace(img):
    face_locations = face_recognition.face_locations(img)
    unknown_face_encodings = face_recognition.face_encodings(img, face_locations)
    face_names = []
    for face_encoding in unknown_face_encodings:
        matches = face_recognition.compare_faces(faces_encoded, face_encoding)
        name = "Unknown"
        face_distances = face_recognition.face_distance(faces_encoded, face_encoding)
        best_match_index = np.argmin(face_distances)
        if matches[best_match_index]:
            name = known_face_names[best_match_index]
        face_names.append(name)
    #cv2.imwrite('final_image.png', img)

video_capture = cv2.VideoCapture(1)
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0, (640, 480))
if not video_capture.isOpened():
    raise Exception("Could not open video device")
while video_capture.isOpened():
    ret, frame = video_capture.read()
    out.write(frame)
    #cv2.imshow('Video', frame)
    FindFace(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
video_capture.release()
out.release()
cv2.destroyAllWindows()
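Threading along the lines the question suggests is one way around the dropped frames. A minimal sketch of my own (reusing FindFace, video_capture, and out from above, not a drop-in replacement): the capture loop keeps writing every frame, while a single worker thread runs FindFace on the most recent frame whenever it is free.

import threading
import time

latest_frame = None
frame_lock = threading.Lock()
stop_event = threading.Event()

def recognition_worker():
    # run FindFace on the newest frame, at whatever pace recognition allows
    while not stop_event.is_set():
        with frame_lock:
            frame = None if latest_frame is None else latest_frame.copy()
        if frame is None:
            time.sleep(0.01)
            continue
        FindFace(frame)

worker = threading.Thread(target=recognition_worker, daemon=True)
worker.start()

while video_capture.isOpened():
    ret, frame = video_capture.read()
    if not ret:
        break
    out.write(frame)                 # writing never waits on recognition
    with frame_lock:
        latest_frame = frame
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

stop_event.set()
worker.join()
video_capture.release()
out.release()
cv2.destroyAllWindows()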

Simply get rgb and depth image stream with pykinect and python3

I want to get the depth and RGB video streams from a Kinect (version 1).
I'm using the Python 3 version of pykinect, not CPython.
I have found some examples, but the pykinect documentation is nearly nonexistent, and I don't want to use pygame.
On Linux with freenect I did:
rgb_stream = freenect.sync_get_video()[0]
rgb_stream = rgb_stream[:, :, ::-1]
rgb_image = cv.cvtColor(rgb_stream, cv.COLOR_BGR2RGB)
depth_stream = freenect.sync_get_depth()[0]
depth_stream = np.uint8(depth_stream)
depth_image = cv.cvtColor(depth_stream, cv.COLOR_GRAY2RGB)
However, I'm using pykinect on Windows, and I want to get the depth and RGB streams in a similar fashion, then process them with OpenCV and display them with Qt.
Here is some example code I found:
from pykinect import nui
import numpy
import cv2

def video_handler_function(frame):
    video = numpy.empty((480, 640, 4), numpy.uint8)
    frame.image.copy_bits(video.ctypes.data)
    cv2.imshow('KINECT Video Stream', video)

kinect = nui.Runtime()
kinect.video_frame_ready += video_handler_function
kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
cv2.namedWindow('KINECT Video Stream', cv2.WINDOW_AUTOSIZE)

while True:
    key = cv2.waitKey(1)
    if key == 27:
        break

kinect.close()
cv2.destroyAllWindows()
What is video_handler_function? What is the purpose of kinect.video_frame_ready += video_handler_function?
I tried kinect.depth_stream.open(nui.ImageStreamType.Depth, 2, nui.ImageResolution.Resolution320x240, nui.ImageType.Depth) to get the depth image, with some modifications to the handler function, but I couldn't make it work.
from pykinect import nui
import numpy
import cv2

kinect = nui.Runtime()
kinect.skeleton_engine.enabled = True

def getColorImage(frame):
    height, width = frame.image.height, frame.image.width  # get width and height of the images
    rgb = numpy.empty((height, width, 4), numpy.uint8)
    frame.image.copy_bits(rgb.ctypes.data)  # copy the bits of the image to the array
    cv2.imshow('KINECT Video Stream', rgb)  # display the image

def getDepthImage(frame):
    height, width = frame.image.height, frame.image.width  # get frame height and width
    depth = numpy.empty((height, width, 1), numpy.uint8)
    arr2d = (depth >> 3) & 4095
    arr2d >>= 4
    frame.image.copy_bits(arr2d.ctypes.data)
    cv2.imshow('KINECT depth Stream', arr2d)

def frame_ready(frame):
    for skeleton in frame.SkeletonData:
        if skeleton.eTrackingState == nui.SkeletonTrackingState.TRACKED:
            print(skeleton.Position.x, skeleton.Position.y, skeleton.Position.z, skeleton.Position.w)

def main():
    while True:
        kinect.video_frame_ready += getColorImage
        kinect.video_stream.open(nui.ImageStreamType.Video, 2, nui.ImageResolution.Resolution640x480, nui.ImageType.Color)
        cv2.namedWindow('KINECT Video Stream', cv2.WINDOW_AUTOSIZE)
        kinect.depth_frame_ready += getDepthImage
        kinect.depth_stream.open(nui.ImageStreamType.Depth, 2, nui.ImageResolution.Resolution320x240, nui.ImageType.Depth)
        cv2.namedWindow('KINECT depth Stream', cv2.WINDOW_AUTOSIZE)
        kinect.skeleton_frame_ready += frame_ready
        if cv2.waitKey(0) == 27:
            cv2.destroyAllWindows()
            kinect.close()
            break

if __name__ == '__main__':
    main()
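One concrete problem in the depth handler above: it shifts an empty array before copy_bits has filled it. A corrected sketch of my own (reusing the imports above, and assuming the usual Kinect v1 packing of a 13-bit depth value above a 3-bit player index, with 16-bit frames at 320x240):

def getDepthImage(frame):
    # depth frames are 16 bits per pixel, so use uint16 and copy first
    depth = numpy.empty((240, 320, 1), numpy.uint16)
    frame.image.copy_bits(depth.ctypes.data)
    millimeters = (depth >> 3) & 4095                  # strip the 3 player-index bits
    display = (millimeters >> 4).astype(numpy.uint8)   # crude scale to 8 bits for imshow
    cv2.imshow('KINECT depth Stream', display)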
