I want to record audio to a file while Speech Recognition is running at the same time. For some reason my program always crashes after a few moments, and it never gets as far as creating the audio file.
I suspect there is a problem with using threads as both processes worked fine on their own. Unfortunately I could not find anything.
Does anyone have an idea how I can fix this or how I can use Speech Recognition while recording sound?
import threading
import speech_recognition as sr
import pyaudio
import wave
import time
# Shared flag polled by record(); the script at the bottom of this snippet
# sets it to False to end the recording.
status = True


def record():
    """Record mono 16-bit audio from the default input until ``status`` is False.

    The captured frames are written to ``output.wav`` once recording stops.
    If reading from the stream fails, the error is printed and whatever was
    captured up to that point is still saved.
    """
    global status
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    fs = 44100
    filename = 'output.wav'

    p = pyaudio.PyAudio()
    frames = []
    try:
        # Query the sample width while the PyAudio session is still open.
        sample_width = p.get_sample_size(sample_format)
        print('Recording')
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            while status:
                # Do not raise when the input buffer overflows; a late read
                # should drop samples rather than kill the recording thread.
                frames.append(
                    stream.read(chunk, exception_on_overflow=False))
        except OSError as exc:
            # Report the failure but fall through so the partial recording
            # is still written to disk.
            print(f'Recording aborted: {exc}')
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    print('Finished recording')

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))
def get_audio():
    """Listen on the default microphone forever and print recognized German speech.

    Each captured phrase is sent to ``recognize_google`` with
    ``language="de_DE"``. The function never returns.
    """
    # Create the recognizer and open the microphone once instead of
    # re-creating both for every phrase.
    r = sr.Recognizer()
    with sr.Microphone() as source:
        while True:
            print("Höre zu ...")
            audio = r.listen(source)
            try:
                said = r.recognize_google(audio, language="de_DE")
            except sr.UnknownValueError:
                # Nothing intelligible was heard: keep the original blank line.
                print('')
            except Exception as e:
                # Surface other failures instead of hiding them behind an
                # empty line, but keep the loop alive.
                print(f'Recognition failed: {e}')
            else:
                print(said)
# Run record() in a background thread.
thread1=threading.Thread(target=record)
thread1.start()
# Run get_audio() concurrently in a second thread.
thread2=threading.Thread(target=get_audio)
thread2.start()
# Let both threads run for five seconds, then clear the flag that record() polls.
time.sleep(5)
# NOTE(review): get_audio() loops on `while True` and never reads `status`,
# so thread2 keeps running after this assignment.
status=False
You can record and save sound with Speech Recognition. Just use this part of the code and it will create a speech.wav file:
def get_audio():
    """Listen forever, save each captured phrase to ``speech.wav`` and print its text.

    ``speech.wav`` is rewritten for every phrase, so it always holds the most
    recent capture. Recognition uses ``recognize_google`` with
    ``language="de_DE"``. The function never returns.
    """
    # Create the recognizer and open the microphone once instead of
    # re-creating both for every phrase.
    r = sr.Recognizer()
    with sr.Microphone() as source:
        while True:
            print("Höre zu ...")
            audio = r.listen(source)
            # The same AudioData object is both saved and recognized, so no
            # second recording stream is needed.
            with open('speech.wav', 'wb') as f:
                f.write(audio.get_wav_data())
            try:
                said = r.recognize_google(audio, language="de_DE")
            except sr.UnknownValueError:
                # Nothing intelligible was heard: keep the original blank line.
                print('')
            except Exception as e:
                # Surface other failures instead of hiding them behind an
                # empty line, but keep the loop alive.
                print(f'Recognition failed: {e}')
            else:
                print(said)
Related
I don't know what is wrong.
When I call the voice_test command on my Discord server, the bot joins a voice channel and its outline turns green, but I don't hear anything.
While running the code, i get no traceback.
here is the code:
# Capture parameters passed to PyAudio below.
CHUNK = 2048
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100


# NOTE(review): the next line is a comment in this file; if it was meant to be
# a command decorator it has to be restored for the command to be registered.
#client.command()
async def voice_test(ctx, *, channel: discord.VoiceChannel):
    """Join ``channel`` and forward microphone audio to it until disconnected.

    Args:
        ctx: Invocation context; ``ctx.voice_client`` is reused when present.
        channel: Voice channel to join or move to.
    """
    import asyncio
    import sys

    if ctx.voice_client is not None:
        # move_to() returns None, so keep the existing client as `vc`
        # instead of assigning the call's result.
        await ctx.voice_client.move_to(channel)
        vc = ctx.voice_client
    else:
        vc = await channel.connect()

    p = pyaudio.PyAudio()
    try:
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            output=True,
            frames_per_buffer=CHUNK
        )
        try:
            loop = asyncio.get_running_loop()
            while vc.is_connected():
                # stream.read() blocks; run it in the default executor so the
                # event loop stays responsive while waiting for audio.
                data = await loop.run_in_executor(None, stream.read, CHUNK)
                # NOTE(review): encode=False sends these bytes without Opus
                # encoding, and the capture format above is 44.1 kHz mono —
                # confirm this matches what the voice connection expects.
                vc.send_audio_packet(data, encode=False)
            print('done playing',file=sys.stderr)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the audio device, even if sending failed.
        p.terminate()
Based on my own recent experience, you have several issues, but it all boils down to this: Discord expects 20 ms frames of 48,000 Hz, dual-channel (stereo), Opus-encoded audio.
If you encoded your audio to opus before calling send_audio_packet, you would start hearing sound. Bad sound, but sound.
This is what worked for me, after many iterations and trial and error.
class PyAudioPCM(discord.AudioSource):
    """Audio source that reads 16-bit PCM chunks from a PyAudio input stream."""

    def __init__(self, channels=2, rate=48000, chunk=960, input_device=1) -> None:
        """Open the input stream.

        Args:
            channels: Number of input channels.
            rate: Sample rate in Hz.
            chunk: Frames requested per ``read()`` call.
            input_device: PyAudio input device index.
        """
        # Keep the PyAudio handle so cleanup() can terminate it later.
        self._pa = pyaudio.PyAudio()
        self.chunks = chunk
        self.input_stream = self._pa.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=rate,
            input=True,
            input_device_index=input_device,
            frames_per_buffer=chunk,
        )

    def read(self) -> bytes:
        """Return the next ``chunk`` frames of raw PCM data."""
        # Do not raise when the input buffer overflows; drop samples instead
        # of stopping playback.
        return self.input_stream.read(self.chunks, exception_on_overflow=False)

    def cleanup(self) -> None:
        """Close the input stream and release PyAudio; safe to call repeatedly."""
        # getattr guards cover the case where __init__ failed part-way.
        stream = getattr(self, "input_stream", None)
        if stream is not None:
            self.input_stream = None
            stream.stop_stream()
            stream.close()
        pa = getattr(self, "_pa", None)
        if pa is not None:
            self._pa = None
            pa.terminate()
async def play_audio_in_voice():
    """Start playing a fresh ``PyAudioPCM`` source on the voice client ``vc``."""

    def report_player_error(error):
        # Print only when an error value is passed in.
        if error:
            print(f'Player error: {error}')

    vc.play(PyAudioPCM(), after=report_player_error)
I found some basic code on the internet and edited it to fit my requirements. On the hardware side I used an Arduino Uno, relays, and an ESP8266 ESP-01 Wi-Fi module, and controlled it using Blynk and IFTTT.
the problem is that the assistant is very slow, I first thought that maybe google text to speech is the problem so I tried the pyttsx3 module to do that offline but that didn't help.
so I thought maybe the if statements are a problem so I made each one of them into a function and tried multi-threading but that didn't help either...
so I thought that it has to listen and process all I speak .. so I added a trigger word so only after the trigger word is used it starts to listen for commands,
sometimes it will work properly at first then get slower
I don't know what else to do
here is the code I am using:
import pyttsx3
import speech_recognition as sr
import datetime
import wikipedia
import webbrowser
import os
import requests
import time
import smtplib
from goto import goto, label
# Text-to-speech engine created with the 'sapi5' driver; the first voice the
# engine reports is selected.
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)


def speak(audio):
    """Queue ``audio`` on the module-level engine and run it with ``runAndWait()``."""
    engine.say(audio)
    engine.runAndWait()
def wishMe():
    """Speak the fixed start-up greeting."""
    speak("Hello sir ,Assistant booting up")
def takeCommand():
    """Listen for one phrase on the microphone and return the recognized text.

    Returns:
        The text returned by ``recognize_google`` (language ``en-in``), or an
        empty string when recognition fails.
    """
    r = sr.Recognizer()
    r.pause_threshold = 0.5
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)
        print("saving")
    try:
        print("Recognizing...")
        query = r.recognize_google(audio, language='en-in')
    except Exception as e:
        print(e)
        print("Say that again please...")
        # Return a (falsy) string rather than None so callers that chain
        # str methods such as .lower() do not fail with AttributeError.
        return ""
    print(f"User said: {query}\n")
    return query
# Loop counters printed for debugging: wake-word iterations and handled commands.
i=0
j=0


def _trigger_webhook(url):
    """POST to ``url`` with a timeout; return True if the request was sent."""
    try:
        # A bounded timeout keeps a slow or unreachable service from
        # freezing the assistant.
        requests.post(url, timeout=10)
    except requests.RequestException as e:
        print(e)
        return False
    return True


if __name__ == "__main__":
    wishMe()
    # Build the recognizer once and reuse it for every iteration.
    r = sr.Recognizer()
    r.pause_threshold = 0.5
    while True:
        print("iteration jarvis",i)
        i=i+1
        with sr.Microphone() as source:
            print("waiting...")
            audio = r.listen(source)
        try:
            query = r.recognize_google(audio, language='en-in')
            print(f"User said: {query}\n")
            if 'Jarvis' not in query:
                # Without the wake word nothing below may run; previously the
                # overheard phrase itself was processed as a command.
                print("jaris not called")
                continue
            command = takeCommand()
        except Exception as e:
            print(e)
            print("jaris not called")
            continue
        if not command:
            # takeCommand() recognized nothing; wait for the wake word again.
            continue
        query = command.lower()
        print("iteration command",j)
        j=j+1
        if 'how are you' in query:
            speak("good")
        elif 'open google' in query:
            webbrowser.open("google.com")
            speak("There you go!")
        elif 'lights on' in query:
            if _trigger_webhook("https://maker.ifttt.com/trigger/light_on/with/key/d*************"):
                speak("ok sure")
        elif 'lights off' in query:
            if _trigger_webhook("https://maker.ifttt.com/trigger/lights_off/with/key/d************"):
                speak("with pleasure")
        elif 'play music' in query:
            music_dir = 'D:\\Non Critical\\songs\\Favorite Songs2'
            songs = os.listdir(music_dir)
            print(songs)
            if songs:
                os.startfile(os.path.join(music_dir, songs[0]))
            else:
                # An empty folder used to raise IndexError on songs[0].
                print("No songs found in", music_dir)
        elif 'stop listening' in query:
            speak("going offline")
            # exit() is the interactive-shell helper; raise SystemExit directly.
            raise SystemExit
You might want to only initialize your recognizer once.
I took the liberty of cleaning up and refactoring the code a little too.
from itertools import count
import pyttsx3
import speech_recognition as sr
import webbrowser
import os
import requests
# Text-to-speech engine created with the "sapi5" driver; the first voice the
# engine reports is selected.
engine = pyttsx3.init("sapi5")
voices = engine.getProperty("voices")
engine.setProperty("voice", voices[0].id)
# Single recognizer created once at import time and reused by takeCommand().
recognizer = sr.Recognizer()
recognizer.pause_threshold = 0.5


def speak(audio):
    """Queue ``audio`` on the module-level engine and run it with ``runAndWait()``."""
    engine.say(audio)
    engine.runAndWait()
def takeCommand():
    """Capture one phrase with the shared recognizer and return its text.

    Recognition errors are not handled here; they propagate to the caller.
    """
    with sr.Microphone() as mic:
        print("Listening...")
        captured = recognizer.listen(mic)
        print("saving")

    print("Recognizing...")
    heard = recognizer.recognize_google(captured, language="en-in")
    print(f"User said: {heard}\n")
    return heard
def process_query(query):
    """Run the action for the first known phrase found in ``query``.

    Phrases are matched by substring in a fixed order. A query that matches
    nothing is ignored.

    Args:
        query: Recognized command text.

    Raises:
        SystemExit: When the query contains "stop listening".
    """
    if "how are you" in query:
        speak("good")
    elif "open google" in query:
        webbrowser.open("google.com")
        speak("There you go!")
    elif "lights on" in query:
        # A bounded timeout keeps an unreachable service from hanging the
        # assistant; any resulting exception is left to the caller.
        requests.post(
            "https://maker.ifttt.com/trigger/light_on/with/key/d*************",
            timeout=10,
        )
        speak("ok sure")
    elif "lights off" in query:
        requests.post(
            "https://maker.ifttt.com/trigger/lights_off/with/key/d************",
            timeout=10,
        )
        speak("with pleasure")
    elif "play music" in query:
        music_dir = "D:\\Non Critical\\songs\\Favorite Songs2"
        songs = os.listdir(music_dir)
        print(songs)
        if songs:
            os.startfile(os.path.join(music_dir, songs[0]))
        else:
            # An empty folder used to raise IndexError on songs[0].
            print("No songs found in", music_dir)
    elif "stop listening" in query:
        speak("going offline")
        # exit() is the interactive-shell helper; raise SystemExit directly.
        raise SystemExit
def main():
    """Greet the user, then capture and process commands forever.

    Errors from capturing or from processing a command are printed and
    spoken, after which the loop continues with the next iteration.
    """
    speak("Hello sir ,Assistant booting up")
    iteration = 0
    while True:
        iteration += 1
        print("iteration jarvis", iteration)

        try:
            query = takeCommand()
        except Exception as e:
            print(e)
            speak(f"Recognition error: {e}")
            continue

        try:
            process_query(query)
        except Exception as e:
            print(e)
            speak(f"Processing error: {e}")


if __name__ == "__main__":
    main()
I used the following code a couple of days ago and it was working fine, but now it does not recognize any of the audio files it used to recognize before. I am wondering what is wrong?
import speech_recognition as sr
# Recognize the speech in audio.wav with the Google recognizer.
r = sr.Recognizer()
audio_file_name = 'audio.wav'
audiofile = sr.AudioFile(audio_file_name)
with audiofile as source:
    audio = r.record(source)
try:
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("Sorry could not recognize what you said")
except sr.RequestError as e:
    # A service or network failure is not the same as unintelligible audio.
    print("Could not get a result from the recognition service: {}".format(e))
else:
    # The old message formatted an undefined `i`; the resulting NameError was
    # swallowed by a bare `except` and reported as a recognition failure.
    print("You said : {}".format(text))
I'm trying to do speech recognition, but every time I run it I get this error.
JackShmReadWritePtr::~JackShmReadWritePtr - Init not done for -1, skipping unlock
Here is my code.
from gtts import gTTS
import playsound as ps
import speech_recognition as sr
# NOTE(review): the list of device names returned here is discarded.
sr.Microphone.list_microphone_names()
# Plain string 'text' (the parentheses do not make a tuple); read by rSpeak().
text=('text')
# NOTE(review): hard-coded device index — confirm it is a valid input device
# on the machine this runs on.
mic = sr.Microphone(device_index=20)
r = sr.Recognizer()
# Capture one phrase from the selected microphone.
with mic as source:
    audio = r.listen(source)
# NOTE(review): the result is stored in `re` but not used in the visible code;
# the name is also that of the standard-library `re` module.
re = r.recognize_google(audio)
def rSpeak():
    """Synthesize the module-level ``text`` with gTTS, save it and play it."""
    output_path = 'hello.mp3'
    gTTS(text).save(output_path)
    ps.playsound(output_path, True)


rSpeak()
Any help would be very much appreciated.
You should try to speak using this:
import pyttsx3
# Text-to-speech engine created with the 'sapi5' driver; the first voice the
# engine reports is selected.
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)


def speak(audio):
    """Queue ``audio`` on the module-level engine and run it with ``runAndWait()``."""
    engine.say(audio)
    engine.runAndWait()


# Example call.
speak('Hello, Sir.')
And if you want to recognize a voice then go with this:
import speech_recognition as sr
def takeCommand():
    """Capture one phrase from the microphone and return it as text.

    Returns:
        The recognized text, or the literal string "None" when recognition
        raises an exception.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        recognizer.adjust_for_ambient_noise(source)
        captured = recognizer.listen(source)

    try:
        print("Recognizing...")
        heard = recognizer.recognize_google(captured, language='en-in')
        print(f"User said: {heard}\n")
        return heard
    except Exception:
        print("Say that again please...")
        return "None"


# Lower-case the result so later comparisons can use lower-case phrases.
query = takeCommand().lower()
And then you can match your query variable to match with the command you want to follow.
'''
Please help me out at this stage. I want to build this program with win32com and pyttsx3; I tried both, but neither is working. Below is my code, please check it and help.
I want the program to answer me back with the predefined answers.
The program works fine and replies as text, but I am not getting any response by voice.
'''
# audio.py File
import pyaudio
import speech_recognition as sr
import pyglet
from commands import Commander
import subprocess
import win32com.client as wincl
import wave
import pyttsx3
# Flag polled by the `while running` loop at the bottom of this file.
running = True


def say(text):
    """Speak ``text`` aloud through the Windows SAPI voice.

    Args:
        text: The sentence to speak.
    """
    # win32com.client is a module and cannot be called; COM objects are
    # created with its Dispatch() function.
    speak = wincl.Dispatch("SAPI.SpVoice")
    # Pass only the text: the 'say ' prefix would be spoken literally and
    # Speak() has no `shell` argument.
    speak.Speak(text)
def play_audio(filename):
    """Play the WAV file ``filename`` on the default output device.

    Args:
        filename: Path of the WAV file to play.
    """
    chunk = 1024
    # The context manager closes the wave file, which was previously leaked.
    with wave.open(filename, 'rb') as wf:
        pa = pyaudio.PyAudio()
        try:
            stream = pa.open(
                format=pa.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True
            )
            try:
                data_stream = wf.readframes(chunk)
                while data_stream:
                    stream.write(data_stream)
                    data_stream = wf.readframes(chunk)
            finally:
                # Stop before closing so the stream is shut down in order,
                # and do so even if playback raised.
                stream.stop_stream()
                stream.close()
        finally:
            pa.terminate()
# Play a sound once when this module is run.
play_audio("./audio/after.wav")
# The code above plays sound using pyaudio.
r = sr.Recognizer()  # recognizer used by initSpeech()
cmd = Commander() # receives each recognized command via cmd.discover()
def initSpeech():
    """Capture one spoken command, print it and hand it to ``cmd.discover``.

    A sound is played before and after listening. When the recognized
    command is exactly "quit", the module-level ``running`` flag is cleared.
    """
    # Without this declaration the assignment below would create a local
    # variable and the module-level flag would never change.
    global running

    print("Listening.....")
    play_audio("./audio/before.wav")
    with sr.Microphone() as source:
        print("Say Something")
        audio = r.listen(source)
    play_audio("./audio/before.wav")

    command = ""
    try:
        command = r.recognize_google(audio)
    except Exception:
        # Was a bare `except:`, which also caught KeyboardInterrupt and
        # SystemExit.
        print("Couldn't understand you bro. ")
    print("Your Command: ")
    print(command)
    if command == "quit":
        running = False
    #echo('You Said: ' + command)
    cmd.discover(command)
    #speak.Speak('You Said: ' + command) -------> no comment
# Keep handling spoken commands for as long as the flag is set.
while running:
    initSpeech()
-----------------------------------------------------------------------------------------------------
# commands.py File
import subprocess
import os
class Commander:
    """Match recognized text against known phrases and answer by text and voice."""

    def __init__(self):
        self.confirm = ["yes", "affirmative", "si", "sure", "do it", "yeah", "confirm"]
        self.cancel = ["no", "negative", "negative soldier", "don't", "wait", "cancel"]
        # SAPI voice object, created on first use by _get_voice().
        self._voice = None
        self._voice_unavailable = False

    def discover(self, text):
        """Respond to ``text`` if it contains both "what" and "your name"."""
        if "what" in text and "your name" in text:
            if "my" in text:
                self.respond("You haven't told your name yet")
            self.respond("My name is python commander. How are you")

    def respond(self, response):
        """Print ``response`` and, when a SAPI voice is available, speak it."""
        print(response)
        voice = self._get_voice()
        if voice is not None:
            # Pass only the text: the old 'say ' prefix would be spoken
            # literally and Speak() has no `shell` argument.
            voice.Speak(response)

    def _get_voice(self):
        """Return the cached SAPI voice, or None when pywin32 is not installed."""
        # The old code used a name `speak` that is never defined in this
        # module, so the voice object is created here instead.
        if self._voice is None and not self._voice_unavailable:
            try:
                import win32com.client
            except ImportError as exc:
                import sys
                print(f"Voice output disabled: {exc}", file=sys.stderr)
                self._voice_unavailable = True
            else:
                self._voice = win32com.client.Dispatch("SAPI.SpVoice")
        return self._voice