How to do speech recognition using Python - python-3.x

I have tried:
import speech_recognition as sr
# Capture a single utterance from the microphone and print its transcription.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak:")
    audio = r.listen(source)

try:
    transcript = r.recognize_google(audio)
    print("You said " + transcript)
except sr.UnknownValueError:
    # the recognizer could not make sense of the recording
    print("Could not understand audio")
except sr.RequestError as e:
    # the recognition request itself failed
    print("Could not request results; {0}".format(e))
But I didn't get the expected output; it always prints "Could not understand audio".

I would check out:
https://pypi.org/project/SpeechRecognition/
Should be just what you need :-D

Related

Using pyaudio and Speech Recognition at the same time

I want to record the audio and get an audiofile while using Speech Recognition. For some reason my program always crashes after a few moments. It also does not come to creating the audiofile.
I suspect there is a problem with using threads as both processes worked fine on their own. Unfortunately I could not find anything.
Does anyone have an idea how I can fix this or how I can use Speech Recognition while recording sound?
import threading
import speech_recognition as sr
import pyaudio
import wave
import time
status = True
def record():
    """Record microphone audio to ``output.wav`` until ``status`` turns false.

    Reads mono 16-bit samples at 44100 Hz in 1024-frame chunks for as long as
    the module-level ``status`` flag is truthy, then writes everything that
    was captured to ``output.wav`` in the current working directory.

    The input stream and the PyAudio instance are released even when opening
    or reading the stream raises; the exception itself still propagates.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    fs = 44100
    filename = 'output.wav'
    global status

    p = pyaudio.PyAudio()
    frames = []
    try:
        # Ask for the sample width while the PyAudio instance is still alive
        # instead of after terminate().
        sample_width = p.get_sample_size(sample_format)
        print('Recording')
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            # `status` is flipped to False by another thread to stop recording.
            while status:
                frames.append(stream.read(chunk))
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    print('Finished recording')

    # The context manager closes the file even if a write fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))
def get_audio():
    """Listen on the microphone forever and print each German transcription.

    Any recognition failure is swallowed and shown as an empty line.
    """
    while True:
        recognizer = sr.Recognizer()
        with sr.Microphone() as mic:
            print("Höre zu ...")
            captured = recognizer.listen(mic)
        try:
            print(recognizer.recognize_google(captured, language="de_DE"))
        except Exception:
            print('')
# Run the recorder and the recognizer side by side, then clear the shared
# flag after five seconds so that record() leaves its loop.
thread1 = threading.Thread(target=record)
thread2 = threading.Thread(target=get_audio)
thread1.start()
thread2.start()

time.sleep(5)
status = False
You can record and save sound with Speech Recognition. Just use this part of the code and it will create a speech.wav file:
def get_audio():
    """Listen in a loop, saving each phrase to ``speech.wav`` and printing it.

    Every captured phrase overwrites ``speech.wav`` with its WAV data before
    it is sent for German (``de_DE``) transcription. The loop never returns.
    """
    # One recognizer is enough; there is no need to rebuild it per phrase.
    r = sr.Recognizer()
    while True:
        with sr.Microphone() as source:
            print("Höre zu ...")
            audio = r.listen(source)
        with open('speech.wav', 'wb') as f:
            f.write(audio.get_wav_data())
        try:
            said = r.recognize_google(audio, language="de_DE")
        except sr.UnknownValueError:
            # nothing intelligible was heard: keep the original blank line
            print('')
        except Exception as e:
            # keep the loop alive, but say what went wrong instead of hiding it
            print("Recognition failed: {0}".format(e))
        else:
            print(said)

I am trying to make a voice assistant to control my room and the response times are very slow

I found some basic code on the internet and edited it to fit my requirements. On the hardware side I use an Arduino Uno, relays, and an ESP8266 ESP-01 Wi-Fi module, controlled through Blynk and IFTTT.
The problem is that the assistant is very slow. At first I thought Google Text-to-Speech might be the cause, so I tried the pyttsx3 module to do that offline, but that didn't help.
Then I thought the if statements might be the problem, so I turned each of them into a function and tried multi-threading, but that didn't help either.
Next I thought the cause was that it has to listen to and process everything I say, so I added a trigger word so that it only starts listening for commands after the trigger word is used.
Sometimes it works properly at first and then gets slower.
I don't know what else to do.
here is the code I am using:
import pyttsx3
import speech_recognition as sr
import datetime
import wikipedia
import webbrowser
import os
import requests
import time
import smtplib
from goto import goto, label
# Shared text-to-speech engine: SAPI5 driver, first voice in the engine's list.
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)


def speak(audio):
    """Queue ``audio`` on the speech engine and run the engine via runAndWait()."""
    engine.say(audio)
    engine.runAndWait()
def wishMe():
    """Speak the start-up greeting."""
    greeting = "Hello sir ,Assistant booting up"
    speak(greeting)
def takeCommand():
    """Capture one phrase and return its ``en-in`` transcription.

    Returns None when recognition raises; the error is printed first.
    """
    recognizer = sr.Recognizer()
    recognizer.pause_threshold = 0.5
    with sr.Microphone() as mic:
        print("Listening...")
        captured = recognizer.listen(mic)
        print("saving")
    try:
        print("Recognizing...")
        text = recognizer.recognize_google(captured, language='en-in')
        print(f"User said: {text}\n")
        return text
    except Exception as err:
        print(err)
        print("Say that again please...")
        return None
i = 0  # number of wake-word listening passes
j = 0  # number of passes that reached command dispatch
if __name__ == "__main__":
    wishMe()
    while True:
        print("iteration jarvis", i)
        i += 1

        # First pass: listen for anything, looking for the wake word.
        r = sr.Recognizer()
        r.pause_threshold = 0.5
        with sr.Microphone() as source:
            print("waiting...")
            audio = r.listen(source)
        try:
            query = r.recognize_google(audio, language='en-in')
            print(f"User said: {query}\n")
            if 'Jarvis' in query:
                # takeCommand() returns None when recognition fails; .lower()
                # then raises and the handler below restarts the loop.
                query = takeCommand().lower()
        except Exception as e:
            print(e)
            print("jaris not called")
            continue

        # NOTE(review): this point is also reached when the wake word was not
        # heard, so the first-pass text is matched against the commands too.
        print("iteration command", j)
        j += 1
        if 'how are you' in query:
            speak("good")
        elif 'open google' in query:
            webbrowser.open("google.com")
            speak("There you go!")
        elif 'lights on' in query:
            requests.post("https://maker.ifttt.com/trigger/light_on/with/key/d*************")
            speak("ok sure")
        elif 'lights off' in query:
            requests.post("https://maker.ifttt.com/trigger/lights_off/with/key/d************")
            speak("with pleasure")
        elif 'play music' in query:
            music_dir = 'D:\\Non Critical\\songs\\Favorite Songs2'
            songs = os.listdir(music_dir)
            print(songs)
            first_song = os.path.join(music_dir, songs[0])
            os.startfile(first_song)
        elif 'stop listening' in query:
            speak("going offline")
            exit()
You might want to only initialize your recognizer once.
I took the liberty of cleaning up and refactoring the code a little too.
from itertools import count
import pyttsx3
import speech_recognition as sr
import webbrowser
import os
import requests
# Module-wide speech engine (SAPI5 driver, first voice in the engine's list).
engine = pyttsx3.init("sapi5")
voices = engine.getProperty("voices")
engine.setProperty("voice", voices[0].id)

# A single recognizer shared by every listening pass.
recognizer = sr.Recognizer()
recognizer.pause_threshold = 0.5


def speak(audio):
    """Queue ``audio`` on the speech engine and run the engine via runAndWait()."""
    engine.say(audio)
    engine.runAndWait()
def takeCommand():
    """Record one phrase with the shared recognizer and return its text.

    Nothing is caught here: recognition errors propagate to the caller.
    """
    with sr.Microphone() as mic:
        print("Listening...")
        captured = recognizer.listen(mic)
        print("saving")
    print("Recognizing...")
    text = recognizer.recognize_google(captured, language="en-in")
    print(f"User said: {text}\n")
    return text
def process_query(query):
    """Run the action whose trigger phrase occurs in ``query``.

    Matching is a case-insensitive substring test, checked in the order
    below; the first hit wins and text that matches nothing is ignored.

    Args:
        query: The recognised phrase.

    Raises:
        SystemExit: When the phrase contains "stop listening".
    """
    # The recognizer may capitalise words, so normalise before matching.
    query = query.lower()
    if "how are you" in query:
        speak("good")
    elif "open google" in query:
        webbrowser.open("google.com")
        speak("There you go!")
    elif "lights on" in query:
        # Bounded wait so a stalled webhook cannot hang the assistant.
        requests.post("https://maker.ifttt.com/trigger/light_on/with/key/d*************", timeout=10)
        speak("ok sure")
    elif "lights off" in query:
        requests.post("https://maker.ifttt.com/trigger/lights_off/with/key/d************", timeout=10)
        speak("with pleasure")
    elif "play music" in query:
        music_dir = "D:\\Non Critical\\songs\\Favorite Songs2"
        songs = os.listdir(music_dir)
        print(songs)
        os.startfile(os.path.join(music_dir, songs[0]))
    elif "stop listening" in query:
        speak("going offline")
        # Same effect as exit(), without relying on the site-provided helper.
        raise SystemExit
def main():
    """Greet the user, then listen for and dispatch commands forever.

    Errors from listening or from command handling are printed and spoken,
    and the loop carries on with the next pass.
    """
    speak("Hello sir ,Assistant booting up")
    iteration = 0
    while True:
        iteration += 1
        print("iteration jarvis", iteration)
        try:
            query = takeCommand()
        except Exception as err:
            print(err)
            speak("Recognition error: {}".format(err))
            continue
        try:
            process_query(query)
        except Exception as err:
            print(err)
            speak("Processing error: {}".format(err))


if __name__ == "__main__":
    main()

Unable to recognize the audio in python while using SpeechRecognition

# Record one phrase from the microphone and try to print its transcription.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("speak up:")
    a = r.listen(source)
try:
    # NOTE(review): the recording is bound to `a` above, so unless `audio` is
    # defined elsewhere this line raises NameError. The bare `except` below
    # swallows it, which would explain why the fallback message always prints.
    text = r.recognize_google(audio)
    print(text)
except:
    print("sorry didn't get you")
even after talking the output is still
sorry didn't get you
I am experienced with this package. Tell me if this works for you...
def takeCommand():
    """Listen for one phrase and return its ``en-us`` transcription.

    Returns:
        The recognised text, or the string "None" (not the ``None`` object)
        when recognition raises; the error is printed first.
    """
    r = sr.Recognizer()
    r.pause_threshold = 1
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)
    try:
        print("Recognizing...")
        query = r.recognize_google(audio, language='en-us')
        # The f prefix was missing, so the literal text "{query}" was printed.
        print(f"User said: {query}\n")
    except Exception as e:
        print(e)
        print("Google was unable to hear")
        return "None"
    return query

Python3 SPEECH to TEXT

I am a 12 year old kid and a beginner at programming. It would be great if someone could help me. Below is the code for my speech recognition application. I am on macOS Catalina and the same problem keeps coming up: it prints "SAY SOMETHING", and then once I say something nothing happens and it stays frozen. Once I stop the running code I get this error.
import speech_recognition as sr
# Record one phrase, acknowledge it, then print the transcription.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("SAY SOMETHING")
    audio = r.listen(source)
    print("THANK YOU")

try:
    text = r.recognize_google(audio)
    print("TEXT: " + text)
except:
    print("SORRY I DONT KNOW WHAT YOU MEAN")
This is the error I get when I stop the code, once it is paused at SAY SOMETHING for a long time.
Traceback (most recent call last):
File "/Users/anishnagariya/PycharmProjects/HelloWorld/Tester.py", line 8, in <module>
print("THANK YOU")
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/speech_recognition/__init__.py", line 620, in listen
buffer = source.stream.read(source.CHUNK)
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/speech_recognition/__init__.py", line 161, in read
return self.pyaudio_stream.read(size, exception_on_overflow=False)
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/pyaudio.py", line 608, in read
return pa.read_stream(self._stream, num_frames, exception_on_overflow)
KeyboardInterrupt
Process finished with exit code 1
I would advise going through the documentation for the speech recognition module you're using:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
Check this part specifically:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instanceenergy_threshold--300---type-float
The energy threshold is described as such:
Represents the energy level threshold for sounds. Values below this threshold are considered silence, and values above this threshold are considered speech. Can be changed.
You can set the threshold like this:
r.energy_threshold = 4000
Or you can do this which will adjust the threshold dynamically based on sound currently from the environment.
r.dynamic_energy_threshold = True
Also ensure your microphone is working properly
Edit:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instancelistensource-audiosource-timeout-unionfloat-none--none-phrase_time_limit-unionfloat-none--none-snowboy_configuration-uniontuplestr-iterablestr-none--none---audiodata
Set a timeout &/or pause duration for the listen call.
import speech_recognition as sr
# Keep prompting until a phrase is captured; each attempt waits at most three
# seconds for speech to start before timing out and prompting again.
r = sr.Recognizer()
while True:
    with sr.Microphone() as source:
        print("SAY SOMETHING")
        try:
            audio = r.listen(source, timeout=3)
        except sr.WaitTimeoutError:
            print("timed out")
        else:
            print("THANK YOU")
            break

# Catch only the recognizer's own errors, so that unrelated problems
# (including Ctrl+C) are no longer hidden behind a bare except.
try:
    print("TEXT: " + r.recognize_google(audio))
except sr.UnknownValueError:
    print("SORRY I DONT KNOW WHAT YOU MEAN")
except sr.RequestError as e:
    print("Could not request results; {0}".format(e))
Depending on your microphone quality you will probably need to set some thresholds:
import speech_recognition as sr
# Recognizer with explicit energy and pause thresholds.
r = sr.Recognizer()
r.energy_threshold = 1000
r.pause_threshold = 0.5

with sr.Microphone() as source:
    print("SAY SOMETHING")
    audio = r.listen(source)
    print("THANK YOU")

try:
    text = r.recognize_google(audio)
    print("TEXT: " + text)
except sr.UnknownValueError:
    print("SORRY I DONT KNOW WHAT YOU MEAN")
It is also not good practice to have a bare except clause; sr.UnknownValueError is the standard error for unknown speech in the speech_recognition library.

I can not use speech_recognition

Hello, I copied the code below from a website about Python to convert my voice to text, but it has a bug that I can't understand or fix. Can you explain the reason?
import speech_recognition as sr
# Calibrate once for background noise, then transcribe phrases forever.
# NOTE(review): the loop is assumed to sit inside the `with` block, since
# listen() is called on `source`; the original indentation was lost.
r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)
    while True:
        print("said")
        audio = r.listen(source)
        try:
            heard = r.recognize_google(audio)
            print("You said " + heard)
        except LookupError:
            print("i do not understand audio")
enter image description here
Use this code. It worked for me in PyCharm.
import speech_recognition as sr
# Capture one phrase, waiting at most two seconds for speech to start, and
# print its transcription.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("speak into mic")
    try:
        audio = r.listen(source, timeout=2)
    except sr.WaitTimeoutError:
        # listen() raises this when the timeout passes without speech
        # starting; report it instead of crashing with a traceback.
        audio = None
        print("No speech detected before the timeout")

if audio is not None:
    try:
        print("Transcription:" + r.recognize_google(audio))
    except sr.UnknownValueError:
        print("Audio Unintelligible")
    except sr.RequestError as e:
        print("cannot obtain results : {0}".format(e))
`

Resources