Python3 SPEECH to TEXT - python-3.x

I am a 12 year old kid and am a beginner at programming. It would be great if someone could help be. below I have the code for my speech recognition application, I am on MAC OS CATALINA and the same error keeps on coming up, It prints out "say something", and then once I say something nothing happens and it stays frozen, once I stop the code running I get this error.
import speech_recognition as sr
r = sr.Recognizer()
with sr.Microphone() as source:
print("SAY SOMETHING")
audio = r.listen(source)
print("THANK YOU")
try:
print("TEXT: "+r.recognize_google(audio))
except:
print("SORRY I DONT KNOW WHAT YOU MEAN")
This is the error I get when I stop the code, once it is paused at SAY SOMETHING for a long time.
Traceback (most recent call last):
File "/Users/anishnagariya/PycharmProjects/HelloWorld/Tester.py", line 8, in <module>
print("THANK YOU")
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/speech_recognition/__init__.py", line 620, in listen
buffer = source.stream.read(source.CHUNK)
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/speech_recognition/__init__.py", line 161, in read
return self.pyaudio_stream.read(size, exception_on_overflow=False)
File "/Users/anishnagariya/PycharmProjects/AI/HelloWorld/lib/python3.7/site-packages/pyaudio.py", line 608, in read
return pa.read_stream(self._stream, num_frames, exception_on_overflow)
KeyboardInterrupt
Process finished with exit code 1

I would advise going through the documentation for the speech recognition module you're using:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
Check this part specifically:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instanceenergy_threshold--300---type-float
The energy threshold is described as such:
Represents the energy level threshold for sounds. Values below this threshold are considered silence, and values above this threshold are considered speech. Can be changed.
You can set the threshold like this:
r.energy_threshold = 4000
Or you can do this which will adjust the threshold dynamically based on sound currently from the environment.
r.dynamic_energy_threshold = True
Also ensure your microphone is working properly
Edit:
https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instancelistensource-audiosource-timeout-unionfloat-none--none-phrase_time_limit-unionfloat-none--none-snowboy_configuration-uniontuplestr-iterablestr-none--none---audiodata
Set a timeout &/or pause duration for the listen call.
import speech_recognition as sr
r = sr.Recognizer()
while True:
with sr.Microphone() as source:
print("SAY SOMETHING")
try:
audio = r.listen(source, timeout=3)
print("THANK YOU")
break
except sr.WaitTimeoutError:
print("timed out")
try:
print("TEXT: "+r.recognize_google(audio))
except:
print("SORRY I DONT KNOW WHAT YOU MEAN")

Depending on your microphone quality you will probably need to set some thresholds:
import speech_recognition as sr
r = sr.Recognizer()
r.energy_threshold = 1000
r.pause_threshold = 0.5
with sr.Microphone() as source:
print("SAY SOMETHING")
audio = r.listen(source)
print("THANK YOU")
try:
print("TEXT: " + r.recognize_google(audio))
except sr.UnknownValueError:
print("SORRY I DONT KNOW WHAT YOU MEAN")
It is also not good practice to have a bare except clause sr.UnknownValueError is the standard error for unknown speech in the speech_recognition library

Related

Speech Recognition Request Failed: Service Unavailable

I'm creating a simple virtual assistant but I keep getting an error saying recognition request failed service unavailable.
Might the problem be that I cannot be able to access the google speech api?
I'm not sure.
Here is my code
import pyttsx3
import os
from time import sleep
import playsound
import speech_recognition as sr
voiceEngine = pyttsx3.init()
# rate
voiceEngine.setProperty('rate', 150)
# voice
voices = voiceEngine.getProperty('voices')
voiceEngine.setProperty('voice', voices[1].id)
def speak(text):
voiceEngine.say(text)
voiceEngine.runAndWait()
def takeCommand():
print('\nListening...')
recog = sr.Recognizer()
with sr.Microphone() as source:
print("Listening to the user")
recog.pause_threshold = 1
userInput = recog.listen(source)
try:
print("Recognizing the command")
command = recog.recognize_google(userInput, language ='en-in')
print(f"Command is: {command}\n")
except Exception as e:
print(e)
print("Unable to Recognize the voice.")
return "None"
return command
def wish():
print("Wishing.")
time = int(datetime.datetime.now().hour)
if time>= 0 and time<12:
speak("Good Morning!")
elif time<18:
speak("Good Afternoon!")
else:
speak("Good Evening!")
speak("I am your Voice Assistant, DIVA")
print("I am your Voice Assistant, DIVA")
os.system('cls')
wish()
while True:
for j in range(5):
speak("What can I do for you")
command = takeCommand().lower()
print(command)
if command:
break
if "DIVA" in command:
wish()
else:
speak("Sorry, I am not able to understand you")
When I run it I get this error
Wishing.
I am your Voice Assistant, DIVA
Listening...
Listening to the user
Recognizing the command
recognition request failed: Service Unavailable
Unable to Recognize the voice.
Any help will be highly appreciated. Thanks.

Python 3.7 text-to-speech only playing one audio file, sometimes no audio at all, using IDLE editor

I am using the IDLE editor and Python 3.7, and I would like to know why my code is not playing multiple audio files (sequentially) and sometimes not playing audio at all:
import re
import wave
import pyaudio
import _thread
import time
class TextToSpeech:
CHUNK = 1024
def __init__(self, words_pron_dict:str = 'cmudict-0.7b.txt'):
self._l = {}
self._load_words(words_pron_dict)
def _load_words(self, words_pron_dict:str):
with open(words_pron_dict, 'r') as file:
for line in file:
if not line.startswith(';;;'):
key, val = line.split(' ',2)
self._l[key] = re.findall(r"[A-Z]+",val)
def get_pronunciation(self, str_input):
list_pron = []
for word in re.findall(r"[\w']+",str_input.upper()):
if word in self._l:
list_pron += self._l[word]
print(list_pron)
delay=0
for pron in list_pron:
_thread.start_new_thread( TextToSpeech._play_audio, (pron,delay,))
delay += 0.145
def _play_audio(sound, delay):
try:
time.sleep(delay)
wf = wave.open("sounds/"+sound+".wav", 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
data = wf.readframes(TextToSpeech.CHUNK)
while data:
stream.write(data)
data = wf.readframes(TextToSpeech.CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
return
except:
pass
if __name__ == '__main__':
tts = TextToSpeech()
while True:
tts.get_pronunciation(input('Enter a word or phrase: '))
I have a list of audio files that will play in a certain order, depending on what word I type in, when running the code. The code has no errors, but when I run it, when I type in a word, it only plays the first audio file needed (Example: When I type in "buy" it requires these two sounds: "b" and "ie" played together), but it only plays the first sound, "b", and sometimes no sound at all.
Why isn't it playing multiple audio files? I know that lots of people have been having this issue, but haven't been able to solve it.
Thank you for your help in advance, it is greatly appreciated :)

how to do speech recognition using python

i have tried
import speech_recognition as sr
r = sr.Recognizer()
with sr.Microphone() as source:
print("Speak:")
audio = r.listen(source)
try:
print("You said " + r.recognize_google(audio))
except sr.UnknownValueError:
print("Could not understand audio")
except sr.RequestError as e:
print("Could not request results; {0}".format(e))
but i didn't get the expected output it always shows could not understand the voice
I would check out:
https://pypi.org/project/SpeechRecognition/
Should be just what you need :-D

I can not use speech_recognition

Hello I have copied code below from website about python for convert my voice to text but it has a bug that i can't understand or fix. Can you guy teach me some reason.
import speech_recognition as sr
r = sr.Recognizer()
with sr.Microphone() as source:
r.adjust_for_ambient_noise(source)
while True:
print("said")
audio = r.listen(source)
try:
print("You said " + r.recognize_google(audio))
except LookupError:
print("i do not understand audio")
enter image description here
use this code.It worked for me in pycharm
import speech_recognition as sr
r = sr.Recognizer()
with sr.Microphone() as source:
print("speak into mic")
audio = r.listen(source,timeout=2)
try:
print("Transcription:" + r.recognize_google(audio))
except sr.UnknownValueError:
print("Audio Unintelligible")
except sr.RequestError as e:
print("cannot obtain results : {0}".format(e))
`

python3 OSError: [Errno -9999] Unanticipated host error with pyaudio in speech recognition AI

I am working on an AI, like Jarvis in python3. I am using the python speech_recognition module and pyaudio and everything else required acording to this page.
https://pypi.python.org/pypi/SpeechRecognition/
I have it on a raspberry pi now, before i was using my mac which was working fine. Now sometimes i get an error when running my Jarvis code on my Raspberry pi! Not always but frequetly enough to put a wrench in our progress. And not knowing when the error will come is a big problem and we need to get rid of it. Iḿ using a blue Snowball mic. Here is my code and my error if you could help, that would be great thanks!
Traceback (most recent call last):
File "/media/pi/TRAVELDRIVE/Jarvis(10.0).py", line 172, in <module>
with m as source: r.adjust_for_ambient_noise(source)
File "/usr/local/lib/python3.4/dist-packages/speech_recognition/__init__.py", line 140, in __enter__
input=True, # stream is an input stream
File "/usr/local/lib/python3.4/dist-packages/pyaudio.py", line 750, in open
stream = Stream(self, *args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/pyaudio.py", line 441, in __init__
self._stream = pa.open(**arguments)
OSError: [Errno -9999] Unanticipated host error
Jarvis.py
#JARVIS mark 10. python 3.5.1 version
#JUST.A.RATHER.VERY.INTELEGENT.SYSTEM.
##import speech_recognition
##import datetime
##import os
##import random
##import datetime
##import webbrowser
##import time
##import calendar
from difflib import SequenceMatcher
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tokenize import PunktSentenceTokenizer
import speech_recognition as sr
import sys
from time import sleep
import os
import random
r = sr.Recognizer()
m = sr.Microphone()
#Brain functions, vocab!
what_i_should_call_someone = [""]
Good_Things = ["love","sweet","nice","happy","fun","awesome","great"]
Bad_Things = ["death","kill","hurt","harm","discomfort","rape","pain","sad","depression","depressed","angry","mad","broken","raging","rage"]
# Words that you might says in the beginning of your input, for example: "um hey where are we!?!"
Slang_Words = ["um","uh","hm","eh"]
# Put all greetings in here
Static_Greetings = ["Hey","Hi","Hello"]
# Put your AIs Name and other names just in case.
Name = ["jarvis"]
posible_answer_key_words = ["becuase","yes","no"]
Chance_that_question_was_asked_1 = 0
Chance_that_question_was_asked_2 = 0
certainty_question_was_asked = 0
Me_statment_keywords = ["you","your","yours"]
You_statment_keywords = ["i","i'm","me"]
global certainty_person_is_talking_to_me
what_i_said = ("")
Just_asked_querstion = False
the_last_thing_i_said = ("")
the_last_thing_person_said = ("")
what_person_said = ("")
what_person_said_means = [""]
what_im_about_to_say = [""]
why_im_about_to_say_it = [""]
who_im_talking_to = [""]
how_i_feel = [""]
why_do_i_feel_the_way_i_do = [""]
what_i_am_thinking = ("")
# ways to describe the nouns last said
it_pronouns = ["it","they","she","he"]
# last person place or thing described spoken or descussed!
last_nouns = [""]
# Sample of random questions so Jarvis has somthing to index to know what a question is!
Sample_Questions = ["what is the weather like","where are we today","why did you do that","where is the dog","when are we going to leave","why do you hate me","what is the Answer to question 8",
"what is a dinosour","what do i do in an hour","why do we have to leave at 6.00", "When is the apointment","where did you go","why did you do that","how did he win","why won’t you help me",
"when did he find you","how do you get it","who does all the shipping","where do you buy stuff","why don’t you just find it in the target","why don't you buy stuff at target","where did you say it was",
"when did he grab the phone","what happened at seven am","did you take my phone","do you like me","do you know what happened yesterday","did it break when it dropped","does it hurt everyday",
"does the car break down often","can you drive me home","where did you find me"
"can it fly from here to target","could you find it for me"]
Sample_Greetings = ["hey","hello","hi","hey there","hi there","hello there","hey jarvis","hey dude"]
Question_Keyword_Answer = []
Int_Question_Keywords_In_Input = []
Possible_Question_Key_Words = ["whats","what","where","when","why","isn't","whats","who","should","would","could","can","do","does","can","can","did"]
Possible_Greeting_Key_Words = ["hey","hi","hello",Name]
# In this function: Analyze the user input find out if it's (Question, Answer, Command. Etc) and what is being: Asked, Commanded, ETC.
def Analyze():
def Analyze_For_Greeting():
def Greeting_Keyword_Check():
global Possible_Greeting_Key_Words
Int_Greeting_Keywords_In_Input = []
for words in what_person_said_l_wt:
if words in Possible_Greeting_Key_Words:
Int_Greeting_Keywords_In_Input.append(words)
Amount_Greeting_Keywords = (len(Int_Greeting_Keywords_In_Input))
if Amount_Greeting_Keywords > 0:
return True
def Greeting_Sentence_Match():
for Ran_Greeting in Sample_Greetings:
Greeting_Matcher = SequenceMatcher(None, Ran_Greeting, what_person_said_l).ratio()
if Greeting_Matcher > 0.5:
print (Greeting_Matcher)
print ("Similar to Greeting: "+Ran_Greeting)
return True
Greeting_Keyword_Check()
Greeting_Sentence_Match()
#In this function: determin if the input is a question or not.
def Analyze_For_Question():
# In this function: if there is atleast one question keyword in the user input then return true.
def Question_Keyword_Check():
global Possible_Question_Key_Words
Int_Question_Keywords_In_Input = []
for words in what_person_said_l_wt:
if words in Possible_Question_Key_Words:
Int_Question_Keywords_In_Input.append(words)
Amount_Question_keywords = (len(Int_Question_Keywords_In_Input))
if Amount_Question_keywords > 0:
return True
# In this function: if the users input is simular to other sample questions, return true.
def Question_Sentence_Match():
for Ran_Question in Sample_Questions:
Question_Matcher = SequenceMatcher(None, Ran_Question, what_person_said_l).ratio()
if Question_Matcher > 0.5:
print (Question_Matcher)
print ("Similar to Question: "+Ran_Question)
return True
# In this function: if the first word of the users input is a question keyword and there is a different question keyword in the input return true.
def Question_Verb_Noun_Check():
#if you say "hey jarvis" before somthing like a question or command it will still understand
try:
for word in what_person_said_l_wt:
if word in Static_Greetings or word in Name:
print (word)
Minus_Begin_Greet1 = what_person_said_l_wt.remove(word)
print (Minus_Begin_Greet1)
return True
except IndexError:
pass
Question_Keyword_Check()
Question_Sentence_Match()
Question_Verb_Noun_Check()
if Question_Keyword_Check()==True and Question_Sentence_Match()==True and Question_Verb_Noun_Check()==True:
return True
else:
return False
# All the funtions in Analyze
Analyze_For_Greeting()
Analyze_For_Question()
Conversation=True
Conversation_Started=False
while Conversation==True:
try:
if Conversation_Started==False:
#Greeting()
Conversation_Started=True
with m as source: r.adjust_for_ambient_noise(source)
print(format(r.energy_threshold))
print("Say something!") # just here for now and testing porposes so we know whats happening
with m as source: audio = r.listen(source)
print("Got it! Now to recognize it...")
try:
# recognize speech using Google Speech Recognition
value = r.recognize_google(audio)
# we need some special handling here to correctly print unicode characters to standard output
if str is bytes: # this version of Python uses bytes for strings (Python 2)
print(u"You said {}".format(value).encode("utf-8"))
else: # this version of Python uses unicode for strings (Python 3+)
print("You said {}".format(value))
what_person_said_l = value.lower()
what_person_said_l_wt = word_tokenize(what_person_said_l)
Analyze()
except sr.UnknownValueError:
print ("what was that?")
except sr.RequestError as e:
print("Uh oh! Sorry sir Couldn't request results from Google Speech Recognition service; {0}".format(e))
except KeyboardInterrupt:
pass

Resources