Transcribe ogg file using Speech SDK from Microsoft Azure - speech-to-text

I've been trying to transcribe an .ogg file using the Speech SDK from Azure Cognitive Services, but I cannot make it work. Below is my code:
import azure.cognitiveservices.speech as speechsdk
import time

speech_key, service_region = "my-subscription", "eastus"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_recognition_language = "es-ES"

# audio_filename = "AudioTest.wav"
audio_filename = "AudioFile.ogg"

def speech_recognize_continuous_from_file():
    """performs continuous speech recognition with input from an audio file"""
    # <SpeechContinuousRecognitionWithFile>
    audio_config = speechsdk.audio.AudioConfig(filename=audio_filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    done = False

    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    # Connect callbacks to the events fired by the speech recognizer
    # speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    # speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    # speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    # speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()
    # </SpeechContinuousRecognitionWithFile>

if __name__ == "__main__":
    speech_recognize_continuous_from_file()
The problem: when I try it with a .wav file it works perfectly, but when I try it with the .ogg file I get the following error output:
(796): 24ms SPX_THROW_HR_IF: (0x00a) = 0xa
(41): 85ms SPX_RETURN_ON_FAIL: hr = 0x47a4dbe0
SPX_RETURN_ON_FAIL: hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, 0xffffffffui32) = 0x47a4dbe0
SPX_THROW_ON_FAIL: hr = 0x47a4dbe0
Traceback (most recent call last):
File "c:\Users\jramirezs\Documents\VisualStudioCode\Testing5.py", line 44, in <module>
speech_recognize_continuous_from_file()
File "c:\Users\jramirezs\Documents\VisualStudioCode\Testing5.py", line 36, in speech_recognize_continuous_from_file
speech_recognizer.start_continuous_recognition()
File "C:\Python64bit\lib\site-packages\azure\cognitiveservices\speech\speech.py", line 404, in start_continuous_recognition
return self._impl.start_continuous_recognition()
File "C:\Python64bit\lib\site-packages\azure\cognitiveservices\speech\speech_py_impl.py", line 3679, in start_continuous_recognition
return _speech_py_impl.SpeechRecognizer_start_continuous_recognition(self)
RuntimeError: Exception with an error code: 0xa (SPXERR_INVALID_HEADER)
[CALL STACK BEGIN]
> CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- 00007FFFF2B50BAF (SymFromAddr() error: An attempt was made to access an invalid address.)
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- o_exp
- BaseThreadInitThunk
- RtlUserThreadStart
[CALL STACK END]
Any help would be appreciated. Thanks a lot!

@juanferrs:
Currently the Speech SDK does not support compressed input for Python; it is available only for C#, Java, C++, and Objective-C.
Compressed-input support for Python is planned. Please watch https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/releasenotes for the next release.
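Until Python gains compressed-audio input, a common workaround is to decode the .ogg file to WAV first and feed the WAV to the recognizer. A minimal sketch, assuming ffmpeg is installed and on the PATH (the filenames are placeholders):

import subprocess
import azure.cognitiveservices.speech as speechsdk

def ogg_to_wav(ogg_path, wav_path):
    # Decode OGG to 16 kHz mono 16-bit PCM WAV, a format the Speech SDK accepts.
    subprocess.run(
        ["ffmpeg", "-y", "-i", ogg_path, "-ar", "16000", "-ac", "1", wav_path],
        check=True,
    )

ogg_to_wav("AudioFile.ogg", "AudioFile.wav")
audio_config = speechsdk.audio.AudioConfig(filename="AudioFile.wav")

The rest of the recognition code above works unchanged once the AudioConfig points at the converted WAV.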

Related

RuntimeWarning, RuntimeError (Python AI Chat Bot on Discord Server)

My aim: be able to integrate an AI chatbot with Discord.
import nltk
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
import numpy
import tflearn
import tensorflow
import random
import json
import pickle
import nest_asyncio
import asyncio
#---------------------------------------------------------------------------
import discord
import os

with open("intents.json") as file:
    data = json.load(file)
print(data['intents'])

try:
    with open("data.pickle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except:
    words = []
    labels = []
    docs_x = []
    docs_y = []
    for intent in data['intents']:
        for pattern in intent['patterns']:
            wrds = nltk.word_tokenize(pattern)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(intent["tag"])
        if intent["tag"] not in labels:
            labels.append(intent["tag"])

    # remove duplicates
    words = [stemmer.stem(w.lower()) for w in words if w != "?"]
    words = sorted(list(set(words)))
    labels = sorted(labels)

    training = []
    output = []
    out_empty = [0 for _ in range(len(labels))]
    for x, doc in enumerate(docs_x):
        bag = []
        wrds = [stemmer.stem(w) for w in doc]
        for w in words:
            if w in wrds:
                bag.append(1)
            else:
                bag.append(0)
        output_row = out_empty[:]
        output_row[labels.index(docs_y[x])] = 1
        training.append(bag)
        output.append(output_row)

    training = numpy.array(training)
    output = numpy.array(output)

    with open("data.pickle", "wb") as f:
        pickle.dump((words, labels, training, output), f)

tensorflow.compat.v1.reset_default_graph()
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 16)
net = tflearn.fully_connected(net, 16)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)
model = tflearn.DNN(net)
model.fit(training, output, n_epoch=10000, batch_size=16, show_metric=True)
model.save('C:/Users/Desktop/chatbot/model/model.tflearn')
model.load('C:/Users/Desktop/chatbot/model/model.tflearn')

def bag_of_words(s, words):
    bag = [0 for _ in range(len(words))]
    s_words = nltk.word_tokenize(s)
    s_words = [stemmer.stem(word.lower()) for word in s_words]
    for se in s_words:
        for i, w in enumerate(words):
            if w == se:
                bag[i] = 1
    return numpy.array(bag)

def chat():
    print("start talking with the bot (type quit to stop!")
    while True:
        inp = input("You:")
        if inp.lower() == "quit":
            break
        results = model.predict([bag_of_words(inp, words)])[0]
        # print("results:", results)
        results_index = numpy.argmax(results)
        if results[results_index] > 0.7:
            tag = labels[results_index]
            print("tag:", tag)
            for tg in data["intents"]:
                if tg["tag"] == tag:
                    responses = tg['responses']

            client = discord.Client()  # FOR DISCORD--------------------------------------

            async def on_message(message):
                if inp.author == client.user:
                    return
                if inp.content.startswith("$M-bot"):
                    response = responses.request(inp.content[7:])
                    await asyncio.sleep(5)
                    await inp.channel.send(response)

            on_message(inp)
            client.run("API KEY TAKEN FROM DISCORD for BOT")

            print("Bot:", random.choice(responses))
        else:
            print("I didn't get that. Please try again")

chat()
Warnings and Errors (Pyconsole):
start talking with the bot (type quit to stop!
You:hello
tag: greeting
C:/Users/Desktop/chatbot/chatbot.py:154: RuntimeWarning: coroutine 'chat.<locals>.on_message' was never awaited
on_message(inp)
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
Traceback (most recent call last):
File "F:\Anaconda\lib\site-packages\discord\client.py", line 713, in run
loop.run_forever()
File "F:\Anaconda\lib\asyncio\base_events.py", line 560, in run_forever
self._check_running()
File "F:\Anaconda\lib\asyncio\base_events.py", line 552, in _check_running
raise RuntimeError('This event loop is already running')
RuntimeError: This event loop is already running
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:\Anaconda\lib\site-packages\discord\client.py", line 90, in _cleanup_loop
_cancel_tasks(loop)
File "F:\Anaconda\lib\site-packages\discord\client.py", line 75, in _cancel_tasks
loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))
File "F:\Anaconda\lib\asyncio\base_events.py", line 592, in run_until_complete
self._check_running()
File "F:\Anaconda\lib\asyncio\base_events.py", line 552, in _check_running
raise RuntimeError('This event loop is already running')
RuntimeError: This event loop is already running
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Desktop/chatbot/chatbot.py", line 162, in <module>
chat()
File "C:/Users/Desktop/chatbot/chatbot.py", line 155, in chat
client.run("API KEY TAKEN FROM DISCORD for BOT")
File "F:\Anaconda\lib\site-packages\discord\client.py", line 719, in run
_cleanup_loop(loop)
File "F:\Anaconda\lib\site-packages\discord\client.py", line 95, in _cleanup_loop
loop.close()
File "F:\Anaconda\lib\asyncio\selector_events.py", line 89, in close
raise RuntimeError("Cannot close a running event loop")
RuntimeError: Cannot close a running event loop
PROBLEM: Hello friends, I'm trying to make a chatbot that works on Discord and can give its answers through the artificial intelligence model I built, but I am getting "RuntimeWarning: Enable tracemalloc to get the object allocation traceback" and "RuntimeError: This event loop is already running". How can I solve these?
Your error occurs because you keep re-creating discord.Client. There should be only one instance of discord.Client per program. If you want the bot to spit out the last response, move client out of the loop, store the bot's response in a global variable, and have the bot spit out that variable when a command is sent.
The rearranged code:
import nltk
nltk.download('punkt')
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
import numpy
import tflearn
import tensorflow
import random
import json
import pickle
import nest_asyncio
import asyncio
#-------------------------------------------------------------------------------
import discord
import os

with open("intents.json") as file:
    data = json.load(file)
print(data['intents'])

client = discord.Client()  # OUT OF LOOP

@client.event  # LISTEN EVENTS
async def on_message(message):
    if message.author == client.user:
        return
    if message.content.startswith("$M-bot"):
        response = responses.request(message.content[7:])
        await message.channel.send(response)

try:
    with open("data.pickle", "rb") as f:
        words, labels, training, output = pickle.load(f)
except:
    words = []
    labels = []
    docs_x = []
    docs_y = []
    for intent in data['intents']:
        for pattern in intent['patterns']:
            wrds = nltk.word_tokenize(pattern)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(intent["tag"])
        if intent["tag"] not in labels:
            labels.append(intent["tag"])

    # remove duplicates
    words = [stemmer.stem(w.lower()) for w in words if w != "?"]
    words = sorted(list(set(words)))
    labels = sorted(labels)

    training = []
    output = []
    out_empty = [0 for _ in range(len(labels))]
    for x, doc in enumerate(docs_x):
        bag = []
        wrds = [stemmer.stem(w) for w in doc]
        for w in words:
            if w in wrds:
                bag.append(1)
            else:
                bag.append(0)
        output_row = out_empty[:]
        output_row[labels.index(docs_y[x])] = 1
        training.append(bag)
        output.append(output_row)

    training = numpy.array(training)
    output = numpy.array(output)

    with open("data.pickle", "wb") as f:
        pickle.dump((words, labels, training, output), f)

tensorflow.compat.v1.reset_default_graph()
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 16)
net = tflearn.fully_connected(net, 16)
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)
model = tflearn.DNN(net)
model.fit(training, output, n_epoch=5000, batch_size=16, show_metric=True)
model.save('C:/Users/Desktop/chatbot/model/model.tflearn')
model.load('C:/Users/Desktop/chatbot/model/model.tflearn')

def bag_of_words(s, words):
    bag = [0 for _ in range(len(words))]
    s_words = nltk.word_tokenize(s)
    s_words = [stemmer.stem(word.lower()) for word in s_words]
    for se in s_words:
        for i, w in enumerate(words):
            if w == se:
                bag[i] = 1
    return numpy.array(bag)

def chat():
    global responses  # GLOBAL VARIABLES
    global inp  # GLOBAL VARIABLES
    print("start talking with the bot (type quit to stop!")
    while True:
        inp = input("You:")
        if inp.lower() == "quit":
            break
        results = model.predict([bag_of_words(inp, words)])[0]
        # print("results:", results)
        results_index = numpy.argmax(results)
        if results[results_index] > 0.7:
            tag = labels[results_index]
            print("tag:", tag)
            for tg in data["intents"]:
                if tg["tag"] == tag:
                    responses = tg['responses']
            print("Bot:", random.choice(responses))
        else:
            print("I didn't get that. Please try again")

chat()
client.run("API KEY")

Call to Python's multiprocessing.Process.join() fails

When I run the code below, it occasionally fails:
time_start = time.time()
job = multiprocessing.Process(target=load_cpu, args=(deadline, ))
job.start()  # This is line 37 in the source code linked below
# timeout=None in the call to join() solves the problem
job.join(deadline)
elapsed = time.time() - time_start
if elapsed < deadline and job.is_alive():
    # I am getting here from time to time
    logger.error(f"#{job_counter}: job.join() returned while process {job.pid} is still alive elapsed={elapsed} deadline={deadline}")
The Python 3.7 container (Docker) demonstrating the problem is here: https://github.com/larytet-py/multiprocess
If I run the code for a few minutes on a 4-core Ubuntu 18.04 host, I get:
Traceback (most recent call last):
File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "main.py", line 37, in spawn_job
job.start()
File "/usr/lib/python3.7/multiprocessing/process.py", line 111, in start
_cleanup()
File "/usr/lib/python3.7/multiprocessing/process.py", line 56, in _cleanup
if p._popen.poll() is not None:
AttributeError: 'NoneType' object has no attribute 'poll'
What am I doing wrong?
My workaround is to replace the call to job.join() with polling that checks is_alive(). Unfortunately this approach impacts latency. Are there better alternatives?
def join_process(job, timeout):
    time_start = time.time()
    # Typical processing time is 100ms and I want to reduce the latency impact.
    # 10ms looks ok.
    # TODO: I can end up in a tight loop here.
    polling_time = min(0.1 * timeout, 0.010)
    while time.time() - time_start < timeout and job.is_alive():
        time.sleep(polling_time)
        continue
Update 1: I tried multiprocessing.Event() instead of Process.join(). The code fails with the same exception.
Update 2: I have reproduced the problem in code that does not call Process.join() at all. It requires more time and more load, but eventually Process.start() crashes.
Update 3: https://bugs.python.org/issue40860 has been accepted, it seems. I am still looking for a workaround.
Synchronizing the call to Process.start() helps; this is a fair workaround. There are no other answers, so I am accepting my own answer.
diff --git a/main.py b/main.py
index d09dc53..49d68f0 100644
--- a/main.py
+++ b/main.py
@@ -26,17 +26,24 @@ def load_cpu(deadline):
     while time.time() - start < 0.2*deadline:
         math.pow(random.randint(0, 1), random.randint(0, 1))
 
+def join_process(job, timeout):
+    time_start = time.time()
+    while time.time()-time_start < timeout and job.is_alive():
+        time.sleep(0.1 * timeout)
+        continue
+
 job_counter = 0
+lock = threading.Lock()
 def spawn_job(deadline):
     '''
     Creat a new Process, call join(), process errors
     '''
     global job_counter
     time_start = time.time()
-    job = multiprocessing.Process(target=load_cpu, args=(deadline, ))
-    job.start()
-    # timeout=None in the call to join() solves the problem
-    job.join(deadline)
+    with lock:
+        job = multiprocessing.Process(target=load_cpu, args=(deadline, ))
+        job.start()
+    join_process(job, deadline)
My final version uses os.fork() and drops multiprocessing altogether. multiprocessing is not thread-safe (I am not kidding): https://gist.github.com/larytet/3ca9f9a32b1dc089a24cb7011455141f
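For reference, a minimal sketch of a fork-based replacement (my reading of the approach, not the gist's exact code): fork a child for the workload and poll os.waitpid() with WNOHANG until the deadline expires.

import os
import signal
import time

def run_with_deadline(target, deadline):
    # Run target() in a forked child process.
    pid = os.fork()
    if pid == 0:
        target()
        os._exit(0)

    # Poll for the child's exit without blocking past the deadline.
    time_start = time.time()
    while time.time() - time_start < deadline:
        finished_pid, _ = os.waitpid(pid, os.WNOHANG)
        if finished_pid == pid:
            return True
        time.sleep(0.01)

    # Deadline expired: kill the child and reap it.
    os.kill(pid, signal.SIGKILL)
    os.waitpid(pid, 0)
    return False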

Cloud Video Intelligence API error 400 & 504

When I call the Cloud Video Intelligence API to detect subtitles in a local video file, it always returns error 400 or 504, but using GCS works fine. I have tried adjusting the timeout in the Cloud Video Intelligence config, but it still shows error 400 with "invalid argument".
This is my Python code for detecting video subtitles:
"""This application demonstrates detection subtitles in video using the Google Cloud API.
Usage Examples:
use video in google cloud storge:
python analyze.py text_gcs gs://"video path"
use video in computer:
python analyze.py text_file video.mp4
"""
import argparse
import io
from google.cloud import videointelligence
from google.cloud.videointelligence import enums
def video_detect_text_gcs(input_uri):
# [START video_detect_text_gcs]
"""Detect text in a video stored on GCS."""
from google.cloud import videointelligence
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]
config = videointelligence.types.TextDetectionConfig(language_hints=["zh-TW","en-US"])
video_context = videointelligence.types.VideoContext(text_detection_config=config)
operation = video_client.annotate_video(input_uri=input_uri, features=features, video_context=video_context)
print("\nSubtitle detecting......")
result = operation.result(timeout=300)
# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]
subtitle_data=[ ]
for text_annotation in annotation_result.text_annotations:
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
frame = text_segment.frames[0]
vertex=frame.rotated_bounding_box.vertices[0]
if text_segment.confidence > 0.95 and vertex.y >0.7:
lists=[text_annotation.text,start_time.seconds+ start_time.nanos * 1e-9,vertex.y]
subtitle_data=subtitle_data+[lists]
length=len(subtitle_data)
subtitle_sort=sorted(subtitle_data,key = lambda x: (x[1],x[2]))
i=0
subtitle=[ ]
while i<length :
subtitle=subtitle+[subtitle_sort[i][0]]
i=i+1
with open("subtitle.txt",mode="w",encoding="utf-8") as file:
for x in subtitle:
file.write(x+'\n')
def video_detect_text(path):
# [START video_detect_text]
"""Detect text in a local video."""
from google.cloud import videointelligence
video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]
video_context = videointelligence.types.VideoContext()
with io.open(path, "rb") as file:
input_content = file.read()
operation = video_client.annotate_video(
input_content=input_content, # the bytes of the video file
features=features,
video_context=video_context
)
print("\nSubtitle detecting......")
result = operation.result(timeout=300)
# The first result is retrieved because a single video was processed.
annotation_result = result.annotation_results[0]
subtitle_data=[ ]
for text_annotation in annotation_result.text_annotations:
text_segment = text_annotation.segments[0]
start_time = text_segment.segment.start_time_offset
frame = text_segment.frames[0]
vertex=frame.rotated_bounding_box.vertices[0]
if text_segment.confidence > 0.95 and vertex.y >0.7:
lists=[text_annotation.text,start_time.seconds+ start_time.nanos * 1e-9,vertex.y]
subtitle_data=subtitle_data+[lists]
length=len(subtitle_data)
subtitle_sort=sorted(subtitle_data,key = lambda x: (x[1],x[2]))
i=0
subtitle=[ ]
while i<length :
subtitle=subtitle+[subtitle_sort[i][0]]
i=i+1
with open("subtitle.txt",mode="w",encoding="utf-8") as file:
for x in subtitle:
file.write(x+'\n')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
subparsers = parser.add_subparsers(dest="command")
detect_text_parser = subparsers.add_parser(
"text_gcs", help=video_detect_text_gcs.__doc__
)
detect_text_parser.add_argument("path")
detect_text_file_parser = subparsers.add_parser(
"text_file", help=video_detect_text.__doc__
)
detect_text_file_parser.add_argument("path")
args = parser.parse_args()
if args.command == "text_gcs":
video_detect_text_gcs(args.path)
if args.command == "text_file":
video_detect_text(args.path)
This is the error report:
Ghuang#/Users/Ghuang/Documents/GitHub/Video-subtitles-detection$ python3 analyze.py text_file video.mp4
Traceback (most recent call last):
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/grpc_helpers.py", line 57, in error_remapped_callable
return callable_(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/grpc/_channel.py", line 826, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/grpc/_channel.py", line 729, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "{"created":"@1587691109.677447000","description":"Error received from peer ipv4:172.217.24.10:443","file":"src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "analyze.py", line 144, in <module>
video_detect_text(args.path)
File "analyze.py", line 90, in video_detect_text
video_context=video_context
File "/Library/Python/3.7/site-packages/google/cloud/videointelligence_v1/gapic/video_intelligence_service_client.py", line 303, in annotate_video
request, retry=retry, timeout=timeout, metadata=metadata
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/gapic_v1/method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/retry.py", line 286, in retry_wrapped_func
on_error=on_error,
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/retry.py", line 184, in retry_target
return target()
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.DeadlineExceeded: 504 Deadline Exceeded
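One hedged note, not a confirmed fix: StatusCode.DEADLINE_EXCEEDED means the client-side deadline expired before the annotation finished, and there are two deadlines in play, the per-RPC timeout of annotate_video (visible in the traceback) and the operation.result(timeout=300) poll. A minimal sketch of raising both; the 600-second values are illustrative:

import io
from google.cloud import videointelligence

video_client = videointelligence.VideoIntelligenceServiceClient()
features = [videointelligence.enums.Feature.TEXT_DETECTION]

with io.open("video.mp4", "rb") as f:
    input_content = f.read()

# Raise the per-RPC deadline as well as the long-running-operation poll.
operation = video_client.annotate_video(
    input_content=input_content,
    features=features,
    timeout=600,  # illustrative per-RPC deadline
)
result = operation.result(timeout=600)  # illustrative polling deadline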

How do I avoid OSError: [Errno -9993] Illegal combination of I/O devices when using pyaudio?

I believe it's this module that's crashing my program and throwing the error OSError: [Errno -9993] Illegal combination of I/O devices. I wrapped the call to this module in a try/except SystemExit block, and it still crashed. Any ideas on how to fix this would be greatly appreciated.
I'm running on Ubuntu 18.04 in a Conda virtual environment using Python 3.6.10.
###############################################################################################
######## STT SPEECH TO TEXT FUNCTION THAT RETURNS THE VARIABLE: command
import pyaudio
from vosk import Model, KaldiRecognizer

def myCommand():
    # "listens for commands"
    # We imported vosk up above.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
    stream.start_stream()
    model = Model("model-en")
    rec = KaldiRecognizer(model, 16000)
    while True:
        data = stream.read(2000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            #print(rec.Result())
            # I commented out this line and added the 3 lines below
            myResult = rec.Result()
            myList = myResult.split("text")
            command = myList[1]
            return command
######## END STT SPEECH TO TEXT FUNCTION THAT RETURNS THE VARIABLE: command
###############################################################################################
❯ pip list
Package Version
beautifulsoup4 4.9.0
certifi 2020.4.5.1
chardet 3.0.4
click 7.1.1
future 0.18.2
gTTS 2.1.1
gTTS-token 1.1.3
idna 2.9
isort 4.3.21
lazy-object-proxy 1.4.3
mccabe 0.6.1
mock 4.0.1
MouseInfo 0.1.3
mypy 0.770
mypy-extensions 0.4.3
numpy 1.18.1
Pillow 7.1.1
pip 20.0.2
psutil 5.7.0
PyAudio 0.2.11
PyAutoGUI 0.9.50
PyGetWindow 0.0.8
pylint 2.4.4
PyMsgBox 1.0.7
pyperclip 1.8.0
PyQt5 5.14.2
PyQt5-sip 12.7.2
PyRect 0.1.4
PyScreeze 0.1.26
python3-xlib 0.15
PyTweening 1.0.3
requests 2.23.0
setuptools 45.2.0
six 1.14.0
soupsieve 2.0
subprocess.run 0.0.8
typed-ast 1.4.1
typing-extensions 3.7.4.1
urllib3 1.25.9
vosk 0.3.3
wheel 0.34.2
wikipedia 1.4.0
wrapt 1.11.2
I'm also running this module, and I'm not 100% sure which is causing the problem, although I suspect the first:
###############################################################################################
######## TTS TEXT TO SPEECH FUNCTION
# This gets used all over to speak text aloud.
# It also prints to the console for people with bad memories.
from gtts import gTTS
import os

def talkToMe(mytext):
    # "speaks audio passed as argument"
    print(mytext)
    # can handle multiline text.
    #for line in mytext.splitlines():
    # uses the google text to speech module to synthesize text
    text_to_speech = gTTS(text=mytext, lang='en-uk')
    # saves synthesized speech to audio.mp3
    # this file gets written, played, and overwritten
    # over and over again.
    text_to_speech.save('audio.mp3')
    # the sox module's wrapper is mpg123.
    # This is called through the operating system via the imported os module.
    os.system('mpg123 -q audio.mp3')
###############################################################################################
######## END TTS TEXT TO SPEECH FUNCTION
Here's the error I'm seeing:
LOG (vosk[5.5.641~1-79319]:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (vosk[5.5.641~1-79319]:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (vosk[5.5.641~1-79319]:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 1 orphan nodes.
LOG (vosk[5.5.641~1-79319]:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 2 orphan components.
LOG (vosk[5.5.641~1-79319]:Collapse():nnet-utils.cc:1472) Added 1 components, removed 2
LOG (vosk[5.5.641~1-79319]:CompileLooped():nnet-compile-looped.cc:345) Spent 0.028789 seconds in looped compilation.
ALSA lib pulse.c:243:(pulse_connect) PulseAudio: Unable to connect: Connection terminated
Expression 'ret' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1735
Expression 'AlsaOpen( &alsaApi->baseHostApiRep, params, streamDir, &self->pcm )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1902
Expression 'PaAlsaStreamComponent_Initialize( &self->capture, alsaApi, inParams, StreamDirection_In, NULL != callback )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2166
Expression 'PaAlsaStream_Initialize( stream, alsaHostApi, inputParameters, outputParameters, sampleRate, framesPerBuffer, callback, streamFlags, userData )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2835
Traceback (most recent call last):
File "Juliet.py", line 376, in <module>
main()
File "Juliet.py", line 366, in main
output = mycommand.myCommand()[3:]
File "/home/bard/Code/Juliet/SpeakAndHear/mycommand.py", line 10, in myCommand
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
File "/home/bard/miniconda3/envs/Juliet/lib/python3.6/site-packages/pyaudio.py", line 750, in open
stream = Stream(self, *args, **kwargs)
File "/home/bard/miniconda3/envs/Juliet/lib/python3.6/site-packages/pyaudio.py", line 441, in __init__
self._stream = pa.open(**arguments)
OSError: [Errno -9993] Illegal combination of I/O devices
I've solved this. I needed to close my stream and terminate the process:
###############################################################################################
######## STT SPEECH TO TEXT FUNCTION THAT RETURNS THE VARIABLE: command
import pyaudio
from vosk import Model, KaldiRecognizer

def myCommand():
    # "listens for commands"
    # We imported vosk up above.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
    stream.start_stream()
    model = Model("model-en")
    rec = KaldiRecognizer(model, 16000)
    while True:
        data = stream.read(2000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            #print(rec.Result())
            # I commented out this line and added the 3 lines below
            myResult = rec.Result()
            myList = myResult.split("text")
            command = myList[1]
            stream.stop_stream()
            stream.close()
            p.terminate()
            return command
######## END STT SPEECH TO TEXT FUNCTION THAT RETURNS THE VARIABLE: command
###############################################################################################
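A possible hardening of this fix (my sketch, not part of the original answer): wrap the capture loop in try/finally so the stream and the PyAudio instance are released on every exit path, including exceptions, which avoids leaving the ALSA device in a bad state for the next open:

import pyaudio
from vosk import Model, KaldiRecognizer

def my_command():
    # Same capture parameters as above; cleanup now runs on every exit path.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                    input=True, frames_per_buffer=8000)
    model = Model("model-en")
    rec = KaldiRecognizer(model, 16000)
    try:
        while True:
            data = stream.read(2000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                return rec.Result()
    finally:
        # Always release the device, even if recognition raises.
        stream.stop_stream()
        stream.close()
        p.terminate()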

How to deal with “OSError: [Errno -9999] Unanticipated host error”?

I am relatively new to Python. I am trying to create a program to open YouTube: the user is supposed to say "open youtube" and Python should open it in the browser. However, I continuously get an OS error in the code below. When I tried the same program on my friend's PC, the error surprisingly disappeared, but it persists on my system for some reason. On closer study, I found the issue is with the line "audio = r.listen(source)", where the code returns an error. Kindly help. (I have checked my microphone; there seem to be no issues.)
import pyttsx3  # pip install pyttsx3
import speech_recognition as sr  # pip install speechRecognition
import webbrowser
import os

engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
# print(voices[1].id)
engine.setProperty('voice', voices[0].id)

def takeCommand():
    # It takes microphone input from the user and returns string output
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        r.pause_threshold = 0.5
        audio = r.listen(source)

    try:
        print("Recognizing...")
        query = r.recognize_google(audio, language='en-in')
        print(f"User said: {query}\n")
    except Exception as e:
        # print(e)
        print("Say that again please...")
        return "None"
    return query

while True:
    query = takeCommand().lower()
    if 'open youtube' in query:
        webbrowser.open("youtube.com")  # end of code
I am getting the following error message:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-15-a53e26f9d26c> in <module>
32
33 while True:
---> 34 query = takeCommand().lower()
35
36 if 'open youtube' in query:
<ipython-input-15-a53e26f9d26c> in takeCommand()
14
15 r = sr.Recognizer()
---> 16 with sr.Microphone() as source:
17 print("Listening...")
18 r.pause_threshold = 0.5
c:\users\it\appdata\local\programs\python\python37\lib\site-packages\speech_recognition\__init__.py in __enter__(self)
139 input_device_index=self.device_index, channels=1,
140 format=self.format, rate=self.SAMPLE_RATE, frames_per_buffer=self.CHUNK,
--> 141 input=True, # stream is an input stream
142 )
143 )
c:\users\it\appdata\local\programs\python\python37\lib\site-packages\pyaudio.py in open(self, *args, **kwargs)
748 """
749
--> 750 stream = Stream(self, *args, **kwargs)
751 self._streams.add(stream)
752 return stream
c:\users\it\appdata\local\programs\python\python37\lib\site-packages\pyaudio.py in __init__(self, PA_manager, rate, channels, format, input, output, input_device_index, output_device_index, frames_per_buffer, start, input_host_api_specific_stream_info, output_host_api_specific_stream_info, stream_callback)
439
440 # calling pa.open returns a stream object
--> 441 self._stream = pa.open(**arguments)
442
443 self._input_latency = self._stream.inputLatency
OSError: [Errno -9999] Unanticipated host error
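Errno -9999 (paUnanticipatedHostError) is PortAudio failing to open the requested device. A hedged diagnostic sketch, an assumption rather than a confirmed fix: enumerate the input devices PyAudio actually sees, then point speech_recognition at an explicit device via sr.Microphone(device_index=...):

import pyaudio
import speech_recognition as sr

# List every input-capable device PortAudio reports, with its index.
p = pyaudio.PyAudio()
for i in range(p.get_device_count()):
    info = p.get_device_info_by_index(i)
    if info.get("maxInputChannels", 0) > 0:
        print(i, info["name"], int(info["defaultSampleRate"]))
p.terminate()

# Then open the microphone by explicit index instead of the default device.
r = sr.Recognizer()
with sr.Microphone(device_index=1) as source:  # 1 is a placeholder index
    audio = r.listen(source)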
