Related
I want to record the audio and get an audiofile while using Speech Recognition. For some reason my program always crashes after a few moments. It also does not come to creating the audiofile.
I suspect there is a problem with using threads as both processes worked fine on their own. Unfortunately I could not find anything.
Does anyone have an idea how I can fix this or how I can use Speech Recognition while recording sound?
import threading
import speech_recognition as sr
import pyaudio
import wave
import time
status = True
def record():
chunk = 1024
sample_format = pyaudio.paInt16
channels = 1
fs = 44100
filename = 'output.wav'
global status
p = pyaudio.PyAudio()
print('Recording')
stream = p.open(format=sample_format,
channels=channels,
rate=fs,
frames_per_buffer=chunk,
input=True)
frames = []
while status == True:
data = stream.read(chunk)
frames.append(data)
stream.stop_stream()
stream.close()
p.terminate()
print('Finished recording')
wf = wave.open(filename, 'wb')
wf.setnchannels(channels)
wf.setsampwidth(p.get_sample_size(sample_format))
wf.setframerate(fs)
wf.writeframes(b''.join(frames))
wf.close()
def get_audio():
while True:
r = sr.Recognizer()
with sr.Microphone() as source:
print("Höre zu ...")
audio = r.listen(source)
said = ''
try:
said = r.recognize_google(audio, language="de_DE")
print(said)
except Exception as e:
print('')
thread1=threading.Thread(target=record)
thread1.start()
thread2=threading.Thread(target=get_audio)
thread2.start()
time.sleep(5)
status=False
You can record and save sound with Speech Recognition. Just use this part of the code and it will create a speech.wav file:
def get_audio():
while True:
r = sr.Recognizer()
with sr.Microphone() as source:
print("Höre zu ...")
audio = r.listen(source)
with open('speech.wav', 'wb') as f:
f.write(audio.get_wav_data())
try:
said = r.recognize_google(audio, language="de_DE")
print(said)
except Exception as e:
print('')
I am trying to build a program for my IT course. The point of the program is to have a client app to send commands to the server. It seemd to work pretty well until today where, after a few calls, when I receive a response from the server it is not up to date.
eg : I send a few commands that all work fine. But then send another command and receive the response from the previous one.
I checked the command sent by the client and it is the one I type and in the server part, when I receive a command from the client it is the one actually sent by the client (not the previous one)
Here is the Shell classes (in the server and client) that I use to send and receive messages aswell as an example on how I use it.
Server :
class Shell:
command = ""
next_command = True
def __init__(self, malware_os):
self._os = malware_os
self._response = ""
def receive(self):
self.command = distant_socket.recv(4096).decode("utf-8")
def execute_command(self):
if self.command[:2] == "cd":
os.chdir(self.command[3:])
if self._os == "Windows":
self.result = Popen("cd", shell=True, stdout=PIPE)
else:
self.result = Popen("pwd", shell=True, stdout=PIPE)
else:
self.result = Popen(self.command, shell=True, stdout=PIPE)
self._response = self.result.communicate()
def send(self):
self._response = self._response[0]
self._response = self._response.decode("utf-8", errors="ignore")
self._response = self._response + " "
self._response = self._response.encode("utf-8")
distant_socket.send(self._response)
self._response = None
Use in server :
shell.receive()
shell.execute_command()
shell.send()
Client :
class Shell:
def __init__(self):
self._history = []
self._command = ""
def send(self):
self._history.append(self._command)
s.send(self._command.encode("utf-8"))
def receive(self):
content = s.recv(4096).decode("utf-8", errors="ignore")
if content[2:] == "cd":
malware_os.chdir(self._command[3:].decode("utf-8", errors="ignore"))
print(content)
def history(self):
print("The history of your commands is:")
print("----------------------")
for element in self._history:
print(element)
def get_command(self):
return self._command
def set_command(self, command):
self._command = command
Use in client :
shell.set_command(getinfo.get_users())
shell.send()
shell.receive()
Thank you in advance for your help,
Cordially,
Sasquatch
Since you said the response is not up to date, I'm guessing you used TCP (you didn't post the socket creation). Like the comment mentioned, there are 2 things that you aren't doing right:
Protocol: TCP gives you a stream, which is divided as the OS sees fit into packets. When transferring data over the network, the receiving end must know when it has a complete transmission. The easiest way to do that would be to send the length of the transmission, in a fixed format (say 4 bytes, big endian), before the transmission itself. Also, use sendall. For example:
import struct
def send_message(sock, message_str):
message_bytes = message_str.encode("utf-8")
size_prefix = struct.pack("!I", len(message_bytes)) # I means 4 bytes integer in big endian
sock.sendall(size_prefix)
sock.sendall(message_bytes)
Since TCP is a stream socket, the receiving end might return from recv before the entire message was received. You need to call it in a loop, checking the return value at every iteration to correctly handle disconnects. Something such as:
def recv_message_str(sock):
#first, get the message size, assuming you used the send above
size_buffer = b""
while len(size_buffer) != 4:
recv_ret = sock.recv(4 - len(size_buffer))
if len(recv_ret) == 0:
# The other side disconnected, do something (raise an exception or something)
raise Exception("socket disconnected")
size_buffer += recv_ret
size = struct.unpack("!I", size_buffer)[0]
# Loop again, for the message string
message_buffer = b""
while len(message_buffer) != size:
recv_ret = sock.recv(size - len(message_buffer))
if len(recv_ret) == 0:
# The other side disconnected, do something (raise an exception or something)
raise Exception("socket disconnected")
message_buffer += recv_ret
return message_buffer.decode("utf-8", errors="ignore")
I want to send an image (.pgm) via TCP as soon as it is written to the ramdisk. For this I'm working with pyinotify and sockets. After the picture is sent I would like to tell the server to stop now.
Everything works fine but the last part gives me following error:
if data.decode('utf-8') == 'stop': UnicodeDecodeError: 'utf-8' codec can't
decode byte 0x88 in position 319: invalid start byte
Client:
import pyinotify
import socket
import traceback
import sys
class ModHandler(pyinotify.ProcessEvent):
def __init__(self, socket, buffer_size):
self.socket = socket
self.buffer_size = buffer_size
def process_IN_CLOSE_WRITE(self, event):
try:
self.socket.send(bytes(event.pathname, encoding='utf-8'))
file = open(event.pathname, "rb")
line = file.read(self.buffer_size)
while(line):
self.socket.send(line)
line = file.read(self.buffer_size)
except Exception:
traceback.print_exc()
finally:
try:
self.socket.send(bytes('stop', encoding='utf-8'))
print("done")
file.close
except Exception:
traceback.print_exc()
class TCPStream():
def __init__(self, ip, port, buffer_size):
self.ip = ip
self.port = port
self.buffer_size = buffer_size
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
self.socket.connect((self.ip, self.port))
except Exception:
traceback.print_exc()
def __del__(self):
try:
self.socket.close()
except Exception:
traceback.print_exc()
stream = TCPStream('127.0.0.1', 5005, 1024)
handler = ModHandler(stream.socket, stream.buffer_size)
wm = pyinotify.WatchManager()
notifier = pyinotify.Notifier(wm, handler)
wd_value = wm.add_watch("/media/ram_disk", pyinotify.IN_CLOSE_WRITE)
if wd_value["/media/ram_disk"] <= 0:
print("can't add watchmanager to the ram_disk... insufficient
authorization? another watchmanager already running?")
sys.exit(0)
notifier.loop()
Server:
import socket
TCP_IP = '127.0.0.1'
TCP_PORT = 5005
BUFFER_SIZE = 1024
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((TCP_IP, TCP_PORT))
s.listen(1)
conn, addr = s.accept()
print("connection address: ", addr)
path = conn.recv(BUFFER_SIZE).decode('utf-8')
filename = path.split("/")
with open(filename[3], 'wb') as f:
data = conn.recv(BUFFER_SIZE)
while data:
print("receiving...")
f.write(data)
data = conn.recv(BUFFER_SIZE)
if not data:
break
if data.decode('utf-8') == 'stop':
f.close()
print("done")
break
conn.close()
The goal is to have a constant TCP stream of images written to the ramdisk. Therefore I wanted to communicate via Bytes with the server to tell him what to do. It seems that after the first picture gets transmitted it breaks somehow. Any help is appreciated!
What if four consecutive bytes in your image at the beginning of a buffer happen to match the ASCII (and UTF-8) characters s t o p? Also, how does the receiving side know when the file name ends, and the file data starts?
You should create a binary encoding that frames the various bits of your data. This makes the whole process nicely deterministic. That's often best done with the struct module's pack and unpack methods. Given a file you want to send, client side:
import os
import struct
...
pathname = event.pathname.encode('utf-8') # Encode pathname into bytes
pathname_len = len(pathname)
file = open(event.pathname, "rb")
filesize = os.path.getsize(event.filename) # Get file size
# Encode size of file name, and size of file into a binary header
header_format = struct.Struct("!II")
header = header_format.pack(pathname_len, filesize)
self.socket.sendall(header)
self.socket.sendall(pathname)
while True:
line = file.read(self.buffer_size)
if not line: break # EOF
self.socket.sendall(line)
# (Remove sending of 'stop' from finally block)
Note the use of sendall to ensure that the entire buffer gets sent (it is legal for send to send only part of a buffer but that can result in missing bytes if you don't account for it).
Server side will look something like this:
import struct
...
def recv_exactly(s, buffer_len):
""" This is the converse of sendall """
data = b''
rem_bytes = buffer_len
while rem_bytes > 0:
buf = s.recv(rem_bytes)
if not buf:
raise Exception("Received EOF in middle of block")
data += buf
rem_bytes -= len(buf)
return data
conn, addr = s.accept()
...
header_format = struct.Struct("!II")
# Receive exactly the bytes of the header
header = recv_exactly(conn, header_format.size)
pathname_len, file_len = header_format.unpack(header)
path = recv_exactly(conn, pathname_len)
filename = path.split("/")
...
rem_bytes = file_len
while rem_bytes > 0:
data = conn.recv(min(rem_bytes, BUFFER_SIZE))
if not data:
raise Exception("Received EOF in middle of file")
f.write(data)
rem_bytes -= len(data)
Another important advantage of this model is that you now have a clear notion of the boundary between one file and the next (without having a "signal value" that might appear in the data). The receiver always knows exactly how many bytes remain until the end of the current file, and the sender can simply move on to send a new header, pathname, and file without opening a new connection.
I'm using the following script to play all the WAV files in the current path. I will be modifying it to print the output of some text files. That part is easy.
Need to know how/where in the loop of playing the WAV files, where to add some code to pause/interupt the execution of the code with the keyboard.
#!/usr/bin/python3
import vlc
import time
import glob
wav_files = glob.glob("*.wav")
instance=vlc.Instance(["--no-sub-autodetect-file"])
# You should not recreate a player for each file, just reuse the same
# player
player=instance.media_player_new()
for wav in wav_files:
player.set_mrl(wav)
player.play()
playing = set([1,2,3,4])
time.sleep(5) #Give time to get going
duration = player.get_length() / 1000
mm, ss = divmod(duration, 60)
print("Playing", wav, "Length:", "%02d:%02d" % (mm,ss))
while True:
state = player.get_state()
if state not in playing:
break
continue
Steal the getch right out of vlc.py
I've added the windows option, as you didn't specify an OS.
#!/usr/bin/python3
import vlc
import time
import glob
import sys
import termios, tty
try:
from msvcrt import getch # try to import Windows version
except ImportError:
def getch(): # getchar(), getc(stdin) #PYCHOK flake
fd = sys.stdin.fileno()
old = termios.tcgetattr(fd)
try:
tty.setraw(fd)
ch = sys.stdin.read(1)
finally:
termios.tcsetattr(fd, termios.TCSADRAIN, old)
return ch
wav_files = glob.glob("*.wav")
print("Play List")
for f in wav_files:
print(f)
instance=vlc.Instance(["--no-sub-autodetect-file"])
# You should not recreate a player for each file, just reuse the same
# player
player=instance.media_player_new()
for wav in wav_files:
player.set_mrl(wav)
player.play()
playing = set([1,2,3,4])
time.sleep(1) #Give time to get going
duration = player.get_length() / 1000
mm, ss = divmod(duration, 60)
print("Playing", wav, "Length:", "%02d:%02d" % (mm,ss))
while True:
state = player.get_state()
if state not in playing:
break
k = getch()
if k in ["N","n"]:#Next
player.stop()
break
elif k in ["Q","q"]:#Quit
player.stop()
sys.exit()
break
elif k == " ":#Toggle Pause
player.pause()
else:
print("[Q - Quit, N - Next, Space - Pause]")
continue
I tried pygame for playing wav file like this:
import pygame
pygame.init()
pygame.mixer.music.load("mysound.wav")
pygame.mixer.music.play()
pygame.event.wait()
but It change the voice and I don't know why!
I read this link solutions and can't solve my problem with playing wave file!
for this solution I dont know what should I import?
s = Sound()
s.read('sound.wav')
s.play()
and for this solution /dev/dsp dosen't exist in new version of linux :
from wave import open as waveOpen
from ossaudiodev import open as ossOpen
s = waveOpen('tada.wav','rb')
(nc,sw,fr,nf,comptype, compname) = s.getparams( )
dsp = ossOpen('/dev/dsp','w')
try:
from ossaudiodev import AFMT_S16_NE
except ImportError:
if byteorder == "little":
AFMT_S16_NE = ossaudiodev.AFMT_S16_LE
else:
AFMT_S16_NE = ossaudiodev.AFMT_S16_BE
dsp.setparameters(AFMT_S16_NE, nc, fr)
data = s.readframes(nf)
s.close()
dsp.write(data)
dsp.close()
and when I tried pyglet It give me this error:
import pyglet
music = pyglet.resource.media('mysound.wav')
music.play()
pyglet.app.run()
--------------------------
nima#ca005 Desktop]$ python play.py
Traceback (most recent call last):
File "play.py", line 4, in <module>
music = pyglet.resource.media('mysound.wav')
File "/usr/lib/python2.7/site-packages/pyglet/resource.py", line 587, in media
return media.load(path, streaming=streaming)
File "/usr/lib/python2.7/site-packages/pyglet/media/__init__.py", line 1386, in load
source = _source_class(filename, file)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 194, in __init__
format = wave_form.get_format_chunk()
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 174, in get_format_chunk
for chunk in self.get_chunks():
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 110, in get_chunks
chunk = cls(self.file, name, length, offset)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 155, in __init__
raise RIFFFormatException('Size of format chunk is incorrect.')
pyglet.media.riff.RIFFFormatException: Size of format chunk is incorrect.
AL lib: ReleaseALC: 1 device not closed
You can use PyAudio. An example here on my Linux it works:
#!usr/bin/env python
#coding=utf-8
import pyaudio
import wave
#define stream chunk
chunk = 1024
#open a wav format music
f = wave.open(r"/usr/share/sounds/alsa/Rear_Center.wav","rb")
#instantiate PyAudio
p = pyaudio.PyAudio()
#open stream
stream = p.open(format = p.get_format_from_width(f.getsampwidth()),
channels = f.getnchannels(),
rate = f.getframerate(),
output = True)
#read data
data = f.readframes(chunk)
#play stream
while data:
stream.write(data)
data = f.readframes(chunk)
#stop stream
stream.stop_stream()
stream.close()
#close PyAudio
p.terminate()
Works for me on Windows:
https://pypi.org/project/playsound/
>>> from playsound import playsound
>>> playsound('/path/to/a/sound/file/you/want/to/play.wav')
NOTE: This has a bug in Windows where it doesn't close the stream.
I've added a PR for a fix here:
https://github.com/TaylorSMarks/playsound/pull/53/commits/53240d970aef483b38fc6d364a0ae0ad6f8bf9a0
The reason pygame changes your audio is mixer defaults to a 22k sample rate:
initialize the mixer module
pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=4096): return None
Your wav is probably 8k. So when pygame plays it, it plays roughly twice as fast. So specify your wav frequency in the init.
Pyglet has some problems correctly reading RIFF headers. If you have a very basic wav file (with exactly a 16 byte fmt block) with no other information in the fmt chunk (like 'fact' data), it works. But it makes no provision for additional data in the chunks, so it's really not adhering to the RIFF interface specification.
PyGame has 2 different modules for playing sound and music, the pygame.mixer module and the pygame.mixer.music module. This module contains classes for loading Sound objects and controlling playback. The difference is explained in the documentation:
The difference between the music playback and regular Sound playback is that the music is streamed, and never actually loaded all at once. The mixer system only supports a single music stream at once.
If you want to play a single wav file, you have to initialize the module and create a pygame.mixer.Sound() object from the file. Invoke play() to start playing the file. Finally, you have to wait for the file to play.
Use get_length() to get the length of the sound in seconds and wait for the sound to finish:
(The argument to pygame.time.wait() is in milliseconds)
import pygame
pygame.mixer.init()
my_sound = pygame.mixer.Sound('mysound.wav')
my_sound.play()
pygame.time.wait(int(my_sound.get_length() * 1000))
Alternatively you can use pygame.mixer.get_busy to test if a sound is being mixed. Query the status of the mixer continuously in a loop:
import pygame
pygame.init()
pygame.mixer.init()
my_sound = pygame.mixer.Sound('mysound.wav')
my_sound.play()
while pygame.mixer.get_busy():
pygame.time.delay(10)
pygame.event.poll()
Windows
winsound
If you are a Windows user,the easiest way is to use winsound.You don't even need to install it.
Not recommended, too few functions
import winsound
winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME)
# add winsound.SND_ASYNC flag if you want to wait for it.
# like winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME | winsound.SND_ASYNC)
mp3play
If you are looking for more advanced functions, you can try mp3play.
Unluckily,mp3play is only available in Python2 and Windows.
If you want to use it on other platforms,use playsound despite its poor functions.If you want to use it in Python3,I will give you the modified version which is available on Python 3.(at the bottom of the answer)
Also,mp3play is really good at playing wave files, and it gives you more choices.
import time
import mp3play
music = mp3play.load("Wet Hands.wav")
music.play()
time.sleep(music.seconds())
Cross-platform
playsound
Playsound is very easy to use,but it is not recommended because you can't pause or get some infomation of the music, and errors often occurs.Unless other ways doesn't work at all, you may try this.
import playsound
playsound.playsound("Wet Hands.wav", block=True)
pygame
I'm using this code and it works on Ubuntu 22.04 after my test.
If it doesn't work on your machine, consider updating your pygame lib.
import pygame
pygame.mixer.init()
pygame.mixer.music.load("Wet Hands.wav")
pygame.mixer.music.play()
while pygame.mixer.music.get_busy():
pass
pyglet
This works on Windows but it doesn't work on my Ubuntu, so I can do nothing.
import pyglet
import time
sound = pyglet.media.load("Wet Hands.wav", "Wet Hands.wav")
sound.play()
time.sleep(sound.duration)
Conclusion
It seems that you are using Linux,so playsound may be your choice.My code maybe cannot solve your problem by using pygame and pyglet,because I always use Windows.If none of the solutions work on your machine,I suggest you run the program on Windows...
To other users seeing my answer, I have done many tests among many libraries,so if you are using Windows,you may try mp3play which can play both mp3 and wave files, and mp3play is the most pythonic, easy, light-weight and functional library.
mp3play in Python3
just copy the code below and create a file named mp3play.py in your working directory and paste the content.
import random
from ctypes import windll, c_buffer
class _mci:
def __init__(self):
self.w32mci = windll.winmm.mciSendStringA
self.w32mcierror = windll.winmm.mciGetErrorStringA
def send(self, command):
buffer = c_buffer(255)
command = command.encode(encoding="utf-8")
errorcode = self.w32mci(command, buffer, 254, 0)
if errorcode:
return errorcode, self.get_error(errorcode)
else:
return errorcode, buffer.value
def get_error(self, error):
error = int(error)
buffer = c_buffer(255)
self.w32mcierror(error, buffer, 254)
return buffer.value
def directsend(self, txt):
(err, buf) = self.send(txt)
# if err != 0:
# print('Error %s for "%s": %s' % (str(err), txt, buf))
return err, buf
class _AudioClip(object):
def __init__(self, filename):
filename = filename.replace('/', '\\')
self.filename = filename
self._alias = 'mp3_%s' % str(random.random())
self._mci = _mci()
self._mci.directsend(r'open "%s" alias %s' % (filename, self._alias))
self._mci.directsend('set %s time format milliseconds' % self._alias)
err, buf = self._mci.directsend('status %s length' % self._alias)
self._length_ms = int(buf)
def volume(self, level):
"""Sets the volume between 0 and 100."""
self._mci.directsend('setaudio %s volume to %d' %
(self._alias, level * 10))
def play(self, start_ms=None, end_ms=None):
start_ms = 0 if not start_ms else start_ms
end_ms = self.milliseconds() if not end_ms else end_ms
err, buf = self._mci.directsend('play %s from %d to %d'
% (self._alias, start_ms, end_ms))
def isplaying(self):
return self._mode() == 'playing'
def _mode(self):
err, buf = self._mci.directsend('status %s mode' % self._alias)
return buf
def pause(self):
self._mci.directsend('pause %s' % self._alias)
def unpause(self):
self._mci.directsend('resume %s' % self._alias)
def ispaused(self):
return self._mode() == 'paused'
def stop(self):
self._mci.directsend('stop %s' % self._alias)
self._mci.directsend('seek %s to start' % self._alias)
def milliseconds(self):
return self._length_ms
def __del__(self):
self._mci.directsend('close %s' % self._alias)
_PlatformSpecificAudioClip = _AudioClip
class AudioClip(object):
__slots__ = ['_clip']
def __init__(self, filename):
self._clip = _PlatformSpecificAudioClip(filename)
def play(self, start_ms=None, end_ms=None):
if end_ms is not None and end_ms < start_ms:
return
else:
return self._clip.play(start_ms, end_ms)
def volume(self, level):
assert 0 <= level <= 100
return self._clip.volume(level)
def isplaying(self):
return self._clip.isplaying()
def pause(self):
return self._clip.pause()
def unpause(self):
return self._clip.unpause()
def ispaused(self):
return self._clip.ispaused()
def stop(self):
return self._clip.stop()
def seconds(self):
return int(round(float(self.milliseconds()) / 1000))
def milliseconds(self):
return self._clip.milliseconds()
def load(filename):
"""Return an AudioClip for the given filename."""
return AudioClip(filename)