Send audio stream from microphone to Google Speech - Javascript

Send audio stream from microphone to Google Speech - Javascript - node.js

I am trying to send microphone input from the client (Nuxt) side to a Node + socket.io server and then on to the Google Speech API. I get the stream from navigator.mediaDevices.getUserMedia({ audio: true }) and send it to the back end using socket.io-stream. My client-side code is as follows.
import ss from 'socket.io-stream'
// Ask the browser for microphone access, then emit the resulting MediaStream
// on the 'audio' event through the socket.io-stream wrapper around this.$socket.
// NOTE(review): getUserMedia resolves with a browser MediaStream object, not a
// Node-style stream; whether the receiver gets something with a .pipe() method
// depends on what socket.io-stream does with it — confirm.
navigator.mediaDevices.getUserMedia({ audio: true }).then((mediaStream) => {
ss(this.$socket).emit('audio', mediaStream);
});
And my server code is as follows.
// Socket.IO server (port 3555) that receives an 'audio' stream from each
// client and pipes it into Google Cloud Speech streaming recognition.
const io = require('socket.io')(3555);
const ss = require('socket.io-stream');
// The client library is a scoped npm package, so its name starts with '@'.
// A specifier starting with '#' is treated by Node as a package-internal
// subpath import and cannot be resolved.
const speech = require('@google-cloud/speech');

io.on('connection', (socket) => {
  // One Speech client and one request template per connected socket.
  const client = new speech.SpeechClient({ keyFilename: 'key.json' });
  const encoding = 'LINEAR16';
  const sampleRateHertz = 16000;
  const languageCode = 'en-US';
  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode,
    },
    interimResults: true,
  };

  ss(socket).on('audio', (stream) => {
    // Each incoming audio stream gets its own recognize stream; results are
    // written to stdout as they arrive and errors are logged.
    const recognizeStream = client.streamingRecognize(request)
      .on('error', console.error)
      .on('data', data => {
        process.stdout.write(
          data.results[0] && data.results[0].alternatives[0]
            ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
            : `\n\nReached transcription time limit, press Ctrl+C\n`
        );
      });
    stream.pipe(recognizeStream);
  });
});
But this code doesn't work and displays the error TypeError: stream.pipe is not a function.
Someone please point out the error or tell me a way to achieve this. Thank you!

Related

Google speech to text not working on nodejs

I have created a speech-to-text converter app with a React frontend and a Node.js API. I record audio in React and post it to Node.js, but the Google API result is empty. How can I fix it?
why getting always empty results?
that's my code.
ReactMic Recorder
<ReactMic
record={record}
className="sound-wave"
onStop={onStop}
onData={onData}
strokeColor="#000000"
backgroundColor="#FF4081"
mimeType="audio/wav"/>
<button onClick={startRecording} type="button">Start</button>
<button onClick={stopRecording} type="button">Stop</button>
NodeJs API
/**
 * POST /SpeechConvert
 *
 * Takes the uploaded audio file from `req.files.file`, stores a copy as
 * ./input.wav, sends the audio to Google Cloud Speech for recognition and
 * responds with the joined transcription. If recognition fails, the error is
 * logged and a 500 response is sent so the client is not left waiting.
 */
app.post('/SpeechConvert', (req, res) => {
  const client = new speech.SpeechClient();
  console.log(req.files.file);

  // Keep a copy of the upload on disk; a failure here is only logged.
  req.files.file.mv('./input.wav', (err) => {
    if (err) {
      console.log(err);
    }
  });

  async function speechToText() {
    // The uploaded bytes are already in memory, so encode them directly.
    // Re-reading ./input.wav here would race with the asynchronous mv() above
    // and could throw before the file exists.
    const audioBytes = req.files.file.data.toString('base64');
    // console.log(audioBytes);
    // The audio file's encoding, sample rate in hertz, and BCP-47 language code
    const audio = {
      content: audioBytes,
    };
    const config = {
      enableAutomaticPunctuation: true,
      encoding: 'LINEAR16',
      sampleRateHertz: 44100,
      languageCode: 'en-US',
    };
    const request = {
      audio: audio,
      config: config,
    };
    // Detects speech in the audio file
    const [response] = await client.recognize(request);
    console.log(response);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    res.send({ 'transcription': transcription, 'msg': 'The Audio successfully converted to the text' });
  }

  speechToText().catch((err) => {
    console.error(err);
    // Always answer the request; skip if a response was already started.
    if (!res.headersSent) {
      res.status(500).send({ 'msg': 'The audio could not be converted to text' });
    }
  });
});
can anyone help me to fix this?

Convert mediarecorder blobs to a type that google speech to text can transcribe

I am making an app where the user's browser records the user speaking and sends the audio to the server, which then passes it on to the Google Speech-to-Text API. I am using MediaRecorder to get 1-second blobs which are sent to a server. On the server side, I send these blobs over to the Google Speech-to-Text API. However, I am getting empty transcriptions.
I know what the issue is. MediaRecorder's default MIME type is audio/webm;codecs=opus, which is not accepted by Google's Speech-to-Text API. After doing some research, I realize I need to use ffmpeg to convert the blobs to LINEAR16. However, ffmpeg only accepts audio FILES and I want to be able to convert BLOBS. Then I can send the resulting converted blobs over to the API.
server.js
// For every WebSocket connection, base64-encode each incoming message and
// hand it to livetranscriber for recognition.
wsserver.on('connection', socket => {
  console.log("Listening on port 3002")
  socket.on('message', function (message) {
    // const buffer = new Int16Array(message, 0, Math.floor(data.byteLength / 2));
    // console.log(`received from a client: ${new Uint8Array(message)}`);
    // console.log(message);
    // A fresh object per message: the previous implicit global `audio` was
    // shared (and overwritten) by every connection and message.
    const audio = {
      content: message.toString('base64')
    };
    console.log(audio.content);
    livetranscriber.createRequest(audio)
      // Returning the promise lets recognition failures reach the catch below.
      .then(request => livetranscriber.recognizeStream(request))
      .catch(console.error);
  });
});
livetranscriber
module.exports = {
createRequest: function(audio){
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';
return new Promise((resolve, reject, err) =>{
if (err){
reject(err)
}
else{
const request = {
audio: audio,
config: {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
},
interimResults: false, // If you want interim results, set this to true
};
resolve(request);
}
});
},
recognizeStream: async function(request){
const [response] = await client.recognize(request)
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log(`Transcription: ${transcription}`);
// console.log(message);
// message.pipe(recognizeStream);
},
}
client
// Called for every recorded chunk: opens a WebSocket to the server and sends
// the chunk once the connection is open.
recorder.ondataavailable = function (e) {
  console.log('Data', e.data);
  const ws = new WebSocket('ws://localhost:3002/websocket');
  ws.onopen = function () {
    console.log("opening connection");
    // const stream = websocketStream(ws)
    // const duplex = WebSocket.createWebSocketStream(ws, { encoding: 'utf8' });
    // e.data already holds the recorded chunk, so send it as-is. Wrapping the
    // event in `new Blob(e, ...)` throws (the first argument must be an array
    // of parts), and Blob objects have no `.data` property.
    ws.send(e.data);
    // e.data).pipe(stream);
    // console.log(e.data);
    console.log("Sent the message")
  };
  // chunks.push(e.data);
  // socket.emit('data', e.data);
}

I wrote a similar script several years ago. However, I used a JS frontend and a Python backend instead of NodeJS. I remember using a sox transformer to transform the audio input into an output that the Google Speech API could use.
Perhaps this might be useful for you.
https://github.com/bitnahian/speech-transcriptor/blob/9f186e5416566aa8a6959fc1363d2e398b902822/app.py#L27
TLDR:
Converted from a .wav format to .raw format using ffmpeg and sox.

Save speech to text in local using node js

I'm trying to replicate the code given at https://github.com/googleapis/nodejs-speech/blob/master/samples/recognize.js. There is no error when I run it locally. But here I'm confused on where can I see the result that is created. Is there a way that I can write the result to a file?
Here is the code.
// Records microphone audio and streams it to the Google Cloud Speech API,
// printing each transcription to stdout.
const record = require('node-record-lpcm16');
// Imports the Google Cloud client library.
// The package is scoped, so its name starts with '@'; a specifier starting
// with '#' is treated by Node as a subpath import and cannot be resolved.
const speech = require('@google-cloud/speech');
// Creates a client
const client = new speech.SpeechClient();
/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';
const request = {
  config: {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  },
  interimResults: false, // If you want interim results, set this to true
};
// Create a recognize stream; every 'data' event carries recognition results,
// which are written to stdout here.
const recognizeStream = client
  .streamingRecognize(request)
  .on('error', console.error)
  .on('data', data =>
    process.stdout.write(
      data.results[0] && data.results[0].alternatives[0] ?
        `Transcription: ${data.results[0].alternatives[0].transcript}\n` :
        `\n\nReached transcription time limit, press Ctrl+C\n`
    )
  );
// Start recording and send the microphone input to the Speech API
record
  .start({
    sampleRateHertz: sampleRateHertz,
    threshold: 0,
    // Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
    verbose: false,
    recordProgram: 'sox', // Try also "arecord" or "sox"
    silence: '10.0',
  })
  .on('error', console.error)
  .pipe(recognizeStream);
console.log('Listening, press Ctrl+C to stop.');
This is very confusing :(. please let me know how can I achieve this.
Thanks

It's in the "data". Please look at the code and see how the data is logged to the console.
Example:
// Run recognition and log the joined transcript of all results.
client
  .recognize(request)
  .then(data => {
    // The first element of the resolved array is the recognition response.
    const response = data[0];
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: `, transcription);
  })
  // Log failures instead of leaving the promise rejection unhandled.
  .catch(console.error);

Google Speech API not working in Node.js

I am using Google Cloud Speech API in Node.js. When I send a local audio file it gives the result but when I try to send a live stream it gets stopped within a second. Can anyone help me with this?
I am using this file: recognize.js
This is the code where I pass the audio file to the Google Speech API; here it is working fine. I am passing the audio file stored in fileName to the Google Speech API.
/**
 * Reads a local audio file, sends it to the Google Cloud Speech API and logs
 * the first transcript; errors are logged with an 'ERROR:' prefix.
 */
function sample() {
  // The client library is a scoped package, so its name starts with '@'.
  // A specifier starting with '#' is treated by Node as a subpath import and
  // cannot be resolved.
  var speech = require('@google-cloud/speech')({
    projectId: 'project id',
    keyFilename: 'Speech to text-a5ff6058e586.json'
  });
  const fs = require('fs');
  const fileName = 'C:/Users/nap1225/Downloads/audio-files/audio_001.wav';
  // const fileName = 'C:/xampp/htdocs/SpeechWatson/public/audio/Us_English_Broadband_Sample_2.wav';
  // const fileName = 'Sample 1.wav';
  // const fileName = 'C:/Users/nap1225/Desktop/dolo.wav';
  // Reads a local audio file and converts it to base64
  const fileMp3 = fs.readFileSync(fileName);
  const audioBytes = fileMp3.toString('base64');
  const audio = {
    content: audioBytes
  };
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
    speechContexts: {
      "phrases": ["refsum"]
    }
  };
  const request = {
    audio: audio,
    config: config
  };
  speech.recognize(request)
    .then((results) => {
      const transcription = results[0].results[0].alternatives[0].transcript;
      console.log(`Transcription: `, transcription);
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
}
sample();
2. Here is the code where I send streamed audio. When I execute this code, it records for 1 second and then stops.
/**
 * Records microphone audio with node-record-lpcm16 and pipes it into Google
 * Cloud Speech streaming recognition, writing each transcription to stdout.
 */
function streamingMicRecognize () {
  // [START speech_streaming_mic_recognize]
  // Imports the Google Cloud client library
  const record = require('node-record-lpcm16');
  // The client library is a scoped package, so its name starts with '@'.
  // A specifier starting with '#' is treated by Node as a subpath import and
  // cannot be resolved.
  const speech = require('@google-cloud/speech')({
    projectId: 'project id',
    keyFilename: 'Speech to text-a5ff6058e586.json'
  });
  // Instantiates a client
  //const speech = Speech();
  // The encoding of the audio file, e.g. 'LINEAR16'
  const encoding = 'LINEAR16';
  // The sample rate of the audio file in hertz, e.g. 16000
  const sampleRateHertz = 16000;
  // The BCP-47 language code to use, e.g. 'en-US'
  const languageCode = 'en-US';
  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode
    },
    interimResults: true, // If you want interim results, set this to true
    singleUtterance: false
  };
  // Create a recognize stream
  const recognizeStream = speech.streamingRecognize(request)
    .on('error', console.error)
    .on('data', (data) =>
      process.stdout.write(
        (data.results[0] && data.results[0].alternatives[0])
          ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
          : `\n\nReached transcription time limit, press Ctrl+C\n`));
  // Start recording and send the microphone input to the Speech API
  record
    .start({
      sampleRateHertz: sampleRateHertz,
      threshold: 20,
      // Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
      verbose: false,
      recordProgram: 'sox', // Try also "arecord" or "sox"
      silence: '10.0',
      device : 'plughw:0'
      // device : settingsRecord.audio_input
    })
    .on('error', console.error)
    .pipe(recognizeStream);
  console.log('Listening, press Ctrl+C to stop.');
  // [END speech_streaming_mic_recognize]
  /* .command(
  `listen`,
  `Detects speech in a microphone input stream. This command requires that you have SoX installed and available in your $PATH. See https://www.npmjs.com/package/node-record-lpcm16#dependencies`,
  {},
  (opts) => streamingMicRecognize(opts.encoding, opts.sampleRateHertz, opts.languageCode)
  )
  .options({
  encoding: {
  alias: 'e',
  default: 'LINEAR16',
  global: true,
  requiresArg: true,
  type: 'string'
  },
  sampleRateHertz: {
  alias: 'r',
  default: 16000,
  global: true,
  requiresArg: true,
  type: 'number'
  },
  languageCode: {
  alias: 'l',
  default: 'en-US',
  global: true,
  requiresArg: true,
  type: 'string'
  }
  }) */
}
streamingMicRecognize();

Google speech recognition api is too slow

Actually I am trying to store audio stream from my web page to my nodejs server using socket.io. And after storing it on my server I am trying to perform speech recognition on the stored file. I have following code running well but it is too slow. I have all environment variables and configurations in place. After collecting statistics for many requests the response time is varying between 7 seconds to 18 seconds.
// HTTP + socket.io server (port 3000): audio chunks received on 'stream' are
// written to demo.wav, and on 'end' the file is sent for speech recognition.
var http = require('http');
var wav = require('wav');
var app = http.createServer(function ejecute(request, response) {});
var io = require('socket.io').listen(app);
var fs = require('fs');
// The client library is a scoped package, so its name starts with '@'.
// A specifier starting with '#' is treated by Node as a subpath import and
// cannot be resolved.
var Speech = require('@google-cloud/speech');

io.on('connection', function(socket) {
  // Created lazily on the first 'stream' chunk of this connection.
  var fileWriter = null;
  socket.on('stream', function(data) {
    if (!fileWriter) {
      fileWriter = new wav.FileWriter('demo.wav', {
        channels: 1,
        sampleRate: 16000,
        bitDepth: 16
      });
    }
    // Chunks arriving after the writer was ended are dropped.
    if (!fileWriter._writableState.ended)
      fileWriter.write(data);
  });
  socket.on('end', function(data) {
    // 'end' may arrive before any 'stream' chunk; without a writer there is
    // no file to close or transcribe, and calling end() on null would throw.
    if (!fileWriter) {
      return;
    }
    fileWriter.end();
    streamingRecognize('demo.wav');
  });
});

/**
 * Sends the given audio file to the Speech API and logs the transcription;
 * errors are logged with an 'ERROR:' prefix.
 *
 * @param {string} filename - Path of the audio file to recognize.
 */
function streamingRecognize(filename) {
  const speech = Speech();
  const request = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    sampleRateHertz: 16000
  };
  speech.recognize(filename, request)
    .then((results) => {
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
}
app.listen(3000);
Can anyone help me out here? What wrong am I doing?
Here is reference I am using
https://cloud.google.com/speech/docs/how-to
I can use Web Speech recognizer too. But I need to provide cross browser support.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Send audio stream from microphone to Google Speech - Javascript - node.js

Related

Google speech to text not working on nodejs

Convert mediarecorder blobs to a type that google speech to text can transcribe

Save speech to text in local using node js

Google Speech API not working in Node.js

Google speech recognition api is too slow

Categories

Resources