I am trying to send microphone input from the client (Nuxt) to a Node + socket.io server and then on to the Google Speech API. I get the stream from navigator.mediaDevices.getUserMedia({ audio: true }) and send it to the back end using socket.io-stream. My client-side code is as follows.
import ss from 'socket.io-stream'
// Ask the browser for microphone access, then emit the resulting MediaStream
// on the 'audio' event through the socket.io-stream wrapper around this.$socket.
// NOTE(review): `mediaStream` is a browser MediaStream, not a stream created with
// ss.createStream(); that is presumably why the receiving side gets an object
// without .pipe() — confirm against the socket.io-stream documentation.
navigator.mediaDevices.getUserMedia({ audio: true }).then((mediaStream) => {
ss(this.$socket).emit('audio', mediaStream);
});
And my server code as follows.
const io = require('socket.io')(3555);
const ss = require('socket.io-stream');
// Scoped npm package names start with '@'; '#google-cloud/speech' cannot be resolved.
const speech = require('@google-cloud/speech');

// For every socket.io connection, create a Speech client and forward each
// incoming 'audio' stream to a streaming recognition request.
io.on('connection', (socket) => {
  const client = new speech.SpeechClient({ keyFilename: 'key.json' });

  // Streaming recognition settings shared by every 'audio' stream on this socket.
  const request = {
    config: {
      encoding: 'LINEAR16',
      sampleRateHertz: 16000,
      languageCode: 'en-US',
    },
    interimResults: true,
  };

  ss(socket).on('audio', (stream) => {
    const recognizeStream = client
      .streamingRecognize(request)
      .on('error', console.error)
      .on('data', (data) => {
        // A message without a first alternative is reported as the time limit being reached.
        const alternative = data.results[0] && data.results[0].alternatives[0];
        process.stdout.write(
          alternative
            ? `Transcription: ${alternative.transcript}\n`
            : `\n\nReached transcription time limit, press Ctrl+C\n`
        );
      });

    // NOTE(review): `stream` must be a real socket.io-stream stream for .pipe()
    // to exist — presumably one created with ss.createStream() on the client; confirm.
    stream.pipe(recognizeStream);
  });
});
But this code doesn't work and displays the error TypeError: stream.pipe is not a function.
Someone please point out the error or tell me a way to achieve this. Thank you!
Related
I have created a speech-to-text converter app with a React frontend and a Node.js API. I record audio in React and post it to Node.js, but the Google API result is empty. How can I fix it?
why getting always empty results?
that's my code.
ReactMic Recorder
// Recorder widget followed by two buttons wired to startRecording / stopRecording.
<ReactMic
// `record`, `onStop` and `onData` come from the surrounding component (not shown here).
record={record}
className="sound-wave"
onStop={onStop}
onData={onData}
strokeColor="#000000"
backgroundColor="#FF4081"
// NOTE(review): asks for "audio/wav" output — verify what the recorder actually
// produces, since the receiving API handler declares LINEAR16 at 44100 Hz.
mimeType="audio/wav"/>
<button onClick={startRecording} type="button">Start</button>
<button onClick={stopRecording} type="button">Stop</button>
NodeJs API
/**
 * POST /SpeechConvert
 *
 * Transcribes the uploaded audio (`req.files.file`) with the Speech API and
 * responds with `{ transcription, msg }`. A copy of the upload is also written
 * to ./input.wav.
 *
 * Responds 400 when no file was uploaded and 500 when recognition fails, so the
 * client never waits on a request that gets no answer.
 */
app.post('/SpeechConvert', (req, res) => {
  const upload = req.files && req.files.file;
  if (!upload) {
    res.status(400).send({ 'msg': 'No audio file was uploaded' });
    return;
  }
  console.log(upload);

  const client = new speech.SpeechClient();

  // Best-effort copy to disk; recognition below uses the in-memory upload and
  // therefore does not depend on this asynchronous move having finished.
  upload.mv('./input.wav', function (err) {
    if (err) {
      console.log(err);
    }
  });

  async function speechToText() {
    // The in-memory upload is encoded directly. Reading ./input.wav back from
    // disk here would race the move above and could throw before it completes.
    const audioBytes = upload.data.toString('base64');

    const request = {
      audio: {
        content: audioBytes,
      },
      // NOTE(review): these values must match the recorded audio; for WAV uploads
      // the header already carries the format — confirm 44100 Hz / LINEAR16
      // against what the browser recorder really sends.
      config: {
        enableAutomaticPunctuation: true,
        encoding: 'LINEAR16',
        sampleRateHertz: 44100,
        languageCode: 'en-US',
      },
    };

    // Detects speech in the uploaded audio
    const [response] = await client.recognize(request);
    console.log(response);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    res.send({ 'transcription': transcription, 'msg': 'The Audio successfully converted to the text' });
  }

  speechToText().catch((err) => {
    console.error(err);
    // Answer the request on failure instead of leaving it hanging.
    if (!res.headersSent) {
      res.status(500).send({ 'msg': 'Speech recognition failed' });
    }
  });
});
can anyone help me to fix this?
I am making an app where the user's browser records the user speaking and sends the audio to the server, which then passes it on to the Google Speech-to-Text API. I am using MediaRecorder to get 1-second blobs, which are sent to a server. On the server side, I send these blobs over to the Google Speech-to-Text API. However, I am getting empty transcriptions.
I know what the issue is. MediaRecorder's default MIME type is audio/webm;codecs=opus, which is not accepted by Google's Speech-to-Text API. After doing some research, I realized I need to use ffmpeg to convert the blobs to LINEAR16. However, ffmpeg only accepts audio FILES and I want to be able to convert BLOBS. Then I can send the resulting converted blobs over to the API.
server.js
// For each WebSocket connection, base64-encode every incoming message and hand
// it to livetranscriber for recognition.
wsserver.on('connection', socket => {
  console.log("Listening on port 3002")

  socket.on('message', function (message) {
    // Declared per message so that connections never share (or overwrite) one
    // payload object through an implicit global.
    const audio = {
      content: message.toString('base64')
    };
    console.log(audio.content);

    livetranscriber
      .createRequest(audio)
      // Returning the promise keeps recognition failures on this chain.
      .then(request => livetranscriber.recognizeStream(request))
      .catch(console.error);
  });
});
livetranscriber
module.exports = {
createRequest: function(audio){
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';
return new Promise((resolve, reject, err) =>{
if (err){
reject(err)
}
else{
const request = {
audio: audio,
config: {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
},
interimResults: false, // If you want interim results, set this to true
};
resolve(request);
}
});
},
recognizeStream: async function(request){
const [response] = await client.recognize(request)
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log(`Transcription: ${transcription}`);
// console.log(message);
// message.pipe(recognizeStream);
},
}
client
// Sends each recorded chunk to the WebSocket server on a fresh connection.
recorder.ondataavailable = function (e) {
  console.log('Data', e.data);

  // Nothing to send for an empty chunk.
  if (!e.data || e.data.size === 0) {
    return;
  }

  var ws = new WebSocket('ws://localhost:3002/websocket');

  ws.onopen = function () {
    console.log("opening connection");
    // `e.data` is already a Blob and can be sent as-is. Wrapping the event in
    // `new Blob(e, ...)` is invalid (the constructor expects an array of parts)
    // and a Blob has no `.data` property, so that approach sent nothing useful.
    // NOTE(review): the chunk stays in the recorder's native format; relabelling
    // it as audio/wav would not transcode it — convert on the server if needed.
    ws.send(e.data);
    console.log("Sent the message")
  };

  ws.onerror = function (err) {
    console.error('WebSocket error', err);
  };
}
I wrote a similar script several years ago. However, I used a JS frontend and a Python backend instead of NodeJS. I remember using a sox transformer to transform the audio input into an output that the Google Speech API could use.
Perhaps this might be useful for you.
https://github.com/bitnahian/speech-transcriptor/blob/9f186e5416566aa8a6959fc1363d2e398b902822/app.py#L27
TLDR:
Converted from a .wav format to .raw format using ffmpeg and sox.
I'm trying to replicate the code given at https://github.com/googleapis/nodejs-speech/blob/master/samples/recognize.js. There is no error when I run it locally, but I'm confused about where I can see the result that is produced. Is there a way to write the result to a file?
Here is the code.
const record = require('node-record-lpcm16');

// Imports the Google Cloud client library.
// Scoped npm package names start with '@'; '#google-cloud/speech' cannot be resolved.
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

// Audio format announced to the API; the sample rate is also passed to the recorder below.
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';

const request = {
  config: {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  },
  interimResults: false, // If you want interim results, set this to true
};

// Create a recognize stream. Results arrive through the 'data' event, where
// each transcript is written to stdout.
const recognizeStream = client
  .streamingRecognize(request)
  .on('error', console.error)
  .on('data', data =>
    process.stdout.write(
      data.results[0] && data.results[0].alternatives[0] ?
        `Transcription: ${data.results[0].alternatives[0].transcript}\n` :
        `\n\nReached transcription time limit, press Ctrl+C\n`
    )
  );

// Start recording and send the microphone input to the Speech API
record
  .start({
    sampleRateHertz: sampleRateHertz,
    threshold: 0,
    // Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
    verbose: false,
    recordProgram: 'sox', // Try also "arecord" or "sox"
    silence: '10.0',
  })
  .on('error', console.error)
  .pipe(recognizeStream);

console.log('Listening, press Ctrl+C to stop.');
This is very confusing :(. please let me know how can I achieve this.
Thanks
It's in the "data". Please look into the code and see how the console logs the data.
Example:
// One-shot recognition: take the first element of the resolved value as the
// response and print the top alternative of each result, one per line.
client.recognize(request).then((payload) => {
  const { results } = payload[0];
  const lines = [];
  for (const entry of results) {
    lines.push(entry.alternatives[0].transcript);
  }
  console.log(`Transcription: `, lines.join('\n'));
})
I am using the Google Cloud Speech API in Node.js. When I send a local audio file it returns a result, but when I try to send a live stream it stops within a second. Can anyone help me with this?
I am using this file: recognize.js
This is the code I use when I pass an audio file to the Google Speech API; here it works fine. I pass the audio file whose path is stored in fileName to the Google Speech API.
function sample() {
const projectId = 'project id';
let file = "conf.json" //google exported this for you
var speech = require('#google-cloud/speech')({
projectId: 'project id',
keyFilename: 'Speech to text-a5ff6058e586.json'
});
const fs = require('fs');
const fileName = 'C:/Users/nap1225/Downloads/audio-files/audio_001.wav';
// const fileName = 'C:/xampp/htdocs/SpeechWatson/public/audio/Us_English_Broadband_Sample_2.wav';
// const fileName = 'Sample 1.wav';
// const fileName = 'C:/Users/nap1225/Desktop/dolo.wav';
// Reads a local audio file and converts it to base64
const fileMp3 = fs.readFileSync(fileName);
const audioBytes = fileMp3.toString('base64');
const audio = {
content: audioBytes
};
const config = {
encoding: 'LINEAR16',
sampleRateHertz: 16000,
languageCode: 'en-US',
speechContexts: {
"phrases": ["refsum"]
}
};
const request = {
audio: audio,
config: config
};
speech.recognize(request)
.then((results) => {
const transcription = results[0].results[0].alternatives[0].transcript;
console.log(`Transcription: `, transcription);
})
.catch((err) => {
console.error('ERROR:', err);
});
}
// Run the file-based transcription once when this script executes.
sample();
2. Here is the code I use to send streaming audio. When I execute this code, it records for 1 second and then stops.
function streamingMicRecognize () {
// [START speech_streaming_mic_recognize]
// Imports the Google Cloud client library
const record = require('node-record-lpcm16');
const projectId = 'project id';
let file="conf.json"//google exported this for you
const speech = require('#google-cloud/speech')({
projectId: 'project id',
keyFilename: 'Speech to text-a5ff6058e586.json'
});
// Instantiates a client
//const speech = Speech();
// The encoding of the audio file, e.g. 'LINEAR16'
const encoding = 'LINEAR16';
// The sample rate of the audio file in hertz, e.g. 16000
const sampleRateHertz = 16000;
// The BCP-47 language code to use, e.g. 'en-US'
const languageCode = 'en-US';
const request = {
config: {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
},
interimResults: true, // If you want interim results, set this to true
singleUtterance: false
};
// Create a recognize stream
const recognizeStream = speech.streamingRecognize(request)
.on('error', console.error)
.on('data', (data) =>
process.stdout.write(
(data.results[0] && data.results[0].alternatives[0])
? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
: `\n\nReached transcription time limit, press Ctrl+C\n`));
// Start recording and send the microphone input to the Speech API
record
.start({
sampleRateHertz: sampleRateHertz,
threshold: 20,
// Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
verbose: false,
recordProgram: 'sox', // Try also "arecord" or "sox"
silence: '10.0',
device : 'plughw:0'
// device : settingsRecord.audio_input
})
.on('error', console.error)
.pipe(recognizeStream);
console.log('Listening, press Ctrl+C to stop.');
// [END speech_streaming_mic_recognize]
/* .command(
`listen`,
`Detects speech in a microphone input stream. This command requires that you have SoX installed and available in your $PATH. See https://www.npmjs.com/package/node-record-lpcm16#dependencies`,
{},
(opts) => streamingMicRecognize(opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.options({
encoding: {
alias: 'e',
default: 'LINEAR16',
global: true,
requiresArg: true,
type: 'string'
},
sampleRateHertz: {
alias: 'r',
default: 16000,
global: true,
requiresArg: true,
type: 'number'
},
languageCode: {
alias: 'l',
default: 'en-US',
global: true,
requiresArg: true,
type: 'string'
}
}) */
}
// Start microphone streaming recognition when this script executes.
streamingMicRecognize();
I am trying to stream audio from my web page to my Node.js server using socket.io and store it there. After storing it on the server, I perform speech recognition on the stored file. The following code runs correctly, but it is too slow. All environment variables and configuration are in place. After collecting statistics over many requests, the response time varies between 7 and 18 seconds.
var http = require('http');
var wav = require('wav');
// Plain HTTP server whose request handler does nothing; socket.io is attached to it below.
var app = http.createServer(function ejecute(request, response) {});
var io = require('socket.io').listen(app);
var fs = require('fs');
// Scoped npm package names start with '@'; '#google-cloud/speech' cannot be resolved.
var Speech = require('@google-cloud/speech');
// Per connection: append every 'stream' chunk to demo.wav and, on 'end',
// finalize the file and run recognition on it.
io.on('connection', function(socket) {
  var fileWriter = null;

  socket.on('stream', function(data) {
    // Lazily open the WAV writer on the first chunk.
    if (!fileWriter) {
      fileWriter = new wav.FileWriter('demo.wav', {
        channels: 1,
        sampleRate: 16000,
        bitDepth: 16
      });
    }
    // Chunks arriving after the writer was ended are dropped.
    if (!fileWriter._writableState.ended)
      fileWriter.write(data);
  });

  socket.on('end', function(data) {
    // 'end' without any preceding audio (or a repeated 'end') has nothing to finalize.
    if (!fileWriter || fileWriter._writableState.ended) {
      return;
    }
    // Start recognition only once the writer reports the file as complete, so
    // a partially flushed file is never read.
    // NOTE(review): relies on wav.FileWriter emitting 'done' after the final
    // header has been written — confirm against the installed wav version.
    fileWriter.once('done', function() {
      streamingRecognize('demo.wav');
    });
    fileWriter.end();
  });
});
/**
 * Runs recognition on the audio file at `filename` and logs the first element
 * of the resolved value as the transcription. Failures are logged with an
 * 'ERROR:' prefix.
 *
 * @param {string} filename - path of the audio file to recognize.
 */
function streamingRecognize(filename) {
  const recognizer = Speech();

  // Format of the stored recording.
  const options = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    sampleRateHertz: 16000
  };

  const reportTranscription = function (results) {
    console.log(`Transcription: ${results[0]}`);
  };

  const reportFailure = function (err) {
    console.error('ERROR:', err);
  };

  recognizer
    .recognize(filename, options)
    .then(reportTranscription)
    .catch(reportFailure);
}
// Start the HTTP server (and the socket.io instance attached to it) on port 3000.
app.listen(3000);
Can anyone help me out here? What am I doing wrong?
Here is reference I am using
https://cloud.google.com/speech/docs/how-to
I can use Web Speech recognizer too. But I need to provide cross browser support.