Google speech recognition api is too slow - node.js

I am streaming audio from my web page to my Node.js server using socket.io and storing it there. Once the file is stored, I run speech recognition on it. The following code works, but it is too slow: I have all environment variables and configuration in place, and after collecting statistics across many requests the response time varies between 7 and 18 seconds.
var http = require('http');
var wav = require('wav');
var app = http.createServer(function ejecute(request, response) {});
var io = require('socket.io').listen(app);
var fs = require('fs');
var Speech = require('@google-cloud/speech');

io.on('connection', function(socket) {
  var fileWriter = null;
  socket.on('stream', function(data) {
    if (!fileWriter) {
      fileWriter = new wav.FileWriter('demo.wav', {
        channels: 1,
        sampleRate: 16000,
        bitDepth: 16
      });
    }
    if (!fileWriter._writableState.ended)
      fileWriter.write(data);
  });
  socket.on('end', function(data) {
    fileWriter.end();
    streamingRecognize('demo.wav');
  });
});

function streamingRecognize(filename) {
  const speech = Speech();
  const request = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    sampleRateHertz: 16000
  };
  speech.recognize(filename, request)
    .then((results) => {
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
}

app.listen(3000);
Can anyone help me out here? What am I doing wrong?
Here is the reference I am using:
https://cloud.google.com/speech/docs/how-to
I could use the Web Speech recognizer too, but I need to provide cross-browser support.
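One way to cut that latency (a sketch from me, not from the thread, assuming the browser is sending raw LINEAR16 PCM chunks): pipe the audio into a streamingRecognize call as it arrives, so recognition runs while the upload is still in progress instead of starting only after the WAV file is closed:

const speech = require('@google-cloud/speech');
const client = new speech.SpeechClient();

io.on('connection', function(socket) {
  let recognizeStream = null;
  socket.on('stream', function(data) {
    if (!recognizeStream) {
      recognizeStream = client.streamingRecognize({
        config: {
          encoding: 'LINEAR16',
          sampleRateHertz: 16000,
          languageCode: 'en-US'
        },
        interimResults: false
      })
        .on('error', console.error)
        .on('data', (response) => {
          // Results arrive while audio is still streaming in
          console.log(response.results[0].alternatives[0].transcript);
        });
    }
    recognizeStream.write(data); // raw PCM chunks from the browser
  });
  socket.on('end', function() {
    if (recognizeStream) recognizeStream.end();
  });
});

This way the recognition time is not stacked on top of the recording time.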

Related

how to solve audio encoding error in Media-translation GCP API?

Here's my code.
I have gone through the Google Cloud Platform API documentation and followed the GCP doc steps correctly, but I am still unable to fix the encoding error, which you can see below. I'm trying to translate an audio clip from en-US (English) to hi-IN (Hindi); it would also be helpful if you could suggest some alternative approaches.
function main(filename, encoding, sourceLanguage, targetLanguage) {
  const fs = require('fs');
  const {
    SpeechTranslationServiceClient,
  } = require('@google-cloud/media-translation');

  const client = new SpeechTranslationServiceClient();

  async function quickstart() {
    const filename = './16kmonoceo.wav';
    const encoding = 'LINEAR16';
    const sourceLanguage = 'en-US';
    const targetLanguage = 'hi-IN';

    const config = {
      audioConfig: {
        audioEncoding: encoding,
        sourceLanguageCode: sourceLanguage,
        targetLanguageCode: targetLanguage,
      },
    };

    const initialRequest = {
      streamingConfig: config,
      audioContent: null,
    };

    const readStream = fs.createReadStream(filename, {
      highWaterMark: 4096,
      encoding: 'base64',
    });

    const chunks = [];
    readStream
      .on('data', chunk => {
        const request = {
          streamingConfig: config,
          audioContent: chunk.toString(),
        };
        chunks.push(request);
      })
      .on('close', () => {
        // Config-only request should be first in stream of requests
        stream.write(initialRequest);
        for (let i = 0; i < chunks.length; i++) {
          stream.write(chunks[i]);
        }
        stream.end();
      });

    const stream = client.streamingTranslateSpeech().on('data', response => {
      const {result} = response;
      if (result.textTranslationResult.isFinal) {
        console.log(
          `\nFinal translation: ${result.textTranslationResult.translation}`
        );
        console.log(`Final recognition result: ${result.recognitionResult}`);
      } else {
        console.log(
          `\nPartial translation: ${result.textTranslationResult.translation}`
        );
        console.log(`Partial recognition result: ${result.recognitionResult}`);
      }
    });
  }

  quickstart();
}

main(...process.argv.slice(2));
Here is my error from the command line: [screenshot: CHECK ERROR MESSAGE].
I'm using Windows 10 and VS Code as my IDE.
This is a case where careful reading of the error message helps.
Some module gacked on "LINEAR16" as the audioEncoding value, saying there's no encoding with that name.
A quick look at the documentation shows "linear16" (lower case) as the value to use.
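Applied to the config from the question, that fix would look like this (a minimal sketch; all other values unchanged):

const config = {
  audioConfig: {
    audioEncoding: 'linear16', // lower case, as the Media Translation docs spell it
    sourceLanguageCode: 'en-US',
    targetLanguageCode: 'hi-IN',
  },
};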

Google speech to text not working on nodejs

I have created a speech-to-text converter app with a React frontend and a Node.js API. I record audio in React and post it to Node.js, but the Google API result is empty. How can I fix it? Why do I always get empty results?
Here is my code.
ReactMic Recorder
<ReactMic
  record={record}
  className="sound-wave"
  onStop={onStop}
  onData={onData}
  strokeColor="#000000"
  backgroundColor="#FF4081"
  mimeType="audio/wav" />

<button onClick={startRecording} type="button">Start</button>
<button onClick={stopRecording} type="button">Stop</button>
NodeJs API
app.post('/SpeechConvert', (req, res) => {
  const client = new speech.SpeechClient();
  console.log(req.files.file);

  req.files.file.mv('./input.wav', function (err) {
    if (err) {
      console.log(err);
    }
  });

  async function speechToText() {
    // The name of the audio file to transcribe
    const fileData = req.files.file.data;
    // Reads a local audio file and converts it to base64
    const file = fs.readFileSync('input.wav');
    const audioBytes = fileData.toString('base64');
    // console.log(audioBytes);

    // The audio file's encoding, sample rate in hertz, and BCP-47 language code
    const audio = {
      content: audioBytes,
    };
    const config = {
      enableAutomaticPunctuation: true,
      encoding: 'LINEAR16',
      sampleRateHertz: 44100,
      languageCode: 'en-US',
    };
    const request = {
      audio: audio,
      config: config,
    };

    // Detects speech in the audio file
    const [response] = await client.recognize(request);
    console.log(response);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    res.send({ 'transcription': transcription, 'msg': 'The Audio successfully converted to the text' });
  }

  speechToText().catch(console.error);
});
Can anyone help me fix this?
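One thing worth verifying (an assumption on my part, since the question doesn't show the uploaded file's bytes): browsers often deliver WebM/Opus even when a WAV MIME type is requested, and a LINEAR16/44100 config applied to Opus data tends to return empty results rather than an error. A quick server-side sanity check:

// Hypothetical check: WAV files start with the ASCII bytes 'RIFF'
const header = req.files.file.data.slice(0, 4).toString('ascii');
if (header !== 'RIFF') {
  console.log('Upload is not WAV - encoding/sampleRateHertz config will not match');
}

If the upload turns out to be WebM/Opus, the Speech API accepts it directly with encoding: 'WEBM_OPUS' and sampleRateHertz: 48000.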

Convert mediarecorder blobs to a type that google speech to text can transcribe

I am making an app where the user's browser records them speaking and sends the audio to the server, which then passes it on to the Google Speech-to-Text interface. I am using MediaRecorder to get 1-second blobs which are sent to the server. On the server side, I send these blobs over to the Google Speech-to-Text interface. However, I am getting empty transcriptions.
I know what the issue is. MediaRecorder's default MIME type is audio/webm;codecs=opus, which is not accepted by Google's Speech-to-Text API. After doing some research, I realized I need to use ffmpeg to convert the blobs to LINEAR16. However, ffmpeg only accepts audio FILES and I want to be able to convert BLOBS. Then I can send the resulting converted blobs over to the API interface.
server.js
wsserver.on('connection', socket => {
  console.log("Listening on port 3002");
  audio = {
    content: null
  };
  socket.on('message', function(message) {
    // const buffer = new Int16Array(message, 0, Math.floor(data.byteLength / 2));
    // console.log(`received from a client: ${new Uint8Array(message)}`);
    // console.log(message);
    audio.content = message.toString('base64');
    console.log(audio.content);
    livetranscriber.createRequest(audio).then(request => {
      livetranscriber.recognizeStream(request);
    });
  });
});
livetranscriber
const speech = require('@google-cloud/speech');
const client = new speech.SpeechClient();

module.exports = {
  createRequest: function(audio) {
    const encoding = 'LINEAR16';
    const sampleRateHertz = 16000;
    const languageCode = 'en-US';

    // A Promise executor only receives (resolve, reject); there is no err argument
    return new Promise((resolve, reject) => {
      const request = {
        audio: audio,
        config: {
          encoding: encoding,
          sampleRateHertz: sampleRateHertz,
          languageCode: languageCode,
        },
        interimResults: false, // If you want interim results, set this to true
      };
      resolve(request);
    });
  },
  recognizeStream: async function(request) {
    const [response] = await client.recognize(request);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    // console.log(message);
    // message.pipe(recognizeStream);
  },
};
client
recorder.ondataavailable = function(e) {
  console.log('Data', e.data);
  var ws = new WebSocket('ws://localhost:3002/websocket');
  ws.onopen = function() {
    console.log("opening connection");
    // const stream = websocketStream(ws)
    // const duplex = WebSocket.createWebSocketStream(ws, { encoding: 'utf8' });
    // Blob takes an array of parts; send the Blob itself (it has no .data property)
    var blob = new Blob([e.data], { 'type': 'audio/wav; base64' });
    ws.send(blob);
    // e.data).pipe(stream);
    // console.log(e.data);
    console.log("Sent the message");
  };
  // chunks.push(e.data);
  // socket.emit('data', e.data);
};
I wrote a similar script several years ago. However, I used a JS frontend and a Python backend instead of NodeJS. I remember using a sox transformer to transform the audio input into an output that the Google Speech API could use.
Perhaps this might be useful for you.
https://github.com/bitnahian/speech-transcriptor/blob/9f186e5416566aa8a6959fc1363d2e398b902822/app.py#L27
TL;DR:
Converted from .wav format to .raw format using ffmpeg and sox.
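A Node take on the same idea (a sketch, assuming ffmpeg is installed on the server): ffmpeg happily reads from stdin and writes to stdout, so the blobs never have to become files:

const { spawn } = require('child_process');
const speech = require('@google-cloud/speech');
const client = new speech.SpeechClient();

wsserver.on('connection', socket => {
  // One ffmpeg process per connection: WebM/Opus blobs in on stdin,
  // raw LINEAR16 (16 kHz mono PCM) out on stdout - no temp files
  const ffmpeg = spawn('ffmpeg', [
    '-i', 'pipe:0',        // read the incoming blobs from stdin
    '-f', 's16le',         // emit raw 16-bit little-endian PCM (LINEAR16)
    '-acodec', 'pcm_s16le',
    '-ac', '1',
    '-ar', '16000',
    'pipe:1',              // write converted audio to stdout
  ]);

  const recognizeStream = client.streamingRecognize({
    config: { encoding: 'LINEAR16', sampleRateHertz: 16000, languageCode: 'en-US' },
    interimResults: false,
  })
    .on('error', console.error)
    .on('data', d => console.log(d.results[0].alternatives[0].transcript));

  ffmpeg.stdout.pipe(recognizeStream);
  socket.on('message', chunk => ffmpeg.stdin.write(chunk));
  socket.on('close', () => ffmpeg.stdin.end());
});

Note this also swaps the one-shot recognize() for streamingRecognize(), since the converted audio now arrives as a continuous stream rather than a complete file.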

Send audio stream from microphone to Google Speech - Javascript

I am trying to send microphone input on the client (Nuxt) side to the Node + socket.io server and then on to the Google Speech API. I get a stream from navigator.mediaDevices.getUserMedia({ audio: true }) and send it to the back end using socket.io-stream. My client-side code is as follows.
import ss from 'socket.io-stream'

navigator.mediaDevices.getUserMedia({ audio: true }).then((mediaStream) => {
  ss(this.$socket).emit('audio', mediaStream);
});
And my server code is as follows.
const io = require('socket.io')(3555);
const ss = require('socket.io-stream');
const speech = require('@google-cloud/speech');

io.on('connection', (socket) => {
  const client = new speech.SpeechClient({ keyFilename: 'key.json' });

  const encoding = 'LINEAR16';
  const sampleRateHertz = 16000;
  const languageCode = 'en-US';

  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode,
    },
    interimResults: true,
  };

  ss(socket).on('audio', (stream) => {
    const recognizeStream = client.streamingRecognize(request)
      .on('error', console.error)
      .on('data', data => {
        process.stdout.write(
          data.results[0] && data.results[0].alternatives[0]
            ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
            : `\n\nReached transcription time limit, press Ctrl+C\n`
        );
      });
    stream.pipe(recognizeStream);
  });
});
But this code doesn't work; it throws the error TypeError: stream.pipe is not a function.
Could someone please point out the error or tell me a way to achieve this? Thank you!
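For what it's worth (a sketch, not a verified fix): a MediaStream is a browser object, not a Node readable, so socket.io-stream can't serialize it; what arrives on the server is a plain object with no .pipe method. socket.io-stream's documented pattern is to create the stream on the client and write raw bytes into it:

import ss from 'socket.io-stream'

// Create a socket.io-stream duplex and announce it to the server
const stream = ss.createStream();
ss(this.$socket).emit('audio', stream);

// pcmSource is hypothetical: some source of raw LINEAR16 chunks captured
// from the microphone (e.g. via the Web Audio API), written as Buffers
pcmSource.on('data', (chunk) => stream.write(chunk));

The remaining work is extracting PCM from getUserMedia, since the MediaStream object itself carries no bytes you can write.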

Node JS radio (audio stream)

Good evening! I am trying to build my own internet radio using Node.js. I have seen a lot of questions and tutorials about it, but all of them are old and hard to understand.
const fs = require('fs'),
      files = fs.readdirSync('./music'), // I have some mp3 in music folder
      clips = [],
      dhh = fs.createWriteStream('./tracks.mp3'); // create output stream

let stream, currentfile;

files.forEach(file => {
  clips.push(file);
});

// recursive function
const main = () => {
  if (!clips.length) {
    dhh.end('Done');
    return;
  }
  currentfile = './music/' + clips.shift();
  stream = fs.createReadStream(currentfile);
  stream.pipe(dhh, { end: false });
  stream.on('end', function() {
    console.log(currentfile + ' appended');
    main();
  });
};

main();
So now I have all my mp3s in one single file. How can I stream this file to many different users as they connect? A lot of answers recommend BinaryJS, but it was written about 5 years ago.
I just don't know where to start, so I need your help. Thank you!
I tried something like this:
const http = require('http'),
      fs = require('fs'),
      filePath = './music/my.mp3',
      stat = fs.statSync(filePath);

http.createServer(function(request, response) {
  // Headers must be sent before piping the body
  response.writeHead(200, {
    'Content-Type': 'audio/mpeg',
    'Content-Length': stat.size,
  });
  fs.createReadStream(filePath).pipe(response);
}).listen(4000);
When a user connects on port 4000, the music starts playing, but it is not live: when another user connects, the music plays from the beginning for them too. I want it to behave like online radio, but it doesn't work :(
I'm also trying to do the same thing you're doing, and I found a very informative blog article here. There's just one thing it doesn't explain: how to add writables/consumers to the server, so it is incomplete. But you can create 90% of your radio with the rest. I also used a different approach, so I'd like to share it here to help another soul ('writable1' is not defined; feel free to correct the code).
const express = require('express'),
      router = express.Router(),
      fs = require('fs'),
      Throttle = require('throttle'),
      ffprobe = require('ffprobe'),
      ffprobeStatic = require('ffprobe-static');

// Play all songs from db
router.get('/', function(req, res) {
  ffprobe('myAudio.mp3', { path: ffprobeStatic.path }, function(err, info) {
    // bit_rate is only available inside the callback, so the streaming
    // setup has to happen here rather than before it
    const bitRate = info.streams[0].bit_rate;
    const readable = fs.createReadStream('myAudio.mp3');
    // Throttle to the file's byte rate so playback advances in real time
    const throttle = new Throttle(bitRate / 8);
    // writable1 etc. are placeholders for the connected listeners' streams
    const writables = [writable1, writable2, writable3];
    readable.pipe(throttle).on('data', (chunk) => {
      for (const writable of writables) {
        writable.write(chunk);
      }
    });
  });
});

module.exports = router;
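To fill the gap that answer mentions (how consumers actually get added), here is a hedged sketch of one common pattern: keep a set of connected responses, register each listener on its own route, and fan the throttled chunks out to all of them:

const listeners = new Set();

// Each connecting client becomes a writable that receives the live chunks
router.get('/listen', function(req, res) {
  res.writeHead(200, { 'Content-Type': 'audio/mpeg' });
  listeners.add(res);
  req.on('close', () => listeners.delete(res));
});

// In the broadcast loop, replace the hard-coded writables array:
readable.pipe(throttle).on('data', (chunk) => {
  for (const res of listeners) res.write(chunk);
});

Because every listener receives the same chunk at the same moment, late joiners hear the stream mid-song, which is the radio behaviour the question asks for.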
