Save speech-to-text output to a local file using Node.js

I'm trying to replicate the code given at https://github.com/googleapis/nodejs-speech/blob/master/samples/recognize.js. There is no error when I run it locally, but I'm confused about where I can see the result that is produced. Is there a way I can write the result to a file?
Here is the code.
const record = require('node-record-lpcm16');

// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';

const request = {
  config: {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  },
  interimResults: false, // If you want interim results, set this to true
};

// Create a recognize stream
const recognizeStream = client
  .streamingRecognize(request)
  .on('error', console.error)
  .on('data', data =>
    process.stdout.write(
      data.results[0] && data.results[0].alternatives[0]
        ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
        : `\n\nReached transcription time limit, press Ctrl+C\n`
    )
  );

// Start recording and send the microphone input to the Speech API
record
  .start({
    sampleRateHertz: sampleRateHertz,
    threshold: 0,
    // Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
    verbose: false,
    recordProgram: 'sox', // Try also "arecord" or "sox"
    silence: '10.0',
  })
  .on('error', console.error)
  .pipe(recognizeStream);

console.log('Listening, press Ctrl+C to stop.');
This is very confusing :(. Please let me know how I can achieve this.
Thanks

It's in the "data". Look into the code and see how the console logs the data.
Example:
client
  .recognize(request)
  .then(data => {
    const response = data[0];
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: `, transcription);
  });
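To get the result into a file instead of just the console, a minimal sketch is to append each transcript inside the streaming 'data' handler, using Node's built-in fs module (the transcript.txt path is illustrative):

const fs = require('fs');

// Append each finalized transcript line to a local file
recognizeStream.on('data', data => {
  if (data.results[0] && data.results[0].alternatives[0]) {
    fs.appendFileSync(
      'transcript.txt',
      data.results[0].alternatives[0].transcript + '\n'
    );
  }
});

A stream can have multiple 'data' listeners, so this can sit alongside the existing process.stdout.write handler.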

Related

SyntaxError: await is only valid in async function, in a .js program written for use with Google Cloud Speech-to-Text (Node.js)

I have viewed the other questions and answers related to this issue, but in a context different from mine. I adapted this code from Google's sample and only changed the audio file encoding type, sampling rate, and language code. Again, this file comes directly from Google, so it can't be that I have introduced weird stuff. I have also installed the @google-cloud/speech package required on line 1. Given all this, can someone tell me why this throws the error shown in the title of this message about "await" only being valid in async functions?
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
const gcsUri = 'gs://legrandtimonier_de/mac_test.flac';
const encoding = 'FLAC';
const sampleRateHertz = 44100;
const languageCode = 'de-DE';

const config = {
  encoding: encoding,
  sampleRateHertz: sampleRateHertz,
  languageCode: languageCode,
};
const audio = {
  uri: gcsUri,
};
const request = {
  config: config,
  audio: audio,
};

// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
const [operation] = await client.longRunningRecognize(request);
// const [operation] = client.longRunningRecognize(request);

// Get a Promise representation of the final result of the job
const [response] = await operation.promise();
// const [response] = operation.promise();

const transcription = response.results
  .map(result => result.alternatives[0].transcript)
  .join('\n');
console.log(`Transcription: ${transcription}`);
You cannot use await at the root of your application; it is only valid inside an async function. Try wrapping your code in a function and then calling it. Here is an example:
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');

// Creates a client
const client = new speech.SpeechClient();

const main = async () => {
  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  const gcsUri = 'gs://legrandtimonier_de/mac_test.flac';
  const encoding = 'FLAC';
  const sampleRateHertz = 44100;
  const languageCode = 'de-DE';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  };
  const audio = {
    uri: gcsUri,
  };
  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  const [operation] = await client.longRunningRecognize(request);

  // Get a Promise representation of the final result of the job
  const [response] = await operation.promise();

  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
};

main();
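If you'd rather not define a wrapper function, the same calls can be driven with an ordinary promise chain; here is a minimal sketch, assuming the same request object built above:

client
  .longRunningRecognize(request)
  .then(([operation]) => operation.promise())
  .then(([response]) => {
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
  })
  .catch(console.error);

(As of Node 14.8, top-level await is also available, but only in ES modules; the CommonJS require style used here still needs the wrapper or the chain.)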

Google Speech-to-Text not working on Node.js

I have created an app for converting speech to text, with a React frontend and a Node.js API. I record audio in React and POST it to Node.js, but the Google API result is empty. Why am I always getting empty results, and how can I fix it?
Here's my code.
ReactMic Recorder
<ReactMic
  record={record}
  className="sound-wave"
  onStop={onStop}
  onData={onData}
  strokeColor="#000000"
  backgroundColor="#FF4081"
  mimeType="audio/wav"
/>
<button onClick={startRecording} type="button">Start</button>
<button onClick={stopRecording} type="button">Stop</button>
Node.js API
app.post('/SpeechConvert', (req, res) => {
  const client = new speech.SpeechClient();
  console.log(req.files.file);

  req.files.file.mv('./input.wav', function (err) {
    if (err) {
      console.log(err);
    }
  });

  async function speechToText() {
    // The name of the audio file to transcribe
    const fileData = req.files.file.data;

    // Reads a local audio file and converts it to base64
    const file = fs.readFileSync('input.wav');
    const audioBytes = fileData.toString('base64');
    // console.log(audioBytes);

    // The audio file's encoding, sample rate in hertz, and BCP-47 language code
    const audio = {
      content: audioBytes,
    };
    const config = {
      enableAutomaticPunctuation: true,
      encoding: 'LINEAR16',
      sampleRateHertz: 44100,
      languageCode: 'en-US',
    };
    const request = {
      audio: audio,
      config: config,
    };

    // Detects speech in the audio file
    const [response] = await client.recognize(request);
    console.log(response);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    res.send({ 'transcription': transcription, 'msg': 'The Audio successfully converted to the text' });
  }

  speechToText().catch(console.error);
});
Can anyone help me fix this?
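A common cause of empty results with browser recordings is an encoding mismatch: the open-source ReactMic has been widely reported to produce WebM/Opus data even when mimeType="audio/wav" is set, in which case a config declaring LINEAR16 at 44100 Hz makes the API return no results rather than an error. A sketch of a config matching that container (whether the upload really is WebM/Opus is an assumption worth verifying first, e.g. by inspecting the saved input.wav):

const config = {
  enableAutomaticPunctuation: true,
  encoding: 'WEBM_OPUS',  // assumption: what the browser actually sent
  sampleRateHertz: 48000, // Opus in WebM typically runs at 48 kHz
  languageCode: 'en-US',
};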

Convert MediaRecorder blobs to a type that Google Speech-to-Text can transcribe

I am making an app where the user's browser records the user speaking and sends the audio to the server, which then passes it on to the Google Speech-to-Text interface. I am using MediaRecorder to get 1-second blobs which are sent to the server. On the server side, I forward these blobs to the Google Speech-to-Text interface. However, I am getting empty transcriptions.
I know what the issue is. MediaRecorder's default MIME type is audio/webm;codecs=opus, which is not accepted by Google's Speech-to-Text API. After doing some research, I realize I need to use ffmpeg to convert the blobs to LINEAR16. However, ffmpeg only accepts audio FILES, and I want to be able to convert BLOBS. Then I can send the resulting converted blobs over to the API interface.
server.js
wsserver.on('connection', socket => {
  console.log("Listening on port 3002");
  audio = {
    content: null
  };
  socket.on('message', function (message) {
    // const buffer = new Int16Array(message, 0, Math.floor(data.byteLength / 2));
    // console.log(`received from a client: ${new Uint8Array(message)}`);
    // console.log(message);
    audio.content = message.toString('base64');
    console.log(audio.content);
    livetranscriber.createRequest(audio).then(request => {
      livetranscriber.recognizeStream(request);
    });
  });
});
livetranscriber
module.exports = {
  createRequest: function (audio) {
    const encoding = 'LINEAR16';
    const sampleRateHertz = 16000;
    const languageCode = 'en-US';
    return new Promise((resolve, reject, err) => {
      if (err) {
        reject(err);
      } else {
        const request = {
          audio: audio,
          config: {
            encoding: encoding,
            sampleRateHertz: sampleRateHertz,
            languageCode: languageCode,
          },
          interimResults: false, // If you want interim results, set this to true
        };
        resolve(request);
      }
    });
  },
  recognizeStream: async function (request) {
    const [response] = await client.recognize(request);
    const transcription = response.results
      .map(result => result.alternatives[0].transcript)
      .join('\n');
    console.log(`Transcription: ${transcription}`);
    // console.log(message);
    // message.pipe(recognizeStream);
  },
};
client
recorder.ondataavailable = function (e) {
  console.log('Data', e.data);
  var ws = new WebSocket('ws://localhost:3002/websocket');
  ws.onopen = function () {
    console.log("opening connection");
    // const stream = websocketStream(ws)
    // const duplex = WebSocket.createWebSocketStream(ws, { encoding: 'utf8' });
    var blob = new Blob(e, { 'type': 'audio/wav; base64' });
    ws.send(blob.data);
    // e.data).pipe(stream);
    // console.log(e.data);
    console.log("Sent the message");
  };
  // chunks.push(e.data);
  // socket.emit('data', e.data);
};
I wrote a similar script several years ago. However, I used a JS frontend and a Python backend instead of Node.js. I remember using a sox transformer to transform the audio input into an output that the Google Speech API could use.
Perhaps this might be useful for you.
https://github.com/bitnahian/speech-transcriptor/blob/9f186e5416566aa8a6959fc1363d2e398b902822/app.py#L27
TL;DR:
Converted from a .wav format to .raw format using ffmpeg and sox.
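In Node terms, the same conversion can be done without temporary files, because ffmpeg can read from stdin (pipe:0) and write to stdout (pipe:1), so a blob never has to touch disk. A minimal sketch, assuming ffmpeg is on the PATH; the function name and the 16 kHz mono target are illustrative:

const { spawn } = require('child_process');

// Convert an in-memory WebM/Opus buffer to raw LINEAR16 PCM via pipes
function webmToLinear16(webmBuffer) {
  return new Promise((resolve, reject) => {
    const ffmpeg = spawn('ffmpeg', [
      '-i', 'pipe:0', // read the blob from stdin
      '-f', 's16le',  // raw signed 16-bit little-endian PCM (LINEAR16)
      '-ar', '16000', // resample to 16 kHz (illustrative target rate)
      '-ac', '1',     // mono
      'pipe:1',       // write converted audio to stdout
    ]);
    const chunks = [];
    ffmpeg.stdout.on('data', chunk => chunks.push(chunk));
    ffmpeg.on('close', code =>
      code === 0
        ? resolve(Buffer.concat(chunks))
        : reject(new Error(`ffmpeg exited with code ${code}`))
    );
    ffmpeg.stdin.write(webmBuffer);
    ffmpeg.stdin.end();
  });
}

One caveat: with MediaRecorder, only the first blob carries the WebM container header; later blobs are continuations, so for a continuous stream you would keep one long-lived ffmpeg process and write every chunk to its stdin rather than spawning one process per blob.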

Send audio stream from microphone to Google Speech - JavaScript

I am trying to send microphone input from the client (Nuxt) side to a Node + socket.io server and then on to the Google Speech API. I get a stream from navigator.mediaDevices.getUserMedia({ audio: true }) and send it to the back end using socket.io-stream. My client-side code is as follows.
import ss from 'socket.io-stream';

navigator.mediaDevices.getUserMedia({ audio: true }).then((mediaStream) => {
  ss(this.$socket).emit('audio', mediaStream);
});
And my server code is as follows.
const io = require('socket.io')(3555);
const ss = require('socket.io-stream');
const speech = require('@google-cloud/speech');

io.on('connection', (socket) => {
  const client = new speech.SpeechClient({ keyFilename: 'key.json' });

  const encoding = 'LINEAR16';
  const sampleRateHertz = 16000;
  const languageCode = 'en-US';

  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode,
    },
    interimResults: true,
  };

  ss(socket).on('audio', (stream) => {
    const recognizeStream = client.streamingRecognize(request)
      .on('error', console.error)
      .on('data', data => {
        process.stdout.write(
          data.results[0] && data.results[0].alternatives[0]
            ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
            : `\n\nReached transcription time limit, press Ctrl+C\n`
        );
      });
    stream.pipe(recognizeStream);
  });
});
But this code doesn't work and displays the error TypeError: stream.pipe is not a function.
Can someone please point out the error or tell me a way to achieve this? Thank you!
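For context, a browser MediaStream is not a Node stream: it has no .pipe method and socket.io-stream cannot serialize it, which is consistent with the error above. A common workaround is to chunk the microphone input with MediaRecorder on the client and write each received chunk into the recognize stream on the server. A sketch, not a drop-in fix: the 'audio-chunk' event name and the 250 ms timeslice are assumptions:

// Client: capture mic audio as periodic chunks instead of sending a MediaStream
navigator.mediaDevices.getUserMedia({ audio: true }).then((mediaStream) => {
  const recorder = new MediaRecorder(mediaStream);
  recorder.ondataavailable = (e) => {
    e.data.arrayBuffer().then((buf) => this.$socket.emit('audio-chunk', buf));
  };
  recorder.start(250); // emit a chunk every 250 ms
});

// Server: write each chunk into the recognize stream created above
socket.on('audio-chunk', (chunk) => {
  recognizeStream.write(Buffer.from(chunk));
});

Note that MediaRecorder emits WebM/Opus rather than LINEAR16, so the request config would also need to declare WEBM_OPUS, or the audio would need to be transcoded as discussed in the previous question.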

Google Speech API not working in Node.js

I am using the Google Cloud Speech API in Node.js. When I send a local audio file it gives the result, but when I try to send a live stream it stops within a second. Can anyone help me with this?
I am using this file: recognize.js
1. This is the code where I pass an audio file to the Google Speech API; here it works fine. I am passing the audio file stored in fileName to the Google Speech API.
function sample() {
  const projectId = 'project id';
  let file = "conf.json"; // google exported this for you
  var speech = require('@google-cloud/speech')({
    projectId: 'project id',
    keyFilename: 'Speech to text-a5ff6058e586.json'
  });
  const fs = require('fs');

  const fileName = 'C:/Users/nap1225/Downloads/audio-files/audio_001.wav';
  // const fileName = 'C:/xampp/htdocs/SpeechWatson/public/audio/Us_English_Broadband_Sample_2.wav';
  // const fileName = 'Sample 1.wav';
  // const fileName = 'C:/Users/nap1225/Desktop/dolo.wav';

  // Reads a local audio file and converts it to base64
  const fileMp3 = fs.readFileSync(fileName);
  const audioBytes = fileMp3.toString('base64');

  const audio = {
    content: audioBytes
  };
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
    speechContexts: {
      "phrases": ["refsum"]
    }
  };
  const request = {
    audio: audio,
    config: config
  };

  speech.recognize(request)
    .then((results) => {
      const transcription = results[0].results[0].alternatives[0].transcript;
      console.log(`Transcription: `, transcription);
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
}
sample();
2. Here is the code where I send streaming audio. When I execute this code, it records for about a second and then stops.
function streamingMicRecognize() {
  // [START speech_streaming_mic_recognize]
  // Imports the Google Cloud client library
  const record = require('node-record-lpcm16');
  const projectId = 'project id';
  let file = "conf.json"; // google exported this for you
  const speech = require('@google-cloud/speech')({
    projectId: 'project id',
    keyFilename: 'Speech to text-a5ff6058e586.json'
  });
  // Instantiates a client
  // const speech = Speech();

  // The encoding of the audio file, e.g. 'LINEAR16'
  const encoding = 'LINEAR16';
  // The sample rate of the audio file in hertz, e.g. 16000
  const sampleRateHertz = 16000;
  // The BCP-47 language code to use, e.g. 'en-US'
  const languageCode = 'en-US';

  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode
    },
    interimResults: true, // If you want interim results, set this to true
    singleUtterance: false
  };

  // Create a recognize stream
  const recognizeStream = speech.streamingRecognize(request)
    .on('error', console.error)
    .on('data', (data) =>
      process.stdout.write(
        (data.results[0] && data.results[0].alternatives[0])
          ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
          : `\n\nReached transcription time limit, press Ctrl+C\n`));

  // Start recording and send the microphone input to the Speech API
  record
    .start({
      sampleRateHertz: sampleRateHertz,
      threshold: 20,
      // Other options, see https://www.npmjs.com/package/node-record-lpcm16#options
      verbose: false,
      recordProgram: 'sox', // Try also "arecord" or "sox"
      silence: '10.0',
      device: 'plughw:0'
      // device : settingsRecord.audio_input
    })
    .on('error', console.error)
    .pipe(recognizeStream);

  console.log('Listening, press Ctrl+C to stop.');
  // [END speech_streaming_mic_recognize]

  /* .command(
    `listen`,
    `Detects speech in a microphone input stream. This command requires that you have SoX installed and available in your $PATH. See https://www.npmjs.com/package/node-record-lpcm16#dependencies`,
    {},
    (opts) => streamingMicRecognize(opts.encoding, opts.sampleRateHertz, opts.languageCode)
  )
  .options({
    encoding: { alias: 'e', default: 'LINEAR16', global: true, requiresArg: true, type: 'string' },
    sampleRateHertz: { alias: 'r', default: 16000, global: true, requiresArg: true, type: 'number' },
    languageCode: { alias: 'l', default: 'en-US', global: true, requiresArg: true, type: 'string' }
  }) */
}
streamingMicRecognize();
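One difference from the working microphone sample at the top of this page stands out: that sample records with threshold: 0, while this one uses threshold: 20 together with silence: '10.0'. In node-record-lpcm16 those options drive SoX's silence detection, so a nonzero threshold can end the recording almost immediately when the input level sits below it. A minimal change to try (whether it resolves the one-second cutoff on this particular device is an assumption):

record
  .start({
    sampleRateHertz: sampleRateHertz,
    threshold: 0, // disable silence-based stopping, as in the working sample
    verbose: false,
    recordProgram: 'sox',
    silence: '10.0',
    device: 'plughw:0'
  })
  .on('error', console.error)
  .pipe(recognizeStream);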
