Stream audio to Azure speech api by node.js on browser - node.js

I'm building a speech-to-text demo that calls the Azure Speech API from the browser, using Node.js. According to the API documentation here, the API requires .wav or .ogg audio. However, the example further down makes the API call by sending raw byte data to the API.
I already get my data from the microphone as a byte array. Is converting it to bytes and sending it to the API the right approach? Or would it be better to save it as a .wav file and then send that to the API?
So below is my code.
This is stream from microphone part.
// Ask for microphone access; once granted, hand the stream to the recorder setup.
navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => { handlerFunction(stream) })
  .catch(err => { console.log("Could not start recording", err) });

/**
 * Wires a MediaRecorder to the microphone stream. Every recorded chunk is
 * collected in `audioChunks`; when a chunk arrives while the recorder is
 * inactive, the chunks are joined into one Blob, exposed on the
 * `recordedAudio` player, and its bytes are passed to `getText`.
 *
 * @param {MediaStream} stream - The microphone stream from getUserMedia.
 */
function handlerFunction(stream) {
  rec = new MediaRecorder(stream);
  rec.ondataavailable = e => {
    audioChunks.push(e.data);
    if (rec.state === "inactive") {
      // NOTE: the type below only labels the Blob; it does not transcode the
      // data the recorder produced.
      const blob = new Blob(audioChunks, { type: 'audio/wav; codec=audio/pcm; samplerate=16000' });
      recordedAudio.src = URL.createObjectURL(blob);
      recordedAudio.controls = true;
      recordedAudio.autoplay = true;
      console.log(blob);
      const reader = new FileReader();
      // Register the handler before starting the read.
      reader.onloadend = function () {
        const byteArray = new Uint8Array(reader.result);
        console.log("reader result" + reader.result)
        setTimeout(() => getText(byteArray), 1000);
      }
      reader.readAsArrayBuffer(blob);
    }
  }
}
This is api call part
/**
 * POSTs the recorded audio bytes to the Azure speech-to-text endpoint and
 * records the send time of the newest response in the outer `time` variable.
 *
 * @param {Uint8Array|ArrayBuffer} audio - Audio payload used as the request body.
 * @param {Function} [callback] - Accepted but currently not invoked.
 */
function getText(audio, callback) {
  console.log("in function audio " + audio);
  console.log(`how many byte?: ${audio.byteLength}`);
  const sendTime = Date.now();

  const endpoint = 'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US';
  const requestInit = {
    method: "POST",
    headers: {
      'Accept': 'application/json',
      'Ocp-Apim-Subscription-Key': YOUR_API_KEY,
      'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000',
    },
    body: audio,
  };

  fetch(endpoint, requestInit)
    .then((r) => r.json())
    .then((response) => {
      // Responses to requests older than the latest handled one are ignored.
      if (!(sendTime < time)) {
        time = sendTime;
      }
    })
    .catch((e) => {
      console.log("Error", e);
    });
}
It returns with 400 (Bad Request) and says :
{Message: "Unsupported audio format"}

Reason:
Note that you are not creating a MediaRecorder with an audio/wav mimeType by writing
new Blob(audioChunks,{type:'audio/wav; codec=audio/pcm; samplerate=16000'})
This statement only describes the blob; it does not change how the audio is encoded. I tested my Chrome (v71) with isTypeSupported:
MediaRecorder.isTypeSupported("audio/wav") // returns false on Chrome v71
MediaRecorder.isTypeSupported("audio/ogg") // returns false on Chrome v71
MediaRecorder.isTypeSupported("audio/webm") // returns true on Chrome v71
It seems that the MediaRecorder will only record the audio in audio/webm. Also, when I run the following code on Chrome , the default rec.mimeType is audio/webm;codecs=opus
rec = new MediaRecorder(stream);
According to the Audio formats Requirements, audio/webm is not supported yet.
Approach:
Before calling getText() we first need to convert the webm to wav. There are quite a lot of libraries that can help us do that. I just copied Jam3's script before your code to convert webm to wav:
// Add Jam3's script (it provides audioBufferToWav) before this code, or import that module as you like.
// Create an AudioContext that helps us decode the webm audio.
var audioCtx = new (window.AudioContext || window.webkitAudioContext)();
rec = new MediaRecorder(stream, {
  // The codec is part of the mimeType string; MediaRecorder has no separate `codecs` option.
  mimeType: 'audio/webm;codecs=opus',
});
// ...
rec.ondataavailable = e => {
  audioChunks.push(e.data);
  if (rec.state === "inactive") {
    const blob = new Blob(audioChunks, { 'type': 'audio/webm; codecs=opus' });
    const fileReader = new FileReader();
    // `load` fires only after a successful read, so `result` is a real ArrayBuffer here.
    fileReader.onload = function () {
      audioCtx.decodeAudioData(
        fileReader.result,
        function (buffer) {
          // Re-encode the decoded audio as WAV before sending it to the speech API.
          const wav = audioBufferToWav(buffer);
          setTimeout(() => getText(wav), 1000);
        },
        function (e) { console.log(e); }
      );
    };
    fileReader.onerror = function () { console.log(fileReader.error); };
    fileReader.readAsArrayBuffer(blob);
  }
}
And it works fine for me :
As a side note, I suggest you should use your backend to invoke the speech-to-text services. Never invoke azure stt service in a browser. That's because exposing your subscription key to front end is really dangerous. Anyone could inspect the network and steal your key.

Related

save mp4 file in nodejs is saved with some error

I am creating a video/mp4 from a canvas in the front with react:
const canvasResult = document.getElementById(
  "canvasResult"
) as HTMLCanvasElement;

/**
 * Records the canvas for six seconds and resolves with the recording as one Blob.
 *
 * The recording logic is wrapped in a Promise so that `resolve` exists and
 * `await createVideo()` actually yields the Blob.
 *
 * NOTE(review): the `type` passed to the Blob only labels it; confirm the
 * recorder's real output format via `rec.mimeType`.
 */
const createVideo = (): Promise<Blob> =>
  new Promise<Blob>((resolve) => {
    const chunks: Blob[] = []; // recorded media chunks
    const stream = canvasResult.captureStream(); // grab our canvas MediaStream
    const rec = new MediaRecorder(stream); // init the recorder
    // every time the recorder has new data, store it
    rec.ondataavailable = (e) => chunks.push(e.data);
    // only when the recorder stops do we build the complete Blob from all the chunks
    rec.onstop = () => {
      setLoadingCanvaGif(100);
      resolve(new Blob(chunks, { type: "video/mp4" }));
    };
    rec.start();
    setTimeout(() => {
      clearInterval(canvaInterval);
      setShowCanva(false);
      rec.stop();
    }, 6000); // stop recording in 6s
  });
// Record the canvas, wrap the result in a File and upload it as multipart form data.
const blobvideo = await createVideo();
const fileVideo = new File([blobvideo], "video.mp4", {
  type: blobvideo.type,
});
const formData = new FormData();
// Append the File built above; the original appended an undefined `file` variable.
formData.append("file", fileVideo);
await axios.post(
  `/uploadFile`,
  formData,
  {
    headers: {
      "Content-Type": "multipart/form-data",
    },
  }
);
and receiving it in the backend with nodejs, I save it like this:
// using express-fileupload
const file: UploadedFile = req?.files?.file;
const targetPath = path.join(
  __dirname,
  `../../uploads`
);
const fileName = path.join(targetPath, `/design_${ms}.mp4`); // ms is a random id
// Move the upload to its final location and wait until the move has finished.
await new Promise<void>((resolve, reject) => {
  file.mv(fileName, function (err: any) {
    if (err) {
      // Reject rather than throw: a throw inside this callback would not settle
      // the promise, so the surrounding `await` would never see the failure.
      reject({
        code: 400,
        message: err,
      });
      return;
    }
    resolve();
  });
});
the problem I have is that when it is saved with the .mp4 extension, the file is not saved correctly, windows shows it like this (case 3):
If i save it as webm
If the .mp4 file (of the case 3) is passed through a video to mp4 converter (https://video-converter.com/es/)
if i save it as mp4
The problem is that when I want to use case 3 (I need it in mp4 and not in webm and I can't manually upload each video to a converter) I can't use it correctly, it generates errors.
note: the three files are played correctly by opening it with vlc or any video player
I believe MediaRecorder in Chrome only supports video/webm mimeType. Your node service will need to convert the file to mp4.
You can use MediaRecorder.isTypeSupported('video/mp4') to check if it is supported.
/**
 * Creates a MediaRecorder for the given stream, asking for MP4 output when the
 * browser reports support for it and otherwise leaving the format to the browser.
 *
 * @param {MediaStream} stream - The stream to record.
 * @returns {MediaRecorder} The configured recorder.
 */
const getMediaRecorder = (stream) => {
  const mp4 = 'video/mp4';
  // An empty list means "pass no options", so the browser picks its default.
  const extraArgs = MediaRecorder.isTypeSupported(mp4) ? [{ mimeType: mp4 }] : [];
  return new MediaRecorder(stream, ...extraArgs);
};

how to solve audio encoding error in Media-translation GCP API?

Here's my code.
I have gone through the Google Cloud Platform API documentation and followed the steps in the GCP docs correctly, but I am still unable to fix the encoding error, which you can see below. I'm trying to translate an audio clip from en-US (English) to hi-IN (Hindi), and it would be helpful if you could suggest some alternative ways of solving this.
/**
 * Streams an audio file to the Media Translation API and prints the partial
 * and final translations as they arrive.
 *
 * @param {string} [filename] - Path of the audio file to translate.
 * @param {string} [encoding] - Audio encoding name. The API expects the
 *   lower-case form ('linear16'); 'LINEAR16' is rejected as an unknown encoding.
 * @param {string} [sourceLanguage] - Language code of the audio.
 * @param {string} [targetLanguage] - Language code to translate into.
 */
function main(
  filename = './16kmonoceo.wav',
  encoding = 'linear16',
  sourceLanguage = 'en-US',
  targetLanguage = 'hi-IN'
) {
  const fs = require('fs');
  const {
    SpeechTranslationServiceClient,
  } = require('@google-cloud/media-translation');
  const client = new SpeechTranslationServiceClient();

  async function quickstart() {
    const config = {
      audioConfig: {
        audioEncoding: encoding,
        sourceLanguageCode: sourceLanguage,
        targetLanguageCode: targetLanguage,
      },
    };
    const initialRequest = {
      streamingConfig: config,
      audioContent: null,
    };

    const stream = client
      .streamingTranslateSpeech()
      .on('error', err => {
        console.error(err);
      })
      .on('data', response => {
        const {result} = response;
        if (result.textTranslationResult.isFinal) {
          console.log(
            `\nFinal translation: ${result.textTranslationResult.translation}`
          );
          console.log(`Final recognition result: ${result.recognitionResult}`);
        } else {
          console.log(
            `\nPartial translation: ${result.textTranslationResult.translation}`
          );
          console.log(`Partial recognition result: ${result.recognitionResult}`);
        }
      });

    const readStream = fs.createReadStream(filename, {
      highWaterMark: 4096,
      encoding: 'base64',
    });
    const chunks = [];
    readStream
      .on('error', err => {
        console.error(err);
      })
      .on('data', chunk => {
        const request = {
          streamingConfig: config,
          audioContent: chunk.toString(),
        };
        chunks.push(request);
      })
      .on('close', () => {
        // Config-only request should be first in stream of requests
        stream.write(initialRequest);
        for (const request of chunks) {
          stream.write(request);
        }
        stream.end();
      });
  }

  quickstart().catch(err => {
    console.error(err);
    process.exitCode = 1;
  });
}
main(...process.argv.slice(2));
here my error from command line.
CHECK ERROR MESSAGE
I'm using windows 10 and IDE VS CODE.
This is a case where careful reading of the error message helps.
Some module gacked on "LINEAR16" as the audioEncoding value saying there's no encoding with that name.
A quick look at the documentation shows "linear16" (lower case) as the value to use.

Capture WebRTC stream

I got this little proof of concept script that I copy/paste into Google Chrome console to capture live webcam video. I capture the chunks every 5 seconds, turn them into blobs, attach to a form data instance and post to a Node server. Then I clean up. It works, but eventually the browser crashes. RAM and CPU spikes heavily.
It seems the problematic areas are creating the Blobs and FormData variables.
How can I improve the script?
To test, go here:
https://www.earthcam.com/usa/arizona/sedona/redrock/?cam=sedona_hd
Copy/paste the script. Check the tab's RAM and CPU consumption.
// Recorded media chunks; the recorder's ondataavailable handler appends to this array.
let chunks = [];
/**
 * Picks the first recording MIME type the browser supports, trying VP9, then
 * VP8, then plain WebM, and logging each rejected candidate. Falls back to an
 * empty mimeType when none is supported.
 *
 * @returns {{mimeType: string}} Options object for the MediaRecorder constructor.
 */
const getOptions = function() {
  const candidates = [
    'video/webm;codecs=vp9,opus',
    'video/webm;codecs=vp8,opus',
    'video/webm',
  ];
  for (const mimeType of candidates) {
    if (window.MediaRecorder.isTypeSupported(mimeType)) {
      return { mimeType };
    }
    console.error(`${mimeType} is not supported`);
  }
  return { mimeType: '' };
};
/**
 * Uploads the chunks recorded so far as one WebM file and removes them from
 * the caller's array.
 *
 * The array is drained in place before the upload starts. Reassigning the
 * parameter (as the earlier version did) leaves the caller's array untouched,
 * so it would keep growing and every upload would resend all previous data.
 * Chunks that arrive while the upload is in flight stay in the array for the
 * next call.
 *
 * @param {Blob[]} chunks - Shared array of recorded chunks; emptied by this call.
 * @returns {Promise<void>} Settles when the upload request has completed.
 */
const captureStream = async function(chunks) {
  const batch = chunks.splice(0, chunks.length);
  if (batch.length === 0) {
    // Nothing new was recorded since the last upload.
    return;
  }
  const blob = new window.Blob(batch, {
    type: 'video/webm',
  });
  const formData = new window.FormData();
  formData.append('upl', blob, 'myFile.webm');
  await window.fetch('http://localhost:3000', {
    method: 'post',
    body: formData,
  });
  console.log(`Saved ${batch.length}`);
};
/**
 * Starts recording the page's first <video> element and schedules an upload
 * of the collected chunks every five seconds. Logs a message and does nothing
 * when the browser has no MediaRecorder.
 */
const recordStream = function() {
  if (window.MediaRecorder === undefined) {
    console.log('Not supported');
    return;
  }

  const sourceVideo = document.querySelector('video');
  const recorder = new window.MediaRecorder(
    sourceVideo.captureStream(),
    getOptions()
  );

  // Keep only non-empty pieces of recorded data.
  recorder.ondataavailable = function(event) {
    if (event.data && event.data.size > 0) {
      chunks.push(event.data);
    }
  };
  recorder.start(0);

  // Capture chunks every 5 sec
  const uploadPeriodMs = 5000;
  setInterval(() => captureStream(chunks), uploadPeriodMs);
};

recordStream();
When I paste in the code above into the Console it displays this error:
Uncaught SyntaxError: Unexpected token '}'
Adding a preceding { then returns this error:
VM97:3 Uncaught ReferenceError: formData is not defined at <anonymous>:3:11

Convert mediarecorder blobs to a type that google speech to text can transcribe

I am making an app where the user's browser records the user speaking and sends the audio to the server, which then passes it on to the Google Speech-to-Text interface. I am using MediaRecorder to get 1-second blobs, which are sent to a server. On the server side, I send these blobs over to the Google Speech-to-Text interface. However, I am getting empty transcriptions.
I know what the issue is. MediaRecorder's default MIME type is audio/webm with codecs=opus, which is not accepted by Google's Speech-to-Text API. After doing some research, I realized I need to use ffmpeg to convert the blobs to LINEAR16. However, ffmpeg only accepts audio FILES and I want to be able to convert BLOBS. Then I can send the resulting converted blobs over to the API interface.
server.js
// For every WebSocket client, forward each received audio message to the transcriber.
wsserver.on('connection', socket => {
  console.log("Listening on port 3002")
  socket.on('message', function (message) {
    // Build a fresh payload per message, so a later message cannot overwrite
    // the content of a request that is still being processed.
    const audio = {
      content: message.toString('base64')
    };
    console.log(audio.content);
    livetranscriber.createRequest(audio)
      .then(request => livetranscriber.recognizeStream(request))
      .catch(err => {
        // Without this handler a failed recognition is an unhandled rejection.
        console.error('Transcription failed', err);
      });
  });
});
livetranscriber
module.exports = {
createRequest: function(audio){
const encoding = 'LINEAR16';
const sampleRateHertz = 16000;
const languageCode = 'en-US';
return new Promise((resolve, reject, err) =>{
if (err){
reject(err)
}
else{
const request = {
audio: audio,
config: {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
},
interimResults: false, // If you want interim results, set this to true
};
resolve(request);
}
});
},
recognizeStream: async function(request){
const [response] = await client.recognize(request)
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n');
console.log(`Transcription: ${transcription}`);
// console.log(message);
// message.pipe(recognizeStream);
},
}
client
// Sends every recorded chunk to the server over its own short-lived WebSocket.
recorder.ondataavailable = function (e) {
  console.log('Data', e.data);
  const ws = new WebSocket('ws://localhost:3002/websocket');
  ws.onopen = function () {
    console.log("opening connection");
    // e.data already is the recorded Blob. Wrapping the event object in a new
    // Blob and sending its non-existent `.data` property sent nothing useful.
    ws.send(e.data);
    console.log("Sent the message")
    // Close after queuing the chunk so sockets do not pile up, one per chunk;
    // queued data is still transmitted before the closing handshake.
    ws.close();
  };
  ws.onerror = function (event) {
    console.log("WebSocket error", event);
  };
}
I wrote a similar script several years ago. However, I used a JS frontend and a Python backend instead of NodeJS. I remember using a sox transformer to transform the audio input into an output that the Google Speech API could use.
Perhaps this might be useful for you.
https://github.com/bitnahian/speech-transcriptor/blob/9f186e5416566aa8a6959fc1363d2e398b902822/app.py#L27
TLDR:
Converted from a .wav format to .raw format using ffmpeg and sox.

Azure Functions - NodeJS - Response Body as a Stream

I'd like to return a file from Blob Storage when you hit a given Azure Function end-point. This file is binary data.
Per the Azure Storage Blob docs, the most relevant call appears to be the following since its the only one that doesn't require writing the file to an interim file:
getBlobToStream
However this call gets the Blob and writes it to a stream.
Is there a way with Azure Functions to use a Stream as the value of res.body so that I can get the Blob Contents from storage and immediately write it to the response?
To add some code, trying to get something like this to work:
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'DeContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream
};
context.done();
}
});
}
/**
 * Completes the invocation with a 404 JSON response.
 *
 * @param {object} context - Function context; receives `res` and has `done()` called.
 */
function FileNotFound(context){
    const headers = { "Content-Type" : "application/json" };
    const body = { "Message" : "No esta aqui!." };
    context.res = { status: 404, headers, body };
    context.done();
}
Unfortunately we don't have streaming support implemented in NodeJS just yet - it's on the backlog: https://github.com/Azure/azure-webjobs-sdk-script/issues/1361
If you're not tied to NodeJS and are open to using a C# function instead, you can use the storage SDK object directly in your input bindings and stream the request output, instead of using the intermediate object approach.
While @Matt Manson's answer is definitely correct based on the way I asked my question, the following code snippet might be more useful for someone who stumbles across this question.
While I can't send the Stream to the response body directly, I can use a custom stream which captures the data into a Uint8Array, and then sends that to the response body.
NOTE: If the file is REALLY big, this will use a lot of memory.
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'deContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
//Override the write to store the value to our "contents"
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream.contents
};
context.done();
}
});//*/
}
/**
 * Completes the invocation with a 404 JSON response.
 *
 * @param {object} context - Function context; receives `res` and has `done()` called.
 */
function FileNotFound(context){
    const notFoundResponse = {
        status: 404,
        headers: { "Content-Type" : "application/json" },
        body: { "Message" : "No esta aqui!" },
    };
    context.res = notFoundResponse;
    context.done();
}
I tried @Doug's solution from the last comment above, with a few minor modifications, in my Azure function, and so far, after trying 20 different ideas, this is the only one that actually delivered the file to the browser! Thank you, @Doug...
const fs = require("fs");
const stream = require("stream");
...
// Scoped package names start with '@'; a specifier starting with '#' is
// resolved as a package-internal subpath import instead.
const AzureBlob = require('@[my_private_artifact]/azure-blob-storage');
const azureStorage = new AzureBlob(params.connectionString);
//Override the write to store the value to our "contents" <-- Doug's solution
const outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
outputStream._write = function (chunk, encoding, done) {
  // Append the incoming bytes to everything received so far.
  const curChunk = new Uint8Array(chunk);
  const tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
  tmp.set(this.contents, 0);
  tmp.set(curChunk, this.contents.byteLength);
  this.contents = tmp;
  done();
};
const azureSpeedResult = await azureStorage.downloadBlobToStream(params.containerName, params.objectId, outputStream);
const headers = {
  "Content-Length": azureSpeedResult.size,
  "Content-Type": mimeType
};
// Only add the attachment header when a download was requested.
if (params.action === "download") {
  headers["Content-Disposition"] = "attachment; filename=" + params.fileName;
}
context.res = {
  status: 200,
  headers: headers,
  isRaw: true,
  body: outputStream.contents
};
context.done();
...

Resources