Write data from Socket.io to Google Cloud Storage - node.js

I get audio data from this package in my React Native project and then send the data chunks over socket.io to my Node.js server.
AudioRecord.on('data', data => {
  // base64-encoded audio data chunks
  const binary = Buffer.from(data, 'base64').toString('utf-8')
  socketClient.io.emit('binaryData', binary)
})
I know the data my server receives is intact, because I pipe it through the transform below into a speech recognition service and get accurate results back.
const speechAudioInputStreamTransform = new Transform({
  transform: (chunk, encoding, callback) => {
    if (newStream && lastAudioInput.length !== 0) {
      // Approximate math to calculate time of chunks
      const chunkTime = streamingLimit / lastAudioInput.length
      if (chunkTime !== 0) {
        if (bridgingOffset < 0) {
          bridgingOffset = 0
        }
        if (bridgingOffset > finalRequestEndTime) {
          bridgingOffset = finalRequestEndTime
        }
        const chunksFromMS = Math.floor(
          (finalRequestEndTime - bridgingOffset) / chunkTime
        )
        bridgingOffset = Math.floor(
          (lastAudioInput.length - chunksFromMS) * chunkTime
        )
        for (let i = chunksFromMS; i < lastAudioInput.length; i++) {
          if (recognizeStream) {
            recognizeStream.write(lastAudioInput[i])
          }
        }
      }
      newStream = false
    }
    audioInput.push(chunk)
    if (recognizeStream) {
      recognizeStream.write(chunk)
    }
    callback()
  },
})
However, when I write this data to Google Cloud Storage, the file is saved and contains data, but it appears to be corrupted because it is unplayable.
const { Storage } = require('@google-cloud/storage')
const storage = new Storage(storageOptions)
const bucket = storage.bucket(bucketName)
const file = filename + '.wav'
const storageFile = bucket.file(file).createWriteStream()
client.on('binaryData', (data) => {
  const buffer = Buffer.from(data)
  if (recognizeStream != null) {
    storageFile.write(buffer)
    speechAudioInputStreamTransform.write(buffer)
  }
})
When the user on the RN side terminates the session, storageFile.end() is called.
Any help would be greatly appreciated. Thanks
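(A hypothesis, not part of the original question.) Two things stand out: decoding the base64 chunks with .toString('utf-8') on the client is lossy for binary audio, and the raw PCM chunks are written into a .wav file without a RIFF header, which most players refuse to play. A minimal sketch of how both ends might look under those assumptions; the npm wav package and the 16 kHz mono 16-bit format are illustrative choices, not taken from the post:
// React Native side: forward the chunk as base64, without decoding it to a UTF-8 string
AudioRecord.on('data', data => {
  socketClient.io.emit('binaryData', data) // still base64
})
// Server side: decode back to raw bytes, add a WAV header, then upload
const wav = require('wav') // assumed helper package that writes the RIFF header
const wavWriter = new wav.Writer({ channels: 1, sampleRate: 16000, bitDepth: 16 }) // assumed recording format
wavWriter.pipe(bucket.file(filename + '.wav').createWriteStream())
client.on('binaryData', (data) => {
  const buffer = Buffer.from(data, 'base64')
  wavWriter.write(buffer)
  speechAudioInputStreamTransform.write(buffer)
})
When the RN side terminates the session, calling wavWriter.end() instead of ending the GCS stream directly lets the pipe close cleanly.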

Related

File chunk upload to azure storage blob, file seems broken

I'm trying to upload an Excel file to an Azure Storage blob in chunks, using stageBlock and commitBlockList from the BlockBlobClient class. The upload seems to succeed, but when I try to download and open the file, it appears to be broken.
I'm using React and Node.js to do this. The code follows below.
In UI
const chunkSize = (1024 * 1024) * 25; // file chunk size
// here slicing the file and sending it to api method
const fileReader = new FileReader();
const from = currentChunkIndexRef.current * chunkSize;
const to = from + chunkSize;
const blob = file.slice(from, to);
fileReader.onload = ((e: any) => uploadChunksToBlob(e, file, obj));
fileReader.readAsDataURL(blob);
// api method
const uploadChunksToBlob = async (event: any, file: File, obj: any) => {
try {
const totalChunks = Math.ceil(file.size / chunkSize);
const uploadChunkURL = `/upload?currentChunk=${currentChunkIndexRef.current}&totalChunks=${totalChunks}&file=${file.name}&type=${file.type}`;
console.log(event.target.result)
const fileUpload = await fetch(uploadChunkURL, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: JSON.stringify(event.target.result),
});
const fileUploadJson = await fileUpload.json();
const isLastChunk = (totalChunks - 1) === currentChunkIndexRef.current;
if(!isLastChunk) {
console.log({ Chunk: currentChunkIndexRef.current });
currentChunkIndexRef.current = currentChunkIndexRef.current + 1;
// eslint-disable-next-line @typescript-eslint/no-use-before-define
uploadFileToAzureBlob(file, obj);
} else {
console.log("File Uploaded")
}
//
} catch (error) {
console.log("uploadFileToAzureBlob Catch Error" + error);
}
}
// In Node
const sharedKeyCredential = new StorageSharedKeyCredential(
config.StorageAccountName,
config.StorageAccountAccessKey
);
const pipeline = newPipeline(sharedKeyCredential);
const blobServiceClient = new BlobServiceClient(
`https://${config.StorageAccountName}.blob.core.windows.net`,
pipeline
);
const containerName = getContainerName(req.headers.key, req.headers.clientcode);
const identifier = uuid.v4();
const blobName = getBlobName(identifier, file);
const containerClient = blobServiceClient.getContainerClient(containerName);
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
try {
let bufferObj = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8"); // Create buffer object, specifying utf8 as encoding
let base64String = bufferObj.toString("base64"); // Encode the Buffer as a base64 string
blockIds = [...blockIds, base64String];
const bufferedData = Buffer.from(req.body);
let resultOfUnitArray = new Uint8Array(bufferedData.length);
for (let j = 0; j < bufferedData.length; j++) {
resultOfUnitArray[j] = bufferedData.toString().charCodeAt(j);
} // Converting string to bytes
const stageBlockResponse = await blockBlobClient.stageBlock(base64String, resultOfUnitArray, resultOfUnitArray.length, {
onProgress: (e) => {
console.log("bytes sent: " + e.loadedBytes);
}
});
if ((Number(totalChunks) - 1) === (Number(currentChunk))) {
const commitblockResponse = await blockBlobClient.commitBlockList(blockIds, {blobHTTPHeaders: req.headers});
res.json({ uuid: identifier, message: 'File uploaded to Azure Blob storage.' });
} else {
res.json({ message: `Current Chunks ${currentChunk} is Successfully Uploaded` });
}
} catch (err) {
console.log({ err })
res.json({ message: err.message });
}
I don't know what I'm doing wrong here.
Any help would be appreciated
Thank you
The problem is that you convert the chunk into a data URL; that's where things break.
It appears you're under the impression that you need to encode the blob into a string before sending it. You don't have to: the browser's fetch API can handle a raw binary payload.
So on the client (browser) side, you don’t need to go through FileReader. Just send the chunk blob directly.
const blob = file.slice(from, to);
// ...
fetch(uploadChunkURL, {
  method: "POST",
  headers: { "Content-Type": "application/octet-stream" },
  body: blob,
});
On the server (Node.js) side, you'll receive the chunk as raw binary, so you can forward it untouched to Azure Storage. There's no need to decode it from a string and copy bytes into resultOfUnitArray as you currently do.
const base64String = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8").toString("base64");
const bufferedData = Buffer.from(req.body);
const stageBlockResponse = await blockBlobClient.stageBlock(
  base64String,
  bufferedData,
  bufferedData.length
);
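One assumption behind the server-side snippet: req.body must already be a raw Buffer. With Express that usually means registering the raw body parser for this route; a sketch, where uploadHandler stands in for the existing route handler:
const express = require("express");
const app = express();
// Parse application/octet-stream bodies into a Buffer so req.body holds the raw chunk bytes
app.post("/upload", express.raw({ type: "application/octet-stream", limit: "30mb" }), uploadHandler);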

Amazon Transcribe streaming with Node.js using websocket

I am working on a WhatsApp chatbot. I receive an audio file URL (ogg format) from WhatsApp, fetch the buffer, and upload the file to S3 (sample.ogg). Now I want to use AWS Transcribe streaming, so I create a read stream of the file and send it to AWS Transcribe over a WebSocket, but I receive an empty response, or sometimes just an "Mhm mm mm" transcript. Can anyone please tell me what I am doing wrong in my code?
const express = require('express')
const app = express()
const fs = require('fs');
const crypto = require('crypto'); // to sign our pre-signed URL
const v4 = require('./aws-signature-v4'); // to generate our pre-signed URL
const marshaller = require("@aws-sdk/eventstream-marshaller"); // for converting binary event stream messages to and from JSON
const util_utf8_node = require("@aws-sdk/util-utf8-node");
var WebSocket = require('ws') //for opening a web socket
// our converter between binary event streams messages and JSON
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(util_utf8_node.toUtf8, util_utf8_node.fromUtf8);
// our global variables for managing state
let languageCode;
let region = 'ap-south-1';
let sampleRate;
let inputSampleRate;
let transcription = "";
let socket;
let micStream;
let socketError = false;
let transcribeException = false;
// let languageCode = 'en-us'
app.listen(8081, (error, data) => {
if(!error) {
console.log(`running at 8081----->>>>`)
}
})
let handleEventStreamMessage = function (messageJson) {
let results = messageJson.Transcript.Results;
if (results.length > 0) {
if (results[0].Alternatives.length > 0) {
let transcript = results[0].Alternatives[0].Transcript;
// fix encoding for accented characters
transcript = decodeURIComponent(escape(transcript));
console.log(`Transcript is----->>${transcript}`)
}
}
}
function downsampleBuffer (buffer, inputSampleRate = 44100, outputSampleRate = 16000){
if (outputSampleRate === inputSampleRate) {
return buffer;
}
var sampleRateRatio = inputSampleRate / outputSampleRate;
var newLength = Math.round(buffer.length / sampleRateRatio);
var result = new Float32Array(newLength);
var offsetResult = 0;
var offsetBuffer = 0;
while (offsetResult < result.length) {
var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
var accum = 0,
count = 0;
for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++ ) {
accum += buffer[i];
count++;
}
result[offsetResult] = accum / count;
offsetResult++;
offsetBuffer = nextOffsetBuffer;
}
return result;
}
function pcmEncode(input) {
var offset = 0;
var buffer = new ArrayBuffer(input.length * 2);
var view = new DataView(buffer);
for (var i = 0; i < input.length; i++, offset += 2) {
var s = Math.max(-1, Math.min(1, input[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
return buffer;
}
function getAudioEventMessage(buffer) {
// wrap the audio data in a JSON envelope
return {
headers: {
':message-type': {
type: 'string',
value: 'event'
},
':event-type': {
type: 'string',
value: 'AudioEvent'
}
},
body: buffer
};
}
function convertAudioToBinaryMessage(raw) {
if (raw == null)
return;
// downsample and convert the raw audio bytes to PCM
let downsampledBuffer = downsampleBuffer(raw, inputSampleRate);
let pcmEncodedBuffer = pcmEncode(downsampledBuffer);
setTimeout(function() {}, 1);
// add the right JSON headers and structure to the message
let audioEventMessage = getAudioEventMessage(Buffer.from(pcmEncodedBuffer));
//convert the JSON object + headers into a binary event stream message
let binary = eventStreamMarshaller.marshall(audioEventMessage);
return binary;
}
function createPresignedUrl() {
let endpoint = "transcribestreaming." + "us-east-1" + ".amazonaws.com:8443";
// get a preauthenticated URL that we can use to establish our WebSocket
return v4.createPresignedURL(
'GET',
endpoint,
'/stream-transcription-websocket',
'transcribe',
crypto.createHash('sha256').update('', 'utf8').digest('hex'), {
'key': <AWS_KEY>,
'secret': <AWS_SECRET_KEY>,
'protocol': 'wss',
'expires': 15,
'region': 'us-east-1',
'query': "language-code=" + 'en-US' + "&media-encoding=pcm&sample-rate=" + 8000
}
);
}
function showError(message) {
console.log("Error: ",message)
}
app.get('/convert', (req, res) => {
var file = 'recorded.mp3'
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(util_utf8_node.toUtf8, util_utf8_node.fromUtf8);
let url = createPresignedUrl();
let socket = new WebSocket(url);
socket.binaryType = "arraybuffer";
let output = '';
const readStream = fs.createReadStream(file, { highWaterMark: 32 * 256 })
readStream.setEncoding('binary')
//let sampleRate = 0;
let inputSampleRate = 44100
readStream.on('end', function() {
console.log('finished reading----->>>>');
// write to file here.
// Send an empty frame so that Transcribe initiates a closure of the WebSocket after submitting all transcripts
let emptyMessage = getAudioEventMessage(Buffer.from(new Buffer([])));
let emptyBuffer = eventStreamMarshaller.marshall(emptyMessage);
socket.send(emptyBuffer);
})
// when we get audio data from the mic, send it to the WebSocket if possible
socket.onopen = function() {
readStream.on('data', function(chunk) {
let binary = convertAudioToBinaryMessage(chunk);
if (socket.readyState === socket.OPEN) {
console.log(`sending to streaming API------->>>>`)
socket.send(binary);
}
});
// the audio stream is raw audio bytes. Transcribe expects PCM with additional metadata, encoded as binary
}
// the audio stream is raw audio bytes. Transcribe expects PCM with additional metadata, encoded as binary
socket.onerror = function () {
socketError = true;
showError('WebSocket connection error. Try again.');
};
// handle inbound messages from Amazon Transcribe
socket.onmessage = function (message) {
//convert the binary event stream message to JSON
let messageWrapper = eventStreamMarshaller.unmarshall(Buffer(message.data));
//console.log(`messag -->>${JSON.stringify(messageWrapper)}`)
let messageBody = JSON.parse(String.fromCharCode.apply(String, messageWrapper.body));
console.log("results:.. ",JSON.stringify(messageBody))
if (messageWrapper.headers[":message-type"].value === "event") {
handleEventStreamMessage(messageBody);
}
else {
transcribeException = true;
showError(messageBody.Message);
}
}
let closeSocket = function () {
if (socket.OPEN) {
// Send an empty frame so that Transcribe initiates a closure of the WebSocket after submitting all transcripts
let emptyMessage = getAudioEventMessage(Buffer.from(new Buffer([])));
let emptyBuffer = eventStreamMarshaller.marshall(emptyMessage);
socket.send(emptyBuffer);
}
}
})
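(A note, not part of the original question.) The read stream above feeds the raw bytes of an encoded mp3/ogg file into downsampleBuffer and pcmEncode, which expect Float32 audio samples, while the presigned URL promises 8 kHz PCM; that mismatch would explain empty or garbage transcripts. A minimal sketch of decoding to raw PCM first inside the /convert handler, bypassing downsampleBuffer and pcmEncode; it assumes ffmpeg is installed and that 8 kHz mono matches the URL parameters:
const { spawn } = require('child_process');
// Decode the recorded file to signed 16-bit little-endian PCM, mono, 8 kHz
const ffmpeg = spawn('ffmpeg', ['-i', file, '-f', 's16le', '-ar', '8000', '-ac', '1', 'pipe:1']);
ffmpeg.stdout.on('data', (pcmChunk) => {
  // pcmChunk is already PCM, so wrap it in the event-stream envelope directly
  const binary = eventStreamMarshaller.marshall(getAudioEventMessage(pcmChunk));
  if (socket.readyState === socket.OPEN) socket.send(binary);
});
ffmpeg.stdout.on('end', () => {
  // An empty frame tells Transcribe that no more audio is coming
  socket.send(eventStreamMarshaller.marshall(getAudioEventMessage(Buffer.alloc(0))));
});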

Video stream from Blob NodeJS

I am recording a MediaStream on the client side like this:
handleStream(stream) {
const ws = new WebSocket('ws://localhost:5432/binary');
var recorder = new MediaRecorder(stream);
recorder.ondataavailable = function(event) {
ws.send(event.data);
};
recorder.start();
}
This data is accepted on the server side like this:
const wss = new WebSocket.Server({ port: 5432 });
wss.on('connection', function connection(ws) {
ws.on('message', function incoming(message) {
writeToDisk(message, 'video.webm');
});
});
function writeToDisk(dataURL, fileName) {
var fileBuffer = new Buffer(dataURL, 'base64');
fs.writeFileSync(fileName, fileBuffer);
}
It works like a charm, but I want to take the Buffer and serve a live video stream from the server side. Is there any way to do it?
Thanks for your help.
I have already done this here.
You can use the MediaRecorder class to split the video into chunks and send them to the server for broadcast.
this._mediaRecorder = new MediaRecorder(this._stream, this._streamOptions);
this._mediaRecorder.ondataavailable = e => this._videoStreamer.pushChunk(e.data);
this._mediaRecorder.start();
...
this._mediaRecorder.requestData()
Do not forget to restart recording at intervals, so that new clients do not have to download the whole video to join the stream. Also, while chunks are being swapped, you should replace the <video> element with an <img> or update the video's poster so that the splice goes smoothly.
async function imageBitmapToBlob(img) {
return new Promise(res => {
const canvas = document.createElement('canvas');
canvas.width = img.width;
canvas.height = img.height;
canvas.getContext('2d').drawImage(img,0,0);
canvas.toBlob(res);
});
}
...
const stream = document.querySelector('video').captureStream();
if(stream.active==true) {
const track = stream.getVideoTracks()[0];
const capturer = new ImageCapture(track);
const bitmap = await imageBitmapToBlob(await capturer.grabFrame());
URL.revokeObjectURL(this._oldPosterUrl);
this._video.poster = this._oldPosterUrl = URL.createObjectURL(bitmap);
track.stop();
}
You can glue Blob objects together through their constructor. When a new chunk arrives, do not forget to free the old video with URL.revokeObjectURL() and restore the video's current time.
_updateVideo = async (newBlob = false) => {
const stream = this._video.captureStream();
if(stream.active==true) {
const track = stream.getVideoTracks()[0];
const capturer = new ImageCapture(track);
const bitmap = await imageBitmapToBlob(await capturer.grabFrame());
URL.revokeObjectURL(this._oldPosterUrl);
this._video.poster = this._oldPosterUrl = URL.createObjectURL(bitmap);
track.stop();
}
let data = null;
if(newBlob === true) {
const index = this._recordedChunks.length - 1;
data = [this._recordedChunks[index]];
} else {
data = this._recordedChunks;
}
const blob = new Blob(data, this._options);
const time = this._video.currentTime;
URL.revokeObjectURL(this._oldVideoUrl);
const url = this._oldVideoUrl = URL.createObjectURL(blob);
if(newBlob === true) {
this._recordedChunks = [blob];
}
this._size = blob.size;
this._video.src = url;
this._video.currentTime = time;
}
You should use two WebSockets for broadcasting the video and two for listening. One WebSocket transfers only video chunks, the second only new blobs containing the video headers (created when recording is restarted at intervals).
const blobWebSocket = new WebSocket(`ws://127.0.0.1:${blobPort}/`);
blobWebSocket.onmessage = (e) => {
console.log({blob:e.data});
this._videoWorker.pushBlob(e.data);
}
const chunkWebSocket = new WebSocket(`ws://127.0.0.1:${chunkPort}/`);
chunkWebSocket.onmessage = (e) => {
console.log({chunk:e.data});
this._videoWorker.pushChunk(e.data);
}
After a client connects, the server sends it the current video blob and then begins to dynamically forward new chunks.
const wss = new WebSocket.Server({ port });
let buffer = Buffer.alloc(0);
function chunkHandler(buf,isBlob=false) {
console.log({buf,isBlob});
if(isBlob === true) {
//broadcast(wss,buf);
buffer = buf;
} else {
const totalLength = buffer.length + buf.length;
buffer = Buffer.concat([buffer,buf],totalLength);
broadcast(wss,buf);
}
}
wss.on('connection', function connection(ws) {
if(buffer.length !== 0) {
ws.send(buffer);
}
});
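The broadcast(wss, buf) helper used above is not shown in the answer; a minimal sketch of what it might look like (the helper itself and the readyState check are assumptions):
const WebSocket = require('ws');
// Send a chunk to every currently connected client
function broadcast(wss, data) {
  wss.clients.forEach((client) => {
    if (client.readyState === WebSocket.OPEN) {
      client.send(data);
    }
  });
}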

How to use streams to encrypt and then stream the data to GridFs? (mongodb)

I'm currently building an application that takes a user upload, encrypts it, and then saves it to MongoDB using GridFS. I'm using multer to receive the file, and then I'm trying to encrypt it and save it to MongoDB with the gridfs-stream library. Can someone take a look at this and help me out? I'm not sure how to work with streams in Node.js.
import { createCipher } from "crypto";
import { AES_PASSWORD } from "../config";
import stream from "stream";
import GridFs from "../models/GridFs";
const cipher = createCipher("aes-256-cbc", AES_PASSWORD);
const maxFileSize = 1024 * 1024 * 10; // 10MB
const fileChunkSize = 2 * 255 * 1024; // 510kb
const uploadController = (req,res,next) => {
// User information
const user_id = req.user._id;
// File metadata from the file information
const {
mimetype,
buffer,
size
} = req.file;
const content_type = mimetype;
// File information provided by Resumable.js
const {
resumableChunkNumber,
resumableChunkSize,
resumableCurrentChunkSize,
resumableTotalSize,
resumableType,
resumableIdentifier,
resumableFilename,
resumableRelativePath,
resumableTotalChunks
} = req.body;
/*
* Developer's note:
* Somehow the data being posted automatically by resumable.js
* is of type string. Thus we have to parse the string here
* to prevent errors when doing any comparisons
*/
const data = buffer;
const chunk_n = parseInt(resumableChunkNumber) - 1;
const chunk_total = parseInt(resumableTotalChunks);
const chunk_size = parseInt(resumableChunkSize);
const file_length = parseInt(resumableTotalSize);
const uid = resumableIdentifier;
const file_name = resumableFilename;
// Verify Chunk size as configured
if(chunk_size != fileChunkSize){
return res.status(400).json({
error: "chunk_size is not equal to configured chunk size as the backend"
});
}
// Ensure file size is not larger than max allowable at the moment
if( file_length && file_length > maxFileSize) {
return res.status(400).json({
error: "file length is greater than configured maxFileSize"
});
}
// Basic logic in ensuring the max number of chunks is correct when
// there is overlap
let expectedNumberOfChunks = file_length / chunk_size;
if(file_length % chunk_size){
expectedNumberOfChunks += 1;
}
if(chunk_n > expectedNumberOfChunks){
return res.status(400).json({
error: "chunk_n is greater than the number of expected chunks"
});
}
const writeStream = GridFs.createWriteStream({
content_type,
chunk_size,
chunk_n,
uid,
file_name,
file_length,
user_id
});
// All is good. Start encryption
const bufferStream = new stream.PassThrough();
try{
bufferStream.write(buffer);
bufferStream.pipe(cipher).pipe(writeStream).on("close", () => {
buffer.end();
return res.status(200).send("ok");
});
} catch(error){
console.log(error);
return res.status(500).json({ error });
}
};
export default uploadController;
Nothing is happening and the request just times out. I'm lost for words at the moment.
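(A note, not part of the original post.) One likely reason for the timeout is that the PassThrough stream is written to but never ended, so the cipher and the GridFS write stream never finish and the "close" handler never fires; buffer.end() would also throw, because buffer is a plain Buffer from multer, not a stream, and crypto.createCipher is deprecated in current Node versions in favour of createCipheriv. A minimal sketch of the final block under those assumptions, keeping the rest of the controller unchanged:
const bufferStream = new stream.PassThrough();
bufferStream
  .pipe(cipher)
  .pipe(writeStream)
  .on("close", () => res.status(200).send("ok"))
  .on("error", (error) => res.status(500).json({ error: error.message }));
// end() both writes the last chunk and signals end-of-stream,
// so the pipeline can flush and emit "close"
bufferStream.end(buffer);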

Uploading series of large files in node to a web API

I am trying to create a Node script to traverse a folder of large files (mp3s containing one-hour music sets) and upload them to Mixcloud via their API. It works, except when it hits the API rate limit and needs to wait x seconds: I set a timeout and loop back to the beginning, but after the set amount of time the process exits without logging any useful output. Is this due to a bug in my code, or to the way I'm running it? I'm currently running it with sudo nohup node index.js > log.log; is this incorrect? My end goal is a script that can be run at the end of every day at our radio station to upload the archived files.
const restler = require('restler');
const fs = require('fs');
const readDir = require('readdir');
const powerOff = require('power-off');
const options = {
folder: 'files',
completefolder: 'complete',
accesstoken: 'xxxxxxxxxxxxxxxxxxxxxxx'
}
// shut down computer
const shutDown = () => {powerOff((err, stderr, stdout) => {
if(!err && !stderr) {
console.log(stdout);
}
})
};
// uploadFile uploads file with restler to mixcloud, if api returns rate limiting object, try again in x seconds
const uploadFile = (folder, filename) => {
const filepath = `./${folder}/${filename}`
fs.stat(`./${folder}/${filename}`, function(err, stats) {
const size = stats.size;
restler.post(`https://api.mixcloud.com/upload/?access_token=${options.accesstoken}`, {
multipart: true,
data: {
"mp3": restler.file(`./${folder}/${filename}`, null, size, null, 'audio/mpeg'),
"name": filename,
// "unlisted": true
// more data can be added here depending on changes in workflow, automate images etc
}
}).on("complete", function(data) {
const returned = JSON.parse(data);
if (returned.error) {
if (returned.error.type == "RateLimitException") {
// try again in x seconds
console.log(`uploading too fast, retrying upload of ${filename} after ${returned.error.retry_after} seconds`);
setTimeout(() => {(folder, filename) => uploadFile}, returned.error.retry_after*1000);
}
else {
console.log('non-rate-limiting error');
console.log(returned);
}
}
else {
console.log('Success!');
console.log(returned);
// move uploaded files into completed folder
fs.rename(`./${folder}/${filename}`, `./${options.completefolder}/${filename}`, (err)=> {
if (err) {
console.log(err)
}
else {
counter += 1;
console.log(counter);
if (counter === files.length) {
console.log('done');
shutDown();
}
}
})
}
});
});
};
// get all mp3s and upload all of them
const files = readDir.readSync(`./${options.folder}`, ['**.mp3'] );
let counter = 0;
for (var i = 0; i < files.length; i++) {
uploadFile(options.folder, files[i])
};
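(A note, not part of the original post.) The retry branch is a likely culprit: setTimeout(() => {(folder, filename) => uploadFile}, ...) schedules a callback whose body only defines an arrow function and never calls uploadFile, so once the timer fires there is nothing left on the event loop and the process exits silently. A minimal sketch of a retry that actually re-invokes the upload:
if (returned.error.type == "RateLimitException") {
  console.log(`uploading too fast, retrying upload of ${filename} after ${returned.error.retry_after} seconds`);
  // Re-invoke uploadFile after the wait; the pending timer also keeps the process alive
  setTimeout(() => uploadFile(folder, filename), returned.error.retry_after * 1000);
}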
