Azure Functions - NodeJS - Response Body as a Stream - node.js

I'd like to return a file from Blob Storage when you hit a given Azure Function end-point. This file is binary data.
Per the Azure Storage Blob docs, the most relevant call appears to be the following since its the only one that doesn't require writing the file to an interim file:
getBlobToStream
However this call gets the Blob and writes it to a stream.
Is there a way with Azure Functions to use a Stream as the value of res.body so that I can get the Blob Contents from storage and immediately write it to the response?
To add some code, trying to get something like this to work:
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'DeContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream
};
context.done();
}
});
}
function FileNotFound(context){
context.res = {
status: 404,
headers: {
"Content-Type" : "application/json"
},
body : { "Message" : "No esta aqui!."}
};
context.done();
}

Unfortunately we don't have streaming support implemented in NodeJS just yet - it's on the backlog: https://github.com/Azure/azure-webjobs-sdk-script/issues/1361
If you're not tied to NodeJ open to using a C# function instead, you can use the storage sdk object directly in your input bindings and stream request output, instead of using the intermediate object approach.

While #Matt Manson's answer is definitely correct based on the way I asked my question, the following code snippet might be more useful for someone who stumbles across this question.
While I can't send the Stream to the response body directly, I can use a custom stream which captures the data into a Uint8Array, and then sends that to the response body.
NOTE: If the file is REALLY big, this will use a lot of memory.
'use strict';
const azure = require('azure-storage'),
stream = require('stream');
const BLOB_CONTAINER = 'deContainer';
module.exports = function(context){
var file = context.bindingData.file;
var blobService = azure.createBlobService();
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
//Override the write to store the value to our "contents"
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
blobService.getBlobToStream(BLOB_CONTAINER, file, outputStream, function(error, serverBlob) {
if(error) {
FileNotFound(context);
} else {
context.res = {
status: 200,
headers: {
},
isRaw: true,
body : outputStream.contents
};
context.done();
}
});//*/
}
function FileNotFound(context){
context.res = {
status: 404,
headers: {
"Content-Type" : "application/json"
},
body : { "Message" : "No esta aqui!"}
};
context.done();
}

I tried #Doug's solution from the last comment above, with a few minor mods in my azure function, and so far, after trying 20 different ideas, this is the only one that actually delivered the file to the browser! Thank you, #Doug...
const fs = require("fs");
const stream = require("stream");
...
const AzureBlob = require('#[my_private_artifact]/azure-blob-storage');
const azureStorage = new AzureBlob(params.connectionString);
//Override the write to store the value to our "contents" <-- Doug's solution
var outputStream = new stream.Writable();
outputStream.contents = new Uint8Array(0);//Initialize contents.
outputStream._write = function (chunk, encoding, done) {
var curChunk = new Uint8Array(chunk);
var tmp = new Uint8Array(this.contents.byteLength + curChunk.byteLength);
tmp.set(this.contents, 0);
tmp.set(curChunk, this.contents.byteLength);
this.contents = tmp;
done();
};
let azureSpeedResult = await azureStorage.downloadBlobToStream(params.containerName, params.objectId, outputStream);
let headers = {
"Content-Length": azureSpeedResult.size,
"Content-Type": mimeType
};
if (params.action == "download") {
headers["Content-Disposition"] = "attachment; filename=" + params.fileName;
}
context.res = {
status: 200,
headers: headers,
isRaw: true,
body: outputStream.contents
};
context.done();
...

Related

File chunk upload to azure storage blob, file seems broken

I'm trying to upload excel file to azure storage blob in chunks, using the stage block and commitblock from BlobBlockClient Class. File upload seems to success but when i try to download and open the file, there it seems to be broken.
I'm using react and node js to do this. Code follows below
In UI
const chunkSize = (1024 * 1024) * 25; // file chunk size
// here slicing the file and sending it to api method
const fileReader = new FileReader();
const from = currentChunkIndexRef.current * chunkSize;
const to = from + chunkSize;
const blob = file.slice(from, to);
fileReader.onload = ((e: any) => uploadChunksToBlob(e, file, obj));
fileReader.readAsDataURL(blob);
// api method
const uploadChunksToBlob = async (event: any, file: File, obj: any) => {
try {
const totalChunks = Math.ceil(file.size / chunkSize);
const uploadChunkURL = `/upload?currentChunk=${currentChunkIndexRef.current}&totalChunks=${totalChunks}&file=${file.name}&type=${file.type}`;
console.log(event.target.result)
const fileUpload = await fetch(uploadChunkURL, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: JSON.stringify(event.target.result),
});
const fileUploadJson = await fileUpload.json();
const isLastChunk = (totalChunks - 1) === currentChunkIndexRef.current;
if(!isLastChunk) {
console.log({ Chunk: currentChunkIndexRef.current });
currentChunkIndexRef.current = currentChunkIndexRef.current + 1;
// eslint-disable-next-line #typescript-eslint/no-use-before-define
uploadFileToAzureBlob(file, obj);
} else {
console.log("File Uploaded")
}
//
} catch (error) {
console.log("uploadFileToAzureBlob Catch Error" + error);
}
}
// In Node
const sharedKeyCredential = new StorageSharedKeyCredential(
config.StorageAccountName,
config.StorageAccountAccessKey
);
const pipeline = newPipeline(sharedKeyCredential);
const blobServiceClient = new BlobServiceClient(
`https://${config.StorageAccountName}.blob.core.windows.net`,
pipeline
);
const containerName = getContainerName(req.headers.key, req.headers.clientcode);
const identifier = uuid.v4();
const blobName = getBlobName(identifier, file);
const containerClient = blobServiceClient.getContainerClient(containerName);
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
try {
let bufferObj = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8"); // Create buffer object, specifying utf8 as encoding
let base64String = bufferObj.toString("base64"); // Encode the Buffer as a base64 string
blockIds = [...blockIds, base64String];
const bufferedData = Buffer.from(req.body);
let resultOfUnitArray = new Uint8Array(bufferedData.length);
for (let j = 0; j < bufferedData.length; j++) {
resultOfUnitArray[j] = bufferedData.toString().charCodeAt(j);
} // Converting string to bytes
const stageBlockResponse = await blockBlobClient.stageBlock(base64String, resultOfUnitArray, resultOfUnitArray.length, {
onProgress: (e) => {
console.log("bytes sent: " + e.loadedBytes);
}
});
if ((Number(totalChunks) - 1) === (Number(currentChunk))) {
const commitblockResponse = await blockBlobClient.commitBlockList(blockIds, {blobHTTPHeaders: req.headers});
res.json({ uuid: identifier, message: 'File uploaded to Azure Blob storage.' });
} else {
res.json({ message: `Current Chunks ${currentChunk} is Successfully Uploaded` });
}
} catch (err) {
console.log({ err })
res.json({ message: err.message });
}
I don't know, what i'm doing wrong here.
Any help would be appreciated
Thank you
The problem is that you convert it into dataURL, that’s where things break.
It appears to me that you're under the wrong impression that you need to first encode a blob into string in order to send it. Well, you don't have to, browser fetch API is capable to handle raw binary payload.
So on the client (browser) side, you don’t need to go through FileReader. Just send the chunk blob directly.
const blob = file.slice(from, to);
// ...
fetch(uploadChunkURL, {
method: "POST",
headers: { "Content-Type": "application/octet-stream" },
body: blob,
});
On the server (node.js) side, you'll receive the blob in raw binary form, so you can simply forward that blob untouched to azure storage. There's no need to decode from string and move bytes onto resultOfUnitArray like you currently do.
const base64String = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8").toString("base64");
const bufferedData = Buffer.from(req.body);
const stageBlockResponse = await blockBlobClient.stageBlock(
base64String,
bufferedData,
bufferedData.length
);

Read data from .xlsx file on S3 using Nodejs Lambda

I'm still new in NodeJs and AWS, so forgive me if this is a noob question.
I am trying to read the data from an excel file (.xlsx). The lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
if (event.fileExt === undefined) {
callback("400 Invalid Input");
}
let returnData = "";
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
switch(event.fileExt)
{
case "plain":
case "txt":
// Extract text
const params = {Bucket: 'filestation', Key: 'MyTXT.'+event.fileExt};
try {
await s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else{ // successful response
returnData = data.Body.toString('utf-8');
context.done(null, returnData);
}
}).promise();
} catch (error) {
console.log(error);
return;
}
break;
case "xls":
case "xlsx":
returnData = "Excel";
// Extract text
const params2 = {Bucket: 'filestation', Key: 'MyExcel.'+event.fileExt};
const readXlsxFile = require("read-excel-file/node");
try {
const doc = await s3.getObject(params2);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object.`;
console.log(message);
throw new Error(message);
}
break;
case "docx":
returnData = "Word doc";
// Extract text
break;
default:
callback("400 Invalid Operator");
break;
}
callback(null, returnData);
};
The textfile part works. But the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
using the xlsx npm library. here's how we did it.
assuming the file is under the root project path.
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install xlsx (SheetJs) library into the project:
npm install xlsx
and then import the "read" function into the lambda, get the s3 object's body and send to xlsx like this:
const { read } = require('sheetjs-style');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
const bucketName = 'excel-files';
const fileKey = 'Demo Data.xlsx';
// Simple GetObject
let file = await s3.getObject({Bucket: bucketName, Key: fileKey}).promise();
const wb = read(file.Body);
const response = {
statusCode: 200,
body: JSON.stringify({
read: wb.Sheets,
}),
};
return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very Important: Use the READ (not the readFile) function and send the Body property (with capital "B") as a paremeter
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (filepath) and not a file.
Solved by using xlsx NPM library. Using a stream and giving it buffers.

Stream audio to Azure speech api by node.js on browser

I'm making a demo of speech to text using Azure speech api on browser by node.js. According to API document here, it does specify that it need .wav or .ogg files. But the example down there does a api call through sending byte data to api.
So I've already get my data from microphone in byte array form. Is it the right path to convert it to byte and send it to api? Or is it better for me to save it as a .wav file then send to the api?
So below is my code.
This is stream from microphone part.
navigator.mediaDevices.getUserMedia({ audio: true })
.then(stream => { handlerFunction(stream) })
function handlerFunction(stream) {
rec = new MediaRecorder(stream);
rec.ondataavailable = e => {
audioChunks.push(e.data);
if (rec.state == "inactive") {
let blob = new Blob(audioChunks, { type: 'audio/wav; codec=audio/pcm; samplerate=16000' });
recordedAudio.src = URL.createObjectURL(blob);
recordedAudio.controls = true;
recordedAudio.autoplay = true;
console.log(blob);
let fileReader = new FileReader();
var arrayBuffer = new Uint8Array(1024);
var reader = new FileReader();
reader.readAsArrayBuffer(blob);
reader.onloadend = function () {
var byteArray = new Uint8Array(reader.result);
console.log("reader result" + reader.result)
etTimeout(() => getText(byteArray), 1000);
}
}
}
}
This is api call part
function getText(audio, callback) {
console.log("in function audio " + audio);
console.log("how many byte?: " + audio.byteLength)
const sendTime = Date.now();
fetch('https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US', {
method: "POST",
headers: {
'Accept': 'application/json',
'Ocp-Apim-Subscription-Key': YOUR_API_KEY,
// 'Transfer-Encoding': 'chunked',
// 'Expect': '100-continue',
'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000'
},
body: audio
})
.then(function (r) {
return r.json();
})
.then(function (response) {
if (sendTime < time) {
return
}
time = sendTime
//callback(response)
}).catch(e => {
console.log("Error", e)
})
}
It returns with 400 (Bad Request) and says :
{Message: "Unsupported audio format"}
Reason:
Note you're not creating a MediaRecorder with a audio/wav mimeType by
new Blob(audioChunks,{type:'audio/wav; codec=audio/pcm; samplerate=16000'})
This statement is only a description for blob. I test my Chrome(v71) with isTypeSupported:
MediaRecorder.isTypeSupported("audio/wav") // return false
MediaRecorder.isTypeSupported("audio/ogg") // return false
MediaRecorder.isTypeSupported("audio/webm") // return true
It seems that the MediaRecorder will only record the audio in audio/webm. Also, when I run the following code on Chrome , the default rec.mimeType is audio/webm;codecs=opus
rec = new MediaRecorder(stream);
According to the Audio formats Requiremnts, the audio/webm is not supported yet.
Approach:
Before calling getText() we need convert the webm to wav firstly. There're quite a lot of libraries that can help us do that. I just copy Jam3's script before your code to convert webm to wav :
// add Jam3's script between Line 2 and Line 94 or import that module as you like
// create a audioContext that helps us decode the webm audio
var audioCtx = new (window.AudioContext || window.webkitAudioContext)();
rec = new MediaRecorder(stream,{
mimeType : 'audio/webm',
codecs : "opus",
});
// ...
rec.ondataavailable = e => {
audioChunks.push(e.data);
if (rec.state == "inactive") {
var blob = new Blob(audioChunks, { 'type': 'audio/webm; codecs=opus' });
var arrayBuffer;
var fileReader = new FileReader();
fileReader.onload = function(event) {
arrayBuffer = event.target.result;
};
fileReader.readAsArrayBuffer(blob);
fileReader.onloadend=function(d){
audioCtx.decodeAudioData(
fileReader.result,
function(buffer) {
var wav = audioBufferToWav(buffer);
setTimeout(() => getText(wav), 1000);
},
function(e){ console.log( e); }
);
};
}
}
And it works fine for me :
As a side note, I suggest you should use your backend to invoke the speech-to-text services. Never invoke azure stt service in a browser. That's because exposing your subscription key to front end is really dangerous. Anyone could inspect the network and steal your key.

How to consume MTOM SOAP web service in node.js?

I need to download or process a file from a soap based web service in node.js.
can someone suggest me on how to handle this in node.js
I tried with 'node-soap' or 'soap' NPM module. it worked for normal soap web service. But, not for binary steam or MTOM based SOAP web service
I want to try to answer this... It's quite interesting that 2 years and 2 months later I can not figure it out how to easily solve the same problem.
I'm trying to get the attachment from a response like:
...
headers: { 'cache-control': 'no-cache="set-cookie"',
'content-type': 'multipart/related;boundary="----=_Part_61_425861994.1525782562904";type="application/xop+xml";start="";start-info="text/xml"',
...
body: '------=_Part_61_425861994.1525782562904\r\nContent-Type:
application/xop+xml; charset=utf-8;
type="text/xml"\r\nContent-Transfer-Encoding: 8bit\r\nContent-ID:
\r\n\r\n....\r\n------=_Part_61_425861994.1525782562904\r\nContent-Type:
application/octet-stream\r\nContent-Transfer-Encoding:
binary\r\nContent-ID:
\r\n\r\n�PNG\r\n\u001a\n\u0000\u0000\u0000\rIHDR\u0000\u0000\u0002,\u0000\u0000\u0005�\b\u0006\u0........binary....
I tried ws.js but no solution.
My solution:
var request = require("request");
var bsplit = require('buffer-split')
// it will extract "----=_Part_61_425861994.1525782562904" from the response
function getBoundaryFromResponse(response) {
var contentType = response.headers['content-type']
if (contentType && contentType.indexOf('boundary=') != -1 ) {
return contentType.split(';')[1].replace('boundary=','').slice(1, -1)
}
return null
}
function splitBufferWithPattern(binaryData, boundary) {
var b = new Buffer(binaryData),
delim = new Buffer(boundary),
result = bsplit(b, delim);
return result
}
var options = {
method: 'POST',
url: 'http://bla.blabal.../file',
gzip: true,
headers: {
SOAPAction: 'downloadFile',
'Content-Type': 'text/xml;charset=UTF-8'
},
body: '<soapenv: ... xml request of the file ... elope>'
};
var data = [];
var buffer = null;
var filename = "test.png"
request(options, function (error, response, body) {
if (error) throw new Error(error);
if (filename && buffer) {
console.log("filename: " + filename)
console.log(buffer.toString('base64'))
// after this, we can save the file from base64 ...
}
})
.on('data', function (chunk) {
data.push(chunk)
})
.on('end', function () {
var onlyPayload = splitBufferWithPattern(Buffer.concat(data), '\r\n\r\n') // this will get from PNG
buffer = onlyPayload[2]
buffer = splitBufferWithPattern(buffer, '\r\n-')[0]
console.log('Downloaded.');
})
I am not sure it will works in most of the cases. It looks like unstable code to my eyes and so I'm looking for something better.
Use ws.js
Here is how to fetch the file attachments:
const ws = require('ws.js')
const { Http, Mtom } = ws
var handlers = [ new Mtom(), new Http()];
var request = '<s:Envelope xmlns:s="http://www.w3.org/2003/05/soap-envelope">' +
'<s:Body>' +
'<EchoFiles xmlns="http://tempuri.org/">' +
'<File1 />' +
'</EchoFiles>' +
'</s:Body>' +
'</s:Envelope>'
var ctx = { request: request
, contentType: "application/soap+xml"
, url: "http://localhost:7171/Service/mtom"
, action: "http://tempuri.org/IService/EchoFiles"
}
ws.send(handlers, ctx, function(ctx) {
//read an attachment from the soap response
var file = ws.getAttachment(ctx, "response", "//*[local-name(.)='File1']")
// work with the file
fs.writeFileSync("result.jpg", file)
})
Two limitations:
No basic auth provided out-of-box, patch required https://github.com/yaronn/ws.js/pull/40
If the file name is an url, you need to apply another patch at mtom.js. Replace:
.
xpath = "//*[#href='cid:" + encodeURIComponent(id) + "']//parent::*"
with:
xpath = "//*[#href='cid:" + id + "']//parent::*"

How to get binary data from ng-file-upload file object?

I'm attempting to use the ng-file-upload directive to provide file upload functionality in my angular app.
I've got it working for the most part - I can select multiple files and loop through to grab the file name and file types. I just can't seem to figure out where the actual binary data of each file is stored in the file object.
I tried using the approach outlined in this post - AngularJS Upload a file and send it to a DB, but that results in a an error that "$q is not defined".
function create_blob(file) {
var deferred = $q.defer();
var reader = new FileReader();
reader.onload = function () {
deferred.resolve(reader.result);
};
reader.readAsDataURL(file);
return deferred.promise;
}
So then I tried the approach outlined in this post - Send an uploaded image to the server and save it in the server, but again I'm running into an error reading "dataURI.split is not a function".
function dataURItoBlob(dataURI) {
var binary = atob(dataURI.split(',')[1]);
var mimeString = dataURI.split(',')[0].split(':')[1].split(';')[0];
var array = [];
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
return new Blob([new Uint8Array(array)], {
type: mimeString
});
}
The code I'm using is as follows:
function create_blob(file) {
var deferred = $q.defer();
var reader = new FileReader();
reader.onload = function () {
deferred.resolve(reader.result);
};
reader.readAsDataURL(file);
return deferred.promise;
}
function dataURItoBlob(dataURI) {
var binary = atob(dataURI.split(',')[1]);
var mimeString = dataURI.split(',')[0].split(':')[1].split(';')[0];
var array = [];
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
return new Blob([new Uint8Array(array)], {
type: mimeString
});
}
$scope.uploadFiles = function (files) {
$scope.files = files;
angular.forEach(files, function (file) {
if (file && !file.$error) {
//var reader = new FileReader();
//console.log(reader.readAsDataURL(file));
//var binary = create_blob(file);
var fileBinary = dataURItoBlob(file);
$http({
url: root + '/DesktopModules/ServiceProxy/API/NetSuite/InsertCaseFile',
method: "POST",
//headers: { 'caseId': id, 'fileName': file.name, fileContent: $.base64.encode(file) }
headers: { 'caseId': id, 'fileName': file.name, fileContent: fileBinary }
}).
success(function (data, status, headers, config) {
//if (data == true) {
// getCase();
// $scope.newMessage = "";
// //toaster.pop('success', "", "Message succesfully submitted.",0);
//}
}).
error(function (data, status, headers, config) {
});
file.upload.progress(function (evt) {
file.progress = Math.min(100, parseInt(100.0 * evt.loaded / evt.total));
});
}
});
}
What am I overlooking?
It depends on what format your DB is accepting for file upload. If it support multipart form data, then you can just use
Upload.upload({file: file, url: my/db/url}).then(...);
if it accepts post requests with file's binary as content of the request (like CouchDB, imgur, ...) then you can do
Upload.http({data: file, url: my/db/url, headers: {'Content-Type': file.type}})...;
if you db just accept json objects and you want to store the file as base64 data url in the database like this question then you can do
Upload.dataUrl(file, true).then(function(dataUrl) {
$http.post(url, {
fileBase64DataUrl: dataUrl,
fileName: file.name,
id: uniqueId
});
})

Resources