Azure Storage Download to Blob prematurely ends Function App call - Node.js

I am using Node.js to download a file into a buffer for processing in my code. The relevant code is:
let bbc = containerClient.getBlockBlobClient(
    userId + "/" + documentUuids[i] + ".pdf"
);
let blob;
try {
    console.log("downloading blob");
    blob = await bbc.downloadToBuffer();
    console.log("downloaded blob");
} catch (e) {
    console.log(userId + "/" + documentUuids[i] + ".pdf");
    console.log(e);
}
However, instead of waiting for the download and then proceeding with the rest of the code, the line blob = await bbc.downloadToBuffer(); prematurely ends the function app and returns a 200 with no body. In the console I then see the message
Warning: Unexpected call to 'log' on the context object after function execution has completed. Please check for asynchronous calls that are not awaited or calls to 'done' made before function execution completes. Function name: BasketsCreateUpdate. Invocation Id: 59f57785-6390-4b93-a69e-8244dc688d37. Learn more: https://go.microsoft.com/fwlink/?linkid=2097909
and eventually in my logs, I see the required output, but the function has already prematurely returned an empty body. I have no idea why this is happening, and I would appreciate any help.
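A hedged reading of that warning: the Azure Functions Node.js runtime treats an invocation as finished as soon as the exported handler's promise resolves (or context.done is called), so a 200 with an empty body plus logs arriving afterwards usually means the download runs in a promise that the handler itself never awaits. A minimal sketch of the shape the handler needs (client setup and names assumed from the question):
module.exports = async function (context, req) {
    // Every async step must be awaited on the path that reaches the end of
    // this handler; otherwise the runtime completes the invocation early.
    const bbc = containerClient.getBlockBlobClient(userId + "/" + documentUuids[i] + ".pdf");
    const blob = await bbc.downloadToBuffer();
    // ... process blob ...
    context.res = { body: { size: blob.length } }; // set the response only after all awaits
};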

There is nothing wrong with your blob-downloading code; I assume something is wrong with how you handle the result inside your JS function. I wrote a simple demo that gets the content of a .txt file, which should meet your requirement:
module.exports = async function (context, req) {
    const { BlobServiceClient, StorageSharedKeyCredential } = require("@azure/storage-blob");
    const account = '';
    const accountKey = '';
    const container = '';
    const blobName = '';

    async function test(context) {
        const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
        const blobServiceClient = new BlobServiceClient(
            `https://${account}.blob.core.windows.net`,
            sharedKeyCredential
        );
        const bbc = blobServiceClient.getContainerClient(container).getBlockBlobClient(blobName);
        context.log('=================> download start');
        let blob = await bbc.downloadToBuffer();
        context.log('=================> download complete');
        return blob.toString('utf-8');
    }

    var result = await test(context);
    context.res = {
        body: result
    };
}

Related

Puppeteer to convert HTML to PDF using Node.js in Durable Functions (fan out/fan in)

I'm working on a small project to convert a large XML into several formatted PDF documents. The large XML contains multiple smaller XMLs of similar format, so I'm using a single HTML template for printing all the documents. After producing all the PDF documents, I also need to produce a metadata file with some basic info on each document that was printed.
I thought the fan out/fan in scenario of Durable Functions was a perfect fit for my use case. I'm working with Node.js. I set up all my code and it seems to be working fine locally. The orchestration function looks like the below.
const df = require("durable-functions");

module.exports = df.orchestrator(function* (context) {
    var xmldata = yield context.df.callActivity("DurablegetblobJS1", "");
    var tasks = [];
    for (const file of xmldata) {
        tasks.push(context.df.callActivity("Durableactivityjs2", file));
    }
    const outputs = yield context.df.Task.all(tasks);
    var finalout = "";
    for (const out of outputs) {
        console.log('I am done1 :' + out);
        finalout = finalout + out;
    }
    return finalout;
});
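One hedged aside about the console.log inside the orchestrator: the Durable Task framework replays orchestrator code to rebuild its state, so log lines there can print several times for a single run. Checking context.df.isReplaying keeps the output to one line per actual event:
for (const out of outputs) {
    // Log only on the first, non-replayed execution of this code path
    if (!context.df.isReplaying) {
        context.log('I am done1 :' + out);
    }
    finalout = finalout + out;
}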
DurablegetblobJS1: fetches the entire XML and splits it into multiple smaller XMLs (one per document).
Durableactivityjs2: fetches the HTML template, extracts the different values from the individual XMLs, applies them to the HTML, and finally prints the PDF to Azure Storage. It returns the name of the PDF document that was printed, for creation of the metadata file. The code for this is below.
var fs = require('fs');
var xml2js = require('xml2js');
var html_to_pdf = require('html-pdf-node');
var parser = new xml2js.Parser();

module.exports = async function (context) {
    //console.log("Hello from activity :")
    var xmldict = {};
    var xmltext = context.bindings.name;
    // Extract the nodes and attributes
    metadata(xmltext, xmldict);
    report(xmltext, xmldict);
    context.log(xmldict);
    const { BlobServiceClient } = require("@azure/storage-blob");
    // Load the .env file if it exists
    require("dotenv").config();
    const AZURE_STORAGE_CONNECTION_STRING = process.env.STORAGE_CONNECTION_STRING || "";
    const blobServiceClient = BlobServiceClient.fromConnectionString(
        AZURE_STORAGE_CONNECTION_STRING
    );
    var containerClient = blobServiceClient.getContainerClient('test');
    var blobname = 'comb_template.html';
    var blockBlobClient = containerClient.getBlockBlobClient(blobname);
    var downloadBlockBlobResponse = await blockBlobClient.download(0);
    var html_template = await streamToText(downloadBlockBlobResponse.readableStreamBody);
    let options = { format: 'A4' };
    let file = { content: html_template };
    const x = await writepdf1(file, options, blobServiceClient, xmldict);
    console.log("Written Blob PDF");
    return x;
};

async function writepdf1(file, options, blobServiceClient, xmldict) {
    const pdfBuffer = await html_to_pdf.generatePdf(file, options);
    const containerClient = blobServiceClient.getContainerClient('test2');
    const targetblob = xmldict['OU'] + '/' + xmldict['ReportName'] + '/' + xmldict['OU'] + '_' + xmldict['ReportName'] + '_' + xmldict['DocumentID'] + '_' + '.pdf';
    console.log('Blob name :' + targetblob);
    const blockBlobClient_t = containerClient.getBlockBlobClient(targetblob);
    const uploadBlobResponse = await blockBlobClient_t.upload(pdfBuffer, pdfBuffer.length);
    return targetblob;
}

async function streamToText(readable) {
    readable.setEncoding('utf8');
    let data = '';
    for await (const chunk of readable) {
        data += chunk;
    }
    return data;
}

function metadata(xmltext, xmldict) {
    parser.parseString(xmltext, function (err, result) {
        var test1 = result['HPDPSMsg']['DocumentRequest'][0]['MetaData'][0];
        Object.entries(test1).forEach(([key, value]) => {
            xmldict[key] = value[0];
        });
    });
}

function report(xmltext, xmldict) {
    parser.parseString(xmltext, function (err, result) {
        var test2 = result['HPDPSMsg']['DocumentRequest'][0]['Report'][0]['$'];
        Object.entries(test2).forEach(([key, value]) => {
            xmldict[key] = value;
        });
    });
}
However, when I deploy the entire project to an Azure premium function (EP1, Windows), I see some errors in App Insights when I try to execute my function, and the PDFs are never generated.
Activity function 'Durableactivityjs2' failed: Could not find browser
revision 818858. Run "PUPPETEER_PRODUCT=firefox npm install" or
"PUPPETEER_PRODUCT=firefox yarn install" to download a supported
Firefox browser binary
I'm a bit clueless how I'm supposed to resolve this. Any help or suggestions would be appreciated.
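A hedged pointer on that error: html-pdf-node drives PDF generation through puppeteer, and puppeteer downloads its pinned Chromium build via an install script at npm install time. The message suggests that download never happened in the deployed environment, for example because node_modules was built on a different machine or the download was skipped, so the usual remedy is to make the install run where the function runs (for instance by enabling build during deployment) or to ship a node_modules tree that already contains the browser. A quick diagnostic, assuming puppeteer resolves, is to log where it expects the binary:
// Logs the absolute path where puppeteer expects its bundled Chromium;
// if nothing exists at that path in the deployed app, the download was skipped.
const puppeteer = require('puppeteer');
console.log('Expected Chromium at:', puppeteer.executablePath());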

Firebase Storage + docxtemplater in Node.js

I am having trouble loading a Firebase Storage document in Node.js (preferably in binary) so that I can generate a docxtemplater document from it. I'm quite new to docxtemplater and would really like to use it for my web app.
Is this something that can be done?
Below is the code I have, but I don't think it's loading the document from my Firebase Storage properly:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
const {Storage} = require('@google-cloud/storage');
var PizZip = require('pizzip');
var Docxtemplater = require('docxtemplater');

admin.initializeApp();
const BUCKET = 'gs://mpcwapp.appspot.com';
const https = require('https');
const storage = new Storage({
    projectId: 'myapp'
});
const cors = require('cors')({origin: true});
exports.test2 = functions.https.onCall((data, context) => {
    // The error object contains additional information when logged with JSON.stringify
    // (it contains a properties object containing all suberrors).
    function replaceErrors(key, value) {
        if (value instanceof Error) {
            return Object.getOwnPropertyNames(value).reduce(function(error, key) {
                error[key] = value[key];
                return error;
            }, {});
        }
        return value;
    }
    function errorHandler(error) {
        console.log(JSON.stringify({error: error}, replaceErrors));
        if (error.properties && error.properties.errors instanceof Array) {
            const errorMessages = error.properties.errors.map(function (error) {
                return error.properties.explanation;
            }).join("\n");
            console.log('errorMessages', errorMessages);
            // errorMessages is a humanly readable message looking like this:
            // 'The tag beginning with "foobar" is unopened'
        }
        throw error;
    }
    // Load the docx file as a binary
    let file_name = 'input.docx';
    const myFile = storage.bucket(BUCKET).file(file_name);
    var content = myFile.createReadStream();
    var zip = new PizZip(content);
    var doc;
    try {
        doc = new Docxtemplater(zip);
    } catch(error) {
        // Catch compilation errors (errors caused by the compilation of the template: misplaced tags)
        errorHandler(error);
    }
    // Set the template variables
    doc.setData({
        first_name: 'John',
        last_name: 'Doe',
        phone: '0652455478',
        description: 'New Website'
    });
    try {
        // Render the document (replace all occurrences of {first_name} by John, {last_name} by Doe, ...)
        doc.render();
    }
    catch (error) {
        // Catch rendering errors (errors relating to the rendering of the template: angularParser throws an error)
        errorHandler(error);
    }
    var buf = doc.getZip().generate({type: 'nodebuffer'});
    buf.pipe(myFile.createWriteStream());
});
Any help will be appreciated; I am really stuck.
First, if you're deploying your code logic in Firebase Functions, make sure it's inside your list of exports.
I tried to reproduce the behavior of your code and noticed that the root cause of the error is because of this part in your code:
var content = myFile.createReadStream();
var zip = new PizZip(content);
pizzip appears to accept a Buffer input according to this documentation. However, createReadStream() returns a ReadableStream, so there's a mismatch in the expected parameter type.
There are two solutions I can think of:
First, download and store the file (in /tmp), then read it back using fs.readFileSync() (see the sketch after this list).
Skip saving the file to the file system and get the buffer of the file object.
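A minimal sketch of the first option, reusing storage, BUCKET, and PizZip from the question; Cloud Functions only allows writes under the temp directory, and File#download() can save straight into it:
// Inside an async function
const os = require('os');
const path = require('path');
const fs = require('fs');

const tmpPath = path.join(os.tmpdir(), 'input.docx');
await storage.bucket(BUCKET).file('input.docx').download({ destination: tmpPath });
const content = fs.readFileSync(tmpPath); // a Buffer, which PizZip accepts
const zip = new PizZip(content);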
For the 2nd option, you need to understand how streams work. This answer can give you a good head start. As an example, you can get the buffer from a ReadableStream like this:
const remoteFile = storage.bucket("bucket-name").file("file-name");
const readable = remoteFile.createReadStream();
var buffers = [];
readable.on('data', (buffer) => {
    buffers.push(buffer);
});
readable.on('end', () => {
    var buffer = Buffer.concat(buffers);
    var zip = new PizZip(buffer);
    var doc;
    try {
        doc = new Docxtemplater(zip);
    } catch(error) {
        errorHandler(error);
    }
    // ... rest of your code
});
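As a hedged aside: since this approach buffers the whole file in memory anyway, File#download() without a destination does the same thing in one call, resolving with the contents as a Buffer:
const remoteFile = storage.bucket("bucket-name").file("file-name");
// download() with no destination resolves with [Buffer]
const [contents] = await remoteFile.download();
const zip = new PizZip(contents);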

Blob downloadToFile() not downloading large files in Node.js

I have implemented Azure Blob Storage in my project. It works fine for smaller files, but when I try to download a file larger than 40 MB, it never returns from blob.downloadToFile().
Here is my code.
API call
exports.downloadSingle = function(req, res) {
    downloadService.downloadSingleFile(req.params.id).then(function (result) {
        res.send(result);
    });
}
Download service
var filesLocalLinks = [];
const request = require('request-promise');
const fs = require('fs');
var download = require('download-file');

let promise = new Promise((resolve, reject) => {
    filemodel.findOne({_id: id, cancelled: false}).exec(function(error, result) {
        if (error) {
            resolve(error);
        } else {
            blobDownload.downloadFile(result.blobName, result.containerName).then(function(blobResponse) {
                var filename = path.resolve(__dirname + '/../services/uploads/' + result.fileName);
                filename = filename.replace('/myProject', '');
                filename = process.env.BASE_URL + '/myProject/services' + filename.split('/services')[1];
                resolve({file: filename, filename: result.originalname});
            }).catch(function(error) {
                reject(error);
            });
        }
    });
});
let result = await promise;
return {file: result.file, filename: result.filename};
}
Blob service
let promise = new Promise(async (resolve, reject) => {
    const account = process.env.BLOB_ACCOUNT;
    const accountKey = process.env.BLOB_ACCOUNT_KEY;
    const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
    const blobServiceClient = new BlobServiceClient(
        `https://${account}.blob.core.windows.net`,
        sharedKeyCredential
    );
    const containerClient = blobServiceClient.getContainerClient(containerName);
    const blockBlobClient = containerClient.getBlockBlobClient(blobName);
    const blobClient = containerClient.getBlobClient(blobName);
    var blobResponse = await blobClient.downloadToFile(blobName);
    resolve(blobResponse);
});
let result = await promise;
return result;
}
In the blob service, the line var blobResponse = await blobClient.downloadToFile(blobName); takes so long that the page eventually responds with a "page isn't working" error.
Can anyone help me?
Basically, the flow of the code is: first it downloads the file to our server directory, and then the user can download it from there. I figured out that the code itself was working fine; the issue was the memory of our server. Once we increased the memory, the issue was resolved. But it took me two days to figure that out.
I came across this out of the blue, but I noticed that you pass blobName as the parameter of the downloadToFile() call instead of the target file's name. Not to mention that the line const blockBlobClient = containerClient.getBlockBlobClient(blobName); seems to be unnecessary, which suggests that you didn't look into your own code properly.
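Building on that observation, a hedged sketch of the download step: downloadToFile() expects a local file path as its first argument, and for large blobs you can also stream the response body to disk so the whole file is never held in memory (names reused from the question):
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const pipeline = promisify(require('stream').pipeline);

// Option 1: let the SDK write to an explicit local path
const localPath = path.resolve(__dirname, 'uploads', blobName);
await blobClient.downloadToFile(localPath);

// Option 2: stream the download body yourself, keeping memory usage flat
const response = await blobClient.download(0);
await pipeline(response.readableStreamBody, fs.createWriteStream(localPath));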

How to use bucket.upload() instead of file.createWriteStream() in Google Cloud Storage?

I'm trying to get the permanent (unsigned) download URL after uploading a file to Google Cloud Storage. I can get the signed download URL using file.createWriteStream() but file.createWriteStream() doesn't return the UploadResponse that includes the unsigned download URL. bucket.upload() includes the UploadResponse, and Get Download URL from file uploaded with Cloud Functions for Firebase has several answers explaining how to get the unsigned download URL from the UploadResponse. How do I change file.createWriteStream() in my code to bucket.upload()? Here's my code:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage({ projectId: 'my-app' });
const bucket = storage.bucket('my-app.appspot.com');
var file = bucket.file('Audio/' + longLanguage + '/' + pronunciation + '/' + wordFileType);
const config = {
    action: 'read',
    expires: '03-17-2025',
    content_type: 'audio/mp3'
};

function oedPromise() {
    return new Promise(function(resolve, reject) {
        http.get(oedAudioURL, function(response) {
            response.pipe(file.createWriteStream(options))
                .on('error', function(error) {
                    console.error(error);
                    reject(error);
                })
                .on('finish', function() {
                    file.getSignedUrl(config, function(err, url) {
                        if (err) {
                            console.error(err);
                            return;
                        } else {
                            resolve(url);
                        }
                    });
                });
        });
    });
}
I tried this, it didn't work:
function oedPromise() {
    return new Promise(function(resolve, reject) {
        http.get(oedAudioURL, function(response) {
            bucket.upload(response, options)
                .then(function(uploadResponse) {
                    console.log('Then do something with UploadResponse.');
                })
                .catch(error => console.error(error));
        });
    });
}
The error message was Path must be a string. In other words, response is an HTTP response stream, but bucket.upload() needs a string path to a local file.
I used the Google Cloud Text-to-Speech API to simulate what you are doing, reading the text for the audio file from a text file. Once the file was created, I used the upload method to add it to my bucket, and the makePublic method to get its public URL. I also used the async/await feature offered by Node.js instead of function chaining (using then) to avoid the 'No such object: ...' error produced when the makePublic method executes before the file finishes uploading to the bucket.
// Imports the Google Cloud client library
const {Storage} = require('@google-cloud/storage');
// Creates a client using Application Default Credentials
const storage = new Storage();
// Imports the Google Cloud client library
const textToSpeech = require('@google-cloud/text-to-speech');
// Get the bucket
const myBucket = storage.bucket('my_bucket');
// Import other required libraries
const fs = require('fs');
const util = require('util');
// Create a client
const client = new textToSpeech.TextToSpeechClient();
// Create the variable to save the text to create the audio file
var text = "";

// Function that reads the my_text.txt file (which contains the text that will be
// used to create my_audio.mp3) and saves its content in a variable.
function readFile() {
    // This line opens the file as a readable stream
    var readStream = fs.createReadStream('/home/usr/my_text.txt');
    // Read and display the file data on console
    readStream.on('data', function (data) {
        text = data.toString();
    });
    // Execute the createAndUploadFile() function once the whole file is read
    readStream.on('end', function (data) {
        createAndUploadFile();
    });
}

// Function that uploads the file to the bucket and generates its public URL.
async function createAndUploadFile() {
    // Construct the request
    const request = {
        input: {text: text},
        // Select the language and SSML voice gender (optional)
        voice: {languageCode: 'en-US', ssmlGender: 'NEUTRAL'},
        // Select the type of audio encoding
        audioConfig: {audioEncoding: 'MP3'},
    };
    // Performs the text-to-speech request
    const [response] = await client.synthesizeSpeech(request);
    // Write the binary audio content to a local file
    const writeFile = util.promisify(fs.writeFile);
    await writeFile('my_audio.mp3', response.audioContent, 'binary');
    console.log('Audio content written to file: my_audio.mp3');
    // Wait for the myBucket.upload() function to complete before moving on
    let res = await myBucket.upload('/home/usr/my_audio.mp3');
    // If there is an error, it is printed
    if (res.err) {
        console.log('error');
    }
    // If not, the makePublic() function is executed
    else {
        // Get the file in the bucket
        let file = myBucket.file('my_audio.mp3');
        file.makePublic();
    }
}

readFile();
bucket.upload() is a convenience wrapper around file.createWriteStream() that takes a local filesystem path and uploads the file into the bucket as an object:
bucket.upload("path/to/local/file.ext", options)
.then(() => {
// upload has completed
});
To generate a signed URL, you'll need to get a file object from the bucket:
const theFile = bucket.file('file_name');
The file name will either be that of your local file, or the alternate remote name you specified via options.destination for the file on GCS.
Then, use File.getSignedUrl() to get a signed URL:
bucket.upload("path/to/local/file.ext", options)
.then(() => {
const theFile = bucket.file('file.ext');
return theFile.getSignedURL(signedUrlOptions); // getSignedURL returns a Promise
})
.then((signedUrl) => {
// do something with the signedURL
});
See:
Bucket.upload() documentation
File.getSignedUrl() documentation
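As a hedged aside: Bucket#upload() resolves with an UploadResponse whose first element is the File object, so the separate bucket.file() lookup can be skipped:
bucket.upload("path/to/local/file.ext", options)
    .then(([file]) => file.getSignedUrl(signedUrlOptions))
    .then(([signedUrl]) => {
        // do something with the signedUrl
    });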
You can make a specific file in a bucket publicly readable with the method makePublic.
From the docs:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
// 'my-bucket' is your bucket's name
const myBucket = storage.bucket('my-bucket');
// 'my-file' is the path to your file inside your bucket
const file = myBucket.file('my-file');

file.makePublic(function(err, apiResponse) {});

//-
// If the callback is omitted, we'll return a Promise.
//-
file.makePublic().then(function(data) {
    const apiResponse = data[0];
});
Now the URI http://storage.googleapis.com/[BUCKET_NAME]/[OBJECT_NAME] is a public link to the file, as explained here.
The point is that you only need this minimal code to make an object public, for instance with a Cloud Function. Then you already know how the public link is and can use it directly in your app.

Save an audiofile from Google Text-to-Speech to Firebase Storage using Google Cloud Storage?

We're trying to get an audio file from Google Text-to-Speech and save it to Firebase Storage, using a Google Cloud Function. The documentation for Google Text-to-Speech shows how to get an audio file and save it locally:
// Performs the Text-to-Speech request
const [response] = await client.synthesizeSpeech(request);
// Write the binary audio content to a local file
const writeFile = util.promisify(fs.writeFile);
await writeFile('output.mp3', response.audioContent, 'binary');
console.log('Audio content written to file: output.mp3');
This results in an error message Error: EROFS: read-only file system. A Google Cloud Function can't write files locally (only /tmp is writable).
Using Firebase Storage bucket.upload() has a few problems:
const destinationPath = 'Audio/Spanish/' + filename + '.ogg';
// Performs the Text-to-Speech request
const [response] = await client.synthesizeSpeech(request);
// response.audioContent is the downloaded file
await bucket.upload(response.audioContent, {
    destination: destinationPath
});
The error message is TypeError: Path must be a string. The first parameter of bucket.upload() is "the fully qualified path to the file you wish to upload to your bucket" and is expected to be a string, so response.audioContent doesn't work.
The documentation for bucket.upload() suggests that destination: destinationPath is where we should put the path to the Firebase Storage location. Is this correct?
How do we take the audiofile from Google Text-to-Speech (response.audioContent) and save it as a string that bucket.upload() can access? Or should we use something else instead of bucket.upload()?
Here's our full cloud function:
exports.Google_T2S = functions.firestore.document('Users/{userID}/Spanish/T2S_Request').onUpdate((change, context) => {
    if (change.after.data().word != undefined) {
        // Performs the Text-to-Speech request
        async function test() {
            try {
                const word = change.after.data().word; // the text
                const longLanguage = 'Spanish';
                const audioFormat = '.mp3';
                // copied from https://cloud.google.com/text-to-speech/docs/quickstart-client-libraries#client-libraries-usage-nodejs
                const fs = require('fs');
                const util = require('util');
                const textToSpeech = require('@google-cloud/text-to-speech'); // Imports the Google Cloud client library
                const client = new textToSpeech.TextToSpeechClient(); // Creates a client
                let myWordFile = word.replace(/ /g, "_"); // replace spaces with underscores in the file name
                myWordFile = myWordFile.toLowerCase(); // convert the file name to lower case
                myWordFile = myWordFile + audioFormat; // append .mp3 to the file name
                // copied from https://cloud.google.com/blog/products/gcp/use-google-cloud-client-libraries-to-store-files-save-entities-and-log-data
                const {Storage} = require('@google-cloud/storage');
                const storage = new Storage();
                const bucket = storage.bucket('myProject-cd99d.appspot.com');
                const destinationPath = 'Audio/Spanish/' + myWordFile;
                const request = { // Construct the request
                    input: {text: word},
                    // Select the language and SSML Voice Gender (optional)
                    voice: {languageCode: 'es-ES', ssmlGender: 'FEMALE'},
                    // Select the type of audio encoding
                    audioConfig: {audioEncoding: 'MP3'},
                };
                const [response] = await client.synthesizeSpeech(request);
                // Write the binary audio content to a local file
                const writeFile = util.promisify(fs.writeFile);
                await writeFile('output.mp3', response.audioContent, 'binary');
                console.log('Audio content written to file: output.mp3');
                // response.audioContent is the downloaded file
                await bucket.upload(response.audioContent, {
                    destination: destinationPath
                });
            }
            catch (error) {
                console.error(error);
            }
        }
        test();
    } // close if
    return 0; // prevents an error message "Function returned undefined, expected Promise or value"
});
file.save() was the answer. util.promisify was unnecessary, and caused a TypeError complaining about the "original" argument. Here's the finished cloud function:
const functions = require('firebase-functions');

// // Create and Deploy Your First Cloud Functions
// // https://firebase.google.com/docs/functions/write-firebase-functions
//
// exports.helloWorld = functions.https.onRequest((request, response) => {
//     response.send("Hello from Firebase!");
// });

async function textToSpeechRequest(change) {
    try {
        const word = change.after.data().word; // the text
        const longLanguage = 'Spanish';
        const audioFormat = '.mp3';
        // copied from https://cloud.google.com/text-to-speech/docs/quickstart-client-libraries#client-libraries-usage-nodejs
        const textToSpeech = require('@google-cloud/text-to-speech'); // Imports the Google Cloud client library
        const client = new textToSpeech.TextToSpeechClient(); // Creates a client
        let myWordFile = word.replace(/ /g, "_"); // replace spaces with underscores in the file name
        myWordFile = myWordFile.toLowerCase(); // convert the file name to lower case
        myWordFile = myWordFile + audioFormat; // append .mp3 to the file name
        // copied from https://cloud.google.com/blog/products/gcp/use-google-cloud-client-libraries-to-store-files-save-entities-and-log-data
        const {Storage} = require('@google-cloud/storage');
        const storage = new Storage();
        const bucket = storage.bucket('myProject-cd99d.appspot.com');
        var file = bucket.file('Audio/Spanish/' + myWordFile);
        const request = { // Construct the request
            input: {text: word},
            // Select the language and SSML Voice Gender (optional)
            voice: {languageCode: 'es-ES', ssmlGender: 'FEMALE'},
            // Select the type of audio encoding
            audioConfig: {audioEncoding: 'MP3'},
        };
        const options = { // construct the file to write
            metadata: {
                contentType: 'audio/mpeg',
                metadata: {
                    source: 'Google Text-to-Speech'
                }
            }
        };
        // copied from https://cloud.google.com/text-to-speech/docs/quickstart-client-libraries#client-libraries-usage-nodejs
        const [response] = await client.synthesizeSpeech(request);
        // Write the binary audio content straight to Firebase Storage
        // (response.audioContent is the synthesized file)
        return await file.save(response.audioContent, options)
            .then(() => {
                console.log("File written to Firebase Storage.");
                return;
            })
            .catch((error) => {
                console.error(error);
            });
    } // close try
    catch (error) {
        console.error(error);
    } // close catch
} // close async function declaration

exports.Google_T2S = functions.firestore.document('Users/{userID}/Spanish/T2S_Request').onUpdate((change, context) => {
    if (change.after.data().word !== undefined) {
        return textToSpeechRequest(change);
    } // close if
}); // close Google_T2S
We're getting an error TypeError: [ERR_INVALID_ARG_TYPE]: The "original" argument must be of type function at Object.promisify. This error doesn't appear to affect the cloud function.
To reiterate the stuff that didn't work: fs.createWriteStream didn't work because Google Cloud Functions can't write to the local file system with Node file system commands. Instead, the Cloud Storage library provides its own methods that wrap the Node file system commands. bucket.upload() will upload a local file to a bucket, but the path to the local file has to be a string, not a buffer or a stream coming from an API. file.save() is documented as
Write arbitrary data to a file.
This is a convenience method which wraps File#createWriteStream.
That's what I want! If there's one thing about my data, it's arbitrary. Or maybe contrary by nature. After that we just had to straighten out the contentType (audio/mpeg, not mp3) and the file path.
