Blob downloadToFile() not downloading large files in Node.js

I have implemented **Azure Blob Storage** in my project. It works fine on smaller files, but when I try to download a file larger than 40 MB, it never returns from blob.downloadToFile().
Here is my code.
API call:
exports.downloadSingle = function (req, res) {
    downloadService.downloadSingleFile(req.params.id).then(function (result) {
        res.send(result);
    });
};
Download service:
var filesLocalLinks = [];
const request = require('request-promise');
const fs = require('fs');
const path = require('path');
var download = require('download-file')

let promise = new Promise((resolve, reject) => {
    filemodel.findOne({ _id: id, cancelled: false }).exec(function (error, result) {
        if (error) {
            reject(error);
        } else {
            blobDownload.downloadFile(result.blobName, result.containerName).then(function (blobResponse) {
                var filename = path.resolve(__dirname + '/../services/uploads/' + result.fileName);
                filename = filename.replace('/myProject', '');
                filename = process.env.BASE_URL + '/myProject/services' + filename.split('/services')[1];
                resolve({ file: filename, filename: result.originalname });
            }).catch(function (error) {
                reject(error);
            });
        }
    });
});
let result = await promise;
return { file: result.file, filename: result.filename };
}
Blob service:
let promise = new Promise(async (resolve, reject) => {
    const account = process.env.BLOB_ACCOUNT;
    const accountKey = process.env.BLOB_ACCOUNT_KEY;
    const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
    const blobServiceClient = new BlobServiceClient(
        `https://${account}.blob.core.windows.net`,
        sharedKeyCredential
    );
    const containerClient = blobServiceClient.getContainerClient(containerName);
    const blockBlobClient = containerClient.getBlockBlobClient(blobName);
    const blobClient = containerClient.getBlobClient(blobName);
    var blobResponse = await blobClient.downloadToFile(blobName);
    resolve(blobResponse);
});
let result = await promise;
return result;
}
In the blob service, this line takes so long that the page eventually responds with a "page not working" error: var blobResponse = await blobClient.downloadToFile(blobName);
Can anyone help me?

Basically, the flow of the code is: first it downloads the file to our server directory, and then the user can download it from there. I figured out that the code was working fine; the issue was the memory of our server. Once we increased the memory, the issue was resolved. But it took me two days to figure that out.
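For reference, one way to avoid holding large files on the server at all is to pipe the blob's download stream straight into the HTTP response instead of staging it on disk first. This is only a minimal sketch, assuming Express and @azure/storage-blob v12; the route and container name are placeholders, not the code from the question:
// Sketch: stream a blob directly to the client without buffering the whole file.
// Assumes Express and @azure/storage-blob v12; names below are placeholders.
const { BlobServiceClient } = require("@azure/storage-blob");

app.get("/download/:blobName", async (req, res) => {
    const blobServiceClient = BlobServiceClient.fromConnectionString(
        process.env.AZURE_STORAGE_CONNECTION_STRING
    );
    const containerClient = blobServiceClient.getContainerClient("my-container");
    const blobClient = containerClient.getBlobClient(req.params.blobName);
    const downloadResponse = await blobClient.download(); // returns a stream in Node.js
    res.setHeader("Content-Type", "application/octet-stream");
    downloadResponse.readableStreamBody.pipe(res); // file is never fully held in memory
});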

I came across this out of the blue, but I noticed that you pass blobName as the parameter to the downloadToFile call instead of the target file's name. Not to mention that the line const blockBlobClient = containerClient.getBlockBlobClient(blobName); seems to be unnecessary, which suggests that you didn't look into your own code properly...
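In other words, the fix would be to pass a local destination path, roughly like this (a minimal sketch; the uploads path and fileName variable are assumptions based on the download service above):
// Sketch of the fix: downloadToFile expects a local destination file path,
// not the blob name. The uploads path here is an assumption.
const blobClient = containerClient.getBlobClient(blobName);
const localPath = path.resolve(__dirname, 'uploads', fileName);
var blobResponse = await blobClient.downloadToFile(localPath);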

Related

Puppeteer to convert HTML to PDF using Node.js in Durable Functions (fan out fan in)

I'm working on a small project to convert a large XML into several formatted PDF documents. The large XML contains multiple XMLs of similar format, so I'm using a single HTML template for printing all the documents. After producing all the PDF documents, I also need to produce a metadata file with some basic info on each document that was printed.
I thought the fan out/fan in scenario of Durable Functions was a perfect fit for my use case. I'm working with Node.js. I set up all my code and it seems to be working fine locally. The orchestration function looks like the below.
const df = require("durable-functions");

module.exports = df.orchestrator(function* (context) {
    var xmldata = yield context.df.callActivity("DurablegetblobJS1", "");
    var tasks = [];
    for (const file of xmldata) {
        tasks.push(context.df.callActivity("Durableactivityjs2", file));
    }
    const outputs = yield context.df.Task.all(tasks);
    var finalout = "";
    for (const out of outputs) {
        console.log('I am done1 :' + out);
        finalout = finalout + out;
    }
    return finalout;
});
DurablegetblobJS1: Fetches the entire XML and splits it into multiple smaller XMLs (one per document).
Durableactivityjs2: Fetches the HTML template, extracts the different values from the individual XMLs, applies them to the HTML, and finally prints the PDF out to Azure Storage. It returns the name of the PDF document that was printed, for creation of the metadata file. The code for this is below.
var fs = require('fs');
var xml2js = require('xml2js');
var html_to_pdf = require('html-pdf-node');
var parser = new xml2js.Parser();

module.exports = async function (context) {
    //console.log("Hello from activity :")
    var xmldict = {};
    var xmltext = context.bindings.name;
    // Extract the nodes and attributes
    metadata(xmltext, xmldict);
    report(xmltext, xmldict);
    context.log(xmldict);
    const { BlobServiceClient } = require("@azure/storage-blob");
    // Load the .env file if it exists
    require("dotenv").config();
    const AZURE_STORAGE_CONNECTION_STRING = process.env.STORAGE_CONNECTION_STRING || "";
    const blobServiceClient = BlobServiceClient.fromConnectionString(
        AZURE_STORAGE_CONNECTION_STRING
    );
    var containerClient = blobServiceClient.getContainerClient('test');
    var blobname = 'comb_template.html';
    var blockBlobClient = containerClient.getBlockBlobClient(blobname);
    var downloadBlockBlobResponse = await blockBlobClient.download(0);
    var html_template = await streamToText(downloadBlockBlobResponse.readableStreamBody);
    let options = { format: 'A4' };
    let file = { content: html_template };
    const x = await writepdf1(file, options, blobServiceClient, xmldict);
    console.log("Written Blob PDF");
    return x;
};
async function writepdf1(file, options, blobServiceClient, xmldict) {
    const pdfBuffer = await html_to_pdf.generatePdf(file, options);
    const containerClient = blobServiceClient.getContainerClient('test2');
    const targetblob = xmldict['OU'] + '/' + xmldict['ReportName'] + '/' + xmldict['OU'] + '_' + xmldict['ReportName'] + '_' + xmldict['DocumentID'] + '_' + '.pdf';
    console.log('Blob name :' + targetblob);
    const blockBlobClient_t = containerClient.getBlockBlobClient(targetblob);
    const uploadBlobResponse = await blockBlobClient_t.upload(pdfBuffer, pdfBuffer.length);
    return targetblob;
}

async function streamToText(readable) {
    readable.setEncoding('utf8');
    let data = '';
    for await (const chunk of readable) {
        data += chunk;
    }
    return data;
}

function metadata(xmltext, xmldict) {
    parser.parseString(xmltext, function (err, result) {
        var test1 = result['HPDPSMsg']['DocumentRequest'][0]['MetaData'][0];
        Object.entries(test1).forEach(([key, value]) => {
            xmldict[key] = value[0];
        });
    });
}

function report(xmltext, xmldict) {
    parser.parseString(xmltext, function (err, result) {
        var test2 = result['HPDPSMsg']['DocumentRequest'][0]['Report'][0]['$'];
        Object.entries(test2).forEach(([key, value]) => {
            xmldict[key] = value;
        });
    });
}
However, when I deploy the entire project into an Azure premium function app (EP1 - Windows), I see some errors in App Insights when I try to execute my function, and the PDFs are never generated.
Activity function 'Durableactivityjs2' failed: Could not find browser
revision 818858. Run "PUPPETEER_PRODUCT=firefox npm install" or
"PUPPETEER_PRODUCT=firefox yarn install" to download a supported
Firefox browser binary
I'm a bit clueless how I'm supposed to resolve this. Any help or suggestions would be appreciated.

Downloading many images with node.js + axios stops downloading suddenly after a while with no errors

It's that time again when I'm clueless and come humbly to ask for help!
I am trying to download 4,500 images at once, averaging 1 MB in size. All the image files get created and the download starts, but after about 2 GB downloaded (so about half), the download stops suddenly: some images are complete, some partial, some empty. Task Manager confirms the download stops.
What could possibly be the issue? No matter how much I wait, nothing happens; at least if I got an error I would try something else...
Please advise if possible, thank you!
// get all json files from a folder
const fs = require("fs");
const path = require("path");
const axios = require("axios");

let urlsArray = [];
const collection = "rebels";
const folder = collection + "_json";

const getFiles = (folder) => {
    const directoryPath = path.join(__dirname, folder);
    return fs.readdirSync(directoryPath);
};

const files = getFiles(folder);

// inside the folder there are json files with metadata;
// for each json file, parse it and get the image url
files.forEach((file) => {
    const filePath = path.join(__dirname, folder, file);
    const fileContent = fs.readFileSync(filePath, "utf8");
    const parsedJson = JSON.parse(fileContent);
    const imageFromMetadata = parsedJson.image;
    const url = imageFromMetadata.replace("ipfs://", "https://ipfs.io/ipfs/");
    let nr = file.replace(".json", "");
    urlsArray.push({ url, nr });
});

// for each url, create a promise to download with axios
const downloadImage = (url, nr) => {
    const writer = fs.createWriteStream(
        process.cwd() + `/${collection}_images2/${nr}.png`
    );
    return axios({
        url,
        method: "GET",
        responseType: "stream",
    }).then((response) => {
        return new Promise((resolve, reject) => {
            response.data.pipe(writer);
            writer.on("finish", resolve);
            writer.on("error", reject);
        });
    });
};

// download all
const promiseAll = async () => {
    const promises = urlsArray.map((data) => {
        console.log(`trying to download image nr ${data.nr} from ${data.url}`);
        return downloadImage(data.url, data.nr);
    });
    await Promise.allSettled(promises);
};

promiseAll();
Since Promise.allSettled() never rejects, nothing in your code will report on any rejected promises that it sees. So, I'd suggest you iterate its results and see if you have any rejected promises there.
You can do that like this:
const results = await Promise.allSettled(promises);
console.log(`results.length = ${results.length}`);
for (const r of results) {
    if (r.status === "rejected") {
        console.log(r.reason);
    }
}
console.log("all done");
console.log("all done");
This will verify that you got through to the end of Promise.allSettled(promises), confirm that you got a non-zero number of results, and log any rejected promises you got.
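If the rejections (or the stall) turn out to be caused by firing 4,500 requests at once, one common mitigation is to limit concurrency by downloading in fixed-size batches. This is just a sketch built on the code above, not part of the original answer; the batch size is an arbitrary assumption:
// Sketch: download in fixed-size batches to limit concurrent connections.
// The batch size of 50 is an arbitrary assumption; tune it for your environment.
const batchSize = 50;
const downloadAllInBatches = async () => {
    for (let i = 0; i < urlsArray.length; i += batchSize) {
        const batch = urlsArray.slice(i, i + batchSize);
        const results = await Promise.allSettled(
            batch.map((data) => downloadImage(data.url, data.nr))
        );
        results
            .filter((r) => r.status === "rejected")
            .forEach((r) => console.log(r.reason));
    }
};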

Azure Storage Download to Blob prematurely ends Function App call

I am using Node.js to download a file to a buffer for processing in my code. The relevant code is:
let bbc = containerClient.getBlockBlobClient(
    userId + "/" + documentUuids[i] + ".pdf"
);
let blob;
try {
    console.log("downloading blob");
    blob = await bbc.downloadToBuffer();
    console.log("downloaded blob");
} catch (e) {
    console.log(userId + "/" + documentUuids[i] + ".pdf");
    console.log(e);
}
However, instead of waiting for the download and then proceeding with the rest of the code, the line blob = await bbc.downloadToBuffer(); prematurely ends the function app and returns a 200 with no body. In the console I then see the message:
Warning: Unexpected call to 'log' on the context object after function execution has completed. Please check for asynchronous calls that are not awaited or calls to 'done' made before function execution completes. Function name: BasketsCreateUpdate. Invocation Id: 59f57785-6390-4b93-a69e-8244dc688d37. Learn more: https://go.microsoft.com/fwlink/?linkid=2097909
and eventually in my logs, I see the required output, but the function has already prematurely returned an empty body. I have no idea why this is happening, and I would appreciate any help.
There is nothing wrong with your blob-downloading code. I assume something is wrong with how you handle the result inside your JS function. I wrote a simple demo that gets the content of a .txt file for you, which should meet your requirement:
module.exports = async function (context, req) {
    const { BlobServiceClient, StorageSharedKeyCredential } = require("@azure/storage-blob");
    const account = '';
    const accountKey = '';
    const container = '';
    const blobName = '';

    async function test(context) {
        const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
        const blobServiceClient = new BlobServiceClient(
            `https://${account}.blob.core.windows.net`,
            sharedKeyCredential
        );
        const bbc = blobServiceClient.getContainerClient(container).getBlockBlobClient(blobName);
        context.log('=================> download start');
        let blob = await bbc.downloadToBuffer();
        context.log('=================> download complete');
        return blob.toString('utf-8');
    }

    var result = await test(context);
    context.res = {
        body: result
    };
}

SharedKeyCredential is not a constructor - Azure Blob Storage + Node.js

I'm trying to delete an image in my auctionImages container, but when I execute the function from Postman, I get SharedKeyCredential is not a constructor. I've been following the documentation and I think I have everything set up, but I don't see what's different in my code from the docs. I appreciate any help!
app.delete("/api/removeauctionimages", upload, async (req, res, next) => {
    const { ContainerURL, ServiceURL, StorageURL, SharedKeyCredential } = require("@azure/storage-blob");
    const credentials = new SharedKeyCredential(process.env.AZURE_STORAGE_ACCOUNT, process.env.AZURE_STORAGE_ACCESS_KEY);
    const pipeline = StorageURL.newPipeline(credentials);
    const serviceURL = new ServiceURL(`https://${STORAGE_ACCOUNT_NAME}.blob.core.windows.net`, pipeline);
    const containerName = "auctionImages";
    const blobName = "myimage.png";
    const containerURL = ContainerURL.fromServiceURL(serviceURL, containerName);
    const blockBlobURL = BlockBlobURL.fromContainerURL(containerURL, blobName);
    await blockBlobURL.delete(aborter);
    console.log(`Block blob "${blobName}" is deleted`);
});
Based on the SDK version 12.1.0 documentation here, it looks like Microsoft changed SharedKeyCredential to StorageSharedKeyCredential.
Can you try with that?
Also, please see the samples for this version of SDK here: https://github.com/Azure/azure-sdk-for-js/tree/master/sdk/storage/storage-blob/samples/javascript.
Here's the code I wrote using v12.1.0 of the Node SDK:
const { StorageSharedKeyCredential, BlobServiceClient } = require("@azure/storage-blob");

const sharedKeyCredential = new StorageSharedKeyCredential(process.env.AZURE_STORAGE_ACCOUNT, process.env.AZURE_STORAGE_ACCESS_KEY);
const blobServiceClient = new BlobServiceClient(
    `https://${process.env.AZURE_STORAGE_ACCOUNT}.blob.core.windows.net`,
    sharedKeyCredential
);
const containerName = `temp`;
const blobName = 'test.png';
const containerClient = blobServiceClient.getContainerClient(containerName);
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
await blockBlobClient.delete();

How to delete a blob from Azure Blob Storage with the v12 SDK for Node.js

How can I delete an Azure blob through Node.js? I am using the Azure library v12 SDK for Node.js (https://learn.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-nodejs).
I could not find a delete blob method; I want to delete a blob by name.
Just as @Georage said in the comment, you can use the delete method to delete a blob.
Here is my demo:
const { BlobServiceClient, ContainerClient, StorageSharedKeyCredential } = require("@azure/storage-blob");
// Load the .env file if it exists
require("dotenv").config();

async function streamToString(readableStream) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        readableStream.on("data", (data) => {
            chunks.push(data.toString());
        });
        readableStream.on("end", () => {
            resolve(chunks.join(""));
        });
        readableStream.on("error", reject);
    });
}

async function main() {
    const AZURE_STORAGE_CONNECTION_STRING = process.env.AZURE_STORAGE_CONNECTION_STRING;
    const blobServiceClient = BlobServiceClient.fromConnectionString(AZURE_STORAGE_CONNECTION_STRING);
    const containerClient = blobServiceClient.getContainerClient("test");
    const blockBlobClient = containerClient.getBlockBlobClient("test.txt");
    const downloadBlockBlobResponse = await blockBlobClient.download(0);
    console.log(await streamToString(downloadBlockBlobResponse.readableStreamBody));
    const blobDeleteResponse = await blockBlobClient.delete();
    console.log(blobDeleteResponse.clientRequestId);
}

main().catch((err) => {
    console.error("Error running sample:", err.message);
});
After running this sample, the test.txt file was removed from the test container.
While Jack's answer works, it is more complicated than it needs to be. Instead of creating the blockBlobClient and then deleting it, a simpler way would be to use:
containerClient.deleteBlob('blob-name')
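In context, that might look like the following minimal sketch (the connection string, container name, and blob name are placeholders):
// Sketch: delete a blob by name straight from the container client,
// without creating a block blob client first. Names are placeholders.
const { BlobServiceClient } = require("@azure/storage-blob");

async function main() {
    const blobServiceClient = BlobServiceClient.fromConnectionString(
        process.env.AZURE_STORAGE_CONNECTION_STRING
    );
    const containerClient = blobServiceClient.getContainerClient("test");
    await containerClient.deleteBlob("test.txt");
}

main().catch((err) => console.error(err.message));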
