List Azure file share snapshots with Node.js

Is there any way in Node.js to list the snapshots that a file share has?
Example code:
const { ShareServiceClient, StorageSharedKeyCredential } = require("@azure/storage-file-share");
const credential = new StorageSharedKeyCredential(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY);
const shareServiceClient = new ShareServiceClient(AZURE_STORAGE_CONNECTION_STRING, credential);
var shareName = "xxxxx";
var shareClient = shareServiceClient.getShareClient(shareName);
// Create a snapshot:
await shareClient.createSnapshot();
How do I list the snapshots that this share has?

There is no dedicated method for listing the snapshots of a file share. You will need to call the listShares method of ShareServiceClient
(@azure/storage-file-share version 12.5.0) with the includeSnapshots option set to true and prefix set to the share name.
Here's sample code to do so (untested):
const shareName = 'share-name';
const listingOptions = {
  prefix: shareName,
  includeSnapshots: true
};
shareServiceClient.listShares(listingOptions).byPage().next()
  .then((result) => {
    const shareItems = result.value.shareItems;
    // Filter results where the share name matches and the item is a snapshot
    const shareSnapshots = shareItems.filter(s => s.name === shareName && s.snapshot && s.snapshot !== '');
    console.log(shareSnapshots);
  })
  .catch((error) => {
    console.log(error);
  });
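If you prefer async iteration over promise chaining, the same listing can be written with for await...of. This is a minimal sketch under the same assumptions (the shareServiceClient and shareName from the snippet above, 12.x SDK); each matching item's snapshot property is the snapshot timestamp you could pass back to getShareClient:

// Sketch: collect the snapshot timestamps for a given share.
async function listShareSnapshots() {
  const snapshots = [];
  // includeSnapshots makes the service return snapshot entries alongside the base share.
  for await (const share of shareServiceClient.listShares({ prefix: shareName, includeSnapshots: true })) {
    if (share.name === shareName && share.snapshot) {
      snapshots.push(share.snapshot); // snapshot timestamp string
    }
  }
  return snapshots;
}

listShareSnapshots()
  .then((snapshots) => console.log(snapshots))
  .catch((error) => console.log(error));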

Related

Puppeteer to convert HTML to PDF using Node.js in Durable Functions (fan out/fan in)

I'm working on a small project to convert a large XML into several formatted PDF documents. The large XML contains multiple smaller XMLs of a similar format, so I'm using a single HTML template for printing all the documents. After producing all the PDF documents I also need to produce a metadata file with some basic info on each document that was printed.
I thought the fan out/fan in scenario of Durable Functions was a perfect fit for my use case. I'm working with Node.js. I set up all my code and it seems to be working fine locally. The orchestration function looks like this:
const df = require("durable-functions");

module.exports = df.orchestrator(function* (context) {
  var xmldata = yield context.df.callActivity("DurablegetblobJS1", "");
  var tasks = [];
  for (const file of xmldata) {
    tasks.push(context.df.callActivity("Durableactivityjs2", file));
  }
  const outputs = yield context.df.Task.all(tasks);
  var finalout = "";
  for (const out of outputs) {
    console.log('I am done1 :' + out);
    finalout = finalout + out;
  }
  return finalout;
});
DurablegetblobJS1: Fetches the entire XML and splits it into multiple smaller XMLs (one per document).
Durableactivityjs2: Fetches the HTML template, extracts the different values from the individual XMLs, applies them to the HTML and finally prints the PDF to Azure Storage. It returns the name of the PDF document that was printed, for creation of the metadata file. The code for this is below.
var fs = require('fs');
var xml2js = require('xml2js');
var html_to_pdf = require('html-pdf-node');
var parser = new xml2js.Parser();

module.exports = async function (context) {
  //console.log("Hello from activity :")
  var xmldict = {};
  var xmltext = context.bindings.name;
  // Extract the nodes and attributes
  metadata(xmltext, xmldict);
  report(xmltext, xmldict);
  context.log(xmldict);
  const { BlobServiceClient } = require("@azure/storage-blob");
  // Load the .env file if it exists
  require("dotenv").config();
  const AZURE_STORAGE_CONNECTION_STRING = process.env.STORAGE_CONNECTION_STRING || "";
  const blobServiceClient = BlobServiceClient.fromConnectionString(
    AZURE_STORAGE_CONNECTION_STRING
  );
  var containerClient = blobServiceClient.getContainerClient('test');
  var blobname = 'comb_template.html';
  var blockBlobClient = containerClient.getBlockBlobClient(blobname);
  var downloadBlockBlobResponse = await blockBlobClient.download(0);
  var html_template = await streamToText(downloadBlockBlobResponse.readableStreamBody);
  let options = { format: 'A4' };
  let file = { content: html_template };
  const x = await writepdf1(file, options, blobServiceClient, xmldict);
  console.log("Written Blob PDF");
  return x;
};

async function writepdf1(file, options, blobServiceClient, xmldict) {
  const pdfBuffer = await html_to_pdf.generatePdf(file, options);
  const containerClient = blobServiceClient.getContainerClient('test2');
  const targetblob = xmldict['OU'] + '/' + xmldict['ReportName'] + '/' + xmldict['OU'] + '_' + xmldict['ReportName'] + '_' + xmldict['DocumentID'] + '_' + '.pdf';
  console.log('Blob name :' + targetblob);
  const blockBlobClient_t = containerClient.getBlockBlobClient(targetblob);
  const uploadBlobResponse = await blockBlobClient_t.upload(pdfBuffer, pdfBuffer.length);
  return targetblob;
}

async function streamToText(readable) {
  readable.setEncoding('utf8');
  let data = '';
  for await (const chunk of readable) {
    data += chunk;
  }
  return data;
}

function metadata(xmltext, xmldict) {
  parser.parseString(xmltext, function (err, result) {
    var test1 = result['HPDPSMsg']['DocumentRequest'][0]['MetaData'][0];
    Object.entries(test1).forEach(([key, value]) => {
      xmldict[key] = value[0];
    });
  });
}

function report(xmltext, xmldict) {
  parser.parseString(xmltext, function (err, result) {
    var test2 = result['HPDPSMsg']['DocumentRequest'][0]['Report'][0]['$'];
    Object.entries(test2).forEach(([key, value]) => {
      xmldict[key] = value;
    });
  });
}
However, when I deploy the entire project to an Azure premium function (EP1 - Windows), I see some errors in App Insights when I try to execute my function, and the PDFs are never generated.
Activity function 'Durableactivityjs2' failed: Could not find browser
revision 818858. Run "PUPPETEER_PRODUCT=firefox npm install" or
"PUPPETEER_PRODUCT=firefox yarn install" to download a supported
Firefox browser binary
I'm a bit clueless as to how I'm supposed to resolve this. Any help or suggestions would be appreciated.

Reading multiple files from a Google bucket & loading into BQ using Node.js

When I try to read files from a Google bucket and load the data into a BigQuery table, the Google bucket throws a timeout error. Is there a way to read the files synchronously and load them into the BigQuery table?
The code below works when there are few files; I also tried using then, which gives the same error.
const { BigQuery } = require('@google-cloud/bigquery');
const { Storage } = require('@google-cloud/storage');
var fs = require("fs");

const bucketName = 'bucketname';
const gcpProject = "projectname";
const datasetprojectname = "bqprojectname";
const datasetId = "dsname";
const tableId = "tablename";

exports.helloworld = async (req, res) => {
  const bigquery = new BigQuery({ projectId: datasetprojectname });
  const storage = new Storage(gcpProject);
  const loaddatabq = new Storage(gcpProject);
  const bucket = storage.bucket(bucketName);
  const fileoptions = {
    prefix: "singlefile"
  };
  var filecount = 0;
  var errcount = 0;
  var filemoveerrcount = 0;
  const [getfilename] = await bucket.getFiles(fileoptions);
  var filenamespring = "";
  var getjson = null;
  getfilename.forEach(async files => {
    try {
      filecount++;
      var filename = files.name;
      if (filename != "singlefile/") {
        var contents = await files.download(files.name);
        await bigquery.dataset(datasetId).table(tableId).insert(JSON.parse(contents).body);
      }
    }
    catch (err) {
    }
  });
};
If your files are in JSONL format (one JSON document per line, JSON Lines), you can use a load job to achieve this.
You can select the files you want with a wildcard character, which is more efficient than a for loop.
This solution is also cheaper: you are limited to 1,500 loads per table and per day, but loads are free. Your current code uses the streaming API, which you pay for ($0.05 per GB).
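A minimal sketch of that approach, assuming the files sit under the singlefile/ prefix of the same bucket and are newline-delimited JSON; the bucket, dataset and table names reuse the placeholders from the question, and the wildcard is simply placed in the file name so the resulting source URI becomes gs://bucketname/singlefile/*:

const { BigQuery } = require('@google-cloud/bigquery');
const { Storage } = require('@google-cloud/storage');

const bigquery = new BigQuery({ projectId: 'bqprojectname' });
const storage = new Storage({ projectId: 'projectname' });

async function loadJsonlFromBucket() {
  // The wildcard lets a single load job pick up every object under the prefix.
  const sourceFiles = storage.bucket('bucketname').file('singlefile/*');
  const metadata = {
    sourceFormat: 'NEWLINE_DELIMITED_JSON',
    autodetect: true,               // or pass an explicit schema instead
    writeDisposition: 'WRITE_APPEND'
  };
  // load() creates the load job and waits for it to finish.
  const [job] = await bigquery
    .dataset('dsname')
    .table('tablename')
    .load(sourceFiles, metadata);
  console.log(`Load job ${job.id} completed`);
}

loadJsonlFromBucket().catch(console.error);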

Download a subdirectory/directory from blob storage using Node.js and blob service?

I have implemented downloading files using the getBlobProperties() and createReadStream(containerName, fullPath, options) methods of the blob service. Now I am trying to download a directory/subdirectory inside my containers using the same methods, but it is not working and throws the error "specified blob does not exist". Though I know the reason for this error, how can I overcome this issue? I don't want to loop over the blobs and download each file separately; I want a complete folder to be downloaded.
Here is the API:
exports.getBlobChunk = function (req, res) {
  var userrole = utils.sanitizeStr(req.body.userrole);
  var srcFilePath = utils.sanitizeStr(req.body.srcfilePath);
  var fileName = utils.sanitizeStr(req.body.srcfileName);
  var fullPath = srcFilePath + "/" + fileName;
  var startRange = req.headers['startrange'];
  var endRange = req.headers['endrange'];
  genericHandler.getUserSubscMapping().then(function (results) {
    if (results != undefined && results != null) {
      var item = results[0].mapping.find(item => item.name == userrole);
      var sasurl = item.sasurl;
      if (sasurl == null) {
        res.status(500).send("Subscription mapping not configured");
        return;
      }
      var host = sasurl.substring(0, sasurl.lastIndexOf("/"));
      var containerName = sasurl.substring(sasurl.lastIndexOf("/"), sasurl.indexOf("?")).split("/")[1];
      var saskey = sasurl.substring(sasurl.indexOf("?"), sasurl.length);
      var download = item.download; // download usage
      var blobService = storage.createBlobServiceWithSas(host, saskey);
      blobService.getBlobProperties(containerName, fullPath, function (err, properties, status) {
        if (err) {
          res.send(502, "Error fetching file: %s", err.message);
        } else if (!status.isSuccessful) {
          res.send(404, "The file %s does not exist", fullPath);
        } else {
          var contentLength = properties.contentLength / 1024; // bytes to KB
          res.header('Content-Type', "application/zip");
          res.attachment(fileName);
          var options = {
            rangeStart: startRange,
            rangeEnd: endRange
          };
          if (startRange == 0) { // update download size on first chunk
            exports.updateStorageDownload(userrole, contentLength, download);
          }
          blobService.createReadStream(containerName, fullPath, options).pipe(res);
        }
      });
    }
  });
};
Azure Blob Storage does not have a real concept of folders; everything inside the container is considered a blob, including the "folders". So you cannot download a directory/subdirectory by folder name.
For example, a container structure like this:
hello.txt
...
test
  test.txt
  test1
    data.json
You need to download the blob files under the directory one by one.
const {
  BlobServiceClient,
  StorageSharedKeyCredential,
} = require("@azure/storage-blob");

// Enter your storage account name and shared key
const account = "";
const accountKey = "";
const containerName = "";
const filePath = "D:/downloads/";

// Use StorageSharedKeyCredential with storage account and account key
// StorageSharedKeyCredential is only available in Node.js runtime, not in browsers
const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
const blobServiceClient = new BlobServiceClient(
  `https://${account}.blob.core.windows.net`,
  sharedKeyCredential,
);

async function listBlobs() {
  const containerClient = await blobServiceClient.getContainerClient(containerName);

  console.log("list blobs with method listBlobsFlat");
  let iter = containerClient.listBlobsFlat({ prefix: "test/" });
  for await (const item of iter) {
    console.log(`\tBlobItem: name - ${item.name}`);
    await downloadBlobToLocal(containerClient, item.name, filePath);
  }

  console.log("list blobs with method listBlobsByHierarchy");
  let iter1 = containerClient.listBlobsByHierarchy("/", { prefix: "test/" });
  for await (const item of iter1) {
    if (item.kind === "prefix") {
      console.log(`\tBlobPrefix: ${item.name}`);
      await listblob(containerClient, item.name);
    } else {
      console.log(`\tBlobItem: name - ${item.name}`);
      await downloadBlobToLocal(containerClient, item.name, filePath);
    }
  }
}

async function listblob(containerClient, prefix) {
  let iter1 = containerClient.listBlobsByHierarchy("/", { prefix: prefix });
  for await (const item of iter1) {
    if (item.kind === "prefix") {
      console.log(`\tBlobPrefix: ${item.name}`);
    } else {
      console.log(`\tBlobItem: name - ${item.name}`);
      await downloadBlobToLocal(containerClient, item.name, filePath);
    }
  }
}

async function downloadBlobToLocal(containerClient, blobName, filePath) {
  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
  const downloadBlockBlobResponse = await blockBlobClient.downloadToFile(filePath + blobName);
}

listBlobs().catch((err) => {
  console.error("Error running sample:", err.message);
});
I've written my own implementation based on this great article:
public async Task<List<BlobDto>> ListWithPrefixAsync(string folder)
{
    // Get a reference to a container named in appsettings.json
    BlobContainerClient container = new BlobContainerClient(_storageConnectionString, _storageContainerName);

    // Create a new list object for the files
    List<BlobDto> files = new List<BlobDto>();

    await foreach (BlobItem file in container.GetBlobsAsync(prefix: folder))
    {
        // Add each file retrieved from the storage container to the files list by creating a BlobDto object
        string uri = container.Uri.ToString();
        var name = file.Name;
        var fullUri = $"{uri}/{name}";

        files.Add(new BlobDto
        {
            Uri = fullUri,
            Name = name,
            ContentType = file.Properties.ContentType
        });
    }

    // Return all files to the requesting method
    return files;
}
Getting the list of blob files in a folder is then as simple as:
// Get all files at the Azure Storage Location and return them
List<BlobDto>? files = await _storage.ListWithPrefixAsync(prefix);
Hope this helps.
Happy coding!!

Retrieve the attributes of a file using Node.js?

Hi, I'm looking for Node.js code that returns the attributes of each file in a folder. I have developed code to retrieve all the file names in a folder, and other code to list the data of a file name we provide. But what I actually need is to return all the file names in a folder along with their column names. I'm new to Node.js, so please help me.
LISTING DATA CODE:
const AWS = require('aws-sdk');
const neatCsv = require('neat-csv');
var s3 = new AWS.S3({});

exports.handler = (event, context, callback) => {
  const params = {
    Bucket: 'ml-framework-api',
    Key: 'wavicle.csv'
  };
  s3.getObject(params, async (err, result) => {
    if (err) {
      return console.error(err);
    }
    neatCsv(result.Body).then((parsedData) => {
      callback(null, parsedData);
    });
  });
};
LISTING FILES IN S3:
const AWS = require('aws-sdk');
const s3 = new AWS.S3({
  accessKeyId: '-------------',
  secretAccessKey: '-------------------',
  region: 'ap-south-1'
});

const params = {
  Bucket: 'wavicle'
};

s3.listObjects(params, (err, data) => {
  if (err) {
    return console.log(err);
  }
  console.log(data);
});
It's best to start with Node's file system API documentation.
Here is a simple example of how to get information about the files in a folder (there are many ways; this is adapted quickly from the example in the documentation above):
const fsp = require("fs/promises");

async function dirFilesInfo(path) {
  const dir = await fsp.opendir(path);
  for await (const dirEntry of dir) {
    const fileInfo = await fsp.stat(path + "/" + dirEntry.name);
    console.log(dirEntry.name, fileInfo);
  }
}

dirFilesInfo("./").catch(console.error);
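Since the files in the question actually live in S3 rather than on the local file system, the equivalent attributes (size, last modified, content type) can also be read from S3 itself. A minimal sketch, assuming the aws-sdk v2 client and the bucket name from the question (wavicle):

const AWS = require('aws-sdk');
const s3 = new AWS.S3({ region: 'ap-south-1' });

// List each object in the bucket and print its attributes.
async function listFileAttributes(bucket) {
  const { Contents } = await s3.listObjectsV2({ Bucket: bucket }).promise();
  for (const object of Contents) {
    // headObject returns per-object metadata such as ContentType and ContentLength.
    const head = await s3.headObject({ Bucket: bucket, Key: object.Key }).promise();
    console.log(object.Key, {
      size: object.Size,
      lastModified: object.LastModified,
      contentType: head.ContentType
    });
  }
}

listFileAttributes('wavicle').catch(console.error);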

URL from getSignedUrl expires in a few weeks

I've got a storage-triggered function which resizes and replaces the uploaded image in storage and then updates the URL in my database:
}).then(() => {
  console.log('Original file deleted', filePath)
  const logo = storageRef.file(JPEGFilePath)
  return logo.getSignedUrl({ action: 'read', expires: date })
  // const logo = storageRef.child(JPEGFilePath)
  // return logo.getDownloadURL()
  // return storageUrl.getDownloadURL(JPEGFilePath)
}).then((url) => {
  const newRef = db.collection("user").doc(uid)
  return newRef.set({
    profile: { profileImg: url[0] }
  }, {
    merge: true
  })
})
Here is how I set the expiry date:
const d = new Date()
const date = new Date(d.setFullYear(d.getFullYear() + 200)).toString()
However, the image expires in a few weeks (roughly two weeks). Does anyone know how to fix that? I have even played with getDownloadURL, as you can see from the commented code, but that doesn't seem to work in a trigger.
Per the following links:
https://stackoverflow.com/a/42959262/370321
https://cloud.google.com/nodejs/docs/reference/storage/2.5.x/File#getSignedPolicy
Not sure which version of @google-cloud/storage you're using, but assuming it's 2.5.x, any value you pass for the expiry is passed into new Date(), so your code should work; it did when I tried it in my dev tools. The only thing I can guess is that it doesn't like that you want a file to live for 200 years.
Per the source code:
https://github.com/googleapis/nodejs-storage/blob/master/src/file.ts#L2358
Have you tried a shorter amount of time, or formatting it in the date format mm-dd-yyyy?
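For reference, a minimal sketch of that suggestion, reusing the storageRef and JPEGFilePath names from the question; the far-future date string is just an illustrative placeholder:

// Sketch: pass the expiry as an mm-dd-yyyy string instead of a Date pushed 200 years out.
const logo = storageRef.file(JPEGFilePath); // same reference as in the question
logo.getSignedUrl({ action: 'read', expires: '03-09-2491' }) // placeholder far-future date, parsed by the library
  .then(([signedUrl]) => console.log(signedUrl))
  .catch(console.error);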
OK, so I have tried something, but I have no idea whether it will work, so I'll come back in two weeks to mark my question as answered if it does. For those with the same problem, I'll try to recapitulate what I've done.
1/ Download the service account key from the console. Here is the link:
https://console.firebase.google.com/project/_/settings/serviceaccounts/adminsdk
2/ Save the downloaded JSON file in your function directory.
3/ Include the key when you create the Storage client in your function. But be careful how you set the path to the file. Here is my question about it:
https://stackoverflow.com/a/56407592/11486115
UPDATE
I just found a mistake in my function: the URL was being provided by the commented-out code by mistake.
Here is the complete function:
const {
  db
} = require('../../admin')

const projectId = "YOUR-PROJECT-ID"
const { Storage } = require('@google-cloud/storage');
const storage = new Storage({ projectId: projectId, keyFilename: 'PATH-TO-SERVICE-ACCOUNT' })
const os = require('os');
const fs = require('fs');
const path = require('path');
const spawn = require('child-process-promise').spawn
const JPEG_EXTENSION = '.jpg'

exports.handler = ((object) => {
  const bucket = object.bucket;
  const contentType = object.contentType;
  const filePath = object.name
  const JPEGFilePath = path.normalize(path.format({ dir: path.dirname(filePath), name: 'profileImg', ext: JPEG_EXTENSION }))
  const destBucket = storage.bucket(bucket)
  const tempFilePath = path.join(os.tmpdir(), path.basename(filePath))
  const tempLocalJPEGFile = path.join(os.tmpdir(), path.basename(JPEGFilePath))
  const metadata = {
    contentType: contentType
  }
  const uid = filePath.split("/").slice(1, 2).join("")
  const d = new Date()
  const date = new Date(d.setFullYear(d.getFullYear() + 200)).toString()

  if (!object.contentType.startsWith('image/')) {
    return destBucket.file(filePath).delete().then(() => {
      console.log('File is not an image ', filePath, ' DELETED')
      return null
    });
  }
  if (object.metadata.modified) {
    console.log('Image processed')
    return null
  }

  return destBucket.file(filePath).download({
    destination: tempFilePath
  })
    .then(() => {
      console.log('The file has been downloaded to', tempFilePath)
      return spawn('convert', [tempFilePath, '-resize', '100x100', tempLocalJPEGFile])
    }).then(() => {
      console.log('JPEG image created at', tempLocalJPEGFile)
      metadata.modified = true
      return destBucket.upload(tempLocalJPEGFile,
        {
          destination: JPEGFilePath,
          metadata: { metadata: metadata }
        })
    }).then(() => {
      console.log('JPEG image uploaded to Storage at', JPEGFilePath)
      return destBucket.file(filePath).delete()
    }).then(() => {
      console.log('Original file deleted', filePath)
      //const logo = storageRef.file(JPEGFilePath)
      const logo = destBucket.file(JPEGFilePath)
      return logo.getSignedUrl({ action: 'read', expires: date })
    }).then((url) => {
      const newRef = db.collection("user").doc(uid)
      return newRef.set({
        profile: { profileImg: url[0] }
      }, {
        merge: true
      })
    }).then(() => {
      fs.unlinkSync(tempFilePath);
      fs.unlinkSync(tempLocalJPEGFile)
      console.log(uid, 'user database updated ')
      return null
    })
})
I'm pretty confident that this will work now.
