How to copy folders from blob storage? - node.js

The functionality I am trying to implement is copy/paste of files and folders from one location to another (within the same container). Can I use the same method to copy folders as I do for copying files?
startCopyBlob() throws an error when copy-pasting folders.
input:
newFileName:'new folder_copy1'
newFilePath:'603487d1e966a91fd86b6c11/spe9_rs_2021-03-17_17-14-38/output'
oldFilePath:'603487d1e966a91fd86b6c11/spe9_rs_2021-02-23_11-14-41/output/new folder'
error:
code: 'CannotVerifyCopySource'
message: 'The specified blob does not exist.'
name: 'StorageError'
requestId: '4a8a76bf-701e-0078-17c8-1b4439000000'
stack: 'StorageError: The specified blob does not exist.
statusCode: 404
Uncaught Error [ERR_HTTP_HEADERS_SENT]: Cannot set headers after they are sent to the client
code:
var host = sasurl.substring(0, sasurl.lastIndexOf("/"));
var containerName = sasurl.substring(sasurl.lastIndexOf("/"), sasurl.indexOf("?")).split("/")[1];
var saskey = sasurl.substring(sasurl.indexOf("?"), sasurl.length);
var blobService = storage.createBlobServiceWithSas(host, saskey);
pasteFiles.forEach(elem => {
    var storageuri = host + "/" + containerName + "/" + elem["oldFilePath"] + saskey;
    var blobName = elem["newFilePath"] + "/" + elem["newFileName"];
    blobService.startCopyBlob(storageuri, containerName, blobName, err => {
        if (err) {
            console.log(err)
            return res.status(500).json({
                message: 'error',
                status: err
            })
        .....
        ..

Azure Blob storage has a two-level hierarchy: blob container and blob. It is based on a flat storage scheme, not a hierarchical one, so it does not have a real directory structure. We can only specify a character or string delimiter within a blob name to create a virtual hierarchy. So if we want to copy a folder with the Azure Blob Storage SDK for Node.js, we need to copy the blobs in that folder one by one.
For example
sdk
npm i @azure/storage-blob
code
const {
  BlobServiceClient,
  StorageSharedKeyCredential,
  generateBlobSASQueryParameters,
  ContainerSASPermissions,
} = require("@azure/storage-blob");
const accountName = "andyprivate";
const accountKey = "";
const creds = new StorageSharedKeyCredential(accountName, accountKey);
const blobServiceClient = new BlobServiceClient(
  `https://${accountName}.blob.core.windows.net`,
  creds
);
async function test() {
  try {
    const sourceContainerClient = blobServiceClient.getContainerClient("input");
    const desContainerClient = blobServiceClient.getContainerClient("output");
    // create a SAS token with read/list permissions on the source container,
    // so the service can verify the copy source URL
    const blobSAS = generateBlobSASQueryParameters(
      {
        expiresOn: new Date(new Date().valueOf() + 86400000),
        containerName: sourceContainerClient.containerName,
        permissions: ContainerSASPermissions.parse("rl"),
      },
      creds
    ).toString();
    // list every blob under the folder prefix, then copy each one to the destination
    for await (const response of sourceContainerClient
      .listBlobsFlat({ prefix: "<your folder name>/" })
      .byPage()) {
      for (const blob of response.segment.blobItems) {
        console.log(`Blob name : ${blob.name}`);
        const sourceBlob = sourceContainerClient.getBlobClient(blob.name);
        const sourceUrl = sourceBlob.url + "?" + blobSAS;
        const res = await (
          await desContainerClient
            .getBlobClient(blob.name)
            .beginCopyFromURL(sourceUrl)
        ).pollUntilDone();
        console.log(res.copyStatus);
      }
    }
  } catch (error) {
    console.log(error);
  }
}
test();
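By the way, the CannotVerifyCopySource error in your code comes from pointing startCopyBlob at the folder path itself, which is not a blob. Since your code uses the legacy azure-storage package, the same blob-by-blob approach with that SDK could look roughly like this (a minimal sketch; <old folder path> and <new folder path> are placeholders, and blobService, host, containerName and saskey are assumed to be set up as in your question):
blobService.listBlobsSegmentedWithPrefix(containerName, "<old folder path>/", null, (err, result) => {
  if (err) { return console.log(err); }
  result.entries.forEach(blob => {
    // the copy source must be the URL of an existing blob (not a folder), plus the SAS key
    var sourceUri = host + "/" + containerName + "/" + blob.name + saskey;
    var targetName = "<new folder path>/" + blob.name.split("/").pop();
    blobService.startCopyBlob(sourceUri, containerName, targetName, copyErr => {
      if (copyErr) console.log(copyErr);
    });
  });
});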
Besides, if you want to directly copy one folder from one container to another container, we can use AzCopy to implement it. For more details, please refer to here and here.
For example
npm i @azure/storage-blob @azure-tools/azcopy-node @azure-tools/azcopy-<your system: win32, linux, win64>
code
const {
  StorageSharedKeyCredential,
  generateAccountSASQueryParameters,
  AccountSASPermissions,
  AccountSASResourceTypes,
  AccountSASServices,
} = require("@azure/storage-blob");
const accountName = "andyprivate";
const accountKey = "";
const creds = new StorageSharedKeyCredential(accountName, accountKey);
// create an account sas token
const accountSas = generateAccountSASQueryParameters(
  {
    startsOn: new Date(new Date().valueOf() - 8640),
    expiresOn: new Date(new Date().valueOf() + 86400000),
    resourceTypes: AccountSASResourceTypes.parse("sco").toString(),
    permissions: AccountSASPermissions.parse("rwdlacup").toString(),
    services: AccountSASServices.parse("b").toString(),
  },
  creds
).toString();
const { AzCopyClient } = require("@azure-tools/azcopy-node");
let copyClient = new AzCopyClient();
async function copy() {
  try {
    // copy the whole folder from the source container to the destination container
    let jobId = await copyClient.copy(
      {
        type: "RemoteSas",
        resourceUri: "https://<>.blob.core.windows.net/input",
        sasToken: accountSas,
        path: "/<folder name>",
      },
      {
        type: "RemoteSas",
        resourceUri: "https://<>.blob.core.windows.net/outcontainer",
        sasToken: accountSas,
        path: "",
      },
      { recursive: true }
    );
    // poll the job status until the transfer finishes
    let status;
    while (!status || status.StatusType !== "EndOfJob") {
      let jobInfo = await copyClient.getJobInfo(jobId);
      status = jobInfo.latestStatus;
      await new Promise((resolve, reject) => setTimeout(resolve, 1000));
    }
    console.log("OK");
  } catch (error) {
    console.log(error);
  }
}
copy();

Related

File chunk upload to azure storage blob, file seems broken

I'm trying to upload an excel file to azure storage blob in chunks, using stageBlock and commitBlockList from the BlockBlobClient class. The file upload seems to succeed, but when I try to download and open the file, it seems to be broken.
I'm using react and node js to do this. Code follows below.
In UI
const chunkSize = (1024 * 1024) * 25; // file chunk size
// here slicing the file and sending it to the api method
const fileReader = new FileReader();
const from = currentChunkIndexRef.current * chunkSize;
const to = from + chunkSize;
const blob = file.slice(from, to);
fileReader.onload = ((e: any) => uploadChunksToBlob(e, file, obj));
fileReader.readAsDataURL(blob);
// api method
const uploadChunksToBlob = async (event: any, file: File, obj: any) => {
  try {
    const totalChunks = Math.ceil(file.size / chunkSize);
    const uploadChunkURL = `/upload?currentChunk=${currentChunkIndexRef.current}&totalChunks=${totalChunks}&file=${file.name}&type=${file.type}`;
    console.log(event.target.result)
    const fileUpload = await fetch(uploadChunkURL, {
      method: "POST",
      headers: { "Content-Type": "application/octet-stream" },
      body: JSON.stringify(event.target.result),
    });
    const fileUploadJson = await fileUpload.json();
    const isLastChunk = (totalChunks - 1) === currentChunkIndexRef.current;
    if (!isLastChunk) {
      console.log({ Chunk: currentChunkIndexRef.current });
      currentChunkIndexRef.current = currentChunkIndexRef.current + 1;
      // eslint-disable-next-line @typescript-eslint/no-use-before-define
      uploadFileToAzureBlob(file, obj);
    } else {
      console.log("File Uploaded")
    }
    //
  } catch (error) {
    console.log("uploadFileToAzureBlob Catch Error" + error);
  }
}
// In Node
const sharedKeyCredential = new StorageSharedKeyCredential(
  config.StorageAccountName,
  config.StorageAccountAccessKey
);
const pipeline = newPipeline(sharedKeyCredential);
const blobServiceClient = new BlobServiceClient(
  `https://${config.StorageAccountName}.blob.core.windows.net`,
  pipeline
);
const containerName = getContainerName(req.headers.key, req.headers.clientcode);
const identifier = uuid.v4();
const blobName = getBlobName(identifier, file);
const containerClient = blobServiceClient.getContainerClient(containerName);
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
try {
  let bufferObj = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8"); // Create buffer object, specifying utf8 as encoding
  let base64String = bufferObj.toString("base64"); // Encode the Buffer as a base64 string
  blockIds = [...blockIds, base64String];
  const bufferedData = Buffer.from(req.body);
  let resultOfUnitArray = new Uint8Array(bufferedData.length);
  for (let j = 0; j < bufferedData.length; j++) {
    resultOfUnitArray[j] = bufferedData.toString().charCodeAt(j);
  } // Converting string to bytes
  const stageBlockResponse = await blockBlobClient.stageBlock(base64String, resultOfUnitArray, resultOfUnitArray.length, {
    onProgress: (e) => {
      console.log("bytes sent: " + e.loadedBytes);
    }
  });
  if ((Number(totalChunks) - 1) === (Number(currentChunk))) {
    const commitblockResponse = await blockBlobClient.commitBlockList(blockIds, { blobHTTPHeaders: req.headers });
    res.json({ uuid: identifier, message: 'File uploaded to Azure Blob storage.' });
  } else {
    res.json({ message: `Current Chunks ${currentChunk} is Successfully Uploaded` });
  }
} catch (err) {
  console.log({ err })
  res.json({ message: err.message });
}
I don't know what I'm doing wrong here.
Any help would be appreciated
Thank you
The problem is that you convert the chunk into a data URL; that's where things break.
It appears you're under the wrong impression that you need to first encode a blob into a string in order to send it. You don't have to: the browser fetch API is capable of handling a raw binary payload.
So on the client (browser) side, you don't need to go through FileReader. Just send the chunk blob directly.
const blob = file.slice(from, to);
// ...
fetch(uploadChunkURL, {
  method: "POST",
  headers: { "Content-Type": "application/octet-stream" },
  body: blob,
});
On the server (node.js) side, you'll receive the chunk in raw binary form, so you can simply forward it untouched to azure storage. There's no need to decode it from a string and move bytes into resultOfUnitArray as you currently do.
const base64String = Buffer.from(`${file}_${Number(currentChunk)}`, "utf8").toString("base64");
const bufferedData = Buffer.from(req.body);
const stageBlockResponse = await blockBlobClient.stageBlock(
  base64String,
  bufferedData,
  bufferedData.length
);
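One assumption to call out, since the question doesn't show its middleware: for req.body to arrive as a raw Buffer in Express, the route needs raw body parsing for the octet-stream content type, e.g.:
const express = require("express");
const app = express();
// parse application/octet-stream request bodies into a Buffer;
// the limit must be at least the 25 MB chunk size used on the client
app.use(express.raw({ type: "application/octet-stream", limit: "30mb" }));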

download subdirectory/directory from blob storage using nodejs and blob service?

I have implemented file downloads using the getBlobProperties() and createReadStream(containerName, fullPath, options) methods of the blob service. Now I am trying to download a directory/subdirectory inside my containers using the same methods, but it is not working and throws the error: specified blob does not exist. Though I know the reason for this error, how can I overcome it? I don't want to loop over the blobs and download each file separately; I want a complete folder to be downloaded.
Here is the API:
exports.getBlobChunk = function (req, res) {
    var userrole = utils.sanitizeStr(req.body.userrole);
    var srcFilePath = utils.sanitizeStr(req.body.srcfilePath);
    var fileName = utils.sanitizeStr(req.body.srcfileName);
    var fullPath = srcFilePath + "/" + fileName;
    var startRange = req.headers['startrange'];
    var endRange = req.headers['endrange'];
    genericHandler.getUserSubscMapping().then(function (results) {
        if (results != undefined && results != null) {
            var item = results[0].mapping.find(item => item.name == userrole);
            var sasurl = item.sasurl;
            if (sasurl == null) {
                res.status(500).send("Subscription mapping not configured");
                return;
            }
            var host = sasurl.substring(0, sasurl.lastIndexOf("/"));
            var containerName = sasurl.substring(sasurl.lastIndexOf("/"), sasurl.indexOf("?")).split("/")[1];
            var saskey = sasurl.substring(sasurl.indexOf("?"), sasurl.length);
            var download = item.download; // download usage
            var blobService = storage.createBlobServiceWithSas(host, saskey);
            blobService.getBlobProperties(containerName, fullPath, function (err, properties, status) {
                if (err) {
                    res.send(502, "Error fetching file: %s", err.message);
                } else if (!status.isSuccessful) {
                    res.send(404, "The file %s does not exist", fullPath);
                } else {
                    var contentLength = properties.contentLength / 1024; // bytes to KB
                    res.header('Content-Type', "application/zip");
                    res.attachment(fileName);
                    var options = {
                        rangeStart: startRange,
                        rangeEnd: endRange
                    };
                    if (startRange == 0) { // update download size on first chunk
                        exports.updateStorageDownload(userrole, contentLength, download);
                    }
                    blobService.createReadStream(containerName, fullPath, options).pipe(res);
                }
            });
        }
Azure Blob storage does not have a concept of folders; everything inside the container is considered a blob, including the "folders". So you can't download a directory/subdirectory by its folder name.
For example:
Container structure:
hello.txt
...
test/
    test.txt
    test1/
        data.json
You need to download the blob files under the directory prefix one by one.
const {
  BlobServiceClient,
  StorageSharedKeyCredential,
} = require("@azure/storage-blob");
const fs = require("fs");
const path = require("path");
// Enter your storage account name and shared key
const account = "";
const accountKey = "";
const containerName = "";
const filePath = "D:/downloads/";
// Use StorageSharedKeyCredential with storage account and account key
// StorageSharedKeyCredential is only available in Node.js runtime, not in browsers
const sharedKeyCredential = new StorageSharedKeyCredential(account, accountKey);
const blobServiceClient = new BlobServiceClient(
  `https://${account}.blob.core.windows.net`,
  sharedKeyCredential,
);
async function listBlobs() {
  const containerClient = blobServiceClient.getContainerClient(containerName);
  console.log("list blobs with method listBlobsFlat");
  let iter = containerClient.listBlobsFlat({ prefix: "test/" });
  for await (const item of iter) {
    console.log(`\tBlobItem: name - ${item.name}`);
    await downloadBlobToLocal(containerClient, item.name, filePath);
  }
  console.log("list blobs with method listBlobsByHierarchy");
  let iter1 = containerClient.listBlobsByHierarchy("/", { prefix: "test/" });
  for await (const item of iter1) {
    if (item.kind === "prefix") {
      console.log(`\tBlobPrefix: ${item.name}`);
      await listblob(containerClient, item.name);
    } else {
      console.log(`\tBlobItem: name - ${item.name}`);
      await downloadBlobToLocal(containerClient, item.name, filePath);
    }
  }
}
async function listblob(containerClient, prefix) {
  let iter1 = containerClient.listBlobsByHierarchy("/", { prefix: prefix });
  for await (const item of iter1) {
    if (item.kind === "prefix") {
      console.log(`\tBlobPrefix: ${item.name}`);
    } else {
      console.log(`\tBlobItem: name - ${item.name}`);
      await downloadBlobToLocal(containerClient, item.name, filePath);
    }
  }
}
async function downloadBlobToLocal(containerClient, blobName, filePath) {
  // create the local subdirectories first, since blob names can contain "/"
  fs.mkdirSync(path.dirname(filePath + blobName), { recursive: true });
  const blockBlobClient = containerClient.getBlockBlobClient(blobName);
  await blockBlobClient.downloadToFile(filePath + blobName);
}
listBlobs().catch((err) => {
  console.error("Error running sample:", err.message);
});
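If you still want the folder to arrive as a single download (your API already sets Content-Type to application/zip), one option is to stream each blob into a zip archive on the fly, so nothing is saved locally. A sketch using the third-party archiver package (an assumption; it is not part of the Azure SDK), reusing a v12 containerClient like the one above inside an express handler:
const archiver = require("archiver");
// stream every blob under a folder prefix into a zip that is piped to the express response
async function downloadFolderAsZip(containerClient, prefix, res) {
  res.header("Content-Type", "application/zip");
  res.attachment(prefix.replace(/\/$/, "") + ".zip");
  const archive = archiver("zip");
  archive.pipe(res);
  for await (const item of containerClient.listBlobsFlat({ prefix })) {
    const downloadResponse = await containerClient.getBlobClient(item.name).download();
    // keep the path relative to the folder inside the zip
    archive.append(downloadResponse.readableStreamBody, { name: item.name.substring(prefix.length) });
  }
  await archive.finalize();
}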
I've written my own implementation (in C#) based on this great article:
public async Task<List<BlobDto>> ListWithPrefixAsync(string folder)
{
    // Get a reference to a container named in appsettings.json
    BlobContainerClient container = new BlobContainerClient(_storageConnectionString, _storageContainerName);
    // Create a new list object for the files
    List<BlobDto> files = new List<BlobDto>();
    await foreach (BlobItem file in container.GetBlobsAsync(prefix: folder))
    {
        // Add each file retrieved from the storage container to the files list by creating a BlobDto object
        string uri = container.Uri.ToString();
        var name = file.Name;
        var fullUri = $"{uri}/{name}";
        files.Add(new BlobDto
        {
            Uri = fullUri,
            Name = name,
            ContentType = file.Properties.ContentType
        });
    }
    // Return all files to the requesting method
    return files;
}
Getting the list of blob files in a folder is then as simple as:
// Get all files at the Azure Storage Location and return them
List<BlobDto>? files = await _storage.ListWithPrefixAsync(prefix);
Hope this helps.
Happy coding!!

Copy Azure File Share to Blob with node.js

Is there an example (in node.js) of copying an Azure File Share to Blob?
Server to server, without downloading the file first.
Thanks in advance.
My code:
var arrFolders = [];
arrFolders.push("");
do
{
    let directoryName = arrFolders.pop();
    console.log(`List directories and files under directory ${directoryName}`);
    let i = 1;
    const directoryClient = shareClient.getDirectoryClient(directoryName);
    for await (const entity of directoryClient.listFilesAndDirectories())
    {
        if (entity.kind === "directory")
        {
            console.log(`${i++} - directory\t: ${entity.name}`);
            arrFolders.push((directoryName == "") ? entity.name : directoryName + "\\" + entity.name);
        }
        else
        {
            console.log(`${i++} - file\t: ${entity.name}`);
            // Copy Files to Blob....
            // ?????
        }
    }
} while (arrFolders.length > 0);
console.log("End list directories and files");
If you want to copy an Azure file to an Azure Blob, we can use the REST API Copy Blob.
In the node sdk, we can use the method BlobClient.beginCopyFromURL in the package @azure/storage-blob to implement it.
For example
const { BlobServiceClient } = require("@azure/storage-blob");
const {
  ShareClient,
  generateAccountSASQueryParameters,
  StorageSharedKeyCredential,
  AccountSASResourceTypes,
  AccountSASPermissions,
  AccountSASServices,
} = require("@azure/storage-file-share");
const fileAccountName = "";
const fileAccountKey = "";
const blobConStr = "";
async function copy() {
  // create an account sas token for the file service
  var fileCreds = new StorageSharedKeyCredential(
    fileAccountName,
    fileAccountKey
  );
  var accountSas = generateAccountSASQueryParameters(
    {
      startsOn: new Date(new Date().valueOf() - 8640),
      expiresOn: new Date(new Date().valueOf() + 86400000),
      resourceTypes: AccountSASResourceTypes.parse("sco").toString(),
      permissions: AccountSASPermissions.parse("rwdlc").toString(),
      services: AccountSASServices.parse("f").toString(),
    },
    fileCreds
  ).toString();
  // get a file share client
  var shareClient = new ShareClient(
    `https://${fileAccountName}.file.core.windows.net/<shareName>`,
    fileCreds
  );
  // get a blob container client
  var blobServiceClient = BlobServiceClient.fromConnectionString(blobConStr);
  var containerClient = blobServiceClient.getContainerClient("<containerName>");
  await containerClient.createIfNotExists();
  // walk the share directory tree and copy each file to azure blob
  var arrFolders = [];
  arrFolders.push("input");
  do {
    let directoryName = arrFolders.pop();
    console.log(`List directories and files under directory ${directoryName}`);
    let i = 1;
    const directoryClient = shareClient.getDirectoryClient(directoryName);
    for await (const entity of directoryClient.listFilesAndDirectories()) {
      if (entity.kind === "directory") {
        console.log(`${i++} - directory\t: ${entity.name}`);
        arrFolders.push(
          directoryName == "" ? entity.name : directoryName + "\\" + entity.name
        );
      } else {
        console.log(`${i++} - file\t: ${entity.name}`);
        var fileClient = directoryClient.getFileClient(entity.name);
        // the copy source is the file URL plus the account SAS token
        var sourceUrl = fileClient.url + "?" + accountSas;
        var res = await (
          await containerClient
            .getBlobClient(entity.name)
            .beginCopyFromURL(sourceUrl)
        ).pollUntilDone();
        console.log(res.copyStatus);
      }
    }
  } while (arrFolders.length > 0);
}
copy();

How to access blob metadata using the Azure JavaScript SDK?

How do I read metadata for a blob in azure via the JavaScript SDK?
When I iterate the blobs returned from the specified container I see a metadata property, but it's undefined, even though there is definitely metadata associated with the blob.
Is there something else I need to do to get the metadata to populate?
import { BlobServiceClient, SharedKeyCredential } from "@azure/storage-blob";
const account = "<redacted>";
const accountKey = "<redacted>";
const sharedKeyCredential = new SharedKeyCredential(account, accountKey);
const blobServiceClient = new BlobServiceClient(`https://${account}.blob.core.windows.net`, sharedKeyCredential);
const containerClient = blobServiceClient.getContainerClient(podcastName);
const blobs = await containerClient.listBlobsFlat({ include: ["metadata"] });
for await (const blob of blobs) {
  console.log(blob.name);
  //blob.metadata is undefined
}
// package.json relevant dependencies
"dependencies": {
  "@azure/storage-blob": "^12.0.0-preview.2"
}
You can try this:
for await (const blob of containerClient.listBlobsFlat()) {
  const blockBlobClient = containerClient.getBlockBlobClient(blob.name);
  const meta = (await blockBlobClient.getProperties()).metadata;
  console.log(meta);
  // process metadata
}
I am assuming you have already declared blockBlobClient and containerClient. If you haven't, you can refer here.
I tested it and the metadata was null; then I used getProperties() to get the metadata and it worked. You could give it a try.
const containerName = "test";
const blobName = "test.txt";
let response;
let marker;
do {
  // legacy v10-style API: list one segment of blobs at a time,
  // passing the continuation marker back in on each iteration
  response = await containerURL.listBlobFlatSegment(aborter, marker);
  marker = response.nextMarker;
  for (let blob of response.segment.blobItems) {
    const url = BlockBlobURL.fromContainerURL(containerURL, blob.name);
    const pro = await url.getProperties(aborter);
    console.log(pro.metadata);
  }
} while (marker);
You can fetch the metadata for a blob with the getBlobMetadata method of the legacy azure-storage SDK.
var storage = require('azure-storage');
var blobService = storage.createBlobService();
var containerName = 'your-container-name';
var blobName = 'my-awesome-blob';
blobService.getBlobMetadata(containerName, blobName, function (err, result, response) {
  if (err) {
    console.error("Couldn't fetch metadata for blob %s", blobName);
    console.error(err);
  } else if (!response.isSuccessful) {
    console.error("Blob %s wasn't found in container %s", blobName, containerName);
  } else {
    console.log("Successfully fetched metadata for blob %s", blobName);
    console.log(result.metadata);
  }
});
For more details, you could refer to this article.
In v12 you can retrieve metadata when listing blobs by passing the option includeMetadata: true:
await containerClient.listBlobsFlat({ includeMetadata: true });
https://github.com/Azure/azure-sdk-for-js/blob/d2730549e078571df008e929f19c07aaf8f9efd9/sdk/storage/storage-blob/test/containerclient.spec.ts#L198
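As a minimal sketch of that option (assuming a containerClient built as in the question, on the GA v12 package rather than the preview):
for await (const blob of containerClient.listBlobsFlat({ includeMetadata: true })) {
  // with includeMetadata: true the listing itself carries the metadata,
  // so no per-blob getProperties() round trip is needed
  console.log(blob.name, blob.metadata);
}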

Download a excel file from azure blob and process its data without needing to save file to local directory

I want to download an excel file from azure blob and process its data using the 'xlsx' npm module.
I have achieved this by saving the file to a local directory on my node.js server, but I have to implement this without saving the file locally on the server.
How do I achieve this?
Following is my js file using the download-to-local-directory method.
const path = require('path');
const xlsx = require('xlsx');
const azureStorage = require('azure-storage');
const azureStorageConfig = {
  accountName: "",
  accountKey: "",
  blobURL: "",
  containerName: "test-container"
};
let fileName = "test_blob.xlsx";
const downloadBlob = async (blobName, downloadFilePath) => {
  return new Promise((resolve, reject) => {
    const name = path.basename(blobName);
    const blobService = azureStorage.createBlobService(azureStorageConfig.accountName, azureStorageConfig.accountKey);
    blobService.getBlobToLocalFile(azureStorageConfig.containerName, blobName, `${downloadFilePath}${name}`, function (error, serverBlob) {
      if (error) {
        reject(error);
      } else {
        resolve(downloadFilePath);
      }
    });
  });
};
downloadBlob(fileName, './local_dir/').then((downloadFilePath) => {
  parseExcel(downloadFilePath + fileName);
});
const parseExcel = function (downloaded_file_path) {
  let workbook = xlsx.readFile(downloaded_file_path);
  // Parse further
}
How will this code change for a process which does not require saving the file to a local directory?
For reference, here is my idea with sample code for your needs, as below.
First, generate a blob url with a SAS token:
var azure = require('azure-storage');
var accountName = '<your account name>';
var accountKey = '<your account key>';
var blobService = azure.createBlobService(accountName, accountKey);
var containerName = 'test-container';
var blobName = 'test_blob.xlsx';
var startDate = new Date();
var expiryDate = new Date(startDate);
expiryDate.setMinutes(startDate.getMinutes() + 100);
startDate.setMinutes(startDate.getMinutes() - 100);
var sharedAccessPolicy = {
  AccessPolicy: {
    Permissions: azure.BlobUtilities.SharedAccessPermissions.READ,
    Start: startDate,
    Expiry: expiryDate
  }
};
var token = blobService.generateSharedAccessSignature(containerName, blobName, sharedAccessPolicy);
var sasUrl = blobService.getUrl(containerName, blobName, token);
Then read the blob body bytes via request, and use XLSX.read(data, read_opts) to parse the body in memory:
var request = require('request');
var XLSX = require('xlsx');
request(sasUrl, { encoding: null }, function (error, response, body) {
  var workbook = XLSX.read(body, { type: "buffer" });
  console.log(workbook.Sheets.Sheet1);
});
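As an aside (an alternative sketch, not part of the original answer): the request package is now deprecated, and the v12 @azure/storage-blob SDK can download the blob straight into a Buffer that xlsx parses in memory. The account, container and blob names below mirror the sample above:
const { BlobServiceClient, StorageSharedKeyCredential } = require("@azure/storage-blob");
const XLSX = require("xlsx");
async function parseBlobInMemory() {
  const creds = new StorageSharedKeyCredential("<your account name>", "<your account key>");
  const blobServiceClient = new BlobServiceClient("https://<your account name>.blob.core.windows.net", creds);
  const blobClient = blobServiceClient
    .getContainerClient("test-container")
    .getBlobClient("test_blob.xlsx");
  // downloadToBuffer keeps everything in memory; no local file is written
  const buffer = await blobClient.downloadToBuffer();
  const workbook = XLSX.read(buffer, { type: "buffer" });
  console.log(workbook.Sheets.Sheet1);
}
parseBlobInMemory().catch(console.error);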
Hope it helps.
