I know the Storage Data Movement Library (SDML) is supposed to be faster for uploading and downloading files to and from blob storage, but I am not seeing its performance benefit compared to the Azure SDK v12. I got an average of 37.463 seconds with the Azure SDK v12 and 41.863 seconds with SDML.
Here is the code using SDML:
namespace FunctionApp
{
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.Storage;
using Microsoft.Azure.Storage.Blob;
using Microsoft.Azure.Storage.DataMovement;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using System.Web.Http;
public static class Function1
{
[FunctionName("A")]
public static async Task<IActionResult> HttpStart(
[HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = "testRoute")] HttpRequestMessage req,
ILogger log)
{
Stopwatch timer = new Stopwatch();
timer.Start();
try
{
ServicePointManager.Expect100Continue = false;
ServicePointManager.DefaultConnectionLimit = Environment.ProcessorCount * 8;
TransferManager.Configurations.ParallelOperations = 64;
string fileToDownload = "<URI to zip file in blob storage containing two 300MB files>";
string connectionString = "<connection string to storage account>";
string containerName = "<container to upload files to>";
using MemoryStream test = new MemoryStream();
CloudBlockBlob sourceBlob = new CloudBlockBlob(new Uri(fileToDownload));
await TransferManager.DownloadAsync(sourceBlob, test);
CloudStorageAccount account = CloudStorageAccount.Parse(connectionString);
CloudBlobClient blobClient = account.CreateCloudBlobClient();
CloudBlobContainer container = blobClient.GetContainerReference(containerName);
using ZipArchive zipArchive = new ZipArchive(test);
foreach (ZipArchiveEntry file in zipArchive.Entries)
{
if (!string.IsNullOrEmpty(file.Name))
{
CloudBlockBlob destBlob = container.GetBlockBlobReference(file.FullName);
using Stream stream = file.Open();
await TransferManager.UploadAsync(stream, destBlob);
}
}
}
catch (Exception exception)
{
log.LogError(exception, "Transfer failed");
return new InternalServerErrorResult();
}
timer.Stop();
return new OkObjectResult(timer.ElapsedMilliseconds);
}
}
}
Here is the code using Azure SDK v12:
namespace FunctionApp
{
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Specialized;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using System.Web.Http;
public static class Function1
{
[FunctionName("A")]
public static async Task<IActionResult> HttpStart(
[HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = "testRoute")] HttpRequestMessage req,
ILogger log)
{
Stopwatch timer = new Stopwatch();
timer.Start();
try
{
ServicePointManager.Expect100Continue = false;
ServicePointManager.DefaultConnectionLimit = Environment.ProcessorCount * 8;
string fileToDownload = "<URI to zip file in blob storage containing two 300MB files>";
string connectionString = "<connection string to storage account>";
string containerName = "<container to upload files to>";
using MemoryStream test = new MemoryStream();
BlockBlobClient client = new BlockBlobClient(new Uri(fileToDownload));
await client.DownloadToAsync(test);
BlobContainerClient containerClient = new BlobContainerClient(connectionString, containerName);
using ZipArchive zipArchive = new ZipArchive(test);
foreach (ZipArchiveEntry file in zipArchive.Entries)
{
if (!string.IsNullOrEmpty(file.Name))
{
BlockBlobClient blockBlobClient = containerClient.GetBlockBlobClient(file.FullName);
using Stream stream = file.Open();
await blockBlobClient.UploadAsync(stream);
}
}
}
catch (Exception exception)
{
log.LogError(exception, "Transfer failed");
return new InternalServerErrorResult();
}
timer.Stop();
return new OkObjectResult(timer.ElapsedMilliseconds);
}
}
}
For the Data Movement Library, you can set ParallelOperations and BlockSize, like below:
TransferManager.Configurations.ParallelOperations = 20;
TransferManager.Configurations.BlockSize = 20971520 * 2; // 40 MB (2 x 20 MB)
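For example, here is a minimal sketch of applying those settings around a single upload; the connection string, container name, blob name, and local path are placeholders:

using System;
using System.Threading.Tasks;
using Microsoft.Azure.Storage;
using Microsoft.Azure.Storage.Blob;
using Microsoft.Azure.Storage.DataMovement;

public static class DmlTuningSample
{
    public static async Task UploadLargeFileAsync(string connectionString)
    {
        // Tune DMLib: more parallel operations and larger blocks help with large files.
        TransferManager.Configurations.ParallelOperations = 20;
        TransferManager.Configurations.BlockSize = 20971520 * 2; // 40 MB per block

        CloudStorageAccount account = CloudStorageAccount.Parse(connectionString);
        CloudBlockBlob destBlob = account.CreateCloudBlobClient()
            .GetContainerReference("my-container")       // placeholder container
            .GetBlockBlobReference("large-file.zip");    // placeholder blob name

        // Optional progress reporting so the effect of the tuning is visible.
        SingleTransferContext context = new SingleTransferContext
        {
            ProgressHandler = new Progress<TransferStatus>(
                p => Console.WriteLine($"Bytes transferred: {p.BytesTransferred}"))
        };

        await TransferManager.UploadAsync(@"C:\temp\large-file.zip", destBlob, null, context);
    }
}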
I ran the test on my side, and SDML was faster.
Related
How to get multiple filenames from a container using a blob trigger Azure Function in C#?
Update:
Sample:
using System;
using System.IO;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Host;
using Microsoft.Extensions.Logging;
using Microsoft.WindowsAzure.Storage.Blob;
namespace FunctionApp116
{
public static class Function1
{
[FunctionName("Function1")]
public static void Run([BlobTrigger("test/{name}", Connection = "str")] CloudBlockBlob myBlob, ILogger log)
{
string blobname = myBlob.Name;
string containername = myBlob.Container.Name;
BlobServiceClient blobServiceClient = new BlobServiceClient(Environment.GetEnvironmentVariable("str"));
BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient(containername);
log.LogInformation($"C# Blob trigger function Processed blob\n Name:{blobname}" + $" Container Name is {containername}");
foreach (BlobItem blobItem in containerClient.GetBlobs())
{
log.LogInformation("\t" + blobItem.Name);
}
}
}
}
Original Answer:
A blob trigger can only bind a single blob as its input.
However, you can use the Blob Storage SDK to get multiple blobs from the same container.
https://learn.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-dotnet#code-examples
If you tell me which language you are using, I can post a sample.
I know we can manage a file in ADLS Gen2 using the .NET SDK, as described in the article below:
https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-directory-file-acl-dotnet
I just want to know whether we can also use bindings such as CloudBlockBlob with ADLS Gen2, as we can with a regular Azure storage account.
After testing, I can confirm the CloudBlockBlob binding can be used with ADLS Gen2.
I used this code to upload files to the ADLS Gen2 account:
using System;
using Azure;
using Azure.Storage.Files.DataLake;
using Azure.Storage.Files.DataLake.Models;
using Azure.Storage;
using System.IO;
namespace Frankadls
{
class Program
{
static async System.Threading.Tasks.Task Main(string[] args)
{
Console.WriteLine("Hello World!");
string accountName = "";
string accountKey = "";
StorageSharedKeyCredential sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
string dfsUri = "https://" + accountName + ".dfs.core.windows.net";
DataLakeServiceClient dataLakeServiceClient = new DataLakeServiceClient(new Uri(dfsUri), sharedKeyCredential);
DataLakeFileSystemClient dataLakeFileSystemClient = await dataLakeServiceClient.CreateFileSystemAsync("test1");
DataLakeDirectoryClient directoryClient = await dataLakeFileSystemClient.CreateDirectoryAsync("my-directory");
DataLakeFileClient fileClient = await directoryClient.CreateFileAsync("uploaded-file.txt");
FileStream fileStream = File.OpenRead(""); // path of the local file to upload (left blank here)
long fileSize = fileStream.Length;
await fileClient.AppendAsync(fileStream, offset: 0);
await fileClient.FlushAsync(position: fileSize);
}
}
}
This code, using the CloudBlockBlob input binding, can be triggered successfully:
using System;
using System.IO;
using Microsoft.Azure.WebJobs;
using Microsoft.Extensions.Logging;
using Microsoft.WindowsAzure.Storage.Blob;
namespace Frankblobtrigger
{
public static class Function1
{
[FunctionName("Function1")]
public static void Run([BlobTrigger("test1/{name}", Connection = "conn")]Stream myBlob, string name,
[Blob("test1/{name}", FileAccess.Read, Connection = "conn")] CloudBlockBlob blob,
ILogger log)
{
log.LogInformation($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");
log.LogInformation(blob.Uri.AbsoluteUri);
}
}
}
Yes, for .NET, just use the blob output binding, and you should be able to use CloudBlockBlob to upload.
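Here is a minimal sketch of such an output binding, reusing the "test1" file system and "conn" connection setting from the sample above; the blob path, function name, and class name are placeholders:

using System.Net.Http;
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using Microsoft.WindowsAzure.Storage.Blob;

public static class UploadFunction
{
    [FunctionName("UploadToAdlsGen2")]
    public static async Task Run(
        [HttpTrigger(AuthorizationLevel.Function, "post")] HttpRequestMessage req,
        [Blob("test1/uploaded-from-function.txt", Connection = "conn")] CloudBlockBlob outputBlob,
        ILogger log)
    {
        // Upload the request body as the blob content.
        string body = await req.Content.ReadAsStringAsync();
        await outputBlob.UploadTextAsync(body);
        log.LogInformation($"Uploaded {body.Length} characters to {outputBlob.Uri.AbsoluteUri}");
    }
}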
Note: I am sharing this for reference.
A couple of days ago I tried to use an Azure Function to build an API for CRUD operations on blob storage. I investigated a solution for the download operation: most of the solutions I found on the internet work locally, but once the function is deployed, the web server needs permission on a path to create the file and download it locally, which produced the error "Access to the path is denied".
I then solved the download by streaming it back in the HTTP response, using Azure Functions v2 with C# on .NET Core 2.1.
This is the basic code that works for me; I hope it helps you:
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.AspNetCore.Http;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using Microsoft.WindowsAzure.Storage.Auth;
using System.IO;
using System.Net.Http.Headers;
using System.Net.Http;
using System.Net;
namespace BloApi
{
public static class BlobOperations
{
[FunctionName("DownloadBlob")]
public static async Task<HttpResponseMessage> DownloadBlob(
[HttpTrigger(AuthorizationLevel.Anonymous, "get", Route = "DownloadBlob/{name}")] HttpRequest req, string name)
{
StorageCredentials storageCredentials = new StorageCredentials("Storage",
"CamEKgqVaylmQ.....ow2VHlyCww==");
CloudStorageAccount storageAccount = new CloudStorageAccount(storageCredentials, true);
CloudBlobContainer container = storageAccount.CreateCloudBlobClient().GetContainerReference("myblobcontainer"); // container names must be lowercase
var blobName = name;
CloudBlockBlob block = container.GetBlockBlobReference(blobName);
HttpResponseMessage message = new HttpResponseMessage(HttpStatusCode.OK);
Stream blobStream = await block.OpenReadAsync();
message.Content = new StreamContent(blobStream);
message.Content.Headers.ContentLength = block.Properties.Length;
message.StatusCode = HttpStatusCode.OK;
message.Content.Headers.ContentType = new MediaTypeHeaderValue(block.Properties.ContentType);
message.Content.Headers.ContentDisposition = new ContentDispositionHeaderValue("attachment")
{
FileName = $"CopyOf_{block.Name}",
Size = block.Properties.Length
};
return message;
}
}
}
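For completeness, here is a minimal client-side sketch of consuming that endpoint and saving the stream to disk; the local function host URL and blob name are placeholders:

using System.IO;
using System.Net.Http;
using System.Threading.Tasks;

class DownloadBlobClient
{
    static async Task Main()
    {
        using HttpClient client = new HttpClient();
        // Placeholder URL for a locally running function host and a sample blob name.
        using HttpResponseMessage response =
            await client.GetAsync("http://localhost:7071/api/DownloadBlob/sample.pdf");
        response.EnsureSuccessStatusCode();

        // Save the streamed content to a local file.
        using FileStream file = File.Create("CopyOf_sample.pdf");
        await response.Content.CopyToAsync(file);
    }
}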
I need to parse and extract column information from an Excel spreadsheet using Azure Logic Apps.
I have already set up my Logic App to retrieve the latest unread emails from my Outlook. The Logic App also does a FOR EACH to read all attachments (from unread emails) and makes sure they are Excel files (based on the filename extension).
I have a basic Excel file that contains three columns: Product, Description, and Price. I need to parse the Product and Price columns from each row.
I will then add the ability to store the parsed info in my SQL table hosted on Azure.
Here you go; you could use an HTTP request trigger as well.
using ExcelDataReader;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using Nancy.Json;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
namespace ConvertExcelToJSon
{
public static class Function1
{
[FunctionName("ConvertToJson")]
public static async Task<IActionResult> Run(
[HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
ILogger log)
{
log.LogInformation("C# HTTP trigger function processed a request.");
string requestBody = await new StreamReader(req.Body).ReadToEndAsync();
dynamic data = JsonConvert.DeserializeObject(requestBody);
string blobName = data?.blobName;
string[] splitBlob = blobName.Split('/');
Stream blob = await GetBlobStreamAsync(splitBlob[1], splitBlob[2] + "/" + splitBlob[3]);
DataSet ds = CreateDataSet(blob);
foreach (DataTable table in ds.Tables)
{
return (ActionResult)new OkObjectResult(DataTableToJSON(table));
}
return blobName != null
? (ActionResult)new OkObjectResult($"Hello, {blobName}")
: new BadRequestObjectResult("Please pass a name on the query string or in the request body");
}
public static string DataTableToJSON(DataTable table)
{
List<Dictionary<string, object>> list = new List<Dictionary<string, object>>();
foreach (DataRow row in table.Rows)
{
Dictionary<string, object> dict = new Dictionary<string, object>();
foreach (DataColumn col in table.Columns)
{
dict[col.ColumnName] = (Convert.ToString(row[col]));
}
list.Add(dict);
}
JavaScriptSerializer serializer = new JavaScriptSerializer();
return serializer.Serialize(list);
}
public static string AppSetting(this string Key)
{
string ret = string.Empty;
if (Environment.GetEnvironmentVariable(Key) != null)
{
ret = Environment.GetEnvironmentVariable(Key);
}
return ret;
}
public static async Task<Stream> GetBlobStreamAsync(string containerName, string blobName)
{
Stream myBlob = new MemoryStream();
if (CloudStorageAccount.TryParse("AzureWebJobsStorage".AppSetting(), out CloudStorageAccount storageAccount))
{
CloudBlobClient cloudBlobClient = storageAccount.CreateCloudBlobClient();
CloudBlobContainer container = cloudBlobClient.GetContainerReference(containerName);
CloudBlob cloudBlob = container.GetBlobReference(blobName);
await cloudBlob.DownloadToStreamAsync(myBlob);
myBlob.Seek(0, SeekOrigin.Begin);
}
return myBlob;
}
public static DataSet CreateDataSet(Stream stream)
{
DataSet ds;
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
IExcelDataReader reader = null;
reader = ExcelReaderFactory.CreateOpenXmlReader(stream);
ds = reader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
{
UseHeaderRow = true,
}
});
return ds;
}
}
}
I suggest you call an Azure Function from your Logic App and use the function to convert the Excel file into a JSON object (I am currently doing this very successfully). I use ExcelDataReader to parse a blob that the Logic App creates. Send the blob location in the request and respond with the JSON.
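Since only the Product and Price columns are needed, a hedged variation of the DataTableToJSON helper above can filter to specific columns before serializing; the column names are assumptions based on the spreadsheet described:

using System;
using System.Collections.Generic;
using System.Data;
using Newtonsoft.Json;

public static class DataTableFilter
{
    // Keep only the requested columns (e.g. "Product" and "Price") when serializing.
    public static string DataTableToJson(DataTable table, params string[] columns)
    {
        var rows = new List<Dictionary<string, object>>();
        foreach (DataRow row in table.Rows)
        {
            var dict = new Dictionary<string, object>();
            foreach (string col in columns)
            {
                if (table.Columns.Contains(col))
                {
                    dict[col] = Convert.ToString(row[col]);
                }
            }
            rows.Add(dict);
        }
        return JsonConvert.SerializeObject(rows);
    }
}

You would call it as DataTableToJson(table, "Product", "Price") in place of DataTableToJSON(table).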
TARGET: I followed the Azure Functions tutorial at the link below and copied the code, but I got several errors when running it locally in VS2017. I appreciate your help.
https://www.cyotek.com/blog/upload-data-to-blob-storage-with-azure-functions
ERROR 1 - related to Run:
CS0116 A namespace cannot directly contain members such as fields or methods UploadToBlobFunctionApp C:\AzureFunctions\UploadToBlobFunctionApp\UploadToBlobFunctionApp\UploadToBlobFunction.cs 15 Active
ERROR 2 - related to Task CreateBlob:
CS0116 A namespace cannot directly contain members such as fields or methods UploadToBlobFunctionApp
C:\AzureFunctions\UploadToBlobFunctionApp\UploadToBlobFunctionApp\UploadToBlobFunction.cs 45 Active
ERROR 3 - related to await CreateBlob:
CS0103 The name 'CreateBlob' does not exist in the current context UploadToBlobFunctionApp C:\AzureFunctions\UploadToBlobFunctionApp\UploadToBlobFunctionApp\UploadToBlobFunction.cs 36 Active
CODE Function.cs:
using System;
using System.Configuration;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Azure;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Host;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
public static async Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log)
{
HttpStatusCode result;
string contentType;
result = HttpStatusCode.BadRequest;
contentType = req.Content.Headers?.ContentType?.MediaType;
if (contentType == "application/json")
{
string body;
body = await req.Content.ReadAsStringAsync();
if (!string.IsNullOrEmpty(body))
{
string name;
name = Guid.NewGuid().ToString("n");
await CreateBlob(name + ".json", body, log);
result = HttpStatusCode.OK;
}
}
return req.CreateResponse(result, string.Empty);
}
private async static Task CreateBlob(string name, string data,
TraceWriter log)
{
string accessKey;
string accountName;
string connectionString;
CloudStorageAccount storageAccount;
CloudBlobClient client;
CloudBlobContainer container;
CloudBlockBlob blob;
accessKey = "qwertyw4VhRajxlZn9C4hTMB8oSwE4klNUsvTy9VeTCIQ11111vFVVGExDwJ+JUboFv2B79j+W6foqLWE92w==";
accountName = "mystorage";
connectionString = "DefaultEndpointsProtocol=https;AccountName=" + accountName + ";AccountKey=" + accessKey + ";EndpointSuffix=core.windows.net";
storageAccount = CloudStorageAccount.Parse(connectionString);
client = storageAccount.CreateCloudBlobClient();
container = client.GetContainerReference("functionupload");
await container.CreateIfNotExistsAsync();
blob = container.GetBlockBlobReference(name);
blob.Properties.ContentType = "application/json";
using (Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(data)))
{
await blob.UploadFromStreamAsync(stream);
}
}
The example that you are referencing uses scripted functions (.csx files). Those are mostly used when editing code directly in the Azure portal.
I think you are trying to create a precompiled application with .csproj and .cs files. In that case, your code must be valid C#, i.e. all methods must be inside classes.
Have a look at this example.
You can also use attributes to mark your functions and triggers instead of authoring function.json manually; see examples here.
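As a hedged sketch (not the tutorial's exact code), the Run method from your file could be wrapped like this in a precompiled project; the namespace, class, and function names are illustrative, and CreateBlob would move into the same class as a private static method:

using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Azure.WebJobs.Host;

namespace UploadToBlobFunctionApp
{
    public static class UploadToBlobFunction
    {
        [FunctionName("UploadToBlob")]
        public static async Task<HttpResponseMessage> Run(
            [HttpTrigger(AuthorizationLevel.Function, "post")] HttpRequestMessage req,
            TraceWriter log)
        {
            HttpStatusCode result = HttpStatusCode.BadRequest;
            string contentType = req.Content.Headers?.ContentType?.MediaType;

            if (contentType == "application/json")
            {
                string body = await req.Content.ReadAsStringAsync();
                if (!string.IsNullOrEmpty(body))
                {
                    // CreateBlob(...) from your code would be called here,
                    // moved inside this class as a private static method.
                    result = HttpStatusCode.OK;
                }
            }

            return req.CreateResponse(result, string.Empty);
        }
    }
}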