问题
I know that the Storage Data Movement Library is supposed to be faster when uploading and downloading files to and from blob storage, but I am not seeing the performance benefits of it when compared to Azure SDK v12. I got an average of 37.463 seconds with Azure SDK v12 and 41.863 seconds using Storage Data Movement Library (SDML).
Here is the code using SDML:
namespace FunctionApp
{
    using Microsoft.AspNetCore.Mvc;
    using Microsoft.Azure.Storage;
    using Microsoft.Azure.Storage.Blob;
    using Microsoft.Azure.Storage.DataMovement;
    using Microsoft.Azure.WebJobs;
    using Microsoft.Azure.WebJobs.Extensions.Http;
    using Microsoft.Extensions.Logging;
    using System;
    using System.Diagnostics;
    using System.IO;
    using System.IO.Compression;
    using System.Net;
    using System.Net.Http;
    using System.Threading;
    using System.Threading.Tasks;
    using System.Web.Http;

    /// <summary>
    /// Benchmark function: downloads a zip archive from blob storage with the
    /// Storage Data Movement Library (SDML), then uploads every file entry it
    /// contains to a destination container, and reports the elapsed time.
    /// </summary>
    public static class Function1
    {
        /// <summary>
        /// HTTP POST entry point (route <c>testRoute</c>, anonymous access).
        /// </summary>
        /// <param name="req">The incoming HTTP request; its content is not read.</param>
        /// <param name="log">Logger used to record a failure before a 500 is returned.</param>
        /// <returns>
        /// An <see cref="OkObjectResult"/> carrying the elapsed milliseconds on success,
        /// or an <see cref="InternalServerErrorResult"/> if any step throws.
        /// </returns>
        [FunctionName("A")]
        public static async Task<IActionResult> HttpStart(
            [HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = "testRoute")] HttpRequestMessage req,
            ILogger log)
        {
            Stopwatch timer = Stopwatch.StartNew();
            try
            {
                // Static connection and transfer tuning, set on every invocation.
                ServicePointManager.Expect100Continue = false;
                ServicePointManager.DefaultConnectionLimit = Environment.ProcessorCount * 8;
                TransferManager.Configurations.ParallelOperations = 64;

                string fileToDownload = "<URI to zip file in blob storage containing two 300MB files";
                string connectionString = "<connection string to storage account>";
                string containerName = "<container to upload files to>";

                // The whole archive is buffered in memory before it is opened as a zip.
                using MemoryStream test = new MemoryStream();
                CloudBlockBlob sourceBlob = new CloudBlockBlob(new Uri(fileToDownload));
                await TransferManager.DownloadAsync(sourceBlob, test);

                CloudStorageAccount account = CloudStorageAccount.Parse(connectionString);
                CloudBlobClient blobClient = account.CreateCloudBlobClient();
                CloudBlobContainer container = blobClient.GetContainerReference(containerName);

                using ZipArchive zipArchive = new ZipArchive(test);
                foreach (ZipArchiveEntry file in zipArchive.Entries)
                {
                    // Entries with an empty Name (typically directory entries) carry no file data.
                    if (string.IsNullOrEmpty(file.Name))
                    {
                        continue;
                    }

                    // The destination blob keeps the entry's full path inside the archive.
                    CloudBlockBlob destBlob = container.GetBlockBlobReference(file.FullName);
                    using Stream stream = file.Open();
                    await TransferManager.UploadAsync(stream, destBlob);
                }
            }
            catch (Exception exception)
            {
                // Log the failure instead of discarding it, so a 500 response can be diagnosed.
                log.LogError(exception, "Failed to download, unzip, or upload blobs with the Storage Data Movement Library.");
                return new InternalServerErrorResult();
            }

            timer.Stop();
            return new OkObjectResult(timer.ElapsedMilliseconds);
        }
    }
}
Here is the code using Azure SDK v12:
namespace FunctionApp
{
    using Azure.Storage.Blobs;
    using Azure.Storage.Blobs.Specialized;
    using Microsoft.AspNetCore.Mvc;
    using Microsoft.Azure.WebJobs;
    using Microsoft.Azure.WebJobs.Extensions.Http;
    using Microsoft.Extensions.Logging;
    using System;
    using System.Diagnostics;
    using System.IO;
    using System.IO.Compression;
    using System.Net;
    using System.Net.Http;
    using System.Threading;
    using System.Threading.Tasks;
    using System.Web.Http;

    /// <summary>
    /// Benchmark function: downloads a zip archive from blob storage with the
    /// Azure SDK v12 blob clients, then uploads every file entry it contains
    /// to a destination container, and reports the elapsed time.
    /// </summary>
    public static class Function1
    {
        /// <summary>
        /// HTTP POST entry point (route <c>testRoute</c>, anonymous access).
        /// </summary>
        /// <param name="req">The incoming HTTP request; its content is not read.</param>
        /// <param name="log">Logger used to record a failure before a 500 is returned.</param>
        /// <returns>
        /// An <see cref="OkObjectResult"/> carrying the elapsed milliseconds on success,
        /// or an <see cref="InternalServerErrorResult"/> if any step throws.
        /// </returns>
        [FunctionName("A")]
        public static async Task<IActionResult> HttpStart(
            [HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = "testRoute")] HttpRequestMessage req,
            ILogger log)
        {
            Stopwatch timer = Stopwatch.StartNew();
            try
            {
                // Static connection tuning, set on every invocation.
                ServicePointManager.Expect100Continue = false;
                ServicePointManager.DefaultConnectionLimit = Environment.ProcessorCount * 8;

                string fileToDownload = "<URI to zip file in blob storage containing two 300MB files";
                string connectionString = "<connection string to storage account>";
                string containerName = "<container to upload files to>";

                // The whole archive is buffered in memory before it is opened as a zip.
                using MemoryStream test = new MemoryStream();
                BlockBlobClient client = new BlockBlobClient(new Uri(fileToDownload));
                await client.DownloadToAsync(test);

                BlobContainerClient containerClient = new BlobContainerClient(connectionString, containerName);

                using ZipArchive zipArchive = new ZipArchive(test);
                foreach (ZipArchiveEntry file in zipArchive.Entries)
                {
                    // Entries with an empty Name (typically directory entries) carry no file data.
                    if (string.IsNullOrEmpty(file.Name))
                    {
                        continue;
                    }

                    // The destination blob keeps the entry's full path inside the archive.
                    BlockBlobClient blockBlobClient = containerClient.GetBlockBlobClient(file.FullName);
                    using Stream stream = file.Open();
                    await blockBlobClient.UploadAsync(stream);
                }
            }
            catch (Exception exception)
            {
                // Log the failure instead of discarding it, so a 500 response can be diagnosed.
                log.LogError(exception, "Failed to download, unzip, or upload blobs with the Azure SDK v12 blob clients.");
                return new InternalServerErrorResult();
            }

            timer.Stop();
            return new OkObjectResult(timer.ElapsedMilliseconds);
        }
    }
}
回答1:
For the Data Movement Library, you can set ParallelOperations and BlockSize, like below:
TransferManager.Configurations.ParallelOperations = 20;
TransferManager.Configurations.BlockSize = 20971520*2; // 41,943,040 bytes = 40 MiB (2 x 20 MiB), not 20M
I ran the test on my side, and SDML was faster.
来源:https://stackoverflow.com/questions/61879697/performance-of-azure-sdk-v12-vs-storage-data-movement-library