问题
As we were switching from synchronous to asynchronous code we found that reading small chunks of data across a huge file (in a loop or in parallel) was much slower than using synchronous code.
In our example we need to retrieve 8000 samples of 1000 bytes uniformly across a huge file.
Below are the results we are getting. Synchronous code is at least 8 times faster. Any suggestions on how to speed up the async code? Full solution is here: https://github.com/virzak/DotNetPerformance
| Method                     | Job       | Runtime | Mean      | Error     | StdDev    |
|--------------------------- |---------- |-------- |----------:|----------:|----------:|
| ReadSamplesAsync           | ClrQuick  | Clr     | 625.99 ms | 14.153 ms |  8.422 ms |
| ReadSamplesInParallelAsync | ClrQuick  | Clr     | 627.81 ms | 15.147 ms | 10.019 ms |
| ReadSamples                | ClrQuick  | Clr     |  40.56 ms |  3.257 ms |  2.155 ms |
| ReadSamplesAsync           | CoreQuick | Core    | 327.29 ms | 39.089 ms | 25.855 ms |
| ReadSamplesInParallelAsync | CoreQuick | Core    | 318.27 ms | 13.937 ms |  9.218 ms |
| ReadSamples                | CoreQuick | Core    |  39.74 ms |  2.531 ms |  1.674 ms |
Our code:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace DotNetPerformance
{
/// <summary>
/// Reads a fixed number of fixed-size samples spread across one file, using
/// sequential async reads, concurrently started async reads, and synchronous
/// reads, so the running times of the three approaches can be compared.
/// </summary>
public class ReadAsyncLoop
{
    // Number of bytes read per sample.
    private const int sampleSize = 1000;

    // Number of samples each Read* method requests from the file.
    private const int sampleCount = 8000;

    /// <summary>
    /// Records the file path and derives the file size and the number of
    /// whole <c>sampleSize</c>-byte samples the file contains.
    /// </summary>
    /// <param name="fileName">Path of the file to sample.</param>
    public ReadAsyncLoop(string fileName)
    {
        FilePath = fileName;
        var fi = new FileInfo(FilePath);
        FileSize = fi.Length;
        SampleCount = FileSize / sampleSize;
    }

    // Number of whole samples available in the file (FileSize / sampleSize).
    private long SampleCount { get; }

    private string FilePath { get; }

    private long FileSize { get; }

    /// <summary>
    /// Runs the async, async-in-parallel and sync variants in that order and
    /// writes the elapsed time of each to the console.
    /// </summary>
    public async Task CompareRunningTimes()
    {
        var sw = Stopwatch.StartNew();
        await ReadBytesAsync().ConfigureAwait(false);
        sw.Stop();
        Console.WriteLine($"FINISHED (Async) in {sw.ElapsedMilliseconds} ms");

        sw.Restart();
        await ReadBytesInParallelAsync().ConfigureAwait(false);
        sw.Stop();
        Console.WriteLine($"FINISHED (Async in parallel) in {sw.ElapsedMilliseconds} ms");

        sw.Restart();
        ReadBytes();
        sw.Stop();
        Console.WriteLine($"FINISHED (Sync) in {sw.ElapsedMilliseconds} ms");
    }

    /// <summary>
    /// Starts all sample reads without awaiting each one individually and
    /// then awaits them together.
    /// </summary>
    public async Task ReadBytesInParallelAsync()
    {
        // The stream is disposed only after every started read has completed.
        using (var fs = new FileStream(FilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.Asynchronous))
        {
            var tasks = new List<Task>(sampleCount);
            for (var i = 0; i < sampleCount; ++i)
            {
                var bytes = new byte[sampleSize];

                // NOTE(review): every task shares this one stream and its position, so the
                // Seek for a later sample can run while an earlier ReadAsync is still pending.
                // Confirm FileStream tolerates this; otherwise give each read its own handle.
                tasks.Add(ReadSampleBytesFromFileAsync(fs, GetSampleIndex(i), bytes));
            }

            await Task.WhenAll(tasks).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Reads the samples one after another, awaiting each read before
    /// starting the next.
    /// </summary>
    public async Task ReadBytesAsync()
    {
        using (var fs = new FileStream(FilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.Asynchronous))
        {
            for (var i = 0; i < sampleCount; ++i)
            {
                var bytes = new byte[sampleSize];
                await ReadSampleBytesFromFileAsync(fs, GetSampleIndex(i), bytes).ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Reads the samples one after another with synchronous reads.
    /// </summary>
    public void ReadBytes()
    {
        using (var fs = new FileStream(FilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096))
        {
            for (var i = 0; i < sampleCount; ++i)
            {
                var bytes = new byte[sampleSize];
                ReadSampleBytesFromFile(fs, GetSampleIndex(i), bytes);
            }
        }
    }

    /// <summary>
    /// Maps the i-th of <c>sampleCount</c> requested samples to a sample index
    /// in the range [0, SampleCount), spacing the requests evenly over the file.
    /// </summary>
    /// <param name="i">Zero-based request number, less than <c>sampleCount</c>.</param>
    /// <returns>
    /// A sample index; the read helpers multiply it by <c>sampleSize</c> to get
    /// the byte offset, so the value returned here must not already be in bytes.
    /// </returns>
    private long GetSampleIndex(int i)
    {
        // Dividing by the constant request count (never zero) also keeps this
        // well-defined for files shorter than one sample, where SampleCount is 0.
        return (long)(i / (double)sampleCount * SampleCount);
    }

    /// <summary>
    /// Seeks to the start of the given sample and asynchronously reads up to
    /// <c>sampleSize</c> bytes into the start of <paramref name="sampleBytes"/>.
    /// </summary>
    /// <param name="fs">Open stream to read from.</param>
    /// <param name="sampleGlobalIndex">Sample index; the byte offset is this value times <c>sampleSize</c>.</param>
    /// <param name="sampleBytes">Destination buffer of at least <c>sampleSize</c> bytes.</param>
    /// <param name="ct">Token passed through to the underlying read.</param>
    private static async Task ReadSampleBytesFromFileAsync(FileStream fs, long sampleGlobalIndex, byte[] sampleBytes, CancellationToken ct = default)
    {
        var seekPosition = sampleGlobalIndex * sampleSize;
        fs.Seek(seekPosition, SeekOrigin.Begin);

        // The number of bytes actually read is not inspected.
        await fs.ReadAsync(sampleBytes, 0, sampleSize, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Seeks to the start of the given sample and synchronously reads up to
    /// <c>sampleSize</c> bytes into the start of <paramref name="sampleBytes"/>.
    /// </summary>
    /// <param name="fs">Open stream to read from.</param>
    /// <param name="sampleGlobalIndex">Sample index; the byte offset is this value times <c>sampleSize</c>.</param>
    /// <param name="sampleBytes">Destination buffer of at least <c>sampleSize</c> bytes.</param>
    private static void ReadSampleBytesFromFile(FileStream fs, long sampleGlobalIndex, byte[] sampleBytes)
    {
        var seekPosition = sampleGlobalIndex * sampleSize;
        fs.Seek(seekPosition, SeekOrigin.Begin);

        // The number of bytes actually read is not inspected.
        fs.Read(sampleBytes, 0, sampleSize);
    }
}
}
来源:https://stackoverflow.com/questions/51560443/asynchronous-random-file-access-with-filestream-read-vs-readasync-in-a-loop-pa