// let's say there is a list of 1000+ URLs
string[] urls = { "http://google.com", "http://yahoo.com", ... };
// now let's send HTTP requests to each of these
This is my second answer, with a possibly improved version of Theo Yaung's solution (the accepted answer). This one is also based on a `SemaphoreSlim`, and it enumerates the urls lazily, but it does not rely on `Task.WhenAll` for awaiting the tasks to complete. The `SemaphoreSlim` is used for that purpose too. This can be an advantage, because it means that completed tasks need not be referenced for the whole duration of the operation. Instead, each task becomes eligible for garbage collection immediately after its completion.
Two overloads of the `ForEachAsync` extension method are provided (the name is borrowed from Dogu Arslan's answer, the next most popular answer): one for tasks that return a result, and one for tasks that do not. A nice extra feature is the `onErrorContinue` parameter, which controls the behavior in case of exceptions. The default is `false`, which mimics the behavior of `Parallel.ForEach` (stop processing shortly after an exception), not the behavior of `Task.WhenAll` (wait for all tasks to complete).
/// <summary>
/// Invokes an asynchronous delegate for each element of a sequence, with a
/// configurable maximum degree of concurrency, and returns the results in
/// the order of the source elements.
/// </summary>
/// <param name="source">The sequence to process. It is enumerated lazily.</param>
/// <param name="taskFactory">
/// Produces a task for each element. Invoked on the context of the caller;
/// wrap it in Task.Run to invoke it on the ThreadPool instead.
/// </param>
/// <param name="concurrencyLevel">Maximum number of concurrently running tasks.</param>
/// <param name="onErrorContinue">
/// false (default): stop starting new tasks shortly after the first error,
/// similar to Parallel.ForEach. true: keep processing all elements, similar
/// to Task.WhenAll.
/// </param>
/// <returns>The results of the tasks, positionally matching the source elements.</returns>
/// <exception cref="AggregateException">At least one task failed or the taskFactory threw.</exception>
public static async Task<TResult[]> ForEachAsync<TSource, TResult>(
    this IEnumerable<TSource> source,
    Func<TSource, Task<TResult>> taskFactory,
    int concurrencyLevel = 1,
    bool onErrorContinue = false)
{
    // Arguments validation omitted
    var throttler = new SemaphoreSlim(concurrencyLevel);
    var results = new List<TResult>();
    var exceptions = new ConcurrentQueue<Exception>();
    int index = 0;
    foreach (var item in source)
    {
        var localIndex = index++;
        lock (results) results.Add(default); // Reserve space in the list
        await throttler.WaitAsync(); // continue on captured context
        // In stop-on-error mode, bail out as soon as any earlier task has failed.
        if (!onErrorContinue && !exceptions.IsEmpty) { throttler.Release(); break; }
        Task<TResult> task;
        try { task = taskFactory(item); } // or Task.Run(() => taskFactory(item))
        catch (Exception ex)
        {
            // The factory itself threw (synchronously) — record and release the slot.
            exceptions.Enqueue(ex); throttler.Release();
            if (onErrorContinue) continue; else break;
        }
        // Fire-and-forget continuation: store the result (or the error) and free
        // the semaphore slot. The task itself is not retained, so it becomes
        // eligible for garbage collection as soon as it completes.
        _ = task.ContinueWith(t =>
        {
            try { lock (results) results[localIndex] = t.GetAwaiter().GetResult(); }
            catch (Exception ex) { exceptions.Enqueue(ex); }
            finally { throttler.Release(); }
        }, default, TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);
    }
    // Wait for the last operations to complete: re-acquiring every slot proves
    // that all started continuations have released theirs.
    for (int i = 0; i < concurrencyLevel; i++)
    {
        await throttler.WaitAsync().ConfigureAwait(false);
    }
    if (!exceptions.IsEmpty) throw new AggregateException(exceptions);
    lock (results) return results.ToArray();
}
/// <summary>
/// Overload for tasks that do not return a result. Implemented (simply, but
/// not optimally) by delegating to the generic overload with a dummy result.
/// </summary>
/// <param name="source">The sequence to process. It is enumerated lazily.</param>
/// <param name="taskFactory">Produces a task for each element; invoked on the caller's context.</param>
/// <param name="concurrencyLevel">Maximum number of concurrently running tasks.</param>
/// <param name="onErrorContinue">false: stop shortly after the first error; true: process all elements.</param>
/// <exception cref="AggregateException">At least one task failed or the taskFactory threw.</exception>
public static Task ForEachAsync<TSource>(
    this IEnumerable<TSource> source,
    Func<TSource, Task> taskFactory,
    int concurrencyLevel = 1,
    bool onErrorContinue = false)
{
    // Arguments validation omitted
    return ForEachAsync<TSource, object>(source, async item =>
    {
        await taskFactory(item).ConfigureAwait(false);
        return null; // dummy result, discarded by the caller
    }, concurrencyLevel, onErrorContinue);
}
The `taskFactory` is invoked on the context of the caller. This can be desirable because it allows, for example, UI elements to be accessed inside the lambda. In case it is preferable to invoke it on the `ThreadPool` context, you can just replace `taskFactory(item)` with `Task.Run(() => taskFactory(item))`.
To keep things simple, the non-generic `Task ForEachAsync` overload is implemented non-optimally, by calling the generic `Task<TResult[]>` overload.
Usage example:
// Example: fetch up to 10 URLs concurrently, appending each result to a TextBox.
// The lambda runs on the caller's (UI) context, so touching TextBox1 is safe here.
// onErrorContinue: true keeps processing the remaining URLs even if some requests fail.
await urls.ForEachAsync(async url =>
{
var html = await httpClient.GetStringAsync(url);
TextBox1.AppendText($"Url: {url}, {html.Length:#,0} chars\r\n");
}, concurrencyLevel: 10, onErrorContinue: true);