Does it make sense to you to use for every normal foreach a parallel.foreach loop ?
When should I start using parallel.foreach, only iterating 1,000,000 items?
No, you should definitely not do that. The important point here is not really the number of iterations, but the work to be done. If your work is really simple, executing 1000000 delegates in parallel will add a huge overhead and will most likely be slower than a traditional single threaded solution. You can get around this by partitioning the data, so you execute chunks of work instead.
E.g. consider the situation below:
Input = Enumerable.Range(1, Count).ToArray();
Result = new double[Count];
Parallel.ForEach(Input, (value, loopState, index) => { Result[index] = value*Math.PI; });
The operation here is so simple, that the overhead of doing this in parallel will dwarf the gain of using multiple cores. This code runs significantly slower than a regular foreach loop.
By using a partition we can reduce the overhead and actually observe a gain in performance.
Parallel.ForEach(Partitioner.Create(0, Input.Length), range => {
for (var index = range.Item1; index < range.Item2; index++) {
Result[index] = Input[index]*Math.PI;
}
});
The morale of the story here is that parallelism is hard and you should only employ this after looking closely at the situation at hand. Additionally, you should profile the code both before and after adding parallelism.
Remember that regardless of any potential performance gain parallelism always adds complexity to the code, so if the performance is already good enough, there's little reason to add the complexity.
These are my benchmarks showing pure serial is slowest, along with various levels of partitioning.
class Program
{
static void Main(string[] args)
{
NativeDllCalls(true, 1, 400000000, 0); // Seconds: 0.67 |) 595,203,995.01 ops
NativeDllCalls(true, 1, 400000000, 3); // Seconds: 0.91 |) 439,052,826.95 ops
NativeDllCalls(true, 1, 400000000, 4); // Seconds: 0.80 |) 501,224,491.43 ops
NativeDllCalls(true, 1, 400000000, 8); // Seconds: 0.63 |) 635,893,653.15 ops
NativeDllCalls(true, 4, 100000000, 0); // Seconds: 0.35 |) 1,149,359,562.48 ops
NativeDllCalls(true, 400, 1000000, 0); // Seconds: 0.24 |) 1,673,544,236.17 ops
NativeDllCalls(true, 10000, 40000, 0); // Seconds: 0.22 |) 1,826,379,772.84 ops
NativeDllCalls(true, 40000, 10000, 0); // Seconds: 0.21 |) 1,869,052,325.05 ops
NativeDllCalls(true, 1000000, 400, 0); // Seconds: 0.24 |) 1,652,797,628.57 ops
NativeDllCalls(true, 100000000, 4, 0); // Seconds: 0.31 |) 1,294,424,654.13 ops
NativeDllCalls(true, 400000000, 0, 0); // Seconds: 1.10 |) 364,277,890.12 ops
}
static void NativeDllCalls(bool useStatic, int nonParallelIterations, int parallelIterations = 0, int maxParallelism = 0)
{
if (useStatic) {
Iterate<string, object>(
(msg, cntxt) => {
ServiceContracts.ForNativeCall.SomeStaticCall(msg);
}
, "test", null, nonParallelIterations,parallelIterations, maxParallelism );
}
else {
var instance = new ServiceContracts.ForNativeCall();
Iterate(
(msg, cntxt) => {
cntxt.SomeCall(msg);
}
, "test", instance, nonParallelIterations, parallelIterations, maxParallelism);
}
}
static void Iterate<T, C>(Action<T, C> action, T testMessage, C context, int nonParallelIterations, int parallelIterations=0, int maxParallelism= 0)
{
var start = DateTime.UtcNow;
if(nonParallelIterations == 0)
nonParallelIterations = 1; // normalize values
if(parallelIterations == 0)
parallelIterations = 1;
if (parallelIterations > 1) {
ParallelOptions options;
if (maxParallelism == 0) // default max parallelism
options = new ParallelOptions();
else
options = new ParallelOptions { MaxDegreeOfParallelism = maxParallelism };
if (nonParallelIterations > 1) {
Parallel.For(0, parallelIterations, options
, (j) => {
for (int i = 0; i < nonParallelIterations; ++i) {
action(testMessage, context);
}
});
}
else { // no nonParallel iterations
Parallel.For(0, parallelIterations, options
, (j) => {
action(testMessage, context);
});
}
}
else {
for (int i = 0; i < nonParallelIterations; ++i) {
action(testMessage, context);
}
}
var end = DateTime.UtcNow;
Console.WriteLine("\tSeconds: {0,8:0.00} |) {1,16:0,000.00} ops",
(end - start).TotalSeconds, (Math.Max(parallelIterations, 1) * nonParallelIterations / (end - start).TotalSeconds));
}
}