First off, I know my title could be phrased better, but my math classes were so long ago that I can't remember the correct terms anymore.
I need to do something like this (
Even faster is to unroll the loop:
// Baseline measurement: a plain dot product that performs one
// multiply-add per element pair, walking both arrays front to back.
Test("Regular Loop", () =>
{
    int dotProduct = 0;
    int index = 0;
    while (index < digits1.Length)
    {
        dotProduct += digits1[index] * digits2[index];
        index++;
    }
    return dotProduct;
});
// Dot product with the loop body unrolled four times.
// The unrolled loop only runs up to the largest multiple of 4 that fits in
// digits1, and a short tail loop picks up the remaining 0-3 elements, so the
// vectors no longer have to be a multiple of 4 in length.
Test("Unroll 4x", () =>
{
    int result = 0;
    // Index one past the last complete group of four elements.
    int unrolledEnd = digits1.Length - (digits1.Length % 4);
    int i = 0;
    for (; i < unrolledEnd; i += 4)
    {
        result += digits1[i] * digits2[i];
        result += digits1[i+1] * digits2[i+1];
        result += digits1[i+2] * digits2[i+2];
        result += digits1[i+3] * digits2[i+3];
    }
    // Leftover elements when the length is not a multiple of 4.
    for (; i < digits1.Length; i++)
    {
        result += digits1[i] * digits2[i];
    }
    return result;
});
// Dot product with the loop body unrolled eight times, valid for any length.
// The main loop consumes complete groups of eight element pairs; a tail loop
// then adds the remaining 0-7 pairs in the same ascending index order.
Test("Dynamic unroll", () =>
{
    int result = 0;
    // Largest multiple of 8 that does not exceed the vector length. This is 0
    // for vectors shorter than 8, in which case the main loop is skipped.
    int limit = (digits1.Length/8)*8;
    for (int i = 0; i < limit; i+=8)
    {
        result += digits1[i] * digits2[i];
        result += digits1[i+1] * digits2[i+1];
        result += digits1[i+2] * digits2[i+2];
        result += digits1[i+3] * digits2[i+3];
        result += digits1[i+4] * digits2[i+4];
        result += digits1[i+5] * digits2[i+5];
        result += digits1[i+6] * digits2[i+6];
        result += digits1[i+7] * digits2[i+7];
    }
    // Remainder: elements limit .. Length-1 (at most seven of them).
    for (int i = limit; i < digits1.Length; i++)
    {
        result += digits1[i] * digits2[i];
    }
    return result;
});
There is a huge difference in running time between debug and release builds of C# code. The following results were measured in release mode:
Regular Loop
Iterations: 1000000
Time(ms): 0/ 0/ 0 ( 596)
Ticks: 1/ 2,071213/ 455 ( 2154248)
Unroll 4x
Iterations: 1000000
Time(ms): 0/ 2E-06/ 1 ( 575)
Ticks: 1/ 1,984301/ 3876 ( 2076105)
Dynamic unroll
Iterations: 1000000
Time(ms): 0/ 0/ 0 ( 430)
Ticks: 1/ 1,4635/ 3228 ( 1554830)
Debug mode:
Regular Loop
Iterations: 1000000
Time(ms): 0/ 1E-06/ 1 ( 1296)
Ticks: 4/ 4,529916/ 3907 ( 4678354)
Unroll 4x
Iterations: 1000000
Time(ms): 0/ 0/ 0 ( 871)
Ticks: 2/ 3,048466/ 701 ( 3145277)
Dynamic unroll
Iterations: 1000000
Time(ms): 0/ 0/ 0 ( 819)
Ticks: 2/ 2,858588/ 1398 ( 2957179)