Sum of products of two arrays (dotproduct)

前端 未结 6 1345
难免孤独
难免孤独 2021-02-04 05:49

First off, I know my title could be formulated better, but my math classes are so far behind me that I can't remember the correct words anymore.

I need to do something like this (

6条回答
  •  情歌与酒
    2021-02-04 06:15

    Even faster is to unroll the loop:

    Test("Regular Loop", () =>
                {
                    // Baseline: accumulate the element-wise products one pair at a time.
                    int dotProduct = 0;
                    int index = 0;
                    while (index < digits1.Length)
                    {
                        dotProduct += digits1[index] * digits2[index];
                        index++;
                    }

                    return dotProduct;
                });
                // Processes four element pairs per iteration. A tail loop picks up the
                // 0-3 leftover pairs, so the lambda no longer fails when the length of
                // the vectors is not a multiple of 4.
                Test("Unroll 4x", () =>
                {
                    int result = 0;
                    int length = digits1.Length;

                    // Largest multiple of 4 that fits in the input; the unrolled loop
                    // must stop here so that i+3 never runs past the last element.
                    int limit = length - (length % 4);

                    int i = 0;
                    for (; i < limit; i += 4)
                    {
                        result += digits1[i] * digits2[i];
                        result += digits1[i + 1] * digits2[i + 1];
                        result += digits1[i + 2] * digits2[i + 2];
                        result += digits1[i + 3] * digits2[i + 3];
                    }

                    // Leftover pairs (at most three) when the length is not a multiple of 4.
                    for (; i < length; i++)
                    {
                        result += digits1[i] * digits2[i];
                    }

                    return result;
                });
    
                Test("Dynamic unroll", () =>
                {
                    // Sum of products, eight pairs per iteration for the bulk of the
                    // input, followed by the pairs that do not fill a whole group of eight.
                    int total = 0;
                    int length = digits1.Length;

                    // Largest multiple of 8 that fits; this is 0 for inputs shorter
                    // than 8, in which case the unrolled loop below does not execute.
                    int unrolledEnd = (length / 8) * 8;

                    for (int offset = 0; offset < unrolledEnd; offset += 8)
                    {
                        total += digits1[offset] * digits2[offset];
                        total += digits1[offset + 1] * digits2[offset + 1];
                        total += digits1[offset + 2] * digits2[offset + 2];
                        total += digits1[offset + 3] * digits2[offset + 3];
                        total += digits1[offset + 4] * digits2[offset + 4];
                        total += digits1[offset + 5] * digits2[offset + 5];
                        total += digits1[offset + 6] * digits2[offset + 6];
                        total += digits1[offset + 7] * digits2[offset + 7];
                    }

                    // Remaining 0-7 pairs, visited in ascending index order.
                    for (int tail = unrolledEnd; tail < length; tail++)
                    {
                        total += digits1[tail] * digits2[tail];
                    }

                    return total;
                });
    

    There is a huge difference in running time between debug and release builds of C# code. This was run in release mode:

    Regular Loop
    Iterations: 1000000
    Time(ms):   0/         0/       0 (       596)
    Ticks:      1/  2,071213/     455 (   2154248)
    
    Unroll 4x
    Iterations: 1000000
    Time(ms):   0/     2E-06/       1 (       575)
    Ticks:      1/  1,984301/    3876 (   2076105)
    
    Dynamic unroll
    Iterations: 1000000
    Time(ms):   0/         0/       0 (       430)
    Ticks:      1/    1,4635/    3228 (   1554830)
    

    Debug mode:

    Regular Loop
    Iterations: 1000000
    Time(ms):   0/     1E-06/       1 (      1296)
    Ticks:      4/  4,529916/    3907 (   4678354)
    
    Unroll 4x
    Iterations: 1000000
    Time(ms):   0/         0/       0 (       871)
    Ticks:      2/  3,048466/     701 (   3145277)
    
    Dynamic unroll
    Iterations: 1000000
    Time(ms):   0/         0/       0 (       819)
    Ticks:      2/  2,858588/    1398 (   2957179)
    

提交回复
热议问题