Calling std::nth_element() function extremely frequently

后端 未结 3 1362
渐次进展
渐次进展 2021-02-13 01:42

I did not find this specific topic anywhere...

I am calling the nth_element() algorithm about 400,000 times per second on different data in a std::vector of 23 integers,

3条回答
  •  你的背包
    2021-02-13 02:01

    You mentioned that the size of the array was always known to be 23. Moreover, the type used is unsigned short. In this case, you might try to use a sorting network of size 23; since your type is unsigned short, sorting the whole array with a sorting network might be even faster than partially sorting it with std::nth_element. Here is a very straightforward C++14 implementation of a sorting network of size 23 with 118 compare-exchange units, as described by Using Symmetry and Evolutionary Search to Minimize Sorting Networks:

    // Sorts the 23 elements first[0] .. first[22] in place with a fixed
    // sequence of 118 compare-exchange operations (a sorting network).
    //
    // first   - random-access iterator (or pointer) to the first of exactly 23
    //           elements; only indices 0..22 are ever accessed.
    // compare - ordering predicate forwarded to every compare-exchange;
    //           defaults to a value-initialized Compare (std::less<>).
    //
    // Relies on a three-argument swap_if(x, y, compare) being visible at the
    // point of instantiation; per the accompanying text it swaps x and y when
    // compare(y, x) holds. Requires <functional> for std::less<>.
    //
    // The order of the compare-exchange units below is the algorithm itself:
    // do not reorder, merge, or remove lines.
    template<typename RandomIt, typename Compare=std::less<>>
    void network_sort23(RandomIt first, Compare compare={})
    {
        swap_if(first[1u], first[20u], compare);
        swap_if(first[2u], first[21u], compare);
        swap_if(first[5u], first[13u], compare);
        swap_if(first[9u], first[17u], compare);
        swap_if(first[0u], first[7u], compare);
        swap_if(first[15u], first[22u], compare);
        swap_if(first[4u], first[11u], compare);
        swap_if(first[6u], first[12u], compare);
        swap_if(first[10u], first[16u], compare);
        swap_if(first[8u], first[18u], compare);
        swap_if(first[14u], first[19u], compare);
        swap_if(first[3u], first[8u], compare);
        swap_if(first[4u], first[14u], compare);
        swap_if(first[11u], first[18u], compare);
        swap_if(first[2u], first[6u], compare);
        swap_if(first[16u], first[20u], compare);
        swap_if(first[0u], first[9u], compare);
        swap_if(first[13u], first[22u], compare);
        swap_if(first[5u], first[15u], compare);
        swap_if(first[7u], first[17u], compare);
        swap_if(first[1u], first[10u], compare);
        swap_if(first[12u], first[21u], compare);
        swap_if(first[8u], first[19u], compare);
        swap_if(first[17u], first[22u], compare);
        swap_if(first[0u], first[5u], compare);
        swap_if(first[20u], first[21u], compare);
        swap_if(first[1u], first[2u], compare);
        swap_if(first[18u], first[19u], compare);
        swap_if(first[3u], first[4u], compare);
        swap_if(first[21u], first[22u], compare);
        swap_if(first[0u], first[1u], compare);
        swap_if(first[19u], first[22u], compare);
        swap_if(first[0u], first[3u], compare);
        swap_if(first[12u], first[13u], compare);
        swap_if(first[9u], first[10u], compare);
        swap_if(first[6u], first[15u], compare);
        swap_if(first[7u], first[16u], compare);
        swap_if(first[8u], first[11u], compare);
        swap_if(first[11u], first[14u], compare);
        swap_if(first[4u], first[11u], compare);
        swap_if(first[6u], first[8u], compare);
        swap_if(first[14u], first[16u], compare);
        swap_if(first[17u], first[20u], compare);
        swap_if(first[2u], first[5u], compare);
        swap_if(first[9u], first[12u], compare);
        swap_if(first[10u], first[13u], compare);
        swap_if(first[15u], first[18u], compare);
        swap_if(first[10u], first[11u], compare);
        swap_if(first[4u], first[7u], compare);
        swap_if(first[20u], first[21u], compare);
        swap_if(first[1u], first[2u], compare);
        swap_if(first[7u], first[15u], compare);
        swap_if(first[3u], first[9u], compare);
        swap_if(first[13u], first[19u], compare);
        swap_if(first[16u], first[18u], compare);
        swap_if(first[8u], first[14u], compare);
        swap_if(first[4u], first[6u], compare);
        swap_if(first[18u], first[21u], compare);
        swap_if(first[1u], first[4u], compare);
        swap_if(first[19u], first[21u], compare);
        swap_if(first[1u], first[3u], compare);
        swap_if(first[9u], first[10u], compare);
        swap_if(first[11u], first[13u], compare);
        swap_if(first[2u], first[6u], compare);
        swap_if(first[16u], first[20u], compare);
        swap_if(first[4u], first[9u], compare);
        swap_if(first[13u], first[18u], compare);
        swap_if(first[19u], first[20u], compare);
        swap_if(first[2u], first[3u], compare);
        swap_if(first[18u], first[20u], compare);
        swap_if(first[2u], first[4u], compare);
        swap_if(first[5u], first[17u], compare);
        swap_if(first[12u], first[14u], compare);
        swap_if(first[8u], first[12u], compare);
        swap_if(first[5u], first[7u], compare);
        swap_if(first[15u], first[17u], compare);
        swap_if(first[5u], first[8u], compare);
        swap_if(first[14u], first[17u], compare);
        swap_if(first[3u], first[5u], compare);
        swap_if(first[17u], first[19u], compare);
        swap_if(first[3u], first[4u], compare);
        swap_if(first[18u], first[19u], compare);
        swap_if(first[6u], first[10u], compare);
        swap_if(first[11u], first[16u], compare);
        swap_if(first[13u], first[16u], compare);
        swap_if(first[6u], first[9u], compare);
        swap_if(first[16u], first[17u], compare);
        swap_if(first[5u], first[6u], compare);
        swap_if(first[4u], first[5u], compare);
        swap_if(first[7u], first[9u], compare);
        swap_if(first[17u], first[18u], compare);
        swap_if(first[12u], first[15u], compare);
        swap_if(first[14u], first[15u], compare);
        swap_if(first[8u], first[12u], compare);
        swap_if(first[7u], first[8u], compare);
        swap_if(first[13u], first[15u], compare);
        swap_if(first[15u], first[17u], compare);
        swap_if(first[5u], first[7u], compare);
        swap_if(first[9u], first[10u], compare);
        swap_if(first[10u], first[14u], compare);
        swap_if(first[6u], first[11u], compare);
        swap_if(first[14u], first[16u], compare);
        swap_if(first[15u], first[16u], compare);
        swap_if(first[6u], first[7u], compare);
        swap_if(first[10u], first[11u], compare);
        swap_if(first[9u], first[12u], compare);
        swap_if(first[11u], first[13u], compare);
        swap_if(first[13u], first[14u], compare);
        swap_if(first[8u], first[9u], compare);
        swap_if(first[7u], first[8u], compare);
        swap_if(first[14u], first[15u], compare);
        swap_if(first[9u], first[10u], compare);
        swap_if(first[8u], first[9u], compare);
        swap_if(first[12u], first[14u], compare);
        swap_if(first[11u], first[12u], compare);
        swap_if(first[12u], first[13u], compare);
        swap_if(first[10u], first[11u], compare);
        swap_if(first[11u], first[12u], compare);
    }
    

    The swap_if utility function compares two parameters x and y with the predicate compare and swaps them if compare(y, x). My example uses a generic swap_if function, but you can use an optimized version if you know that you will be comparing unsigned short values with operator< anyway (you might not need such a function if your compiler recognizes and optimizes the compare-exchange, but unfortunately, not all compilers do that - I am using g++5.2 with -O3 and I still need the following function for performance):

    // Compare-exchange for unsigned short, written without an explicit branch:
    // after the call x holds the smaller of the two values and y the larger.
    void swap_if(unsigned short& x, unsigned short& y)
    {
        const unsigned short first_value = x;
        const unsigned short second_value = y;
        const unsigned short smaller = std::min(first_value, second_value);

        x = smaller;
        // a ^ b ^ min(a, b) == max(a, b): XOR-ing out the minimum leaves the
        // other operand, so no second comparison is needed.
        y = static_cast<unsigned short>(first_value ^ second_value ^ smaller);
    }
    

    Now, just to make sure that it is indeed faster, I decided to time std::nth_element when required to partial sort only the first 10 elements vs. sorting the whole 23 elements with the sorting network (1000000 times with different shuffled arrays). Here is what I get:

    std::nth_element    1158ms
    network_sort23      487ms
    

    That said, my computer has been running for a bit of time and is a bit slow, but the difference in performance is neat. I believe that this difference will remain the same when I restart my computer. I may try it later and let you know.

    Regarding how these times were generated, I used a modified version of this benchmark from my cpp-sort library. The original sorting network and swap_if functions come from there as well, so you can be sure that they have been tested more than once :)

    EDIT: here are the results now that I have restarted my computer. The network_sort23 version is still two times faster than std::nth_element:

    std::nth_element    369ms
    network_sort23      154ms
    

    EDIT²: if all you need is the median, you can trivially delete the compare-exchange units that are not needed to compute the final value that will be at the 11th position. The resulting median-finding network of size 23 that follows uses a different size-23 sorting network than the previous one, and it yields slightly better results:

    // Median-finding network for 23 elements: 104 compare-exchange units in a
    // fixed order. This is a bare statement sequence meant to sit inside a
    // function that provides `first` and `compare`, with a three-argument
    // swap_if in scope. Per the accompanying text the median ends up at
    // first[11u]; the last unit below writes that slot.
    // NOTE(review): the rest of the range is presumably NOT fully sorted
    // afterwards (units were pruned from a full sorting network) -- confirm
    // before relying on any other position.
    // The order of the units is the algorithm itself: do not reorder them.

    // Stage 1: compare-exchanges confined to indices 0..11.
    swap_if(first[0u], first[1u], compare);
    swap_if(first[2u], first[3u], compare);
    swap_if(first[4u], first[5u], compare);
    swap_if(first[6u], first[7u], compare);
    swap_if(first[8u], first[9u], compare);
    swap_if(first[10u], first[11u], compare);
    swap_if(first[1u], first[3u], compare);
    swap_if(first[5u], first[7u], compare);
    swap_if(first[9u], first[11u], compare);
    swap_if(first[0u], first[2u], compare);
    swap_if(first[4u], first[6u], compare);
    swap_if(first[8u], first[10u], compare);
    swap_if(first[1u], first[2u], compare);
    swap_if(first[5u], first[6u], compare);
    swap_if(first[9u], first[10u], compare);
    swap_if(first[1u], first[5u], compare);
    swap_if(first[6u], first[10u], compare);
    swap_if(first[5u], first[9u], compare);
    swap_if(first[2u], first[6u], compare);
    swap_if(first[1u], first[5u], compare);
    swap_if(first[6u], first[10u], compare);
    swap_if(first[0u], first[4u], compare);
    swap_if(first[7u], first[11u], compare);
    swap_if(first[3u], first[7u], compare);
    swap_if(first[4u], first[8u], compare);
    swap_if(first[0u], first[4u], compare);
    swap_if(first[7u], first[11u], compare);
    swap_if(first[1u], first[4u], compare);
    swap_if(first[7u], first[10u], compare);
    swap_if(first[3u], first[8u], compare);
    swap_if(first[2u], first[3u], compare);
    swap_if(first[8u], first[9u], compare);
    swap_if(first[2u], first[4u], compare);
    swap_if(first[7u], first[9u], compare);
    swap_if(first[3u], first[5u], compare);
    swap_if(first[6u], first[8u], compare);
    swap_if(first[3u], first[4u], compare);
    swap_if(first[5u], first[6u], compare);
    swap_if(first[7u], first[8u], compare);
    // Stage 2: compare-exchanges confined to indices 12..22.
    swap_if(first[12u], first[13u], compare);
    swap_if(first[14u], first[15u], compare);
    swap_if(first[16u], first[17u], compare);
    swap_if(first[18u], first[19u], compare);
    swap_if(first[20u], first[21u], compare);
    swap_if(first[13u], first[15u], compare);
    swap_if(first[17u], first[19u], compare);
    swap_if(first[12u], first[14u], compare);
    swap_if(first[16u], first[18u], compare);
    swap_if(first[20u], first[22u], compare);
    swap_if(first[13u], first[14u], compare);
    swap_if(first[17u], first[18u], compare);
    swap_if(first[21u], first[22u], compare);
    swap_if(first[13u], first[17u], compare);
    swap_if(first[18u], first[22u], compare);
    swap_if(first[17u], first[21u], compare);
    swap_if(first[14u], first[18u], compare);
    swap_if(first[13u], first[17u], compare);
    swap_if(first[18u], first[22u], compare);
    swap_if(first[12u], first[16u], compare);
    swap_if(first[15u], first[19u], compare);
    swap_if(first[16u], first[20u], compare);
    swap_if(first[12u], first[16u], compare);
    swap_if(first[13u], first[16u], compare);
    swap_if(first[19u], first[22u], compare);
    swap_if(first[15u], first[20u], compare);
    swap_if(first[14u], first[15u], compare);
    swap_if(first[20u], first[21u], compare);
    swap_if(first[14u], first[16u], compare);
    swap_if(first[19u], first[21u], compare);
    swap_if(first[15u], first[17u], compare);
    swap_if(first[18u], first[20u], compare);
    swap_if(first[15u], first[16u], compare);
    swap_if(first[17u], first[18u], compare);
    swap_if(first[19u], first[20u], compare);
    // Stage 3: compare-exchanges that cross between the two index groups,
    // even indices first.
    swap_if(first[0u], first[12u], compare);
    swap_if(first[2u], first[14u], compare);
    swap_if(first[4u], first[16u], compare);
    swap_if(first[6u], first[18u], compare);
    swap_if(first[8u], first[20u], compare);
    swap_if(first[10u], first[22u], compare);
    swap_if(first[2u], first[12u], compare);
    swap_if(first[10u], first[20u], compare);
    swap_if(first[4u], first[12u], compare);
    swap_if(first[6u], first[14u], compare);
    swap_if(first[8u], first[16u], compare);
    swap_if(first[10u], first[18u], compare);
    swap_if(first[8u], first[12u], compare);
    swap_if(first[10u], first[14u], compare);
    swap_if(first[10u], first[12u], compare);
    // Same crossing pattern on the odd indices.
    swap_if(first[1u], first[13u], compare);
    swap_if(first[3u], first[15u], compare);
    swap_if(first[5u], first[17u], compare);
    swap_if(first[7u], first[19u], compare);
    swap_if(first[9u], first[21u], compare);
    swap_if(first[3u], first[13u], compare);
    swap_if(first[11u], first[21u], compare);
    swap_if(first[5u], first[13u], compare);
    swap_if(first[7u], first[15u], compare);
    swap_if(first[9u], first[17u], compare);
    swap_if(first[11u], first[19u], compare);
    swap_if(first[9u], first[13u], compare);
    swap_if(first[11u], first[15u], compare);
    swap_if(first[11u], first[13u], compare);
    // Final unit: settles the value left in first[11u].
    swap_if(first[11u], first[12u], compare);
    

    There are probably smarter ways to generate median-finding networks, but I don't think that extensive research has been done on the subject. Therefore, it's probably the best method you can use as of now. The result isn't awesome but it still uses 104 compare-exchange units instead of 118.

提交回复
热议问题