Why is `std::copy` 5x (!) slower than `memcpy` for reading one int from a char buffer, in my test program?

前端 未结 6 1745
名媛妹妹
名媛妹妹 2021-02-05 14:31

This is a follow-up to this question where I posted this program:

#include 
#include 
#include 
#include 

        
6条回答
  •  旧巷少年郎
    2021-02-05 15:24

    I agree with @rici's comment about developing a more meaningful benchmark so I rewrote your test to benchmark copying of two vectors using memcpy(), memmove(), std::copy() and the std::vector assignment operator:

    #include <algorithm>
    #include <cassert>
    #include <chrono>
    #include <cstring>
    #include <functional>
    #include <iostream>
    #include <random>
    #include <vector>
    
    // Container type copied by every benchmark case. The element type is an
    // integer; swap it here (e.g. for long long) to benchmark another width.
    typedef std::vector<int> vector_type;
    
    // Benchmark case: copies the contents of srcv into destv's storage with one
    // std::memcpy call. The byte count comes from srcv's size; destv is not
    // resized here.
    void test_memcpy(vector_type & destv, vector_type const & srcv)
    {
        auto const byte_count = srcv.size() * sizeof(vector_type::value_type);
    
        std::memcpy(destv.data(), srcv.data(), byte_count);
    }
    
    // Benchmark case: copies the contents of srcv into destv's storage with one
    // std::memmove call. The byte count comes from srcv's size; destv is not
    // resized here.
    void test_memmove(vector_type & destv, vector_type const & srcv)
    {
        auto const byte_count = srcv.size() * sizeof(vector_type::value_type);
    
        std::memmove(destv.data(), srcv.data(), byte_count);
    }
    
    // Benchmark case: copies the range [src.begin(), src.end()) into dest with
    // std::copy, writing from dest.begin() onward. dest is not resized here.
    void test_std_copy(vector_type & dest, vector_type const & src)
    {
        vector_type::const_iterator const first = src.begin();
        vector_type::const_iterator const last  = src.end();
    
        std::copy(first, last, dest.begin());
    }
    
    // Benchmark case: copies src into dest using the vector's copy-assignment
    // operator.
    void test_assignment(vector_type & dest, vector_type const & src)
    {
        dest = src;
    }
    
    auto
    benchmark(std::function copy_func)
        ->decltype(std::chrono::milliseconds().count())
    {
        std::random_device rd;
        std::mt19937 generator(rd());
        std::uniform_int_distribution distribution;
    
        static vector_type::size_type const num_elems = 2000;
    
        vector_type dest(num_elems);
        vector_type src(num_elems);
    
        // Fill the source and destination vectors with random data.
        for (vector_type::size_type i = 0; i < num_elems; ++i) {
            src.push_back(distribution(generator));
            dest.push_back(distribution(generator));
        }
    
        static int const iterations = 50000;
    
        std::chrono::time_point start, end;
    
        start = std::chrono::system_clock::now();
    
        for (int i = 0; i != iterations; ++i)
            copy_func(dest, src);
    
        end = std::chrono::system_clock::now();
    
        assert(src == dest);
    
        return
            std::chrono::duration_cast(
                end - start).count();
    }
    
    // Runs every copy strategy through benchmark() and prints its elapsed time.
    //
    // Each measurement is its own statement: when several benchmark() calls sit
    // inside one chained << expression, their evaluation order is unspecified
    // before C++17, so the cases could run in a different order than printed.
    // std::endl is kept so each result is flushed as soon as it is available.
    int main()
    {
        std::cout << "memcpy:     " << benchmark(test_memcpy)     << " ms" << std::endl;
        std::cout << "memmove:    " << benchmark(test_memmove)    << " ms" << std::endl;
        std::cout << "std::copy:  " << benchmark(test_std_copy)   << " ms" << std::endl;
        std::cout << "assignment: " << benchmark(test_assignment) << " ms" << std::endl;
        std::cout << std::endl;
    }
    

    I went a little overboard with C++11 just for fun.

    Here are the results I get on my 64-bit Ubuntu box with g++ 4.6.3:

    $ g++ -O3 -std=c++0x foo.cpp ; ./a.out 
    memcpy:     33 ms
    memmove:    33 ms
    std::copy:  33 ms
    assignment: 34 ms
    

    The results are all quite comparable! I also get comparable times in all test cases when I change the vector's element type to a different integer type, e.g. long long.

    Unless my benchmark rewrite is broken, it looks like your own benchmark isn't performing a valid comparison. HTH!

提交回复
热议问题