问题
Library code:
/**
 * Minimal callback holder: stores one C-style callback together with an
 * opaque context pointer and invokes the callback on demand.
 */
class Resource
{
public:
    /// Signature of a registrable callback; the trailing void* receives the
    /// context pointer that was supplied at registration time.
    typedef void (*func_sig)(int, char, double, void*);

    /**
     * Registers the callback that call_app_code() will invoke.
     *
     * @param app_obj Opaque pointer handed back to @p func unchanged.
     * @param func    Callback to invoke; nullptr clears the registration.
     */
    void registerCallback(void* app_obj, func_sig func)
    {
        _app_obj = app_obj;
        _func = func;
    }

    /// Invokes the registered callback with fixed sample arguments.
    /// Does nothing when no callback has been registered.
    void call_app_code()
    {
        if (_func != nullptr) {
            _func(231, 'a', 432.4234, _app_obj);
        }
    }
    //Other useful methods
private:
    void* _app_obj = nullptr;   // context passed back to the callback
    func_sig _func = nullptr;   // nullptr until registerCallback() is called
    //Other members
};
Application Code:
/**
 * Application-side class whose member function should ultimately receive
 * the callback. Only the declaration of callme() appears in this snippet.
 */
class App
{
public:
    /// Handler that the free-function trampoline forwards its arguments to.
    void callme(int i, char c, double d);
    //other functions, members;
};
void callHelper(int i, char c, double d, void* app_obj)
{
static_cast<App*>(app_obj)->callme(i,c,d);
}
/// Wires an App instance into a Resource through the callHelper trampoline.
int main()
{
    // `a` is declared before `r`, so it outlives the pointer stored in `r`.
    App a;
    Resource r;
    // The method is spelled registerCallback (capital C) in Resource.
    r.registerCallback(&a, callHelper);
    //Do something
}
The above is a minimal implementation of a callback mechanism. It is more verbose than std::function and, unlike std::function, doesn't support binding, placeholders, etc.
If I use std::function or boost::function for the above use case, will there be any performance drawbacks? This callback is going to be in the very critical path of a real-time application. I have heard that boost::function uses virtual functions to do the actual dispatch. Will that be optimized out if no binding/placeholders are involved?
Update
For those interested in inspecting the assemblies in latest compilers: https://gcc.godbolt.org/z/-6mQvt
回答1:
I have wondered about this quite often myself, so I wrote a very minimal benchmark that attempts to compare the performance of each callback technique by having each one increment its own atomic counter in a loop.
Keep in mind, these are bare calls to functions that do only one thing, atomically incrementing its counter;
By checking the generated assembler output you can see that a bare C function-pointer loop is compiled into 3 CPU instructions; a C++11 std::function call adds just 2 more CPU instructions, thus 5 in our example. In conclusion: it hardly matters which function-pointer technique you use; the overhead differences are very small in any case.
(Confusingly, however, the assigned lambda expression seems to run faster than the others, even faster than the plain C one.)
Compile the example with: clang++ -o tests/perftest-fncb tests/perftest-fncb.cpp -std=c++11 -pthread -lpthread -lrt -O3 -march=native -mtune=native
#include <functional>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
typedef unsigned long long counter_t;

/**
 * One hit counter per callback technique under test.
 *
 * Each counter is incremented by its own thread, so every field is aligned
 * to a separate cache line. Without the padding all five counters would sit
 * in the same line and the threads would slow each other down through false
 * sharing, skewing the comparison between techniques.
 */
struct Counter {
    /// Assumed cache-line size in bytes (64 is typical for x86-64; adjust
    /// for targets with a different line size).
    static constexpr unsigned kCacheLineSize = 64;

    alignas(kCacheLineSize) volatile counter_t bare;    ///< raw function pointer
    alignas(kCacheLineSize) volatile counter_t cxx;     ///< std::function -> free function
    alignas(kCacheLineSize) volatile counter_t cxo1;    ///< std::function -> member function
    alignas(kCacheLineSize) volatile counter_t virt;    ///< std::function -> virtual member function
    alignas(kCacheLineSize) volatile counter_t lambda;  ///< std::function -> lambda

    /// Starts every counter at zero.
    Counter() : bare(0), cxx(0), cxo1(0), virt(0), lambda(0) {}
} counter;
/// Free function installed as the raw function-pointer callback;
/// atomically adds one to Counter::bare.
void bare(Counter* counter)
{
    volatile counter_t* const slot = &counter->bare;
    __sync_fetch_and_add(slot, 1);
}
/// Free function wrapped in a std::function via std::bind;
/// atomically adds one to Counter::cxx.
void cxx(Counter* counter)
{
    volatile counter_t* const slot = &counter->cxx;
    __sync_fetch_and_add(slot, 1);
}
/// Object whose member functions serve as bound callback targets.
struct CXO1 {
    /// Non-virtual member function; atomically adds one to Counter::cxo1.
    void cxo1(Counter* counter)
    {
        __sync_fetch_and_add(&counter->cxo1, 1);
    }

    /// Virtual member function; atomically adds one to Counter::virt.
    virtual void virt(Counter* counter)
    {
        __sync_fetch_and_add(&counter->virt, 1);
    }
};

/// Single global instance that the member-function callbacks are bound to.
CXO1 cxo1;
void (*bare_cb)(Counter*) = nullptr;
std::function<void(Counter*)> cxx_cb;
std::function<void(Counter*)> cxo1_cb;
std::function<void(Counter*)> virt_cb;
std::function<void(Counter*)> lambda_cb;
/// Thread entry point: invokes bare_cb on the global counter forever; never returns.
void* bare_main(void* p)
{
    for (;;) {
        bare_cb(&counter);
    }
}
/// Thread entry point: invokes cxx_cb on the global counter forever; never returns.
void* cxx_main(void* p)
{
    for (;;) {
        cxx_cb(&counter);
    }
}
/// Thread entry point: invokes cxo1_cb on the global counter forever; never returns.
void* cxo1_main(void* p)
{
    for (;;) {
        cxo1_cb(&counter);
    }
}
/// Thread entry point: invokes virt_cb on the global counter forever; never returns.
void* virt_main(void* p)
{
    for (;;) {
        virt_cb(&counter);
    }
}
/// Thread entry point: invokes lambda_cb on the global counter forever; never returns.
void* lambda_main(void* p)
{
    for (;;) {
        lambda_cb(&counter);
    }
}
/**
 * Benchmark driver: installs one callback per dispatch technique, starts one
 * worker thread per technique, then prints a snapshot of all counters once
 * per second, forever.
 *
 * @return 1 if a worker thread could not be created; otherwise never returns.
 */
int main()
{
    // Install every callback before any worker thread can invoke it.
    bare_cb = &bare;
    cxx_cb = std::bind(&cxx, std::placeholders::_1);
    cxo1_cb = std::bind(&CXO1::cxo1, &cxo1, std::placeholders::_1);
    virt_cb = std::bind(&CXO1::virt, &cxo1, std::placeholders::_1);
    lambda_cb = [](Counter* counter) { __sync_fetch_and_add(&counter->lambda, 1); };

    struct Worker {
        const char* label;       // used only in the error message
        void* (*entry)(void*);   // thread entry point
        pthread_t thread;        // filled in by pthread_create
    };
    Worker workers[] = {
        {"bare", &bare_main, pthread_t()},
        {"cxx", &cxx_main, pthread_t()},
        {"cxo1", &cxo1_main, pthread_t()},
        {"virt", &virt_main, pthread_t()},
        {"lambda", &lambda_main, pthread_t()},
    };

    for (Worker& worker : workers) {
        // pthread_create reports failure through its return value; a worker
        // that never started would otherwise show up as a counter stuck at 0.
        const int rc = pthread_create(&worker.thread, nullptr, worker.entry, nullptr);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for the %s worker (error %d)\n", worker.label, rc);
            return 1;
        }
    }

    for (unsigned long long n = 1; true; ++n) {
        sleep(1);
        // Copy the counters so all printed values come from one snapshot pass.
        Counter c = counter;
        printf(
            "%15llu bare function pointer\n"
            "%15llu C++11 function object to bare function\n"
            "%15llu C++11 function object to object method\n"
            "%15llu C++11 function object to object method (virtual)\n"
            "%15llu C++11 function object to lambda expression %30llu-th second.\n\n",
            c.bare, c.cxx, c.cxo1, c.virt, c.lambda, n
        );
    }
}
回答2:
std::function performs type erasure on the function type, and there is more than one way to implement it, so you should probably state which version of which compiler you are using to get an exact answer.
boost::function is largely identical to std::function and comes with an FAQ entry on call overhead and a general section on performance. Those give some hints on how a function object performs. Whether this applies in your case depends on your implementation, but the numbers shouldn't be significantly different.
回答3:
I ran a quick benchmark using Google Benchmark. These are the results:
Run on (4 X 2712 MHz CPU s)
----------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------
RawFunctionPointer 11 ns 11 ns 56000000
StdBind 12 ns 12 ns 64000000
StdFunction 11 ns 11 ns 56000000
Lambda 9 ns 9 ns 64000000
It seems that the fastest solution is using lambdas (just as user christianparpart mentioned in this thread). The code I used for the benchmark can be found below.
#include <benchmark/benchmark.h>
#include <cstdlib>
#include <cstdio>
#include <functional>
// Volatile sink written by the benchmarked callees.
static volatile int global_var = 0;
// Callee used by the function-pointer, std::function and std::bind benchmarks:
// stores x + x + 3 into global_var.
void my_int_func(int x)
{
global_var = x + x + 3;
// Presumably keeps the compiler from discarding the store or the argument.
benchmark::DoNotOptimize(global_var);
benchmark::DoNotOptimize(x);
}
// Benchmark: call my_int_func through a plain function pointer.
// rand() is called inside the benchmark loop, so its cost is part of every
// iteration that is measured.
// NOTE(review): time() is declared in <ctime>, which this snippet does not
// include directly; presumably it arrives transitively - confirm.
static void RawFunctionPointer(benchmark::State &state)
{
void (*bar)(int) = &my_int_func;
// Seed rand() once, before the benchmark loop.
srand (time(nullptr));
for (auto _ : state)
{
bar(rand());
// Presumably meant to stop the compiler from folding the indirect call.
benchmark::DoNotOptimize(my_int_func);
benchmark::DoNotOptimize(bar);
}
}
// Benchmark: call my_int_func through a std::function<void(int)> wrapper.
// rand() is called inside the benchmark loop, so its cost is part of every
// iteration that is measured.
static void StdFunction(benchmark::State &state)
{
std::function<void(int)> bar = my_int_func;
// Seed rand() once, before the benchmark loop.
srand (time(nullptr));
for (auto _ : state)
{
bar(rand());
// Presumably meant to stop the compiler from folding the wrapped call.
benchmark::DoNotOptimize(my_int_func);
benchmark::DoNotOptimize(bar);
}
}
// Benchmark: call my_int_func through the object returned by std::bind.
// `bar` is declared with auto, so the bind object is invoked directly and is
// not wrapped in a std::function.
// rand() is called inside the benchmark loop, so its cost is part of every
// iteration that is measured.
static void StdBind(benchmark::State &state)
{
auto bar = std::bind(my_int_func, std::placeholders::_1);
// Seed rand() once, before the benchmark loop.
srand (time(nullptr));
for (auto _ : state)
{
bar(rand());
// Presumably meant to stop the compiler from folding the bound call.
benchmark::DoNotOptimize(my_int_func);
benchmark::DoNotOptimize(bar);
}
}
// Benchmark: call a lambda whose body repeats the statements of my_int_func.
// `bar` is declared with auto, so the closure is invoked directly and is not
// wrapped in a std::function; unlike the other benchmarks, my_int_func itself
// is never called here.
// rand() is called inside the benchmark loop, so its cost is part of every
// iteration that is measured.
static void Lambda(benchmark::State &state)
{
auto bar = [](int x) {
global_var = x + x + 3;
benchmark::DoNotOptimize(global_var);
benchmark::DoNotOptimize(x);
};
// Seed rand() once, before the benchmark loop.
srand (time(nullptr));
for (auto _ : state)
{
bar(rand());
// my_int_func is passed to DoNotOptimize here although this benchmark does not call it.
benchmark::DoNotOptimize(my_int_func);
benchmark::DoNotOptimize(bar);
}
}
// Register the four benchmark functions with Google Benchmark.
BENCHMARK(RawFunctionPointer);
BENCHMARK(StdBind);
BENCHMARK(StdFunction);
BENCHMARK(Lambda);
// Expands to the program entry point supplied by the library.
BENCHMARK_MAIN();
来源:https://stackoverflow.com/questions/14306497/performance-of-stdfunction-compared-to-raw-function-pointer-and-void-this