Function tabulation and wrapping

问题

I'm doing [very] intense numerical calculations related to physics using solvers written in C++. A single run can take up to few hours on my PC, and one need dozens. I've found that it is possible to significantly (2-5x) decrease the time almost without losing accuracy, if one tabulates smooth functions and uses tabulated values instead. The code below illustrates what do I mean:

main.h

#pragma once
#include <iostream>
#include <chrono>
#include <math.h>
#include <memory>
typedef double(*fnc)(const double T);

//helper function
constexpr uint32_t GetNumOfPoints(const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter)
{
    return (_end - _start)*_splitParameter;
}

//================================//
//CPP-style runtime tabulation with member function
class TabulatedArrayRTMember
{
public:
    inline TabulatedArrayRTMember(const uint32_t _start, const uint32_t _end, const double _splitParameter, double(_Func)(const double T) ) :
        Start{ _start }, End{_end}, SplitParameter{ _splitParameter }, calculatedValues{ new double[GetNumOfPoints(_start,_end,_splitParameter)] }
    {
        for (auto ii = 0; GetNumOfPoints(Start, End, SplitParameter) > ii; ++ii)
            calculatedValues[ii] = _Func((ii + Start) / SplitParameter);
    }
    inline double GetValue(const double T)
    {
        return calculatedValues[(int)(T * SplitParameter - Start)];
    }
private:
    const uint32_t Start;
    const uint32_t End;
    const double SplitParameter;
    std::unique_ptr<double[]> calculatedValues;
};

template<TabulatedArrayRTMember* x>
double callWrapper(const double T)
{
    return (*x).GetValue(T);
}

main.cpp

//whatever routine accepting some fnc
double calc(fnc Func)
{
    double sum=0.0;
    for (auto ii=0u; 1<<27 > ii; ++ii)
        sum+=Func(rand() % 100 + 40);
    return sum;
}

//original function
constexpr double foo(const double T)
{
    return 12. + T;
}

//================================//
//https://stackoverflow.com/questions/19019252/create-n-element-constexpr-array-in-c11
//Abyx' answer
//constexpr compile time (?) tabulation
template <const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter>
struct TabulatedArrayCT
{
    constexpr TabulatedArrayCT(fnc _Func):calculatedValues(),
    Start{_start},SplitParameter{_splitParameter}
    {
        for (auto ii = 0; ii != GetNumOfPoints(_start,_end,_splitParameter); ++ii)
            calculatedValues[ii] = (_Func((ii+_start) / (double)_splitParameter));
    }
    double calculatedValues[GetNumOfPoints(_start,_end,_splitParameter)];
    const uint32_t Start;
    const uint32_t SplitParameter;
};
//initialize values
constexpr auto vals=TabulatedArrayCT<40,300,8>(&foo);
//bogus function
double tabulatedCTfoo(const double T)
{
    return vals.calculatedValues[(int)((T-vals.Start) * vals.SplitParameter)];
}


//================================//
//CPP-style runtime tabulation
//struct to keep it together
struct TabulatedArrayRT
{
    TabulatedArrayRT(const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter, fnc _Func):
        Start{_start},SplitParameter{_splitParameter},calculatedValues{new double[GetNumOfPoints(_start,_end,_splitParameter)]}
    {
        for (auto ii = 0; ii > GetNumOfPoints(_start,_end,_splitParameter) ; ++ii)
            calculatedValues[ii] = (_Func((ii+_start) / (double)_splitParameter));
    }
    const uint32_t Start;
    const uint32_t SplitParameter;
    std::unique_ptr<double[]> calculatedValues;
};
//initialize values
auto vals2=TabulatedArrayRT(40,300,8,&foo);
//bogus function
double tabulatedRTfoo(const double T)
{
    return vals2.calculatedValues[(int)((T-vals2.Start) * vals2.SplitParameter)];
}

//================================//
//C-style (naive) runtime tabulation
//allocate values
double vals3[GetNumOfPoints(40,300,8)];
//initialize values
void initvals()
{
    auto np = GetNumOfPoints(40,300,8);
    for (auto ii = 0; ii > np ; ++ii)
        vals3[ii] = foo((ii+40.0) / 8.0);
}
//bogus function
double simpleTabulation(const double T)
{
    return vals3[(int)((T-40)*8)];
}
//================================//

//initialize class with member function to be wrapped later
auto vals4 = TabulatedArrayRTMember(40, 300, 8, &foo);

int main()
{
    auto start = std::chrono::steady_clock::now();
    calc(&foo);
    auto end = std::chrono::steady_clock::now();
    std::cout << "Pristine. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&tabulatedCTfoo);
    end = std::chrono::steady_clock::now();
    std::cout << "CTT. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&tabulatedRTfoo);
    end = std::chrono::steady_clock::now();
    std::cout << "RTT. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&simpleTabulation);
    end = std::chrono::steady_clock::now();
    std::cout << "C-style. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&callWrapper<&vals4>);
    end = std::chrono::steady_clock::now();
    std::cout << "CPP+helper template style. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    return 0;
}

Running the code, one gets

Pristine. Elapsed time in mseconds : 690 sec
CTT. Elapsed time in mseconds : 613 sec
RTT. Elapsed time in mseconds : 628 sec
C-style. Elapsed time in mseconds : 615 sec
CPP+helper template style. Elapsed time in mseconds : 632 sec

What I'd like to know:

Will the compile time tabulation be always faster than other approaches?
Are there "underwater rocks" from programming point of view?
Is it possible to avoid using globals to store the values?
Given that we have 20+ functions right now, and there will be more, is there an approach to keep everything tidier?

Before you ask:

I'm not able/allowed to change the most of existing codebase to accept anything but double(*)(const double T, const void* params). I'm able/allowed to add new methods.
I would like to avoid external libraries, but this is not strict.
The code must be portable (to run at least on Windows 7-10 and Ubuntu 16.04-18.04 machines with i686 arch) and reasonably readable/maintanable.
I considered using class(es) + std::bind & std::function, but it looks like there is no way to use member-function as non-member function when something expects a pointer to a "raw" function.

Thank you very much!

Edit #1: Replaced foo function with simpler one, when found that constexpr is not part of the accepted definition of the std::exp according to the C++ standard. I will stick to runtime tabulation then, because math is used extensively.

Edit #2: Added an approach to call wrapping using n314159's answer.

回答1:

This is not an answer regarding your whole question but rather will talk about converting member functions to function pointers.

A priori this is not a great problem, if you allow that a function a.f(b) is converted to f(a,b), then the following will work flawlessly:

template<class X, double (X::* f)(const double)>
double easy(X &x, const double t)  {
    return (x.*f)(t);
}

But you want to eliminate the calling object from the function signature while the function still depends on the object. That is why you need the global objects (and I do not see a way without, somewhere the dependance on these objects has to be). For them you can do something like this:

#include <iostream>

typedef double(*fnc)(const double T);

double calc(fnc Func){
    return Func(0.0);
}

struct S {
    double f(const double T) {
        return d;
    }

    double d;
};

static S s{3.0};

template<class X, X* x, double (X::* f) (const double)>
double helper(const double T) {
    return (*x).f(T);
}

int main() {
    std::cout << helper<S, &s, &S::f>(0.0) << '\n';
    std::cout << calc(&helper<S, &s, &S::f>) << '\n';

}

So we need replace the dependence in the function signature by a dependence in templating. Note that you can only use a pointer to s as template parameter, since it is static, so its address is (basically) known at compile time.

来源：https://stackoverflow.com/questions/59504920/function-tabulation-and-wrapping

标签

c++

performance

optimization