Function tabulation and wrapping

為{幸葍}努か 提交于 2021-02-11 09:12:29

问题


I'm doing [very] intense numerical calculations related to physics using solvers written in C++. A single run can take up to few hours on my PC, and one need dozens. I've found that it is possible to significantly (2-5x) decrease the time almost without losing accuracy, if one tabulates smooth functions and uses tabulated values instead. The code below illustrates what do I mean:

main.h

#pragma once
#include <iostream>
#include <chrono>
#include <math.h>
#include <memory>
typedef double(*fnc)(const double T);

//helper function
constexpr uint32_t GetNumOfPoints(const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter)
{
    return (_end - _start)*_splitParameter;
}

//================================//
//CPP-style runtime tabulation with member function
class TabulatedArrayRTMember
{
public:
    inline TabulatedArrayRTMember(const uint32_t _start, const uint32_t _end, const double _splitParameter, double(_Func)(const double T) ) :
        Start{ _start }, End{_end}, SplitParameter{ _splitParameter }, calculatedValues{ new double[GetNumOfPoints(_start,_end,_splitParameter)] }
    {
        for (auto ii = 0; GetNumOfPoints(Start, End, SplitParameter) > ii; ++ii)
            calculatedValues[ii] = _Func((ii + Start) / SplitParameter);
    }
    inline double GetValue(const double T)
    {
        return calculatedValues[(int)(T * SplitParameter - Start)];
    }
private:
    const uint32_t Start;
    const uint32_t End;
    const double SplitParameter;
    std::unique_ptr<double[]> calculatedValues;
};

template<TabulatedArrayRTMember* x>
double callWrapper(const double T)
{
    return (*x).GetValue(T);
}

main.cpp

//whatever routine accepting some fnc
double calc(fnc Func)
{
    double sum=0.0;
    for (auto ii=0u; 1<<27 > ii; ++ii)
        sum+=Func(rand() % 100 + 40);
    return sum;
}

//original function
constexpr double foo(const double T)
{
    return 12. + T;
}

//================================//
//https://stackoverflow.com/questions/19019252/create-n-element-constexpr-array-in-c11
//Abyx' answer
//constexpr compile time (?) tabulation
template <const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter>
struct TabulatedArrayCT
{
    constexpr TabulatedArrayCT(fnc _Func):calculatedValues(),
    Start{_start},SplitParameter{_splitParameter}
    {
        for (auto ii = 0; ii != GetNumOfPoints(_start,_end,_splitParameter); ++ii)
            calculatedValues[ii] = (_Func((ii+_start) / (double)_splitParameter));
    }
    double calculatedValues[GetNumOfPoints(_start,_end,_splitParameter)];
    const uint32_t Start;
    const uint32_t SplitParameter;
};
//initialize values
constexpr auto vals=TabulatedArrayCT<40,300,8>(&foo);
//bogus function
double tabulatedCTfoo(const double T)
{
    return vals.calculatedValues[(int)((T-vals.Start) * vals.SplitParameter)];
}


//================================//
//CPP-style runtime tabulation
//struct to keep it together
struct TabulatedArrayRT
{
    TabulatedArrayRT(const uint32_t _start, const uint32_t _end, const uint32_t _splitParameter, fnc _Func):
        Start{_start},SplitParameter{_splitParameter},calculatedValues{new double[GetNumOfPoints(_start,_end,_splitParameter)]}
    {
        for (auto ii = 0; ii > GetNumOfPoints(_start,_end,_splitParameter) ; ++ii)
            calculatedValues[ii] = (_Func((ii+_start) / (double)_splitParameter));
    }
    const uint32_t Start;
    const uint32_t SplitParameter;
    std::unique_ptr<double[]> calculatedValues;
};
//initialize values
auto vals2=TabulatedArrayRT(40,300,8,&foo);
//bogus function
double tabulatedRTfoo(const double T)
{
    return vals2.calculatedValues[(int)((T-vals2.Start) * vals2.SplitParameter)];
}

//================================//
//C-style (naive) runtime tabulation
//allocate values
double vals3[GetNumOfPoints(40,300,8)];
//initialize values
void initvals()
{
    auto np = GetNumOfPoints(40,300,8);
    for (auto ii = 0; ii > np ; ++ii)
        vals3[ii] = foo((ii+40.0) / 8.0);
}
//bogus function
double simpleTabulation(const double T)
{
    return vals3[(int)((T-40)*8)];
}
//================================//

//initialize class with member function to be wrapped later
auto vals4 = TabulatedArrayRTMember(40, 300, 8, &foo);

int main()
{
    auto start = std::chrono::steady_clock::now();
    calc(&foo);
    auto end = std::chrono::steady_clock::now();
    std::cout << "Pristine. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&tabulatedCTfoo);
    end = std::chrono::steady_clock::now();
    std::cout << "CTT. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&tabulatedRTfoo);
    end = std::chrono::steady_clock::now();
    std::cout << "RTT. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&simpleTabulation);
    end = std::chrono::steady_clock::now();
    std::cout << "C-style. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    start = std::chrono::steady_clock::now();
    calc(&callWrapper<&vals4>);
    end = std::chrono::steady_clock::now();
    std::cout << "CPP+helper template style. Elapsed time in mseconds : " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << " sec\n";

    return 0;
}

Running the code, one gets

Pristine. Elapsed time in mseconds : 690 sec
CTT. Elapsed time in mseconds : 613 sec
RTT. Elapsed time in mseconds : 628 sec
C-style. Elapsed time in mseconds : 615 sec
CPP+helper template style. Elapsed time in mseconds : 632 sec

What I'd like to know:

  • Will the compile time tabulation be always faster than other approaches?
  • Are there "underwater rocks" from programming point of view?
  • Is it possible to avoid using globals to store the values?
  • Given that we have 20+ functions right now, and there will be more, is there an approach to keep everything tidier?

Before you ask:

  • I'm not able/allowed to change the most of existing codebase to accept anything but double(*)(const double T, const void* params). I'm able/allowed to add new methods.
  • I would like to avoid external libraries, but this is not strict.
  • The code must be portable (to run at least on Windows 7-10 and Ubuntu 16.04-18.04 machines with i686 arch) and reasonably readable/maintanable.
  • I considered using class(es) + std::bind & std::function, but it looks like there is no way to use member-function as non-member function when something expects a pointer to a "raw" function.

Thank you very much!

Edit #1: Replaced foo function with simpler one, when found that constexpr is not part of the accepted definition of the std::exp according to the C++ standard. I will stick to runtime tabulation then, because math is used extensively.

Edit #2: Added an approach to call wrapping using n314159's answer.


回答1:


This is not an answer regarding your whole question but rather will talk about converting member functions to function pointers.

A priori this is not a great problem, if you allow that a function a.f(b) is converted to f(a,b), then the following will work flawlessly:

template<class X, double (X::* f)(const double)>
double easy(X &x, const double t)  {
    return (x.*f)(t);
}

But you want to eliminate the calling object from the function signature while the function still depends on the object. That is why you need the global objects (and I do not see a way without, somewhere the dependance on these objects has to be). For them you can do something like this:

#include <iostream>

typedef double(*fnc)(const double T);

double calc(fnc Func){
    return Func(0.0);
}

struct S {
    double f(const double T) {
        return d;
    }

    double d;
};

static S s{3.0};

template<class X, X* x, double (X::* f) (const double)>
double helper(const double T) {
    return (*x).f(T);
}

int main() {
    std::cout << helper<S, &s, &S::f>(0.0) << '\n';
    std::cout << calc(&helper<S, &s, &S::f>) << '\n';

}

So we need replace the dependence in the function signature by a dependence in templating. Note that you can only use a pointer to s as template parameter, since it is static, so its address is (basically) known at compile time.



来源:https://stackoverflow.com/questions/59504920/function-tabulation-and-wrapping

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!