Matrix inversion in OpenCL

前端 未结 6 1215
终归单人心
终归单人心 2021-02-06 06:39

I am trying to accelerate some computations using OpenCL and part of the algorithm consists of inverting a matrix. Is there any open-source library or freely available code to c

相关标签:
6条回答
  • 2021-02-06 07:16

    I make calculus up to 2k x 2k at CPU using multithread using eigen lib, so it is now 3.5-3.65 times faster (depends of matrix size) than using one thread. I used an Intel Xeon 3.5Ghz E5-1620 v3 processor and 16Gb ram. (Unfortunately I deleted the old version to add exact values, but if has some priority I could rewrite the sw)

    This is my matrix inverse algorithm I used to compare with. (It is correct as I stated making a lot of tests again excel result):

    /*
    Uses 2D arrays.
    Main routines are:
    init_2Dvector() that initializes any 2d vector (can be uchar, char, int, float or double)
    multiply_2Dvector()
    inverse()
    */
    
    #include<iostream>
    #include <vector>
    #include <stdlib.h>
    #include <time.h>
    
    using namespace std;
    
    /*
    void print_2Dvector(vector<vector<double> >& vec)
    {
        size_t xmax, ymax;
        ymax = vec.size();
        xmax = vec[0].size(); 
    
        int x, y;
        for (y = 0; y < ymax; y++)
        {
            for (x = 0; x < xmax; x++)
                cout << vec[y][x] << " \t";
            cout << endl;
        }
    }*/
    
    void print_2Dvector(vector<vector<double> >& vec,char *format="%lg \t")
    {
        size_t xmax, ymax;
        ymax = vec.size();
        xmax = vec[0].size();
    
        int x, y;
        for (y = 0; y < ymax; y++)
        {
            {
                for (x = 0; x < xmax; x++)
                    printf(format, vec[y][x]);
            }
            cout << endl;
        }
    }
    
    //Resizes to y_dim,x_dim any kind of 2d array:
    template<typename T>
    void init_2Dvector(vector<vector<T> >& vec, size_t y_dim, size_t x_dim)
    {
        vec.resize(y_dim);
        for (size_t i = 0; i < vec.size(); i++)
            vec[i].resize(x_dim);
    }
    //Returns vec1*vec2. vec1 and 2 are not touch
    vector< vector<double> > multiply_2Dvector(vector< vector<double> > vec1, vector< vector<double> > vec2)
    {
        size_t xmax, ymax;
        ymax = vec1.size();   
        xmax = vec1[0].size();
        vector< vector<double> > vec3;
        if ((ymax != vec2[0].size()) || (xmax != vec2.size()))
        {
            cout << "ERROR on dim2_multiply() dimensions of vec2 not corresponding with vec1 ones" << endl; getchar(); return{};//returns a null
        }
        init_2Dvector(vec3, ymax, ymax);
        cout << "dimensions of vec3=" << vec3.size() << " x " << vec3[0].size() << endl;
        double xx;
        for (int y = 0; y < ymax; y++)
            for (int x = 0; x < ymax; x++)
            {
                xx = 0.0;
                for (int t = 0; t < xmax; t++)
                    xx += vec1[y][t] * vec2[t][x];
                vec3[y][x] = xx;
            }
        return vec3;//ok
    }
    
    //returns inverse of x2, x2 is not modified
    vector< vector<double> > inverse(vector< vector<double> > x)
    {
        if (x.size() != x[0].size())
        {
            cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
        }
    
        size_t dim = x.size();
        int i, j, ord;
        vector< vector<double> > y(dim,vector<double>(dim));//output
        //init_2Dvector(y, dim, dim);
        //1. Unity array y: 
        for (i = 0; i < dim; i++)
        {
            y[i][i] = 1.0;
            for (j = i+1; j < dim; j++)
            {
                y[i][j]= y[j][i] = 0.0;
            }
        }
    
        double diagon, coef;
        double *ptrx, *ptry, *ptrx2, *ptry2;
        for (ord = 0; ord<dim; ord++)
        {
            //2 Hacemos diagonal de x =1
            int i2;
            if (fabs(x[ord][ord])<1e-15) //Si el elemento diagonal es 0 sumamos una columna que no sea 0 el elemento correspondiente
            {
                for (i2 = ord + 1; i2<dim; i2++)
                {
                    if (fabs(x[i2][ord])>1e-15) break;
                }
                if (i2 >= dim)
                    return{};//error, returns null
                for (i = 0; i<dim; i++)//sumo la linea que no es 0 el de la misma fila de ord
                {
                    x[ord][i] += x[i2][i];
                    y[ord][i] += y[i2][i];
                }
            }
            diagon = 1.0/x[ord][ord];
            ptry = &y[ord][0];
            ptrx = &x[ord][0];
            for (i = 0; i < dim; i++)
            {
                *ptry++ *= diagon;
                *ptrx++ *= diagon;
            }
    
            //Hacemos '0' la columna ord salvo elemento diagonal:
            for (i = 0; i<dim; i++)//Empezamos por primera fila
            {
                if (i == ord) continue;
                coef = x[i][ord];//elemento ha hacer 0 
                if (fabs(coef)<1e-15) continue; //si es cero se evita
                ptry = &y[i][0];
                ptry2 = &y[ord][0];
                ptrx = &x[i][0];
                ptrx2 = &x[ord][0];
                for (j = 0; j < dim; j++)
                {
                    *ptry++ = *ptry - coef * (*ptry2++);//1ª matriz
                    *ptrx++ = *ptrx - coef * (*ptrx2++);//2ª matriz
                }
            }
        }//end ord
        return y;
    }
    
    
    void test_5_inverse()
    {
        vector< vector<double> > vec1 = {
            {0,-5,0,7,33,11,-1},
            {72,0,-11,7,9,33,5 },
            {-13,31,-5,15,29,30,24 },
            {-24,9,8,-23,31,-12,4 },
            {-3,-22,4,-24,-5,27,-10 },
            {-10,-21,-16,-32,-11,20,14 },
            {5,30,13,-32,29,-13,-13 }
        };
        vector< vector<double> > vec2;
        vec2 = inverse(vec1);
        vector< vector<double> > vec3;
        vec3 = multiply_2Dvector(vec1, vec2);
    
        cout << "initial array (must be unmodified):" << endl;
        print_2Dvector(vec1);
    
        cout << "Must be diagon array:" << endl;
        print_2Dvector(vec3," %8.3lf");
        cout << endl;
    }
    
    
    void test_6_inverse(int dim)
    {
        vector< vector<double> > vec1(dim, vector<double>(dim));
        for (int i=0;i<dim;i++)
            for (int j = 0; j < dim; j++)
            {
                vec1[i][j] = (-1.0 + 2.0*rand() / RAND_MAX) * 10000;
            }
    
        vector< vector<double> > vec2;
        double ini, end;
        ini = (double)clock();
        vec2 = inverse(vec1);
        end = (double)clock();
        cout << "Time inverse =" << (end - ini) / CLOCKS_PER_SEC << endl;
        vector< vector<double> > vec3;
        vec3 = multiply_2Dvector(vec1, vec2);
    
        cout << "initial array (must be unmodified):" << endl;
        //print_2Dvector(vec1);
    
        cout << "Must be diagon array:" << endl;
        //print_2Dvector(vec3, " %8.3lf");
        cout << endl;
    }
    
    int main()
    {
        vector< vector<double> > vec1;
        init_2Dvector(vec1, 10, 5);    //size_t ymax = vec1.size(),xmax = vec1[0].size();
        //test_2_dimension_vector();
        //test_one_dimension_vector();
        test_5_inverse();
        test_6_inverse(1300);
        cout << endl << "=== END ===" << endl; getchar(); 
        return 1;
    }
    
    0 讨论(0)
  • 2021-02-06 07:21

    I know this is kind of late, but if you are trying to do any matrix calculations on a matrix that is that small (60-100 rows), then the computations are going to be much faster on a CPU rather than a GPU because of the time it takes to copy the information from main memory to the GPU's memory. If you are wanting to do this, then I would suggest looking into using a parallel language such as OpenMP or MPI as these would allow you to parallelize your code to speed up the calculations on the CPU.

    0 讨论(0)
  • 2021-02-06 07:26

    The original question (now 7 years old) actually was solved 4 years later in a paper describing matrix inversion in CUDA based on Gauss-Jordan. It attempts to distribute the calculations across different threads, and gives detailed performance indications for matrices up to 2048 in size.

    While not OpenCL, the general ideas will translate from CUDA quite easily.

    0 讨论(0)
  • 2021-02-06 07:34

    Look at ViennaCL: http://viennacl.sourceforge.net/

    0 讨论(0)
  • 2021-02-06 07:38

    I don't have an implementation in Open CL, but both "Numerical Recipes" and Gil Strang's "Into to Applied Math" have wonderful explanations that would be easy to code. "NR" has C code that you could adapt.

    calculate the inverse

    This is incorrect. You aren't calculating an inverse with LU decomposition, you're decomposing the matrix. If you wanted the inverse, you'd have to do forward back substitution with a series of unit vectors. It's a small but important difference.

    0 讨论(0)
  • 2021-02-06 07:40

    Check CULA

    http://www.culatools.com/ http://www.culatools.com/versions/basic

    0 讨论(0)
提交回复
热议问题