Thrust Sort by key on the fly or different approach?

前端 未结 1 1235
南方客
南方客 2021-01-21 12:01

I was wondering whether it is possible to sort by key using the Thrust library without creating a vector to store the keys (that is, generating the keys on the fly). For example, I have the following two

相关标签:
1条回答
  • 2021-01-21 12:37

    The original thrust example you linked performed a row-sum on an underlying data set that had row-major storage. Your question is essentially how to do the same thing when the underlying storage is column-major.

    We can use essentially the same method, but we must use permutation iterators to convert the underlying column-major storage to row-major storage "on the fly".

    For this, we can borrow the functor I described here.

    Here is a fully worked example:

    $ cat t466.cu
    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    #include <thrust/copy.h>
    #include <thrust/reduce.h>
    #include <thrust/functional.h>
    #include <thrust/sequence.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/iterator/counting_iterator.h>
    #include <iostream>
    #include <iterator>
    
    // Problem dimensions and fill pattern, expressed as typed constants instead
    // of preprocessor macros so they obey normal scoping and type checking.
    const int COLS  = 3;            // number of matrix columns
    const int ROWS  = 3;            // number of matrix rows
    const int DSIZE = COLS * ROWS;  // total number of matrix elements
    const int INIT  = 10;           // first value of the generated input sequence
    const int STEP  = 10;           // increment between consecutive input values
    
    // Converts a linear storage index into a row index by computing i % C.
    // For column-major storage the modulus must be the number of ROWS (the
    // distance between the starts of consecutive columns), so callers pass the
    // row count even though the member keeps the name C.
    template <typename T>
    struct linear_index_to_row_index : public thrust::unary_function<T,T>
    {
      T C; // modulus applied to the linear index (row count for column-major data)

      __host__ __device__
      linear_index_to_row_index(T C) : C(C) {}

      // const-qualified: the call does not modify the functor, so it can also
      // be invoked on const instances.
      __host__ __device__
      T operator()(T i) const
      {
        return i % C;
      }
    };
    
    // Translates a linear index that walks an r x c matrix in row-major order
    // into the offset of the same element in column-major storage.
    struct rm2cm_idx_functor : public thrust::unary_function<int, int>
    {
      int r; // number of rows
      int c; // number of columns

      // Usable from host and device code, matching linear_index_to_row_index.
      __host__ __device__
      rm2cm_idx_functor(int _r, int _c) : r(_r), c(_c) {}

      // const-qualified: the call only reads r and c.
      __host__ __device__
      int operator() (int idx) const {
        // Keep everything in int; the original stored these in unsigned and
        // mixed signed/unsigned arithmetic before converting back to int.
        const int my_r = idx / c;  // row of the element in the row-major walk
        const int my_c = idx % c;  // column of the element in the row-major walk
        return (my_c * r) + my_r;  // skip my_c whole columns, then my_r rows
      }
    };
    
    
    // Computes the row sums of a ROWS x COLS matrix held in column-major order
    // on the device, without materializing a key vector: the row keys and the
    // row-major view of the values are both produced by iterator adaptors.
    int main(void)
    {
      const int n_rows = ROWS;
      const int n_cols = COLS;
      const int total  = n_rows * n_cols;

      // Generate INIT, INIT+STEP, ... on the host, then mirror it on the device.
      thrust::host_vector<int> h_vals(DSIZE);
      thrust::sequence(h_vals.begin(), h_vals.end(), INIT, STEP);
      thrust::device_vector<int> vals = h_vals;

      std::cout << " Initial data: " << std::endl;
      thrust::copy(h_vals.begin(), h_vals.end(), std::ostream_iterator<int>(std::cout, ","));
      std::cout << std::endl;

      // Output buffers: one sum and one key per row.
      thrust::device_vector<int> row_sums(n_rows);
      thrust::device_vector<int> row_indices(n_rows);

      // Iterator types used to build the on-the-fly key and value sequences.
      typedef thrust::counting_iterator<int> counter_t;
      typedef thrust::transform_iterator<rm2cm_idx_functor, counter_t> storage_idx_t;
      typedef thrust::transform_iterator<linear_index_to_row_index<int>, counter_t> row_key_t;
      typedef thrust::permutation_iterator<row_key_t, storage_idx_t> key_iter_t;
      typedef thrust::permutation_iterator<thrust::device_vector<int>::iterator, storage_idx_t> val_iter_t;

      // Row-major position -> column-major storage offset.
      storage_idx_t storage_idx =
        thrust::make_transform_iterator(counter_t(0), rm2cm_idx_functor(n_rows, n_cols));
      // Column-major storage offset -> row index.
      row_key_t row_of_offset =
        thrust::make_transform_iterator(counter_t(0), linear_index_to_row_index<int>(n_rows));

      // Keys and values visited in row-major order, so equal keys are adjacent.
      key_iter_t keys_first = thrust::make_permutation_iterator(row_of_offset, storage_idx);
      key_iter_t keys_last  = thrust::make_permutation_iterator(row_of_offset + total, storage_idx + total);
      val_iter_t vals_first = thrust::make_permutation_iterator(vals.begin(), storage_idx);

      // Sum every run of values that share the same row key.
      thrust::reduce_by_key(keys_first,
                            keys_last,
                            vals_first,
                            row_indices.begin(),
                            row_sums.begin(),
                            thrust::equal_to<int>(),
                            thrust::plus<int>());

      // Bring the sums back to the host and print them.
      thrust::host_vector<int> h_row_sums = row_sums;
      std::cout << " Results: " << std::endl;
      thrust::copy(h_row_sums.begin(), h_row_sums.end(), std::ostream_iterator<int>(std::cout, ","));
      std::cout << std::endl;
      return 0;
    }
    
    $ nvcc -arch=sm_20 -o t466 t466.cu
    $ ./t466
     Initial data:
    10,20,30,40,50,60,70,80,90,
     Results:
    120,150,180,
    $
    

    Note that I have also changed the linear_index_to_row_index functor so that it returns the row index for underlying column-major storage (the previous functor returned the row index assuming the underlying storage was row-major). This only involved changing the division operation to a modulo operation and passing R instead of C to initialize the functor, because in column-major storage consecutive elements step through the rows first, so note the subtle difference.

    0 讨论(0)
提交回复
热议问题