Using openMP to get the index of minimum element parallelly

前端未结

关注

 3  1540

I tried to write this code

float* theArray; // the array to find the minimum value
int   index, i;
float thisValue, min;

index    = 0;
min = theArray[0];
#pragm


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  旧巷少年郎        
                
              
                            
                2021-02-06 10:16
              
            
            
                                                                       
Because you're not only trying to find the minimal value (reduction(min:___)) but also retain the index, you need to make the check critical. This can significantly slow down the loop (as reported). In general, make sure that there is enough work so you don't encounter overhead as in this question. An alternative would be to have each thread find the minimum and it's index and save them to a unique variable and have the master thread do a final check on those as in the following program.

#include <iostream>
#include <vector>
#include <ctime>
#include <random>
#include <omp.h>

using std::cout;
using std::vector;

void initializeVector(vector<double>& v)
{
    std::mt19937 generator(time(NULL));
    std::uniform_real_distribution<double> dis(0.0, 1.0);
    v.resize(100000000);
    for(int i = 0; i < v.size(); i++)
    {
        v[i] = dis(generator);
    }
}

int main()
{
    vector<double> vec;
    initializeVector(vec);

    float minVal = vec[0];
    int minInd = 0;

    int startTime = clock();

    for(int i = 1; i < vec.size(); i++)
    {
        if(vec[i] < minVal)
        {
            minVal = vec[i];
            minInd = i;
        }

    }

    int elapsedTime1 = clock() - startTime;

    // Change the number of threads accordingly
    vector<float> threadRes(4, std::numeric_limits<float>::max());
    vector<int>   threadInd(4);

    startTime = clock();
#pragma omp parallel for
    for(int i = 0; i < vec.size(); i++)
    {
        {
            if(vec[i] < threadRes[omp_get_thread_num()])
            {
                threadRes[omp_get_thread_num()] = vec[i];
                threadInd[omp_get_thread_num()] = i;
            }
        }

    }

    float minVal2 = threadRes[0];
    int minInd2 = threadInd[0];

    for(int i = 1; i < threadRes.size(); i++)
    {
        if(threadRes[i] < minVal2)
        {
            minVal2 = threadRes[i];
            minInd2 = threadInd[i];
        }
    }

    int elapsedTime2 = clock() - startTime;

    cout << "Min " << minVal << " at " << minInd << " took " << elapsedTime1 << std::endl;
    cout << "Min " << minVal2 << " at " << minInd2 << " took " << elapsedTime2 << std::endl;
}


Please note that with optimizations on and nothing else to be done in the loop, the serial version seems to remain king. With optimizations turned off, OMP gains the upper hand.

P.S. you wrote reduction(min:min_dist) and the proceeded to use min instead of min_dist.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  一向        
                
              
                            
                2021-02-06 10:20
              
            
            
                                                                       
I don't know of an elegant want to do a minimum reduction and save an index. I do this by finding the local minimum and index for each thread and then the global minimum and index in a critical section.

index = 0;
min = theArray[0];
#pragma omp parallel
{
    int index_local = index;
    float min_local = min;  
    #pragma omp for nowait
    for (i = 1; i < size; i++) {        
        if (theArray[i] < min_local) {
            min_local = theArray[i];
            index_local = i;
        }
    }
    #pragma omp critical 
    {
        if (min_local < min) {
            min = min_local;
            index = index_local;
        }
    }
}


With OpenMP 4.0 it's possible to use user-defined reductions. A user-defined minimum reduction can be defined like this

struct Compare { float val; sizt_t index; };    
#pragma omp declare reduction(minimum : struct Compare : omp_out = omp_in.val < omp_out.val ? omp_in : omp_out)


Then the reduction can be done like this

struct Compare min; 
min.val = theArray[0]; 
min.index = 0;
#pragma omp parallel for reduction(minimum:min)
for(int i = 1; i<size; i++) {
    if(theArray[i]<min.val) { 
        min.val = a[i];
        min.index = i;
    }
}


That works for C and C++. User defined reductions have other advantages besides simplified code. There are multiple algorithms for doing reductions. For example the merging can be done in O(number of threads) or O(Log(number of threads). The first solution I gave does this in O(number of threads) however using user-defined reductions let's OpenMP choose the algorithm.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  无人共我        
                
              
                            
                2021-02-06 10:27
              
            
            
                                                                       
Basic Idea

This can be accomplished without any parellelization-breaking critical or atomic sections by creating a custom reduction. Basically, define an object that stores both the index and value, and then create a function that sorts two of these objects by only the value, not the index.

Details

An object to store an index and value together:

typedef std::pair<unsigned int, float> IndexValuePair;


You can access the index by accessing the first property and the value by accessing the second property, i.e.,

IndexValuePair obj(0, 2.345);
unsigned int ix = obj.first;  // 0
float val = obj.second; // 2.345


Define a function to sort two IndexValuePair objects:

IndexValuePair myMin(IndexValuePair a, IndexValuePair b){
    return a.second < b.second ? a : b;
}


Then, construct a custom reduction following the guidelines in the OpenMP documentation:

#pragma omp declare reduction \
(minPair:IndexValuePair:omp_out=myMin(omp_out, omp_in)) \
initializer(omp_priv = IndexValuePair(0, 1000))


In this case, I've chosen to initialize the index to 0 and the value to 1000. The value should be initialized to some number larger than the largest value you expect to sort.

Functional Example

Finally, combine all these pieces with the parallel for loop!

// Compile with g++ -std=c++11 -fopenmp demo.cpp
#include <iostream>
#include <utility>
#include <vector>

typedef std::pair<unsigned int, float> IndexValuePair;

IndexValuePair myMin(IndexValuePair a, IndexValuePair b){
    return a.second < b.second ? a : b;
}

int main(){

    std::vector<float> vals {10, 4, 6, 2, 8, 0, -1, 2, 3, 4, 4, 8};
    unsigned int i;

    IndexValuePair minValueIndex(0, 1000);

    #pragma omp declare reduction \
        (minPair:IndexValuePair:omp_out=myMin(omp_out, omp_in)) \
        initializer(omp_priv = IndexValuePair(0, 1000))

    #pragma omp parallel for reduction(minPair:minValueIndex)
    for(i = 0; i < vals.size(); i++){

        if(vals[i] < minValueIndex.second){
            minValueIndex.first = i;
            minValueIndex.second = vals[i];
        }
    }

    std::cout << "minimum value = " << minValueIndex.second << std::endl;   // Should be -1
    std::cout << "index = " << minValueIndex.first << std::endl;    // Should be 6


    return EXIT_SUCCESS;

}

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复