Effectiveness of GCC optmization on bit operations

后端未结

关注

 5  1478

暗喜 2021-02-19 14:25

Here are two ways to set an individual bit in C on x86-64:

inline void SetBitC(long *array, int bit) {
   //Pure C version
   *array |= 1<


      
      
        
          5条回答        

        
                    
            
            
                         
                
              
              
                
                   Happy的楠姐
                                             
                
                
                (楼主)
            
              
              
                2021-02-19 15:01
              

            
            
                        
For such code:

#include 
#include 

int main() {
  volatile long long i = 0;
  time_t start = time (NULL);
  for (long long n = 0; n < (1LL << 32); n++) {
    i |= 1 << 10;
  }
  time_t end = time (NULL);
  printf("C took %ds\n", (int)(end - start));
  start = time (NULL);
  for (long long n = 0; n < (1LL << 32); n++) {
    __asm__ ("bts %[bit], %[i]"
                  : [i] "=r"(i)
                  : "[i]"(i), [bit] "i" (10));
  }
  end = time (NULL);
  printf("ASM took %ds\n", (int)(end - start));
}


the result was:

C took 12s
ASM took 10s


My flag was (-std=gnu99 -O2 -march=core2). Without the volatile the loop was optimized out. gcc 4.4.2.

No difference was with:

__asm__ ("bts %[bit], %[i]"
              : [i] "+m"(i)
              : [bit] "r" (10));


So probably the answer was - noone cares. In microbenchmark the only difference is the one between those two methods but in real life I belive such code does not take much CPU. 

Additionally for such code:

#include 
#include 

int main() {
  volatile long long i = 0;
  time_t start = time (NULL);
  for (long long n = 0; n < (1L << 32); n++) {
    i |= 1 << (n % 32);
  }
  time_t end = time (NULL);
  printf("C took %ds\n", (int)(end - start));
  start = time (NULL);
  for (long long n = 0; n < (1L << 32); n++) {
    __asm__ ("bts %[bit], %[i]"
                  : [i] "+m"(i)
                  : [bit] "r" (n % 32));
  }
  end = time (NULL);
  printf("ASM took %ds\n", (int)(end - start));
}


The result was:

C took 9s
ASM took 10s


Both results were 'stable'. Testing CPU 'Intel(R) Core(TM)2 Duo CPU T9600 @ 2.80GHz'.
    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它5个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复