What is different about C++ math.h abs() compared to my abs()

后端 未结 8 617
伪装坚强ぢ
伪装坚强ぢ 2021-02-07 07:44

I am currently writing some GLSL-like vector math classes in C++, and I have just implemented an abs() function like this:

template
static         


        
相关标签:
8条回答
  • 2021-02-07 08:35

    What are your compiler and its settings? I'm sure MS and GCC implement "intrinsic functions" for many math and string operations.

    The following line:

    printf("%.3f", abs(1.25));
    

    ends up in the following "fabs" code path (in msvcr90d.dll). The call site looks like this:

    004113DE  sub         esp,8 
    004113E1  fld         qword ptr [__real@3ff4000000000000 (415748h)] 
    004113E7  fstp        qword ptr [esp] 
    004113EA  call        abs (4110FFh) 
    

    abs calls the C runtime 'fabs' implementation in MSVCR90D (rather large):

    102F5730  mov         edi,edi 
    102F5732  push        ebp  
    102F5733  mov         ebp,esp 
    102F5735  sub         esp,14h 
    102F5738  fldz             
    102F573A  fstp        qword ptr [result] 
    102F573D  push        0FFFFh 
    102F5742  push        133Fh 
    102F5747  call        _ctrlfp (102F6140h) 
    102F574C  add         esp,8 
    102F574F  mov         dword ptr [savedcw],eax 
    102F5752  movzx       eax,word ptr [ebp+0Eh] 
    102F5756  and         eax,7FF0h 
    102F575B  cmp         eax,7FF0h 
    102F5760  jne         fabs+0D2h (102F5802h) 
    102F5766  sub         esp,8 
    102F5769  fld         qword ptr [x] 
    102F576C  fstp        qword ptr [esp] 
    102F576F  call        _sptype (102F9710h) 
    102F5774  add         esp,8 
    102F5777  mov         dword ptr [ebp-14h],eax 
    102F577A  cmp         dword ptr [ebp-14h],1 
    102F577E  je          fabs+5Eh (102F578Eh) 
    102F5780  cmp         dword ptr [ebp-14h],2 
    102F5784  je          fabs+77h (102F57A7h) 
    102F5786  cmp         dword ptr [ebp-14h],3 
    102F578A  je          fabs+8Fh (102F57BFh) 
    102F578C  jmp         fabs+0A8h (102F57D8h) 
    102F578E  push        0FFFFh 
    102F5793  mov         ecx,dword ptr [savedcw] 
    102F5796  push        ecx  
    102F5797  call        _ctrlfp (102F6140h) 
    102F579C  add         esp,8 
    102F579F  fld         qword ptr [x] 
    102F57A2  jmp         fabs+0F8h (102F5828h) 
    102F57A7  push        0FFFFh 
    102F57AC  mov         edx,dword ptr [savedcw] 
    102F57AF  push        edx  
    102F57B0  call        _ctrlfp (102F6140h) 
    102F57B5  add         esp,8 
    102F57B8  fld         qword ptr [x] 
    102F57BB  fchs             
    102F57BD  jmp         fabs+0F8h (102F5828h) 
    102F57BF  mov         eax,dword ptr [savedcw] 
    102F57C2  push        eax  
    102F57C3  sub         esp,8 
    102F57C6  fld         qword ptr [x] 
    102F57C9  fstp        qword ptr [esp] 
    102F57CC  push        15h  
    102F57CE  call        _handle_qnan1 (102F98C0h) 
    102F57D3  add         esp,10h 
    102F57D6  jmp         fabs+0F8h (102F5828h) 
    102F57D8  mov         ecx,dword ptr [savedcw] 
    102F57DB  push        ecx  
    102F57DC  fld         qword ptr [x] 
    102F57DF  fadd        qword ptr [__real@3ff0000000000000 (1022CF68h)] 
    102F57E5  sub         esp,8 
    102F57E8  fstp        qword ptr [esp] 
    102F57EB  sub         esp,8 
    102F57EE  fld         qword ptr [x] 
    102F57F1  fstp        qword ptr [esp] 
    102F57F4  push        15h  
    102F57F6  push        8    
    102F57F8  call        _except1 (102F99B0h) 
    102F57FD  add         esp,1Ch 
    102F5800  jmp         fabs+0F8h (102F5828h) 
    102F5802  mov         edx,dword ptr [ebp+0Ch] 
    102F5805  and         edx,7FFFFFFFh 
    102F580B  mov         dword ptr [ebp-0Ch],edx 
    102F580E  mov         eax,dword ptr [x] 
    102F5811  mov         dword ptr [result],eax 
    102F5814  push        0FFFFh 
    102F5819  mov         ecx,dword ptr [savedcw] 
    102F581C  push        ecx  
    102F581D  call        _ctrlfp (102F6140h) 
    102F5822  add         esp,8 
    102F5825  fld         qword ptr [result] 
    102F5828  mov         esp,ebp 
    102F582A  pop         ebp  
    102F582B  ret   
    

    In release mode, the FPU FABS instruction is used instead (it takes only 1 clock cycle on Pentium and later FPUs). The disassembly output is:

    00401006  fld         qword ptr [__real@3ff4000000000000 (402100h)] 
    0040100C  sub         esp,8 
    0040100F  fabs             
    00401011  fstp        qword ptr [esp] 
    00401014  push        offset string "%.3f" (4020F4h) 
    00401019  call        dword ptr [__imp__printf (4020A0h)] 
    
    0 讨论(0)
  • 2021-02-07 08:36

    The library version of abs is probably an intrinsic function, whose behavior is exactly known to the compiler. The compiler can therefore even compute the value at compile time (since in your case the argument is known) and optimize the call away. You should try your benchmark with a value known only at runtime (provided by the user, or obtained with rand() before the two loops).

    If there's still a difference, it may be because the library abs is written directly in hand-forged assembly with magic tricks, so it could be a little faster than the generated one.

    0 讨论(0)
提交回复
热议问题