What is different about C++ math.h abs() compared to my abs()

后端 未结 8 673
伪装坚强ぢ
伪装坚强ぢ 2021-02-07 07:44

I am currently writing some GLSL-like vector math classes in C++, and I just implemented an abs() function like this:

template
static         


        
8条回答
  •  感情败类
    2021-02-07 08:35

    What are your compiler and settings? I'm sure MS and GCC implement "intrinsic functions" for many math and string operations.

    The following line:

    printf("%.3f", abs(1.25));
    

    falls into the following "fabs" code path (in msvcr90d.dll):

    004113DE  sub         esp,8 
    004113E1  fld         qword ptr [__real@3ff4000000000000 (415748h)] 
    004113E7  fstp        qword ptr [esp] 
    004113EA  call        abs (4110FFh) 
    

    abs calls the C runtime 'fabs' implementation in MSVCR90D (rather large):

    102F5730  mov         edi,edi 
    102F5732  push        ebp  
    102F5733  mov         ebp,esp 
    102F5735  sub         esp,14h 
    102F5738  fldz             
    102F573A  fstp        qword ptr [result] 
    102F573D  push        0FFFFh 
    102F5742  push        133Fh 
    102F5747  call        _ctrlfp (102F6140h) 
    102F574C  add         esp,8 
    102F574F  mov         dword ptr [savedcw],eax 
    102F5752  movzx       eax,word ptr [ebp+0Eh] 
    102F5756  and         eax,7FF0h 
    102F575B  cmp         eax,7FF0h 
    102F5760  jne         fabs+0D2h (102F5802h) 
    102F5766  sub         esp,8 
    102F5769  fld         qword ptr [x] 
    102F576C  fstp        qword ptr [esp] 
    102F576F  call        _sptype (102F9710h) 
    102F5774  add         esp,8 
    102F5777  mov         dword ptr [ebp-14h],eax 
    102F577A  cmp         dword ptr [ebp-14h],1 
    102F577E  je          fabs+5Eh (102F578Eh) 
    102F5780  cmp         dword ptr [ebp-14h],2 
    102F5784  je          fabs+77h (102F57A7h) 
    102F5786  cmp         dword ptr [ebp-14h],3 
    102F578A  je          fabs+8Fh (102F57BFh) 
    102F578C  jmp         fabs+0A8h (102F57D8h) 
    102F578E  push        0FFFFh 
    102F5793  mov         ecx,dword ptr [savedcw] 
    102F5796  push        ecx  
    102F5797  call        _ctrlfp (102F6140h) 
    102F579C  add         esp,8 
    102F579F  fld         qword ptr [x] 
    102F57A2  jmp         fabs+0F8h (102F5828h) 
    102F57A7  push        0FFFFh 
    102F57AC  mov         edx,dword ptr [savedcw] 
    102F57AF  push        edx  
    102F57B0  call        _ctrlfp (102F6140h) 
    102F57B5  add         esp,8 
    102F57B8  fld         qword ptr [x] 
    102F57BB  fchs             
    102F57BD  jmp         fabs+0F8h (102F5828h) 
    102F57BF  mov         eax,dword ptr [savedcw] 
    102F57C2  push        eax  
    102F57C3  sub         esp,8 
    102F57C6  fld         qword ptr [x] 
    102F57C9  fstp        qword ptr [esp] 
    102F57CC  push        15h  
    102F57CE  call        _handle_qnan1 (102F98C0h) 
    102F57D3  add         esp,10h 
    102F57D6  jmp         fabs+0F8h (102F5828h) 
    102F57D8  mov         ecx,dword ptr [savedcw] 
    102F57DB  push        ecx  
    102F57DC  fld         qword ptr [x] 
    102F57DF  fadd        qword ptr [__real@3ff0000000000000 (1022CF68h)] 
    102F57E5  sub         esp,8 
    102F57E8  fstp        qword ptr [esp] 
    102F57EB  sub         esp,8 
    102F57EE  fld         qword ptr [x] 
    102F57F1  fstp        qword ptr [esp] 
    102F57F4  push        15h  
    102F57F6  push        8    
    102F57F8  call        _except1 (102F99B0h) 
    102F57FD  add         esp,1Ch 
    102F5800  jmp         fabs+0F8h (102F5828h) 
    102F5802  mov         edx,dword ptr [ebp+0Ch] 
    102F5805  and         edx,7FFFFFFFh 
    102F580B  mov         dword ptr [ebp-0Ch],edx 
    102F580E  mov         eax,dword ptr [x] 
    102F5811  mov         dword ptr [result],eax 
    102F5814  push        0FFFFh 
    102F5819  mov         ecx,dword ptr [savedcw] 
    102F581C  push        ecx  
    102F581D  call        _ctrlfp (102F6140h) 
    102F5822  add         esp,8 
    102F5825  fld         qword ptr [result] 
    102F5828  mov         esp,ebp 
    102F582A  pop         ebp  
    102F582B  ret   
    

    In release mode, the FPU FABS instruction is used instead (it takes only 1 clock cycle on Pentium and later FPUs); the disassembly output is:

    00401006  fld         qword ptr [__real@3ff4000000000000 (402100h)] 
    0040100C  sub         esp,8 
    0040100F  fabs             
    00401011  fstp        qword ptr [esp] 
    00401014  push        offset string "%.3f" (4020F4h) 
    00401019  call        dword ptr [__imp__printf (4020A0h)] 
    

提交回复
热议问题