How to convert an unsigned integer to floating-point in x86 (32-bit) assembly?

后端 未结 2 1099
傲寒
傲寒 2021-02-10 03:55

I need to convert both 32-bit and 64-bit unsigned integers into floating-point values in xmm registers. There are x86 instructions to convert signed integer

相关标签:
2条回答
  • 2021-02-10 04:12

    Here's what GCC generates. I wrapped the conversions in functions, but you can easily remove the stack handling. Not all of them use SSE to do the actual work (the ulonglong conversions don't); if you find the corresponding SSE instructions, please tell me. Clang generates almost the same code.

    % cat tofloats.c 
    /* Returns the value of the unsigned 64-bit integer `a` converted to double. */
    double ulonglong2double(unsigned long long a) {
        const double as_double = (double)a;
        return as_double;
    }
    /* Returns the value of the unsigned 64-bit integer `a` converted to float. */
    float ulonglong2float(unsigned long long a) {
        const float as_float = (float)a;
        return as_float;
    }
    /* Returns the value of the unsigned int `a` converted to double. */
    double uint2double(unsigned int a) {
        const double as_double = (double)a;
        return as_double;
    }
    /* Returns the value of the unsigned int `a` converted to float. */
    float uint2float(unsigned int a) {
        const float as_float = (float)a;
        return as_float;
    }
    % gcc -msse4.2 -g -Os -c tofloats.c && objdump -d tofloats.o
    00000000 <ulonglong2double>:
       0:   55                      push   %ebp
       1:   89 e5                   mov    %esp,%ebp
       3:   83 ec 10                sub    $0x10,%esp
       6:   8b 55 0c                mov    0xc(%ebp),%edx
       9:   8b 45 08                mov    0x8(%ebp),%eax
       c:   89 55 f4                mov    %edx,-0xc(%ebp)
       f:   85 d2                   test   %edx,%edx
      11:   89 45 f0                mov    %eax,-0x10(%ebp)
      14:   df 6d f0                fildll -0x10(%ebp)
      17:   79 06                   jns    1f <ulonglong2double+0x1f>
      19:   d8 05 00 00 00 00       fadds  0x0
      1f:   dd 5d f8                fstpl  -0x8(%ebp)
      22:   dd 45 f8                fldl   -0x8(%ebp)
      25:   c9                      leave  
      26:   c3                      ret    
    
    00000027 <ulonglong2float>:
      27:   55                      push   %ebp
      28:   89 e5                   mov    %esp,%ebp
      2a:   83 ec 10                sub    $0x10,%esp
      2d:   8b 55 0c                mov    0xc(%ebp),%edx
      30:   8b 45 08                mov    0x8(%ebp),%eax
      33:   89 55 f4                mov    %edx,-0xc(%ebp)
      36:   85 d2                   test   %edx,%edx
      38:   89 45 f0                mov    %eax,-0x10(%ebp)
      3b:   df 6d f0                fildll -0x10(%ebp)
      3e:   79 06                   jns    46 <ulonglong2float+0x1f>
      40:   d8 05 00 00 00 00       fadds  0x0
      46:   d9 5d fc                fstps  -0x4(%ebp)
      49:   d9 45 fc                flds   -0x4(%ebp)
      4c:   c9                      leave  
      4d:   c3                      ret    
    
    0000004e <uint2double>:
      4e:   55                      push   %ebp
      4f:   89 e5                   mov    %esp,%ebp
      51:   83 ec 08                sub    $0x8,%esp
      54:   66 0f 6e 45 08          movd   0x8(%ebp),%xmm0
      59:   66 0f d6 45 f8          movq   %xmm0,-0x8(%ebp)
      5e:   df 6d f8                fildll -0x8(%ebp)
      61:   c9                      leave  
      62:   c3                      ret    
    
    00000063 <uint2float>:
      63:   55                      push   %ebp
      64:   89 e5                   mov    %esp,%ebp
      66:   83 ec 08                sub    $0x8,%esp
      69:   66 0f 6e 45 08          movd   0x8(%ebp),%xmm0
      6e:   66 0f d6 45 f8          movq   %xmm0,-0x8(%ebp)
      73:   df 6d f8                fildll -0x8(%ebp)
      76:   c9                      leave  
      77:   c3                      ret
    

    Here are the bonus points (conversion into ints):

    % cat toints.c                                      
    /* Returns the float `a` converted to an unsigned 64-bit integer (C conversion: fraction discarded). */
    unsigned long long float2ulonglong(float a) {
        const unsigned long long as_u64 = (unsigned long long)a;
        return as_u64;
    }
    /* Returns the double `a` converted to an unsigned 64-bit integer (C conversion: fraction discarded). */
    unsigned long long double2ulonglong(double a) {
        const unsigned long long as_u64 = (unsigned long long)a;
        return as_u64;
    }
    /* Returns the float `a` converted to an unsigned int (C conversion: fraction discarded). */
    unsigned int float2uint(float a) {
        const unsigned int as_u32 = (unsigned int)a;
        return as_u32;
    }
    /* Returns the double `a` converted to an unsigned int (C conversion: fraction discarded). */
    unsigned int double2uint(double a) {
        const unsigned int as_u32 = (unsigned int)a;
        return as_u32;
    }
    % gcc -msse4.2 -g -Os -c toints.c && objdump -d toints.o  
    00000000 <float2ulonglong>:
       0:   55                      push   %ebp
       1:   89 e5                   mov    %esp,%ebp
       3:   53                      push   %ebx
       4:   83 ec 0c                sub    $0xc,%esp
       7:   d9 45 08                flds   0x8(%ebp)
       a:   d9 05 00 00 00 00       flds   0x0
      10:   d9 c9                   fxch   %st(1)
      12:   db e9                   fucomi %st(1),%st
      14:   73 0d                   jae    23 <float2ulonglong+0x23>
      16:   dd d9                   fstp   %st(1)
      18:   dd 4d f0                fisttpll -0x10(%ebp)
      1b:   8b 45 f0                mov    -0x10(%ebp),%eax
      1e:   8b 55 f4                mov    -0xc(%ebp),%edx
      21:   eb 13                   jmp    36 <float2ulonglong+0x36>
      23:   de e1                   fsubp  %st,%st(1)
      25:   dd 4d f0                fisttpll -0x10(%ebp)
      28:   8b 55 f4                mov    -0xc(%ebp),%edx
      2b:   8b 45 f0                mov    -0x10(%ebp),%eax
      2e:   8d 8a 00 00 00 80       lea    -0x80000000(%edx),%ecx
      34:   89 ca                   mov    %ecx,%edx
      36:   83 c4 0c                add    $0xc,%esp
      39:   5b                      pop    %ebx
      3a:   5d                      pop    %ebp
      3b:   c3                      ret    
    
    0000003c <double2ulonglong>:
      3c:   55                      push   %ebp
      3d:   89 e5                   mov    %esp,%ebp
      3f:   53                      push   %ebx
      40:   83 ec 0c                sub    $0xc,%esp
      43:   dd 45 08                fldl   0x8(%ebp)
      46:   d9 05 00 00 00 00       flds   0x0
      4c:   d9 c9                   fxch   %st(1)
      4e:   db e9                   fucomi %st(1),%st
      50:   73 0d                   jae    5f <double2ulonglong+0x23>
      52:   dd d9                   fstp   %st(1)
      54:   dd 4d f0                fisttpll -0x10(%ebp)
      57:   8b 45 f0                mov    -0x10(%ebp),%eax
      5a:   8b 55 f4                mov    -0xc(%ebp),%edx
      5d:   eb 13                   jmp    72 <double2ulonglong+0x36>
      5f:   de e1                   fsubp  %st,%st(1)
      61:   dd 4d f0                fisttpll -0x10(%ebp)
      64:   8b 55 f4                mov    -0xc(%ebp),%edx
      67:   8b 45 f0                mov    -0x10(%ebp),%eax
      6a:   8d 8a 00 00 00 80       lea    -0x80000000(%edx),%ecx
      70:   89 ca                   mov    %ecx,%edx
      72:   83 c4 0c                add    $0xc,%esp
      75:   5b                      pop    %ebx
      76:   5d                      pop    %ebp
      77:   c3                      ret    
    
    00000078 <float2uint>:
      78:   55                      push   %ebp
      79:   89 e5                   mov    %esp,%ebp
      7b:   83 ec 08                sub    $0x8,%esp
      7e:   d9 45 08                flds   0x8(%ebp)
      81:   dd 4d f8                fisttpll -0x8(%ebp)
      84:   8b 45 f8                mov    -0x8(%ebp),%eax
      87:   c9                      leave  
      88:   c3                      ret    
    
    00000089 <double2uint>:
      89:   55                      push   %ebp
      8a:   89 e5                   mov    %esp,%ebp
      8c:   83 ec 08                sub    $0x8,%esp
      8f:   dd 45 08                fldl   0x8(%ebp)
      92:   dd 4d f8                fisttpll -0x8(%ebp)
      95:   8b 45 f8                mov    -0x8(%ebp),%eax
      98:   c9                      leave  
      99:   c3                      ret    
    

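    These functions take their input from the stack and return the result over the stack. If you need the result in an XMM register by the end of the function, you can use movd/movq to move it from the stack into the XMM register. If the function returns a double, the result is at -0x8(%ebp); if it returns a float, the result is at -0x4(%ebp). (In the listings above this holds for the ulonglong versions, which store the converted value with fstpl/fstps before reloading it onto the x87 stack; the uint versions leave the converted value only on the x87 stack after fildll, so you would have to add the store yourself.) Ulonglongs have the same length as doubles, and ints have the same length as floats.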

    fisttpll: Store Integer with Truncation

    FISTTP converts the value in ST into a signed integer using truncation (chop) as the rounding mode, transfers the result to the destination, and pops ST. FISTTP accepts word, short integer, and long integer destinations.

    fucomi: Compare Floating Point Values and Set EFLAGS

    Performs an unordered comparison of the contents of registers ST(0) and ST(i) and sets the status flags ZF, PF, and CF in the EFLAGS register according to the results (see the table below). The sign of zero is ignored for comparisons, so that –0.0 is equal to +0.0.

    0 讨论(0)
  • 2021-02-10 04:15

    Shamelessly using Janus's answer as a template (after all I really like C++):

    Generated with gcc -march=native -O3 on an i7, so this uses instruction sets up to and including -mavx. uint2float and vice versa are as expected; the long conversions just have a special case for numbers greater than 2^63-1.

    0000000000000000 <ulong2double>:
       0:   48 85 ff                test   %rdi,%rdi
       3:   78 0b                   js     10 <ulong2double+0x10>
       5:   c4 e1 fb 2a c7          vcvtsi2sd %rdi,%xmm0,%xmm0
       a:   c3                      retq   
       b:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
      10:   48 89 f8                mov    %rdi,%rax
      13:   83 e7 01                and    $0x1,%edi
      16:   48 d1 e8                shr    %rax
      19:   48 09 f8                or     %rdi,%rax
      1c:   c4 e1 fb 2a c0          vcvtsi2sd %rax,%xmm0,%xmm0
      21:   c5 fb 58 c0             vaddsd %xmm0,%xmm0,%xmm0
      25:   c3                      retq   
    
    0000000000000030 <ulong2float>:
      30:   48 85 ff                test   %rdi,%rdi
      33:   78 0b                   js     40 <ulong2float+0x10>
      35:   c4 e1 fa 2a c7          vcvtsi2ss %rdi,%xmm0,%xmm0
      3a:   c3                      retq   
      3b:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
      40:   48 89 f8                mov    %rdi,%rax
      43:   83 e7 01                and    $0x1,%edi
      46:   48 d1 e8                shr    %rax
      49:   48 09 f8                or     %rdi,%rax
      4c:   c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
      51:   c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
      55:   c3                      retq   
    
    0000000000000060 <uint2double>:
      60:   89 ff                   mov    %edi,%edi
      62:   c4 e1 fb 2a c7          vcvtsi2sd %rdi,%xmm0,%xmm0
      67:   c3                      retq   
    
    0000000000000070 <uint2float>:
      70:   89 ff                   mov    %edi,%edi
      72:   c4 e1 fa 2a c7          vcvtsi2ss %rdi,%xmm0,%xmm0
      77:   c3                      retq 
    
    0 讨论(0)
提交回复
热议问题