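I need to convert both 32-bit and 64-bit unsigned integers into floating-point values in xmm registers. There are x86 instructions to convert signed integers into single- and double-precision floating-point values, but I could not find equivalents for unsigned integers.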
Here's what GCC generates. I wrapped the conversions in functions, but you can easily remove the stack handling. Not all of them use SSE to do the actual work (the ulonglong conversions don't); if you find the corresponding SSE instructions, please tell me. Clang generates almost the same code.
% cat tofloats.c
/* Convert an unsigned 64-bit integer to a double-precision value. */
double ulonglong2double(unsigned long long a) {
    const double converted = (double)a;
    return converted;
}
/* Convert an unsigned 64-bit integer to a single-precision value. */
float ulonglong2float(unsigned long long a) {
    const float converted = (float)a;
    return converted;
}
/* Convert an unsigned int to a double-precision value. */
double uint2double(unsigned int a) {
    const double converted = (double)a;
    return converted;
}
/* Convert an unsigned int to a single-precision value. */
float uint2float(unsigned int a) {
    const float converted = (float)a;
    return converted;
}
% gcc -msse4.2 -g -Os -c tofloats.c && objdump -d tofloats.o
00000000 <ulonglong2double>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 ec 10 sub $0x10,%esp
6: 8b 55 0c mov 0xc(%ebp),%edx
9: 8b 45 08 mov 0x8(%ebp),%eax
c: 89 55 f4 mov %edx,-0xc(%ebp)
f: 85 d2 test %edx,%edx
11: 89 45 f0 mov %eax,-0x10(%ebp)
14: df 6d f0 fildll -0x10(%ebp)
17: 79 06 jns 1f <ulonglong2double+0x1f>
19: d8 05 00 00 00 00 fadds 0x0
1f: dd 5d f8 fstpl -0x8(%ebp)
22: dd 45 f8 fldl -0x8(%ebp)
25: c9 leave
26: c3 ret
00000027 <ulonglong2float>:
27: 55 push %ebp
28: 89 e5 mov %esp,%ebp
2a: 83 ec 10 sub $0x10,%esp
2d: 8b 55 0c mov 0xc(%ebp),%edx
30: 8b 45 08 mov 0x8(%ebp),%eax
33: 89 55 f4 mov %edx,-0xc(%ebp)
36: 85 d2 test %edx,%edx
38: 89 45 f0 mov %eax,-0x10(%ebp)
3b: df 6d f0 fildll -0x10(%ebp)
3e: 79 06 jns 46 <ulonglong2float+0x1f>
40: d8 05 00 00 00 00 fadds 0x0
46: d9 5d fc fstps -0x4(%ebp)
49: d9 45 fc flds -0x4(%ebp)
4c: c9 leave
4d: c3 ret
0000004e <uint2double>:
4e: 55 push %ebp
4f: 89 e5 mov %esp,%ebp
51: 83 ec 08 sub $0x8,%esp
54: 66 0f 6e 45 08 movd 0x8(%ebp),%xmm0
59: 66 0f d6 45 f8 movq %xmm0,-0x8(%ebp)
5e: df 6d f8 fildll -0x8(%ebp)
61: c9 leave
62: c3 ret
00000063 <uint2float>:
63: 55 push %ebp
64: 89 e5 mov %esp,%ebp
66: 83 ec 08 sub $0x8,%esp
69: 66 0f 6e 45 08 movd 0x8(%ebp),%xmm0
6e: 66 0f d6 45 f8 movq %xmm0,-0x8(%ebp)
73: df 6d f8 fildll -0x8(%ebp)
76: c9 leave
77: c3 ret
Here are the bonus points (conversion into ints):
% cat toints.c
/* Convert a single-precision value to an unsigned 64-bit integer,
 * discarding the fractional part. */
unsigned long long float2ulonglong(float a) {
    const unsigned long long converted = (unsigned long long)a;
    return converted;
}
/* Convert a double-precision value to an unsigned 64-bit integer,
 * discarding the fractional part. */
unsigned long long double2ulonglong(double a) {
    const unsigned long long converted = (unsigned long long)a;
    return converted;
}
/* Convert a single-precision value to an unsigned int,
 * discarding the fractional part. */
unsigned int float2uint(float a) {
    const unsigned int converted = (unsigned int)a;
    return converted;
}
/* Convert a double-precision value to an unsigned int,
 * discarding the fractional part. */
unsigned int double2uint(double a) {
    const unsigned int converted = (unsigned int)a;
    return converted;
}
% gcc -msse4.2 -g -Os -c toints.c && objdump -d toints.o
00000000 <float2ulonglong>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 53 push %ebx
4: 83 ec 0c sub $0xc,%esp
7: d9 45 08 flds 0x8(%ebp)
a: d9 05 00 00 00 00 flds 0x0
10: d9 c9 fxch %st(1)
12: db e9 fucomi %st(1),%st
14: 73 0d jae 23 <float2ulonglong+0x23>
16: dd d9 fstp %st(1)
18: dd 4d f0 fisttpll -0x10(%ebp)
1b: 8b 45 f0 mov -0x10(%ebp),%eax
1e: 8b 55 f4 mov -0xc(%ebp),%edx
21: eb 13 jmp 36 <float2ulonglong+0x36>
23: de e1 fsubp %st,%st(1)
25: dd 4d f0 fisttpll -0x10(%ebp)
28: 8b 55 f4 mov -0xc(%ebp),%edx
2b: 8b 45 f0 mov -0x10(%ebp),%eax
2e: 8d 8a 00 00 00 80 lea -0x80000000(%edx),%ecx
34: 89 ca mov %ecx,%edx
36: 83 c4 0c add $0xc,%esp
39: 5b pop %ebx
3a: 5d pop %ebp
3b: c3 ret
0000003c <double2ulonglong>:
3c: 55 push %ebp
3d: 89 e5 mov %esp,%ebp
3f: 53 push %ebx
40: 83 ec 0c sub $0xc,%esp
43: dd 45 08 fldl 0x8(%ebp)
46: d9 05 00 00 00 00 flds 0x0
4c: d9 c9 fxch %st(1)
4e: db e9 fucomi %st(1),%st
50: 73 0d jae 5f <double2ulonglong+0x23>
52: dd d9 fstp %st(1)
54: dd 4d f0 fisttpll -0x10(%ebp)
57: 8b 45 f0 mov -0x10(%ebp),%eax
5a: 8b 55 f4 mov -0xc(%ebp),%edx
5d: eb 13 jmp 72 <double2ulonglong+0x36>
5f: de e1 fsubp %st,%st(1)
61: dd 4d f0 fisttpll -0x10(%ebp)
64: 8b 55 f4 mov -0xc(%ebp),%edx
67: 8b 45 f0 mov -0x10(%ebp),%eax
6a: 8d 8a 00 00 00 80 lea -0x80000000(%edx),%ecx
70: 89 ca mov %ecx,%edx
72: 83 c4 0c add $0xc,%esp
75: 5b pop %ebx
76: 5d pop %ebp
77: c3 ret
00000078 <float2uint>:
78: 55 push %ebp
79: 89 e5 mov %esp,%ebp
7b: 83 ec 08 sub $0x8,%esp
7e: d9 45 08 flds 0x8(%ebp)
81: dd 4d f8 fisttpll -0x8(%ebp)
84: 8b 45 f8 mov -0x8(%ebp),%eax
87: c9 leave
88: c3 ret
00000089 <double2uint>:
89: 55 push %ebp
8a: 89 e5 mov %esp,%ebp
8c: 83 ec 08 sub $0x8,%esp
8f: dd 45 08 fldl 0x8(%ebp)
92: dd 4d f8 fisttpll -0x8(%ebp)
95: 8b 45 f8 mov -0x8(%ebp),%eax
98: c9 leave
99: c3 ret
These functions take their input from the stack and return it over the stack. If you need the result in an XMM register by the end of the function, you can use movd/movq to move it from the stack into the XMM register. If the function returns a double, your result is at -0x8(%ebp); if it returns a float, the result is at -0x4(%ebp). Unsigned long longs have the same size as doubles, and ints have the same size as floats.
FISTTP converts the value in ST into a signed integer using truncation (chop) as the rounding mode, transfers the result to the destination, and pops ST. FISTTP accepts word, short integer, and long integer destinations.
FUCOMI performs an unordered comparison of the contents of registers ST(0) and ST(i) and sets the status flags ZF, PF, and CF in the EFLAGS register according to the results (see the table in the instruction reference). The sign of zero is ignored for comparisons, so that -0.0 is equal to +0.0.
Shamelessly using Janus's answer as a template (after all, I really like C++):
Generated with `gcc -march=native -O3` on an i7, so this is with up to and including `-mavx`.
`uint2float` and vice versa are as expected; the long conversions just have a special case for numbers greater than $2^{63}-1$.
0000000000000000 <ulong2double>:
0: 48 85 ff test %rdi,%rdi
3: 78 0b js 10 <ulong2double+0x10>
5: c4 e1 fb 2a c7 vcvtsi2sd %rdi,%xmm0,%xmm0
a: c3 retq
b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
10: 48 89 f8 mov %rdi,%rax
13: 83 e7 01 and $0x1,%edi
16: 48 d1 e8 shr %rax
19: 48 09 f8 or %rdi,%rax
1c: c4 e1 fb 2a c0 vcvtsi2sd %rax,%xmm0,%xmm0
21: c5 fb 58 c0 vaddsd %xmm0,%xmm0,%xmm0
25: c3 retq
0000000000000030 <ulong2float>:
30: 48 85 ff test %rdi,%rdi
33: 78 0b js 40 <ulong2float+0x10>
35: c4 e1 fa 2a c7 vcvtsi2ss %rdi,%xmm0,%xmm0
3a: c3 retq
3b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
40: 48 89 f8 mov %rdi,%rax
43: 83 e7 01 and $0x1,%edi
46: 48 d1 e8 shr %rax
49: 48 09 f8 or %rdi,%rax
4c: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0
51: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0
55: c3 retq
0000000000000060 <uint2double>:
60: 89 ff mov %edi,%edi
62: c4 e1 fb 2a c7 vcvtsi2sd %rdi,%xmm0,%xmm0
67: c3 retq
0000000000000070 <uint2float>:
70: 89 ff mov %edi,%edi
72: c4 e1 fa 2a c7 vcvtsi2ss %rdi,%xmm0,%xmm0
77: c3 retq