I'm wondering if there really is no 128-bit division intrinsic function in Visual C++?
There is a 64x64=128 bit multiplication intrinsic function called _umul128(), which nicely matches the MUL x64 assembler instruction.
Naturally, I assumed there would be a 128/64=64 bit division intrinsic as well (modelling the DIV instruction), but to my amazement neither Visual C++ nor Intel C++ seems to have it; at least it's not listed in intrin.h.
Can someone confirm that? I tried grep'ing for the function names in the compiler executable files, but couldn't find _umul128 in the first place, so I guess I looked in the wrong spot.
Update: at least I have now found the pattern "umul128" (without the leading underscore) in c1.dll of Visual C++ 2010. All the other intrinsics are listed around it, but unfortunately no "udiv128" or the like :( So it seems they really have "forgotten" to implement it.
To clarify: I'm not only looking for a 128 bit data type, but a way to divide a 128 bit scalar int by a 64-bit int in C++. Either an intrinsic function or native 128-bit integer support would solve my problem.
Edit: The answer is no, there is no _udiv128 intrinsic in Visual Studio 2010 or 2012.
If you don't mind little hacks, this may help (64-bit mode only, not tested):
#include <windows.h>
#include <stdio.h>

/*
 * Raw x64 machine code for an unsigned 128-by-64-bit division.
 *
 * Register use follows the Windows x64 calling convention:
 *   RCX = numhi, RDX = numlo, R8 = den, R9 = rem.
 * DIV divides RDX:RAX by its operand, leaving the quotient in RAX (the
 * return value) and the remainder in RDX.
 *
 * DIV raises a divide error if den is 0 or if the quotient does not fit in
 * 64 bits, so callers must ensure numhi < den.
 */
unsigned char udiv128Data[] =
{
    0x48, 0x89, 0xD0, // mov rax,rdx
    0x48, 0x89, 0xCA, // mov rdx,rcx
    0x49, 0xF7, 0xF0, // div r8
    0x49, 0x89, 0x11, // mov [r9],rdx
    0xC3              // ret
};

/*
 * Signed counterpart of udiv128Data: identical except that it uses IDIV.
 * IDIV likewise raises a divide error if den is 0 or the signed quotient
 * does not fit in 64 bits.
 */
unsigned char sdiv128Data[] =
{
    0x48, 0x89, 0xD0, // mov rax,rdx
    0x48, 0x89, 0xCA, // mov rdx,rcx
    0x49, 0xF7, 0xF8, // idiv r8
    0x49, 0x89, 0x11, // mov [r9],rdx
    0xC3              // ret
};

/*
 * udiv128: returns (numhi:numlo) / den and stores (numhi:numlo) % den in *rem.
 * The pointer targets a data array; the bytes must be made executable (see
 * make_executable) before the first call.
 */
unsigned __int64 (__fastcall *udiv128)(unsigned __int64 numhi,
                                       unsigned __int64 numlo,
                                       unsigned __int64 den,
                                       unsigned __int64* rem) =
    (unsigned __int64 (__fastcall *)(unsigned __int64,
                                     unsigned __int64,
                                     unsigned __int64,
                                     unsigned __int64*))udiv128Data;

/* sdiv128: signed variant of udiv128, backed by sdiv128Data. */
__int64 (__fastcall *sdiv128)(__int64 numhi,
                              __int64 numlo,
                              __int64 den,
                              __int64* rem) =
    (__int64 (__fastcall *)(__int64,
                            __int64,
                            __int64,
                            __int64*))sdiv128Data;

/*
 * Marks the pages containing [code, code + size) as executable.
 *
 * The protection stays read/write as well, because the arrays live in the
 * ordinary data section and share their pages with other writable globals.
 *
 * Returns nonzero on success. On failure an error is printed to stderr and
 * 0 is returned; the code must not be called in that case, since jumping
 * into a non-executable page faults.
 */
static int make_executable(void *code, SIZE_T size)
{
    DWORD oldProtect;

    if (!VirtualProtect(code, size, PAGE_EXECUTE_READWRITE, &oldProtect))
    {
        fprintf(stderr, "VirtualProtect failed (error %lu)\n",
                (unsigned long)GetLastError());
        return 0;
    }

    /* Recommended by the VirtualProtect documentation for regions that are
       about to be executed. */
    if (!FlushInstructionCache(GetCurrentProcess(), code, size))
    {
        fprintf(stderr, "FlushInstructionCache failed (error %lu)\n",
                (unsigned long)GetLastError());
        return 0;
    }

    return 1;
}

int main(void)
{
    unsigned __int64 ur;
    __int64 sr;

    if (!make_executable(udiv128Data, sizeof(udiv128Data)) ||
        !make_executable(sdiv128Data, sizeof(sdiv128Data)))
    {
        return 1;
    }

    printf("0x00000123456789ABCDEF000000000000 / 0x0001000000000000 = 0x%llX\n",
           udiv128(0x00000123456789AB,
                   0xCDEF000000000000,
                   0x0001000000000000,
                   &ur));

    /* numhi = -1 sign-extends numlo = -6 to the 128-bit value -6. */
    printf("-6 / -2 = %lld\n",
           sdiv128(-1, -6, -2, &sr));

    return 0;
}
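A small improvement - one instruction fewer. Note that the low word is passed first here, so the high word arrives in RDX, exactly where DIV expects it: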
// C++ side: declaration of the routine implemented in the MASM source below.
extern "C" digit64 udiv128(digit64 low, digit64 hi, digit64 divisor, digit64 *remainder);

; MASM side (x64).
; udiv128 - divide the 128-bit value hi:low by divisor.
; Arguments
;   RCX  Low Digit
;   RDX  High Digit
;   R8   Divisor
;   R9   *Remainder
;   RAX  Quotient upon return
; NOTE(review): DIV raises a divide error when the divisor is 0 or the
; quotient does not fit in 64 bits, so callers presumably must guarantee
; hi < divisor - confirm at the call sites.
.code
udiv128 proc
    mov rax, rcx    ; Put the low digit in place (hi is already there)
    div r8          ; 128 bit divide rdx-rax/r8 = rdx remainder, rax quotient
    mov [r9], rdx   ; Save the remainder
    ret             ; Return the quotient
udiv128 endp
end