I am thinking on how to implement the conversion of an integer (4byte, unsigned) to string with SSE instructions. The usual routine is to divide the number and store it in a loc
This is part of my code in asm. It works only for the range 0–255. It could be made faster, but it shows the direction and the main idea.
Cost: 4 imuls, 1 memory read, 1 memory write.
You can try to replace two of the imuls with leas and shifts. However, you won't find anything faster in C/C++/Python ;)
void itoa_asm(unsigned char inVal, char *str)
{
__asm
{
// eax=100's -> (some_integer/100) = (some_integer*41) >> 12
movzx esi,inVal
mov eax,esi
mov ecx,41
imul eax,ecx
shr eax,12
mov edx,eax
imul edx,100
mov edi,edx
// ebx=10's -> (some_integer/10) = (some_integer*205) >> 11
mov ebx,esi
sub ebx,edx
mov ecx,205
imul ebx,ecx
shr ebx,11
mov edx,ebx
imul edx,10
// ecx = 1
mov ecx,esi
sub ecx,edx // -> sub 10's
sub ecx,edi // -> sub 100's
add al,'0'
add bl,'0'
add cl,'0'
//shl eax,
shl ebx,8
shl ecx,16
or eax,ebx
or eax,ecx
mov edi,str
mov [edi],eax
}
}