optimized itoa function

后端 未结 8 1506
不知归路
不知归路 2021-02-04 06:48

I am thinking about how to implement the conversion of an integer (4-byte, unsigned) to a string with SSE instructions. The usual routine is to divide the number and store it in a loc…

8条回答
  •  小鲜肉
    小鲜肉 (楼主)
    2021-02-04 07:12

    This is part of my code in asm. It works only for the range 0-255. It could be made faster; however, it shows the direction and the main idea.

    4 imuls, 1 memory read, 1 memory write

    You can try to replace 2 of the imuls with leas and shifts. However, you won't find anything faster in C/C++/Python ;)

    void itoa_asm(unsigned char inVal, char *str)
    {
        __asm
        {
            // eax=100's      -> (some_integer/100) = (some_integer*41) >> 12
            movzx esi,inVal
            mov eax,esi
            mov ecx,41
            imul eax,ecx
            shr eax,12
    
            mov edx,eax
            imul edx,100
            mov edi,edx
    
            // ebx=10's       -> (some_integer/10) = (some_integer*205) >> 11
            mov ebx,esi
            sub ebx,edx
            mov ecx,205
            imul ebx,ecx
            shr ebx,11
    
            mov edx,ebx
            imul edx,10
    
            // ecx = 1
            mov ecx,esi
            sub ecx,edx    // -> sub 10's
            sub ecx,edi    // -> sub 100's
    
            add al,'0'
            add bl,'0'
            add cl,'0'
            //shl eax,
            shl ebx,8
            shl ecx,16
            or eax,ebx
            or eax,ecx
    
            mov edi,str
            mov [edi],eax
    
        }
    
    }
    

提交回复
热议问题