How to generate an SSE4.2 popcnt machine instruction

后端 未结 3 393
借酒劲吻你
借酒劲吻你 2021-02-02 16:40

Using the C program:

int main(int argc , char** argv)
{

  return  __builtin_popcountll(0xf0f0f0f0f0f0f0f0);

}

and the compiler line (gcc 4.4

3条回答
  •  难免孤独
    2021-02-02 17:24

    For __builtin_popcountll in GCC, all you need to do is add -mpopcnt

    #include <stdlib.h>
    int main(int argc, char **argv) {
        return __builtin_popcountll(atoi(argv[1]));
    }
    

    with -mpopcnt

    $ otool -tvV a.out
    a.out:
    (__TEXT,__text) section
    _main:
    0000000100000f66    pushq   %rbp
    0000000100000f67    movq    %rsp, %rbp
    0000000100000f6a    subq    $0x10, %rsp
    0000000100000f6e    movq    %rdi, -0x8(%rbp)
    0000000100000f72    movq    -0x8(%rbp), %rax
    0000000100000f76    addq    $0x8, %rax
    0000000100000f7a    movq    (%rax), %rax
    0000000100000f7d    movq    %rax, %rdi
    0000000100000f80    callq   0x100000f8e ## symbol stub for: _atoi
    0000000100000f85    cltq
    0000000100000f87    popcntq %rax, %rax
    0000000100000f8c    leave
    0000000100000f8d    retq
    

    without -mpopcnt

    a.out:
    (__TEXT,__text) section
    _main:
    0000000100000f55    pushq   %rbp
    0000000100000f56    movq    %rsp, %rbp
    0000000100000f59    subq    $0x10, %rsp
    0000000100000f5d    movq    %rdi, -0x8(%rbp)
    0000000100000f61    movq    -0x8(%rbp), %rax
    0000000100000f65    addq    $0x8, %rax
    0000000100000f69    movq    (%rax), %rax
    0000000100000f6c    movq    %rax, %rdi
    0000000100000f6f    callq   0x100000f86 ## symbol stub for: _atoi
    0000000100000f74    cltq
    0000000100000f76    movq    %rax, %rdi
    0000000100000f79    callq   0x100000f80 ## symbol stub for: ___popcountdi2
    0000000100000f7e    leave
    0000000100000f7f    retq
    

    Notes

    Be sure to check the POPCNT feature flag (CPUID leaf 1, ECX bit 23) before using POPCNTQ. AMD groups POPCNT under its ABM extension, but the instruction has its own CPUID bit.

提交回复
热议问题