multi-word addition using the carry flag

后端 未结 1 1893
悲&欢浪女
悲&欢浪女 2020-11-30 09:13

GCC has 128-bit integers. Using these I can get the compiler to use the mul (or imul with only one operand) instructions. For example



        
相关标签:
1条回答
  • 2020-11-30 09:26

    I found a solution with ICC 13.0.01 using the _addcarry_u64 intrinsic

    /*
     * Adds *y into *x in place, one field at a time (x1, x2, x3, x4), feeding
     * the carry-out of each _addcarry_u64 call into the next call as carry-in.
     *
     * x: first operand and destination; x->x1..x4 are overwritten with the sum.
     * y: second operand; its fields are only read.
     *
     * The carry-out of the last (x4) addition is discarded, so overflow past
     * the top field is not reported to the caller.
     *
     * NOTE(review): uint256 is not defined in this snippet - presumably four
     * 64-bit unsigned fields with x1 the least significant; confirm.
     */
    void add256(uint256 *x, uint256 *y) {
        unsigned char c = 0;                          /* no carry into the first field */
        c = _addcarry_u64(c, x->x1, y->x1, &x->x1);   /* x1 += y1; c = carry-out */
        c = _addcarry_u64(c, x->x2, y->x2, &x->x2);   /* x2 += y2 + c */
        c = _addcarry_u64(c, x->x3, y->x3, &x->x3);   /* x3 += y3 + c */
            _addcarry_u64(c, x->x4, y->x4, &x->x4);   /* x4 += y4 + c; final carry dropped */
    }
    

    produces

    # ICC -O3 output for add256, AT&T syntax (op src, dst).
    # Each 8-byte field of the second operand is loaded into a scratch register
    # and added straight into the matching field of the first operand in memory:
    # one addq followed by three adcq, so the carry flag flows from field to
    # field without ever being saved or restored.
    # NOTE(review): presumably System V AMD64, i.e. %rdi = x and %rsi = y, with
    # offsets 0/8/16/24 being x1..x4 - confirm against the struct definition.
    # The trailing "#NN.N" annotations are emitted by the compiler (presumably
    # source line.column - confirm).
    L__routine_start_add256_0:
    add256:
            # %r9d = 0; nothing reads it before setb overwrites %r9b below.
            xorl      %r9d, %r9d                                    #25.9
            movq      (%rsi), %rax                                  #22.9
            # Lowest field: plain add, which ignores any incoming carry and sets CF.
            addq      %rax, (%rdi)                                  #22.9
            movq      8(%rsi), %rdx                                 #23.9
            # movq does not modify flags, so CF from the previous add reaches each adcq.
            adcq      %rdx, 8(%rdi)                                 #23.9
            movq      16(%rsi), %rcx                                #24.9
            adcq      %rcx, 16(%rdi)                                #24.9
            movq      24(%rsi), %r8                                 #25.9
            adcq      %r8, 24(%rdi)                                 #25.9
            # Final carry is captured in %r9b but not used before returning.
            setb      %r9b                                          #25.9
            ret                                                     #26.1
    

    I compiled with -O3. I don't know how to enable adx with ICC. Maybe I need ICC 14?

    That's exactly one addq and three adcq, as I expected.

    With Clang, the result using -O3 -madx is a mess:

    # Clang -O3 -madx output for add256, AT&T syntax (op src, dst).
    # Rather than keeping the carry in CF across fields, each step saves it to a
    # byte register with setb and regenerates CF with "addb $-1, reg", which
    # carries out exactly when reg is nonzero.
    # Each of the first three fields is also processed twice: an adcq into
    # memory, then a reload plus adcxq whose sum register is overwritten without
    # being read - only the carry it produces is kept (via setb).
    # NOTE(review): each adcxq adds the y field to the *already updated* x field,
    # so its carry may not equal the carry of the original addition - verify
    # against the compiler version that produced this listing.
    add256(uint256*, uint256*):                  # @add256(uint256*, uint256*)
    movq    (%rsi), %rax
    xorl    %ecx, %ecx
    xorl    %edx, %edx
    # %dl is 0 here, so this leaves CF = 0 (carry-in for the first field).
    addb    $-1, %dl
    adcq    %rax, (%rdi)
    # %cl is 0 here, so CF = 0 again before the second pass over the same field.
    addb    $-1, %cl
    movq    (%rdi), %rcx
    adcxq   %rax, %rcx
    # Save the carry as a 0/1 byte; %rcx is reloaded next, discarding the adcxq sum.
    setb    %al
    movq    8(%rsi), %rcx
    movb    %al, %dl
    # Regenerate CF from the saved carry byte.
    addb    $-1, %dl
    adcq    %rcx, 8(%rdi)
    addb    $-1, %al
    movq    8(%rdi), %rax
    adcxq   %rcx, %rax
    setb    %al
    movq    16(%rsi), %rcx
    movb    %al, %dl
    addb    $-1, %dl
    adcq    %rcx, 16(%rdi)
    addb    $-1, %al
    movq    16(%rdi), %rax
    adcxq   %rcx, %rax
    setb    %al
    movq    24(%rsi), %rcx
    # Last field: restore CF once and add; the final carry is not captured.
    addb    $-1, %al
    adcq    %rcx, 24(%rdi)
    retq
    

    Without enabling -madx in Clang the result is not much better.

    Edit: Apparently MSVC already has _addcarry_u64. I tried it and it's as good as ICC (1x add and 3x adc).

    0 讨论(0)
提交回复
热议问题