Can I use GCC's __builtin_expect() with the ternary operator in C?

后端 未结 1 823
攒了一身酷
攒了一身酷 2021-02-20 06:59

The GCC manual only shows examples where __builtin_expect() is placed around the entire condition of an 'if' statement.

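I also noticed that GCC does not complain if I use it elsewhere, for example as the condition of a ternary operator. (The remainder of the question is truncated in this copy.)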

1条回答
  •  轻奢々
    轻奢々 (楼主)
    2021-02-20 07:20

    It apparently works for both ternary and regular if statements.

    First, let's take a look at the following three code samples, two of which use __builtin_expect in both regular-if and ternary-if styles, and a third which does not use it at all.

    builtin.c:

    /*
     * builtin.c -- if/else variant. The whole condition `c == 'c'` is wrapped
     * in __builtin_expect(..., 1), i.e. it is marked as the expected outcome.
     *
     * NOTE(review): no #include lines are shown; <stdio.h> is needed for
     * getchar()/printf() -- presumably omitted for brevity.
     */
    int main()
    {
        /* NOTE(review): getchar() returns int; narrowing it to char means EOF
         * cannot be told apart from a valid byte. Harmless for this demo. */
        char c = getchar();
        const char *printVal;
        if (__builtin_expect(c == 'c', 1))
        {
            printVal = "Took expected branch!\n";
        }
        else
        {
            printVal = "Boo!\n";
        }
    
        /* The selected string is passed as printf's format argument; neither
         * candidate contains a conversion specifier. */
        printf(printVal);
    }
    

    ternary.c:

    /*
     * ternary.c -- ternary variant. __builtin_expect(..., 1) wraps the
     * condition of a ?: expression instead of an if statement; the strings
     * and the final printf call are the same as in builtin.c.
     *
     * NOTE(review): no #include lines are shown; <stdio.h> is needed for
     * getchar()/printf() -- presumably omitted for brevity.
     */
    int main()
    {
        /* NOTE(review): getchar() returns int; narrowing it to char means EOF
         * cannot be told apart from a valid byte. Harmless for this demo. */
        char c = getchar();
        /* The expected (condition true) result is the first operand of ?:. */
        const char *printVal = __builtin_expect(c == 'c', 1) 
            ? "Took expected branch!\n"
            : "Boo!\n";
    
        /* The selected string is passed as printf's format argument; neither
         * candidate contains a conversion specifier. */
        printf(printVal);
    }
    

    nobuiltin.c:

    /*
     * nobuiltin.c -- baseline. Same if/else as builtin.c, but the condition
     * is a plain `c == 'c'` with no __builtin_expect around it.
     *
     * NOTE(review): no #include lines are shown; <stdio.h> is needed for
     * getchar()/printf() -- presumably omitted for brevity.
     */
    int main()
    {
        /* NOTE(review): getchar() returns int; narrowing it to char means EOF
         * cannot be told apart from a valid byte. Harmless for this demo. */
        char c = getchar();
        const char *printVal;
        if (c == 'c')
        {
            printVal = "Took expected branch!\n";
        }
        else
        {
            printVal = "Boo!\n";
        }
    
        /* The selected string is passed as printf's format argument; neither
         * candidate contains a conversion specifier. */
        printf(printVal);
    }
    

    When compiled with -O3, all three result in the same assembly. However, when the -O option is left out, i.e. when compiling without optimization (on GCC 4.7.2), ternary.c and builtin.c produce the same assembly where it matters (the compare-and-branch sequence):

    builtin.s:

        # Compiler output for builtin.c (see the .ident line: GCC 4.7.2),
        # 32-bit x86 in AT&T syntax. The .cfi_* directives only describe the
        # frame for unwinding; they emit no instructions.
        # Stack slots used below: 27(%esp) holds c, 28(%esp) holds printVal.
        .file   "builtin.c"
        .section    .rodata
        # The two candidate strings for printVal.
    .LC0:
        .string "Took expected branch!\n"
    .LC1:
        .string "Boo!\n"
        .text
        .globl  main
        .type   main, @function
    main:
    .LFB0:
        .cfi_startproc
        # Prologue: set up the %ebp frame, align %esp down to a 16-byte
        # boundary, then reserve 32 bytes for locals and the outgoing argument.
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register 5
        andl    $-16, %esp
        subl    $32, %esp
        # c = getchar(): only the low byte of the result (%al) is stored.
        call    getchar
        movb    %al, 27(%esp)
        # Compare c with 99 (ASCII 'c') ...
        cmpb    $99, 27(%esp)
        # ... and materialize the result as a 0/1 integer in %eax. These two
        # instructions plus the testl below are absent from the nobuiltin.c
        # listing.
        sete    %al
        movzbl  %al, %eax
        # Branch on that integer: jump to .L2 (the else arm) when it is 0,
        # i.e. when c != 'c'.
        testl   %eax, %eax
        je  .L2
        # Fall-through (c == 'c'): printVal = .LC0.
        movl    $.LC0, 28(%esp)
        jmp .L3
    .L2:
        # Else arm: printVal = .LC1.
        movl    $.LC1, 28(%esp)
    .L3:
        # printf(printVal): the pointer is passed in the stack slot at (%esp).
        movl    28(%esp), %eax
        movl    %eax, (%esp)
        call    printf
        # Epilogue. No instruction writes %eax after the call, so main returns
        # whatever printf left there.
        leave
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
    .LFE0:
        .size   main, .-main
        .ident  "GCC: (Debian 4.7.2-4) 4.7.2"
        .section    .note.GNU-stack,"",@progbits
    

    ternary.s:

        # Compiler output for ternary.c (see the .ident line: GCC 4.7.2),
        # 32-bit x86 in AT&T syntax. The .cfi_* directives only describe the
        # frame for unwinding; they emit no instructions.
        # Stack slots used below: 31(%esp) holds c, 24(%esp) holds printVal.
        .file   "ternary.c"
        .section    .rodata
        # The two candidate strings for printVal.
    .LC0:
        .string "Took expected branch!\n"
    .LC1:
        .string "Boo!\n"
        .text
        .globl  main
        .type   main, @function
    main:
    .LFB0:
        .cfi_startproc
        # Prologue: set up the %ebp frame, align %esp down to a 16-byte
        # boundary, then reserve 32 bytes for locals and the outgoing argument.
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register 5
        andl    $-16, %esp
        subl    $32, %esp
        # c = getchar(): only the low byte of the result (%al) is stored.
        call    getchar
        movb    %al, 31(%esp)
        # Compare c with 99 (ASCII 'c') and materialize the result as a 0/1
        # integer in %eax.
        cmpb    $99, 31(%esp)
        sete    %al
        movzbl  %al, %eax
        # Branch on that integer: jump to .L2 when it is 0, i.e. c != 'c'.
        testl   %eax, %eax
        je  .L2
        # Fall-through (c == 'c'): the ?: expression yields .LC0 in %eax.
        movl    $.LC0, %eax
        jmp .L3
    .L2:
        # Otherwise the ?: expression yields .LC1 in %eax.
        movl    $.LC1, %eax
    .L3:
        # printVal = <value of the ?: expression>, then reload it and pass it
        # to printf in the stack slot at (%esp).
        movl    %eax, 24(%esp)
        movl    24(%esp), %eax
        movl    %eax, (%esp)
        call    printf
        # Epilogue. No instruction writes %eax after the call, so main returns
        # whatever printf left there.
        leave
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
    .LFE0:
        .size   main, .-main
        .ident  "GCC: (Debian 4.7.2-4) 4.7.2"
        .section    .note.GNU-stack,"",@progbits
    

    Whereas nobuiltin.c does not (nobuiltin.s):

        # Compiler output for nobuiltin.c (see the .ident line: GCC 4.7.2),
        # 32-bit x86 in AT&T syntax. The .cfi_* directives only describe the
        # frame for unwinding; they emit no instructions.
        # Stack slots used below: 27(%esp) holds c, 28(%esp) holds printVal.
        .file   "nobuiltin.c"
        .section    .rodata
        # The two candidate strings for printVal.
    .LC0:
        .string "Took expected branch!\n"
    .LC1:
        .string "Boo!\n"
        .text
        .globl  main
        .type   main, @function
    main:
    .LFB0:
        .cfi_startproc
        # Prologue: set up the %ebp frame, align %esp down to a 16-byte
        # boundary, then reserve 32 bytes for locals and the outgoing argument.
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register 5
        andl    $-16, %esp
        subl    $32, %esp
        # c = getchar(): only the low byte of the result (%al) is stored.
        call    getchar
        movb    %al, 27(%esp)
        # Compare c with 99 (ASCII 'c') and branch directly on the flags:
        # jump to .L2 (the else arm) when c != 'c'. There is no intermediate
        # 0/1 value as in the builtin.s and ternary.s listings.
        cmpb    $99, 27(%esp)
        jne .L2
        # Fall-through (c == 'c'): printVal = .LC0.
        movl    $.LC0, 28(%esp)
        jmp .L3
    .L2:
        # Else arm: printVal = .LC1.
        movl    $.LC1, 28(%esp)
    .L3:
        # printf(printVal): the pointer is passed in the stack slot at (%esp).
        movl    28(%esp), %eax
        movl    %eax, (%esp)
        call    printf
        # Epilogue. No instruction writes %eax after the call, so main returns
        # whatever printf left there.
        leave
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
    .LFE0:
        .size   main, .-main
        .ident  "GCC: (Debian 4.7.2-4) 4.7.2"
        .section    .note.GNU-stack,"",@progbits
    

    The relevant part:

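    (The original post showed a diff image here. Reconstructed from the builtin.s and nobuiltin.c listings above, with "-" lines from builtin.s and "+" lines from the nobuiltin.c listing, the only difference in the instructions is:)

            cmpb    $99, 27(%esp)
        -   sete    %al
        -   movzbl  %al, %eax
        -   testl   %eax, %eax
        -   je  .L2
        +   jne .L2
            movl    $.LC0, 28(%esp)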

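    Basically, __builtin_expect causes extra code (sete %al, movzbl %al, %eax, testl %eax, %eax) to be executed before the branch, so the je .L2 is decided by the outcome of testl %eax, %eax, which the CPU is more likely to predict as being 1 (a naive assumption here), rather than by the direct comparison of the input char with 'c'. In the nobuiltin.c case no such code exists, and the conditional jump (jne .L2) directly follows the comparison with 'c' (cmpb $99). Remember that branch prediction is mainly done in the CPU; here GCC is simply "laying a trap" for the CPU's branch predictor to assume which path will be taken, via the extra code and the switch from jne to je. I do not have a source for this, however: Intel's official optimization manual does not mention treating first encounters with je versus jne differently for branch prediction, so I can only assume the GCC team arrived at this via trial and error.

    Note: in both listings the jump to .L2 is taken exactly when c != 'c', and the c == 'c' arm is the fall-through path, so the je/jne difference does not change which path is laid out as the fall-through. The extra sete/movzbl/testl sequence is also what unoptimized code would produce simply by materializing the integer value that __builtin_expect returns. The GCC manual describes the builtin as providing branch prediction information to the compiler, so its effect is best looked for in optimized builds.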

    I am sure there are better test cases where the effect of GCC's branch prediction hints can be seen more directly (instead of observing hints to the CPU), though I do not know how to construct such a case concisely. (Guess: it would likely involve loop unrolling during compilation.)

    0 讨论(0)
提交回复
热议问题