Why the output is different
Because all that matters is the sign (positive, negative or zero) of the return value. strcmp()
is not required to return +1 or -1, nor does it have to return consistent values. I suspect that in the first and third cases, the compiler optimizes away the call to strcmp()
and puts -1 in place of the return value. In the second case, I think the function is actually called. (Which calls get folded depends on the compiler and its flags: in my own build shown below, only the first call is replaced by a constant.)
What is the code of strcmp()?
Deducing from the fact that it seemingly returns the difference between the character codes of the first differing character, I'd say this is glibc's strcmp()
:
/*
 * Compare the NUL-terminated strings p1 and p2 byte by byte.
 *
 * Bytes are compared as unsigned char.  The result is the difference
 * between the first pair of bytes that differ (first string minus second),
 * or 0 when the strings are equal, so callers should rely only on its sign.
 *
 * Written with a prototype-style parameter list: the old-style (K&R)
 * declaration list is obsolescent and no longer valid in C23.
 */
int
strcmp (const char *p1, const char *p2)
{
  const unsigned char *s1 = (const unsigned char *) p1;
  const unsigned char *s2 = (const unsigned char *) p2;
  unsigned char c1, c2;

  do
    {
      c1 = (unsigned char) *s1++;
      c2 = (unsigned char) *s2++;
      /* Stop at the terminator of the first string so the loop never
         reads past it, even when both strings end at the same position.  */
      if (c1 == '\0')
        return c1 - c2;
    }
  while (c1 == c2);

  return c1 - c2;
}
Edit: @AndreyT doesn't believe me, so here's the assembly GCC 4.2 generated for me (OS X 10.7.5 64-bit Intel, default optimization level - no flags):
# -----------------------------------------------------------------------
# int main()  --  GCC 4.2 output, no optimisation flags
# Syntax: AT&T, x86-64.  Annotations use '#': in this syntax ';' separates
#         statements, so text after ';' would be parsed as an instruction.
#
# Stack slots (relative to %rbp):
#   -16  address of L_.str
#   -24  address of L_.str1
#   -32  second copy of the L_.str1 address
#   -8, -4  temporaries used to stage the return value 0
# Out: %eax = 0
# -----------------------------------------------------------------------
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _main
        .align  4, 0x90
_main:
Leh_func_begin1:
        pushq   %rbp
Ltmp0:
        movq    %rsp, %rbp
Ltmp1:
        subq    $32, %rsp               # 32 bytes for the slots listed above
Ltmp2:
        leaq    L_.str(%rip), %rax
        movq    %rax, -16(%rbp)
        leaq    L_.str1(%rip), %rax
        movq    %rax, -24(%rbp)

        # First printf: the value argument is an immediate, not a call result.
        movl    $-1, %ecx               # <- THIS!
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rsi
        movq    %rsi, %rdi              # rdi = format string
        movl    %ecx, %esi              # esi = -1
        movq    %rax, -32(%rbp)         # save the L_.str1 address before %al is overwritten
        movb    %dl, %al                # al = 0: no vector registers used by this variadic call
        callq   _printf                 # <- no call to `strcmp()` so far!

        # Second printf: strcmp(-16(%rbp), -32(%rbp)) is really called.
        movq    -16(%rbp), %rax
        movq    %rax, %rdi
        movq    -32(%rbp), %rsi
        callq   _strcmp                 # <- strcmp()
        movl    %eax, %ecx
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rdi
        movl    %ecx, %esi              # esi = strcmp result
        movb    %dl, %al                # al = 0 for the variadic call
        callq   _printf                 # <- printf()

        # Third printf: strcmp(-16(%rbp), -24(%rbp)) is really called.
        movq    -16(%rbp), %rax
        movq    -24(%rbp), %rcx
        movq    %rax, %rdi
        movq    %rcx, %rsi
        callq   _strcmp                 # <- strcmp()
        movl    %eax, %ecx
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rdi
        movl    %ecx, %esi              # esi = strcmp result
        movb    %dl, %al                # al = 0 for the variadic call
        callq   _printf                 # <- printf()

        # Return 0, staged through two stack temporaries by the unoptimised build.
        movl    $0, -8(%rbp)
        movl    -8(%rbp), %eax
        movl    %eax, -4(%rbp)
        movl    -4(%rbp), %eax
        addq    $32, %rsp
        popq    %rbp
        ret
Leh_func_end1:
# String constants used by _main; each is loaded RIP-relatively with leaq.
.section __TEXT,__cstring,cstring_literals
# First strcmp operand.
L_.str:
.asciz "a"
# Second strcmp operand.
L_.str1:
.asciz "d"
# printf format string shared by all three printf calls.
L_.str2:
.asciz "%d\n"
# Unwind (exception-handling frame) data for _main: one common record
# (Leh_frame_common) followed by one record for _main (_main.eh). All sizes
# and offsets are label differences resolved by the assembler.
# NOTE(review): the per-byte meanings given below assume the standard DWARF
# call-frame (CIE/FDE) encoding used by .eh_frame -- confirm against the
# DWARF specification before relying on them.
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame0:
Lsection_eh_frame:
Leh_frame_common:
# Size of the common record, excluding this length word.
Lset0 = Leh_frame_common_end-Leh_frame_common_begin
.long Lset0
Leh_frame_common_begin:
.long 0 # presumably the CIE id (0 marks a CIE)
.byte 1 # presumably the version
.asciz "zR" # presumably the augmentation string
.byte 1 # presumably the code alignment factor
.byte 120 # presumably the data alignment factor (0x78 read as SLEB128 = -8)
.byte 16 # presumably the return-address register number
.byte 1 # presumably the augmentation data length
.byte 16 # presumably the pointer encoding (0x10, pc-relative)
.byte 12 # presumably DW_CFA_def_cfa ...
.byte 7 # ... register 7
.byte 8 # ... offset 8
.byte 144 # presumably DW_CFA_offset for register 16 (0x90 = 0x80 | 16) ...
.byte 1 # ... at factored offset 1
.align 3
Leh_frame_common_end:
.globl _main.eh
_main.eh:
# Size of the _main record, excluding this length word.
Lset1 = Leh_frame_end1-Leh_frame_begin1
.long Lset1
Leh_frame_begin1:
# Distance back to the common record this one refers to.
Lset2 = Leh_frame_begin1-Leh_frame_common
.long Lset2
Ltmp3:
# Start of _main, stored relative to this field's own address.
.quad Leh_func_begin1-Ltmp3
# Length of _main in bytes.
Lset3 = Leh_func_end1-Leh_func_begin1
.quad Lset3
.byte 0 # presumably the augmentation data length (none)
.byte 4 # presumably DW_CFA_advance_loc4 ...
# ... by the distance from function start to Ltmp0 (just after pushq %rbp)
Lset4 = Ltmp0-Leh_func_begin1
.long Lset4
.byte 14 # presumably DW_CFA_def_cfa_offset ...
.byte 16 # ... 16
.byte 134 # presumably DW_CFA_offset for register 6 (0x86 = 0x80 | 6) ...
.byte 2 # ... at factored offset 2
.byte 4 # presumably DW_CFA_advance_loc4 ...
# ... by the distance from Ltmp0 to Ltmp1 (just after movq %rsp, %rbp)
Lset5 = Ltmp1-Ltmp0
.long Lset5
.byte 13 # presumably DW_CFA_def_cfa_register ...
.byte 6 # ... register 6
.align 3
Leh_frame_end1:
.subsections_via_symbols
And the original source code:
#include <stdio.h>
#include <string.h>
/*
 * Prints the result of strcmp() for the same pair of strings, "a" and "d",
 * with the arguments supplied in three different forms.  Only the sign of
 * each printed value is guaranteed; the exact numbers depend on the
 * compiler and library in use.
 */
int main()
{
const char *a = "a";
const char *d = "d";
printf("%d\n", strcmp("a", "d")); /* both arguments are string literals */
printf("%d\n", strcmp(a, "d")); /* pointer variable and string literal */
printf("%d\n", strcmp(a, d)); /* both arguments are pointer variables */
return 0;
}
And the output it generated (a screenshot, as better proof):