How can I implement a string data type in LLVM?

后端 未结 5 1854
眼角桃花
眼角桃花 2021-01-31 10:31

I have been looking at LLVM lately, and I find it to be quite an interesting architecture. However, looking through the tutorial and the reference material, I can't see any examples of how I might implement a string data type.

5条回答
  •  挽巷
    挽巷 (楼主)
    2021-01-31 11:23

    What is a string? An array of characters.

    What is a character? An integer.

    So while I'm no LLVM expert by any means, I would guess that if, e.g., you wanted to represent some 8-bit character set, you'd use an array of i8 (8-bit integers), or a pointer to i8. And indeed, if we have a simple hello world C program:

    #include <stdio.h>
    
    int main() {
            puts("Hello, world!");
            return 0;
    }
    

    And we compile it using llvm-gcc and dump the generated LLVM assembly:

    $ llvm-gcc -S -emit-llvm hello.c
    $ cat hello.s
    ; ModuleID = 'hello.c'
    target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
    target triple = "x86_64-linux-gnu"
    @.str = internal constant [14 x i8] c"Hello, world!\00"         ; <[14 x i8]*> [#uses=1]
    
    define i32 @main() {
    entry:
            %retval = alloca i32            ; <i32*> [#uses=2]
            %tmp = alloca i32               ; <i32*> [#uses=2]
            %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
            %tmp1 = getelementptr [14 x i8]* @.str, i32 0, i64 0            ; <i8*> [#uses=1]
            %tmp2 = call i32 @puts( i8* %tmp1 ) nounwind            ; <i32> [#uses=0]
            store i32 0, i32* %tmp, align 4
            %tmp3 = load i32* %tmp, align 4         ; <i32> [#uses=1]
            store i32 %tmp3, i32* %retval, align 4
            br label %return
    
    return:         ; preds = %entry
            %retval4 = load i32* %retval            ; <i32> [#uses=1]
            ret i32 %retval4
    }
    
    declare i32 @puts(i8*)
    

    Notice the reference to the puts function declared at the end of the file. In C, puts is

    int puts(const char *s)
    

    In LLVM, it is

    i32 @puts(i8*)
    

    The correspondence should be clear.

    As an aside, the generated LLVM is very verbose here because I compiled without optimizations. If you turn those on, the unnecessary instructions disappear:

    $ llvm-gcc -O2 -S -emit-llvm hello.c
    $ cat hello.s 
    ; ModuleID = 'hello.c'
    target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
    target triple = "x86_64-linux-gnu"
    @.str = internal constant [14 x i8] c"Hello, world!\00"         ; <[14 x i8]*> [#uses=1]
    
    define i32 @main() nounwind  {
    entry:
            %tmp2 = tail call i32 @puts( i8* getelementptr ([14 x i8]* @.str, i32 0, i64 0) ) nounwind              ; <i32> [#uses=0]
            ret i32 0
    }
    
    declare i32 @puts(i8*)
    

提交回复
热议问题