Go - How to create a parser

后端 未结 6 902
醉话见心
醉话见心 2020-12-22 23:16

I want to build a parser but have some problems understanding how to do this.

Sample string I would like to parse

{key1 = value1 | key2 = {key3 = value3} | key4 = {key5 = { key6 = value6 }}}


        
相关标签:
6条回答
  • 2020-12-22 23:36

    If you are willing to convert your input to a standard JSON format, why create a parser when there are Go libraries that do the heavy lifting for you?

    Given the following input file (/Users/lex/dev/go/data/jsoncfgo/fritjof.json):

    Input File

    {
       "key1": "value1",
       "key2" :  {
          "key3": "value3"
       },
       "key4": {
          "key5": {
             "key6": "value6"
          }
       }
    }
    

    Code Example

    package main
    
    import (
        "fmt"
        "log"
        "github.com/l3x/jsoncfgo"
    )
    
    
    // main loads the JSON config file at a hard-coded path, prints three of its
    // top-level entries (key1, key2, key4), and exits with a fatal log message
    // if the file cannot be read or if validation fails.
    func main() {
        configPath := "/Users/lex/dev/go/data/jsoncfgo/fritjof.json"
        cfg, err := jsoncfgo.ReadFile(configPath)
        if err != nil {
            log.Fatal(err) // Handle error here
        }

        key1 := cfg.RequiredString("key1")
        fmt.Printf("key1: %v\n\n", key1)

        key2 := cfg.OptionalObject("key2")
        fmt.Printf("key2: %v\n\n", key2)

        key4 := cfg.OptionalObject("key4")
        fmt.Printf("key4: %v\n\n", key4)

        // NOTE(review): Validate presumably reports problems recorded by the
        // Required*/Optional* lookups above, which is why it runs last —
        // confirm against the jsoncfgo documentation.
        if err := cfg.Validate(); err != nil {
            // Call log.Fatalf directly; deferring it and then returning only
            // obscured the fact that the program exits here.
            log.Fatalf("ERROR - Invalid config file...\n%v", err)
        }
    }
    

    Output

    key1: value1
    
    key2: map[key3:value3]
    
    key4: map[key5:map[key6:value6]]
    

    Notes

    jsoncfgo can handle any level of nested JSON objects.

    For details see:

    • http://l3x.github.io/golang-code-examples/2014/07/24/jsoncfgo-config-file-reader.html
    • http://l3x.github.io/golang-code-examples/2014/07/25/jsoncfgo-config-file-reader-advanced.html
    0 讨论(0)
  • 2020-12-22 23:43

    Be advised that, as of Go 1.8 (in beta in Q4 2016, released in Q1 2017), the following applies:

    The yacc tool (previously available by running “go tool yacc”) has been removed.
    As of Go 1.7 it was no longer used by the Go compiler.

    It has moved to the “tools” repository and is now available at golang.org/x/tools/cmd/goyacc.

    0 讨论(0)
  • 2020-12-22 23:54

    Would you like to try a parsec port for Go? I wrote a rune-based (Unicode-aware) fork of goparsec (https://github.com/sanyaade-buildtools/goparsec), which is available at https://github.com/Dwarfartisan/goparsec.

    Haskell's parsec is a powerful tool for building parsers. The first Perl 6 parser, named Pugs, was written with it. My Go edition is not simpler than yacc, but it is easier to use.

    For this example, I wrote the following code:

    parser.go

    package main
    
    import (
        "fmt"
        psc "github.com/Dwarfartisan/goparsec"
    )
    
    // kv is one parsed key/value pair.
    type kv struct {
        // key is the token text that appeared to the left of '='.
        key   string
        // value is whatever was parsed to the right of '='.
        value interface{}
    }
    
    // tchar parses one character of an unquoted token.
    // NOTE(review): psc.NoneOf presumably accepts any rune except the listed
    // ones (the grammar's delimiters and the space) — confirm against goparsec.
    var tchar = psc.NoneOf("|{}= ")
    
    // escaped parses a single character of a double-quoted string.
    //
    // When the next rune is a backslash, the rune after it selects the result:
    // 't', '"', 'n' and '\\' map to tab, double quote, newline and backslash;
    // any other rune is reported through st.Trap. When there is no backslash,
    // the result comes from psc.NoneOf("\"").
    // NOTE(review): assumes psc.Try leaves the state untouched when the
    // backslash is absent — confirm against goparsec.
    func escaped(st psc.ParseState) (interface{}, error) {
        if _, err := psc.Try(psc.Rune('\\'))(st); err != nil {
            // Not an escape sequence: fall back to the plain-character parser.
            return psc.NoneOf("\"")(st)
        }

        next, err := psc.AnyRune(st)
        if err != nil {
            return nil, err
        }

        switch next.(rune) {
        case 't':
            return '\t', nil
        case '"':
            return '"', nil
        case 'n':
            return '\n', nil
        case '\\':
            return '\\', nil
        }
        return nil, st.Trap("Unknown escape \\%r", next)
    }
    
    // token parses a key or a scalar value: either a double-quoted string whose
    // contents are read with escaped, or a bare run of one or more tchar
    // characters.
    // NOTE(review): assumes psc.ReturnString turns the collected runes into a
    // string (pair type-asserts the result to string) — confirm against goparsec.
    var token = psc.Either(
        psc.Between(psc.Rune('"'), psc.Rune('"'),
            psc.Try(psc.Bind(psc.Many1(escaped), psc.ReturnString))),
        psc.Bind(psc.Many1(tchar), psc.ReturnString))
    
    // syms returns a parser for the rune r with optional psc.Space matches
    // before and after it. On success the parser yields r itself; on failure it
    // yields nil and the underlying error.
    func syms(r rune) psc.Parser {
        return func(st psc.ParseState) (interface{}, error) {
            leading := psc.Bind_(psc.Many(psc.Space), psc.Rune(r))
            padded := psc.Bind_(leading, psc.Many(psc.Space))
            if _, err := padded(st); err != nil {
                return nil, err
            }
            return r, nil
        }
    }
    
    // Punctuation parsers for the grammar, each built with syms so that
    // surrounding spaces are consumed along with the symbol.
    var (
        lbracket = syms('{')
        rbracket = syms('}')
        eql      = syms('=')
        vbar     = syms('|')
    )
    
    // pair parses `key = value`, where key is a token and value is either a
    // token or a nested map expression, and returns the result as a kv.
    func pair(st psc.ParseState) (interface{}, error) {
        key, err := token(st)
        if err != nil {
            return nil, err
        }

        // The value follows '='; a plain token is tried before a nested map.
        valueParser := psc.Bind_(eql, psc.Either(psc.Try(token), mapExpr))
        value, err := valueParser(st)
        if err != nil {
            return nil, err
        }

        return kv{key: key.(string), value: value}, nil
    }
    // pairs parses pair entries separated by vbar ('|'), using psc.SepBy1.
    func pairs(st psc.ParseState) (interface{}, error) {
        list := psc.SepBy1(pair, vbar)
        return list(st)
    }
    // mapExpr parses a brace-delimited map. It first tries the single-pair form
    // `{ pair }` under psc.Try; if that fails it parses `{ pair | pair ... }`
    // and returns that result or its error.
    func mapExpr(st psc.ParseState) (interface{}, error) {
        single := psc.Try(psc.Between(lbracket, rbracket, pair))
        if result, err := single(st); err == nil {
            return result, nil
        }

        result, err := psc.Between(lbracket, rbracket, pairs)(st)
        if err != nil {
            return nil, err
        }
        return result, nil
    }
    
    // makeMap converts a parse result into nested map[string]interface{} values.
    //
    //   - a string is returned unchanged;
    //   - a kv becomes a one-entry map whose value is converted recursively;
    //   - a []interface{} becomes a map with one entry per element (each
    //     element is type-asserted to kv, so any other element type panics);
    //   - any other input yields an empty map.
    func makeMap(data interface{}) interface{} {
        if _, isString := data.(string); isString {
            return data
        }

        out := make(map[string]interface{})
        switch v := data.(type) {
        case kv:
            out[v.key] = makeMap(v.value)
        case []interface{}:
            for i := range v {
                entry := v[i].(kv)
                out[entry.key] = makeMap(entry.value)
            }
        }
        return out
    }
    
    // main parses a sample input with mapExpr and prints the result as nested
    // maps, or prints the parse error if parsing fails.
    func main() {
        input := `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`
        st := psc.MemoryParseState(input)

        // Parse first, then convert: mapExpr consumes the parse state and
        // makeMap turns the raw kv / []interface{} result into maps.
        ret, err := mapExpr(st)
        if err != nil {
            fmt.Println(err)
            return
        }
        fmt.Println(makeMap(ret))
    }
    

    RUN

    go run parser.go
    

    OUTPUT

    map[key1:"value1"
      key2:map[key3:10] key4:map[key5:map[key6:value6]]]
    

    This demo includes escape handling, tokens, strings, and key/value maps. You can build the parser as a package or as an application.

    0 讨论(0)
  • 2020-12-22 23:59

    That particular format is very similar to json. You could use the following code to leverage that similarity:

        // Sample input in the custom format. The loop below rewrites it token by
        // token into JSON text, which is then decoded with encoding/json.
        // NOTE(review): scanner appears to be text/scanner (Init takes a reader);
        // this assumes its default mode, where quoted strings and numbers arrive
        // as single tokens — confirm.
        var txt = `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`
    var s scanner.Scanner
    s.Init(strings.NewReader(txt))
    // b accumulates the JSON text.
    var b []byte
    
    loop:
    for {
        switch tok := s.Scan(); tok {
        case scanner.EOF:
            // End of input: leave the labelled for loop, not just the switch.
            break loop
        case '|':
            // The pair separator becomes JSON's ','.
            b = append(b, ',')
        case '=':
            // The key/value separator becomes JSON's ':'.
            b = append(b, ':')
        case scanner.Ident:
            // Bare identifiers (keys and unquoted values) become quoted strings.
            b = append(b, strconv.Quote(s.TokenText())...)
        default:
            // Everything else is copied through as scanned.
            b = append(b, s.TokenText()...)
        }
    }
    
    var m map[string]interface{}
    err := json.Unmarshal(b, &m)
    if err != nil {
        // handle error
        // NOTE(review): placeholder — as written the error is ignored and m is
        // printed regardless.
    }
    
    fmt.Printf("%#v\n",m)
    
    0 讨论(0)
  • 2020-12-23 00:01

    Writing a parser is a complicated topic that is too big to cover in a single answer.

    Rob Pike gave an excellent talk that walks through writing a lexer (which is one half of a parser) in Go: http://www.youtube.com/watch?v=HxaD_trXwRE

    You should also look at e.g. parser code in Go standard library for an example on how to do it: http://golang.org/src/pkg/go/parser/parser.go

    There are also plenty of resources on parsing on the internet. They might have examples in other languages, but it's just a matter of translating the syntax to Go.

    I recommend reading up on recursive descent parsing (e.g. http://www.cs.binghamton.edu/~zdu/parsdemo/recintro.html) or top down parsing (e.g. http://javascript.crockford.com/tdop/tdop.html, http://effbot.org/zone/simple-top-down-parsing.htm).

    0 讨论(0)
  • 2020-12-23 00:02

    What about using the standard goyacc tool? Here is a skeleton:

    main.y

    %{
    package main
    
    import (
        "fmt"
        "log"
    )
    %}
    
    %union{
        tok int
        val interface{}
        pair struct{key, val interface{}}
        pairs map[interface{}]interface{}
    }
    
    %token KEY
    %token VAL
    
    %type <val> KEY VAL
    %type <pair> pair
    %type <pairs> pairs
    
    %%
    
    /* goal: the whole input is one brace-delimited list of pairs. The resulting
       map is stored on the lexer so that main can read it after yyParse. */
    goal:
        '{' pairs '}'
        {
            yylex.(*lex).m = $2
        }
    
    /* pairs: one or more pair entries separated by '|', collected into a map.
       NOTE(review): the second action writes into $$ without assigning it
       first, so it presumably relies on yacc's default of $$ = $1 — confirm
       for goyacc. */
    pairs:
        pair
        {
            $$ = map[interface{}]interface{}{$1.key: $1.val}
        }
    |   pairs '|' pair
        {
            $$[$3.key] = $3.val
        }
    
    /* pair: KEY '=' VAL for a scalar value, or KEY '=' '{' pairs '}' for a
       nested map value. */
    pair:
        KEY '=' VAL
        {
            $$.key, $$.val = $1, $3
        }
    |   KEY '=' '{' pairs '}'
        {
            $$.key, $$.val = $1, $4
        }
    
    
    %%
    
    // token is one pre-lexed input symbol handed to the parser by lex.Lex.
    type token struct {
        // tok is the token code returned from Lex: a literal character such as
        // '{' or one of the KEY / VAL token constants.
        tok int
        // val is the value copied into the parser's semantic value by Lex.
        val interface{}
    }
    
    // lex is the lexer passed to yyParse. It replays a fixed token slice and
    // also carries the parse result.
    type lex struct {
        // tokens holds the tokens not yet handed to the parser.
        tokens []token
        // m receives the top-level map from the grammar's goal rule.
        m map[interface{}]interface{}
    }
    
    // Lex hands the next queued token to the parser: it removes the token from
    // the front of l.tokens, copies its value into lval.val and returns its
    // code. When no tokens remain it returns 0.
    // NOTE(review): 0 is presumably the end-of-input code expected by the
    // generated parser — confirm against the goyacc documentation.
    func (l *lex) Lex(lval *yySymType) int {
        if len(l.tokens) == 0 {
            return 0
        }

        next, rest := l.tokens[0], l.tokens[1:]
        l.tokens = rest
        lval.val = next.val
        return next.tok
    }
    
    // Error logs the message e and terminates the program via log.Fatal.
    // NOTE(review): presumably invoked by the generated parser on a syntax
    // error — confirm against the goyacc documentation.
    func (l *lex) Error(e string) {
        log.Fatal(e)
    }
    
    func main() {
        l := &lex{
            // {key1 = value1 | key2 = {key3 = value3} | key4 = {key5 = { key6 = value6 }}}
            []token{
                {'{', ""},
                {KEY, "key1"},
                {'=', ""},
                {VAL, "value1"},
                {'|', ""},
                {KEY, "key2"},
                {'=', ""}, 
                {'{', ""},
                {KEY, "key3"},
                {'=', ""},
                {VAL, "value3"},
                {'}', ""},
                {'|', ""},
                {KEY, "key4"},
                {'=', ""},
                {'{', ""},
                {KEY, "key5"},
                {'=', ""},
                {'{', ""},
                {KEY, "key6"},
                {'=', ""},
                {VAL, "value6"},
                {'}', ""},
                {'}', ""},
                {'}', ""},
            },
            map[interface{}]interface{}{},
        }
        yyParse(l)
        fmt.Println(l.m)
    }
    

    Output

    $ go tool yacc -o main.go main.y && go run main.go
    map[key4:map[key5:map[key6:value6]] key1:value1 key2:map[key3:value3]]
    $ 
    
    0 讨论(0)
提交回复
热议问题