I want to build a parser but have some problems understanding how to do this.
Sample string I would like to parse
{key1 = value1 | key2 = {key3 = val
If you are willing to convert your input to a standard JSON format, why create a parser when there are Go libraries that do the heavy lifting for you?
Given the following input file (/Users/lex/dev/go/data/jsoncfgo/fritjof.json):
Input File
{
"key1": "value1",
"key2" : {
"key3": "value3"
},
"key4": {
"key5": {
"key6": "value6"
}
}
}
Code Example
package main
import (
"fmt"
"log"
"github.com/l3x/jsoncfgo"
)
// main reads the JSON config file at a fixed path, prints the values stored
// under "key1", "key2" and "key4", and finally aborts with a fatal log message
// if cfg.Validate reports an error.
func main() {
	configPath := "/Users/lex/dev/go/data/jsoncfgo/fritjof.json"
	cfg, err := jsoncfgo.ReadFile(configPath)
	if err != nil {
		log.Fatal(err) // Handle error here
	}

	key1 := cfg.RequiredString("key1")
	fmt.Printf("key1: %v\n\n", key1)

	key2 := cfg.OptionalObject("key2")
	fmt.Printf("key2: %v\n\n", key2)

	key4 := cfg.OptionalObject("key4")
	fmt.Printf("key4: %v\n\n", key4)

	// Validate is deliberately called after the lookups above.
	// NOTE(review): presumably it reports problems recorded by the
	// Required*/Optional* calls — confirm against the jsoncfgo documentation.
	if err := cfg.Validate(); err != nil {
		// log.Fatalf exits the process, so it is called directly rather than
		// deferred right before a return.
		log.Fatalf("ERROR - Invalid config file...\n%v", err)
	}
}
Output
key1: value1
key2: map[key3:value3]
key4: map[key5:map[key6:value6]]
Notes
jsoncfgo can handle any level of nested JSON objects.
For details see:
Be advised that, with Go 1.8 (in beta in Q4 2016, released in Q1 2017):
The yacc tool (previously available by running “go tool yacc”) has been removed. As of Go 1.7 it was no longer used by the Go compiler. It has moved to the “tools” repository and is now available at golang.org/x/tools/cmd/goyacc.
Would you like to try a parsec edition for Go? I wrote a rune-based (for Unicode) fork of goparsec (https://github.com/sanyaade-buildtools/goparsec), which is available at https://github.com/Dwarfartisan/goparsec.
Haskell's parsec is a powerful tool for building parsers. The first Perl 6 parser, Pugs, was written with it. My Go edition is not simpler than yacc, but it is easier to use.
For this example, I wrote the following code:
package main
import (
"fmt"
psc "github.com/Dwarfartisan/goparsec"
)
// kv is one parsed "key = value" entry, as produced by pair.
type kv struct {
// key is the token parsed on the left of '='.
key string
// value is whatever was parsed on the right of '=': either the result of
// token or the result of mapExpr (see pair).
value interface{}
}
// tchar parses one character of an unquoted token. It is built with
// psc.NoneOf over the format's delimiter characters: '|', '{', '}', '=' and
// the space character.
var tchar = psc.NoneOf("|{}= ")
// escaped parses one character from the body of a double-quoted string.
//
// If the next rune is a backslash, the rune after it selects an escape:
// \t, \", \n and \\ yield tab, double quote, newline and backslash
// respectively; any other rune is reported through st.Trap. Without a leading
// backslash the result is whatever psc.NoneOf("\"") returns, i.e. the closing
// quote is not consumed here.
func escaped(st psc.ParseState) (interface{}, error) {
	// Try is used so that a failed backslash match falls through to the
	// plain-character branch below.
	if _, err := psc.Try(psc.Rune('\\'))(st); err != nil {
		return psc.NoneOf("\"")(st)
	}

	r, err := psc.AnyRune(st)
	if err != nil {
		return nil, err
	}

	switch r.(rune) {
	case 't':
		return '\t', nil
	case '"':
		return '"', nil
	case 'n':
		return '\n', nil
	case '\\':
		return '\\', nil
	default:
		// %c prints the offending rune; the previous %r is not a Go format
		// verb. NOTE(review): assumes st.Trap formats its arguments like
		// fmt.Sprintf — confirm against goparsec.
		return nil, st.Trap("Unknown escape \\%c", r)
	}
}
// token parses a single key or scalar value. It is an Either of two forms:
// a run of one or more escaped characters between a pair of '"' runes, or a
// run of one or more tchar characters. Both forms are bound to
// psc.ReturnString.
var token = psc.Either(
psc.Between(psc.Rune('"'), psc.Rune('"'),
psc.Try(psc.Bind(psc.Many1(escaped), psc.ReturnString))),
psc.Bind(psc.Many1(tchar), psc.ReturnString))
// syms returns a parser for the rune r surrounded by optional whitespace:
// it runs psc.Many(psc.Space), then psc.Rune(r), then psc.Many(psc.Space)
// again. On success the parser yields r itself; on failure it yields the
// underlying error.
func syms(r rune) psc.Parser {
	return func(st psc.ParseState) (interface{}, error) {
		leading := psc.Bind_(psc.Many(psc.Space), psc.Rune(r))
		padded := psc.Bind_(leading, psc.Many(psc.Space))
		if _, err := padded(st); err != nil {
			return nil, err
		}
		return r, nil
	}
}
// Punctuation parsers of the format, each built with syms for one rune.
var (
	lbracket = syms('{') // opens a map
	rbracket = syms('}') // closes a map
	eql      = syms('=') // separates a key from its value
	vbar     = syms('|') // separates two pairs
)
// pair parses "key = value" and returns it as a kv. The key is a token; the
// value after eql is either a token (attempted first, under psc.Try) or a
// nested mapExpr.
func pair(st psc.ParseState) (interface{}, error) {
	key, err := token(st)
	if err != nil {
		return nil, err
	}

	valueParser := psc.Bind_(eql, psc.Either(psc.Try(token), mapExpr))
	value, err := valueParser(st)
	if err != nil {
		return nil, err
	}

	return kv{key: key.(string), value: value}, nil
}
// pairs applies psc.SepBy1(pair, vbar) to st, i.e. pair entries with vbar as
// the separator.
func pairs(st psc.ParseState) (interface{}, error) {
	list := psc.SepBy1(pair, vbar)
	return list(st)
}
// mapExpr parses a bracketed map. It first attempts, under psc.Try, a single
// pair between lbracket and rbracket; if that fails it parses pairs between
// the same brackets. The result is whatever the successful alternative
// produced.
func mapExpr(st psc.ParseState) (interface{}, error) {
	single := psc.Try(psc.Between(lbracket, rbracket, pair))
	if one, err := single(st); err == nil {
		return one, nil
	}

	many, err := psc.Between(lbracket, rbracket, pairs)(st)
	if err != nil {
		return nil, err
	}
	return many, nil
}
// makeMap converts a parse result into plain Go values.
//
//   - a string is returned unchanged;
//   - a kv becomes a one-entry map from its key to makeMap of its value;
//   - a []interface{} is treated as a list of kv values and becomes a map with
//     one entry per element (an element that is not a kv makes the type
//     assertion panic);
//   - anything else yields an empty map.
func makeMap(data interface{}) interface{} {
	if _, isString := data.(string); isString {
		return data
	}

	out := make(map[string]interface{})
	switch v := data.(type) {
	case kv:
		out[v.key] = makeMap(v.value)
	case []interface{}:
		for i := range v {
			entry := v[i].(kv)
			out[entry.key] = makeMap(entry.value)
		}
	}
	return out
}
// main parses a sample input with mapExpr and prints the result after
// converting it to nested maps with makeMap. A parse error is printed instead.
func main() {
	input := `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`
	st := psc.MemoryParseState(input)

	// Parse first, then convert: mapExpr takes the parse state, and makeMap
	// takes the parse result.
	parsed, err := mapExpr(st)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(makeMap(parsed))
}
go run parser.go
map[key1:"value1"
key2:map[key3:10] key4:map[key5:map[key6:value6]]]
This demo includes escapes, tokens, strings and a key/value map. You can build the parser as a package or as an application.
That particular format is very similar to json. You could use the following code to leverage that similarity:
var txt = `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`

// Tokenize the input with text/scanner and rewrite it, token by token, into
// JSON syntax collected in b.
var s scanner.Scanner
s.Init(strings.NewReader(txt))

var b []byte
for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
	switch tok {
	case '|':
		// '|' separates pairs; JSON uses ','.
		b = append(b, ',')
	case '=':
		// '=' separates key and value; JSON uses ':'.
		b = append(b, ':')
	case scanner.Ident:
		// Bare identifiers become quoted strings.
		b = append(b, strconv.Quote(s.TokenText())...)
	default:
		// Every other token is copied through as scanned.
		b = append(b, s.TokenText()...)
	}
}

// Decode the rewritten text as JSON.
var m map[string]interface{}
if err := json.Unmarshal(b, &m); err != nil {
	// handle error
}
fmt.Printf("%#v\n", m)
Writing a parser is a complicated topic that is too big to cover in a single answer.
Rob Pike gave an excellent talk that walks through writing a lexer (which is one half of a parser) in Go: http://www.youtube.com/watch?v=HxaD_trXwRE
You should also look at e.g. parser code in Go standard library for an example on how to do it: http://golang.org/src/pkg/go/parser/parser.go
There are also plenty of resources on parsing on the internet. They might have examples in other languages, but it's just a matter of translating the syntax to Go.
I recommend reading up on recursive descent parsing (e.g. http://www.cs.binghamton.edu/~zdu/parsdemo/recintro.html) or top down parsing (e.g. http://javascript.crockford.com/tdop/tdop.html, http://effbot.org/zone/simple-top-down-parsing.htm).
What about using the standard goyacc tool? Here is a skeleton:
%{
package main
import (
"fmt"
"log"
)
%}
/* Semantic value record shared by the lexer and the grammar actions. */
%union{
tok int
// val is the field the lexer fills in (lval.val in Lex); KEY and VAL use it.
val interface{}
// pair is the value of the "pair" rule: one key with its value.
pair struct{key, val interface{}}
// pairs is the value of the "pairs" rule: the map built from its pair entries.
pairs map[interface{}]interface{}
}
/* Terminal symbols supplied by the lexer. */
%token KEY
%token VAL
/* Bind each symbol to the %union field that holds its value. */
%type <val> KEY VAL
%type <pair> pair
%type <pairs> pairs
%%
/* goal: the whole input is one brace-delimited list of pairs. The resulting
   map is stored on the lexer so that main can read it after yyParse. */
goal:
'{' pairs '}'
{
yylex.(*lex).m = $2
}
/* pairs: one or more pair entries separated by '|', collected into a map. */
pairs:
pair
{
// First entry: start a new map holding just this pair.
$$ = map[interface{}]interface{}{$1.key: $1.val}
}
| pairs '|' pair
{
// NOTE(review): $$ is not assigned explicitly here; this appears to rely on
// yacc's default of $$ = $1 so that the entry is added to the existing map —
// confirm against the goyacc documentation.
$$[$3.key] = $3.val
}
/* pair: KEY '=' followed by either a scalar VAL or a nested list of pairs. */
pair:
KEY '=' VAL
{
$$.key, $$.val = $1, $3
}
| KEY '=' '{' pairs '}'
{
// The nested map itself becomes the value.
$$.key, $$.val = $1, $4
}
%%
// token is one pre-built lexer token.
type token struct {
// tok is the value Lex returns to the parser for this token.
tok int
// val is copied into lval.val by Lex.
val interface{}
}
// lex feeds a fixed token list to the parser and receives the parse result.
type lex struct {
// tokens is the remaining input; Lex removes one token from the front per call.
tokens []token
// m is set by the grammar's goal action to the parsed map.
m map[interface{}]interface{}
}
// Lex removes the first token from l.tokens, stores its value in lval.val and
// returns its token code. Once the list is empty it returns 0.
func (l *lex) Lex(lval *yySymType) int {
	if len(l.tokens) > 0 {
		next := l.tokens[0]
		l.tokens = l.tokens[1:]
		lval.val = next.val
		return next.tok
	}
	return 0
}
// Error receives the message e and passes it to log.Fatal, which logs it and
// exits the program.
func (l *lex) Error(e string) {
log.Fatal(e)
}
func main() {
l := &lex{
// {key1 = value1 | key2 = {key3 = value3} | key4 = {key5 = { key6 = value6 }}}
[]token{
{'{', ""},
{KEY, "key1"},
{'=', ""},
{VAL, "value1"},
{'|', ""},
{KEY, "key2"},
{'=', ""},
{'{', ""},
{KEY, "key3"},
{'=', ""},
{VAL, "value3"},
{'}', ""},
{'|', ""},
{KEY, "key4"},
{'=', ""},
{'{', ""},
{KEY, "key5"},
{'=', ""},
{'{', ""},
{KEY, "key6"},
{'=', ""},
{VAL, "value6"},
{'}', ""},
{'}', ""},
{'}', ""},
},
map[interface{}]interface{}{},
}
yyParse(l)
fmt.Println(l.m)
}
$ go tool yacc -o main.go main.y && go run main.go
map[key4:map[key5:map[key6:value6]] key1:value1 key2:map[key3:value3]]
$