boost::spirit access position iterator from semantic actions

后端 未结 1 1241
忘了有多久
忘了有多久 2020-12-10 05:04

Let's say I have code like this (line numbers for reference):

1:
2:function FuncName_1 {
3:    var Var_1 = 3;
4:    var  Var_2 = 4;
5:    ...
<
相关标签:
1条回答
  • 2020-12-10 05:56

    This has been a fun exercise, where I finally put together a working demo of on_success[1] to annotate AST nodes.

    Let's assume we want an AST like:

    namespace ast
    {
    // Source position shared by every AST node.
    // The fields start at zero so that a node which has not been annotated reads
    // as 0/0/0 instead of holding indeterminate values.
    struct LocationInfo {
        unsigned line   = 0;
        unsigned column = 0;
        unsigned length = 0;
    };

    // A name as it appears in the parsed text.
    struct Identifier     : LocationInfo {
        std::string name;
    };

    // One `var <id> = <value>;` statement.
    struct VarAssignment  : LocationInfo {
        Identifier id;
        int value = 0;
    };

    // A whole function: its name plus the assignments in its body.
    struct SourceCode     : LocationInfo {
        Identifier function;
        std::vector<VarAssignment> assignments;
    };
    }
    

    I know, 'location information' is probably overkill for the SourceCode node, but you know... Anyways, to make it easy to assign attributes to these nodes without requiring semantic actions or lots of specifically crafted constructors:

    #include <boost/fusion/adapted/struct.hpp>
    // Adapt each AST node as a Fusion sequence of the listed members, so that
    // attributes can be assigned to the nodes without semantic actions or custom
    // constructors. The inherited LocationInfo fields are not listed here.
    BOOST_FUSION_ADAPT_STRUCT(ast::Identifier,    (std::string, name))
    BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
    BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode,    (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
    

    There. Now we can declare the rules to expose these attributes:

    // Rules that take a Skipper and expose an AST node as their attribute:
    qi::rule<Iterator, ast::SourceCode(),    Skipper> SourceCode;
    qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
    // Declared without a Skipper, but still exposes an ast::Identifier:
    qi::rule<Iterator, ast::Identifier()>         Identifier;
    // no skipper, no attributes:
    qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
    

    We don't (essentially) modify the grammar, at all: attribute propagation is "just automatic"[2] :

    // Keyword and punctuation rules match fixed literals.
    KeywordFunction = lit("function");
    KeywordVar      = lit("var");
    SemiColon       = lit(';');
    
    // One letter followed by any run of alphanumerics or underscores; as_string
    // is used so the match is assigned to the Identifier as a string.
    Identifier      = as_string [ alpha >> *(alnum | char_("_")) ];
    // var <identifier> = <integer> ;
    VarAssignment   = KeywordVar >> Identifier >> '=' >> int_ >> SemiColon; 
    // function <identifier> { <zero or more assignments> }
    SourceCode      = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
    

    The magic

    How do we get the source location information attached to our nodes?

    // Lazy call of annotate with the rule's attribute (_val) and the handler's
    // first and third arguments (_1, _3); annotate treats those two as the begin
    // and end of the range it measures.
    auto set_location_info = annotate(_val, _1, _3);
    // Register the same handler as the success handler of each annotated rule.
    on_success(Identifier,    set_location_info);
    on_success(VarAssignment, set_location_info);
    on_success(SourceCode,    set_location_info);
    

    Now, annotate is just a lazy version of a callable that is defined as:

    template<typename It>
    struct annotation_f {
        typedef void result_type;
    
        annotation_f(It first) : first(first) {}
        It const first;
    
        template<typename Val, typename First, typename Last>
        void operator()(Val& v, First f, Last l) const {
            do_annotate(v, f, l, first);
        }
      private:
        void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
            using std::distance;
            li.line   = get_line(f);
            li.column = get_column(first, f);
            li.length = distance(f, l);
        }
        static void do_annotate(...) { }
    };
    

    Due to the way in which get_column works, the functor is stateful (as it remembers the start iterator)[3]. As you can see, do_annotate just accepts anything that derives from LocationInfo.

    Now, the proof of the pudding:

    // Well-formed input: this run is expected to take the success branch.
    std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var  Var_2 = 4; }";

    pos_iterator_t first(content.begin());
    pos_iterator_t last(content.end());
    pos_iterator_t iter = first;

    ParseGrammar<pos_iterator_t> resolver(first);    //  Our parser
    ast::SourceCode program;
    bool const ok = phrase_parse(iter, last, resolver, qi::space, program);

    std::cout << std::boolalpha
              << "ok  : " << ok << std::endl
              << "full: " << (iter == last) << std::endl;

    bool const complete = ok && iter == last;
    if (!complete)
    {
        // Report where the parser stopped and what input is left over.
        int const line   = get_line(iter);
        int const column = get_column(first, iter);
        std::cout << "-------------------------\n"
                  << "ERROR: Parsing failed or not complete\n"
                  << "stopped at: " << line  << ":" << column << "\n"
                  << "remaining: '" << std::string(iter, last) << "'\n"
                  << "-------------------------\n";
    }
    else
    {
        std::cout << "OK: Parsing fully succeeded\n\n"
                  << "Function name: " << program.function.name
                  << " (see L" << program.printLoc() << ")\n";
        for (auto const& assignment : program.assignments)
        {
            std::cout << "variable " << assignment.id.name
                      << " assigned value " << assignment.value
                      << " at L" << assignment.printLoc() << "\n";
        }
    }
    

    This prints:

    ok  : true
    full: true
    OK: Parsing fully succeeded
    
    Function name: FuncName_1 (see L1:1:56)
    variable Var_1 assigned value 3 at L2:3:14
    variable Var_2 assigned value 4 at L3:3:15
    

    Full Demo Program

    See it Live On Coliru

    Also showing:

    • error handling, e.g.:

      Error: expecting "=" in line 3: 
      
      var  Var_2 - 4; }
                 ^---- here
      ok  : false
      full: false
      -------------------------
      ERROR: Parsing failed or not complete
      stopped at: 1:1
      remaining: 'function FuncName_1 {
      var Var_1 = 3;
      var  Var_2 - 4; }'
      -------------------------
      
    • BOOST_SPIRIT_DEBUG macros

    • A bit of a hacky way to conveniently stream the LocationInfo part of any AST node, sorry :)
    //#define BOOST_SPIRIT_DEBUG
    #define BOOST_SPIRIT_USE_PHOENIX_V3
    #include <iomanip>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>
    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/support_line_pos_iterator.hpp>
    
    namespace qi = boost::spirit::qi;
    namespace phx= boost::phoenix;
    
    typedef boost::spirit::line_pos_iterator<std::string::const_iterator> pos_iterator_t;
    
    namespace ast
    {
        namespace manip { struct LocationInfoPrinter; }

        // Source position shared by every AST node.
        // The fields start at zero so that a node which has not been annotated
        // prints as 0:0:0 instead of exposing indeterminate values.
        struct LocationInfo {
            unsigned line   = 0;
            unsigned column = 0;
            unsigned length = 0;

            // Returns a streamable view of this location. The view holds a
            // reference to *this, so it must not outlive the node.
            manip::LocationInfoPrinter printLoc() const;
        };

        // A name as it appears in the parsed text.
        struct Identifier     : LocationInfo {
            std::string name;
        };

        // One `var <id> = <value>;` statement.
        struct VarAssignment  : LocationInfo {
            Identifier id;
            int value = 0;
        };

        // A whole function: its name plus the assignments in its body.
        struct SourceCode     : LocationInfo {
            Identifier function;
            std::vector<VarAssignment> assignments;
        };

        ///////////////////////////////////////////////////////////////////////////
        // Completely unnecessary tweak to get a "poor man's" io manipulator going
        // so we can do `std::cout << x.printLoc()` on types of `x` deriving from
        // LocationInfo
        namespace manip {
            struct LocationInfoPrinter {
                // Intentionally implicit: printLoc() builds its result with `{ *this }`.
                LocationInfoPrinter(LocationInfo const& ref) : ref(ref) {}
                LocationInfo const& ref;

                // Writes the location as `line:column:length`.
                friend std::ostream& operator<<(std::ostream& os, LocationInfoPrinter const& lip) {
                    return os << lip.ref.line << ':' << lip.ref.column << ':' << lip.ref.length;
                }
            };
        }

        manip::LocationInfoPrinter LocationInfo::printLoc() const { return { *this }; }
        // feel free to disregard this hack
        ///////////////////////////////////////////////////////////////////////////
    }
    
    // Adapt each AST node as a Fusion sequence of the listed members. The
    // inherited LocationInfo fields are not listed; the grammar's on_success
    // handlers fill those in.
    BOOST_FUSION_ADAPT_STRUCT(ast::Identifier,    (std::string, name))
    BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
    BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode,    (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
    
    struct error_handler_f {
        typedef qi::error_handler_result result_type;
        template<typename T1, typename T2, typename T3, typename T4>
            qi::error_handler_result operator()(T1 b, T2 e, T3 where, T4 const& what) const {
                std::cerr << "Error: expecting " << what << " in line " << get_line(where) << ": \n" 
                    << std::string(b,e) << "\n"
                    << std::setw(std::distance(b, where)) << '^' << "---- here\n";
                return qi::fail;
            }
    };
    
    template<typename It>
    struct annotation_f {
        typedef void result_type;
    
        annotation_f(It first) : first(first) {}
        It const first;
    
        template<typename Val, typename First, typename Last>
        void operator()(Val& v, First f, Last l) const {
            do_annotate(v, f, l, first);
        }
      private:
        void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
            using std::distance;
            li.line   = get_line(f);
            li.column = get_column(first, f);
            li.length = distance(f, l);
        }
        static void do_annotate(...) {}
    };
    
    // Grammar for one `function <name> { var <name> = <int>; ... }` block.
    // Its start rule is SourceCode and its attribute is an ast::SourceCode. The
    // on_success handlers additionally fill in the LocationInfo part of every
    // Identifier, VarAssignment and SourceCode node.
    //
    // Iterator: iterator type to parse over (defaults to pos_iterator_t).
    // Skipper:  skipper type used by the skipping rules (defaults to qi::space_type).
    template<typename Iterator=pos_iterator_t, typename Skipper=qi::space_type>
    struct ParseGrammar: public qi::grammar<Iterator, ast::SourceCode(), Skipper>
    {
        // `first` is handed to the annotate functor, which keeps it as the
        // reference point for its column computation.
        ParseGrammar(Iterator first) : 
            ParseGrammar::base_type(SourceCode),
            annotate(first)
        {
            using namespace qi;
            // Keyword and punctuation rules match fixed literals.
            KeywordFunction = lit("function");
            KeywordVar      = lit("var");
            SemiColon       = lit(';');
    
            // One letter followed by any run of alphanumerics or underscores.
            Identifier      = as_string [ alpha >> *(alnum | char_("_")) ];
            VarAssignment   = KeywordVar > Identifier > '=' > int_ > SemiColon; // note: expectation points
            SourceCode      = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
    
            // Route errors raised in these rules to the handler; `fail` makes
            // the rule report failure afterwards.
            on_error<fail>(VarAssignment, handler(_1, _2, _3, _4));
            on_error<fail>(SourceCode, handler(_1, _2, _3, _4));
    
            // Lazy call of annotate with the rule's attribute (_val) and the
            // handler's first and third arguments, which annotate uses as the
            // begin and end of the range it measures.
            auto set_location_info = annotate(_val, _1, _3);
            on_success(Identifier,    set_location_info);
            on_success(VarAssignment, set_location_info);
            on_success(SourceCode,    set_location_info);
    
            // Registers the rules with Spirit's debug macro (see the
            // commented-out BOOST_SPIRIT_DEBUG define at the top of the program).
            BOOST_SPIRIT_DEBUG_NODES((KeywordFunction)(KeywordVar)(SemiColon)(Identifier)(VarAssignment)(SourceCode))
        }
    
        // Lazy (Phoenix) wrappers around the error and annotation functors.
        phx::function<error_handler_f> handler;
        phx::function<annotation_f<Iterator>> annotate;
    
        // Rules that take a Skipper and expose an AST node as their attribute:
        qi::rule<Iterator, ast::SourceCode(),    Skipper> SourceCode;
        qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
        // Declared without a Skipper, but still exposes an ast::Identifier:
        qi::rule<Iterator, ast::Identifier()>             Identifier;
        // no skipper, no attributes:
        qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
    };
    
    int main()
    {
        std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var  Var_2 - 4; }";
    
        pos_iterator_t first(content.begin()), iter = first, last(content.end());
        ParseGrammar<pos_iterator_t> resolver(first);    //  Our parser
    
        ast::SourceCode program;
        bool ok = phrase_parse(iter,
                last,
                resolver,
                qi::space,
                program);
    
        std::cout << std::boolalpha;
        std::cout << "ok  : " << ok << std::endl;
        std::cout << "full: " << (iter == last) << std::endl;
        if(ok && iter == last)
        {
            std::cout << "OK: Parsing fully succeeded\n\n";
    
            std::cout << "Function name: " << program.function.name << " (see L" << program.printLoc() << ")\n";
            for (auto const& va : program.assignments)
                std::cout << "variable " << va.id.name << " assigned value " << va.value << " at L" << va.printLoc() << "\n";
        }
        else
        {
            int line   = get_line(iter);
            int column = get_column(first, iter);
            std::cout << "-------------------------\n";
            std::cout << "ERROR: Parsing failed or not complete\n";
            std::cout << "stopped at: " << line  << ":" << column << "\n";
            std::cout << "remaining: '" << std::string(iter, last) << "'\n";
            std::cout << "-------------------------\n";
        }
        return 0;
    }
    

    [1] sadly un(der)documented, except for the conjure sample(s)

    [2] well, I used as_string to get proper assignment to Identifier without too much work

    [3] There could be smarter ways about this in terms of performance, but for now, let's keep it simple

    0 讨论(0)
提交回复
热议问题