Several matches in a one pass parser?

前端 未结 1 1808
梦如初夏
梦如初夏 2021-01-23 07:05

I am still trying to populate several vectors with data parsed from a log. The key is to do it as fast and efficiently as possible, so I would like to collect all the data in only one pass…

相关标签:
1条回答
  • 2021-01-23 07:49

    First off: I gave you all of that in that answer, under "Separate vectors with a trait". The only difference appears to be the types and the fact that you made LogEvents members global variables (ick).

    On to your question code:

     parse(b, e, *boost::spirit::repository::qi::seek[line], dispatcher());
    

    Why are you passing the dispatcher there? Dispatcher is not a compatible attribute (in fact, it has only static members and no data members).

    So, let's fix it back to a sane data structure (instead of global variables):

    struct ParsedData
    {
        std::vector<Location> _locations;
        std::vector<Event> _events;
        void add(const Location& loc) { _locations.push_back(loc); }
        void add(const Event& ev)     { _events.push_back(ev);  }
    };
    

    Note that the containers aren't global any more and they have proper names.

    The boost::spirit::traits specializations are the same (mutatis mutandis), except that we now have a data instance, so we bind it (again, as in the original example linked above, line 52). So let's fix the usage:

    ParsedData data;
    parse(b, e, *boost::spirit::repository::qi::seek[line], data);
    return data;
    

    From here, it all worked.

    Further Cleanup And Demo

    Notes:

    • there is no reason to use raw char arrays and strlen in C++ (I used std::string)
    • there is no reason to duplicate all the code and name everything _1 or _2. I made main:

      int main() {
          do_test("TEST 1", input1, parse_test_1);
          do_test("TEST 2", input2, parse_test_2);
      }
      
    • there is no reason to use for_each with a lambda where a ranged-for would suffice. This is do_test:

      // Runs parser `f` over `input` and prints, under the heading `caption`,
      // every Location and then every Event that the parser collected.
      void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
          ParsedData const data = f(input.begin(), input.end());
          std::cout << caption << ":\n";
          std::cout << "Locations:\n";
          for (Location const& loc : data._locations) {
              // '\n' instead of std::endl: there is no need to flush the stream after every record.
              std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << '\n';
          }

          std::cout << "Events:\n";
          for (Event const& ev : data._events) {
              std::cout << " EVENT(" << ev.event << ") : " << ev.value << '\n';
          }
      }
      
    • I dropped the time member from Event since it was unused.

    Full Listing

    Live On Coliru

    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/repository/include/qi_seek.hpp>
    #include <boost/phoenix/phoenix.hpp>
    #include <cstring> // strlen
    
    // Iterator type that every parser in this listing operates on.
    using It = std::string::const_iterator;
    // Kinds of event a log line can carry (meant to be stored in Event::event).
    enum kind { SLOPE = 0, GEAR = 1 };
    
    // One position record of the log.
    // The arithmetic members are value-initialized so that a default-constructed
    // Location never carries indeterminate numbers.
    struct Location {
        int driver{};      // value after "Driver: "
        double time{};     // seconds value preceding " s"
        double vel{};      // value after "Speed: "
        double km{};       // value after "Km: "
        std::string date;  // bracketed timestamp text
        std::string road;  // value after "Road: "
    };
    
    // Optional trailing part of a log line (SLOPE or GEAR) with its numeric value.
    // Members are value-initialized so a default-constructed Event is well defined.
    struct Event {
        int event{};     // which kind of event
        double value{};  // number that follows the event label
    };
    
    // Expose both structs as Fusion sequences. The member order listed here (not the
    // declaration order in the struct) follows the field order of a log line:
    // date, time, driver, speed, road, km.
    BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km)
    BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
    
    struct ParsedData {
        std::vector<Location> _locations;
        std::vector<Event> _events;
        void add(const Location& loc) { _locations.push_back(loc); }
        void add(const Event& ev)     { _events.push_back(ev);  }
    };
    
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;
    
    // Spirit customization points that make ParsedData usable as a container attribute.
    namespace boost { namespace spirit { namespace traits {
        // Declare ParsedData a container whose element type is "either a Location or an Event".
        template <> struct is_container<ParsedData> : std::true_type {};
        template <> struct container_value<ParsedData> { typedef boost::variant<Location, Event> type; };
    
        // Hook used to append one parsed element to a ParsedData.
        template <typename T> struct push_back_container<ParsedData, T> {
            // Forwards whichever alternative the variant holds to the matching ParsedData::add overload.
            struct Visitor {
                ParsedData &data;
                typedef void result_type;
                template <typename U> void operator()(U const &ev) const { data.add(ev); }
            };
    
            static bool call(ParsedData &log, T const &attribute) {
                boost::apply_visitor(Visitor{ log }, attribute);
                return true; // unconditionally reports success
            }
        };
    } } } // namespace boost::spirit::traits
    
    // Parses [b, e) and returns the collected records. The line grammar is wrapped in
    // seek[], so this variant is the one used for input in which not every line matches.
    ParsedData parse_test_1(It b, It e) {
        using namespace qi;
    
        // Timestamp layout: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' fraction digits.
        // copy() is applied because the expression is stored in an `auto` variable.
        auto date = copy(
            repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
            repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);
    
        // NOTE(review): the semantic actions below only construct a temporary int; they do
        // not store the kind in Event::event (see the discussion following this listing).
        qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
        qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;
    
        // One complete log line: timestamp, elapsed seconds, driver, speed, road, km,
        // an optional SLOPE/GEAR part, then end of line or end of input.
        qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
            >> double_ >> " s"
            >> " => Driver: "  >> int_
            >> " - Speed: "    >> double_
            >> " - Road: "     >> raw[+graph]
            >> " - Km: "       >> double_
            >> -(slope | gear)
            >> (eol | eoi);
    
        ParsedData data;
        // Repeatedly seek forward to the next position where `line` matches.
        parse(b, e, *boost::spirit::repository::qi::seek[line], data);
        return data;
    }
    
    // Parses [b, e) and returns the collected records. Unlike parse_test_1 the line
    // grammar is repeated directly (no seek[]), so parsing stops at the first line
    // that does not match.
    ParsedData parse_test_2(It b, It e) {
        using namespace qi;
    
        // Timestamp layout: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' fraction digits.
        // copy() is applied because the expression is stored in an `auto` variable.
        auto date = copy(
            repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
            repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);
    
        // NOTE(review): the semantic actions below only construct a temporary int; they do
        // not store the kind in Event::event (see the discussion following this listing).
        qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
        qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;
    
        // One complete log line: timestamp, elapsed seconds, driver, speed, road, km,
        // an optional SLOPE/GEAR part, then end of line or end of input.
        qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
            >> double_ >> " s"
            >> " => Driver: "  >> int_
            >> " - Speed: "    >> double_
            >> " - Road: "     >> raw[+graph]
            >> " - Km: "       >> double_
            >> -(slope | gear)
            >> (eol | eoi);
    
        ParsedData data;
        // Zero or more consecutive matching lines, starting at b.
        parse(b, e, *line, data);
        return data;
    }
    
    // Not all the lines will match the parser!
    // Written as adjacent string literals (one per log line) rather than one literal with
    // backslash continuations: with continuations, any indentation of the following
    // source line becomes part of the data and the line would no longer start with '['.
    static std::string const input1 =
        "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n"
        "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n"
        "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";
    
    // All the lines shall match the parser!
    // Adjacent string literals instead of backslash continuations, so that source
    // indentation can never leak into the data (every line must start with '[').
    static std::string const input2 =
        "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";
    
    // Runs parser `f` over `input` and prints, under the heading `caption`,
    // every Location and then every Event that the parser collected.
    void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
        ParsedData const data = f(input.begin(), input.end());
        std::cout << caption << ":\n";
        std::cout << "Locations:\n";
        for (Location const& loc : data._locations) {
            // '\n' instead of std::endl: there is no need to flush the stream after every record.
            std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << '\n';
        }

        std::cout << "Events:\n";
        for (Event const& ev : data._events) {
            std::cout << " EVENT(" << ev.event << ") : " << ev.value << '\n';
        }
    }
    
    int main() {
        do_test("TEST 1", input1, parse_test_1);
        do_test("TEST 2", input2, parse_test_2);
    }
    

    Further Observations:

    1. It is unclear to me when you'd expect the Event rules (slope/gear) to match or synthesize an attribute. It's also unclear to me why those would be optional (the location part of a line cannot possibly match without that part).

    2. Also, the natural attribute exposed by a rule like

      qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
          >> double_ >> " s"
          >> " => Driver: "  >> int_
          >> " - Speed: "    >> double_
          >> " - Road: "     >> raw[+graph]
          >> " - Km: "       >> double_
          >> -(slope | gear)
          >> (eol | eoi);
      

      would have Location contain an extra field:

      // One position record of the log, including the optional trailing event.
      // The arithmetic members are value-initialized so that a default-constructed
      // Location never carries indeterminate numbers.
      struct Location {
          int driver{};
          double time{};
          double vel{};
          double km{};
          std::string date;
          std::string road;
          boost::optional<Event> event; // the optional SLOPE/GEAR part of the line
      };
      
      // Member order follows the field order of a log line; `event` comes last
      // because the optional SLOPE/GEAR part is the last field of the rule.
      BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
      BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)
      
    3. These rules are odd:

      qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
      qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;
      

      Why not use the symbols approach exactly as I showed in the linked answer (line 57/98)? If you insist on doing it the "clumsy" way, do not use semantic actions (see Boost Spirit: "Semantic actions are evil"?) but use qi::attr instead:

      qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
      qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;
      

      Among the helpful effects are that your compilation times can be cut in half, and also the attribute values actually propagate (your semantic actions had no effect at all, and actively suppressed automatic attribute propagation...).

    With these improvements in place we get:

    Live On Coliru

    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/repository/include/qi_seek.hpp>
    
    // Iterator type that every parser in this listing operates on.
    using It = std::string::const_iterator;
    // Kinds of event a log line can carry; the value is stored in Event::event.
    enum kind { SLOPE = 0, GEAR = 1 };
    
    // Optional trailing part of a log line (SLOPE or GEAR) with its numeric value.
    // Members are value-initialized so a default-constructed Event is well defined.
    struct Event {
        int event{};     // which kind of event
        double value{};  // number that follows the event label
    };
    
    // One position record of the log, including the optional trailing event.
    // The arithmetic members are value-initialized so that a default-constructed
    // Location never carries indeterminate numbers.
    struct Location {
        int driver{};
        double time{};
        double vel{};
        double km{};
        std::string date;
        std::string road;
        boost::optional<Event> event; // the optional SLOPE/GEAR part of the line
    };
    
    // Member order follows the field order of a log line; `event` comes last
    // because the optional SLOPE/GEAR part is the last field of the rule.
    BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
    BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)
    
    // With the event stored inside Location, a plain vector is all the parse result needs.
    using ParsedData = std::vector<Location>;
    
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;
    
    // Parses [b, e) in a single pass and returns one Location per matching log line.
    // Input that does not match the `line` grammar is skipped.
    ParsedData parse_test(It b, It e) {
        using namespace qi;
    
        // Timestamp layout: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' fraction digits.
        // copy() is applied because the expression is stored in an `auto` variable.
        auto date = copy(
            repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
            repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);
    
        // Optional trailing event; attr() supplies the kind as the first Event member.
        qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
        qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;
    
        // One complete log line: timestamp, elapsed seconds, driver, speed, road, km,
        // an optional SLOPE/GEAR part, then end of line or end of input.
        qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
            >> double_ >> " s"
            >> " => Driver: "  >> int_
            >> " - Speed: "    >> double_
            >> " - Road: "     >> raw[+graph]
            >> " - Km: "       >> double_
            >> -(slope | gear)
            >> (eol | eoi);
    
        ParsedData data;
        // seek[] retries its subject at successive positions with the SAME Location
        // attribute. A line that fails half-way (after its date was already stored)
        // would therefore leak that date into the next successful match. hold[] parses
        // into a copy and commits it to the real attribute only on success.
        parse(b, e, *boost::spirit::repository::qi::seek[hold[line]], data);
        return data;
    }
    
    // Not all the lines will match the parser!
    // Written as adjacent string literals (one per log line) rather than one literal with
    // backslash continuations: with continuations, any indentation of the following
    // source line becomes part of the data and the line would no longer start with '['.
    static std::string const input =
        "[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n"
        "[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n"
        "[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n"
        "[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";
    
    // Parses the sample input and prints each Location, followed by its event
    // when the line carried one.
    int main() {
        auto const parsed = parse_test(input.begin(), input.end());
        std::cout << "Locations:\n";
        for (Location const& loc : parsed) {
            // '\n' throughout (the event line already used it): no flush per record.
            std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << '\n';
            if (loc.event)
                std::cout << " - event: " << loc.event->event << " value: " << loc.event->value << "\n";
        }
    }
    

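    Printing (note that in the last two records the timestamp of the preceding skipped line is prepended to the date: a failed attempt inside seek[] leaves its partially parsed date in the Location attribute unless the attempt is wrapped in qi::hold[]):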

    Locations:
    [2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
     - event: 0 value: 5.5
    [2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
     - event: 1 value: 1
    [2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
    [2018-Mar-13 13:14:01.1702032018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
    [2018-Mar-13 13:14:01.1702032018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
     - event: 0 value: 10
    
    0 讨论(0)
提交回复
热议问题