Performance issue with parser written with Boost::spirit

前端 未结 4 760
后悔当初
后悔当初 2021-02-10 14:22

I want to parse a file that looks like this (FASTA-like text format):

    >InfoHeader
    \"Some text sequence that h         


        
4条回答
  •  情书的邮戳
    2021-02-10 14:51

    Previous: Step 1. Cleaning up + Profiling
    Next: Step 3: MOAR FASTER WITH ZERO-COPY

    Step 2. Faster with mmap

    Live On Coliru

    #include <string>
    #include <utility>
    #include <vector>

    #include <boost/filesystem.hpp>
    
    namespace fs = boost::filesystem;
    
    
    /// Reads a FASTA-like text file and exposes the parsed records.
    ///
    /// The file is parsed once, from the constructor; afterwards the records
    /// can be walked read-only through begin()/end().
    class FastaReader {
    
    public:
        /// One record as a pair of strings. The commented-out loop in main()
        /// prints `first` after a '>' and `second` on the following line.
        typedef std::pair<std::string, std::string> Entry;
        /// All records, in the order they were parsed.
        typedef std::vector<Entry> Data;
    
    private:
        Data fV;        ///< Parsed records, filled by parse().
        fs::path file;  ///< Path of the input file.
    
    public:
        /// Stores the path and parses the file immediately.
        FastaReader(const fs::path & f);
        ~FastaReader();
    
        /// Path this reader was constructed with.
        const fs::path & getFile() const;
        /// Read-only iterator to the first parsed record.
        const Data::const_iterator begin() const;
        /// Read-only past-the-end iterator of the parsed records.
        const Data::const_iterator end() const;   
    
    private:
        /// Parses `file` into `fV`.
        void parse();
    
    };
    
    #include 
    #include 
    #include 
    #include 
    #include 
    
    #include 
    #include 
    #include 
    //#include "fastaReader.hpp"
    
    #include 
    
    using namespace std;
    
    namespace fs = boost::filesystem;
    namespace qi = boost::spirit::qi;
    namespace pt = boost::posix_time;
    namespace io = boost::iostreams;
    
    /// Spirit.Qi grammar for a complete FASTA-like input.
    ///
    /// Synthesizes a FastaReader::Data: one (header, body) pair per record.
    /// The rule declares no skipper, so line breaks are matched explicitly by
    /// the grammar itself.
    ///
    /// @tparam Iterator  forward iterator type over the input characters.
    template <typename Iterator>
    struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
        qi::rule<Iterator, FastaReader::Data()> fasta;
    
        FastaGrammar() : FastaGrammar::base_type(fasta) {
            using namespace qi;
    
            // A record is '>' followed by the header (everything up to the
            // newline), then the body: every character up to the next '>',
            // line breaks included. The input must end after the last record.
            fasta = *('>' >> *~char_('\n') >> '\n' 
                          >> *~char_('>')) 
                    >> *eol
                    >> eoi
                    ;
    
            BOOST_SPIRIT_DEBUG_NODES((fasta));
        }
    };
    
    
    /// Remembers the input path and parses the file right away.
    FastaReader::FastaReader(const fs::path & f)
        : file(f)
    {
        this->parse();
    }
    
    /// Nothing to release by hand; the members clean up after themselves.
    FastaReader::~FastaReader() = default;
    
    /// Returns the path this reader was constructed with.
    const fs::path & FastaReader::getFile() const {
        return file;
    }
    
    
    /// Returns a read-only iterator to the first parsed record.
    const FastaReader::Data::const_iterator FastaReader::begin() const {
        return fV.cbegin();
    }
    
    
    /// Returns the read-only past-the-end iterator of the parsed records.
    const FastaReader::Data::const_iterator FastaReader::end() const {
        return fV.cend();
    }
    
    void FastaReader::parse() {
        if (this->file.empty())                throw std::runtime_error("FastaReader: No file specified.");
        if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
    
        typedef char const*                                               iterator_type;
        typedef boost::spirit::classic::position_iterator2 pos_iterator_type;
        typedef FastaGrammar                           fastaGr;
    
        io::mapped_file_source mmap(file.c_str());
    
        static const fastaGr fG{};
        try {
            std::cerr << "Measuring: Parsing." << std::endl;
            const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
    
            pos_iterator_type first(iterator_type{mmap.data()}, iterator_type{mmap.end()}, file.string());
            qi::phrase_parse(first, {}, fG, boost::spirit::ascii::space, this->fV);
    
            const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
            pt::time_duration duration (endMeasurement - startMeasurement);
            std::cerr << duration <<  std::endl;
        } catch (std::exception const& e) {
            cerr << "error message: " << e.what() << endl;
        }   
    }
    
    /// Entry point: parses "input.txt"; the timing report is printed by the reader.
    int main() {
        FastaReader reader("input.txt");
    
        // Uncomment to dump every parsed record:
        //for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
        return 0;
    }
    

    Indeed, on my system it's roughly 3x faster (input is 229 MiB):

    $ ./mapped_file_source
    Measuring: Parsing.
    00:00:07.385787
    

    Next: Step 3: MOAR FASTER WITH ZERO-COPY

提交回复
热议问题