A string tokenizer in C++ that allows multiple separators

后端 未结 3 706
陌清茗
陌清茗 2021-02-15 16:45

Is there a way to tokenize a string in C++ with multiple separators? In C# I would have done:

string[] tokens = "adsl, dkks; dk".Split(new [] { ",", " ", ";" }, StringSplitOptions.RemoveEmptyEntries);


        
3条回答
  •  我在风中等你
    2021-02-15 17:17

    Here is my version (not heavily tested (yet)):

    /// Splits `s` into tokens separated by any of the strings in `delims`.
    ///
    /// The string is scanned left to right. At each step the delimiter
    /// occurrence that starts earliest is consumed; if several delimiters
    /// match at the same position, the one listed first in `delims` wins.
    ///
    /// @param s      Text to tokenize.
    /// @param delims Delimiter strings (may be longer than one character).
    ///               Empty delimiters are ignored.
    /// @return The tokens in order of appearance. Empty tokens produced by a
    ///         leading delimiter or by adjacent delimiters are kept; an empty
    ///         token after a trailing delimiter is not emitted.
    std::vector<std::string> split(std::string const& s,
        std::vector<std::string> const& delims)
    {
        std::vector<std::string> parts;

        std::string::size_type beg = 0;

        for(;;)
        {
            // Find the earliest delimiter occurrence at or after `beg`.
            auto best_pos = std::string::npos;
            std::string::size_type best_len = 0;

            for(auto const& delim: delims)
            {
                // An empty delimiter matches at `beg` without consuming
                // anything, which would make this loop spin forever.
                if(delim.empty())
                    continue;

                auto const pos = s.find(delim, beg);

                // Strict '<' keeps the first-listed delimiter on ties, and
                // npos (the maximum value) can never replace a real match.
                if(pos < best_pos)
                {
                    best_pos = pos;
                    best_len = delim.size();
                }
            }

            if(best_pos == std::string::npos)
                break;

            // `find` returns an absolute index, so the token is [beg, best_pos).
            parts.emplace_back(s, beg, best_pos - beg);
            beg = best_pos + best_len;
        }

        // Whatever follows the last delimiter is the final token.
        if(beg < s.size())
            parts.emplace_back(s, beg, std::string::npos);

        return parts;
    }
    

提交回复
热议问题