Parse (split) a string in C++ using string delimiter (standard C++)

后端 未结 20 2137
时光说笑
时光说笑 2020-11-21 23:44

I am parsing a string in C++ using the following:

using namespace std;

string parsed,input=\"text to be parsed\";
stringstream input_stringstream(input);

i         


        
相关标签:
20条回答
  • 2020-11-22 00:00

    An answer already exists, but the accepted answer uses the erase function, which is very costly for large inputs (think of strings that are megabytes in size). I therefore use the function below, which only reads the input string.

    /// Splits i_str on every occurrence of the string delimiter i_delim.
    ///
    /// Empty tokens between adjacent delimiters (and a leading one) are kept,
    /// but an empty token after the last delimiter is dropped, so "a-b-"
    /// yields {"a", "b"} and an empty input yields an empty vector.
    ///
    /// @param i_str   Text to split; it is only read, never modified.
    /// @param i_delim Delimiter string. If empty, the input is returned as a
    ///                single token (or nothing when the input is empty too).
    /// @return The tokens in order of appearance.
    std::vector<std::string> split(const std::string& i_str, const std::string& i_delim)
    {
        std::vector<std::string> result;

        // An empty delimiter matches at every position without advancing the
        // search, so the loop below would never terminate. Handle it up front.
        if (i_delim.empty())
        {
            if (!i_str.empty())
                result.push_back(i_str);
            return result;
        }

        std::string::size_type startIndex = 0;
        for (std::string::size_type found = i_str.find(i_delim);
             found != std::string::npos;
             found = i_str.find(i_delim, startIndex))
        {
            result.emplace_back(i_str, startIndex, found - startIndex);
            startIndex = found + i_delim.size();
        }

        // Whatever follows the last delimiter is the final token; it is skipped
        // when empty (input was empty or ended with the delimiter).
        if (startIndex != i_str.size())
            result.emplace_back(i_str, startIndex, std::string::npos);
        return result;
    }
    
    0 讨论(0)
  • 2020-11-22 00:02

    This method uses std::string::find without mutating the original string by remembering the beginning and end of the previous substring token.

    #include <iostream>
    #include <string>
    
    /// Demo: prints every ">="-separated token of "scott>=tiger", one per
    /// line, without modifying the source string. The last token is written
    /// without a trailing newline.
    int main()
    {
        const std::string s = "scott>=tiger";
        const std::string delim = ">=";

        // Use the string's own size type: `0U` would deduce `unsigned int`,
        // and assigning a size_t position to it narrows on 64-bit platforms.
        std::string::size_type start = 0;
        std::string::size_type end = s.find(delim);
        while (end != std::string::npos)
        {
            std::cout << s.substr(start, end - start) << '\n';
            start = end + delim.length();
            end = s.find(delim, start);
        }

        // No further delimiter: everything from `start` onward is the last token.
        std::cout << s.substr(start);
    }
    
    0 讨论(0)
  • 2020-11-22 00:05

    Here's my take on this. It handles the edge cases and takes an optional parameter to remove empty entries from the results.

    /// Reports whether `s` finishes with the characters of `suffix`.
    /// An empty suffix matches every string.
    bool endsWith(const std::string& s, const std::string& suffix)
    {
        // A suffix longer than the string can never match.
        if (suffix.size() > s.size())
        {
            return false;
        }

        // Compare the tail of `s` in place against `suffix`.
        const std::string::size_type tailStart = s.size() - suffix.size();
        return s.compare(tailStart, suffix.size(), suffix) == 0;
    }
    
    /// Splits `s` on every occurrence of the string `delimiter`.
    ///
    /// Empty tokens (leading, trailing, or between adjacent delimiters) are
    /// kept unless `removeEmptyEntries` is true. An empty input therefore
    /// yields one empty token, or no tokens when empty entries are removed.
    ///
    /// @param s                  Text to split; it is only read.
    /// @param delimiter          Delimiter string. If empty, no splitting is
    ///                           possible and `s` is returned as a single token.
    /// @param removeEmptyEntries When true, empty tokens are left out.
    /// @return The tokens in order of appearance.
    std::vector<std::string> split(const std::string& s, const std::string& delimiter, const bool& removeEmptyEntries = false)
    {
        std::vector<std::string> tokens;

        // An empty delimiter matches everywhere without advancing the search,
        // which would loop forever; treat the whole input as one token.
        if (delimiter.empty())
        {
            if (!removeEmptyEntries || !s.empty())
            {
                tokens.push_back(s);
            }
            return tokens;
        }

        std::string::size_type start = 0;
        while (true)
        {
            const std::string::size_type position = s.find(delimiter, start);
            const std::string::size_type end = position != std::string::npos ? position : s.length();

            if (!removeEmptyEntries || end > start)
            {
                tokens.push_back(s.substr(start, end - start));
            }

            // The token just emitted was the tail after the last delimiter.
            // Stopping here (instead of testing whether `s` ends with the
            // delimiter afterwards) avoids a spurious extra empty token when
            // the suffix overlaps a delimiter that was already consumed,
            // e.g. split("aaa", "aa").
            if (position == std::string::npos)
            {
                break;
            }
            start = position + delimiter.length();
        }

        return tokens;
    }
    

    Examples

    split("a-b-c", "-"); // [3]("a","b","c")
    
    split("a--c", "-"); // [3]("a","","c")
    
    split("-b-", "-"); // [3]("","b","")
    
    split("--c--", "-"); // [5]("","","c","","")
    
    split("--c--", "-", true); // [1]("c")
    
    split("a", "-"); // [1]("a")
    
    split("", "-"); // [1]("")
    
    split("", "-", true); // [0]()
    
    0 讨论(0)
  • 2020-11-22 00:05

    Function:

    /// Splits sWhat on every occurrence of the string delimiter sDelim.
    ///
    /// Empty tokens are kept, including one after a trailing delimiter
    /// ("a|" yields {"a", ""}). An empty input yields an empty vector.
    ///
    /// @param sWhat  Text to split; it is only read.
    /// @param sDelim Delimiter string. If empty, a non-empty sWhat is returned
    ///               as a single token.
    /// @return The tokens in order of appearance.
    std::vector<std::string> WSJCppCore::split(const std::string& sWhat, const std::string& sDelim) {
        std::vector<std::string> vRet;
        if (sWhat.empty()) {
            return vRet;
        }
        // An empty delimiter is found at every position without advancing,
        // so the search loop would never terminate; do not split at all.
        if (sDelim.empty()) {
            vRet.push_back(sWhat);
            return vRet;
        }

        const size_t nDelimLen = sDelim.length();
        size_t nPos = 0;
        while (true) {
            const std::size_t nFoundPos = sWhat.find(sDelim, nPos);
            if (nFoundPos == std::string::npos) {
                // Tail after the last delimiter; it is "" when the input ends
                // with the delimiter, which produces the trailing empty token.
                vRet.push_back(sWhat.substr(nPos));
                break;
            }
            vRet.push_back(sWhat.substr(nPos, nFoundPos - nPos));
            nPos = nFoundPos + nDelimLen;
        }
        return vRet;
    }
    

    Unit-tests:

    /// Runs WSJCppCore::split over a fixed table of inputs and checks the
    /// token count and each token against the expected values.
    /// @return the value of bTestSuccess after all comparisons have run.
    bool UnitTestSplit::run() {
        // Handed to compareN/compareS on every check; presumably they clear it
        // on a mismatch (their definitions are not visible here).
        bool bTestSuccess = true;

        // One table row: input string, delimiter, and the expected tokens.
        struct LTest {
            LTest(
                const std::string &sStr,
                const std::string &sDelim,
                const std::vector<std::string> &vExpectedVector
            ) : sStr(sStr), sDelim(sDelim), vExpectedVector(vExpectedVector) {}
            std::string sStr;
            std::string sDelim;
            std::vector<std::string> vExpectedVector;
        };

        std::vector<LTest> tests = {
            LTest("1 2 3 4 5", " ", {"1", "2", "3", "4", "5"}),
            LTest("|1f|2п|3%^|44354|5kdasjfdre|2", "|", {"", "1f", "2п", "3%^", "44354", "5kdasjfdre", "2"}),
            LTest("|1f|2п|3%^|44354|5kdasjfdre|", "|", {"", "1f", "2п", "3%^", "44354", "5kdasjfdre", ""}),
            LTest("some1 => some2 => some3", "=>", {"some1 ", " some2 ", " some3"}),
            LTest("some1 => some2 => some3 =>", "=>", {"some1 ", " some2 ", " some3 ", ""}),
        };

        int nTestIndex = 0;
        for (LTest test : tests) {
            std::string sPrefix = "test" + std::to_string(nTestIndex) + "(\"" + test.sStr + "\")";
            ++nTestIndex;

            std::vector<std::string> vSplitted = WSJCppCore::split(test.sStr, test.sDelim);
            compareN(bTestSuccess, sPrefix + ": size", vSplitted.size(), test.vExpectedVector.size());

            // Compare only the positions both vectors have, so a size mismatch
            // never indexes out of range.
            int nMin = std::min(vSplitted.size(), test.vExpectedVector.size());
            int n = 0;
            while (n < nMin) {
                compareS(bTestSuccess, sPrefix + ", element: " + std::to_string(n), vSplitted[n], test.vExpectedVector[n]);
                n++;
            }
        }

        return bTestSuccess;
    }
    
    0 讨论(0)
  • 2020-11-22 00:06

    You can also use regex for this:

    /// Splits `str` at every match of the regular expression `regex_str`
    /// and returns the pieces between the matches, in order.
    std::vector<std::string> split(const std::string str, const std::string regex_str)
    {
        std::regex separator(regex_str);

        // Submatch index -1 selects the text *between* matches, not the matches.
        std::sregex_token_iterator piece(str.begin(), str.end(), separator, -1);
        std::sregex_token_iterator finished;

        std::vector<std::string> pieces;
        for (; piece != finished; ++piece)
        {
            pieces.push_back(piece->str());
        }
        return pieces;
    }
    

    which is equivalent to :

    /// Splits `str` at every match of the regular expression `regex_str`
    /// and returns the pieces between the matches, in order.
    ///
    /// @param str       Text to split.
    /// @param regex_str Delimiter, interpreted as an ECMAScript regular
    ///                  expression (std::regex default grammar).
    /// @return The text segments found between delimiter matches.
    /// @throws std::regex_error if `regex_str` is not a valid expression.
    std::vector<std::string> split(const std::string str, const std::string regex_str)
    {
        // The regex must be a named object that outlives the iterators: they
        // keep a pointer to it rather than a copy.
        const std::regex regexz(regex_str);

        // Submatch index -1 selects the text *between* matches, not the matches.
        std::sregex_token_iterator token_iter(str.begin(), str.end(), regexz, -1);
        const std::sregex_token_iterator end;
        std::vector<std::string> list;
        while (token_iter != end)
        {
            list.emplace_back(token_iter->str());
            ++token_iter;
        }
        return list;
    }
    
    

    and use it like this :

    #include <iostream>
    #include <string>
    #include <regex>
    
    /// Splits `str` at every match of the regular expression `regex_str`
    /// and returns the pieces between the matches, in order.
    ///
    /// @param str       Text to split.
    /// @param regex_str Delimiter, interpreted as an ECMAScript regular
    ///                  expression (std::regex default grammar).
    /// @return The text segments found between delimiter matches.
    /// @throws std::regex_error if `regex_str` is not a valid expression.
    std::vector<std::string> split(const std::string str, const std::string regex_str)
    {
        // The regex has to be a named object: std::sregex_token_iterator stores
        // a pointer to it, and its constructor overload taking a temporary
        // regex is deleted, so passing std::regex(regex_str) inline is rejected.
        const std::regex delimiter(regex_str);

        // Submatch index -1 selects the text *between* matches, not the matches.
        return std::vector<std::string>(
            std::sregex_token_iterator(str.begin(), str.end(), delimiter, -1),
            std::sregex_token_iterator());
    }
    
    /// Demo: splits "lets split this" on a single space via split() and
    /// prints each resulting token on its own line.
    int main()
    {
        std::string regex_str = " ";
        std::string input_str = "lets split this";

        auto tokens = split(input_str, regex_str);
        for (auto it = tokens.begin(); it != tokens.end(); ++it)
        {
            std::cout << *it << std::endl;
        }
    }
    
    

    play with it online! http://cpp.sh/9sumb

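    You can split on plain substrings or single characters as usual, or use actual regular expressions to do the splitting. Note that the delimiter is always interpreted as a regular expression, so metacharacters such as `.` or `|` must be escaped when they are meant literally.
    It is also concise and needs nothing beyond C++11!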

    0 讨论(0)
  • 2020-11-22 00:06

    A very simple/naive approach:

    // Splits s into words at every single space character.
    // Consecutive, leading, or trailing spaces produce empty words, and the
    // result always holds at least one (possibly empty) word.
    vector<string> words_seperate(string s){
        // Start with one empty word; the word being built is always ans.back().
        vector<string> ans(1);
        for(char ch : s){
            if(ch != ' '){
                ans.back() += ch;
                continue;
            }
            // A space closes the current word and opens a new empty one.
            ans.push_back("");
        }
        return ans;
    }
    

    Or you can use boost library split function:

    // Receives the tokens produced by boost::split below.
    vector<string> result; 
    // NOTE(review): `input` is not declared in this fragment — presumably the
    // string to split. is_any_of("\t") treats every character of its argument
    // as a separate one-character delimiter (here: tab only), so this does not
    // split on a multi-character delimiter string.
    boost::split(result, input, boost::is_any_of("\t"));
    

    Or you can tokenize a C string with strtok:

    // strtok writes into the buffer it scans (each delimiter it finds is
    // overwritten with '\0'), so the input must be a modifiable char array.
    char str[] = "DELIMIT-ME-C++"; 
    // First call passes the buffer. The second argument is a set of delimiter
    // characters, not a delimiter string: any one of them ends a token.
    char *token = strtok(str, "-"); 
    // strtok returns a null pointer once no tokens remain.
    while (token) 
    { 
        // Tokens are printed back to back, with no separator between them.
        cout<<token; 
        // Passing NULL continues scanning the same buffer after the last token.
        token = strtok(NULL, "-"); 
    } 
    

    Or You can do this:

    // Single-character delimiter: std::getline cannot split on a longer string.
    char split_with=' ';
    // Collects the tokens in order of appearance.
    vector<string> words;
    string token; 
    // NOTE(review): `our_string` is not declared in this fragment — presumably
    // the text to split.
    stringstream ss(our_string);
    // Each getline call reads up to the next split_with character (or the end
    // of the stream). The loop stops at the first call that extracts nothing,
    // so a trailing delimiter does not add a final empty token.
    while(getline(ss , token , split_with)) words.push_back(token);
    
    0 讨论(0)
提交回复
热议问题