Parse string into array based on spaces or “double quotes strings”

前端 未结 4 625
庸人自扰
庸人自扰 2021-01-21 09:29

I'm trying to take a user input string and parse it into an array called char *entire_line[100]; where each word is put at a different index of the array but if a part of the str

4条回答
  •  南笙
    南笙 (楼主)
    2021-01-21 09:59

    I wrote a qtok function some time ago that reads quoted words from a string. It's not a state machine and it doesn't make you an array but it's trivial to put the resulting tokens into one. It also handles escaped quotes and trailing and leading spaces:

    #include <stdio.h>
    #include <ctype.h>
    #include <assert.h>
    
    // Rewrites token in place so that every backslash-quote pair (\") becomes
    // a plain quote ("). Backslashes that are not directly followed by a quote
    // are copied through untouched. Returns the same pointer it was given.
    char *unescapeToken(char *token)
    {
        char *dst = token;

        for (char *src = token; *src != '\0'; src++)
        {
            // The write cursor can never overtake the read cursor, because
            // each step consumes at least as many characters as it emits.
            assert(src >= dst);

            if (src[0] == '\\' && src[1] == '"')
            {
                // Drop the backslash; the quote is copied below.
                src++;
            }
            *dst++ = *src;
        }
        *dst = '\0';
        return token;
    }
    
    // Extracts the next token from str and returns a pointer to its start.
    //
    // The buffer is modified in place: the token is NUL-terminated where it
    // ends, and quoted tokens have their \" sequences collapsed to ".
    //
    // A token is either
    //   - a quoted string: starts at a '"' and runs to the next '"' that is not
    //     preceded by a backslash (or to the end of the string if the closing
    //     quote is missing); the surrounding quotes are not part of the token;
    //   - or a plain word: runs to the next whitespace character. A quote in
    //     the middle of a plain word is an ordinary character, and escapes are
    //     not processed.
    //
    // Leading whitespace before the token and trailing whitespace after it are
    // skipped. On return *next points at the start of the following token, or
    // at the terminating NUL when the input is exhausted. If str holds only
    // whitespace (or is empty), an empty string is returned.
    //
    // next may point at the caller's own cursor variable, e.g.
    // qtok(p, &p), because str is read before *next is written.
    char *qtok(char *str, char **next)
    {
        char *current = str;
        char *start;
        int isQuoted = 0;

        // Eat beginning whitespace. The cast matters: passing a negative char
        // value (other than EOF) to a <ctype.h> function is undefined behavior.
        while (*current && isspace((unsigned char)*current)) current++;
        start = current;

        if (*current == '"')
        {
            // Quoted token: skip the beginning quote.
            isQuoted = 1;
            current++;
            start = current;

            // Run until an unescaped quote or the end of the string.
            // current[-1] is always valid here: at worst it is the opening quote.
            while (*current && !(*current == '"' && current[-1] != '\\'))
            {
                current++;
            }
        }
        else
        {
            // Not quoted, so run till we see a space.
            while (*current && !isspace((unsigned char)*current)) current++;
        }

        if (*current)
        {
            // Close the token (overwrites the closing quote or the first
            // whitespace character after the word).
            *current = 0;
            current++;
            // Eat trailing whitespace.
            while (*current && isspace((unsigned char)*current)) current++;
        }
        *next = current;

        return isQuoted ? unescapeToken(start) : start;
    }
    
    // Demo driver: tokenizes a sample line that mixes plain words, quoted
    // strings, escaped quotes and stray backslashes, printing one token per line.
    int main()
    {
        char text[] = "   \"some text in quotes\"    plus   four simple words p\"lus something strange\" \"Then some quoted \\\"words\\\", and backslashes: \\ \\ \"  Escapes only work insi\\\"de q\\\"uoted strings\\\"   ";
        char *cursor;

        // Print the input first: qtok modifies the buffer as it goes.
        printf("Original: '%s'\n", text);

        for (cursor = text; *cursor != '\0'; )
        {
            // qtok moves cursor on to the next token (or the final NUL).
            char *token = qtok(cursor, &cursor);
            printf("'%s'\n", token);
        }

        return 0;
    }
    

    Outputs:

    Original: '   "some text in quotes"    plus   four simple words p"lus something strange" "Then some quoted \"words\", and backslashes: \ \ "  Escapes only work insi\"de q\"uoted strings\"   '
    'some text in quotes'
    'plus'
    'four'
    'simple'
    'words'
    'p"lus'
    'something'
    'strange"'
    'Then some quoted "words", and backslashes: \ \ '
    'Escapes'
    'only'
    'work'
    'insi\"de'
    'q\"uoted'
    'strings\"'
    

提交回复
热议问题