strange strtok behaviour

后端 未结 2 562
别那么骄傲
别那么骄傲 2021-01-23 07:43
char line[255];
char *token = NULL;
char *line2 = NULL;
char *temporaryToken = NULL;

if( scanf(" %[^\n]", line) > 0)
    token = strtok( line, ";" ); //divide t


        
相关标签:
2条回答
  • 2021-01-23 08:25

    strtok modifies the original string. If you want to mix calls like that, you either need to make a copy or use strtok_r.

    0 讨论(0)
  • 2021-01-23 08:30

    There are two problems with strtok().

    1. It modifies its input string.
    2. Only one set of strtok() calls can be active at a time.

    I think your problem is the latter. You also have an indentation problem in the code:

    // No braces follow the if, so only the strtok() call on the next line is conditional.
    if (scanf(" %[^\n]", line) > 0)
        token = strtok( line, ";" );
    // This do-while is not part of the if: it runs whatever scanf() returned.
    do
    {
        line2 = token;
        temporaryToken = strtok(line2, " ");
        do
        {
            // divide line2 by spaces into command and args (not the question here)
            temporaryToken = strtok(NULL, " ");
        } while (temporaryToken != NULL);
        token = strtok( NULL, ";" );
    } while(token != NULL);
    

    You probably intended it to read:

    if (scanf(" %[^\n]", line) > 0)
    {
        // Outer scan: split line on ';'.
        token = strtok(line, ";");
        do
        {
            line2 = token;
            // Inner scan: passing a non-NULL string restarts strtok(), which
            // replaces the saved position of the outer scan over line.
            temporaryToken = strtok(line2, " ");
            do
            {
                // divide line2 by spaces into command and args (not the question here)
                temporaryToken = strtok(NULL, " ");
            } while (temporaryToken != NULL);
            // The saved position now belongs to the inner scan, so this call
            // does not resume the ';' scan over line.
            token = strtok(NULL, ";");
        } while (token != NULL);
    }
    

    Assuming this is what you intended, you still have the problem that there is one strtok() running on line, and then a second one running on line2. The trouble is, the loop on line2 completely wrecks the interpretation of line. You can't use the nested loops with strtok().

    If you must use something like strtok(), then look for either POSIX strtok_r() or Microsoft's strtok_s() (but note that the C11 standard Annex K version of strtok_s() is different — see the question "Do you use the TR 24731 'safe' functions?").

    if (scanf(" %[^\n]", line) > 0)
    {
        char *end1;     // saved position of the ';' scan over line
        token = strtok_r(line, ";", &end1);
        // NOTE(review): token is NULL here when line holds only ';' characters;
        // the do-while body would then run once with a NULL token -- confirm
        // whether that input needs a guard.
        do
        {
            char *end2; // separate saved position for the ' ' scan over line2
            line2 = token;
            temporaryToken = strtok_r(line2, " ", &end2);
            do
            {
                // divide line2 by spaces into command and args (not the question here)
                temporaryToken = strtok_r(NULL, " ", &end2);
            } while (temporaryToken != NULL);
            // Resumes the outer scan from end1, which the inner scan never touched.
            token = strtok_r(NULL, ";", &end1);
        } while (token != NULL);
    }
    

    About the Comments

    While you use strtok() or one of its relatives, the input string will be modified, and if you have multiple delimiters, you will not be able to tell which delimiter was present. You can work with a copy of the string, and do comparisons (usually based on offsets from the start of the string).

    Within the limits of using strtok_r(), the solution above 'works'. Here's a test program to demonstrate:

    #include <stdio.h>
    #include <string.h>
    
    /*
     * Demonstrates nested tokenizing with strtok_r().
     *
     * Reads one line from standard input, splits it on ';' (printed as
     * "Token1") and splits each of those pieces on ' ' (printed as "Token2").
     * Each scan keeps its own saved position (end1 / end2), so the inner scan
     * does not disturb the outer one. The input buffer is modified in place.
     *
     * Returns 0 in all cases.
     */
    int main(void)
    {
        char line[1024];

        // The field width (sizeof line - 1) stops scanf() writing past line.
        if (scanf(" %1023[^\n]", line) > 0)
        {
            char *end1;
            char *token;

            printf("Input: <<%s>>\n", line);
            // Testing for NULL before the body means a line holding only ';'
            // never reaches printf() with a NULL string.
            for (token = strtok_r(line, ";", &end1);
                 token != NULL;
                 token = strtok_r(NULL, ";", &end1))
            {
                char *end2;
                char *line2 = token;
                char *temporaryToken;

                // Printed before the inner scan writes NULs into this piece.
                printf("Token1: <<%s>>\n", token);
                // A piece made only of spaces yields no Token2 at all.
                for (temporaryToken = strtok_r(line2, " ", &end2);
                     temporaryToken != NULL;
                     temporaryToken = strtok_r(NULL, " ", &end2))
                {
                    // divide line2 by spaces into command and args (not the question here)
                    printf("Token2: <<%s>>\n", temporaryToken);
                }
            }
        }

        return 0;
    }
    

    Example input and output:

    $ ./strtok-demo
    ls -la; mkdir lololol; ls -la
    Input: <<ls -la; mkdir lololol; ls -la>>
    Token1: <<ls -la>>
    Token2: <<ls>>
    Token2: <<-la>>
    Token1: << mkdir lololol>>
    Token2: <<mkdir>>
    Token2: <<lololol>>
    Token1: << ls -la>>
    Token2: <<ls>>
    Token2: <<-la>>
    $
    

    Alternative using strcspn() and strspn()

    If you don't want to demolish the original string, you must use other functions than the strtok() family. The functions strcspn() and strspn() are suitable; they are part of Standard C (C89 and later versions), albeit much less well known than some of the other functions. But they're spot on for this task.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    static char *substrdup(const char *src, size_t len);
    
    /*
     * Demonstrates nested tokenizing with strcspn()/strspn(), which leave the
     * input string unmodified.
     *
     * Reads one line from standard input, walks its ';'-separated pieces
     * (printed as "Token1" with their length), duplicates each piece with
     * substrdup() (printed as "Copy") and walks the space-separated words of
     * the copy (printed as "Token2"). Finally prints the line again as
     * "Check" to show it is unchanged.
     *
     * Returns 0 in all cases.
     */
    int main(void)
    {
        char line[1024];

        // The field width (sizeof line - 1) stops scanf() writing past line.
        if (scanf(" %1023[^\n]", line) > 0)
        {
            const char *start1 = line;
            size_t len1;

            printf("Input: <<%s>>\n", line);
            // Skip leading ';' so that input such as ";ls" is still processed
            // instead of ending the loop on a zero-length first piece.
            start1 += strspn(start1, ";");
            while ((len1 = strcspn(start1, ";")) != 0)
            {
                char *copy = substrdup(start1, len1);
                const char *start2 = copy;
                size_t len2;

                // %zu is the conversion for size_t; len1 is below 1024, so the
                // cast to int for the precision cannot overflow.
                printf("Token1: %zu <<%.*s>>\n", len1, (int)len1, start1);
                printf("Copy: <<%s>>\n", copy);
                start2 += strspn(start2, " ");      // Skip leading white space
                while ((len2 = strcspn(start2, " ")) != 0)
                {
                    printf("Token2: %zu <<%.*s>>\n", len2, (int)len2, start2);
                    start2 += len2;
                    start2 += strspn(start2, " ");  // Skip the separating spaces
                }
                free(copy);
                start1 += len1;
                start1 += strspn(start1, ";");      // Skip the separating ';'
            }
            printf("Check: <<%s>>\n", line);
        }

        return 0;
    }
    
    #include <assert.h>
    
    /*
     * Returns a newly allocated, NUL-terminated copy of the first len bytes
     * of src. The caller owns the result and must free() it.
     *
     * src must point to at least len readable bytes.
     * Never returns NULL: if the copy cannot be allocated, a message is
     * written to stderr and the program aborts.
     */
    static char *substrdup(const char *src, size_t len)
    {
        char *copy;

        // len + 1 would wrap around to 0 for the largest size_t value.
        if (len == (size_t)-1)
        {
            fputs("substrdup: length too large\n", stderr);
            abort();
        }

        copy = malloc(len + 1);
        if (copy == NULL)
        {
            // Explicit check: unlike assert(), this is not compiled out by NDEBUG.
            // Appalling error handling strategy, but callers never see NULL.
            perror("substrdup");
            abort();
        }

        // The destination is freshly allocated, so it cannot overlap src.
        memcpy(copy, src, len);
        copy[len] = '\0';
        return copy;
    }
    

    Example input and output:

    $ strcspn-demo
    ls -la; mkdir lololol; ls -la
    Input: <<ls -la; mkdir lololol; ls -la>>
    Token1: 6 <<ls -la>>
    Copy: <<ls -la>>
    Token2: 2 <<ls>>
    Token2: 3 <<-la>>
    Token1: 14 << mkdir lololol>>
    Copy: << mkdir lololol>>
    Token2: 5 <<mkdir>>
    Token2: 7 <<lololol>>
    Token1: 7 << ls -la>>
    Copy: << ls -la>>
    Token2: 2 <<ls>>
    Token2: 3 <<-la>>
    Check: <<ls -la; mkdir lololol; ls -la>>
    $
    

    This code goes back to the more comfortable while loop, rather than needing to use do-while loops, which is a benefit.

    0 讨论(0)
提交回复
热议问题