Split string with delimiters in C

前端 未结 20 1389
你的背包
你的背包 2020-11-21 11:56

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";


        
相关标签:
20条回答
  • 2020-11-21 12:21

    Explode & implode - initial string remains intact, dynamic memory allocation

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * A token is a non-owning view into the string handed to explode():
     * ptr holds the address of the token's first character (stored as an
     * integer) and size holds the token's length in bytes. Tokens are not
     * NUL-terminated; always use them together with their size.
     */
    typedef struct
    {
        uintptr_t   ptr;
        int         size;
    } token_t;

    /*
     * Splits the first slen bytes of str at every occurrence of the FIRST
     * character of delimiter. str is only read, never modified.
     *
     * On success *tokens receives a calloc()ed array that the caller must
     * free(), and the number of tokens (delimiters + 1) is returned.
     *
     * Returns -1, with *tokens set to NULL, when an argument is invalid,
     * when no delimiter occurs in the first slen bytes, or when the
     * allocation fails.
     */
    int explode(char *str, int slen, const char *delimiter, token_t **tokens)
    {
        int     i, found = 0, cur = 0;
        token_t *list;

        if(tokens == NULL)
        {
            return -1;
        }
        /* Leave the caller with something that is always safe to free(). */
        *tokens = NULL;

        if(str == NULL || delimiter == NULL || slen < 0)
        {
            return -1;
        }

        /* Only bytes inside [0, slen) can be delimiters; str[slen] is not
           part of the input and must not be counted. */
        for(i = 0; i < slen; i++)
        {
            if(str[i] == *delimiter)
            {
                found++;
            }
        }

        if(found == 0)
        {
            return -1;
        }

        list = calloc((size_t)found + 1, sizeof *list);
        if(list == NULL)
        {
            return -1;
        }

        list[0].ptr = (uintptr_t)str;
        for(i = 0; i < slen; i++)
        {
            if(str[i] == *delimiter)
            {
                /* Close the current token and open the next one just
                   behind the delimiter. */
                list[cur].size = (int)(&str[i] - (char*)list[cur].ptr);
                cur++;
                list[cur].ptr = (uintptr_t)&str[i + 1];
            }
        }
        /* The last token runs up to the end of the input. */
        list[cur].size = (int)(&str[slen] - (char*)list[cur].ptr);

        *tokens = list;
        return (found + 1);
    }

    /*
     * Joins size tokens into one newly allocated, NUL-terminated string,
     * placing the first character of delimiter between neighbouring tokens.
     *
     * Returns the string (the caller must free() it). With size <= 0 an
     * empty string is returned. Returns NULL when tokens or delimiter is
     * NULL or when the allocation fails.
     */
    char* implode(token_t *tokens, int size, const char *delimiter)
    {
        int     i;
        size_t  total = 0, pos = 0;
        char    *str;

        if(tokens == NULL || delimiter == NULL)
        {
            return NULL;
        }

        /* Each token needs its bytes plus one more: the separator that
           follows it, or the terminating NUL for the last token. */
        for(i = 0; i < size; i++)
        {
            total += (tokens[i].size > 0 ? (size_t)tokens[i].size : 0) + 1;
        }
        if(total == 0)
        {
            total = 1;  /* room for the NUL of an empty result */
        }

        str = calloc(total, sizeof(char));
        if(str == NULL)
        {
            return NULL;
        }

        for(i = 0; i < size; i++)
        {
            if(i > 0)
            {
                str[pos++] = *delimiter;
            }
            /* Skip empty tokens: their ptr may be 0 and must not reach memcpy. */
            if(tokens[i].size > 0)
            {
                memcpy(&str[pos], (const void*)tokens[i].ptr, (size_t)tokens[i].size);
                pos += (size_t)tokens[i].size;
            }
        }
        str[pos] = '\0';

        return str;
    }
    

    Usage:

    /*
     * Demo: explodes a comma-separated month list, joins it back together
     * with implode(), then prints every token with its length.
     */
    int main(int argc, char **argv)
    {
        int         i, c;
        char        exp[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
        /* Start as NULL so the free() calls below are valid on every path,
           including the one where explode() reports failure. */
        token_t     *tokens = NULL;
        char        *imp = NULL;

        (void)argc;
        (void)argv;

        printf("%s\n", exp);

        if((c = explode(exp, (int)strlen(exp), ",", &tokens)) > 0)
        {
            imp = implode(tokens, c, ",");
            if(imp != NULL)
            {
                printf("%s\n", imp);
            }

            for(i = 0; i < c; i++)
            {
                /* Tokens are not NUL-terminated, so bound the print with %.*s. */
                printf("%.*s, %d\n", tokens[i].size, (char*)tokens[i].ptr, tokens[i].size);
            }
        }

        free(tokens);
        free(imp);
        return 0;
    }
    
    0 讨论(0)
  • 2020-11-21 12:22

    Building on the example above, it is possible to return an array of null-terminated strings (as you asked for) that point directly into the original string. This means a string literal cannot be passed in, though, because the function has to modify the string:

    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    
    /*
     * Splits str in place at every occurrence of delim: each delimiter is
     * overwritten with '\0' and the returned array points at the pieces
     * inside str. str must therefore be writable (not a string literal) and
     * must outlive the returned array.
     *
     * Returns a malloc()ed array of (delimiters + 1) pointers followed by a
     * NULL sentinel; the caller frees the array only, never the elements.
     * An empty str yields one empty piece. If numSplits is not NULL it
     * receives the number of pieces.
     *
     * Returns NULL, and stores -1 in *numSplits, when str is NULL, delim is
     * '\0', or the allocation fails.
     */
    char** str_split( char* str, char delim, int* numSplits )
    {
        char** ret = NULL;
        int retLen = -1;
        char* c;

        if ( ( str != NULL ) &&
            ( delim != '\0' ) )
        {
            int pieces = 1;

            /* Pre-calculate number of elements; testing before advancing
               keeps the scan inside the string even when it is empty. */
            for ( c = str; *c != '\0'; c++ )
            {
                if ( *c == delim )
                {
                    pieces++;
                }
            }

            /* One slot per piece plus one for the NULL sentinel. */
            ret = malloc( ( (size_t)pieces + 1 ) * sizeof( *ret ) );

            if ( ret != NULL )
            {
                retLen = 0;
                ret[retLen++] = str;

                for ( c = str; *c != '\0'; c++ )
                {
                    if ( *c == delim )
                    {
                        *c = '\0';
                        ret[retLen++] = &c[1];
                    }
                }

                ret[retLen] = NULL;
            }
        }

        if ( numSplits != NULL )
        {
            *numSplits = retLen;
        }

        return ret;
    }
    
    /*
     * Demo: copies the month list into writable memory (str_split modifies
     * its input), splits the copy on ',' and prints every piece.
     */
    int main( int argc, char* argv[] )
    {
        const char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";

        char* strCpy;
        char** split;
        int num;
        int i;

        (void)argc;
        (void)argv;

        /* +1 for the terminating '\0' that strcpy() writes. */
        strCpy = malloc( ( strlen( str ) + 1 ) * sizeof( *strCpy ) );

        if ( strCpy == NULL )
        {
            puts( "out of memory" );
            return 1;
        }

        strcpy( strCpy, str );

        split = str_split( strCpy, ',', &num );

        if ( split == NULL )
        {
            puts( "str_split returned NULL" );
        }
        else
        {
            printf( "%i Results: \n", num );

            for ( i = 0; i < num; i++ )
            {
                puts( split[i] );
            }
        }

        /* The pieces live inside strCpy, so only these two are freed. */
        free( split );
        free( strCpy );

        return 0;
    }
    

    There is probably a neater way to do it, but you get the idea.

    0 讨论(0)
  • 2020-11-21 12:22

    Try using this.

    /*
     * Splits str into tokens separated by any of the characters in delim.
     * Runs of delimiters are treated as one separator, so no empty tokens
     * are produced (the same tokenisation strtok() performs). str is only
     * read, never modified.
     *
     * Returns a NULL-terminated, heap-allocated array of heap-allocated
     * copies of the tokens; the caller must free() every element and then
     * the array. A string without tokens yields an array holding only NULL.
     * Returns NULL when str or delim is NULL or when an allocation fails.
     */
    char** strsplit(char* str, const char* delim){
        char** res;
        const char* cur;
        int i = 0;

        if(str == NULL || delim == NULL){
            return NULL;
        }

        /* Always keep one spare slot for the NULL sentinel. */
        res = malloc(sizeof(char*));
        if(res == NULL){
            return NULL;
        }
        res[0] = NULL;

        cur = str;
        for(;;){
            size_t len;
            char*  part;
            char** grown;

            cur += strspn(cur, delim);      /* skip leading delimiters */
            if(*cur == '\0'){
                break;
            }
            len = strcspn(cur, delim);      /* length of this token */

            part = malloc(len + 1);
            /* Grow through a temporary so res stays valid on failure. */
            grown = part ? realloc(res, ((size_t)i + 2) * sizeof(char*)) : NULL;
            if(grown == NULL){
                free(part);
                while(i > 0){
                    free(res[--i]);
                }
                free(res);
                return NULL;
            }
            res = grown;

            memcpy(part, cur, len);
            part[len] = '\0';

            res[i++] = part;
            res[i] = NULL;

            cur += len;
        }

        return res;
    }
    
    0 讨论(0)
  • 2020-11-21 12:23

    You can use the strtok() function to split a string (and specify the delimiter to use). Note that strtok() will modify the string passed into it. If the original string is required elsewhere make a copy of it and pass the copy to strtok().

    EDIT:

    Example (note it does not handle consecutive delimiters, "JAN,,,FEB,MAR" for example):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>
    
    /*
     * Splits a_str on a_delim using strtok(), which overwrites delimiters in
     * a_str with '\0' - pass a writable copy if the original is still needed.
     * Leading, trailing and consecutive delimiters produce no empty tokens.
     *
     * Returns a heap-allocated array of heap-allocated token copies,
     * terminated by a NULL entry; the caller must free() every token and
     * then the array. Returns NULL when a_str is NULL or an allocation fails.
     */
    char** str_split(char* a_str, const char a_delim)
    {
        char** result    = 0;
        size_t count     = 0;
        char* tmp;
        int in_token     = 0;
        char delim[2];

        if (!a_str)
        {
            return 0;
        }

        delim[0] = a_delim;
        delim[1] = 0;

        /* Count the tokens strtok() will produce: every maximal run of
           non-delimiter characters is exactly one token. */
        for (tmp = a_str; *tmp; tmp++)
        {
            if (a_delim == *tmp)
            {
                in_token = 0;
            }
            else if (!in_token)
            {
                in_token = 1;
                count++;
            }
        }

        /* Add space for terminating null string so caller
           knows where the list of returned strings ends. */
        count++;

        result = malloc(sizeof(char*) * count);

        if (result)
        {
            size_t idx  = 0;
            char* token = strtok(a_str, delim);

            while (token)
            {
                size_t len = strlen(token) + 1;
                char* copy;

                assert(idx < count - 1);

                copy = malloc(len);
                if (!copy)
                {
                    /* Undo the partial result so nothing is leaked. */
                    while (idx > 0)
                    {
                        free(*(result + --idx));
                    }
                    free(result);
                    return 0;
                }
                memcpy(copy, token, len);

                *(result + idx++) = copy;
                token = strtok(0, delim);
            }
            assert(idx == count - 1);
            *(result + idx) = 0;
        }

        return result;
    }
    
    /*
     * Demo: prints the month list, splits it on ',' and prints each token,
     * releasing every token right after it has been printed.
     */
    int main()
    {
        char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
        char** list;
        char** cursor;

        printf("months=[%s]\n\n", months);

        list = str_split(months, ',');
        if (!list)
        {
            return 0;
        }

        /* Walk the array up to its terminating null entry. */
        for (cursor = list; *cursor != 0; ++cursor)
        {
            printf("month=[%s]\n", *cursor);
            free(*cursor);
        }
        printf("\n");
        free(list);

        return 0;
    }
    

    Output:

    $ ./main.exe
    months=[JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC]
    
    month=[JAN]
    month=[FEB]
    month=[MAR]
    month=[APR]
    month=[MAY]
    month=[JUN]
    month=[JUL]
    month=[AUG]
    month=[SEP]
    month=[OCT]
    month=[NOV]
    month=[DEC]
    
    0 讨论(0)
  • 2020-11-21 12:23
    #include <string.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <errno.h>
    
    /**
     *  Splits str in place on delim and dynamically allocates an array of
     *  pointers to the pieces.
     *
     *  Returns 0 on success and -1 on error. errno is set to EINVAL for an
     *  invalid argument (NULL pointer or delim == 0); on an allocation
     *  failure it is whatever calloc() left there.
     *
     *  On success *array receives the array and *length its element count:
     *  0 for an empty string, 1 if no delim was found.
     *
     *  Two delims in a row produce a NULL entry, so "foo,,bar" gives
     *  { "foo", NULL, "bar" }.
     *
     *  A single trailing delim does not add an entry: "foo," gives
     *  { "foo" }, exactly like "foo".
     *
     *  Every delim in str is overwritten with a NUL, so make a copy if this
     *  is a problem. The entries point into str; only the array itself has
     *  to be free()d by the caller.
     */
    int split( char * str, char delim, char ***array, int *length ) {
      char *p;
      char *start;
      char **res;
      int count=0;
      int k=0;

      if( !str || !array || !length || delim == 0 ) {
        errno = EINVAL;
        return -1;
      }

      // Count the pieces before touching str, so a failed allocation
      // leaves the input unmodified. A delim that is the very last
      // character does not open another piece.
      if( *str ) {
        count = 1;
        for( p=str; *p; p++ ) {
          if( *p == delim && p[1] != 0 ) count++;
        }
      }

      // allocate dynamic array (at least one slot, so that a NULL result
      // always means failure, even for an empty string)
      res = calloc( count ? (size_t)count : 1, sizeof(char *));
      if( !res ) return -1;

      start = str;
      for( p=str; ; p++ ) {
        int at_end = ( *p == 0 );
        if( *p == delim || at_end ) {
          if( k<count ) {
            *p = 0;                      // Null terminate this piece
            if( *start ) res[k] = start; // Empty pieces stay NULL
            k++;
          }
          if( at_end ) break;
          start = p + 1;                 // Start of next piece
        }
      }

      *array = res;
      *length = count;

      return 0;
    }
    
    /* Writable input for the demo below; split() modifies it in place. */
    char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";

    /* Demo: splits the month list on ',' and prints every entry. */
    int main() {
      char **res = NULL;
      int k=0;
      int count =0;
      int rc;

      rc = split( str, ',', &res, &count );
      if( rc ) {
        printf("Error: %s errno: %d \n", strerror(errno), errno);
        return 1;  // res and count are not valid after a failure
      }

      printf("count: %d\n", count );
      for( k=0; k<count; k++ ) {
        // An empty piece is stored as NULL, which must not reach %s.
        printf("str: %s\n", res[k] ? res[k] : "(null)");
      }

      free(res );
      return 0;
    }
    
    0 讨论(0)
  • 2020-11-21 12:27

    The method below will do all the work (memory allocation, counting the length) for you. More information and a description can be found here - Implementation of Java String.split() method to split C string

    /*
     * Splits str at every occurrence of c into freshly allocated,
     * NUL-terminated copies. str itself is not modified. Adjacent
     * delimiters produce empty strings, so the result always holds
     * (number of delimiters + 1) entries.
     *
     * On return *arr points to a malloc()ed array of malloc()ed strings;
     * the caller must free() each string and then the array.
     *
     * Returns the number of entries stored in *arr. Returns 0 when str or
     * arr is NULL (*arr is set to NULL when arr is valid). The process is
     * terminated with exit(1) when an allocation fails.
     */
    int split (const char *str, char c, char ***arr)
    {
        int count = 1;
        int i = 0;
        const char *p;
        const char *start;
        char **out;

        if (arr == NULL)
            return 0;
        *arr = NULL;
        if (str == NULL)
            return 0;

        for (p = str; *p != '\0'; p++)
        {
            if (*p == c)
                count++;
        }

        out = (char**) malloc(sizeof(char*) * count);
        if (out == NULL)
            exit(1);

        start = str;
        for (p = str; ; p++)
        {
            /* A token ends at a delimiter or at the end of the string. */
            if (*p == c || *p == '\0')
            {
                size_t len = (size_t) (p - start);
                size_t j;
                char *tok = (char*) malloc( sizeof(char) * (len + 1) );
                if (tok == NULL)
                    exit(1);

                for (j = 0; j < len; j++)
                    tok[j] = start[j];
                tok[len] = '\0';    /* the last token is terminated too */

                out[i] = tok;
                i++;

                if (*p == '\0')
                    break;
                start = p + 1;
            }
        }

        *arr = out;
        return count;
    }
    

    How to use it:

    /*
     * Demo: splits a sentence on spaces, prints every token and releases
     * the memory split() allocated.
     */
    int main (int argc, char ** argv)
    {
        int i;
        const char *s = "Hello, this is a test module for the string splitting.";
        int c = 0;
        char **arr = NULL;

        (void) argc;
        (void) argv;

        c = split(s, ' ', &arr);

        printf("found %d tokens.\n", c);

        for (i = 0; i < c; i++)
            printf("string #%d: %s\n", i, arr[i]);

        /* split() mallocs each token and the array itself. */
        for (i = 0; i < c; i++)
            free(arr[i]);
        free(arr);

        return 0;
    }
    
    0 讨论(0)
提交回复
热议问题