Split string with delimiters in C

前端 未结 20 1418
你的背包
你的背包 2020-11-21 11:56

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";


        
相关标签:
20条回答
  • 2020-11-21 12:04

    This optimized method creates (or updates an existing) array of pointers in *result and returns the number of elements in *count.

    Use "max" to indicate the maximum number of strings you expect (when you specify an existing array, or for any other reason); otherwise set it to 0.

    To compare against a list of delimiters, define delim as a char* and replace the line:

    if (str[i]==delim) {
    

    with the two following lines:

     char *c=delim; while(*c && *c!=str[i]) c++;
     if (*c) {
    

    Enjoy

    #include <stdlib.h>
    #include <string.h>
    
    // Split `str` in place on every occurrence of `delim`, storing a pointer to
    // the start of each piece in an array of char pointers.
    //
    //   str    - writable buffer to split; each delimiter that is split on is
    //            overwritten with a zero byte
    //   len    - number of bytes of `str` to scan
    //   delim  - delimiter character
    //   result - in/out: when *result is non-NULL it is used as the output array
    //            (the caller must make sure it is large enough, e.g. by passing
    //            `max`); when *result is NULL an array is allocated with malloc()
    //            and stored in *result (the caller frees it)
    //   count  - out: number of pointers stored in the array
    //   max    - maximum number of pieces to produce, 0 for no limit; when the
    //            limit is hit the last piece holds the unsplit remainder
    //
    // Returns the output array, or NULL when an argument is NULL or allocation
    // fails (in which case *count is 0 and `str` is left unmodified).
    char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
      size_t i;
      unsigned long found = 1;   // there is always at least one piece
      char **out;

      if (!str || !result || !count) {
        if (count) *count = 0;
        return NULL;
      }

      out = *result;

      if (!out) {
        // No array supplied: count the pieces first, without touching the
        // buffer, so that a failed allocation leaves the input intact.
        for (i = 0; i < len && !(max && found >= max); ++i) {
          if (str[i] == delim) ++found;
        }

        if (found > (size_t)-1 / sizeof *out) {
          *count = 0;
          return NULL;
        }
        out = malloc(found * sizeof *out);
        if (!out) {
          *count = 0;
          return NULL;
        }
        *result = out;
        found = 1;
      }

      out[0] = str;

      // The limit is tested BEFORE splitting, so max == 1 yields exactly one
      // piece and a caller array sized by `max` is never overrun.
      //
      // To compare against a list of delimiters, define delim as a string and
      // replace the test `str[i] == delim` with:
      //     char *c=delim; while(*c && *c!=str[i]) c++;
      //     if (*c) {
      for (i = 0; i < len && !(max && found >= max); ++i) {
        if (str[i] == delim) {
          str[i] = 0;                    // terminate the current piece
          out[found++] = str + i + 1;    // next piece starts after the delimiter
        }
      }

      *count = found;
      return out;
    }  
    

    Usage example:

    #include <stdio.h>
    
    // Demonstrates both calling modes of split(): filling a caller-supplied
    // array limited to 6 entries, and letting split() allocate the array.
    int main(int argc, char **argv) {
      // split() overwrites delimiters in its input, so each call gets its own
      // writable buffer; local arrays need no strdup() and cannot leak.
      char months[]="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
      char months2[]="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
      // If this allocation fails, result is NULL and split() allocates instead.
      char **result=malloc(6*sizeof *result);
      char **result2=NULL;
      unsigned long count=0;
      unsigned long count2=0;
      unsigned long i;

      (void)argc;
      (void)argv;

      split(months,strlen(months),',',&result,&count,6);
      split(months2,strlen(months2),',',&result2,&count2,0);

      if (result) {
        for (i=0; i<count; ++i) {
          printf("%s\n",result[i]);
        }
      }

      printf("\n");

      if (result2) {
        for (i=0; i<count2; ++i) {
          printf("%s\n", result2[i]);
        }
      }

      // The arrays hold pointers into the local buffers; only the arrays
      // themselves were heap-allocated.
      free(result);
      free(result2);

      return 0;

    }
    
    0 讨论(0)
  • 2020-11-21 12:04

    My version:

    /*
     * Split `str` on `delimeter` into newly allocated copies of each token.
     *
     * Leading, trailing and repeated delimiters are skipped, so no empty token
     * is produced between them. An input with no token at all (empty string or
     * delimiters only) still yields one empty string.
     *
     * str       - NUL-terminated input; it is only read, never modified
     * delimeter - delimiter character
     * args      - out: receives a malloc'ed array of malloc'ed strings; the
     *             caller frees every element and then the array
     *
     * Returns the number of tokens, or -1 (with *args set to NULL when `args`
     * is non-NULL) if an argument is NULL or an allocation fails.
     */
    int split(char* str, const char delimeter, char*** args) {
        if (args == NULL) return -1;
        *args = NULL;
        if (str == NULL) return -1;

        /* Skip leading delimiters; the terminator test keeps a '\0' delimiter
         * from walking off the end of the string. */
        const char* t = str;
        while (*t != '\0' && *t == delimeter) t++;

        /* A new token starts wherever a delimiter is followed by a character
         * that is neither a delimiter nor the terminator. */
        int cnt = 1;
        for (const char* p = t; *p != '\0'; p++)
            if (*p == delimeter && p[1] != delimeter && p[1] != '\0') cnt++;

        char** out = malloc(sizeof *out * (size_t)cnt);
        if (out == NULL) return -1;

        for (int i = 0; i < cnt; i++) {
            const char* ts = t;
            while (*t != delimeter && *t != '\0') t++;

            size_t len = (size_t)(t - ts);
            out[i] = malloc(len + 1);
            if (out[i] == NULL) {
                /* Release everything built so far; nothing is handed back. */
                while (i-- > 0) free(out[i]);
                free(out);
                return -1;
            }
            memcpy(out[i], ts, len);
            out[i][len] = '\0';

            while (*t != '\0' && *t == delimeter) t++;
        }

        *args = out;
        return cnt;
    }
    
    0 讨论(0)
  • 2020-11-21 12:06

    If you are willing to use an external library, I can't recommend bstrlib enough. It takes a little extra setup, but is easier to use in the long run.

    For example, to split the string below, one first creates a bstring with the bfromcstr() call. (A bstring is a wrapper around a char buffer.) Next, split the string on commas, saving the result in a struct bstrList, which has the fields qty and entry, the latter being an array of bstrings.

    bstrlib has many other functions to operate on bstrings

    Easy as pie...

    #include "bstrlib.h"
    #include <stdio.h>
    /*
     * Demo: build a bstring from a C string, split it on ',' and print the
     * number of pieces followed by each piece with its index.
     *
     * NOTE(review): nothing obtained from bfromcstr(), bsplit() or bstr2cstr()
     * is released here; presumably each needs the matching bstrlib cleanup
     * call - confirm against the bstrlib documentation.
     */
    int main() {
      bstring source = bfromcstr("Hello,World,sak");
      struct bstrList *parts = bsplit(source, ',');
      int idx = 0;

      printf("num %d\n", parts->qty);

      while (idx < parts->qty) {
        /* '_' is handed to bstr2cstr() as its second argument. */
        printf("%d: %s\n", idx, bstr2cstr(parts->entry[idx], '_'));
        idx++;
      }

    }
    
    0 讨论(0)
  • 2020-11-21 12:07

    I think strsep is still the best tool for this:

    while ((token = strsep(&str, ","))) my_fn(token);
    

    That is literally one line that splits a string.

    The extra parentheses are a stylistic element to indicate that we're intentionally testing the result of an assignment, not an equality operator ==.

    For that pattern to work, token and str both have type char *. If you started with a string literal, then you'd want to make a copy of it first:

    // More general pattern:
    // Tokenize a copy of a read-only string, calling my_fn() on every token.
    const char *my_str_literal = "JAN,FEB,MAR";
    char *token, *str, *tofree;
    
    // strsep() advances str as it goes, so the pointer returned by strdup()
    // is kept in tofree in order to release the copy afterwards.
    tofree = str = strdup(my_str_literal);  // We own str's memory now.
    // The loop ends when strsep() returns NULL, i.e. when no input is left.
    while ((token = strsep(&str, ","))) my_fn(token);
    free(tofree);
    

    If two delimiters appear together in str, you'll get a token value that's the empty string. The value of str is modified in that each delimiter encountered is overwritten with a zero byte - another good reason to copy the string being parsed first.

    In a comment, someone suggested that strtok is better than strsep because strtok is more portable. Ubuntu and Mac OS X have strsep; it's safe to guess that other unixy systems do as well. Windows lacks strsep, but it has strpbrk, which enables this short and sweet strsep replacement:

    /*
     * Replacement for strsep() built on strpbrk().
     *
     * Returns the token starting at *stringp, or NULL when *stringp is NULL.
     * The first character of the token that appears in `delim` is overwritten
     * with '\0' and *stringp is moved just past it; when no such character
     * exists, *stringp becomes NULL so that the next call returns NULL.
     */
    char *strsep(char **stringp, const char *delim) {
      char *begin = *stringp;
      char *hit;

      if (begin == NULL) {
        return NULL;
      }

      hit = strpbrk(begin, delim);
      if (hit == NULL) {
        /* Last token: nothing is left to scan. */
        *stringp = NULL;
      } else {
        *hit = '\0';
        *stringp = hit + 1;
      }

      return begin;
    }
    

    Here is a good explanation of strsep vs strtok. The pros and cons may be judged subjectively; however, I think it's a telling sign that strsep was designed as a replacement for strtok.

    0 讨论(0)
  • 2020-11-21 12:07

    My code (tested):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Split `str` in place into tokens separated by any character of `delim`.
     *
     * Runs of delimiters are treated as one separator and leading/trailing
     * delimiters are ignored, so no empty tokens are produced (the same rules
     * as strtok()). Unlike strtok(), no hidden static state is used, so a
     * tokenization the caller has in progress is not disturbed.
     *
     * str    - writable, NUL-terminated buffer; the delimiter that ends each
     *          token is overwritten with '\0'
     * delim  - set of delimiter characters
     * array  - out: malloc'ed array of pointers into `str` (the caller frees
     *          the array only); NULL when there are no tokens
     * length - out: number of tokens
     *
     * Returns 1 on success. Returns 0 if an argument is NULL or memory runs
     * out; in the latter case *array is NULL, *length is 0, and `str` may
     * already have been partially split.
     */
    int dtmsplit(char *str, const char *delim, char ***array, int *length ) {
      char **res = NULL;
      size_t used = 0;
      size_t cap = 0;
      char *cursor;

      if (str == NULL || delim == NULL || array == NULL || length == NULL)
        return 0;

      /* skip leading delimiters */
      cursor = str + strspn(str, delim);

      while (*cursor != '\0') {
        char *end = cursor + strcspn(cursor, delim);

        if (used == cap) {
          /* Grow geometrically; keep the old pointer until realloc succeeds
           * so that it can still be freed on failure. */
          size_t new_cap = cap ? cap * 2 : 8;
          char **grown = NULL;

          if (new_cap <= (size_t)-1 / sizeof *res)
            grown = realloc(res, new_cap * sizeof *res);
          if (grown == NULL) {
            free(res);
            *array = NULL;
            *length = 0;
            return 0;
          }
          res = grown;
          cap = new_cap;
        }

        res[used++] = cursor;

        if (*end == '\0')
          break;                          /* last token reached the terminator */

        *end = '\0';                      /* terminate this token */
        cursor = end + 1;
        cursor += strspn(cursor, delim);  /* collapse repeated delimiters */
      }

      *array = res;
      *length = (int) used;
      return 1;
    }
    
    /* Demo: split a comma-separated list of month names and print each token. */
    int main()
    {
        int i;
        char **arr = NULL;

        int count =0;

        /* dtmsplit() needs a writable buffer, so an array is used here rather
         * than a pointer to a string literal. */
        char str[80] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
        if (!dtmsplit(str, ",", &arr, &count)) {
            fprintf(stderr, "dtmsplit failed\n");
            return 1;
        }
        printf("Found %d tokens.\n", count);

        for (i = 0; i < count; i++)
            printf("string #%d: %s\n", i, arr[i]);

        /* The tokens point into str; only the pointer array was allocated. */
        free(arr);

       return(0);
    }
    

    Result:

    Found 12 tokens.
    string #0: JAN
    string #1: FEB
    string #2: MAR
    string #3: APR
    string #4: MAY
    string #5: JUN
    string #6: JUL
    string #7: AUG
    string #8: SEP
    string #9: OCT
    string #10: NOV
    string #11: DEC
    
    0 讨论(0)
  • 2020-11-21 12:11

    Below is my strtok() implementation from zString library. zstring_strtok() differs from standard library's strtok() in the way it treats consecutive delimiters.

    Just have a look at the code below; I am sure you will get an idea of how it works (I tried to use as many comments as I could).

    /*
     * strtok()-like tokenizer that reports consecutive delimiters instead of
     * skipping them.
     *
     * str   - buffer to tokenize on the first call, or 0 to continue with the
     *         buffer remembered from the previous call; it must be writable
     *         because the delimiter ending a token is overwritten with '\0'
     * delim - delimiter string; only its first character, delim[0], is used
     *
     * Returns:
     *   - 0 if delim is 0, or if str is 0 and no input is remembered;
     *   - the rest of the input if it contains no delimiter (nothing is
     *     remembered afterwards);
     *   - `delim` itself if the input starts with the delimiter;
     *   - otherwise the next token, now NUL-terminated.
     *
     * The position is kept in a static variable, so only one string can be
     * tokenized at a time.
     */
    char *zstring_strtok(char *str, const char *delim) {
        static char *static_str=0;      /* where the next call resumes */
        char *cursor;                   /* scans for the delimiter */

        /* delimiter cannot be NULL
        * if no more char left, return NULL as well
        */
        if (delim==0 || (str == 0 && static_str == 0))
            return 0;

        if (str == 0)
            str = static_str;

        /* find the first occurrence of delim[0]; a pointer is used so that
        * long strings cannot overflow an int index
        */
        cursor = str;
        while (*cursor != '\0' && *cursor != delim[0])
            cursor++;

        /* if delim is not contained in str, return str */
        if (*cursor == '\0') {
            static_str = 0;
            return str;
        }

        /* check for consecutive delimiters
        * if first char is delim, return delim
        */
        if (cursor == str) {
            static_str = (str + 1);
            return (char *)delim;
        }

        /* terminate the token; this write is why str must be a writable
        * buffer (char[]) rather than a pointer to a string literal
        */
        *cursor = '\0';

        /* save the rest of the string; cursor + 1 is always a valid, non-null
        * address here, so no null test is needed
        */
        static_str = cursor + 1;

        return str;
    }
    

    Below is an example usage...

      Example Usage
          char str[] = "A,B,,,C";
          printf("1 %s\n",zstring_strtok(s,","));
          printf("2 %s\n",zstring_strtok(NULL,","));
          printf("3 %s\n",zstring_strtok(NULL,","));
          printf("4 %s\n",zstring_strtok(NULL,","));
          printf("5 %s\n",zstring_strtok(NULL,","));
          printf("6 %s\n",zstring_strtok(NULL,","));
    
      Example Output
          1 A
          2 B
          3 ,
          4 ,
          5 C
          6 (null)
    

    The library can be downloaded from Github https://github.com/fnoyanisi/zString

    0 讨论(0)
提交回复
热议问题