Split string with delimiters in C

前端 未结 20 1387
你的背包
你的背包 2020-11-21 11:56

How do I write a function to split and return an array for a string with delimiters in the C programming language?

char* str = \"JAN,FEB,MAR,APR,MAY,JUN,JUL,         


        
相关标签:
20条回答
  • 2020-11-21 12:29

    Two issues surrounding this question are memory management and thread safety. As you can see from the numerous posts, this isn't an easy task to accomplish seamlessly in C. I desired a solution that is:

    • Thread safe. (strtok is not thread safe)
    • Does not employ malloc or any of its derivatives (to avoid memory management issues)
    • Checks array bounds on the individual fields (to avoid segmentation faults on unknown data)
    • Works with multi-byte field separators (utf-8)
    • ignores extra fields in the input
    • provides soft error routine for invalid field lengths

    The solution I came up with meets all of these criteria. It's probably a little more work to set up than some other solutions posted here, but I think that in practice, the extra work is worth it in order to avoid the common pitfalls of other solutions.

    #include <stdio.h>
    #include <string.h>
    
    /* Describes one caller-owned output buffer that strsplit() fills. */
    struct splitFieldType {
        char *field;      /* destination buffer for the field text */
        int   maxLength;  /* capacity of `field` in bytes, including the terminating NUL */
    };
    
    typedef struct splitFieldType splitField;
    
    /*
     * Store `len` bytes starting at `src` into `dest` as a NUL-terminated string.
     *
     * If the text does not fit in dest->maxLength-1 bytes it is truncated and,
     * when `softError` is non-NULL, reported as
     * softError(fieldNumber, usableCapacity, actualLength).
     * A descriptor with maxLength <= 0 has no room even for the terminator, so
     * nothing is written to it.
     */
    static void strsplitStore(const splitField *dest, int fieldNumber, const char *src, size_t len, void (*softError)(int,int,int)) {
        size_t capacity = dest->maxLength > 0 ? (size_t)dest->maxLength - 1 : 0;
    
        if (len > capacity) {
            if (softError != NULL) {
                softError(fieldNumber, (int)capacity, (int)len);
            }
            len = capacity;
        }
        if (dest->maxLength > 0) {
            memcpy(dest->field, src, len);
            dest->field[len] = '\0';
        }
    }
    
    /*
     * Split `input` on `fieldSeparator` into at most `expected` caller-provided
     * buffers, without allocating and without modifying `input`.
     *
     * fields         - array of at least `expected` buffer descriptors
     * expected       - maximum number of fields to extract; further input is ignored
     * input          - NUL-terminated string to split
     * fieldSeparator - NUL-terminated separator; may be longer than one byte
     * softError      - optional callback (may be NULL) invoked as
     *                  softError(fieldIndex, usableBytes, actualBytes) whenever a
     *                  field is too long for its buffer and has been truncated
     *
     * Returns the number of fields stored. When the input holds fewer than
     * `expected` separators, the text after the last separator is stored as the
     * final field and counted.
     */
    int strsplit(splitField *fields, int expected, const char *input, const char *fieldSeparator, void (*softError)(int fieldNumber,int expected,int actual))  {
        int i;
        size_t fieldSeparatorLen=strlen(fieldSeparator);
        const char *tNext, *tLast=input;
    
        for (i=0; i<expected && (tNext=strstr(tLast, fieldSeparator))!=NULL; ++i) {
            strsplitStore(&fields[i], i, tLast, (size_t)(tNext-tLast), softError);
            tLast=tNext+fieldSeparatorLen;
        }
        if (i<expected) {
            /* No further separator: the remainder of the input is the last field.
               It goes through the same bounds-checked store as every other field. */
            strsplitStore(&fields[i], i, tLast, strlen(tLast), softError);
            return i+1;
        }
        return i;
    }
    
    
    /*
     * Soft-error callback used by the month-splitting demo: writes one
     * diagnostic line to stderr giving the 1-based field number, the actual
     * byte count, and the expected byte count.
     */
    void monthSplitSoftError(int fieldNumber, int expected, int actual) {
        const int displayNumber = fieldNumber + 1;  /* callers pass a 0-based index */
    
        fprintf(stderr,
                "monthSplit: input field #%d is %d bytes, expected %d bytes\n",
                displayNumber,
                actual,
                expected);
    }
    
    
    /*
     * Demo driver: splits a comma-separated list of month names into twelve
     * fixed-size buffers with strsplit(), then prints each field both through
     * the descriptor array and through direct structure access.
     */
    int main(void) {
      const char *fieldSeparator=",";
      /* "APRIL" is deliberately one byte too long for its 4-byte buffer so that
         the soft-error callback is exercised; FOO and BAR are surplus fields. */
      const char *input="JAN,FEB,MAR,APRIL,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR";
    
      /* Zero-initialised so every buffer is a valid empty string even if the
         input supplies fewer fields than expected. */
      struct monthFieldsType {
        char field1[4];
        char field2[4];
        char field3[4];
        char field4[4];
        char field5[4];
        char field6[4];
        char field7[4];
        char field8[4];
        char field9[4];
        char field10[4];
        char field11[4];
        char field12[4];
      } monthFields = {{0}};
    
      /* One descriptor per buffer: pointer plus capacity (including the NUL). */
      splitField inputFields[12] = {
        {monthFields.field1,  sizeof(monthFields.field1)},
        {monthFields.field2,  sizeof(monthFields.field2)},
        {monthFields.field3,  sizeof(monthFields.field3)},
        {monthFields.field4,  sizeof(monthFields.field4)},
        {monthFields.field5,  sizeof(monthFields.field5)},
        {monthFields.field6,  sizeof(monthFields.field6)},
        {monthFields.field7,  sizeof(monthFields.field7)},
        {monthFields.field8,  sizeof(monthFields.field8)},
        {monthFields.field9,  sizeof(monthFields.field9)},
        {monthFields.field10, sizeof(monthFields.field10)},
        {monthFields.field11, sizeof(monthFields.field11)},
        {monthFields.field12, sizeof(monthFields.field12)}
      };
    
      int expected=sizeof(inputFields)/sizeof(splitField);
    
      printf("input data: %s\n", input);
      printf("expecting %d fields\n",expected);
    
      int ct=strsplit(inputFields, expected, input, fieldSeparator, monthSplitSoftError);
    
      if (ct!=expected) {
        printf("string split %d fields, expected %d\n", ct,expected);
      }
    
      for (int i=0;i<expected;++i) {
        printf("field %d: %s\n",i+1,inputFields[i].field);
      }
    
      printf("\n");
      printf("Direct structure access, field 10: %s", monthFields.field10);
      return 0;
    }
    

    Below is an example compile and output. Note that in my example, I purposefully spelled out "APRIL" so that you can see how the soft error works.

    $ gcc strsplitExample.c && ./a.out
    input data: JAN,FEB,MAR,APRIL,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR
    expecting 12 fields
    monthSplit: input field #4 is 5 bytes, expected 3 bytes
    field 1: JAN
    field 2: FEB
    field 3: MAR
    field 4: APR
    field 5: MAY
    field 6: JUN
    field 7: JUL
    field 8: AUG
    field 9: SEP
    field 10: OCT
    field 11: NOV
    field 12: DEC
    
    Direct structure access, field 10: OCT
    

    Enjoy!

    0 讨论(0)
  • 2020-11-21 12:30

    My approach is to scan the string and let the result pointers point to the first character and to every character that follows a delimiter, while replacing each occurrence of the delimiter in the string with '\0'.
    First make a copy of the original string (since it's constant), then count the splits by scanning it and pass that count back through the pointer parameter len. After that, point the first result pointer at the start of the copy, then scan the copy: whenever a delimiter is encountered, overwrite it with '\0' so that the previous result string is terminated, and point the next result pointer at the character that follows.

    /*
     * Split a_str on the single-character delimiter a_delim.
     *
     * The input string is not modified: it is copied once, every delimiter in
     * the copy is overwritten with '\0', and the returned array points at the
     * start of each resulting piece inside that copy. Adjacent delimiters yield
     * empty strings.
     *
     * a_str   - NUL-terminated string to split
     * a_delim - delimiter character
     * len     - out: number of strings in the returned array (delimiter count
     *           plus one); set to 0 on failure
     *
     * Returns a malloc'ed array of *len pointers, or NULL if an argument is NULL
     * or an allocation fails. Ownership passes to the caller: results[0] is the
     * start of the single string copy, so release with free(results[0]) followed
     * by free(results).
     */
    char** split(char* a_str, const char a_delim, int* len){
        if (len == NULL) return NULL;
        *len = 0;
        if (a_str == NULL) return NULL;
    
        /* One pass to learn both the string length and the number of pieces. */
        size_t length = 0;
        int pieces = 1;
        for (const char *p = a_str; *p != '\0'; ++p) {
            if (*p == a_delim) pieces += 1;
            length += 1;
        }
    
        /* +1 for the terminating NUL that is copied along with the text. */
        char *copy = malloc(length + 1);
        if (copy == NULL) return NULL;
        memcpy(copy, a_str, length + 1);
    
        /* N delimiters produce N+1 pieces, so that many slots are required. */
        char **results = malloc((size_t)pieces * sizeof *results);
        if (results == NULL) {
            free(copy);
            return NULL;
        }
    
        results[0] = copy;
        int next = 1;
        for (char *s = copy; *s != '\0'; ++s) {
            if (*s == a_delim) {
                *s = '\0';              /* terminate the previous piece */
                results[next++] = s + 1;
            }
        }
    
        *len = pieces;
        return results;
    }
    
    0 讨论(0)
提交回复
热议问题