How do I trim leading/trailing whitespace in a standard way?

后端 未结 30 1965
一个人的身影
一个人的身影 2020-11-22 02:06

Is there a clean, preferably standard method of trimming leading and trailing whitespace from a string in C? I'd roll my own, but I would think this is a common problem with…

相关标签:
30条回答
  • 2020-11-22 02:27

    Here's my C mini library for trimming left, right, both, all, in place and separate, and trimming a set of specified characters (or white space by default).

    contents of strlib.h:

    #ifndef STRLIB_H_
    #define STRLIB_H_ 1

    /* Trim modes understood by strcpytrim(). */
    enum strtrim_mode_t {
        STRLIB_MODE_ALL   = 0,    /* remove every matching character   */
        STRLIB_MODE_RIGHT = 0x01, /* remove trailing matches only      */
        STRLIB_MODE_LEFT  = 0x02, /* remove leading matches only       */
        STRLIB_MODE_BOTH  = 0x03  /* remove leading and trailing ones  */
    };

    /*
     * Copy s into d, removing the characters listed in delim as selected by
     * mode (one of the STRLIB_MODE_* values).  A null delim selects a default
     * whitespace set.  d may be the same pointer as s for in-place trimming.
     * Returns d, or a null pointer when d or s is null.
     */
    char *strcpytrim(char *d, char *s, int mode, char *delim);

    /* Copying variants: trim s into d using the default whitespace set. */
    char *strtriml(char *d, char *s);   /* left  */
    char *strtrimr(char *d, char *s);   /* right */
    char *strtrim(char *d, char *s);    /* both  */
    char *strkill(char *d, char *s);    /* all   */

    /* In-place variants: trim s inside its own buffer. */
    char *triml(char *s);               /* left  */
    char *trimr(char *s);               /* right */
    char *trim(char *s);                /* both  */
    char *kill(char *s);                /* all   */

    #endif
    

    contents of strlib.c:

    #include <strlib.h>
    
    /*
     * Copy s into d in a single left-to-right pass, dropping characters that
     * appear in delim according to mode:
     *   STRLIB_MODE_LEFT  - leading matches are dropped
     *   STRLIB_MODE_RIGHT - trailing matches are dropped
     *   STRLIB_MODE_BOTH  - leading and trailing matches are dropped
     *   STRLIB_MODE_ALL   - every match is dropped
     * If delim is null, the set " \t\n\f" is used.  d may equal s, in which
     * case the string is trimmed in place.
     *
     * Returns the original value of d, or a null pointer if d or s is null.
     */
    char *strcpytrim(char *d, // destination
                     char *s, // source
                     int mode,
                     char *delim
                     ) {
        char *o = d; // start of the destination, returned to the caller
        char *e = 0; // start of the current run of matches in d, 0 if none
        char dtab[256] = {0}; // dtab[c] != 0 when byte value c is a delimiter
        if (!s || !d) return 0;

        if (!delim) delim = " \t\n\f";
        // Index through unsigned char: a plain char may be negative for bytes
        // >= 0x80, which would write before the start of dtab.
        while (*delim)
            dtab[0xFF & (unsigned char)*delim++] = 1;

        while ( (*d = *s++) != 0 ) {
            if (!dtab[0xFF & (unsigned char)*d]) { // Not a match char
                e = 0;       // A kept character ends any pending run of matches
            } else {
                if (!e) e = d;  // Remember where this run of matches begins

                // Do not advance d (so the match gets overwritten) when
                // killing every match, or when still at the very start of
                // the output and left-trimming is requested.
                if ( mode == STRLIB_MODE_ALL || ((mode != STRLIB_MODE_RIGHT) && (d == o)) )
                    continue;
            }
            d++;
        }
        // A run of matches still pending at the end is the trailing run;
        // cut it off for every mode except left-only trimming.
        if (mode != STRLIB_MODE_LEFT && e) {
            *e = 0;
        }
        return o;
    }
    
    // perhaps these could be inlined in strlib.h
    /* Copy s into d with leading default delimiters removed. */
    char *strtriml(char *d, char *s)
    {
        return strcpytrim(d, s, STRLIB_MODE_LEFT, 0);
    }

    /* Copy s into d with trailing default delimiters removed. */
    char *strtrimr(char *d, char *s)
    {
        return strcpytrim(d, s, STRLIB_MODE_RIGHT, 0);
    }

    /* Copy s into d with leading and trailing default delimiters removed. */
    char *strtrim(char *d, char *s)
    {
        return strcpytrim(d, s, STRLIB_MODE_BOTH, 0);
    }

    /* Copy s into d with every default delimiter removed. */
    char *strkill(char *d, char *s)
    {
        return strcpytrim(d, s, STRLIB_MODE_ALL, 0);
    }
    
    /* Remove leading default delimiters from s, in place. */
    char *triml(char *s)
    {
        return strcpytrim(s, s, STRLIB_MODE_LEFT, 0);
    }

    /* Remove trailing default delimiters from s, in place. */
    char *trimr(char *s)
    {
        return strcpytrim(s, s, STRLIB_MODE_RIGHT, 0);
    }

    /* Remove leading and trailing default delimiters from s, in place. */
    char *trim(char *s)
    {
        return strcpytrim(s, s, STRLIB_MODE_BOTH, 0);
    }

    /* Remove every default delimiter from s, in place. */
    /* NOTE(review): shares its name with POSIX kill() from <signal.h>; confirm no clash. */
    char *kill(char *s)
    {
        return strcpytrim(s, s, STRLIB_MODE_ALL, 0);
    }
    

    The one main routine does it all. It trims in place if src == dst, otherwise, it works like the strcpy routines. It trims a set of characters specified in the string delim, or white space if null. It trims left, right, both, and all (like tr). There is not much to it, and it iterates over the string only once. Some folks might complain that trim right starts on the left, however, no strlen is needed which starts on the left anyway. (One way or another you have to get to the end of the string for right trims, so you might as well do the work as you go.) There may be arguments to be made about pipelining and cache sizes and such -- who knows. Since the solution works from left to right and iterates only once, it can be expanded to work on streams as well. Limitations: it does not work on unicode strings.

    0 讨论(0)
  • 2020-11-22 02:27

    Late to the trim party

    Features:
    1. Trim the beginning quickly, as in a number of other answers.
    2. After going to the end, trimming the right with only 1 test per loop. (Like @jfm3, but works for an all white-space string.)
    3. To avoid undefined behavior when char is a signed char, cast *s to unsigned char.

    Character handling "In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined." C11 §7.4 1

    #include <ctype.h>
    
    // Strip leading and trailing white-space from s without moving any
    // characters.  Returns a pointer to the first kept character inside s
    // (or to the terminator when nothing is kept); this may differ from s.
    char *string_trim_inplace(char *s) {
      char *start = s;

      // <ctype.h> functions require a value representable as unsigned char.
      while (isspace((unsigned char) *start)) {
        start++;
      }

      // Empty or all white-space: there is nothing to cut on the right.
      if (*start == '\0') {
        return start;
      }

      // Seek the terminator, then step back over trailing white-space.
      // *start is known to be non-space, so the backward walk stops there
      // at the latest.
      char *last = start;
      while (*last != '\0') {
        last++;
      }
      do {
        last--;
      } while (isspace((unsigned char) *last));
      last[1] = '\0';

      // If desired, shift the trimmed string

      return start;
    }
    

    @chqrlie commented that the above does not shift the trimmed string. To do so:

    // Strip leading and trailing white-space from s and shift the remaining
    // text to the start of the buffer.  Always returns s.
    char *string_trim_inplace(char *s) {
      char *first = s;
      while (isspace((unsigned char) *first)) {
        first++;
      }

      size_t kept = 0;  // number of characters that survive the trim
      if (*first != '\0') {
        // *first is non-space, so the backward scan cannot pass it.
        char *last = first + strlen(first) - 1;
        while (isspace((unsigned char) *last)) {
          last--;
        }
        last[1] = '\0';
        kept = (size_t) (last - first) + 1;
      }

      // Move text plus terminator only when there was leading white-space.
      if (first != s) {
        memmove(s, first, kept + 1);
      }
      return s;
    }
    
    0 讨论(0)
  • 2020-11-22 02:27

    This one is short and simple, uses for-loops and doesn't overwrite the string boundaries. You can replace the test with isspace() if needed.

    /*
     * Trim leading and trailing spaces and tabs from s, in place.
     * The remaining text is shifted to the start of the buffer; a string
     * consisting only of spaces/tabs (or an empty string) becomes "".
     * Other white-space characters such as '\n' are left untouched.
     */
    void trim (char *s)
    {
        /* size_t avoids truncating strlen()'s result for very long strings. */
        size_t len   = strlen(s);
        size_t start = 0;    /* index of first kept character      */
        size_t end   = len;  /* one past the last kept character   */

        while (start < len && (s[start] == ' ' || s[start] == '\t')) {
            start++;
        }
        while (end > start && (s[end - 1] == ' ' || s[end - 1] == '\t')) {
            end--;
        }

        /* Regions may overlap, so memmove rather than memcpy. */
        memmove(s, s + start, end - start);
        s[end - start] = '\0';
    }
    
    0 讨论(0)
  • 2020-11-22 02:27

    Here I use dynamic memory allocation to trim the input string passed to the function trimStr. First, we count how many non-space characters exist in the input string. Then, we allocate a character array of that size plus one for the null terminator, and copy the non-space characters into it. (Note that this removes every space in the string, not only the leading and trailing ones.) The caller, main in this example, must free the returned memory.

    #include<stdio.h>
    #include<stdlib.h>
    
    /*
     * Build a newly allocated copy of str with every ' ' character removed
     * (interior spaces included, not only leading/trailing ones).
     * Progress messages are printed to stdout.
     *
     * Returns the new string, which the caller must free(), or NULL if the
     * allocation fails.
     */
    char *trimStr(char *str){
        printf("input string %s\n",str);

        /* First pass: count the characters that will be kept. */
        int kept = 0;
        for (const char *p = str; *p != '\0'; p++) {
            if (*p != ' ') {
                kept++;
            }
        }
        printf("total nonempty characters are %d\n",kept);

        char *out = malloc(sizeof(char)*(kept+1));
        if (out == NULL) {
            return NULL;
        }

        /* Second pass: copy the kept characters. */
        int w = 0;
        for (const char *p = str; *p != '\0'; p++) {
            if (*p == ' ') {
                continue;
            }
            out[w] = *p;
            w++;
        }
        out[kept] = '\0';

        printf("trimmed string is %s\n",out);

        return out;
    }
    
    
    /* Demo driver: run trimStr on a sample string and release the result. */
    int main(void){
        char str[] = " s ta ck ove r fl o w  ";
        char *result = trimStr(str);

        free(result);  /* free(NULL) is a no-op, so no guard is needed */

        return 0;
    }
    
    0 讨论(0)
  • 2020-11-22 02:29

    Most of the answers so far do one of the following:

    1. Backtrack at the end of the string (i.e. find the end of the string and then seek backwards until a non-space character is found,) or
    2. Call strlen() first, making a second pass through the whole string.

    This version makes only one pass and does not backtrack. Hence it may perform better than the others, though only if it is common to have hundreds of trailing spaces (which is not unusual when dealing with the output of a SQL query.)

    /* Characters that get_trim_bounds() treats as whitespace. */
    static char const WHITESPACE[] = " \t\n\r";

    /*
     * Find the trimmed region of s in one forward pass.
     *
     * On return, *firstWord points at the first non-whitespace character of s
     * and *trailingSpace points one past the last non-whitespace character.
     * For an empty or all-whitespace string both point at the terminator.
     */
    static void get_trim_bounds(char  const *s,
                                char const **firstWord,
                                char const **trailingSpace)
    {
        char const *word = s + strspn(s, WHITESPACE);
        char const *gap;

        *firstWord = word;
        for (;;)
        {
            /* Skip over one word, then over the whitespace run after it. */
            gap = word + strcspn(word, WHITESPACE);
            *trailingSpace = gap;
            word = gap + strspn(gap, WHITESPACE);

            /* Whitespace run reached the terminator: gap was the trailing one. */
            if (*word == '\0')
                break;
        }
    }
    
    /*
     * Return a newly allocated copy of s with leading and trailing
     * whitespace removed.  s itself is not modified.
     *
     * Returns NULL if the allocation fails; otherwise the caller owns the
     * result and must free() it.
     */
    char *copy_trim(char const *s)
    {
        char const *firstWord, *trailingSpace;
        char *result;
        size_t newLength;

        get_trim_bounds(s, &firstWord, &trailingSpace);
        newLength = (size_t)(trailingSpace - firstWord);

        result = malloc(newLength + 1);
        if (result == NULL)
            return NULL;   /* writing through a null pointer would be undefined */

        memcpy(result, firstWord, newLength);
        result[newLength] = '\0';
        return result;
    }
    
    /*
     * Remove leading and trailing whitespace from s in place, shifting the
     * remaining text to the start of the buffer.
     */
    void inplace_trim(char *s)
    {
        char const *begin;
        char const *end;

        get_trim_bounds(s, &begin, &end);

        size_t const kept = end - begin;

        /* Source and destination overlap, hence memmove. */
        memmove(s, begin, kept);
        s[kept] = '\0';
    }
    
    0 讨论(0)
  • 2020-11-22 02:30

    Just to keep this growing, one more option with a modifiable string:

    /*
     * Trim white-space (as classified by isspace) from both ends of string,
     * in place.  The remaining text is shifted to the start of the buffer.
     */
    void trimString(char *string)
    {
        /* Cut the tail first by overwriting trailing white-space with '\0'. */
        size_t tail = strlen(string);
        while (tail > 0) {
            if (!isspace((unsigned char)string[tail - 1])) {
                break;
            }
            tail--;
            string[tail] = '\0';
        }

        /* Count leading white-space; the terminator stops the scan. */
        size_t head = 0;
        while (isspace((unsigned char)string[head])) {
            head++;
        }

        if (head == 0) {
            return;  /* nothing to shift */
        }
        /* +1 carries the terminator along with the text. */
        memmove(string, string + head, tail - head + 1);
    }
    
    0 讨论(0)
提交回复
热议问题