removing trailing and leading spaces from a file

雨燕双飞 提交于 2021-01-29 08:48:12

问题


I am trying to read lines from a text file of unknown length. Each line can contain leading and trailing whitespace around the string. So my first step is to read the file line by line and allocate memory for the strings, then remove all the leading and trailing whitespace. After that I want to check whether the string contains any whitespace characters, which are invalid. For example, the string cannot look like this "bad string" but can look like this "goodstring". However, when I call the function to remove the leading and trailing whitespace, it also removes characters before or after a whitespace character.

Could someone tell me what I am doing wrong?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define NCHAR 64

char *readline (FILE *fp, char **buffer);
char *strstrip(char *s);


/* Print every line of the file named by argv[1] (or of stdin when no
 * argument is given), prefixed with a zero-based line counter.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv) {

    FILE *fp = stdin;           /* default input when no file is named */
    char *line = NULL;
    size_t lineno = 0;

    if (argc > 1) {
        fp = fopen (argv[1], "r");
        if (fp == NULL) {
            fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
            return 1;
        }
    }

    /* readline() hands back an allocated buffer per line; release it
     * after printing and reset the pointer before the next call.
     */
    for (;;) {
        if (readline (fp, &line) == NULL)
            break;
        printf (" line[%2zu] : %s\n", lineno, line);
        lineno++;
        free (line);
        line = NULL;
    }

    if (fp != stdin)
        fclose (fp);

    return 0;
}

/* Read one line from 'fp' into a newly allocated buffer stored in *buffer.
 *
 * The buffer starts at NCHAR bytes and is doubled with realloc as needed.
 * The newline is not stored. Once the whole line has been read and
 * nul-terminated, leading and trailing whitespace is removed with
 * strstrip(), and a notice is printed to stdout if whitespace remains
 * inside the string.
 *
 * Returns *buffer on success (the caller must free it). Returns NULL,
 * with *buffer set to NULL, when the initial allocation fails or when
 * end-of-file is reached before any character is read. If realloc fails
 * mid-line, the partial (untrimmed) line read so far is returned.
 */
char *readline (FILE *fp, char **buffer)
{
    int ch;
    size_t buflen = 0, nchar = NCHAR;
    const char *invalid_characters = " \t\n\v\f\r";  /* not allowed inside a string */
    char *start;

    *buffer = malloc (nchar);    /* allocate buffer nchar in length */
    if (!*buffer) {
        fprintf (stderr, "readline() error: virtual memory exhausted.\n");
        return NULL;
    }

    while ((ch = fgetc (fp)) != '\n' && ch != EOF)
    {
        (*buffer)[buflen++] = (char)ch;

        if (buflen + 1 >= nchar) {  /* keep room for the terminator */
            char *tmp = realloc (*buffer, nchar * 2);
            if (!tmp) {
                fprintf (stderr, "error: realloc failed, "
                                "returning partial buffer.\n");
                (*buffer)[buflen] = 0;
                return *buffer;
            }
            *buffer = tmp;
            nchar *= 2;
        }
    }
    (*buffer)[buflen] = 0;           /* nul-terminate */

    if (buflen == 0 && ch == EOF) {  /* return NULL if nothing read */
        free (*buffer);
        *buffer = NULL;
        return NULL;
    }

    /* Trim only after the complete line is terminated: trimming inside the
     * read loop runs on an unterminated buffer and overwrites interior
     * spaces with '\0', which truncates the line at its first space.
     */
    start = strstrip (*buffer);
    if (start != *buffer)            /* keep *buffer at the malloc'd address */
        memmove (*buffer, start, strlen (start) + 1);

    if (strpbrk (*buffer, invalid_characters))   /* whitespace left inside */
        puts (" invalid characters");

    return *buffer;
}
/* Remove leading and trailing whitespace from 's' in place.
 *
 * The trimmed text is shifted down so that it begins at 's' itself,
 * which means the result is visible to callers that ignore the return
 * value and the original pointer remains the one to pass to free().
 * Interior whitespace is left untouched.
 *
 * Returns 's', or NULL if 's' is NULL. 's' must be writable unless it
 * is the empty string.
 */
char *strstrip(char *s)
{
    size_t len;
    char *start;

    if (!s)
        return NULL;
    if (!*s)                    /* nothing to trim, nothing to write */
        return s;

    /* isspace() needs an unsigned char value; it is false for '\0',
     * so this stops at the terminator for an all-whitespace string.
     */
    start = s;
    while (isspace((unsigned char)*start))
        start++;

    /* Shrink the length instead of walking a pointer backwards, which
     * could otherwise step to one before the start of the string.
     */
    len = strlen(start);
    while (len > 0 && isspace((unsigned char)start[len - 1]))
        len--;

    if (start != s)
        memmove(s, start, len); /* regions may overlap */
    s[len] = '\0';

    return s;
}

回答1:


You do not need to worry about the length of the string passed to strstrip(); simply iterate over all characters in the string, removing whitespace characters. For example, the following version removes ALL whitespace from s:

/** strip every whitespace character (leading, interior and trailing)
 *  from 's' in place. the string keeps its start address; the kept
 *  characters are compacted toward the front, so the original contents
 *  are overwritten. returns 's', or NULL when 's' is NULL.
 *  (ctype.h required)
 */
char *strstrip (char *s)
{
    size_t in, out = 0;                      /* read index, write index */

    if (s == NULL)                           /* no string at all */
        return NULL;
    if (s[0] == '\0')                        /* empty: leave untouched */
        return s;

    for (in = 0; s[in] != '\0'; in++) {
        /* isspace() requires a value representable as unsigned char */
        if (!isspace ((unsigned char)s[in]))
            s[out++] = s[in];                /* keep non-ws char */
    }
    s[out] = '\0';                           /* nul-terminate */

    return s;
}

(note: if the argument to isspace() is type char, a cast to unsigned char is required, see NOTES Section, e.g. man 3 isalpha)

Removing only Excess Whitespace

The following version removes leading and trailing whitespace and collapses multiple sequences of whitespace to a single space:

/** remove excess leading, interleaved and trailing whitespace, in place.
 *  leading and trailing whitespace is dropped; each interior run of
 *  whitespace is reduced to its first character; whitespace directly
 *  before a '.' is removed. the original start address is preserved but
 *  due to reindexing, the contents of the original are not preserved.
 *  returns pointer to 's', or NULL when 's' is NULL. (ctype.h required)
 */
char *strstrip (char *s)
{
    if (!s) return NULL;                        /* validate string not NULL */
    if (!*s) return s;                               /* handle empty string */

    char *p = s, *wp = s;                      /* pointer and write-pointer */

    while (*p) {
        if (isspace ((unsigned char)*p)) {                   /* test for ws */
            if (wp > s)                         /* ignore leading ws, while */
                *wp++ = *p;                   /* preserving 1 between words */
            while (*p && isspace ((unsigned char)*p))    /* skip remainder  */
                p++;
            if (!*p)                               /* bail on end-of-string */
                break;
        }
        if (*p == '.')                 /* handle space between word and '.' */
            while (wp > s && isspace ((unsigned char)*(wp - 1)))
                wp--;
        *wp++ = *p;                                      /* use non-ws char */
        p++;
    }
    while (wp > s && isspace ((unsigned char)*(wp - 1))) /* trim trailing ws */
        wp--;
    *wp = 0;    /* nul-terminate */

    return s;
}

(note: s must be mutable and therefore cannot be a string-literal)



来源:https://stackoverflow.com/questions/59257712/removing-trailing-and-leading-spaces-from-a-file

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!