问题
I am trying to read lines from a text file of unknown length.
Each line may contain leading and trailing whitespace around the string.
So my first step is to read line by line and allocate memory for the strings. Then remove all the leading and trailing white spaces.
After that I want to check whether the string contains any whitespace characters, which are invalid. For example, the string cannot look like "bad string" but can look like "goodstring".
However when I call the function to remove the leading and trailing white spaces it also removes characters before or after a white space.
Could someone tell me what I am doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define NCHAR 64
char *readline (FILE *fp, char **buffer);
char *strstrip(char *s);
/* Print every line of the file named by argv[1] (or of stdin when no
 * argument is given), prefixed with a zero-based line counter. Each line
 * is obtained from readline() and freed after it is printed.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv) {
    FILE *in = stdin;       /* default input when no file name is given */
    char *text = NULL;
    size_t lineno = 0;

    if (argc > 1)
        in = fopen (argv[1], "r");

    if (in == NULL) {
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (readline (in, &text) != NULL) {  /* one allocation per line */
        printf (" line[%2zu] : %s\n", lineno, text);
        lineno++;
        free (text);
        text = NULL;
    }

    if (in != stdin)
        fclose (in);

    return 0;
}
/* Read one line from 'fp' into a newly allocated *buffer (initially NCHAR
 * bytes, doubled with realloc as needed), strip leading and trailing
 * whitespace, and report any remaining invalid (space) characters.
 *
 * The newline is consumed but not stored. After stripping, the text is
 * moved to the start of the allocation, so *buffer is both the string and
 * the pointer the caller must pass to free().
 *
 * Returns *buffer on success. Returns NULL (with *buffer set to NULL) when
 * the initial allocation fails or when end-of-file is reached before any
 * character was read. If realloc fails mid-line, the partial, unstripped
 * line read so far is returned.
 */
char *readline (FILE *fp, char **buffer)
{
    int ch;
    size_t buflen = 0, nchar = NCHAR;
    const char *invalid_character = " ";    /* set of characters not allowed inside the string */
    char *trimmed;

    *buffer = malloc (nchar);               /* allocate buffer nchar in length */
    if (!*buffer) {
        fprintf (stderr, "readline() error: virtual memory exhausted.\n");
        return NULL;
    }

    while ((ch = fgetc (fp)) != '\n' && ch != EOF) {
        (*buffer)[buflen++] = (char)ch;
        if (buflen + 1 >= nchar) {          /* keep room for the terminator */
            char *tmp = realloc (*buffer, nchar * 2);
            if (!tmp) {
                fprintf (stderr, "error: realloc failed, "
                                 "returning partial buffer.\n");
                (*buffer)[buflen] = 0;
                return *buffer;
            }
            *buffer = tmp;
            nchar *= 2;
        }
        /* Do NOT strip here: the buffer is not nul-terminated yet, and a
         * terminator written mid-line would hide every later character. */
    }
    (*buffer)[buflen] = 0;                  /* nul-terminate */

    if (buflen == 0 && ch == EOF) {         /* return NULL if nothing read */
        free (*buffer);
        *buffer = NULL;
        return NULL;
    }

    /* Strip only once the complete, terminated line is available. strstrip()
     * may return a pointer past the leading whitespace, so shift the text
     * down to keep *buffer equal to the address returned by malloc/realloc. */
    trimmed = strstrip (*buffer);
    if (trimmed != *buffer)
        memmove (*buffer, trimmed, strlen (trimmed) + 1);

    /* Any invalid character still present lies between two valid ones. */
    if (strpbrk (*buffer, invalid_character) != NULL)
        puts(" invalid characters");

    return *buffer;
}
/* Trim leading and trailing whitespace from 's'.
 *
 * Trailing whitespace is removed in place by writing a nul-terminator
 * after the last non-whitespace character. Leading whitespace is not
 * shifted out: the function returns a pointer to the first non-whitespace
 * character inside 's', so the caller must use the return value (and must
 * keep the original pointer if 's' has to be freed later).
 *
 * Returns 's' unchanged when it is NULL or empty.
 * (ctype.h and string.h required)
 */
char *strstrip(char *s)
{
    size_t size;

    if (!s || !*s)
        return s;

    size = strlen(s);

    /* Walk back by index so that no pointer before 's' is ever formed,
     * even when the whole string is whitespace. */
    while (size > 0 && isspace((unsigned char)s[size - 1]))
        size--;
    s[size] = '\0';

    /* isspace('\0') is false, so this stops at the terminator. */
    while (isspace((unsigned char)*s))
        s++;

    return s;
}
回答1:
You do not need to worry about the length of the string passed to strstrip(); simply iterate over all characters in the string, removing whitespace characters as you go. For example, the following version removes ALL whitespace from s:
/** Delete every whitespace character (leading, interior and trailing)
 *  from 's', compacting the remaining characters toward the front.
 *  The start address is unchanged and is returned; the previous
 *  contents of the buffer are overwritten. A NULL or empty argument
 *  is returned as is. (ctype.h required)
 */
char *strstrip (char *s)
{
    size_t rd = 0;      /* index of the next character to examine */
    size_t wr = 0;      /* index of the next slot to fill */

    if (s == NULL)
        return NULL;
    if (s[0] == '\0')
        return s;

    while (s[rd] != '\0') {
        /* step over a run of whitespace; the cast keeps isspace defined
         * for characters with negative values */
        while (isspace ((unsigned char)s[rd]))
            rd++;

        s[wr++] = s[rd];        /* copies the terminator too if reached */

        if (s[rd] != '\0')
            rd++;
    }
    s[wr] = '\0';               /* nul-terminate */

    return s;
}
(note: if the argument to isspace() is of type char, a cast to unsigned char is required; see the NOTES section of man 3 isalpha)
Removing only Excess Whitespace
The following version removes leading and trailing whitespace and collapses multiple sequences of whitespace to a single space:
/** remove excess leading, interleaved and trailing whitespace, in place.
 *  Leading and trailing whitespace is dropped; each interior run of
 *  whitespace is reduced to one character (the first whitespace
 *  character of that run is the one kept). Whitespace immediately
 *  before a '.' is removed as well.
 *  the original start address is preserved but due to reindexing,
 *  the contents of the original are not preserved. returns pointer
 *  to 's', or NULL if 's' is NULL. (ctype.h required)
 */
char *strstrip (char *s)
{
    if (!s) return NULL;                    /* validate string not NULL */
    if (!*s) return s;                      /* handle empty string */

    char *p = s, *wp = s;                   /* pointer and write-pointer */

    while (*p) {
        if (isspace ((unsigned char)*p)) {  /* test for ws */
            if (wp > s)                     /* ignore leading ws, while */
                *wp++ = *p;                 /* preserving 1 between words */
            /* the cast must wrap the argument inside the call parentheses */
            while (*p && isspace ((unsigned char)*p))   /* skip remainder */
                p++;
            if (!*p)                        /* bail on end-of-string */
                break;
        }
        if (*p == '.')                      /* handle space between word and '.' */
            while (wp > s && isspace ((unsigned char)*(wp - 1)))
                wp--;
        *wp++ = *p;                         /* use non-ws char */
        p++;
    }

    while (wp > s && isspace ((unsigned char)*(wp - 1)))    /* trim trailing ws */
        wp--;
    *wp = 0;                                /* nul-terminate */

    return s;
}
(note: s must be mutable and therefore cannot be a string literal)
来源:https://stackoverflow.com/questions/59257712/removing-trailing-and-leading-spaces-from-a-file