Basically, you want to scan the buffer and ignore everything that is between `<` and `>`:
/*
 * Copy the NUL-terminated string src into dst, omitting markup tags.
 *
 * A '<' starts a tag and a '>' ends it; neither delimiter nor anything
 * between them is copied.  A '>' seen outside a tag is dropped as well, and
 * a '<' that is never closed suppresses the remainder of the input.
 *
 * dst must have room for the output plus the terminating '\0'.  The output
 * is never longer than src, so strlen(src) + 1 bytes is always enough.
 * Because the write position never gets ahead of the read position, dst may
 * be the same buffer as src.
 *
 * Returns a pointer to the terminating '\0' written into dst, i.e. the END
 * of the output, not its beginning.  Callers that want the text itself must
 * keep their own pointer to the start of dst.
 */
char *get_text (char *dst, char *src) {
    int in_tag = 0;

    for (; *src != '\0'; src++) {
        char c = *src;

        switch (c) {
        case '<':
            in_tag = 1;
            break;
        case '>':
            in_tag = 0;
            break;
        default:
            /* Ordinary character: keep it only when outside a tag. */
            if (!in_tag) {
                *dst++ = c;
            }
            break;
        }
    }

    *dst = '\0';
    return dst;
}