There are a couple of different ways to remove HTML tags from an NSString in Cocoa.
One way is to render the string into an NSAttributedString and then read back its plain text.
A quick and "dirty" (removes everything between < and >) solution, works with iOS >= 3.2:
/// Returns a new string equal to the receiver with every "<...>" run removed.
///
/// This is a pattern-based strip, not an HTML parser: everything from a '<' up
/// to the next '>' is dropped (including across line breaks), character
/// entities such as "&amp;" are left untouched, and a bare "<>" is kept because
/// the pattern requires at least one character between the brackets.
///
/// @return The receiver's text with all matches of "<[^>]+>" deleted.
- (NSString *)stringByStrippingHTML {
    // A single regex replacement pass over the whole string: no rescanning from
    // the start after each removal, and no manual copy/autorelease, so the
    // method builds unchanged with or without ARC.
    return [self stringByReplacingOccurrencesOfString:@"<[^>]+>"
                                           withString:@""
                                              options:NSRegularExpressionSearch
                                                range:NSMakeRange(0, [self length])];
}
I have this declared as a category on NSString.
I would imagine the safest way would just be to parse for <>s, no? Loop through the entire string, and copy anything not enclosed in <>s to a new string.
You can use it as shown below:
// Example caller: passes a sample markup string through -stringByStrippingHTML:.
// The stripped result is only stored in a local here.
- (void)myMethod {
    NSString *markup = @"<some>html</string>";
    NSString *plainText = [self stringByStrippingHTML:markup];
}
/// Returns a new string equal to `str` with every "<...>" run removed.
///
/// This is a pattern-based strip, not an HTML parser: everything from a '<' up
/// to the next '>' is dropped, and a bare "<>" is kept because the pattern
/// requires at least one character between the brackets.
///
/// @param str The text to strip. May be nil.
/// @return The stripped text, or nil when `str` is nil.
- (NSString *)stringByStrippingHTML:(NSString *)str {
    // One regex replacement pass over the whole string. Messaging nil returns
    // nil (and a zero length), so a nil argument is passed through safely
    // rather than being searched in a loop.
    return [str stringByReplacingOccurrencesOfString:@"<[^>]+>"
                                          withString:@""
                                             options:NSRegularExpressionSearch
                                               range:NSMakeRange(0, [str length])];
}
This is a modernized version of m.kocikowski's answer; it also removes any whitespace that follows each tag:
@implementation NSString (StripXMLTags)

/// Returns a new string equal to the receiver with every "<...>" run, plus any
/// whitespace immediately following it, removed.
///
/// This is a pattern-based strip, not an XML parser: everything from a '<' up
/// to the next '>' is dropped, entities are left untouched, and a bare "<>" is
/// kept because the pattern requires at least one character between the
/// brackets. Whitespace that precedes a tag is preserved; only whitespace
/// directly after a tag is consumed.
///
/// @return The receiver's text with all matches of "<[^>]+>\s*" deleted.
- (NSString *)stripXMLTags {
    // A single regex replacement pass: avoids restarting the search from the
    // beginning of the string after every removed tag and needs no
    // intermediate copy of the receiver.
    return [self stringByReplacingOccurrencesOfString:@"<[^>]+>\\s*"
                                           withString:@""
                                              options:NSRegularExpressionSearch
                                                range:NSMakeRange(0, self.length)];
}

@end