How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked from answers to other questions
Language: Objective-C
Library: libxml2 + Matt Gallagher's libxml2 wrappers + Ben Copsey's ASIHTTPRequest
// Fetch the page, then log every <a> element that carries an href attribute.
// This snippet uses manual retain/release: the request is alloc'd here, so it
// must be released on every path out of the snippet.
ASIHTTPRequest *request = [[ASIHTTPRequest alloc] initWithURL:[NSURL URLWithString:@"http://stackoverflow.com/questions/773340"]];
// NOTE(review): -error is read immediately after -start, so this relies on
// -start completing the request synchronously in the ASIHTTPRequest version in
// use. Confirm, or switch to the library's explicit synchronous entry point.
[request start];
NSError *error = [request error];
if (!error) {
    NSData *response = [request responseData];
    // Selectors are case-sensitive: this must match -query:WithResponse: as defined below.
    NSLog(@"Data: %@", [[self query:@"//a[@href]" WithResponse:response] description]);
    [request release];
}
else {
    // Release before throwing; otherwise the request leaks on the failure path.
    [request release];
    @throw [NSException exceptionWithName:@"kMyHTTPRequestFailed" reason:@"Request failed!" userInfo:nil];
}
...
/**
 * Runs an XPath query over a downloaded document.
 *
 * @param xpathQuery The XPath expression to evaluate.
 * @param resp       The raw document data to query.
 * @return Whatever PerformHTMLXPathQuery returns for these arguments, passed
 *         through unchanged (including nil).
 */
- (id) query:(NSString *)xpathQuery WithResponse:(NSData *)resp {
    // A nil result is forwarded as-is, so no separate nil branch is needed.
    return PerformHTMLXPathQuery(resp, xpathQuery);
}