How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to in answers to questions
Language: C#
Library: System.Xml (standard .NET)
using System.Collections.Generic;
using System.Xml;
/// <summary>
/// Example entry point: loads a markup string into an <see cref="XmlDocument"/>
/// and gathers every <c>href</c> attribute value it contains.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
public static void Main(string[] args)
{
    List<string> matches = new List<string>();
    XmlDocument xd = new XmlDocument();

    // "..." is a placeholder for the markup to scan. LoadXml only accepts
    // well-formed XML (e.g. XHTML) and throws XmlException otherwise.
    xd.LoadXml("...");

    // Start at the root element rather than FirstChild: when the document
    // begins with an XML declaration, DOCTYPE or comment, FirstChild is that
    // node and the element tree would never be visited.
    FindHrefs(xd.DocumentElement, matches);
}
/// <summary>
/// Walks <paramref name="xn"/> and all of its descendants depth-first and
/// appends the value of every <c>href</c> attribute to <paramref name="matches"/>.
/// A node's own href is added before those of its children.
/// </summary>
/// <param name="xn">Node at which the walk starts.</param>
/// <param name="matches">List that receives each href value found.</param>
static void FindHrefs(XmlNode xn, List<string> matches)
{
    // Attributes is null for nodes that cannot carry attributes (text, comments, ...).
    XmlAttribute href = xn.Attributes != null ? xn.Attributes["href"] : null;
    if (href != null)
    {
        // Value is the decoded attribute text; InnerXml would hand back the
        // entity-escaped markup (e.g. "&amp;" instead of "&").
        matches.Add(href.Value);
    }

    foreach (XmlNode child in xn.ChildNodes)
    {
        FindHrefs(child, matches);
    }
}