I can easily remove the element just by calling note.Remove(), like this:
HtmlDocument html = new HtmlDocument();
html.Load(Server.MapPath(@\"~\\Site\\themes\\default
Adding my two cents because none of these approaches handled what I wanted: removing a given set of tags (like `p` and `div`) while handling nesting properly and preserving the inner tags.
Here's what I came up with. It passes all my unit tests, which cover what I would consider most of the cases I need to deal with:
// Unwraps every element whose tag name is listed in tagNames: the element itself is
// removed, while its child nodes are spliced into its parent at the position the
// element occupied. The transformed markup is written back to html.
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(html);

// Collect the elements to unwrap.
// - HtmlAgilityPack reports HtmlNode.Name in lower case, so the comparison is
//   case-insensitive; otherwise a caller passing "P" or "DIV" would silently match nothing.
// - Reverse() means a match nested inside another match is handled before its ancestor
//   (assuming Descendants() enumerates in document order — TODO confirm).
// - ToList() snapshots the matches up front because the loop below mutates the tree.
var tags = htmlDoc.DocumentNode.Descendants()
    .Where(tag => tagNames.Contains(tag.Name, System.StringComparer.OrdinalIgnoreCase))
    .Reverse()
    .ToList();

foreach (var item in tags)
{
    var parent = item.ParentNode;

    if (item.PreviousSibling == null)
    {
        // item is the first child of its parent: prepend its children in reverse
        // order so that they end up in their original order at the front.
        foreach (HtmlNode node in item.ChildNodes.Reverse())
        {
            parent.PrependChild(node);
        }
    }
    else
    {
        // item.PreviousSibling is deliberately re-read on every pass: each insertion is
        // expected to become item's new previous sibling, which keeps the children in
        // their original order.
        // NOTE(review): relies on InsertAfter updating sibling links — confirm for the
        // HtmlAgilityPack version in use.
        foreach (HtmlNode node in item.ChildNodes)
        {
            parent.InsertAfter(node, item.PreviousSibling);
        }
    }

    // The children now live in the parent; drop the emptied wrapper element.
    item.Remove();
}

// Serialize the transformed document and trim surrounding whitespace.
html = htmlDoc.DocumentNode.WriteContentTo().Trim();
Here are the cases I used to test:
// Checks that StripTags, asked to remove "p", reduces the input to the bare text "tag".
[TestMethod]
public void StripTags_CanStripSingleTag()
{
// NOTE(review): this literal shows no markup and spans a raw line break, which a regular
// C# string literal cannot contain — the original HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
";
var expected = "tag";
var actual = HtmlUtilities.StripTags(input, "p");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p", turns an input with a nested tag into
// the text "tag inner".
[TestMethod]
public void StripTags_CanStripNestedTag()
{
// NOTE(review): this literal shows no markup and spans raw line breaks, which a regular
// C# string literal cannot contain — the original nested HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
inner
";
var expected = "tag inner";
var actual = HtmlUtilities.StripTags(input, "p");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p" and "div", turns an input with two
// top-level tags into the text "tag block".
[TestMethod]
public void StripTags_CanStripTwoTopLevelTags()
{
// NOTE(review): this literal shows no markup and spans a raw line break, which a regular
// C# string literal cannot contain — the original HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
block";
var expected = "tag block";
var actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p" and "div", turns an input nested two
// levels deep into the text "tag inner".
[TestMethod]
public void StripTags_CanStripMultipleNestedTags_2LevelsDeep()
{
// NOTE(review): this literal shows no markup and spans a raw line break, which a regular
// C# string literal cannot contain — the original nested HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
inner";
var expected = "tag inner";
var actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p" and "div", turns an input nested three
// levels deep into the text "tag inner superinner".
[TestMethod]
public void StripTags_CanStripMultipleNestedTags_3LevelsDeep()
{
// NOTE(review): this literal shows no markup and spans raw line breaks, which a regular
// C# string literal cannot contain — the original nested HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
inner superinner
";
var expected = "tag inner superinner";
var actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p" and "div", turns an input with two
// top-level tags and nesting three levels deep into the text
// "tag inner superinner inner toplevel".
[TestMethod]
public void StripTags_CanStripTwoTopLevelMultipleNestedTags_3LevelsDeep()
{
// NOTE(review): this literal shows no markup and spans raw line breaks, which a regular
// C# string literal cannot contain — the original nested HTML appears to have been lost.
// Restore the intended input before running this test.
var input = "tag
inner superinner
inner
toplevel";
var expected = "tag inner superinner inner toplevel";
var actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
}
// Runs StripTags twice with the tag list "p" and "div" and compares each result with
// an expected string. Per the test name, tags outside that list should be left alone.
[TestMethod]
public void StripTags_IgnoresTagsThatArentSpecified()
{
// NOTE(review): both input literals below show no markup and span raw line breaks, which
// a regular C# string literal cannot contain — the original HTML appears to have been lost.
// Presumably the expected strings originally kept the unlisted tags as well; verify and
// restore both inputs and both expected values before running this test.
var input = "tag
inner superinner";
var expected = "tag inner superinner";
var actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
input = "tag
inner ";
expected = "tag inner ";
actual = HtmlUtilities.StripTags(input, "p", "div");
Assert.AreEqual(expected, actual);
}
// Checks that StripTags, asked to remove "p" and "br", reduces the input to the bare
// text "tag".
[TestMethod]
public void StripTags_CanStripSelfClosingAndUnclosedTagsLikeBr()
{
// NOTE(review): this literal shows no markup and spans a raw line break, which a regular
// C# string literal cannot contain — the original HTML (presumably including a br tag,
// going by the test name) appears to have been lost. Restore it before running this test.
var input = "tag
";
var expected = "tag";
var actual = HtmlUtilities.StripTags(input, "p", "br");
Assert.AreEqual(expected, actual);
}
It probably doesn't handle every case, but it works for my needs.