I can easily remove the element just by calling node.Remove(), like this:
HtmlDocument html = new HtmlDocument();
html.Load(Server.MapPath(@\"~\\Site\\themes\\default
How about this?
// Replace every <removeme> element with a text node carrying its inner HTML,
// which drops the wrapping tag but keeps whatever it contained.
// SelectNodes is reached through DocumentNode and yields null when nothing
// matches, hence the guard before iterating.
var removedNodes = document.DocumentNode.SelectNodes("//removeme");
if (removedNodes != null)
{
    foreach (var rn in removedNodes)
    {
        HtmlTextNode innerNodes = document.CreateTextNode(rn.InnerHtml);
        rn.ParentNode.ReplaceChild(innerNodes, rn);
    }
}
Adding my two cents because none of these approaches handled what I wanted (to remove a set of given tags like `p` and `div`, and handle nesting properly while preserving inner tags).
Here's what I came up with; it passes all my unit tests, which cover what I would consider most of the cases I need to deal with:
// Unwraps every element whose name is listed in `tagNames`: the element itself
// is removed while its child nodes are re-attached to its parent.
// Reads the markup from `html` and writes the transformed markup back to `html`.
var htmlDoc = new HtmlDocument();
// Parse the input markup.
htmlDoc.LoadHtml(html);
// Collect the elements to strip, then reverse the sequence.
// NOTE(review): presumably Descendants() yields document order, so reversing
// means later/deeper matches are unwrapped before their ancestors — confirm.
var tags = (from tag in htmlDoc.DocumentNode.Descendants()
where tagNames.Contains(tag.Name)
select tag).Reverse();
// Unwrap each matched element in turn.
foreach (var item in tags)
{
if (item.PreviousSibling == null)
{
// No previous sibling to anchor on: push the children onto the front of the
// parent. Walking them in reverse keeps their original relative order,
// because each prepend lands ahead of the one before it.
foreach (HtmlNode node in item.ChildNodes.Reverse())
{
item.ParentNode.PrependChild(node);
}
}
else
{
// Insert children after the previous sibling.
// NOTE(review): item.PreviousSibling is re-read on every iteration; this
// appears to rely on it pointing at the node inserted last, so that the
// children end up in their original order — confirm against the
// HtmlAgilityPack version in use.
foreach (HtmlNode node in item.ChildNodes)
{
item.ParentNode.InsertAfter(node, item.PreviousSibling);
}
}
// The children have been re-attached; drop the now-redundant wrapper element.
item.Remove();
}
// Serialize the document's content back into `html`, trimming surrounding whitespace.
html = htmlDoc.DocumentNode.WriteContentTo().Trim();
Here are the cases I used to test:
/// <summary>
/// A single element named in the strip list is removed, leaving only its text.
/// </summary>
[TestMethod]
public void StripTags_CanStripSingleTag()
{
    // Arrange
    const string markup = "<p>tag</p>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p");

    // Assert
    Assert.AreEqual("tag", stripped);
}
/// <summary>
/// A stripped tag nested inside another instance of the same tag is removed
/// along with its parent, and the text of both is kept in order.
/// </summary>
[TestMethod]
public void StripTags_CanStripNestedTag()
{
    // Arrange
    const string markup = "<p>tag <p>inner</p></p>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p");

    // Assert
    Assert.AreEqual("tag inner", stripped);
}
/// <summary>
/// Two sibling top-level elements with different names are both stripped
/// when both names are supplied.
/// </summary>
[TestMethod]
public void StripTags_CanStripTwoTopLevelTags()
{
    // Arrange
    const string markup = "<p>tag</p> <div>block</div>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p", "div");

    // Assert
    Assert.AreEqual("tag block", stripped);
}
/// <summary>
/// Different stripped tags nested two levels deep are both removed.
/// </summary>
[TestMethod]
public void StripTags_CanStripMultipleNestedTags_2LevelsDeep()
{
    // Arrange
    const string markup = "<p>tag <div>inner</div></p>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p", "div");

    // Assert
    Assert.AreEqual("tag inner", stripped);
}
/// <summary>
/// Stripped tags nested three levels deep are all removed and the text keeps
/// its original order.
/// </summary>
[TestMethod]
public void StripTags_CanStripMultipleNestedTags_3LevelsDeep()
{
    // Arrange
    const string markup = "<p>tag <div>inner <p>superinner</p></div></p>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p", "div");

    // Assert
    Assert.AreEqual("tag inner superinner", stripped);
}
/// <summary>
/// Two top-level elements, each containing nested stripped tags, are fully
/// unwrapped with all text preserved in document order.
/// </summary>
[TestMethod]
public void StripTags_CanStripTwoTopLevelMultipleNestedTags_3LevelsDeep()
{
    // Arrange
    const string markup = "<p>tag <div>inner <p>superinner</p></div></p> <div><p>inner</p> toplevel</div>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p", "div");

    // Assert
    Assert.AreEqual("tag inner superinner inner toplevel", stripped);
}
/// <summary>
/// Elements whose names are not in the strip list are left in the output,
/// whether they sit inside a stripped tag or wrap one.
/// </summary>
[TestMethod]
public void StripTags_IgnoresTagsThatArentSpecified()
{
    // Scenario 1: an unlisted tag nested inside stripped tags is preserved.
    const string nestedMarkup = "<p>tag <div>inner <a>superinner</a></div></p>";
    var nestedResult = HtmlUtilities.StripTags(nestedMarkup, "p", "div");
    Assert.AreEqual("tag inner <a>superinner</a>", nestedResult);

    // Scenario 2: an unlisted tag wrapping stripped tags is preserved.
    const string wrappedMarkup = "<wrapper><p>tag <div>inner</div></p></wrapper>";
    var wrappedResult = HtmlUtilities.StripTags(wrappedMarkup, "p", "div");
    Assert.AreEqual("<wrapper>tag inner</wrapper>", wrappedResult);
}
/// <summary>
/// Void-style tags written both unclosed and self-closed are stripped
/// when their name is listed.
/// </summary>
[TestMethod]
public void StripTags_CanStripSelfClosingAndUnclosedTagsLikeBr()
{
    // Arrange
    const string markup = "<p>tag</p><br><br/>";

    // Act
    var stripped = HtmlUtilities.StripTags(markup, "p", "br");

    // Assert
    Assert.AreEqual("tag", stripped);
}
It probably doesn't handle every case, but it works for my needs.
Perhaps this might be what you're looking for?
// Unwrap every <removeme> element: drop the tag itself but keep its children
// at the position the tag occupied.
// SelectNodes yields null (not an empty collection) when nothing matches, so
// guard before iterating.
HtmlNodeCollection matches = html.DocumentNode.SelectNodes("//removeme");
if (matches != null)
{
    foreach (HtmlNode node in matches)
    {
        // keepGrandChildren = true re-parents the children where the removed
        // node was; removing the node and then appending its children would
        // instead move them to the end of the parent.
        node.ParentNode.RemoveChild(node, true);
    }
}
Edit: L.B's answer is much cleaner. Go with his!
// Remove the first <removeme> element while keeping its children in place.
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(html);
var node = doc.DocumentNode.SelectSingleNode("//removeme");
// SelectSingleNode yields null when the document has no matching element;
// without this guard the call below would throw a NullReferenceException.
if (node != null)
{
    // keepGrandChildren = true: only the wrapper is removed, its children stay.
    node.ParentNode.RemoveChild(node, true);
}