Html Agility Pack - Remove element, but not innerHtml

后端 未结 10 1940
予麋鹿
予麋鹿 2021-01-06 11:46

I can easily remove the element just by calling node.Remove(), like this:

HtmlDocument html = new HtmlDocument();

html.Load(Server.MapPath(@\"~\\Site\\themes\\default         


        
相关标签:
10条回答
  • 2021-01-06 12:01

    How about this?

    // Replace every <removeme> element with a text node that carries the element's
    // inner markup, so the wrapper disappears while its content stays where it was.
    // XPath queries are issued through the document's root node (DocumentNode).
    var removedNodes = document.DocumentNode.SelectNodes("//removeme");
    if (removedNodes != null) // guard kept from the original: nothing to do when no element matches
    {
        foreach (var rn in removedNodes)
        {
            // The text node takes the element's InnerHtml verbatim.
            HtmlTextNode innerNodes = document.CreateTextNode(rn.InnerHtml);

            // Swap the wrapper for its content in the parent (was misspelled "ParnetNode").
            rn.ParentNode.ReplaceChild(innerNodes, rn);
        }
    }
    
    0 讨论(0)
  • 2021-01-06 12:07

    Adding my two cents because none of these approaches handled what I wanted (to remove a set of given tags like p and div and handle nesting properly while preserving inner tags).

    Here's what I came up with. It passes all my unit tests, which cover what I would consider most of the cases I need to deal with:

    // Unwraps every element whose name appears in tagNames: the element itself is
    // removed while its child nodes are moved up into its parent.
    // NOTE(review): `html` and `tagNames` are not declared in this snippet —
    // presumably they are parameters of the enclosing method; confirm.
    var htmlDoc = new HtmlDocument();
    
    // Parse the incoming markup into a node tree.
    htmlDoc.LoadHtml(html);
    
    // Select all descendants whose tag name is listed in tagNames, then reverse the
    // sequence so the loop below visits the matches last-to-first.
    // NOTE(review): presumably this means nested matches are handled before their
    // ancestors — confirm against the ordering of Descendants().
    var tags = (from tag in htmlDoc.DocumentNode.Descendants()
               where tagNames.Contains(tag.Name)
               select tag).Reverse();
    
    // Hoist the children of each matched tag into its parent, then drop the tag.
    foreach (var item in tags)
    {
        if (item.PreviousSibling == null)
        {
            // The tag is its parent's first child: prepend its children to the parent.
            // Iterating in reverse means the last prepend is the original first child.
            foreach (HtmlNode node in item.ChildNodes.Reverse())
            {
                item.ParentNode.PrependChild(node);
            }                        
        }
        else
        {
            // The tag has a preceding sibling: insert each child after that sibling.
            // NOTE(review): item.PreviousSibling is re-read on every iteration; whether
            // the children keep their original order depends on HtmlAgilityPack updating
            // that link after each insert — confirm before reordering anything here.
            foreach (HtmlNode node in item.ChildNodes)
            {
                item.ParentNode.InsertAfter(node, item.PreviousSibling);
            }
        }
    
        // Detach the now-unwrapped tag from the tree.
        item.Remove();
    }
    
    // Serialize the document's content back to a string, trim surrounding
    // whitespace, and store the result in `html`.
    html = htmlDoc.DocumentNode.WriteContentTo().Trim();
    

    Here are the cases I used to test:

    // A single wrapping tag is stripped and only its text content remains.
    [TestMethod]
    public void StripTags_CanStripSingleTag()
    {
        // Arrange
        const string markup = "<p>tag</p>";
        const string wanted = "tag";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // A tag nested inside another tag of the same name is stripped along with its outer tag.
    [TestMethod]
    public void StripTags_CanStripNestedTag()
    {
        // Arrange
        const string markup = "<p>tag <p>inner</p></p>";
        const string wanted = "tag inner";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // Two different sibling tags at the top level are both stripped.
    [TestMethod]
    public void StripTags_CanStripTwoTopLevelTags()
    {
        // Arrange
        const string markup = "<p>tag</p> <div>block</div>";
        const string wanted = "tag block";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p", "div");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // Two different tags nested two levels deep are both stripped.
    [TestMethod]
    public void StripTags_CanStripMultipleNestedTags_2LevelsDeep()
    {
        // Arrange
        const string markup = "<p>tag <div>inner</div></p>";
        const string wanted = "tag inner";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p", "div");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // Tags nested three levels deep are all stripped, leaving the text in order.
    [TestMethod]
    public void StripTags_CanStripMultipleNestedTags_3LevelsDeep()
    {
        // Arrange
        const string markup = "<p>tag <div>inner <p>superinner</p></div></p>";
        const string wanted = "tag inner superinner";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p", "div");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // Two top-level subtrees, each containing nested tags, are both fully stripped.
    [TestMethod]
    public void StripTags_CanStripTwoTopLevelMultipleNestedTags_3LevelsDeep()
    {
        // Arrange
        const string markup = "<p>tag <div>inner <p>superinner</p></div></p> <div><p>inner</p> toplevel</div>";
        const string wanted = "tag inner superinner inner toplevel";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p", "div");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    
    // Tags that are not in the strip list are preserved, whether they sit inside
    // the stripped tags or wrap around them.
    [TestMethod]
    public void StripTags_IgnoresTagsThatArentSpecified()
    {
        // Case 1: an unlisted tag nested inside stripped tags survives.
        const string nestedMarkup = "<p>tag <div>inner <a>superinner</a></div></p>";
        const string nestedWanted = "tag inner <a>superinner</a>";

        var nestedStripped = HtmlUtilities.StripTags(nestedMarkup, "p", "div");

        Assert.AreEqual(nestedWanted, nestedStripped);

        // Case 2: an unlisted tag wrapping stripped tags survives.
        const string wrappedMarkup = "<wrapper><p>tag <div>inner</div></p></wrapper>";
        const string wrappedWanted = "<wrapper>tag inner</wrapper>";

        var wrappedStripped = HtmlUtilities.StripTags(wrappedMarkup, "p", "div");

        Assert.AreEqual(wrappedWanted, wrappedStripped);
    }
    
    // Content-less tags are stripped in both their unclosed and self-closing forms.
    [TestMethod]
    public void StripTags_CanStripSelfClosingAndUnclosedTagsLikeBr()
    {
        // Arrange
        const string markup = "<p>tag</p><br><br/>";
        const string wanted = "tag";

        // Act
        var stripped = HtmlUtilities.StripTags(markup, "p", "br");

        // Assert
        Assert.AreEqual(wanted, stripped);
    }
    

    It probably doesn't handle every case, but it works for my needs.

    0 讨论(0)
  • 2021-01-06 12:12

    Perhaps this might be what you're looking for?

    // Unwrap every <removeme> element: the element is dropped and its children
    // are kept in the parent.
    HtmlNodeCollection matches = html.DocumentNode.SelectNodes("//removeme");

    // SelectNodes returns null rather than an empty collection when nothing
    // matches, so guard before iterating.
    if (matches != null)
    {
        foreach (HtmlNode node in matches)
        {
            // Passing true for keepGrandChildren asks the parent to keep the
            // removed element's children instead of discarding them. The earlier
            // Remove() + AppendChildren() approach moved them to the end of the
            // parent, losing their position among the element's siblings.
            node.ParentNode.RemoveChild(node, true);
        }
    }
    

    Edit: L.B's answer is much cleaner. Go with his!

    0 讨论(0)
  • 2021-01-06 12:13
    // Parse the markup, then unwrap the first <removeme> element: the element is
    // removed while its children are kept in the parent.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);
    
    var node = doc.DocumentNode.SelectSingleNode("//removeme");

    // SelectSingleNode returns null when no element matches; skip the removal
    // instead of throwing a NullReferenceException.
    if (node != null)
    {
        // true = keepGrandChildren: preserve the removed element's children.
        node.ParentNode.RemoveChild(node, true);
    }
    
    0 讨论(0)
提交回复
热议问题