Server side html filtering library in asp.net mvc

∥☆過路亽.° 提交于 2020-01-04 12:45:46

问题


I'm using TinyMCE. I need server side Html input filtering function or library. TinyMCE recommended

  • htmlLawed
  • HTMLPurifier
  • Zend Filter Input

All of this only compatible with PHP. But i'm using asp.net MVC

Please tell me, is there any available filtering library or attribute in asp.net MVC

For example like this

[HtmlInputFilter] //<<--
public ActionResult Post(string html){
...
}

回答1:


You could use the Microsoft AntiXSS library to sanitize the HTML.

If you want to handle your own whitelists of tags and attributes the AntiXSS library is not suitable as it doesn't provide you with the necessary customization hooks. One possibility is to roll your own sanitizer (at your own risk of course). For example you could use the HTML Agility Pack for that. Here's a blog post illustrating a sample sanitizer:

public static class HtmlUtility
{
    // Original list courtesy of Robert Beal :
    // http://www.robertbeal.com/37/sanitising-html

    private static readonly Dictionary<string, string[]> ValidHtmlTags =
        new Dictionary<string, string[]>
        {
            {"p", new string[]          {"style", "class", "align"}},
            {"div", new string[]        {"style", "class", "align"}},
            {"span", new string[]       {"style", "class"}},
            {"br", new string[]         {"style", "class"}},
            {"hr", new string[]         {"style", "class"}},
            {"label", new string[]      {"style", "class"}},

            {"h1", new string[]         {"style", "class"}},
            {"h2", new string[]         {"style", "class"}},
            {"h3", new string[]         {"style", "class"}},
            {"h4", new string[]         {"style", "class"}},
            {"h5", new string[]         {"style", "class"}},
            {"h6", new string[]         {"style", "class"}},

            {"font", new string[]       {"style", "class", "color", "face", "size"}},
            {"strong", new string[]     {"style", "class"}},
            {"b", new string[]          {"style", "class"}},
            {"em", new string[]         {"style", "class"}},
            {"i", new string[]          {"style", "class"}},
            {"u", new string[]          {"style", "class"}},
            {"strike", new string[]     {"style", "class"}},
            {"ol", new string[]         {"style", "class"}},
            {"ul", new string[]         {"style", "class"}},
            {"li", new string[]         {"style", "class"}},
            {"blockquote", new string[] {"style", "class"}},
            {"code", new string[]       {"style", "class"}},

            {"a", new string[]          {"style", "class", "href", "title"}},
            {"img", new string[]        {"style", "class", "src", "height", "width",
                "alt", "title", "hspace", "vspace", "border"}},

            {"table", new string[]      {"style", "class"}},
            {"thead", new string[]      {"style", "class"}},
            {"tbody", new string[]      {"style", "class"}},
            {"tfoot", new string[]      {"style", "class"}},
            {"th", new string[]         {"style", "class", "scope"}},
            {"tr", new string[]         {"style", "class"}},
            {"td", new string[]         {"style", "class", "colspan"}},

            {"q", new string[]          {"style", "class", "cite"}},
            {"cite", new string[]       {"style", "class"}},
            {"abbr", new string[]       {"style", "class"}},
            {"acronym", new string[]    {"style", "class"}},
            {"del", new string[]        {"style", "class"}},
            {"ins", new string[]        {"style", "class"}}
        };

    /// <summary>
    /// Takes raw HTML input and cleans against a whitelist
    /// </summary>
    /// <param name="source">Html source</param>
    /// <returns>Clean output</returns>
    public static string SanitizeHtml(string source)
    {
        HtmlDocument html = GetHtml(source);
        if (html == null) return String.Empty;

        // All the nodes
        HtmlNode allNodes = html.DocumentNode;

        // Select whitelist tag names
        string[] whitelist = (from kv in ValidHtmlTags
                              select kv.Key).ToArray();

        // Scrub tags not in whitelist
        CleanNodes(allNodes, whitelist);

        // Filter the attributes of the remaining
        foreach (KeyValuePair<string, string[]> tag in ValidHtmlTags)
        {
            IEnumerable<HtmlNode> nodes = (from n in allNodes.DescendantsAndSelf()
                                           where n.Name == tag.Key
                                           select n);

            if (nodes == null) continue;

            foreach (var n in nodes)
            {
                if (!n.HasAttributes) continue;

                // Get all the allowed attributes for this tag
                HtmlAttribute[] attr = n.Attributes.ToArray();
                foreach (HtmlAttribute a in attr)
                {
                    if (!tag.Value.Contains(a.Name))
                    {
                        a.Remove(); // Wasn't in the list
                    }
                    else
                    {
                        // AntiXss
                        a.Value =
                            Microsoft.Security.Application.Encoder.UrlPathEncode(a.Value);
                    }
                }
            }
        }

        return allNodes.InnerHtml;
    }

    /// <summary>
    /// Takes a raw source and removes all HTML tags
    /// </summary>
    /// <param name="source"></param>
    /// <returns></returns>
    public static string StripHtml(string source)
    {
        source = SanitizeHtml(source);

        // No need to continue if we have no clean Html
        if (String.IsNullOrEmpty(source))
            return String.Empty;

        HtmlDocument html = GetHtml(source);
        StringBuilder result = new StringBuilder();

        // For each node, extract only the innerText
        foreach (HtmlNode node in html.DocumentNode.ChildNodes)
            result.Append(node.InnerText);

        return result.ToString();
    }

    /// <summary>
    /// Recursively delete nodes not in the whitelist
    /// </summary>
    private static void CleanNodes(HtmlNode node, string[] whitelist)
    {
        if (node.NodeType == HtmlNodeType.Element)
        {
            if (!whitelist.Contains(node.Name))
            {
                node.ParentNode.RemoveChild(node);
                return; // We're done
            }
        }

        if (node.HasChildNodes)
            CleanChildren(node, whitelist);
    }

    /// <summary>
    /// Apply CleanNodes to each of the child nodes
    /// </summary>
    private static void CleanChildren(HtmlNode parent, string[] whitelist)
    {
        for (int i = parent.ChildNodes.Count - 1; i >= 0; i--)
            CleanNodes(parent.ChildNodes[i], whitelist);
    }

    /// <summary>
    /// Helper function that returns an HTML document from text
    /// </summary>
    private static HtmlDocument GetHtml(string source)
    {
        HtmlDocument html = new HtmlDocument();
        html.OptionFixNestedTags = true;
        html.OptionAutoCloseOnEnd = true;
        html.OptionDefaultStreamEncoding = Encoding.UTF8;

        html.LoadHtml(source);

        return html;
    }
}


来源:https://stackoverflow.com/questions/15216706/server-side-html-filtering-library-in-asp-net-mvc

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!