Fastest method to escape HTML tags as HTML entities?

前端 未结 12 1410
-上瘾入骨i
-上瘾入骨i 2020-11-22 09:24

I'm writing a Chrome extension that involves doing a lot of the following job: sanitizing strings that might contain HTML tags, by converting `<`, `>` and `&` to `&lt;`, `&gt;` and `&amp;`, respectively.

相关标签:
12条回答
  • 2020-11-22 09:50

    All-in-one script:

    // HTML entities Encode/Decode
    
    /**
     * Escapes the five HTML-significant characters (& < > " ') as entities.
     *
     * @param {string} str - Text to escape.
     * @returns {string} Copy of str with every special character replaced
     *     by its entity.
     */
    function htmlspecialchars(str) {
        var map = {
            // "&" has to become "&amp;". Mapping it to itself leaves
            // ampersands unescaped, so input such as "&lt;" would pass
            // through unchanged and later be read back as "<".
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            "\"": "&quot;",
            "'": "&#39;" // ' -> &apos; for XML only
        };
        return str.replace(/[&<>"']/g, function(m) { return map[m]; });
    }
    /**
     * Converts the entities &amp; &lt; &gt; &quot; and &#39; back into the
     * characters they stand for. The string is scanned once, so text
     * produced by a replacement is not decoded again ("&amp;lt;" becomes
     * "&lt;", not "<").
     *
     * @param {string} str - Text containing entities.
     * @returns {string} Copy of str with the five entities decoded.
     */
    function htmlspecialchars_decode(str) {
        // Entity -> literal character.
        var entityToChar = {
            "&amp;": "&",
            "&lt;": "<",
            "&gt;": ">",
            "&quot;": "\"",
            "&#39;": "'"
        };

        function decodeEntity(entity) {
            return entityToChar[entity];
        }

        return str.replace(/(&amp;|&lt;|&gt;|&quot;|&#39;)/g, decodeEntity);
    }
    /**
     * Round-trips a string through the innerHTML of a detached <textarea>:
     * the string is assigned to innerHTML and innerHTML is read back.
     * Whatever escaping appears in the result is applied by the browser's
     * own serializer. Requires a DOM (`document`).
     *
     * @param {string} str - Text to encode.
     * @returns {string} The textarea's innerHTML after the assignment.
     */
    function htmlentities(str) {
        var scratch = document.createElement("textarea");
        scratch.innerHTML = str;
        var encoded = scratch.innerHTML;
        return encoded;
    }
    /**
     * Decodes entities by assigning the string to the innerHTML of a
     * detached <textarea> and reading the element's `value` back. The
     * decoding itself is done by the browser's HTML parser. Requires a DOM
     * (`document`).
     *
     * @param {string} str - Text containing HTML entities.
     * @returns {string} The textarea's value after the assignment.
     */
    function htmlentities_decode(str) {
        var scratch = document.createElement("textarea");
        scratch.innerHTML = str;
        var decoded = scratch.value;
        return decoded;
    }
    

    http://pastebin.com/JGCVs0Ts

    0 讨论(0)
  • 2020-11-22 09:51

    You could try passing a callback function to perform the replacement:

    // Lookup table: raw character -> HTML entity. Only the three characters
    // needed to neutralise tags are listed; quotes are left untouched.
    var tagsToReplace = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;'
    };

    /**
     * Replacer callback: returns the entity for a character found in
     * tagsToReplace, or the input unchanged when the lookup is falsy.
     *
     * @param {string} tag - A single matched character.
     * @returns {string} Entity for the character, or the character itself.
     */
    function replaceTag(tag) {
        var entity = tagsToReplace[tag];
        return entity ? entity : tag;
    }

    /**
     * Escapes &, < and > in a string in a single regex pass.
     *
     * @param {string} str - Text to escape.
     * @returns {string} Escaped copy of str.
     */
    function safe_tags_replace(str) {
        var specialChars = /[&<>]/g;
        return str.replace(specialChars, replaceTag);
    }
    

    Here is a performance test: http://jsperf.com/encode-html-entities to compare with calling the replace function repeatedly, and using the DOM method proposed by Dmitrij.

    Your way seems to be faster...

    Why do you need it, though?

    0 讨论(0)
  • 2020-11-22 09:51

    I'm not entirely sure about speed, but if you are looking for simplicity I would suggest using the lodash/underscore escape function.

    0 讨论(0)
  • 2020-11-22 09:53

    The AngularJS source code also has a version inside of angular-sanitize.js.

    // A UTF-16 high surrogate immediately followed by a low surrogate.
    var SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
    // Any character that is NOT in the range '#'..'~', not '|', not a space
    // and not '!'. This catches the double quote, control characters and
    // everything above '~'.
    var NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;

    /**
     * Escapes a string for insertion into element text or an attribute
     * value.
     *
     * The replacement order matters: ampersands are escaped first so the
     * '&' of the numeric entities generated afterwards is not escaped again.
     *
     * @param value string to escape
     * @returns {string} escaped text
     */
    function encodeEntities(value) {
      function toNumericEntity(code) {
        return '&#' + code + ';';
      }

      var escaped = value.replace(/&/g, '&amp;');

      // Combine each surrogate pair into its code point.
      escaped = escaped.replace(SURROGATE_PAIR_REGEXP, function(pair) {
        var highOffset = pair.charCodeAt(0) - 0xD800;
        var lowOffset = pair.charCodeAt(1) - 0xDC00;
        return toNumericEntity((highOffset * 0x400) + lowOffset + 0x10000);
      });

      escaped = escaped.replace(NON_ALPHANUMERIC_REGEXP, function(ch) {
        return toNumericEntity(ch.charCodeAt(0));
      });

      // '<' and '>' sit inside '#'..'~', so they are handled separately.
      escaped = escaped.replace(/</g, '&lt;');
      return escaped.replace(/>/g, '&gt;');
    }
    
    0 讨论(0)
  • 2020-11-22 09:54

    Martijn's method as a single function that also handles the " (double quote) mark, for use in JavaScript:

    /**
     * Escapes &, <, > and the double quote in a string. Single quotes are
     * not touched.
     *
     * @param {string} html - Text to escape.
     * @returns {string} Escaped copy of html.
     */
    function escapeHTML(html) {
        // Character -> entity table, built once per call.
        var charsToReplace = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            '"': '&#34;'
        };

        return html.replace(/[&<>"]/g, function(match) {
            var entity = charsToReplace[match];
            return entity || match;
        });
    }
    
    0 讨论(0)
  • 2020-11-22 09:57

    The fastest method is:

    /**
     * Escapes a string by placing it in a text node inside a detached <div>
     * and reading the div's innerHTML. The escaping is done by the
     * browser's serializer. Requires a DOM (`document`).
     *
     * @param {string} html - Text to escape.
     * @returns {string} innerHTML of a div whose only child is a text node
     *     holding html.
     */
    function escapeHTML(html) {
        var container = document.createElement('div');
        var textNode = document.createTextNode(html);
        container.appendChild(textNode);
        return container.innerHTML;
    }
    

    This method is about twice as fast as the methods based on 'replace'; see http://jsperf.com/htmlencoderegex/35 .

    Source: https://stackoverflow.com/a/17546215/698168

    0 讨论(0)
提交回复
热议问题