Substring text with HTML tags in Javascript

前端 未结 6 1055
小蘑菇
小蘑菇 2020-12-14 22:25

Do you have a solution for taking a substring of text that contains HTML tags in JavaScript?

For example:

var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';


        
相关标签:
6条回答
  • 2020-12-14 22:49
    // Sample markup: text interleaved with nested inline tags.
    let str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';
    // Remove every "<...>" run so only the text between the tags remains.
    // (The pattern reads the variable declared above; it does not decode entities.)
    let plainText = str.replace(/<[^>]+>/g, '');
    

    Extract the plain text with the regular expression given above, then use the JavaScript String method ".substr()" to get the desired result.

    0 讨论(0)
  • 2020-12-14 22:56

    Usage:

    // Sample markup: plain text mixed with a link that contains a nested <strong>.
    var str = 'Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, consectetur adipiscing elit.';
    
    // Truncate to 20 and 30 characters of text; tags do not count toward the limit.
    var res1 = html_substr( str, 20 );
    var res2 = html_substr( str, 30 );
    
    // Show the truncated markup; the trailing comments give the expected output.
    alert( res1 ); // Lorem ipsum <a href="#">dolor <strong>si</strong></a>
    alert( res2 ); // Lorem ipsum <a href="#">dolor <strong>sit</strong> amet</a>, co
    

    Example: http://jsfiddle.net/2ULbK/4/


    Function:

    /**
     * Truncate an HTML string to at most `count` characters of text while
     * keeping the markup well formed.
     *
     * The string is parsed into a detached <div>, the text nodes are visited in
     * document order, and text beyond the limit is cut or emptied. Tags never
     * count toward the limit. Elements whose text was removed entirely are left
     * in place as empty elements.
     *
     * NOTE(review): assigning untrusted markup to innerHTML can still trigger
     * resource loads / inline handlers (e.g. <img onerror>) even on a detached
     * element -- sanitize untrusted input before calling this.
     *
     * @param {string} str   HTML source to truncate.
     * @param {number} count Maximum number of text characters to keep.
     * @returns {string} The serialized, truncated HTML.
     */
    function html_substr( str, count ) {

        const div = document.createElement('div');
        div.innerHTML = str;

        // Track the budget in a local so the caller-visible parameter is not mutated.
        let remaining = count;

        walk( div, track );

        // Consume the budget with one text node; trim or blank it once exhausted.
        function track( textNode ) {
            if( remaining > 0 ) {
                const len = textNode.data.length;
                remaining -= len;
                if( remaining <= 0 ) {
                    // `remaining` is now <= 0, so len + remaining is the part that still fits.
                    textNode.data = textNode.substringData( 0, len + remaining );
                }
            } else {
                textNode.data = '';
            }
        }

        // Depth-first walk over text (nodeType 3) and element (nodeType 1) nodes.
        // A for-loop (instead of do/while) makes a childless parent -- e.g. the
        // div produced by an empty input string -- a no-op rather than a
        // TypeError on `null.nodeType`.
        function walk( parent, fn ) {
            for( let node = parent.firstChild; node; node = node.nextSibling ) {
                if( node.nodeType === 3 ) {
                    fn( node );
                } else if( node.nodeType === 1 ) {
                    walk( node, fn );
                }
            }
        }

        return div.innerHTML;
    }
    
    0 讨论(0)
  • 2020-12-14 23:02

    Taking into consideration that parsing html with regex is a bad idea, here is a solution that does just that :)

    EDIT: Just to be clear: This is not a valid solution, it was meant as an exercise that made very lenient assumptions about the input string, and as such should be taken with a grain of salt. Read the link above and see why parsing html with regex can never be done.

    /**
     * Regex-based truncation of an HTML string to at most `n` characters of
     * text, re-closing any elements left open at the cut.
     *
     * This is a lenient scanner, not an HTML parser: it assumes well-nested
     * markup, no ">" inside attribute values or comments, and it counts
     * entities such as "&amp;" as several characters.
     *
     * @param {string} s HTML source.
     * @param {number} n Maximum number of text characters to keep; values
     *                   that are negative, fractional below 1, or NaN keep nothing.
     * @returns {string} Truncated HTML with the still-open tags closed.
     */
    function htmlSubstring(s, n) {
        const tagPattern = /<([^>\s]*)[^>]*>/g;
        // Elements that never take a closing tag; pushing them on the stack
        // would emit bogus closers such as "</br>" and mis-pair later pops.
        const voidTags = new Set([
            'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
            'link', 'meta', 'param', 'source', 'track', 'wbr'
        ]);
        const stack = [];
        // Normalize the budget so negative / fractional / NaN inputs cannot
        // leave a non-zero remainder that never reaches 0.
        let remaining = Math.max(0, Math.floor(Number(n)) || 0);
        let lasti = 0;
        let result = '';
        let m;

        // for each tag, while we don't have enough characters
        while ((m = tagPattern.exec(s)) && remaining) {
            // get the text between the last tag and this one, capped to the budget
            const temp = s.substring(lasti, m.index).slice(0, remaining);
            // append to the result and count the number of characters added
            result += temp;
            remaining -= temp.length;
            lasti = tagPattern.lastIndex;

            if (remaining) {
                result += m[0];
                const name = m[1];
                if (name.charAt(0) === '/') {
                    // closing tag: pop the stack (does not account for bad html)
                    stack.pop();
                } else if (!isStandalone(name, m[0])) {
                    // opening tag that will need a matching closer
                    stack.push(name);
                }
            }
        }

        // add the remainder of the string, if needed (there are no more tags in here)
        result += s.slice(lasti, lasti + remaining);

        // fix the unclosed tags, innermost first
        while (stack.length) {
            result += '</' + stack.pop() + '>';
        }

        return result;

        // True for tags that must not be closed later: empty names, comments /
        // doctype / processing instructions, void elements, and self-closing
        // syntax ("<x/>", "<x />", '<x a="b"/>').
        function isStandalone(name, tag) {
            const first = name.charAt(0);
            return name === '' ||
                first === '!' ||
                first === '?' ||
                name.endsWith('/') ||
                /[\s"']\/>$/.test(tag) ||
                voidTags.has(name.toLowerCase());
        }
    }
    

    Example: http://jsfiddle.net/danmana/5mNNU/

    Note: patrick dw's solution may be safer regarding bad html, but I'm not sure how well it handles white spaces.

    0 讨论(0)
  • 2020-12-14 23:05

    This is a solution for single tags:

    /**
     * Take `length` raw characters of `str` starting at `start`, extending the
     * cut so that it never lands inside a "<...>" tag.
     *
     * Tag characters count toward `length`; only the end of the slice is
     * adjusted. Scanning always stops at the end of the string, so an unclosed
     * "<" or a stray ">" cannot make the loop run forever.
     *
     * @param {string} str    Source text, possibly containing tags.
     * @param {number} start  Index of the first character to copy.
     * @param {number} length Minimum number of characters to copy.
     * @returns {string} The slice, extended to the closing ">" of a tag the
     *                   cut would otherwise split.
     */
    function subStrWithoutBreakingTags(str, start, length) {
        let openBrackets = 0;
        let returnString = "";
        let writeLetters = 0;
        // Continue while characters are still owed or a tag is still open,
        // but never read past the end of the string.
        while (start + writeLetters < str.length &&
               (writeLetters < length || openBrackets !== 0)) {
            const letter = str.charAt(start + writeLetters);
            if (letter === "<") {
                openBrackets++;
            } else if (letter === ">" && openBrackets > 0) {
                // Ignore a stray ">" so the counter cannot go negative and
                // later mistake an opening "<" for a balanced state.
                openBrackets--;
            }
            returnString += letter;
            writeLetters++;
        }
        return returnString;
    }
    
    0 讨论(0)
  • 2020-12-14 23:05

    Javascript has a sub-string method. It makes no difference if the string contains html.

    see http://www.w3schools.com/jsref/jsref_substr.asp

    0 讨论(0)
  • 2020-12-14 23:09

    Use something similar to: str.replace(/<[^>]*>?/gi, '').substr(0, 20);
    I've created an example at: http://fiddle.jshell.net/xpW9j/1/

    0 讨论(0)
提交回复
热议问题