DOM TreeWalker to return all text nodes

问题

I'm trying to get access to all text nodes within a given element, so that I can isolate words and wrap them in spans.

TreeWalker seems to be the API for the job, but I'm finding it to be extremely unintuitive. Neither the spec nor the MDN reference (usually so good at explaining arcane DOM APIs) is self-evident to my eyes.

My first assumption was that all I needed to pass was the right filter as second argument - something like document.createTreeWalker( element, NodeFilter.TEXT_NODE ). But this seems to stop as soon as it encounters a non-text node:

wordWrap( document.body )

// First attempt: walk the nodes under `element` with a TreeWalker and replace
// each visited node's text with one <span class="word"> per word, separated by
// single-space text nodes.
// NOTE(review): this version does not reach every text node; see the notes on
// the createTreeWalker call and on the final removeChild call.
function wordWrap( element ){
  // NOTE(review): NodeFilter.TEXT_NODE is not the whatToShow constant for text
  // nodes; the constant intended here is NodeFilter.SHOW_TEXT.
  var nodes = document.createTreeWalker( element, NodeFilter.TEXT_NODE )
  var node
  var text
  var word

  while( node = nodes.nextNode() ){
    // The replace() regex has no `g` flag, so only the first match (one end of
    // the string) is stripped before the value is split on whitespace runs.
    text = node.nodeValue.replace( /(^\s+|\s+$)/, '' ).split( /\s+/g )

    // Emit one span per word, each inserted just before the original node.
    while( text.length ){
      word = document.createElement( 'span' )

      word.className = 'word'

      word.innerText = text.shift()

      node.parentNode.insertBefore( word, node )

      // Re-insert a single space between words (not after the last one).
      if( text.length )
        node.parentNode.insertBefore( document.createTextNode( ' ' ), node )
        }

    // Removes the node the walker is currently positioned on; once it is
    // detached, the next nextNode() call has nowhere to continue from.
    node.parentNode.removeChild( node )
  }
}

/* Tints every generated .word span and adds .5em of right padding to it. */
.word {
  background: #fee;
  padding: 0 .5em 0 0;
}

Contact us at <a href="mailto:email@example.com">email@example.com</a> for submissions &#38; other enquiries.

So I assume this is an opportunity to use the third filter argument of the TreeWalker, as well as the extra properties on NodeFilter. If a filter method's valid return values are FILTER_ACCEPT, FILTER_REJECT & FILTER_SKIP, then I reason that by accepting element nodes as well as text nodes in the second argument, I can specify that text nodes should be accepted and the rest skipped. But this seems to give the same results - no text nodes are picked up within or after the anchor:

wordWrap( document.body )

// Second attempt: same word-wrapping loop, but the TreeWalker is created with
// an explicit whatToShow mask and an acceptNode filter.
// NOTE(review): the traversal still ends early because the loop removes the
// node the walker is positioned on; see the final removeChild call.
function wordWrap( element ){
  var nodes = document.createTreeWalker(
    element,
    // Let both text and element nodes through to the filter callback.
    NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT,
    // Accept text nodes; return FILTER_SKIP for every other node.
    { acceptNode : function( node ){
      if( node.nodeType === node.TEXT_NODE )
        return NodeFilter.FILTER_ACCEPT
      else 
        return NodeFilter.FILTER_SKIP
    } } 
  )
  var node
  var text
  var word

  while( node = nodes.nextNode() ){
    // The replace() regex has no `g` flag, so only the first match (one end of
    // the string) is stripped before the value is split on whitespace runs.
    text = node.nodeValue.replace( /(^\s+|\s+$)/, '' ).split( /\s+/g )

    // Emit one span per word, each inserted just before the original node.
    while( text.length ){
      word = document.createElement( 'span' )

      word.className = 'word'

      word.innerText = text.shift()

      node.parentNode.insertBefore( word, node )

      // Re-insert a single space between words (not after the last one).
      if( text.length )
        node.parentNode.insertBefore( document.createTextNode( ' ' ), node )
        }

    // Removes the node the walker is currently positioned on; once it is
    // detached, the next nextNode() call has nowhere to continue from.
    node.parentNode.removeChild( node )
  }
}

/* Tints every generated .word span and adds .5em of right padding to it. */
.word {
  background: #fee;
  padding: 0 .5em 0 0;
}

Contact us at <a href="mailto:email@example.com">email@example.com</a> for submissions &#38; other enquiries.

By this point I'm convinced recursively iterating through the tree using DOM1 methods would be easier, like in this snippet:

wordWrap( document.body )

/**
 * Wraps every whitespace-separated word found in the text nodes beneath
 * `element` in its own <span class="word">.
 *
 * Each text node returned by textNodes( element ) is replaced by a sequence of
 * word spans. Every run of whitespace (leading, trailing or between words) is
 * collapsed to a single-space text node, so no empty spans are produced for
 * whitespace-only or whitespace-padded text nodes.
 *
 * @param {Element} element - Root of the subtree whose text is wrapped.
 */
function wordWrap( element ){
  textNodes( element ).forEach( function( node ){
    var parent = node.parentNode
    // Alternating runs of whitespace and non-whitespace; match() returns null
    // for an empty string, hence the fallback.
    var tokens = node.nodeValue.match( /\s+|\S+/g ) || []

    tokens.forEach( function( token ){
      var word

      if( /^\s/.test( token ) ){
        // Whitespace stays plain text so it never becomes an empty .word span.
        parent.insertBefore( document.createTextNode( ' ' ), node )
        return
      }

      word = document.createElement( 'span' )
      word.className = 'word'
      // textContent is the standard DOM property; innerText is layout-aware
      // and was not available in every browser.
      word.textContent = token
      parent.insertBefore( word, node )
    } )

    // Safe to remove here: the list of text nodes was collected up front.
    parent.removeChild( node )
  } )
}

/**
 * Collects, in document order, every text node found beneath `element`.
 *
 * Text children are taken as-is, element children are searched recursively,
 * and any other node type is ignored.
 *
 * @param {Node} element - Node whose childNodes are searched.
 * @returns {Node[]} Flat array of the text nodes found.
 */
function textNodes( element ){
  return Array.prototype.reduce.call( element.childNodes, function( collected, child ){
    switch( child.nodeType ){
      case child.TEXT_NODE:
        return collected.concat( child )
      case child.ELEMENT_NODE:
        // Descend and splice the nested results into the flat list.
        return collected.concat( textNodes( child ) )
      default:
        return collected
    }
  }, [] )
}

/* Tints every generated .word span and adds .5em of right padding to it. */
.word {
  background: #fee;
  padding: 0 .5em 0 0;
}

Contact us at <a href="mailto:email@example.com">email@example.com</a> for submissions &#38; other enquiries.

What am I missing?

回答1:

What am I missing?

node.parentNode.removeChild(node) is the problem - you're removing the current node from the DOM, so the walker will find no .nextNode() from there.

You should either advance the walker before removing the node, or just not remove it and instead shrink its content (to what is left over when you've moved out all words).

wordWrap(document.body);

/**
 * Wraps every word in the text nodes beneath `element` in a
 * <span class="word">, preserving the whitespace between words.
 *
 * The text node being processed is never removed: words (and the whitespace
 * in front of them) are moved out into new nodes inserted before it, and the
 * node is left holding whatever trailing whitespace remains (possibly none).
 * Keeping it in place lets the walker continue from it on the next
 * nextNode() call.
 *
 * @param {Node} element - Root of the subtree to process.
 */
function wordWrap( element ){
  // All four arguments are passed because older browsers require them.
  var walker = document.createTreeWalker(element, NodeFilter.SHOW_TEXT, null, null);
  var node;
  while (node = walker.nextNode()) {
    var p = node.parentNode;
    var text = node.nodeValue;
    var m;
    // m[1] = whitespace in front of the next word (may be empty), m[2] = the word.
    while (m = text.match(/^(\s*)(\S+)/)) {
      text = text.slice(m[0].length);
      // Skip the whitespace node when there is none, so no empty text nodes
      // are added in front of words.
      if (m[1]) {
        p.insertBefore(document.createTextNode(m[1]), node);
      }
      var word = p.insertBefore(document.createElement('span'), node);
      word.appendChild(document.createTextNode(m[2]));
      word.className = 'word';
    }
    // New nodes were inserted before `node`, so the walker will not revisit them.
    node.nodeValue = text;
  }
}

/* Tints every generated .word span and adds .5em of right padding to it. */
.word {
  background: #faa;
  padding: 0 .5em 0 0;
}

Contact us at <a href="mailto:email@example.com">email @ example.com</a> for submissions &#38; other enquiries.

Notice that the correct filter is NodeFilter.SHOW_TEXT, not .TEXT_NODE, and that in older browsers all four arguments are required.

来源：https://stackoverflow.com/questions/31956960/dom-treewalker-to-return-all-text-nodes

标签

javascript

dom