I have a trie (also called a prefix tree). Given a prefix, I want to get a list of ten words that start with the prefix.
The thing that's unique about this problem is that I only want 10 of the words that start with the given prefix-- not all of them. There are optimizations that can be made, given this.
My code below I know works fine. Each node in the trie has a children
property and a this_is_the_end_of_a_word
property. For instance, when you insert "hi", this is what the trie looks like:
The problem: Given a prefix, I want to get a list of ten words that start with the prefix.
My approach to the problem was this: Head down the prefix tree, following the characters of prefix
until you get to node which corresponds to the last character of prefix
. Now you should perform a DFS on this node, keeping track of nodes that have this_is_the_end_of_a_word === true
in a list. But you should stop searching when the length of your list is equal to 10, and return the list.
I think my approach is sound, but I'm having trouble implementing it-- particularly because I'm trying to use a recursive DFS, so I'm not sure how to pass the the "global" list around between the recursive calls. I know I should use closures, but I'm new to javascript and I'm unsure about how to go about it. An example of what I tried already is below.
My Trie class (I know this code works, this is just so you can see how I organized my data structure.)
var Trie = function() { var that = Object.create(Trie.prototype); that.children = {}; //mapping: next character -> child nodes that.this_is_the_end_of_a_word = false; that.insertWord = function(word) { var current_node = that; for (var i = 0; i < word.length; i++) { var c = word[i] //if character is not in the trie already, add it if (!(c in current_node.children)) { current_node.children[c] = Trie(); } //update current_node current_node = current_node.children[c]; }; //after adding all the chars of the word, //you are at the end of a word current_node.this_is_the_end_of_a_word = true; } that.insertWords = function(words) { for (var i = 0; i < words.length; i++) { that.insertWord(words[i]); } } that.contains = function(word) { //start at the root var current_node = that; for (var i = 0; i < word.length; i++) { var c = word[i]; //if the word's character isn't a child of the current_node, //the word isn't in the trie if (!(c in current_node.children)) { return false; } //move down the trie, update current_node current_node = current_node.children[c]; }; return current_node.this_is_the_end_of_a_word; } Object.freeze(that); return that; }
My 1st approach (has many bugs)
num_words_to_go = 10; //this global is bad practice; //I want to put this as the argument to a closure //so it's passed between recursive calls that.getWords = function(start_node, prefix) { console.log(0); var words = []; //if start node is a word, add it if (start_node.this_is_the_end_of_a_word) { words.push(start_node); num_words_to_go--; } if (num_words_to_go <= 0 || !start_node.children) { return words; } return start_node.children.forEach( currentValue.getWords( currentValue, prefix + <character for this child>)); /*I can't think of a nice way to write this without going through all of the children. I know I don't need to, because I only need to find 10 words and get out. This is why I was leaning towards the recursive DFS. */
}
2nd approach: I also found a python example that I was looking at: http://v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries I tried translating his example to JavaScript, but still something's going wrong with all_suffixes
.
that.all_suffixes = function (prefix){ results = []; if (that.this_is_the_end_of_a_word) results.push(prefix); if (!(that.children)) return results; if (results.length > 2) return results; var callback = function(currentValue, i, array){ return currentValue.all_suffixes(prefix+array[i]); } arr = that.children.forEach(callback, that); //[child.all_suffixes(prefix + char) for (char, child) in self.children.items()] return concat(reduce(concat, arr), results); } that.autocomplete = function(prefix){ current_node = that; for(var i = 0; i < prefix.length; i++){ var c = prefix[i]; //if there is nothing in the trie with this prefix if (!(c in current_node.children)){ return []; } current_node = current_node.children[c]; } return list(current_node.all_suffixes(prefix)) }
Basically I take your model and apply a new method getWords(word[, count])
to the Trie
class. I changed the method contains
because I need the functionality in getWords
as well. So I created a new method getNode
, which returns the node where the word or part is found.
The method getWords
first looks up the word (part) and then iterates through the data structure. When a word is found it is pushed to the result set. If the result set length is greater or equal to the required length, then the iteration (hence Array.prototype.some
) is terminated and the recursive call of fork
is stopped.
that.getWords = function (word, count) { function fork(n, w) { function child(c) { return fork(n.children[c], w + c); } n.isWord && words.push(w); return words.length >= count || Object.keys(n.children).some(child); } var words = [], current_node = that.getNode(word); if (current_node) { fork(current_node, word); return words; } }
Side note: I changed this_is_the_end_of_a_word
to isWord
.
Input
- Create a new instance of
Trie
. - Insert some words for testing.
Output
- Test if the trie contains
'motor'
, returns false. - Test if the trie contains
'te'
, returns false. - Test if the trie contains
'ten'
, returns true. - Get all words that starts with
'ind'
(8 available, shows 8). - Get first 10 words that starts with
'in'
(16 available, shows 10). - The whole trie.
var Trie = function () { var that = Object.create(Trie.prototype); that.children = {}; //mapping: next character -> child nodes that.isWord = false; that.insertWord = function (word) { var current_node = that; for (var i = 0; i < word.length; i++) { var c = word[i] //if character is not in the trie already, add it if (!(c in current_node.children)) { current_node.children[c] = Trie(); } //update current_node current_node = current_node.children[c]; }; //after adding all the chars of the word, //you are at the end of a word current_node.isWord = true; } that.insertWords = function (words) { for (var i = 0; i < words.length; i++) { that.insertWord(words[i]); } } that.getNode = function (word) { //start at the root var current_node = that; for (var i = 0; i < word.length; i++) { var c = word[i]; //if the word's character isn't a child of the current_node, //the word isn't in the trie if (!(c in current_node.children)) { return; } //move down the trie, update current_node current_node = current_node.children[c]; }; return current_node; } that.contains = function (word) { var current_node = that.getNode(word); if (current_node) { return current_node.isWord; } return false; } that.getWords = function (word, count) { function fork(n, w) { function child(c) { return fork(n.children[c], w + c); } n.isWord && words.push(w); return words.length >= count || Object.keys(n.children).some(child); } var words = [], current_node = that.getNode(word); if (current_node) { fork(current_node, word); return words; } } // freeze does lock the isWord property, which is not required here //Object.freeze(that); return that; } var trie = new Trie(); trie.insertWords([ 'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction', 'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn', 'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten', 'to', 'zoo', 'zoom' ]); document.write(trie.contains('motor') + '<br>'); // false document.write(trie.contains('te') + '<br>'); // false document.write(trie.contains('ten') + '<br>'); // true document.write('<pre>' + JSON.stringify(trie.getWords('ind'), 0, 4) + '</pre>'); document.write('<pre>' + JSON.stringify(trie.getWords('in', 10), 0, 4) + '</pre>'); document.write('<pre>' + JSON.stringify(trie, 0, 4) + '</pre>');
来源:https://stackoverflow.com/questions/32757760/javascript-find-exactly-10-words-in-a-prefix-tree-that-start-with-a-given-prefi