How to create a trie in Python

后端 未结 12 1308
别那么骄傲
别那么骄傲 2020-11-22 12:39

I'm interested in tries and DAWGs (directed acyclic word graphs) and I've been reading a lot about them, but I don't understand what the output trie or DAWG file should look like.

相关标签:
12条回答
  • 2020-11-22 13:15
    from collections import defaultdict
    

    Define Trie:

    def _trie():
        """Return a new trie node.

        A node is a ``defaultdict`` whose default factory is this function, so
        indexing a node with a missing key creates (and stores) an empty child
        node. A word can therefore be inserted just by indexing along its
        characters.
        """
        return defaultdict(_trie)
    

    Create Trie:

    # Build the trie: walk one node per character, letting the defaultdict
    # create any child that does not exist yet.
    trie = _trie()
    for word in ("cat", "bat", "rat", "cam"):
        node = trie
        for letter in word:
            node = node[letter]
        # Mark the node reached by the last character as the end of a word;
        # setdefault with no value stores None under "_end".
        node.setdefault("_end")
    

    Lookup:

    def word_exist(trie, word):
        """Return True if *word* is stored in *trie* as a complete word.

        The walk uses membership tests before indexing, so looking up a word
        never adds keys to the mapping it inspects. A word counts as present
        only if the node reached by its last character holds the '_end' key.
        """
        node = trie
        for letter in word:
            if letter in node:
                node = node[letter]
            else:
                # The path breaks here, so the word was never inserted.
                return False
        return '_end' in node
    

    Test:

    # Prints True: "cam" is one of the words inserted when the trie was built.
    print(word_exist(trie, 'cam'))
    
    0 讨论(0)
  • 2020-11-22 13:16

    Python Class for Trie


    A trie can store a string in O(L) time, where L is the length of the string, so inserting N strings takes O(NL). A string can likewise be searched for in O(L), and the same goes for deletion.

    Can be cloned from https://github.com/Parikshit22/pytrie.git

    class Node:
        """One trie node: 26 child slots (letters 'a'-'z') plus an end-of-word flag."""

        def __init__(self):
            # children[i] is the child for chr(ord('a') + i), or None if absent.
            self.children = [None]*26
            # True when the path from the root to this node spells a stored word.
            self.isend = False


    class trie:
        """Trie of words made only of the lowercase letters 'a'-'z'.

        Each node has a fixed array of 26 child slots, so any other character
        cannot be stored: ``insert`` rejects it with ``ValueError`` and the
        lookup methods treat such a word or prefix as absent.
        """

        def __init__(self,):
            self.__root = Node()

        def __len__(self,):
            """Return the number of stored words."""
            return len(self.search_byprefix(''))

        def __str__(self):
            """Return every stored word in alphabetical order, one per line."""
            return ''.join(word + '\n' for word in self.search_byprefix(''))

        def chartoint(self,character):
            """Map a lowercase letter to its child-slot index (0 for 'a' .. 25 for 'z').

            Raises:
                ValueError: if *character* is outside 'a'-'z'. Without this check
                    a negative index (e.g. from an uppercase letter) would
                    silently address another letter's slot, and an index above
                    25 would raise IndexError.
            """
            index = ord(character)-ord('a')
            if not 0 <= index < 26:
                raise ValueError(
                    "Only lowercase letters 'a'-'z' are supported, got %r" % (character,))
            return index

        def __find(self,string):
            """Return the node reached by following *string*, or None if there is none."""
            ptr = self.__root
            for character in string:
                try:
                    i = self.chartoint(character)
                except ValueError:
                    # An unsupported character can never be on a stored path.
                    return None
                ptr = ptr.children[i]
                if ptr is None:
                    return None
            return ptr

        def remove(self,string):
            """Unmark *string* as a stored word.

            Only the end-of-word flag is cleared; nodes are not freed.

            Raises:
                ValueError: if *string* is not currently stored.
            """
            ptr = self.__find(string)
            if ptr is None or ptr.isend is not True:
                raise ValueError("Keyword doesn't exist in trie")
            ptr.isend = False
            return

        def insert(self,string):
            """Store *string*, creating any missing nodes along its path.

            Raises:
                ValueError: if *string* contains a character outside 'a'-'z'.
            """
            # Validate every character first so a rejected word adds no nodes.
            indices = [self.chartoint(character) for character in string]
            ptr = self.__root
            for i in indices:
                if ptr.children[i] is None:
                    ptr.children[i] = Node()
                ptr = ptr.children[i]
            ptr.isend = True

        def search(self,string):
            """Return True if *string* is stored as a complete word, else False."""
            ptr = self.__find(string)
            return ptr is not None and ptr.isend

        def __getall(self,ptr,key,key_list):
            """Append to *key_list* every stored word in the subtree rooted at *ptr*.

            *key* is the string spelled by the path from the root to *ptr*.
            A node is visited before its children and children are visited in
            'a'..'z' order, so words are appended in alphabetical order.
            """
            if ptr.isend:
                key_list.append(key)
            for i, child in enumerate(ptr.children):
                if child is not None:
                    self.__getall(child,key+chr(ord('a')+i),key_list)

        def search_byprefix(self,key):
            """Return all stored words starting with *key*, in alphabetical order.

            Returns None when no path for *key* exists in the trie. An empty
            list means the path exists but no stored word lies on or below it.
            """
            ptr = self.__find(key)
            if ptr is None:
                return None
            key_list = []
            self.__getall(ptr,key,key_list)
            return key_list
            
    

    # Demo: fill a trie, then exercise lookup, prefix search, size, removal
    # and printing.
    t = trie()
    for name in ("shubham", "shubhi", "minhaj", "parikshit", "pari", "shubh", "minakshi"):
        t.insert(name)

    print(t.search("minhaj"))      # a stored word
    print(t.search("shubhk"))      # not stored
    print(t.search_byprefix('m'))  # every stored word starting with 'm'
    print(len(t))                  # number of stored words
    print(t.remove("minhaj"))      # remove() returns nothing, so this prints None
    print(t)                       # remaining words, one per line
    

    Code Output

    True
    False
    ['minakshi', 'minhaj']
    7
    minakshi
    minhajsir
    pari
    parikshit
    shubh
    shubham
    shubhi

    0 讨论(0)
  • 2020-11-22 13:22

    If you want a TRIE implemented as a Python class, here is something I wrote after reading about them:

    class Trie:
        """A trie in which every node is itself a ``Trie``.

        Words are sequences (strings, lists, tuples, ...) of hashable items.
        Each node keeps a flag saying whether the path leading to it is a
        stored word, and a dict mapping the next item to a child node.

        ``len(node)`` is the number of direct children, ``bool(node)`` is the
        node's final flag, and iterating a node yields it and all descendants.
        """

        def __init__(self):
            # True when the path from the root to this node is a stored word.
            self.__final = False
            # Maps the next item of a word to the child Trie for that item.
            self.__nodes = {}

        def __repr__(self):
            return 'Trie<len={}, final={}>'.format(len(self), self.__final)

        def __getstate__(self):
            # Pickle support: the node's whole state is (final flag, children).
            return self.__final, self.__nodes

        def __setstate__(self, state):
            self.__final, self.__nodes = state

        def __len__(self):
            """Return the number of direct children of this node."""
            return len(self.__nodes)

        def __bool__(self):
            """Return True if the path to this node is a stored word."""
            return self.__final

        def __contains__(self, array):
            """Return True if *array* is stored as a complete word below this node."""
            try:
                return bool(self[array])
            except KeyError:
                return False

        def __iter__(self):
            """Yield this node, then every descendant node depth-first."""
            yield self
            for node in self.__nodes.values():
                yield from node

        def __getitem__(self, array):
            """Return the node reached by *array*; raise KeyError if the path is missing."""
            return self.__get(array, False)

        def create(self, array):
            """Store *array* as a word, creating any missing nodes on its path."""
            self.__get(array, True).__final = True

        def read(self):
            """Yield every stored word below this node as a list of items."""
            yield from self.__read([])

        def update(self, array):
            """Mark the existing path *array* as a word; raise KeyError if it is missing."""
            self[array].__final = True

        def delete(self, array):
            """Unmark *array* as a word (nodes are kept); raise KeyError if the path is missing."""
            self[array].__final = False

        def prune(self):
            """Remove every branch that no longer leads to a stored word.

            Children are pruned first; a child is then dropped only if it is
            neither a word end itself nor the parent of any surviving node.
            Stored words are never affected. Returns ``self`` so calls can be
            chained.
            """
            for key, value in tuple(self.__nodes.items()):
                value.prune()
                if not value.__final and not value.__nodes:
                    del self.__nodes[key]
            return self

        def __get(self, array, create):
            """Return the node reached by following *array* from this node.

            With *create* true, missing nodes are added along the way;
            otherwise a missing step raises KeyError. A falsy *array* (empty
            sequence or None) refers to this node itself.
            """
            node = self
            # Iterating avoids both recursion and re-copying the tail per item.
            for head in array or ():
                if create and head not in node.__nodes:
                    node.__nodes[head] = Trie()
                node = node.__nodes[head]
            return node

        def __read(self, name):
            """Yield stored words below this node, each prefixed by the list *name*."""
            if self.__final:
                yield name
            for key, value in self.__nodes.items():
                yield from value.__read(name + [key])
    
    0 讨论(0)
  • 2020-11-22 13:29

    This version is using recursion

    import pprint
    from collections import deque
    
    pp = pprint.PrettyPrinter(indent=4)

    # raw_input exists only on Python 2; on Python 3 the same function is
    # called input, so fall back to it when raw_input is not defined.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    inp = read_line("Enter a sentence to show as trie\n")
    # split() with no argument splits on runs of whitespace, so repeated
    # spaces do not produce empty words for the trie builder.
    words = inp.split()
    trie = {}
    
    
    def trie_recursion(trie_ds, word):
        """Insert the remaining letters of *word* into *trie_ds* and return *trie_ds*.

        Args:
            trie_ds: nested dict mapping a letter to the sub-trie below it;
                modified in place.
            word: the letters still to insert, as an object with ``popleft()``
                that raises IndexError when empty (e.g. ``collections.deque``).
                It is consumed by the call.

        Returns:
            The same *trie_ds* object, so an empty *word* leaves the trie
            unchanged instead of replacing it with a new empty dict.
        """
        try:
            letter = word.popleft()
        except IndexError:
            # End of the word: nothing left to add at this level.
            return trie_ds

        # Recurse into the existing sub-trie if there is one, else a new one.
        child = trie_recursion(trie_ds.get(letter, {}), word)

        # Attach the sub-trie only if the letter is new; an existing child was
        # already updated in place. `in` replaces dict.has_key, which is gone
        # in Python 3.
        if letter not in trie_ds:
            trie_ds[letter] = child

        return trie_ds
    
    for word in words:
        # Insert each word letter by letter; deque provides the popleft()
        # that trie_recursion uses to consume the word.
        trie = trie_recursion(trie, deque(word))

    # Show the resulting nested-dict trie.
    pprint.pprint(trie)
    

    Output:

    Coool                                                                    
    0 讨论(0)
  • 2020-11-22 13:32
    class Trie:
        """Trie stored as nested dicts, one level per character.

        The key '*' inside a node marks the end of a stored word, so words
        that themselves contain the character '*' are not supported.
        """

        def __init__(self):
            # Root node, created per instance. A class-level `head = {}` would
            # be one dict shared by every Trie object.
            self.head = {}

        def add(self,word):
            """Store *word*, creating any missing nodes along its path."""
            cur = self.head
            for ch in word:
                if ch not in cur:
                    cur[ch] = {}
                cur = cur[ch]
            cur['*'] = True

        def search(self,word):
            """Return True if *word* is stored as a complete word, else False."""
            cur = self.head
            for ch in word:
                if ch not in cur:
                    return False
                cur = cur[ch]

            # Reaching the last node is not enough; it must carry the end marker.
            return '*' in cur

        def printf(self):
            """Print the raw nested-dict structure of the trie."""
            print (self.head)
    
    # Demo: store a single word, then query it along with some words and
    # prefixes that were never added.
    dictionary = Trie()
    dictionary.add("hi")
    #dictionary.add("hello")
    #dictionary.add("eye")
    #dictionary.add("hey")

    for query in ("hi", "hello", "hel", "he"):
        print(dictionary.search(query))
    dictionary.printf()
    

    Out

    True
    False
    False
    False
    {'h': {'i': {'*': True}}}
    
    0 讨论(0)
  • 2020-11-22 13:34

    There's no "should"; it's up to you. Various implementations will have different performance characteristics, take various amounts of time to implement, understand, and get right. This is typical for software development as a whole, in my opinion.

    I would probably first try having a global list of all trie nodes so far created, and representing the child-pointers in each node as a list of indices into the global list. Having a dictionary just to represent the child linking feels too heavy-weight, to me.

    0 讨论(0)
提交回复
热议问题