Find all indices of a search term in a string

后端 未结 3 1037
予麋鹿
予麋鹿 2021-01-19 10:08

I need a fast method to find all indices of a search term that might occur in a string. I tried this 'brute force' String extension method:

//         


        
3条回答
  •  清酒与你
    2021-01-19 10:55

    As Martin said, you can implement one of the well-known fast string-matching algorithms. The Knuth–Morris–Pratt string-searching algorithm (or KMP algorithm) searches for occurrences of a "word" W within a main "text string" S.

    The algorithm has complexity O(n + m), where n is the length of S, m is the length of W, and O is big-O notation.

    extension String {

        /// Builds the Knuth–Morris–Pratt failure table ("pi" function) for `str`.
        ///
        /// - Parameter str: The pattern to preprocess.
        /// - Returns: An array of `str.count + 1` entries. `pi[0]` is the sentinel
        ///   `-1`; for `j >= 1`, `pi[j]` is the length of the longest proper prefix
        ///   of the first `j` characters that is also a suffix of them.
        private func build_pi(_ str: String) -> [Int] {
            // Indexing a String by integer offset costs O(offset), so copy the
            // characters into an array once and index that instead.
            let chars = Array(str)
            var pi = [Int](repeating: 0, count: chars.count + 1)
            var k = -1
            pi[0] = -1

            for i in chars.indices {
                // Fall back to shorter borders until one can be extended by chars[i].
                while k >= 0 && chars[k] != chars[i] {
                    k = pi[k]
                }
                k += 1
                pi[i + 1] = k
            }

            return pi
        }

        /// Finds every occurrence of `pattern` using the Knuth–Morris–Pratt algorithm.
        ///
        /// - Parameter pattern: The text to search for.
        /// - Returns: The zero-based `Character` offsets at which `pattern` starts,
        ///   in ascending order. Overlapping occurrences are all reported. An empty
        ///   pattern yields an empty array.
        func searchPattern(_ pattern: String) -> [Int] {
            let patt = Array(pattern)
            let m = patt.count

            // Without this guard an empty pattern would "match" after every
            // character and report the bogus offsets 1...count.
            guard m > 0 else { return [] }

            let pi = build_pi(pattern)
            var matches = [Int]()
            var k = 0   // number of pattern characters currently matched

            // Walk the text's characters directly: no O(position) Int subscripting.
            for (i, ch) in self.enumerated() {
                // After a full match (k == m) or on a mismatch, fall back via pi.
                while k >= 0 && (k == m || patt[k] != ch) {
                    k = pi[k]
                }
                k += 1
                if k == m {
                    matches.append(i - m + 1)
                }
            }

            return matches
        }

        /// Returns the character at integer offset `i` from the start of the string.
        ///
        /// Each access walks the string from `startIndex`, so it costs O(i); avoid
        /// calling it inside loops. `i` must be a valid offset (`0..<count`).
        subscript(i: Int) -> Character {
            return self[index(startIndex, offsetBy: i)]
        }
    }
    

    Then you can use it in the following way:

    // Neither value is mutated, so both are constants.
    let string = "apurba mandal loves ayoshi loves"
    let pattern = "loves"

    // Prints the character offsets at which `pattern` starts inside `string`.
    print(string.searchPattern(pattern))
    

    And the output should be:

    [14, 27]
    

    These are the start indices of the pattern's occurrences inside the string. I hope this helps you.

    EDIT:

    As Martin said in his comment, you need to avoid using the advance function (index(_:offsetBy:) in current Swift) to index a String by an Int, because each such access costs O(position of the index).

    One possible solution is to convert the String to an array of Character, so that access by index is O(1).

    Then the extension can be changed to this:

    extension String {

       /// Builds the Knuth–Morris–Pratt failure table ("pi" function) for `str`.
       ///
       /// - Parameter str: The pattern's characters.
       /// - Returns: An array of `str.count + 1` entries. `pi[0]` is the sentinel
       ///   `-1`; for `j >= 1`, `pi[j]` is the length of the longest proper prefix
       ///   of the first `j` characters that is also a suffix of them.
       private func build_pi(_ str: [Character]) -> [Int] {
          var pi = [Int](repeating: 0, count: str.count + 1)
          var k = -1
          pi[0] = -1

          for i in str.indices {
              // Fall back to shorter borders until one can be extended by str[i].
              while k >= 0 && str[k] != str[i] {
                  k = pi[k]
              }
              k += 1
              pi[i + 1] = k
          }

          return pi
       }

       /// Finds every occurrence of `pattern` using the Knuth–Morris–Pratt algorithm.
       ///
       /// - Parameter pattern: The text to search for.
       /// - Returns: The zero-based `Character` offsets at which `pattern` starts,
       ///   in ascending order. Overlapping occurrences are all reported. An empty
       ///   pattern yields an empty array.
       func searchPattern(_ pattern: String) -> [Int] {

          // Convert to Character arrays so every index access is O(1).
          let patt = Array(pattern)
          let text = Array(self)

          // Reuse the array counts instead of traversing the strings again.
          let m = patt.count

          // Without this guard an empty pattern would "match" after every
          // character and report the bogus offsets 1...count.
          guard m > 0 else { return [] }

          let pi = build_pi(patt)
          var matches = [Int]()
          var k = 0   // number of pattern characters currently matched

          for i in text.indices {
             // After a full match (k == m) or on a mismatch, fall back via pi.
             while k >= 0 && (k == m || patt[k] != text[i]) {
                 k = pi[k]
             }
             k += 1
             if k == m {
                 matches.append(i - m + 1)
             }
          }

          return matches
       }
    }
    

提交回复
热议问题