Find all indices of a search term in a string

后端 未结 3 1033
予麋鹿
予麋鹿 2021-01-19 10:08

I need a fast method to find all indices of a search term that might occur in a string. I tried this \'brute force\' String extension method:

//         


        
相关标签:
3条回答
  • 2021-01-19 10:55

    As Martin said you can implement some of the well known fastest algorithms in String Matching, The Knuth–Morris–Pratt string searching algorithm (or KMP algorithm) searches for occurrences of a "word" W within a main "text string" S.

    The algorithm has complexity O(n), where n is the length of S and the O is big-O notation.

    extension String {
    
        // Build pi function of prefixes
        private func build_pi(str: String) -> [Int] {
    
           var n = count(str)
           var pi = Array(count: n + 1, repeatedValue: 0)
           var k = -1
           pi[0] = -1
    
           for (var i = 0; i < n; ++i) {
               while (k >= 0 && str[k] != str[i]) {
                  k = pi[k]
               }
               pi[i + 1] = ++k
           }
    
           return pi
        }
    
        // Knuth-Morris Pratt algorithm
        func searchPattern(pattern: String) -> [Int] {
    
           var matches = [Int]()
           var n = count(self)
    
           var m = count(pattern)
           var k = 0
           var pi = build_pi(pattern)
    
           for var i = 0; i < n; ++i {
               while (k >= 0 && (k == m || pattern[k] != self[i])) {
                  k = pi[k]
               }
               if ++k == m {
                  matches.append(i - m + 1)
               }
           }
    
           return matches
        }
    
        subscript (i: Int) -> Character {
            return self[advance(self.startIndex, i)]
        }
    }
    

    Then you can use it in the following way:

    var string = "apurba mandal loves ayoshi loves"
    var pattern = "loves"
    
    println(string.searchPattern(pattern))
    

    An the output should be :

    [14, 27]
    

    That belong to the start index of the pattern occurrences inside the the string. I hope this help you.

    EDIT:

    As Martin said in his comment you need to avoid the use of the advance function to index an String by an Int because it's O(position to index).

    One possible solution is to convert the String to an array of Character and then access to the indexes is O(1).

    Then the extension can be changed to this one :

    extension String {
    
       // Build pi function of prefixes
       private func build_pi(str: [Character]) -> [Int] {
    
          var n = count(str)
          var pi = Array(count: n + 1, repeatedValue: 0)
          var k = -1
          pi[0] = -1
    
          for (var i = 0; i < n; ++i) {
              while (k >= 0 && str[k] != str[i]) {
                  k = pi[k]
              }
              pi[i + 1] = ++k
          }
    
          return pi
       }
    
       // Knuth-Morris Pratt algorithm
       func searchPattern(pattern: String) -> [Int] {
    
          // Convert to Character array to index in O(1)
          var patt = Array(pattern)
          var S = Array(self)
    
          var matches = [Int]()
          var n = count(self)
    
          var m = count(pattern)
          var k = 0
          var pi = build_pi(patt)
    
          for var i = 0; i < n; ++i {
             while (k >= 0 && (k == m || patt[k] != S[i])) {
                 k = pi[k]
             }
             if ++k == m {
                 matches.append(i - m + 1)
             }
          }
    
          return matches
       }
    }
    
    0 讨论(0)
  • 2021-01-19 11:01

    Using NSRegularExpression in Swift 4, you can do it like this. NSRegularExpression has been around forever and is probably a better choice than rolling your own algorithm for most cases.

    let text = "The quieter you become, the more you can hear."
    let searchTerm = "you"
    
    let regex = try! NSRegularExpression(pattern: searchTerm, options: [])
    let range: NSRange = NSRange(text.startIndex ..< text.endIndex, in: text)
    let matches: [NSTextCheckingResult] = regex.matches(in: text, options: [], range: range)
    let ranges: [NSRange] = matches.map { $0.range }
    let indices: [Int] = ranges.map { $0.location }
    let swiftRanges = ranges.map { Range($0, in: text) }
    let swiftIndices: [String.Index] = swiftRanges.flatMap { $0?.lowerBound }
    
    0 讨论(0)
  • 2021-01-19 11:02

    Instead of checking for the search term at each position of the string you could use rangeOfString() to find the next occurrence (hoping that rangeOfString() uses more advanced algorithms):

    extension String {
    
        func indicesOf(searchTerm:String) -> [Int] {
            var indices = [Int]()
            var pos = self.startIndex
            while let range = self.rangeOfString(searchTerm, range: pos ..< self.endIndex) {
                indices.append(distance(self.startIndex, range.startIndex))
                pos = range.startIndex.successor()
            }
            return indices
        }
    }
    

    Generally, it depends on the size of the input string and the size of the search string which algorithm is "the fastest". You'll find an overview with links to various algorithms in String searching algorithm.

    Update for Swift 3:

    extension String {
    
        func indices(of searchTerm:String) -> [Int] {
            var indices = [Int]()
            var pos = self.startIndex
            while let range = range(of: searchTerm, range: pos ..< self.endIndex) {
                indices.append(distance(from: startIndex, to: range.lowerBound))
                pos = index(after: range.lowerBound)
            }
            return indices
        }
    }
    
    0 讨论(0)
提交回复
热议问题