I need a fast method to find all indices of a search term that might occur in a string. I tried this \'brute force\' String
extension method:
//
As Martin said you can implement some of the well known fastest algorithms in String Matching, The Knuth–Morris–Pratt string searching algorithm (or KMP algorithm) searches for occurrences of a "word" W
within a main "text string" S
.
The algorithm has complexity O(n), where n is the length of S
and the O is big-O notation.
extension String {
// Build pi function of prefixes
private func build_pi(str: String) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(pattern)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || pattern[k] != self[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
subscript (i: Int) -> Character {
return self[advance(self.startIndex, i)]
}
}
Then you can use it in the following way:
var string = "apurba mandal loves ayoshi loves"
var pattern = "loves"
println(string.searchPattern(pattern))
An the output should be :
[14, 27]
That belong to the start index of the pattern occurrences inside the the string. I hope this help you.
EDIT:
As Martin said in his comment you need to avoid the use of the advance
function to index an String
by an Int
because it's O(position to index).
One possible solution is to convert the String
to an array of Character
and then access to the indexes is O(1).
Then the extension
can be changed to this one :
extension String {
// Build pi function of prefixes
private func build_pi(str: [Character]) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
// Convert to Character array to index in O(1)
var patt = Array(pattern)
var S = Array(self)
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(patt)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || patt[k] != S[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
}