I want to extract substrings from a string that match a regex pattern.
So I\'m looking for something like this:
func matchesForRegexInText(regex: St
Most of the solutions above only give the full match as a result ignoring the capture groups e.g.: ^\d+\s+(\d+)
To get the capture group matches as expected you need something like (Swift4) :
public extension String {
public func capturedGroups(withRegex pattern: String) -> [String] {
var results = [String]()
var regex: NSRegularExpression
do {
regex = try NSRegularExpression(pattern: pattern, options: [])
} catch {
return results
}
let matches = regex.matches(in: self, options: [], range: NSRange(location:0, length: self.count))
guard let match = matches.first else { return results }
let lastRangeIndex = match.numberOfRanges - 1
guard lastRangeIndex >= 1 else { return results }
for i in 1...lastRangeIndex {
let capturedGroupIndex = match.range(at: i)
let matchedString = (self as NSString).substring(with: capturedGroupIndex)
results.append(matchedString)
}
return results
}
}
If you want to extract substrings from a String, not just the position, (but the actual String including emojis). Then, the following maybe a simpler solution.
extension String {
func regex (pattern: String) -> [String] {
do {
let regex = try NSRegularExpression(pattern: pattern, options: NSRegularExpressionOptions(rawValue: 0))
let nsstr = self as NSString
let all = NSRange(location: 0, length: nsstr.length)
var matches : [String] = [String]()
regex.enumerateMatchesInString(self, options: NSMatchingOptions(rawValue: 0), range: all) {
(result : NSTextCheckingResult?, _, _) in
if let r = result {
let result = nsstr.substringWithRange(r.range) as String
matches.append(result)
}
}
return matches
} catch {
return [String]()
}
}
}
Example Usage:
"someText
Even if the matchesInString()
method takes a String
as the first argument,
it works internally with NSString
, and the range parameter must be given
using the NSString
length and not as the Swift string length. Otherwise it will
fail for "extended grapheme clusters" such as "flags".
As of Swift 4 (Xcode 9), the Swift standard
library provides functions to convert between Range<String.Index>
and NSRange
.
func matches(for regex: String, in text: String) -> [String] {
do {
let regex = try NSRegularExpression(pattern: regex)
let results = regex.matches(in: text,
range: NSRange(text.startIndex..., in: text))
return results.map {
String(text[Range($0.range, in: text)!])
}
} catch let error {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
Example:
let string = "
@p4bloch if you want to capture results from a series of capture parentheses, then you need to use the rangeAtIndex(index)
method of NSTextCheckingResult
, instead of range
. Here's @MartinR 's method for Swift2 from above, adapted for capture parentheses. In the array that is returned, the first result [0]
is the entire capture, and then individual capture groups begin from [1]
. I commented out the map
operation (so it's easier to see what I changed) and replaced it with nested loops.
func matches(for regex: String!, in text: String!) -> [String] {
do {
let regex = try NSRegularExpression(pattern: regex, options: [])
let nsString = text as NSString
let results = regex.matchesInString(text, options: [], range: NSMakeRange(0, nsString.length))
var match = [String]()
for result in results {
for i in 0..<result.numberOfRanges {
match.append(nsString.substringWithRange( result.rangeAtIndex(i) ))
}
}
return match
//return results.map { nsString.substringWithRange( $0.range )} //rangeAtIndex(0)
} catch let error as NSError {
print("invalid regex: \(error.localizedDescription)")
return []
}
}
An example use case might be, say you want to split a string of title year
eg "Finding Dory 2016" you could do this:
print ( matches(for: "^(.+)\\s(\\d{4})" , in: "Finding Dory 2016"))
// ["Finding Dory 2016", "Finding Dory", "2016"]
This is how I did it, I hope it brings a new perspective how this works on Swift.
In this example below I will get the any string between []
var sample = "this is an [hello] amazing [world]"
var regex = NSRegularExpression(pattern: "\\[.+?\\]"
, options: NSRegularExpressionOptions.CaseInsensitive
, error: nil)
var matches = regex?.matchesInString(sample, options: nil
, range: NSMakeRange(0, countElements(sample))) as Array<NSTextCheckingResult>
for match in matches {
let r = (sample as NSString).substringWithRange(match.range)//cast to NSString is required to match range format.
println("found= \(r)")
}
Swift 4 without NSString.
extension String {
func matches(regex: String) -> [String] {
guard let regex = try? NSRegularExpression(pattern: regex, options: [.caseInsensitive]) else { return [] }
let matches = regex.matches(in: self, options: [], range: NSMakeRange(0, self.count))
return matches.map { match in
return String(self[Range(match.range, in: self)!])
}
}
}