问题
In Swift 3.0's (NS)Scanner, the string
property returns the string being parsed, and the scanLocation property
returns the current scan location. I'm trying to extract the text parsed so far:
/// The portion of the scanned string that lies before the scan location.
///
/// NOTE: `scanLocation` is applied here as a character offset. Scanner
/// reports it in UTF-16 code units, so for text containing characters that
/// occupy more than one code unit the offset can overshoot the end of the
/// string and trap.
var parsedText: String {
    let end = string.index(string.startIndex, offsetBy: scanLocation)
    return string.substring(to: end)
}
This code crashes when the string
contains characters that take more than one UTF-16 code unit (such as emoji). It turns out that scanLocation
returns the number of UTF-16 code units, not the number of characters parsed.
How can I convert scanLocation (in code units) into a character index?
Playground for experimenting:
// Scan the entire sample, then print where the scanner stopped.
let sample = "Hello🐉"
let scanner = Scanner(string: sample)
scanner.scanString(sample, into: nil)
print(scanner.scanLocation) // Returns 7 instead of 6
回答1:
To obtain character index:
import Foundation
extension Scanner {
    /// The current scan location expressed as a number of `Character`s
    /// from the start of `string`, rather than as the UTF-16 code-unit
    /// offset that `scanLocation` reports.
    ///
    /// Returns 0 when `scanLocation` lies beyond the end of the UTF-16 view
    /// or has no corresponding `String.Index` in `string`.
    var scanLocationInCharacters: Int {
        let codeUnits = string.utf16
        // Step 1: walk `scanLocation` code units into the UTF-16 view.
        // Step 2: convert that position into an index valid for the string itself.
        if let unitIndex = codeUnits.index(codeUnits.startIndex,
                                           offsetBy: scanLocation,
                                           limitedBy: codeUnits.endIndex),
           let characterIndex = String.Index(unitIndex, within: string) {
            return string.distance(from: string.startIndex, to: characterIndex)
        }
        return 0
    }
}
// Scan the entire sample, then compare the two ways of measuring progress.
let sample = "Hello🐉"
let scanner = Scanner(string: sample)
scanner.scanString(sample, into: nil)
print(scanner.scanLocation) // 7
print(scanner.scanLocationInCharacters) // 6
To retrieve parsed text:
/// The portion of `string` that lies before the current scan location.
///
/// `scanLocation` is interpreted as an offset into the UTF-16 view and then
/// converted into a `String.Index`. Returns an empty string when the offset
/// lies beyond the end of the UTF-16 view or has no corresponding index in
/// `string`.
var parsedText: String {
    let codeUnits = string.utf16
    if let unitEnd = codeUnits.index(codeUnits.startIndex,
                                     offsetBy: scanLocation,
                                     limitedBy: codeUnits.endIndex),
       let end = String.Index(unitEnd, within: string) {
        return string.substring(to: end)
    }
    return ""
}
Bonus: when reporting errors, you'll probably want to print the current line and column as well:
/// The 1-based line number of the current scan position, i.e. one more than
/// the number of line feeds in `parsedText`.
///
/// Line feeds are counted at the Unicode-scalar level rather than per
/// `Character`: a CR-LF pair forms a single `Character` ("\r\n") that does
/// not compare equal to "\n", so a per-character comparison would never
/// count Windows-style line endings.
var currentLine: Int {
    var lineCount = 1
    for scalar in parsedText.unicodeScalars where scalar == "\n" {
        lineCount += 1
    }
    return lineCount
}
/// The 1-based column of the current scan position within its line.
var currentColumn: Int {
    let text = parsedText
    // Everything after the last line feed is the current line so far.
    if let lastNewline = text.range(of: "\n", options: .backwards) {
        return text.distance(from: lastNewline.upperBound, to: text.endIndex) + 1
    }
    // No line feed yet, so the whole text is the first line. Reuse `text`
    // instead of evaluating the computed `parsedText` property a second time.
    return text.characters.count + 1
}
来源:https://stackoverflow.com/questions/38625306/converting-scanlocation-from-utf16-units-to-character-index-in-nsscanner-swift