Are there any scripts, libraries, or programs using Python, or BASH tools (e.g. awk, perl, sed) which can correctly convert numbered pinyin to pinyin with tone marks?
I ported dani_l's code to Kotlin (the Java version should be quite similar). Here it is:
import java.util.regex.Pattern
/**
 * Tone-marked variants of every pinyin vowel, keyed by the plain vowel.
 *
 * Each value is a four-character string; the character at index `tone - 1`
 * is the vowel carrying that tone's mark (index 0 = macron, 1 = acute,
 * 2 = caron, 3 = grave).
 */
val pinyinToneMarks: Map<Char, String> = mapOf(
    // lower-case vowels
    'a' to "āáǎà",
    'e' to "ēéěè",
    'i' to "īíǐì",
    'o' to "ōóǒò",
    'u' to "ūúǔù",
    'ü' to "ǖǘǚǜ",
    // upper-case vowels
    'A' to "ĀÁǍÀ",
    'E' to "ĒÉĚÈ",
    'I' to "ĪÍǏÌ",
    'O' to "ŌÓǑÒ",
    'U' to "ŪÚǓÙ",
    'Ü' to "ǕǗǙǛ"
)
/**
 * Matches one numbered-pinyin final: 1-3 vowels (`v`/`V` stand in for `ü`/`Ü`),
 * an optional n/g/r ending, and a tone digit 0-5. Both letter cases are accepted
 * so that capitalised and all-caps syllables are converted too.
 * Compiled once instead of on every call.
 */
private val TONED_SYLLABLE: Pattern =
    Pattern.compile("([aeiouüvAEIOUÜV]{1,3})([nN]?[gG]?[rR]?)([0-5])")

/**
 * Returns [vowels] with the mark for [tone] (1-4) placed on the vowel that carries it.
 */
private fun placeToneMark(vowels: String, tone: Int): String {
    // With several vowels, a leading a/e/o carries the mark; otherwise the second vowel does.
    val markIndex = if (vowels.length > 1 && vowels[0] !in "aeoAEO") 1 else 0
    val marked = pinyinToneMarks.getValue(vowels[markIndex])[tone - 1]
    return vowels.replaceRange(markIndex, markIndex + 1, marked.toString())
}

/**
 * Converts numbered pinyin (e.g. `"Ni3 hao3 ma0?"`) to pinyin with tone marks
 * (e.g. `"Nǐ hǎo ma?"`).
 *
 * Every vowel group followed by an optional n/g/r ending and a tone digit is
 * rewritten: the digit is dropped, `v`/`V` become `ü`/`Ü`, and for tones 1-4 the
 * mark from [pinyinToneMarks] is put on the appropriate vowel. Digits `0` and `5`
 * both mean the neutral tone and add no mark. Text that does not match is copied
 * through unchanged.
 *
 * @param asciiPinyin text containing numbered pinyin syllables
 * @return the text with tone digits replaced by tone marks
 */
fun toPinyin(asciiPinyin: String): String {
    val matcher = TONED_SYLLABLE.matcher(asciiPinyin)
    val result = StringBuilder(asciiPinyin.length)
    var copiedUpTo = 0
    while (matcher.find()) {
        // Copy the untouched text between the previous match and this one.
        result.append(asciiPinyin, copiedUpTo, matcher.start())
        val vowels = matcher.group(1).replace('v', 'ü').replace('V', 'Ü')
        val tone = matcher.group(3).toInt() % 5 // 5 is an alternative spelling of the neutral tone
        result.append(if (tone == 0) vowels else placeToneMark(vowels, tone))
        result.append(matcher.group(2))
        copiedUpTo = matcher.end()
    }
    // Trailing text after the last match (or the whole input when nothing matched).
    result.append(asciiPinyin, copiedUpTo, asciiPinyin.length)
    return result.toString()
}
/**
 * Prints the result of passing a sample numbered-pinyin phrase through
 * [toPinyin] to standard output, without a trailing newline.
 */
fun test() {
    val sample = "Ni3 hao3 ma0?"
    print(toPinyin(sample))
}