Any libraries to convert number Pinyin to Pinyin with tone markings?

后端 未结 3 953
北荒
北荒 2021-01-14 17:30

Just wondering if anyone knows of a class library that can convert numbered Chinese Pinyin to Pinyin with tone marks, for example nin2 hao3 ma to nín hǎo ma. It would be similar to this answer

3条回答
  •  梦毁少年i
    2021-01-14 18:07

    Here is my port of @Greg-Hewgill's Python algorithm to C#. I haven't run into any issues so far.

    /// <summary>
    /// Converts numbered Pinyin (e.g. "nin2 hao3 ma") into Pinyin with tone marks
    /// (e.g. "nín hǎo ma").
    /// </summary>
    /// <param name="input">
    /// Space-separated Pinyin words. Lowercase letters a-z build up a syllable; a digit
    /// 0-5 ends the syllable and selects its tone (0 and 5 both mean "no tone mark").
    /// "u:" is rewritten to "ü", and a lone "v" vowel is rendered as a toned "ü".
    /// Any other character (including uppercase letters and digits 6-9) ends the
    /// current syllable and is dropped from the output.
    /// </param>
    /// <returns>
    /// The converted words joined by single spaces, with trailing whitespace removed.
    /// A syllable with a tone digit but no vowel keeps the digit as-is; a multi-vowel
    /// syllable for which no tone placement rule matches gets a trailing "!".
    /// </returns>
    public static string ConvertNumericalPinYinToAccented(string input)
    {
        // Row 0 lists the plain vowels; rows 1-4 hold the same vowels carrying that tone,
        // so a vowel's index in row 0 is also its index in every tone row.
        Dictionary<int, string> PinyinToneMark = new Dictionary<int, string>
        {
            {0, "aoeiuv\u00fc"},
            {1, "\u0101\u014d\u0113\u012b\u016b\u01d6\u01d6"},
            {2, "\u00e1\u00f3\u00e9\u00ed\u00fa\u01d8\u01d8"},
            {3, "\u01ce\u01d2\u011b\u01d0\u01d4\u01da\u01da"},
            {4, "\u00e0\u00f2\u00e8\u00ec\u00f9\u01dc\u01dc"}
        };

        string[] words = input.Split(' ');
        List<string> converted = new List<string>(words.Length);

        foreach (string pinyin in words)
        {
            // Both buffers are scoped to the word so that a word without a tone digit
            // cannot leak its letters into the next word.
            string accented = "";
            string t = "";

            foreach (char c in pinyin)
            {
                if (c >= 'a' && c <= 'z')
                {
                    t += c;
                }
                else if (c == ':')
                {
                    // "u:" is the ASCII spelling of "ü": swap only the final 'u'.
                    // The length check keeps a leading ':' from indexing an empty string.
                    if (t.Length > 0 && t[t.Length - 1] == 'u')
                    {
                        t = t.Substring(0, t.Length - 1) + "\u00fc";
                    }
                }
                else
                {
                    if (c >= '0' && c <= '5')
                    {
                        // Tone 5 is folded onto 0: both leave the syllable unmarked.
                        int tone = (c - '0') % 5;

                        if (tone != 0)
                        {
                            // First run of vowels in the syllable.
                            Match match = Regex.Match(t, "[aoeiuv\u00fc]+");
                            if (!match.Success)
                            {
                                // No vowel to mark: keep the digit so nothing is lost.
                                t += c;
                            }
                            else if (match.Length == 1)
                            {
                                // Single vowel: it takes the mark.
                                int vowelIndex = PinyinToneMark[0].IndexOf(match.Value[0]);
                                t = t.Substring(0, match.Index)
                                    + PinyinToneMark[tone][vowelIndex]
                                    + t.Substring(match.Index + match.Length);
                            }
                            else if (t.Contains("a"))
                            {
                                // Several vowels: priority is a, then o, then e.
                                t = t.Replace("a", PinyinToneMark[tone][0].ToString());
                            }
                            else if (t.Contains("o"))
                            {
                                t = t.Replace("o", PinyinToneMark[tone][1].ToString());
                            }
                            else if (t.Contains("e"))
                            {
                                t = t.Replace("e", PinyinToneMark[tone][2].ToString());
                            }
                            else if (t.Contains("ui"))
                            {
                                // In "ui" the mark goes on the 'i'.
                                t = t.Replace("i", PinyinToneMark[tone][3].ToString());
                            }
                            else if (t.Contains("iu"))
                            {
                                // In "iu" the mark goes on the 'u'.
                                t = t.Replace("u", PinyinToneMark[tone][4].ToString());
                            }
                            else
                            {
                                // No placement rule matched: flag the syllable.
                                t += "!";
                            }
                        }
                    }

                    // Any non-letter, non-':' character closes the current syllable.
                    accented += t;
                    t = "";
                }
            }

            // Flush letters that were not followed by a tone digit.
            converted.Add(accented + t);
        }

        return string.Join(" ", converted).TrimEnd();
    }
    

提交回复
热议问题