今天在CodeProject首次发表文章:http://www.codeproject.com/KB/cs/NumberToFromWord.aspx,这里特别对中文数字转换做一说明。
作为一个可扩展的工具,它能够实现数字转中文,以及中文转数字。大体思路是为每个数字建立字典
Dictionary<int, List<string>> NumberNameDict,即
// Maps a numeric value to every word accepted for it.
// The first word of each list is the one used when a number is written out.
new Dictionary<int, List<string>>
{
// Ten, plus the single-character words for 20 and 30.
{10, new List<string>{"十","拾"}},
{20, new List<string>{"廿"}},
{30, new List<string>{"卅"}},
// Larger units: hundred, thousand, ten thousand and hundred million.
{100, new List<string>{"百", "佰"}},
{1000, new List<string>{"千", "仟"}},
{10000, new List<string>{"万"}},
{100000000, new List<string>{"亿"}}
},
// Digit character sets: in each string the character at index d stands for digit d (0-9).
// The first string is the default output set.
new List<string>() { "○一二三四五六七八九", "0123456789", "零壹贰叁肆伍陆柒捌玖" }
{
{10, new List<string>{"十","拾"}},
{20, new List<string>{"廿"}},
{30, new List<string>{"卅"}},
{100, new List<string>{"百", "佰"}},
{1000, new List<string>{"千", "仟"}},
{10000, new List<string>{"万"}},
{100000000, new List<string>{"亿"}}
},
new List<string>() { "○一二三四五六七八九", "0123456789", "零壹贰叁肆伍陆柒捌玖" }
存储所有对应的中文,其中List<string>仅对中文有效,存储0-9对应的中文字符,其中第一个字符串为缺省输出格式。类的初始化程序由此建立反向查询的字典Dictionary<string, int> WordNameDict:
// Build the reverse lookup (word -> number) and register every word in AllWords once.
foreach (var entry in NumberNameDict)
{
    int number = entry.Key;
    foreach (string word in entry.Value)
    {
        WordNameDict.Add(word, number);
        if (AllWords.Contains(word))
        {
            continue;
        }
        AllWords.Add(word);
    }
}
{
foreach (string s in kvp.Value)
{
WordNameDict.Add(s, kvp.Key);
if (!AllWords.Contains(s))
AllWords.Add(s);
}
}
另外对于进制数建立列表protected List<int> groupNums:
// Collect the group units (firstGroupNum, then x10, x100, ...) that have a word in
// NumberNameDict, ordered from the largest unit down to the smallest so that the
// number-to-words loop can peel off the highest unit first.
groupNums = new List<int>();
int max = NumberNameDict.Keys.Max();
// A long counter keeps "unit *= 10" from overflowing int when max is close to int.MaxValue.
for (long unit = firstGroupNum; unit <= max; unit *= 10)
{
    if (NumberNameDict.ContainsKey((int)unit))
    {
        groupNums.Add((int)unit);
    }
}
// Sort() alone leaves the list ascending; reverse it to get the descending order
// (e.g. 100000000, 10000, 1000, 100, 10) that the conversion relies on.
groupNums.Sort();
groupNums.Reverse();
int max = NumberNameDict.Keys.Max();
for (int i = firstGroupNum; i <= max; i*= 10 )
{
if (NumberNameDict.ContainsKey(i))
{
//GroupNameDict.Add(i, NumberNameDict[i][0]);
groupNums.Add(i);
}
}
groupNums.Sort();
结果是{100000000, 10000, 1000, 100, 10},即"亿万千百十"对应的数字列表。
数字转中文
为实现数字转中文, 首先对于每个非零的字符进行转换,其位数按照groupNums解析:
// Walk the group units in list order; for every unit that fits into what is left of
// the number, emit the count followed by the unit's default word.
List<string> sections = new List<string>();
int remained = number;
foreach (int unit in groupNums)
{
    if (remained >= unit)
    {
        int count = remained / unit;
        sections.Add(toWords(count));

        // The plural form is used only when a pluralizer is set and the count is not 1.
        string unitName = NumberNameDict[unit][0];
        bool pluralize = ToPlural != null && count != 1;
        sections.Add(pluralize ? ToPlural(unitName) : unitName);

        remained -= count * unit;
        if (remained != 0 && NeedInsertAnd(number, remained))
        {
            sections.Add(AndWords[0]);
        }
    }
}
// Whatever is left below the last unit is converted as one final section.
if (remained != 0)
{
    sections.Add(toWords(remained));
}
int remained = number;
for (int i = 0; i < groupNums.Count; i ++ )
{
if (remained < groupNums[i])
continue;
int whole = remained / groupNums[i];
sections.Add(toWords(whole));
if (ToPlural != null && whole != 1)
sections.Add(ToPlural(NumberNameDict[groupNums[i]][0]));
else
sections.Add(NumberNameDict[groupNums[i]][0]);
remained -= whole * groupNums[i];
if (remained != 0 && NeedInsertAnd(number, remained))
//if(remained != 0 && remained < 100)
sections.Add(AndWords[0]);
}
if (remained != 0)
sections.Add(toWords(remained));
最终结果对于中文就是简单地将以上分散的字符串连接起来:
// Concatenate the sections with Space between neighbours and no trailing separator.
StringBuilder sb = new StringBuilder();
int lastIndex = sections.Count - 1;
for (int i = 0; i < lastIndex; i++)
{
    sb.Append(sections[i]).Append(Space);
}
sb.Append(sections.Last());
return sb.ToString();
for (int i = 0; i < sections.Count-1; i++)
{
sb.Append(sections[i] + Space);
}
sb.Append(sections.Last());
return sb.ToString();
为实现个性化的字符输出,可以调用以下接口,对特定字符进行定义。
/// <summary>
/// Converts a number to words, then swaps default characters for the preferred ones
/// listed in <paramref name="samples"/>.
/// </summary>
/// <param name="number">The number to convert.</param>
/// <param name="samples">
/// Preferred characters that replace the default ones.
/// <example>
/// For example, 234002052 is converted by default to "二亿三千四百万零二千零五十二";
/// with samples set to "佰零壹贰叁肆拾" the output becomes "贰亿叁千肆佰万零贰千零五拾贰".
/// Every character found in samples replaces the default character of the same value,
/// so "贰" replaces every "二" standing for the digit 2.
/// </example>
/// </param>
/// <returns>The number in words with the preferred characters applied.</returns>
private string toWords(int number, string samples)
{
    string result = ToWords(number);
    foreach (char preferred in samples)
    {
        // Only characters known to the converter can stand in for a default one.
        if (!allCharacters.Contains(preferred))
        {
            continue;
        }

        int value;
        if (!WordNameDict.TryGetValue(preferred.ToString(), out value))
        {
            continue;
        }

        // Values above 9 are replaceable only when they are group units.
        if (value > 9 && !groupNums.Contains(value))
        {
            continue;
        }

        // Swap only single-character defaults that differ from the preferred character.
        string defaultWord = NumberNameDict[value][0];
        bool replaceable = defaultWord.Length == 1 && defaultWord[0] != preferred;
        if (replaceable)
        {
            result = result.Replace(defaultWord[0], preferred);
        }
    }
    return result;
}
/// <summary>
/// Converts a number to words, then swaps default characters for the preferred ones
/// listed in <paramref name="samples"/>.
/// </summary>
/// <param name="number">The number to convert.</param>
/// <param name="samples">
/// Preferred characters that replace the default ones.
/// <example>
/// For example, 234002052 is converted by default to "二亿三千四百万零二千零五十二";
/// with samples set to "佰零壹贰叁肆拾" the output becomes "贰亿叁千肆佰万零贰千零五拾贰".
/// Every character found in samples replaces the default character of the same value,
/// so "贰" replaces every "二" standing for the digit 2.
/// </example>
/// </param>
/// <returns>The number in words with the preferred characters applied.</returns>
private string toWords(int number, string samples)
{
    string result = ToWords(number);
    foreach (char preferred in samples)
    {
        // Only characters known to the converter can stand in for a default one.
        if (!allCharacters.Contains(preferred))
        {
            continue;
        }

        int value;
        if (!WordNameDict.TryGetValue(preferred.ToString(), out value))
        {
            continue;
        }

        // Values above 9 are replaceable only when they are group units.
        if (value > 9 && !groupNums.Contains(value))
        {
            continue;
        }

        // Swap only single-character defaults that differ from the preferred character.
        string defaultWord = NumberNameDict[value][0];
        bool replaceable = defaultWord.Length == 1 && defaultWord[0] != preferred;
        if (replaceable)
        {
            result = result.Replace(defaultWord[0], preferred);
        }
    }
    return result;
}
中文转换为数字
最大的挑战在于难以判断数字对应的位数,考虑到:
- 高位应当出现在低位之前;
- 如果高位出现在低位之后,意味着它们应通过相乘组合为一个更高位。
- 低位出现在高位之后,意味着对之前的数字做个确认。
/// <summary>
/// Computes the number represented by a sequence of already split number words.
/// </summary>
/// <remarks>
/// Values that are not group units are pushed on a stack. A group unit multiplies the
/// values collected before it: a unit at least as large as everything seen so far
/// multiplies the whole running total, a unit larger than the previous unit multiplies
/// the sum of the smaller values on top of the stack, and any other unit multiplies only
/// the value directly in front of it. A unit with no value in front of it counts as one
/// of that unit, so "十二" yields 12 and "一百十二" yields 112.
/// </remarks>
/// <param name="sectors">The words of the number, one entry per word.</param>
/// <returns>The number.</returns>
/// <exception cref="FormatException">A word is not part of the vocabulary.</exception>
/// <exception cref="InvalidOperationException">No word carrying a value was found.</exception>
protected int fromWords(string[] sectors)
{
    int result = 0, current, lastGroup = 1, temp, maxGroup = 1;
    Stack<int> stack = new Stack<int>();
    foreach (string s in sectors)
    {
        if (!AllWords.Contains(s))
        {
            throw new FormatException("Unrecognized number word: \"" + s + "\".");
        }
        // Connecting words ("and") carry no value.
        if (AndWords.Contains(s))
        {
            continue;
        }
        // A vocabulary word without a numeric value contributes nothing.
        if (!WordNameDict.TryGetValue(s, out current))
        {
            continue;
        }
        if (!groupNums.Contains(current))
        {
            stack.Push(current);
            continue;
        }

        if (current >= maxGroup)
        {
            // The unit is at least as large as everything seen so far: it multiplies the
            // whole running total. An empty stack means an implicit leading one ("十" = 10).
            temp = stack.Count != 0 ? stack.Pop() : 1;
            while (stack.Count != 0)
            {
                temp += stack.Pop();
            }
            temp *= current;
            stack.Push(temp);
            maxGroup *= current;
            lastGroup = 1;
        }
        else if (current > lastGroup)
        {
            // The unit is larger than the previous one: it multiplies the sum of the
            // smaller values on top of the stack.
            temp = 0;
            bool foundSmaller = false;
            while (stack.Count != 0 && stack.Peek() < current)
            {
                temp += stack.Pop();
                foundSmaller = true;
            }
            if (!foundSmaller)
            {
                // No value in front of the unit: treat it as one unit instead of zero.
                temp = 1;
            }
            temp *= current;
            stack.Push(temp);
            lastGroup = current;
        }
        else
        {
            // The unit multiplies only the value directly in front of it; when that value
            // belongs to an earlier, larger unit, the unit stands for one of itself.
            temp = (stack.Count != 0 && stack.Peek() < current) ? stack.Pop() : 1;
            temp *= current;
            stack.Push(temp);
            lastGroup = current;
        }
    }

    // Input without any value-carrying word is a failure, not zero.
    if (stack.Count == 0)
    {
        throw new InvalidOperationException("No number word was found.");
    }
    while (stack.Count != 0)
    {
        result += stack.Pop();
    }
    return result;
}
To parse a string into a number, calling tryParse() is recommended.
/// <summary>
/// Tries to read a number from a string of number words.
/// </summary>
/// <param name="numberInWords">The text containing the number written in words.</param>
/// <param name="result">The parsed number on success; -1 when parsing fails.</param>
/// <returns>true when the words were converted; false when any step threw.</returns>
protected virtual bool tryParse(string numberInWords, out int result)
{
    result = -1;
    try
    {
        // NOTE(review): the text is lower-cased when IsCaseSensitive is true; presumably the
        // flag means "this language distinguishes case" - confirm against its declaration.
        // ToLowerInvariant keeps the lookup independent of the current culture
        // (ToLower() maps "I" to a dotless "ı" under Turkish culture).
        string words = IsCaseSensitive ? numberInWords.ToLowerInvariant() : numberInWords;
        string[] sectors = split(words);
        // Words outside the vocabulary are dropped before conversion.
        string[] known = sectors.Where(s => AllWords.Contains(s)).ToArray();
        result = fromWords(known);
        return true;
    }
    catch
    {
        // Best-effort parse: any failure is reported as "not a number".
        return false;
    }
}
/// <summary>
/// Computes the number represented by a sequence of already split number words.
/// </summary>
/// <remarks>
/// Values that are not group units are pushed on a stack. A group unit multiplies the
/// values collected before it: a unit at least as large as everything seen so far
/// multiplies the whole running total, a unit larger than the previous unit multiplies
/// the sum of the smaller values on top of the stack, and any other unit multiplies only
/// the value directly in front of it. A unit with no value in front of it counts as one
/// of that unit, so "十二" yields 12 and "一百十二" yields 112.
/// </remarks>
/// <param name="sectors">The words of the number, one entry per word.</param>
/// <returns>The number.</returns>
/// <exception cref="FormatException">A word is not part of the vocabulary.</exception>
/// <exception cref="InvalidOperationException">No word carrying a value was found.</exception>
protected int fromWords(string[] sectors)
{
    int result = 0, current, lastGroup = 1, temp, maxGroup = 1;
    Stack<int> stack = new Stack<int>();
    foreach (string s in sectors)
    {
        if (!AllWords.Contains(s))
        {
            throw new FormatException("Unrecognized number word: \"" + s + "\".");
        }
        // Connecting words ("and") carry no value.
        if (AndWords.Contains(s))
        {
            continue;
        }
        // A vocabulary word without a numeric value contributes nothing.
        if (!WordNameDict.TryGetValue(s, out current))
        {
            continue;
        }
        if (!groupNums.Contains(current))
        {
            stack.Push(current);
            continue;
        }

        if (current >= maxGroup)
        {
            // The unit is at least as large as everything seen so far: it multiplies the
            // whole running total. An empty stack means an implicit leading one ("十" = 10).
            temp = stack.Count != 0 ? stack.Pop() : 1;
            while (stack.Count != 0)
            {
                temp += stack.Pop();
            }
            temp *= current;
            stack.Push(temp);
            maxGroup *= current;
            lastGroup = 1;
        }
        else if (current > lastGroup)
        {
            // The unit is larger than the previous one: it multiplies the sum of the
            // smaller values on top of the stack.
            temp = 0;
            bool foundSmaller = false;
            while (stack.Count != 0 && stack.Peek() < current)
            {
                temp += stack.Pop();
                foundSmaller = true;
            }
            if (!foundSmaller)
            {
                // No value in front of the unit: treat it as one unit instead of zero.
                temp = 1;
            }
            temp *= current;
            stack.Push(temp);
            lastGroup = current;
        }
        else
        {
            // The unit multiplies only the value directly in front of it; when that value
            // belongs to an earlier, larger unit, the unit stands for one of itself.
            temp = (stack.Count != 0 && stack.Peek() < current) ? stack.Pop() : 1;
            temp *= current;
            stack.Push(temp);
            lastGroup = current;
        }
    }

    // Input without any value-carrying word is a failure, not zero.
    if (stack.Count == 0)
    {
        throw new InvalidOperationException("No number word was found.");
    }
    while (stack.Count != 0)
    {
        result += stack.Pop();
    }
    return result;
}
To parse a string into a number, calling tryParse() is recommended.
/// <summary>
/// Tries to read a number from a string of number words.
/// </summary>
/// <param name="numberInWords">The text containing the number written in words.</param>
/// <param name="result">The parsed number on success; -1 when parsing fails.</param>
/// <returns>true when the words were converted; false when any step threw.</returns>
protected virtual bool tryParse(string numberInWords, out int result)
{
    result = -1;
    try
    {
        // NOTE(review): the text is lower-cased when IsCaseSensitive is true; presumably the
        // flag means "this language distinguishes case" - confirm against its declaration.
        // ToLowerInvariant keeps the lookup independent of the current culture
        // (ToLower() maps "I" to a dotless "ı" under Turkish culture).
        string words = IsCaseSensitive ? numberInWords.ToLowerInvariant() : numberInWords;
        string[] sectors = split(words);
        // Words outside the vocabulary are dropped before conversion.
        string[] known = sectors.Where(s => AllWords.Contains(s)).ToArray();
        result = fromWords(known);
        return true;
    }
    catch
    {
        // Best-effort parse: any failure is reported as "not a number".
        return false;
    }
}
最终对数字与中英文的转换结果如下:
5: 五 ==> 5
20: 廿 ==> 20
21: 二十一 ==> 21
99: 九十九 ==> 99
100: 一百 ==> 100
102: 一百零二 ==> 102
131: 一百三十一 ==> 131
356: 三百五十六 ==> 356
909: 九百零九 ==> 909
1000: 一千 ==> 1000
1021: 一千零二十一 ==> 1021
2037: 二千零三十七 ==> 2037
12345: 一万二千三百四十五 ==> 12345
31027: 三万一千零二十七 ==> 31027
40002: 四万零二 ==> 40002
90010: 九万零一十 ==> 90010
100232300: 一亿零二十三万二千三百 ==> 100232300
234002052: 二亿三千四百万零二千零五十二 ==> 234002052
5: five ==> 5
20: twenty ==> 20
21: twenty-one ==> 21
99: ninety-nine ==> 99
100: one hundred ==> 100
102: one hundred and two ==> 102
131: one hundred and thirty-one ==> 131
356: three hundreds and fifty-six ==> 356
909: nine hundreds and nine ==> 909
1000: one thousand ==> 1000
1021: one thousand and twenty-one ==> 1021
2037: two thousands and thirty-seven ==> 2037
12345: twelve thousands three hundreds and forty-five ==> 12345
31027: thirty-one thousands and twenty-seven ==> 31027
40002: forty thousands and two ==> 40002
90010: ninety thousands and ten ==> 90010
100232300: one hundred millions two hundreds and thirty-two thousands three hundreds ==> 100232300
234002052: two hundreds and thirty-four millions two thousands and fifty-two ==> 234002052
572030013: 五亿七千贰佰零叁万零壹拾叁 ==> 572030013
234002052: 贰亿叁千肆佰万零贰千零五拾贰 ==> 234002052
5: Five ==> 5
20: Twenty ==> 20
21: Twenty One ==> 21
99: Ninety Nine ==> 99
100: One Hundred ==> 100
102: One Hundred And Two ==> 102
131: One Hundred And Thirty One ==> 131
356: Three Hundreds And Fifty Six ==> 356
909: Nine Hundreds And Nine ==> 909
1000: One Thousand ==> 1000
1021: One Thousand And Twenty One ==> 1021
2037: Two Thousands And Thirty Seven ==> 2037
12345: Twelve Thousands Three Hundreds And Forty Five ==> 12345
31027: Thirty One Thousands And Twenty Seven ==> 31027
40002: Forty Thousands And Two ==> 40002
90010: Ninety Thousands And Ten ==> 90010
100232300: One Hundred Millions Two Hundreds And Thirty Two Thousands Three Hundreds ==> 100232300
234002052: Two Hundreds And Thirty Four Millions Two Thousands And Fifty Two ==> 234002052
20: 廿 ==> 20
21: 二十一 ==> 21
99: 九十九 ==> 99
100: 一百 ==> 100
102: 一百零二 ==> 102
131: 一百三十一 ==> 131
356: 三百五十六 ==> 356
909: 九百零九 ==> 909
1000: 一千 ==> 1000
1021: 一千零二十一 ==> 1021
2037: 二千零三十七 ==> 2037
12345: 一万二千三百四十五 ==> 12345
31027: 三万一千零二十七 ==> 31027
40002: 四万零二 ==> 40002
90010: 九万零一十 ==> 90010
100232300: 一亿零二十三万二千三百 ==> 100232300
234002052: 二亿三千四百万零二千零五十二 ==> 234002052
5: five ==> 5
20: twenty ==> 20
21: twenty-one ==> 21
99: ninety-nine ==> 99
100: one hundred ==> 100
102: one hundred and two ==> 102
131: one hundred and thirty-one ==> 131
356: three hundreds and fifty-six ==> 356
909: nine hundreds and nine ==> 909
1000: one thousand ==> 1000
1021: one thousand and twenty-one ==> 1021
2037: two thousands and thirty-seven ==> 2037
12345: twelve thousands three hundreds and forty-five ==> 12345
31027: thirty-one thousands and twenty-seven ==> 31027
40002: forty thousands and two ==> 40002
90010: ninety thousands and ten ==> 90010
100232300: one hundred millions two hundreds and thirty-two thousands three hundreds ==> 100232300
234002052: two hundreds and thirty-four millions two thousands and fifty-two ==> 234002052
572030013: 五亿七千贰佰零叁万零壹拾叁 ==> 572030013
234002052: 贰亿叁千肆佰万零贰千零五拾贰 ==> 234002052
5: Five ==> 5
20: Twenty ==> 20
21: Twenty One ==> 21
99: Ninety Nine ==> 99
100: One Hundred ==> 100
102: One Hundred And Two ==> 102
131: One Hundred And Thirty One ==> 131
356: Three Hundreds And Fifty Six ==> 356
909: Nine Hundreds And Nine ==> 909
1000: One Thousand ==> 1000
1021: One Thousand And Twenty One ==> 1021
2037: Two Thousands And Thirty Seven ==> 2037
12345: Twelve Thousands Three Hundreds And Forty Five ==> 12345
31027: Thirty One Thousands And Twenty Seven ==> 31027
40002: Forty Thousands And Two ==> 40002
90010: Ninety Thousands And Ten ==> 90010
100232300: One Hundred Millions Two Hundreds And Thirty Two Thousands Three Hundreds ==> 100232300
234002052: Two Hundreds And Thirty Four Millions Two Thousands And Fifty Two ==> 234002052
第壹佰零八 张 = 108
代码如下:
/Files/cruisoring/NumberFromWordSource.zip
/Files/cruisoring/NumberWordTestSource.zip
来源:https://www.cnblogs.com/cruisoring/archive/2011/11/21/2257091.html