I have a string containing a number in a non-ASCII format, e.g. the Unicode character BENGALI DIGIT ONE (U+09E7): "১"
How do I parse this as an integer in .NET?
I found this question while looking for a similar answer, but none of the existing answers quite matched what I needed, so I wrote the following. It handles signs correctly and fails faster when given a very long string. It does not, though, ignore grouping characters such as `,`, `'` or `’`, though that could easily be added if someone wanted it (I didn't):
/// <summary>
/// Parses a string made of Unicode decimal digits from any script
/// (for example BENGALI DIGIT ONE, "১") as a 32-bit signed integer.
/// </summary>
/// <remarks>
/// Whitespace is accepted before the number, after the number, and between
/// a sign and the digits, but not between digits. Sign characters
/// ('+', '﬩', '-', '−') are accepted only before the first digit; each minus
/// sign toggles the result's sign, so "--5" parses as 5. Grouping separators
/// are not accepted.
/// </remarks>
/// <param name="str">The text to parse.</param>
/// <returns>The integer value represented by <paramref name="str"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="FormatException">
/// <paramref name="str"/> contains no digits, contains a character that is not
/// a digit, sign or whitespace, has a sign after the first digit, or has
/// digits separated by whitespace.
/// </exception>
/// <exception cref="OverflowException">
/// The value is outside the range of <see cref="int"/>.
/// </exception>
public static int ParseIntInternational(this string str)
{
    if (str == null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    // The magnitude is accumulated as a NON-POSITIVE number. The negative
    // range of int is one larger than the positive range, so this is what
    // allows int.MinValue to be parsed without a spurious overflow.
    int result = 0;
    bool neg = false;
    bool seekingSign = true; // Signs are accepted only until the first digit.
    bool done = false;       // Set by whitespace after a digit; later digits must fail.

    for (int i = 0; i != str.Length; ++i)
    {
        if (char.IsWhiteSpace(str, i))
        {
            if (!seekingSign)
            {
                done = true;
            }
        }
        else if (char.IsDigit(str, i))
        {
            if (done)
            {
                throw new FormatException();
            }

            seekingSign = false;

            // checked: overflowing the int range raises OverflowException
            // immediately, so very long inputs fail fast.
            result = checked(result * 10 - (int)char.GetNumericValue(str, i));

            // The string/index overloads above evaluate a surrogate pair as a
            // single code point when given the index of its high surrogate.
            // Skip the low surrogate so it is not rejected as a stray character.
            if (char.IsSurrogatePair(str, i))
            {
                ++i;
            }
        }
        else if (seekingSign)
        {
            switch (str[i])
            {
                case '﬩':
                case '+':
                    // Positive sign: nothing to change.
                    break;
                case '-':
                case '−':
                    neg = !neg;
                    break;
                default:
                    throw new FormatException();
            }
        }
        else
        {
            throw new FormatException();
        }
    }

    // seekingSign is only cleared by a digit, so it still being set means
    // the input contained no digits at all.
    if (seekingSign)
    {
        throw new FormatException();
    }

    // result holds -|value|. Negating for a positive result is checked so that
    // "2147483648" still reports an overflow rather than wrapping.
    return neg ? result : checked(-result);
}