I am looking for an algorithm that can map between characters with diacritics (tilde, circumflex, caret, umlaut, caron) and their "simple" character.
For example:
For future reference, here is a C# extension method that removes accents.
/// <summary>
/// String helpers for stripping diacritical marks (accents) from text.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Returns a copy of <paramref name="str"/> with combining diacritical marks removed,
    /// e.g. "é" becomes "e" and "Ň" becomes "N".
    /// </summary>
    /// <param name="str">The text to strip. Must not be <c>null</c>.</param>
    /// <returns>
    /// The input with every character of Unicode category
    /// <see cref="UnicodeCategory.NonSpacingMark"/> removed after canonical decomposition,
    /// recomposed to normalization form C.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    /// <remarks>
    /// Only marks that separate from their base letter under canonical decomposition are
    /// removed; letters with no canonical decomposition (for example "ø" or "ł") are
    /// returned unchanged.
    /// </remarks>
    public static string RemoveDiacritics(this string str)
    {
        // Extension methods can be invoked on a null receiver; fail with a clear
        // argument error instead of a NullReferenceException from Normalize.
        if (str == null)
        {
            throw new System.ArgumentNullException(nameof(str));
        }

        // FormD splits each precomposed character into its base character followed
        // by separate combining marks, so the marks can be filtered out individually.
        string decomposed = str.Normalize(NormalizationForm.FormD);
        var stripped = new StringBuilder(decomposed.Length);

        foreach (char c in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
            {
                stripped.Append(c);
            }
        }

        // Recompose what is left: FormD also breaks apart characters whose pieces are
        // not marks (e.g. Hangul syllables into jamo), and those must be put back together
        // so text without diacritics round-trips unchanged.
        return stripped.ToString().Normalize(NormalizationForm.FormC);
    }
}
/// <summary>
/// Smoke check: strips the diacritics from a sample of accented Latin letters and
/// asserts that the result equals the expected unaccented text.
/// </summary>
static void Main()
{
    const string expected = "NNN AAAAAA TTtt Hh aaaaaa nnn";
    string accented = "ŃŅŇ ÀÁÂÃÄÅ ŢŤţť Ĥĥ àáâãäå ńņň";

    // Same call as accented.RemoveDiacritics(), written in static form.
    string stripped = StringExtensions.RemoveDiacritics(accented);

    // Debug.Assert is only evaluated in builds where DEBUG is defined.
    Debug.Assert(stripped == expected);
}