I\'m trying to convert some strings that are in French Canadian and basically, I\'d like to be able to take out the French accent marks in the letters while keeping the lett
I needed something that converts all major unicode characters and the voted answer leaved a few out so I've created a version of CodeIgniter's convert_accented_characters($str)
into C# that is easily customisable:
using System;
using System.Text;
using System.Collections.Generic;
public static class Strings
{
static Dictionary foreign_characters = new Dictionary
{
{ "äæǽ", "ae" },
{ "öœ", "oe" },
{ "ü", "ue" },
{ "Ä", "Ae" },
{ "Ü", "Ue" },
{ "Ö", "Oe" },
{ "ÀÁÂÃÄÅǺĀĂĄǍΑΆẢẠẦẪẨẬẰẮẴẲẶА", "A" },
{ "àáâãåǻāăąǎªαάảạầấẫẩậằắẵẳặа", "a" },
{ "Б", "B" },
{ "б", "b" },
{ "ÇĆĈĊČ", "C" },
{ "çćĉċč", "c" },
{ "Д", "D" },
{ "д", "d" },
{ "ÐĎĐΔ", "Dj" },
{ "ðďđδ", "dj" },
{ "ÈÉÊËĒĔĖĘĚΕΈẼẺẸỀẾỄỂỆЕЭ", "E" },
{ "èéêëēĕėęěέεẽẻẹềếễểệеэ", "e" },
{ "Ф", "F" },
{ "ф", "f" },
{ "ĜĞĠĢΓГҐ", "G" },
{ "ĝğġģγгґ", "g" },
{ "ĤĦ", "H" },
{ "ĥħ", "h" },
{ "ÌÍÎÏĨĪĬǏĮİΗΉΊΙΪỈỊИЫ", "I" },
{ "ìíîïĩīĭǐįıηήίιϊỉịиыї", "i" },
{ "Ĵ", "J" },
{ "ĵ", "j" },
{ "ĶΚК", "K" },
{ "ķκк", "k" },
{ "ĹĻĽĿŁΛЛ", "L" },
{ "ĺļľŀłλл", "l" },
{ "М", "M" },
{ "м", "m" },
{ "ÑŃŅŇΝН", "N" },
{ "ñńņňʼnνн", "n" },
{ "ÒÓÔÕŌŎǑŐƠØǾΟΌΩΏỎỌỒỐỖỔỘỜỚỠỞỢО", "O" },
{ "òóôõōŏǒőơøǿºοόωώỏọồốỗổộờớỡởợо", "o" },
{ "П", "P" },
{ "п", "p" },
{ "ŔŖŘΡР", "R" },
{ "ŕŗřρр", "r" },
{ "ŚŜŞȘŠΣС", "S" },
{ "śŝşșšſσςс", "s" },
{ "ȚŢŤŦτТ", "T" },
{ "țţťŧт", "t" },
{ "ÙÚÛŨŪŬŮŰŲƯǓǕǗǙǛŨỦỤỪỨỮỬỰУ", "U" },
{ "ùúûũūŭůűųưǔǖǘǚǜυύϋủụừứữửựу", "u" },
{ "ÝŸŶΥΎΫỲỸỶỴЙ", "Y" },
{ "ýÿŷỳỹỷỵй", "y" },
{ "В", "V" },
{ "в", "v" },
{ "Ŵ", "W" },
{ "ŵ", "w" },
{ "ŹŻŽΖЗ", "Z" },
{ "źżžζз", "z" },
{ "ÆǼ", "AE" },
{ "ß", "ss" },
{ "IJ", "IJ" },
{ "ij", "ij" },
{ "Œ", "OE" },
{ "ƒ", "f" },
{ "ξ", "ks" },
{ "π", "p" },
{ "β", "v" },
{ "μ", "m" },
{ "ψ", "ps" },
{ "Ё", "Yo" },
{ "ё", "yo" },
{ "Є", "Ye" },
{ "є", "ye" },
{ "Ї", "Yi" },
{ "Ж", "Zh" },
{ "ж", "zh" },
{ "Х", "Kh" },
{ "х", "kh" },
{ "Ц", "Ts" },
{ "ц", "ts" },
{ "Ч", "Ch" },
{ "ч", "ch" },
{ "Ш", "Sh" },
{ "ш", "sh" },
{ "Щ", "Shch" },
{ "щ", "shch" },
{ "ЪъЬь", "" },
{ "Ю", "Yu" },
{ "ю", "yu" },
{ "Я", "Ya" },
{ "я", "ya" },
};
public static char RemoveDiacritics(this char c){
foreach(KeyValuePair entry in foreign_characters)
{
if(entry.Key.IndexOf (c) != -1)
{
return entry.Value[0];
}
}
return c;
}
public static string RemoveDiacritics(this string s)
{
//StringBuilder sb = new StringBuilder ();
string text = "";
foreach (char c in s)
{
int len = text.Length;
foreach(KeyValuePair entry in foreign_characters)
{
if(entry.Key.IndexOf (c) != -1)
{
text += entry.Value;
break;
}
}
if (len == text.Length) {
text += c;
}
}
return text;
}
}
Usage
// for strings
"crème brûlée".RemoveDiacritics (); // creme brulee
// for chars
"Ã"[0].RemoveDiacritics (); // A