How do I remove accented characters from a string, especially in IE6? I had something like this:
accentsTidy = function(s){
var r=s.toLowerCase();
Here's a very simple solution without too much code. It uses a simple map of diacritics, including some characters that map to ASCII equivalents of more than one character, e.g. Æ => AE, ﬃ => ffi, etc. Some very basic functional tests are also included.
/**
 * Lookup table from a single accented / ligature character to its plain
 * ASCII replacement. Keys are written as \u escapes so the table does not
 * depend on the encoding the file is served with; replacements may be longer
 * than one character (e.g. Æ => AE).
 */
var diacriticsMap = {
// Upper-case letters
'\u00C0': 'A', // À => A
'\u00C1': 'A', // Á => A
'\u00C2': 'A', // Â => A
'\u00C3': 'A', // Ã => A
'\u00C4': 'A', // Ä => A
'\u00C5': 'A', // Å => A
'\u00C6': 'AE', // Æ => AE
'\u00C7': 'C', // Ç => C
'\u00C8': 'E', // È => E
'\u00C9': 'E', // É => E
'\u00CA': 'E', // Ê => E
'\u00CB': 'E', // Ë => E
'\u00CC': 'I', // Ì => I
'\u00CD': 'I', // Í => I
'\u00CE': 'I', // Î => I
'\u00CF': 'I', // Ï => I
'\u0132': 'IJ', // Ĳ => IJ
'\u00D0': 'D', // Ð => D
'\u00D1': 'N', // Ñ => N
'\u00D2': 'O', // Ò => O
'\u00D3': 'O', // Ó => O
'\u00D4': 'O', // Ô => O
'\u00D5': 'O', // Õ => O
'\u00D6': 'O', // Ö => O
'\u00D8': 'O', // Ø => O
'\u0152': 'OE', // Œ => OE
'\u0160': 'S', // Š => S
'\u00DE': 'TH', // Þ => TH
'\u00D9': 'U', // Ù => U
'\u00DA': 'U', // Ú => U
'\u00DB': 'U', // Û => U
'\u00DC': 'U', // Ü => U
'\u00DD': 'Y', // Ý => Y
'\u0178': 'Y', // Ÿ => Y
'\u017D': 'Z', // Ž => Z
// Lower-case letters
'\u00E0': 'a', // à => a
'\u00E1': 'a', // á => a
'\u00E2': 'a', // â => a
'\u00E3': 'a', // ã => a
'\u00E4': 'a', // ä => a
'\u00E5': 'a', // å => a
'\u00E6': 'ae', // æ => ae
'\u00E7': 'c', // ç => c
'\u00E8': 'e', // è => e
'\u00E9': 'e', // é => e
'\u00EA': 'e', // ê => e
'\u00EB': 'e', // ë => e
'\u00EC': 'i', // ì => i
'\u00ED': 'i', // í => i
'\u00EE': 'i', // î => i
'\u00EF': 'i', // ï => i
'\u0133': 'ij', // ĳ => ij
'\u00F0': 'd', // ð => d
'\u00F1': 'n', // ñ => n
'\u00F2': 'o', // ò => o
'\u00F3': 'o', // ó => o
'\u00F4': 'o', // ô => o
'\u00F5': 'o', // õ => o
'\u00F6': 'o', // ö => o
'\u00F8': 'o', // ø => o
'\u0153': 'oe', // œ => oe
'\u0161': 's', // š => s
'\u00DF': 'ss', // ß => ss
'\u00FE': 'th', // þ => th
'\u00F9': 'u', // ù => u
'\u00FA': 'u', // ú => u
'\u00FB': 'u', // û => u
'\u00FC': 'u', // ü => u
'\u00FD': 'y', // ý => y
'\u00FF': 'y', // ÿ => y
'\u017E': 'z', // ž => z
// Typographic ligatures (Alphabetic Presentation Forms)
'\uFB00': 'ff', // ﬀ => ff
'\uFB01': 'fi', // ﬁ => fi
'\uFB02': 'fl', // ﬂ => fl
'\uFB03': 'ffi', // ﬃ => ffi
'\uFB04': 'ffl', // ﬄ => ffl
'\uFB05': 'st', // ﬅ => st (long s + t; the long s only looks like an f)
'\uFB06': 'st' // ﬆ => st
};
/**
 * Replaces every character of `str` that has its own entry in `diacriticsMap`
 * with the mapped replacement; every other character is copied through
 * unchanged.
 *
 * @param {*} str - Text to convert. Falsy values (undefined, null, '', 0, ...)
 *   yield ''. Any other non-string value is converted with String() first.
 * @returns {string} The converted text.
 */
function replaceDiacritics(str) {
  if (!str) {
    return '';
  }
  var text = String(str);
  var hasOwn = Object.prototype.hasOwnProperty;
  // Collect the pieces and join once instead of repeated string concatenation.
  var pieces = [];
  for (var i = 0; i < text.length; i++) {
    // charAt() instead of text[i]: bracket indexing on strings is an ES5
    // feature and is not available in older engines such as IE6/IE7.
    var ch = text.charAt(i);
    // Own-property check so a property inherited through Object.prototype is
    // never mistaken for a map entry.
    pieces.push(hasOwn.call(diacriticsMap, ch) ? diacriticsMap[ch] : ch);
  }
  return pieces.join('');
}
/**
 * Runs `input` through replaceDiacritics, logs the input, the actual output
 * and the expected output, then logs whether the two match.
 *
 * @param {string} input - Text handed to replaceDiacritics.
 * @param {string} expected - Output the conversion is supposed to produce.
 * @returns {boolean} true when the actual output is strictly equal to `expected`.
 */
function testStripDiacritics(input, expected) {
  var actual = replaceDiacritics(input);
  var passed = actual === expected;
  console.log('The character passed in was ' + input);
  console.log('The character that came out was ' + actual);
  // The space after "was" keeps the expected value from running into the label.
  console.log('The character expected was ' + expected);
  console.log(passed ? 'PASS' : 'FAIL');
  return passed;
}
// Basic functional checks: each row is [input, expected output] and is passed
// to testStripDiacritics in the order listed. Wrapped in a function so the
// table and loop counter do not become globals.
(function () {
  var cases = [
    ['À', 'A'],
    ['A', 'A'],
    ['Æ', 'AE'],
    ['AE', 'AE'],
    ['ÇhÀrlËšYŸZŽ', 'ChArlEsYYZZ']
  ];
  for (var k = 0; k < cases.length; k++) {
    testStripDiacritics(cases[k][0], cases[k][1]);
  }
}());