Remove accents/diacritics in a string in JavaScript

前端 未结 29 2526
轻奢々
轻奢々 2020-11-21 13:29

How do I remove accented characters from a string? Especially in IE6, I had something like this:

accentsTidy = function(s){
    var r=s.toLowerCase();
           


        
29条回答
  •  心在旅途
    2020-11-21 14:07

    Here's a very simple solution without too much code. It uses a simple map of diacritics and ligatures to their ASCII equivalents, including some or all of the characters whose equivalent is more than one character long, e.g. Æ => AE, ﬃ => ffi, etc. Some very basic functional tests are included as well.

    var diacriticsMap = {
      '\u00C0': 'A',  // À => A
      '\u00C1': 'A',   // Á => A
      '\u00C2': 'A',   // Â => A
      '\u00C3': 'A',   // Ã => A
      '\u00C4': 'A',   // Ä => A
      '\u00C5': 'A',   // Å => A
      '\u00C6': 'AE', // Æ => AE
      '\u00C7': 'C',   // Ç => C
      '\u00C8': 'E',   // È => E
      '\u00C9': 'E',   // É => E
      '\u00CA': 'E',   // Ê => E
      '\u00CB': 'E',   // Ë => E
      '\u00CC': 'I',   // Ì => I
      '\u00CD': 'I',   // Í => I
      '\u00CE': 'I',   // Î => I
      '\u00CF': 'I',   // Ï => I
      '\u0132': 'IJ', // IJ => IJ
      '\u00D0': 'D',   // Ð => D
      '\u00D1': 'N',   // Ñ => N
      '\u00D2': 'O',   // Ò => O
      '\u00D3': 'O',   // Ó => O
      '\u00D4': 'O',   // Ô => O
      '\u00D5': 'O',   // Õ => O
      '\u00D6': 'O',   // Ö => O
      '\u00D8': 'O',   // Ø => O
      '\u0152': 'OE', // Œ => OE
      '\u00DE': 'TH', // Þ => TH
      '\u00D9': 'U',   // Ù => U
      '\u00DA': 'U',   // Ú => U
      '\u00DB': 'U',   // Û => U
      '\u00DC': 'U',   // Ü => U
      '\u00DD': 'Y',   // Ý => Y
      '\u0178': 'Y',   // Ÿ => Y
      '\u00E0': 'a',   // à => a
      '\u00E1': 'a',   // á => a
      '\u00E2': 'a',   // â => a
      '\u00E3': 'a',   // ã => a
      '\u00E4': 'a',   // ä => a
      '\u00E5': 'a',   // å => a
      '\u00E6': 'ae', // æ => ae
      '\u00E7': 'c',   // ç => c
      '\u00E8': 'e',   // è => e
      '\u00E9': 'e',   // é => e
      '\u00EA': 'e',   // ê => e
      '\u00EB': 'e',   // ë => e
      '\u00EC': 'i',   // ì => i
      '\u00ED': 'i',   // í => i
      '\u00EE': 'i',   // î => i
      '\u00EF': 'i',   // ï => i
      '\u0133': 'ij', // ij => ij
      '\u00F0': 'd',   // ð => d
      '\u00F1': 'n',   // ñ => n
      '\u00F2': 'o',   // ò => o
      '\u00F3': 'o',   // ó => o
      '\u00F4': 'o',   // ô => o
      '\u00F5': 'o',   // õ => o
      '\u00F6': 'o',   // ö => o
      '\u00F8': 'o',   // ø => o
      '\u0153': 'oe', // œ => oe
      '\u00DF': 'ss', // ß => ss
      '\u00FE': 'th', // þ => th
      '\u00F9': 'u',   // ù => u
      '\u00FA': 'u',   // ú => u
      '\u00FB': 'u',   // û => u
      '\u00FC': 'u',   // ü => u
      '\u00FD': 'y',   // ý => y
      '\u00FF': 'y',   // ÿ => y
      '\uFB00': 'ff', // ff => ff
      '\uFB01': 'fi',   // fi => fi
      '\uFB02': 'fl', // fl => fl
      '\uFB03': 'ffi',  // ffi => ffi
      '\uFB04': 'ffl',  // ffl => ffl
      '\uFB05': 'ft', // ſt => ft
      '\uFB06': 'st'  // st => st
    };
    
    function replaceDiacritics(str) {
      var returnStr = '';
      if(str) {
        for (var i = 0; i < str.length; i++) {
          if (diacriticsMap[str[i]]) {
            returnStr += diacriticsMap[str[i]];
          } else {
            returnStr += str[i];
          }
        }
      }
      return returnStr;
    }
    
    function testStripDiacritics(input, expected) {
      var coChar = replaceDiacritics(input);
      console.log('The character passed in was ' + input);
      console.log('The character that came out was ' + coChar);
      console.log('The character expected was' + expected);
    }
    
    testStripDiacritics('À','A');
    testStripDiacritics('A','A');
    testStripDiacritics('Æ','AE');
    testStripDiacritics('AE','AE');
    testStripDiacritics('ÇhÀrlËšYŸZŽ','ChArlEsYYZZ');
    

提交回复
热议问题