Remove accents/diacritics in a string in JavaScript

前端 未结 29 2530
轻奢々
轻奢々 2020-11-21 13:29

How do I remove accented characters from a string? Especially in IE6, I had something like this:

accentsTidy = function(s){
    var r=s.toLowerCase();
           


        
相关标签:
29条回答
  • 2020-11-21 13:56

    The format for new RegExp is

    RegExp(something, 'modifiers');
    

    So you would want

    /**
     * Lower-cases a string, replaces common accented Latin letters and
     * ligatures with plain ASCII letters, and removes whitespace and every
     * remaining non-word character.
     *
     * Declared as a function rather than assigned to an undeclared name, so it
     * no longer creates an implicit global (which throws in strict mode and in
     * ES modules).
     *
     * @param {string} s - Text to tidy.
     * @returns {string} The tidied text, containing only [a-z0-9_].
     */
    function accentsTidy(s) {
        // [pattern, replacement] pairs, applied in this order. The final "\\W"
        // pass must stay last: without the `u` flag \W also matches accented
        // letters, so they have to be transliterated before it runs.
        const replacements = [
            ["\\s", ""],
            ["[àáâãäå]", "a"],
            ["æ", "ae"],
            ["ç", "c"],
            ["[èéêë]", "e"],
            ["[ìíîï]", "i"],
            ["ñ", "n"],
            ["[òóôõö]", "o"],
            ["œ", "oe"],
            ["[ùúûü]", "u"],
            ["[ýÿ]", "y"],
            ["\\W", ""],
        ];

        // Only lower-case accented forms are listed above, so lower-case first.
        let r = s.toLowerCase();
        for (const [pattern, replacement] of replacements) {
            // RegExp(pattern, modifiers): 'g' replaces every occurrence.
            r = r.replace(new RegExp(pattern, 'g'), replacement);
        }
        return r;
    }
    
    0 讨论(0)
  • 2020-11-21 13:56

    You can use the _.deburr() method from the Lodash library.

    It's available as a stand-alone NPM package lodash.deburr, or as part of the lodash package.

    // Sample input containing accented letters (é, ï).
    const myStringWithAccent = 'Mon café est plein de caféïne';
    // `_` is not defined in this snippet; it is assumed to be the Lodash
    // namespace object already in scope — confirm against how Lodash is loaded.
    const myStringWithoutAccent = _.deburr( myStringWithAccent, );
    

    The result will be: "Mon cafe est plein de cafeine"

    0 讨论(0)
  • 2020-11-21 13:56

    There's a lot out there, but I think this one is simple and good enough:

     /**
      * Replaces accented Latin letters with their unaccented ASCII counterparts.
      *
      * Only the characters listed in the table below are converted; every other
      * character is passed through unchanged.
      *
      * @param {string} strAccents - Text that may contain accented letters.
      * @returns {string} The text with each listed accented letter replaced.
      */
     function remove_accents(strAccents) {
         // Each pair maps every character of the first string to the second.
         // Grouping by target letter keeps source and replacement visibly
         // aligned, unlike two long parallel strings.
         const groups = [
             ['ÀÁÂÃÄÅ', 'A'], ['àáâãäå', 'a'],
             ['ÒÓÔÕÖØ', 'O'], ['òóôõöø', 'o'],
             ['ÈÉÊË', 'E'], ['èéêë', 'e'],
             ['Ç', 'C'], ['ç', 'c'],
             // Eth maps to d/D in both cases (lower-case eth used to map to "e").
             ['Ð', 'D'], ['ð', 'd'],
             ['ÌÍÎÏ', 'I'], ['ìíîï', 'i'],
             ['ÙÚÛÜ', 'U'], ['ùúûü', 'u'],
             ['Ñ', 'N'], ['ñ', 'n'],
             ['Š', 'S'], ['š', 's'],
             // Upper-case Ý is included alongside its lower-case form.
             ['ÝŸ', 'Y'], ['ýÿ', 'y'],
             ['Ž', 'Z'], ['ž', 'z'],
         ];

         const table = new Map();
         for (const [letters, plain] of groups) {
             for (const letter of letters) {
                 table.set(letter, plain);
             }
         }

         // One Map lookup per character instead of two indexOf scans.
         return strAccents
             .split('')
             .map((ch) => (table.has(ch) ? table.get(ch) : ch))
             .join('');
     }
    

    If you also want to remove special characters and transform spaces and hyphens into underscores, do this:

    // Transliterate accented letters first so they survive the ASCII filter.
    string = remove_accents(string);
    // Keep letters, digits, whitespace and hyphens, then turn each whitespace
    // character or hyphen into an underscore. The hyphen must be in the kept
    // set: otherwise the first replace deletes it before the second can
    // convert it.
    string = string.replace(/[^a-z0-9\s-]/gi, '').replace(/[-\s]/g, '_');
    
    0 讨论(0)
  • 2020-11-21 13:57
    // Sanitize the name, then turn decimal numeric character references
    // (e.g. "&#233;") in the sanitized text back into the characters they encode.
    $scope.legal_name = $sanitize($scope.legal_name);
    console.log("Name before function...",$scope.legal_name);

    // Matches "&#", one or more decimal digits, and an optional trailing ";".
    // A single regex pass replaces the hand-written scanner, which never left
    // its `while(true)` loop when "&#" was not followed by a digit and which
    // range-checked the wrong index, so non-digit characters were accepted.
    const str = $scope.legal_name.replace(/&#(\d+);?/g, function (reference, digits) {
        const codePoint = parseInt(digits, 10);
        // String.fromCodePoint throws a RangeError above U+10FFFF, so leave
        // such references as they are.
        if (codePoint > 0x10FFFF) {
            return reference;
        }
        // fromCodePoint (not fromCharCode) so values above U+FFFF are not truncated.
        return String.fromCodePoint(codePoint);
    });
    $scope.legal_name = str;
    console.log("Name After function...",str);
    
    0 讨论(0)
  • 2020-11-21 13:57

    None of the above works with decomposed characters, as used on Mac OS. Removing diacritics in that case is simpler:

    // Delete every character in U+0300–U+036F (the Combining Diacritical Marks block).
    r = r.replace(/[\u0300-\u036f]/g, "");
    

    See the comment from Olivier Miakinen at: https://groups.google.com/d/msg/fr.comp.lang.regexp/6IGJTbedGTM/G0sB2kAsR34J (posted in French)

    0 讨论(0)
  • 2020-11-21 13:58

    normalize-diacritics is very useful

        // Pull the `normalize` function out of the normalize-diacritics package.
        const { normalize } = require('normalize-diacritics');
         
        /** Assuming top-level await is enabled... */
        // The call is awaited, so `normalize` presumably returns a promise —
        // confirm against the package documentation. The resolved value is
        // discarded here; the trailing comment shows the expected result.
        await normalize('söme stüff with áccènts'); // 'some stuff with accents'
    
    0 讨论(0)
提交回复
热议问题