Transliterating Cyrillic to Latin with a JavaScript function

后端 未结 6 1459
情话喂你
情话喂你 2020-12-28 08:27

I made this function:

function transliterate(word){

    var answer = \"\";

    A = new Array();
    A[\"Ё\"]=\"YO\";A[\"Й\"]=\"I\";A[\"Ц\"]=\"TS\";A[\"У\"         


        
相关标签:
6条回答
  • 2020-12-28 08:36

    In my projects I am using this method of transliterating:

    /**
     * Transliterates Russian Cyrillic letters in a string to Latin text.
     *
     * Every letter of the Russian alphabet (U+0410..U+044F plus U+0401 and
     * U+0451) is replaced by its Latin counterpart; all other characters are
     * left untouched.
     *
     * @param {string} text - The text to transliterate.
     * @returns {string} The text with Russian letters replaced.
     */
    var transliterate = function(text) {
        // Cyrillic letters are written as \u escapes so that the encoding of
        // this source file cannot corrupt them.
        var table = {
            // Upper case
            '\u0410': 'A',  '\u0411': 'B',  '\u0412': 'V',   '\u0413': 'G',
            '\u0414': 'D',  '\u0415': 'E',  '\u0401': 'YO',  '\u0416': 'ZH',
            '\u0417': 'Z',  '\u0418': 'I',  '\u0419': 'I',   '\u041A': 'K',
            '\u041B': 'L',  '\u041C': 'M',  '\u041D': 'N',   '\u041E': 'O',
            '\u041F': 'P',  '\u0420': 'R',  '\u0421': 'S',   '\u0422': 'T',
            '\u0423': 'U',  '\u0424': 'F',  '\u0425': 'H',   '\u0426': 'TS',
            '\u0427': 'CH', '\u0428': 'SH', '\u0429': 'SCH', '\u042A': "'",
            '\u042B': 'I',  '\u042C': "'",  '\u042D': 'E',   '\u042E': 'YU',
            '\u042F': 'YA',
            // Lower case
            '\u0430': 'a',  '\u0431': 'b',  '\u0432': 'v',   '\u0433': 'g',
            '\u0434': 'd',  '\u0435': 'e',  '\u0451': 'yo',  '\u0436': 'zh',
            '\u0437': 'z',  '\u0438': 'i',  '\u0439': 'i',   '\u043A': 'k',
            '\u043B': 'l',  '\u043C': 'm',  '\u043D': 'n',   '\u043E': 'o',
            '\u043F': 'p',  '\u0440': 'r',  '\u0441': 's',   '\u0442': 't',
            '\u0443': 'u',  '\u0444': 'f',  '\u0445': 'h',   '\u0446': 'ts',
            '\u0447': 'ch', '\u0448': 'sh', '\u0449': 'sch', '\u044A': "'",
            '\u044B': 'i',  '\u044C': "'",  '\u044D': 'e',   '\u044E': 'yu',
            '\u044F': 'ya'
        };

        // One pass over the string: the character class matches exactly the
        // keys of the table, so every match has a replacement.
        return text.replace(/[\u0401\u0410-\u044F\u0451]/g, function(ch) {
            return table[ch];
        });
    };
    

    Run this sample to transliterate:

    transliterate('абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ');
    

    I have replaced all Russian letters with their Unicode escape sequences (each one begins with \u) to avoid encoding problems in the JavaScript file.

    To check the execution speed, I took the best answer to this question and compared it with my example. My method turned out to be several times faster (0.16 ms in Firebug :-).

    0 讨论(0)
  • 2020-12-28 08:44

    By combining the recommendations of Bart Riemens and T.J. Crowder I came up with this code, which seems to do the trick nicely:

     /**
      * Transliterates Russian Cyrillic letters in a string to Latin text.
      *
      * Characters that have no entry in the lookup table are copied to the
      * result unchanged.
      *
      * @param {string} word - The text to transliterate.
      * @returns {string} The transliterated text.
      */
     function transliterate(word){

         var answer = "";
         // Plain object used as a character -> replacement map.
         var a = {
             "Ё":"YO","Й":"I","Ц":"TS","У":"U","К":"K","Е":"E","Н":"N","Г":"G","Ш":"SH","Щ":"SCH","З":"Z","Х":"H","Ъ":"'",
             "ё":"yo","й":"i","ц":"ts","у":"u","к":"k","е":"e","н":"n","г":"g","ш":"sh","щ":"sch","з":"z","х":"h","ъ":"'",
             "Ф":"F","Ы":"I","В":"V","А":"A","П":"P","Р":"R","О":"O","Л":"L","Д":"D","Ж":"ZH","Э":"E",
             "ф":"f","ы":"i","в":"v","а":"a","п":"p","р":"r","о":"o","л":"l","д":"d","ж":"zh","э":"e",
             "Я":"YA","Ч":"CH","С":"S","М":"M","И":"I","Т":"T","Ь":"'","Б":"B","Ю":"YU",
             "я":"ya","ч":"ch","с":"s","м":"m","и":"i","т":"t","ь":"'","б":"b","ю":"yu"
         };
         // Declared locally: an undeclared loop counter would leak into the
         // global scope and throws a ReferenceError in strict mode.
         var i, ch;

         for (i = 0; i < word.length; ++i){
             ch = word.charAt(i);
             answer += a[ch] === undefined ? ch : a[ch];
         }
         return answer;
     }
    

    Thank you!

    0 讨论(0)
  • 2020-12-28 08:47

    Do not use an array for this task. Do not use for in to iterate a string. Do not check against the string "undefined". Do not return within the for loop.

    /**
     * Converts Russian Cyrillic letters in `word` to Latin text.
     *
     * Each character is looked up in a fixed table; characters without an
     * entry are kept as they are.
     *
     * @param {string} word - Text to convert.
     * @returns {string} The converted text.
     */
    function transliterate(word) {
        var table = {
            "Ё": "YO", "Й": "I", "Ц": "TS", "У": "U", "К": "K", "Е": "E", "Н": "N",
            "Г": "G", "Ш": "SH", "Щ": "SCH", "З": "Z", "Х": "H", "Ъ": "'",
            "ё": "yo", "й": "i", "ц": "ts", "у": "u", "к": "k", "е": "e", "н": "n",
            "г": "g", "ш": "sh", "щ": "sch", "з": "z", "х": "h", "ъ": "'",
            "Ф": "F", "Ы": "I", "В": "V", "А": "A", "П": "P", "Р": "R", "О": "O",
            "Л": "L", "Д": "D", "Ж": "ZH", "Э": "E",
            "ф": "f", "ы": "i", "в": "v", "а": "a", "п": "p", "р": "r", "о": "o",
            "л": "l", "д": "d", "ж": "zh", "э": "e",
            "Я": "YA", "Ч": "CH", "С": "S", "М": "M", "И": "I", "Т": "T", "Ь": "'",
            "Б": "B", "Ю": "YU",
            "я": "ya", "ч": "ch", "с": "s", "м": "m", "и": "i", "т": "t", "ь": "'",
            "б": "b", "ю": "yu"
        };
        var pieces = [];
        var pos = 0;

        while (pos < word.length) {
            var ch = word.charAt(pos);
            var mapped = table[ch];

            // Fall back to the original character when there is no mapping.
            pieces.push(mapped ? mapped : ch);
            pos += 1;
        }

        return pieces.join('');
    }
    

    Here is a jsFiddle demonstration.

    0 讨论(0)
  • 2020-12-28 08:50

    Your primary problem is that the return is in the wrong place. It's inside your loop, so it returns on the first iteration. Change it to:

    /**
     * Transliterates Russian Cyrillic letters in a string to Latin text.
     *
     * Characters that are not in the lookup table are copied to the result
     * unchanged.
     *
     * @param {string} word - The text to transliterate.
     * @returns {string} The transliterated text.
     */
    function transliterate(word){

        var answer = "";
        // Declared with var so neither the map nor the loop state leaks into
        // the global scope; a plain object is used because it is only a map.
        var A = {};
        var i, ch;

        A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
        A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
        A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
        A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
        A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
        A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

        for (i = 0; i < word.length; ++i){
            ch = word.charAt(i);

            // Compare against the value undefined, not the string 'undefined';
            // otherwise unmapped characters append the text "undefined".
            if (A[ch] === undefined){
                answer += ch;
            }
            else {
                answer += A[ch];
            }

        }
        return answer; // <=== Was *above* the } on the previous line
    }
    

    Note that I've fixed the indentation. Consistent indentation helps you avoid these sorts of bugs.


    Note 1: There's nothing about your A object that uses the fact it's an Array. You're just using it as a map. In JavaScript, all objects are maps, so rather than A = new Array(); just use A = {};.

    Note 2: A and i are never declared in your function, so you're falling prey to The Horror of Implicit Globals. To fix it, declare them with var.

    Note 3: Neither using for..in to loop through the characters of a string, nor using [] to index into the string, is reliable across JavaScript engines. Instead, use for (i = 0; i < word.length; ++i) and then ch = word.charAt(i); to get the character at that position, then use ch in your code within the loop.

    Note 4: You can use the Curiously powerful || operator to shorten your code, e.g.:

    answer += A[ch] || ch;
    
    0 讨论(0)
  • 2020-12-28 08:50

    Russian characters in JS source files don't work at all on my system, and I don't know why. So I use the following code instead:

    It will not only transliterate but replace all punctuation with '_' and lowercase everything.

    /**
     * Builds a lower-case Latin alias from a string.
     *
     * The input is lower-cased, Russian letters are transliterated, and
     * spaces and the listed punctuation characters become '_'. Consecutive
     * separators are collapsed into one, and a single leading or trailing
     * '_' is removed. Characters without a mapping are kept unchanged.
     *
     * @param {string} str - The text to convert.
     * @returns {string} The transliterated alias.
     */
    function translit(str){
        var sp = '_';
        var text = str.toLowerCase();
        // All non-ASCII keys are \u escapes so that the encoding of this
        // source file cannot corrupt them.
        var transl = {
            '\u0430': 'a', '\u0431': 'b', '\u0432': 'v', '\u0433': 'g', '\u0434': 'd', '\u0435': 'e', '\u0451': 'e', '\u0436': 'zh',
            '\u0437': 'z', '\u0438': 'i', '\u0439': 'j', '\u043a': 'k', '\u043b': 'l', '\u043c': 'm', '\u043d': 'n', '\u043e': 'o',
            '\u043f': 'p', '\u0440': 'r', '\u0441': 's', '\u0442': 't', '\u0443': 'u', '\u0444': 'f', '\u0445': 'h', '\u0446': 'c',
            '\u0447': 'ch', '\u0448': 'sh', '\u0449': 'shch', '\u044a': '\'', '\u044b': 'y', '\u044c': '', '\u044d': 'e', '\u044e': 'yu',
            '\u044f': 'ya',
            '\u00AB': '_', '\u00BB': '_', // «»
            ' ': sp, '_': sp, '`': sp, '~': sp,
            '!': sp, '@': sp, '#': sp, '$': sp,
            '%': sp, '^': sp, '&': sp, '*': sp, '(': sp, ')': sp, '-': sp, '=': sp,
            '+': sp, '[': sp, ']': sp, '\\': sp, '|': sp, '/': sp, '.': sp, ',': sp,
            '{': sp, '}': sp, '\'': sp, '"': sp, ';': sp, ':': sp, '?': sp, '<': sp,
            '>': sp, '\u2116': sp // №
        };
        var result = '';
        // Declared locally: an undeclared loop counter would leak into the
        // global scope and throws a ReferenceError in strict mode.
        var i, ch, mapped;

        for (i = 0; i < text.length; i++) {
            ch = text.charAt(i);
            mapped = transl[ch];

            if (mapped === undefined) {
                // No mapping: keep the character as it is.
                result += ch;
            } else if (mapped !== sp || result.charAt(result.length - 1) !== sp) {
                // Skip a separator only when the output already ends with one.
                // Checking the output itself (rather than the previous mapped
                // value) keeps separators collapsed even when a letter that
                // maps to '' sits between them.
                result += mapped;
            }
        }

        return result.replace(/^_/, '').replace(/_$/, ''); // trim
    }
    
    // Example usage: transliterate a sample phrase and write the result into
    // the input element with id "alias" (declared in the HTML snippet below).
    var result = translit('Привет Мир!');
    document.getElementById('alias').value = result;
    <html>
      <body>
        <input name="name" type="text" id="alias" />
      </body>
    </html>

    The code was originally taken from http://ajaxs.ru/lesson/js/137-transliteracija_stroki_na_javascript.html and then refactored.

    0 讨论(0)
  • 2020-12-28 08:58

    Couple of things...

    1. Use undefined instead of 'undefined'
    2. Don't put the return in the loop
    3. Use hasOwnProperty to filter out functions and properties on the prototype
    4. Use [] instead of new Array()
    5. Use an {} instead of an []
    6. Use lower case variables instead of upper case. Uppercase is reserved for constructors

    Here is the code

    /**
     * Transliterates Russian Cyrillic letters in a string to Latin text.
     *
     * Characters that have no entry in the lookup table are copied to the
     * result unchanged.
     *
     * @param {string} word - The text to transliterate.
     * @returns {string} The transliterated text.
     */
    function transliterate(word){
        var answer = ""
          , a = {}
          , i
          , ch;

        a["Ё"]="YO";a["Й"]="I";a["Ц"]="TS";a["У"]="U";a["К"]="K";a["Е"]="E";a["Н"]="N";a["Г"]="G";a["Ш"]="SH";a["Щ"]="SCH";a["З"]="Z";a["Х"]="H";a["Ъ"]="'";
        a["ё"]="yo";a["й"]="i";a["ц"]="ts";a["у"]="u";a["к"]="k";a["е"]="e";a["н"]="n";a["г"]="g";a["ш"]="sh";a["щ"]="sch";a["з"]="z";a["х"]="h";a["ъ"]="'";
        a["Ф"]="F";a["Ы"]="I";a["В"]="V";a["А"]="A";a["П"]="P";a["Р"]="R";a["О"]="O";a["Л"]="L";a["Д"]="D";a["Ж"]="ZH";a["Э"]="E";
        a["ф"]="f";a["ы"]="i";a["в"]="v";a["а"]="a";a["п"]="p";a["р"]="r";a["о"]="o";a["л"]="l";a["д"]="d";a["ж"]="zh";a["э"]="e";
        a["Я"]="YA";a["Ч"]="CH";a["С"]="S";a["М"]="M";a["И"]="I";a["Т"]="T";a["Ь"]="'";a["Б"]="B";a["Ю"]="YU";
        a["я"]="ya";a["ч"]="ch";a["с"]="s";a["м"]="m";a["и"]="i";a["т"]="t";a["ь"]="'";a["б"]="b";a["ю"]="yu";

        // Index loop with a declared counter: for..in over a string needs a
        // hasOwnProperty guard and an undeclared counter becomes a global.
        for (i = 0; i < word.length; i++) {
          ch = word.charAt(i);
          if (a[ch] === undefined){
            answer += ch;
          } else {
            answer += a[ch];
          }
        }
        return answer;
    }
    

    More functional one is like this...

    // Lookup table: Russian Cyrillic letter -> Latin replacement.
    var a = {
      "Ё":"YO","Й":"I","Ц":"TS","У":"U","К":"K","Е":"E","Н":"N","Г":"G","Ш":"SH","Щ":"SCH","З":"Z","Х":"H","Ъ":"'",
      "ё":"yo","й":"i","ц":"ts","у":"u","к":"k","е":"e","н":"n","г":"g","ш":"sh","щ":"sch","з":"z","х":"h","ъ":"'",
      "Ф":"F","Ы":"I","В":"V","А":"A","П":"P","Р":"R","О":"O","Л":"L","Д":"D","Ж":"ZH","Э":"E",
      "ф":"f","ы":"i","в":"v","а":"a","п":"p","р":"r","о":"o","л":"l","д":"d","ж":"zh","э":"e",
      "Я":"YA","Ч":"CH","С":"S","М":"M","И":"I","Т":"T","Ь":"'","Б":"B","Ю":"YU",
      "я":"ya","ч":"ch","с":"s","м":"m","и":"i","т":"t","ь":"'","б":"b","ю":"yu"
    };

    /**
     * Transliterates Russian Cyrillic letters in a string to Latin text.
     *
     * The string is split into single characters, each one is replaced by
     * its entry in the table `a` (or kept when there is none), and the
     * pieces are joined back together.
     *
     * @param {string} word - The text to transliterate.
     * @returns {string} The transliterated text.
     */
    function transliterate(word){
      return word.split('').map(function (char) {
        return a[char] || char;
      }).join("");
    }
    
    0 讨论(0)
提交回复
热议问题