Replace double quotes by quotation marks

后端 未结 3 854
臣服心动
臣服心动 2021-02-19 06:15

I am looking for a way to replace the straight quotes with “corrected” quotation marks in user input.

The idea

Here is a snippet briefly showing the

相关标签:
3条回答
  • 2021-02-19 06:31

    It works in many cases, with the exception of when the "word" is at the very beginning or the very end of a sentence or a line.

    To solve that problem, you can use an alternation of a beginning/end of line assertion and the space, capture that, and use it in the replacement:

    // The `m` flag makes ^ and $ match at every line break, so quotes at the
    // start or end of any line are converted, not only those at the very
    // start or end of the whole text.
    this.value = this.value.replace(/(^| )"/gm, '$1“');
    this.value = this.value.replace(/"($| )/gm, '”$1');
    

    The alternation is ^| / $|. The capture group will be "" if it matched the assertion, or " " if it matched the space.

    // On every keyup, rewrite the field's text with typographic quotes.
    $('#myInput').on("keyup", function(e) {
      // Every straight single quote becomes a right single quotation mark.
      this.value = this.value.replace(/'/g, '’');
      // Opening quote: a straight double quote at the start of a line or after
      // a space. The captured prefix ($1) is put back in front of the quote.
      // The `m` flag makes ^ match after each line break, not only at index 0.
      this.value = this.value.replace(/(^| )"/gm, '$1“');
      // Closing quote: a straight double quote at the end of a line or before
      // a space. The captured suffix ($1) is put back after the quote.
      this.value = this.value.replace(/"($| )/gm, '”$1');
    });
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
    <textarea id="myInput"></textarea>

    However, you've said you want to avoid "escaping" characters on user input. I'm not sure where you're planning to use it, but something like the above is almost never the right approach for a problem with that sort of description.

    0 讨论(0)
  • 2021-02-19 06:35

    I got a solution that finally fits all my needs.
    I admit it is a lot more complicated than T.J.'s one, which can be perfect for simple cases.

    Remember, my main problem was the impossibility of using \b because of the accented characters.
    I was able to get rid of that issue by using the solution from this topic:
    Remove accents/diacritics in a string in JavaScript

    After that, I used a modified function heavily inspired by the answer here…
    How do I replace a character at a particular index in JavaScript?

    … and had a very hard time, playing a lot with RegEx to finally get to that solution:

    // Sample input: quoted words, nested quotes, quotes glued inside words,
    // apostrophes, and unbalanced quotes at the start or end of a line.
    var str_orig = `· I'm "happy" ! Ça y est, j'ai "osé", et mon "âme sœur" était au rendez-vous…
    · The sign says: "Some text "some text" some text." and "Note the space here !"
    ⋅ "Inc"or"rect" quo"tes should " not be replaced.
    · I said: "If it works on 'singles' too, I'd love it even more!"
    word1" word2"
    word1 word2"
    "word1 word2
    "word1" word2
    "word1" word2"
    "word1 word2"`;
    
    // Copy of the sample with diacritics removed: NFD-decompose, then drop the
    // combining marks U+0300–U+036F. The regexes below run on this copy.
    // NOTE(review): match indexes found in str_norm are applied to str_orig,
    // which assumes both strings have the same length — presumably true only
    // for precomposed (NFC) input; confirm.
    var str_norm = str_orig.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    
    // Thanks for inspiration
    // Adds String#replaceQuoteAt(index, shift): returns a copy of the string
    // with the character at `index` swapped for a curly quote. The single
    // variant is chosen when that character is a straight apostrophe; a truthy
    // `shift` selects the closing form, a falsy one the opening form.
    String.prototype.replaceQuoteAt = function(index, shift) {
      // Layout: opening double, opening single, closing double, closing single.
      const curly = "“‘”’";
      const isSingle = this[index] == "'";
      const pick = 1 * isSingle + 2 * shift;
      const before = this.substr(0, index);
      const after = this.substr(index + 1);
      return `${before}${curly[pick]}${after}`;
    };
    
    // Declared explicitly: assigning to an undeclared `match` creates an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    var match;

    // Opening quote: not after a boundary, not before a space or at the end
    var re_start = /(?!\b)["'](?!(\s|$))/gi;
    while ((match = re_start.exec(str_norm)) !== null) {
      // The index comes from the accent-stripped copy; the edit goes to the original.
      str_orig = str_orig.replaceQuoteAt(match.index, false);
    }

    // Closing quote: not at the beginning or after a space, not before a boundary
    var re_end = /(?<!(^|\s))["'](?!\b)/gi;
    while ((match = re_end.exec(str_norm)) !== null) {
      str_orig = str_orig.replaceQuoteAt(match.index, true);
    }

    console.log("Corrected: \n", str_orig);

    And below is a snippet of a working example with a textarea.
    I've wrapped the code of the first snippet in a function, and I check a short substring around the caret position to decide whether to call it (this avoids calling it on every character typed):

    // Adds String#replaceQuoteAt(index, offset): returns a copy of the string
    // with the character at `index` replaced by a curly quote. A truthy
    // `offset` picks the closing form; a straight apostrophe at `index` picks
    // the single-quote variant.
    String.prototype.replaceQuoteAt = function(index, offset) {
      // Layout: opening double, opening single, closing double, closing single.
      const curly = "“‘”’";
      const isSingle = this[index] == "'";
      const pick = 2 * offset + 1 * isSingle;
      return `${this.substr(0, index)}${curly[pick]}${this.substr(index + 1)}`;
    };
    
    /**
     * Replaces straight quotes (" and ') in `str` with curly ones.
     *
     * A quote becomes an opening quote when it is not preceded by a word
     * character and is followed by something other than whitespace or the end
     * of the text. It becomes a closing quote when it is preceded by something
     * other than whitespace or the start of the text and is not followed by a
     * word character. When both rules apply, the closing form wins. Quotes
     * glued inside a word (e.g. `quo"tes`, `I'm`) are left untouched.
     *
     * @param {string} str - Text to convert.
     * @returns {string} Converted text; it has the same length as `str`, since
     *   every replacement swaps exactly one UTF-16 code unit for another.
     */
    function replaceQuotes(str) {
      // Curly replacements, indexed by (closing ? 2 : 0) + (single ? 1 : 0).
      const replacers = "“‘”’";

      // Length-preserving "shadow" of `str` on which the ASCII-only \b can be
      // used: each accented letter is mapped to its base letter, and a stand-alone
      // combining mark is mapped to a plain word character. Because every code
      // unit maps to exactly one code unit, match indexes found in the shadow
      // are valid in `str` even for decomposed accents or for characters whose
      // NFD form is longer than one unit.
      const shadow = str.replace(/[^\x00-\x7f]/g, function(unit) {
        if (unit >= '\u0300' && unit <= '\u036f') {
          return 'a';
        }
        const base = unit.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
        return base.length === 1 ? base : unit;
      });

      const chars = str.split('');

      // Applies one pass: every quote matched by `re` in the shadow is curled
      // in `chars`. The single/double choice comes from the matched straight
      // quote itself, so a quote already curled by an earlier pass keeps its kind.
      const curl = function(re, isClosing) {
        let match;
        while ((match = re.exec(shadow)) !== null) {
          const isSingle = match[0] === "'";
          chars[match.index] = replacers[(isClosing ? 2 : 0) + (isSingle ? 1 : 0)];
        }
      };

      // Opening quotes first, then closing quotes (which may override).
      curl(/(?!\b)["'](?!(\s|$))/g, false);
      // No trailing `.` here: the quote must also match when it is the last
      // character of the text or of a line.
      curl(/(?<!(^|\s))["'](?!\b)/g, true);

      return chars.join('');
    }
    
    // Flag raised by the paste handler; the input handler reads it to force a
    // conversion pass and then clears it.
    var pasted = 0;
    document.getElementById("myInput").onpaste = (e) => {
      pasted = 1;
    };
    
    // Converts quotes in the textarea as the user types or pastes.
    document.getElementById("myInput").oninput = function(e) {
      const caretPos = this.selectionStart; // Caret position after the edit
      // The two characters before the caret (the one just typed and the one
      // before it) plus the one right after it.
      const chars = this.value.substring(caretPos - 2, caretPos + 1);
      const touchesQuote = chars.includes(`"`) || chars.includes(`'`);

      // Skip the conversion unless text was pasted or a quote is near the caret.
      if (!pasted && !touchesQuote) {
        return;
      }
      pasted = 0;

      const converted = replaceQuotes(this.value);
      if (converted !== this.value) {
        // Assigning .value moves the caret to the end of the field, so put it
        // back — after a paste as well, otherwise pasting in the middle of the
        // text makes the caret jump to the end. The old position is still
        // valid, assuming replaceQuotes swaps characters one for one.
        this.value = converted;
        this.setSelectionRange(caretPos, caretPos);
      }
    }
    /* Size of the demo textarea. */
    #myInput {
      width: 90%;
      height: 100px;
    }
    <textarea id="myInput"></textarea>

    It seems to work with all I can imagine right now.
    The function correctly replaces the quotes when:
    ⋅ typing regularly,
    ⋅ adding quotes after we typed the text,
    ⋅ pasting text.

    It replaces both the double and the single quotes.

    Anyway, as I am not a RegEx expert at all, please feel free to comment if you notice a behaviour that may be unwanted, or a way to improve the expressions.

    0 讨论(0)
  • 2021-02-19 06:50

    Instead of following a regex-replace approach, I would use a simple loop that balances the quotes. It assumes that every quote that appears will be matched by another one, so that the quotes are replaced in pairs.

    Below is a test implementation for the same

    // Adds String#replaceAt(index, replacement): returns a copy of the string
    // in which `replacement` overwrites the characters starting at `index`
    // (as many characters as `replacement` is long).
    String.prototype.replaceAt = function(index, replacement) {
      const head = this.substr(0, index);
      const tail = this.substr(index + replacement.length);
      return `${head}${replacement}${tail}`;
    };
    
    // Test cases as [input, expected output] pairs.
    // Declared with `const`: a bare `tests = [...]` creates an implicit global
    // and throws a ReferenceError in strict mode / ES modules.
    const tests = [
      // [`I'm "happy"! J'ai enfin "osé". La rencontre de mon "âme-sœur" a "été" au rendez-vous…
      // and how it should look after correction:`, `I'm "happy"! J'ai enfin "osé". La rencontre de mon "âme-sœur" a "été" au rendez-vous…
      // and how it should look after correction:`],
      [`tarun" lalwani"`, `tarun” lalwani”`],
      [`tarun lalwani"`, `tarun lalwani”`],
      [`"tarun lalwani`, `“tarun lalwani`],
      [`"tarun" lalwani`, `“tarun” lalwani`],
      [`"tarun" lalwani"`, `“tarun” lalwani”`],
      [`"tarun lalwani"`, `“tarun lalwani”`],
    ];
    
    /**
     * Tests `value` against the separator pattern.
     *
     * NOTE(review): as written, the pattern is the literal three-character
     * sequence “ + comma + space, so any single character yields false.
     * Presumably a character class was intended; confirm before changing it,
     * because the callers' branches currently depend on this result.
     *
     * @param {*} value - Value to test (coerced to a string by RegExp#test).
     * @returns {boolean} true when `value` contains that exact sequence.
     */
    function isCharacterSeparator(value) {
      const separatorPattern = /“, /;
      return separatorPattern.test(value);
    }
    
    // Runs every [input, expected] pair: walks the quotes found in the input,
    // rewrites each one in a working copy, then reports any mismatch.
    // Loop variables are declared explicitly; the bare `[data, output]` and
    // `match` assignments were implicit globals, which throw a ReferenceError
    // in strict mode / ES modules.
    for (const [data, output] of tests) {
      const qtL = '“';
      const qtR = '”';
      // 0 until an opening quote has been placed at index 0, 1 afterwards.
      let bal = 0;
      const pattern = /["“”]/g;
      // Working copy; every replacement is one character for one character,
      // so indexes found in `data` stay valid in `data_new`.
      let data_new = data;
      let match;
      while ((match = pattern.exec(data)) !== null) {
        if (bal === 0) {
          if (match.index === 0) {
            data_new = data_new.replaceAt(match.index, qtL);
            bal = 1;
          } else {
            if (isCharacterSeparator(data_new[match.index - 1])) {
              data_new = data_new.replaceAt(match.index, qtL);
            } else {
              data_new = data_new.replaceAt(match.index, qtR);
            }
          }
        } else {
          if (match.index === data.length - 1) {
            data_new = data_new.replaceAt(match.index, qtR);
          } else if (isCharacterSeparator(data_new[match.index - 1])) {
            if (isCharacterSeparator(data_new[match.index + 1])) {
              //previous is separator as well as next one too
              // "tarun " lalwani"
              // take a call what needs to be done here?

            } else {
              data_new = data_new.replaceAt(match.index, qtL);
            }
          } else {
            if (isCharacterSeparator(data_new[match.index + 1])) {
              data_new = data_new.replaceAt(match.index, qtL);
            } else {
              data_new = data_new.replaceAt(match.index, qtR);
            }
          }
        }
      }

      console.log(data_new);
      if (data_new !== output) {
        console.log(`Failed to parse '${data}' Actual='${data_new}' Expected='${output}'`);
      }
    }

    Update-1: 20-Apr-2018

    I have updated the function. There may still be some edge cases, but you should put every case in the tests, run them, and fix the ones that don't behave as expected.

    0 讨论(0)
提交回复
热议问题