Javascript and regex: split string and keep the separator

后端 未结 7 2262
轮回少年
轮回少年 2020-11-22 05:25

I have a string:

var string = "aaaaaa<br />&dagger; bbbb<br />&Dagger; cccc";

And I would like to split this string with a regular expression and keep the separator in the result.

相关标签:
7条回答
  • 2020-11-22 05:53

    Use (positive) lookahead so that the regular expression asserts that the special character exists, but does not actually match it:

    // Split at every "<br />" that is immediately followed by an HTML entity
    // (e.g. "&dagger;" or "&#8224;"). The entity sits inside a lookahead, so it
    // is not consumed and stays at the start of the following piece.
    string.split(/<br \/>(?=&#?[a-zA-Z0-9]+;)/g);
    

    See it in action:

    // Demo input: two "<br />" separators, each followed by an HTML entity.
    var string = "aaaaaa<br />&dagger; bbbb<br />&Dagger; cccc";
    // Split on "<br />" only; the lookahead leaves each entity in its piece.
    var pieces = string.split(/<br \/>(?=&#?[a-zA-Z0-9]+;)/g);
    console.log(pieces);

    0 讨论(0)
  • 2020-11-22 05:58

    I was having a similar but slightly different problem. Anyway, here are examples of the different scenarios for where to keep the delimiter.

    "1、2、3".split("、") == ["1", "2", "3"]
    "1、2、3".split(/(、)/g) == ["1", "、", "2", "、", "3"]
    "1、2、3".split(/(?=、)/g) == ["1", "、2", "、3"]
    "1、2、3".split(/(?!、)/g) == ["1、", "2、", "3"]
    "1、2、3".split(/(.*?、)/g) == ["", "1、", "", "2、", "3"]
    

    Warning: The fourth will only work to split single characters. ConnorsFan presents an alternative:

    // Tokenize a path so that every directory name keeps its trailing slash.
    var str = 'Animation/rawr/javascript.js';
    // Either a run of non-slash characters plus an optional slash, or a lone slash.
    var pathPiecePattern = /[^\/]+\/?|\//g;
    var tokens = str.match(pathPiecePattern);
    
    0 讨论(0)
  • 2020-11-22 06:01

    I also answered this here: JavaScript Split Regular Expression keep the delimiter.

    Use the (?=pattern) lookahead syntax in the regex, as in this example:

    // Put a comma in front of every operator/punctuation character (the
    // lookahead matches the position without consuming the character), then cut
    // the string at those commas.
    var string = '500x500-11*90~1+1';
    string = string
      .replace(/(?=[$-/:-?{-~!"^_`\[\]])/gi, ",")
      .split(",");
    

    this will give you the following result.

    [ '500x500', '-11', '*90', '~1', '+1' ]
    

    You can also split directly on the lookahead, without the intermediate replace:

    // Split at the zero-width position before each operator/punctuation
    // character, so every token keeps its leading operator and no placeholder
    // comma is needed.
    string = string.split(/(?=[$-/:-?{-~!"^_`\[\]])/gi);
    

    giving the same result

    [ '500x500', '-11', '*90', '~1', '+1' ]
    
    0 讨论(0)
  • 2020-11-22 06:08

    I've been using this:

    /**
     * Splits this string at every match of `delimiter`, keeping each matched
     * delimiter at the start of the chunk that follows it.
     *
     * @param {string} delimiter - Regular-expression source for the delimiter.
     *   It is compiled with `new RegExp`, so regex metacharacters are live.
     * @returns {string[]} The chunks in order. Non-empty text that precedes the
     *   first delimiter is returned as its own leading chunk.
     */
    String.prototype.splitBy = function (delimiter) {
      const delimiterPATTERN = '(' + delimiter + ')';
      const delimiterRE = new RegExp(delimiterPATTERN, 'g');

      // The capturing group makes split() return the delimiters as well.
      return this.split(delimiterRE).reduce((chunks, item) => {
        if (item.match(delimiterRE)) {
          // A delimiter opens a new chunk.
          chunks.push(item);
        } else if (chunks.length > 0) {
          // Text after a delimiter is glued onto that delimiter's chunk.
          chunks[chunks.length - 1] += item;
        } else if (item !== '') {
          // Text before the first delimiter: there is no chunk to append to
          // yet, so keep it as a chunk of its own instead of losing it.
          chunks.push(item);
        }
        return chunks;
      }, []);
    };
    

    Except that you shouldn't mess with String.prototype, so here's a function version:

    /**
     * Splits `text` at every match of `delimiter`, keeping each matched
     * delimiter at the start of the chunk that follows it.
     *
     * Example: splitBy("a-b-c", "-") returns ["a", "-b", "-c"].
     *
     * @param {string} text - The string to split.
     * @param {string} delimiter - Regular-expression source for the delimiter.
     *   It is compiled with `new RegExp`, so regex metacharacters are live.
     * @returns {string[]} The chunks in order. Non-empty text that precedes the
     *   first delimiter is returned as its own leading chunk.
     */
    var splitBy = function (text, delimiter) {
      var
        delimiterPATTERN = '(' + delimiter + ')',
        delimiterRE = new RegExp(delimiterPATTERN, 'g');

      // The capturing group makes split() return the delimiters as well.
      return text.split(delimiterRE).reduce(function (chunks, item) {
        if (item.match(delimiterRE)) {
          // A delimiter opens a new chunk.
          chunks.push(item);
        } else if (chunks.length > 0) {
          // Text after a delimiter is glued onto that delimiter's chunk.
          chunks[chunks.length - 1] += item;
        } else if (item !== '') {
          // Text before the first delimiter: there is no chunk to append to
          // yet, so keep it as a chunk of its own instead of losing it.
          chunks.push(item);
        }
        return chunks;
      }, []);
    };
    

    So you could do:

    // Text to split, and the delimiter as regular-expression source:
    // "<br />" followed by an HTML entity.
    var haystack = "aaaaaa<br />&dagger; bbbb<br />&Dagger; cccc";
    var needle = '<br \/>&#?[a-zA-Z0-9]+;';
    var result = splitBy(haystack, needle);
    // Pretty-print the chunks with two-space indentation.
    var pretty = JSON.stringify(result, null, 2);
    console.log(pretty);
    

    And you'll end up with:

    [
      "<br />&dagger; bbbb",
      "<br />&Dagger; cccc"
    ]
    
    0 讨论(0)
  • 2020-11-22 06:10

    If you wrap the delimiter in parentheses it will be part of the returned array.

    // The whole delimiter, including the entity's closing ";", is inside the
    // capturing group, so split() returns it intact between the text pieces.
    string.split(/(<br \/>&#?[a-zA-Z0-9]+;)/g);
    // returns ["aaaaaa", "<br />&dagger;", " bbbb", "<br />&Dagger;", " cccc"]
    

    Depending on which part you want to keep change which subgroup you match

    // Only "<br />" is captured; the entity after it is matched but discarded.
    string.split(/(<br \/>)&#?[a-zA-Z0-9]+;/g);
    // returns ["aaaaaa", "<br />", " bbbb", "<br />", " cccc"]
    

    You could improve the expression by ignoring the case of letters: string.split(/(<br \/>)&#?[a-z0-9]+;/gi);

    And you can match for predefined groups like this: \d equals [0-9] and \w equals [a-zA-Z0-9_]. This means your expression could look like this.

    // Only the entity is captured here; "<br />" is matched but discarded.
    string.split(/<br \/>(&#?[a-z\d]+;)/gi);
    // with the string from the question, returns
    // ["aaaaaa", "&dagger;", " bbbb", "&Dagger;", " cccc"]
    

    There is a good Regular Expression Reference on JavaScriptKit.

    0 讨论(0)
  • 2020-11-22 06:11

    An extension function that splits a string by a substring or RegEx; the delimiter is kept at the start of the following piece or at the end of the preceding piece, according to the second parameter (ahead or behind).

        /**
         * Splits this string on a substring or regular expression while keeping
         * every matched delimiter attached to one of the neighbouring pieces.
         *
         * @param {string|RegExp} splitter - Delimiter to split on. A string is
         *   matched literally at every occurrence (via `replaceAll`). A RegExp
         *   is applied via `replace`, so it needs the `g` flag to split at
         *   every match. An empty string yields the whole string as one piece.
         * @param {boolean} [ahead] - When `true`, each delimiter starts the
         *   piece that follows it; otherwise it ends the piece before it.
         * @returns {string[]} The pieces in order. Empty pieces are not emitted.
         */
        String.prototype.splitKeep = function (splitter, ahead) {
            const self = String(this);

            // An empty splitter cannot delimit anything: return the string whole.
            if (splitter === '') {
                return [self];
            }

            // Collect every match and its offset. The replacement result is
            // discarded; the callback is only a way to observe the matches.
            const matches = [];
            const replaceName = splitter instanceof RegExp ? 'replace' : 'replaceAll';
            self[replaceName](splitter, (match, ...rest) => {
                // Callback arguments are (match, p1..pn, offset, string[, groups]).
                // Locate the offset from the end so that capture groups in the
                // splitter cannot be mistaken for it.
                const fromEnd = typeof rest[rest.length - 1] === 'string' ? 2 : 3;
                matches.push({ value: match, index: rest[rest.length - fromEnd] });
                return match;
            });

            // Cut the string at the start (ahead) or end (behind) of each match.
            const result = [];
            let lastIndex = 0;
            for (const m of matches) {
                // Loose comparison kept so callers passing 1 still mean "ahead".
                const nextIndex = ahead == true ? m.index : m.index + m.value.length;
                if (nextIndex !== lastIndex) {
                    result.push(self.substring(lastIndex, nextIndex));
                    lastIndex = nextIndex;
                }
            }
            // Whatever follows the last cut is the final piece.
            if (lastIndex < self.length) {
                result.push(self.substring(lastIndex));
            }
            return result;
        };
    

    The test:

        test('splitKeep', function () {
            // Each case is [input, arguments for splitKeep, expected pieces].
            var cases = [
                // String
                ["1231451", ['1'], ["1", "231", "451"]],
                ["123145", ['1', true], ["123", "145"]],
                ["1231451", ['1', true], ["123", "145", "1"]],
                ["hello man how are you!", [' '], ["hello ", "man ", "how ", "are ", "you!"]],
                ["hello man how are you!", [' ', true], ["hello", " man", " how", " are", " you!"]],
                // Regex
                ["mhellommhellommmhello", [/m+/g], ["m", "hellomm", "hellommm", "hello"]],
                ["mhellommhellommmhello", [/m+/g, true], ["mhello", "mmhello", "mmmhello"]]
            ];
            cases.forEach(function (entry) {
                var input = entry[0];
                var args = entry[1];
                var expected = entry[2];
                deepEqual(input.splitKeep(...args), expected);
            });
        });
    
    0 讨论(0)
提交回复
热议问题