How to split a string, but also keep the delimiters?

前端 未结 23 2318
我在风中等你
我在风中等你 2020-11-21 06:32

I have a multiline string which is delimited by a set of different delimiters:

(Text1)(DelimiterA)(Text2)(DelimiterC)(Text3)(DelimiterB)(Text4)
23条回答
  •  迷失自我
    2020-11-21 06:47

    import java.util.regex.*;
    import java.util.LinkedList;
    
    /**
     * Splits text around matches of a regular expression and can optionally
     * keep the matched delimiters in the result.
     *
     * <p>Unlike {@link String#split(String)}, no trailing empty strings are
     * dropped: the result always ends with the text that follows the last
     * match, even when that text is empty.
     */
    public class Splitter {
        /** Delimiter used when none is supplied: one or more whitespace characters. */
        private static final Pattern DEFAULT_PATTERN = Pattern.compile("\\s+");

        private final Pattern pattern;
        private final boolean keep_delimiters;

        /**
         * Creates a splitter for the given delimiter pattern.
         *
         * @param pattern         compiled delimiter pattern; it is stored as-is, so a
         *                        {@code null} here surfaces as a
         *                        {@link NullPointerException} when {@link #split} is called
         * @param keep_delimiters whether matched delimiters are included in the result
         */
        public Splitter(Pattern pattern, boolean keep_delimiters) {
            this.pattern = pattern;
            this.keep_delimiters = keep_delimiters;
        }

        /**
         * Creates a splitter from a regular expression string.
         *
         * @param pattern         delimiter regex; {@code null} is treated as the empty regex
         * @param keep_delimiters whether matched delimiters are included in the result
         * @throws PatternSyntaxException if {@code pattern} is not a valid regex
         */
        public Splitter(String pattern, boolean keep_delimiters) {
            this(Pattern.compile(pattern == null ? "" : pattern), keep_delimiters);
        }

        /** Creates a splitter for {@code pattern} that keeps delimiters. */
        public Splitter(Pattern pattern) {
            this(pattern, true);
        }

        /** Creates a splitter for the regex {@code pattern} that keeps delimiters. */
        public Splitter(String pattern) {
            this(pattern, true);
        }

        /** Creates a whitespace splitter with the given delimiter policy. */
        public Splitter(boolean keep_delimiters) {
            this(DEFAULT_PATTERN, keep_delimiters);
        }

        /** Creates a whitespace splitter that keeps delimiters. */
        public Splitter() {
            this(DEFAULT_PATTERN);
        }

        /**
         * Splits {@code text} around every match of this splitter's pattern.
         *
         * <p>For each match, the text between the previous match (or the start of
         * the input) and this match is emitted, followed by the matched delimiter
         * itself when delimiters are kept. The text after the last match is always
         * emitted as the final element, so the result is never empty.
         *
         * @param text the input to split; {@code null} is treated as {@code ""}
         * @return the pieces in input order
         */
        public String[] split(String text) {
            if (text == null) {
                text = "";
            }

            int last_match = 0;
            // The element type must be declared: with a raw list, toArray(T[])
            // is typed as Object[] and cannot be returned as String[].
            LinkedList<String> splitted = new LinkedList<String>();

            Matcher m = this.pattern.matcher(text);

            while (m.find()) {
                // Text between the end of the previous match and this match.
                splitted.add(text.substring(last_match, m.start()));

                if (this.keep_delimiters) {
                    splitted.add(m.group());
                }

                last_match = m.end();
            }

            // Remainder after the final match (the whole input if nothing matched).
            splitted.add(text.substring(last_match));

            return splitted.toArray(new String[splitted.size()]);
        }

        /**
         * Command-line demo: compiles the first argument as the delimiter pattern,
         * splits the second argument with it (keeping delimiters) and prints each part.
         *
         * @param argv exactly two arguments: {@code <pattern> <text>}
         */
        public static void main(String[] argv) {
            if (argv.length != 2) {
                System.err.println("Syntax: java Splitter <pattern> <text>");
                return;
            }

            Pattern pattern = null;
            try {
                pattern = Pattern.compile(argv[0]);
            }
            catch (PatternSyntaxException e) {
                System.err.println(e);
                return;
            }

            Splitter splitter = new Splitter(pattern);

            String text = argv[1];
            int counter = 1;
            for (String part : splitter.split(text)) {
                System.out.printf("Part %d: \"%s\"\n", counter++, part);
            }
        }
    }
    
    /*
        Example:
        > java Splitter "\W+" "Hello World!"
        Part 1: "Hello"
        Part 2: " "
        Part 3: "World"
        Part 4: "!"
        Part 5: ""
    */
    

    I don't really like the other way, where you get an empty element in front and back. A delimiter is usually not at the beginning or at the end of the string, thus you most often end up wasting two good array slots.

    Edit: Fixed edge cases. Commented source with test cases can be found here: http://snippets.dzone.com/posts/show/6453

提交回复
热议问题