Algorithm to divide text into 3 evenly-sized groups

后端 未结 4 1475
小蘑菇
小蘑菇 2021-01-15 04:07

I would like to create an algorithm that will divide text into 3 evenly-sized groups (based on text length). Since this will be put to use for line-breaks, the order of…

相关标签:
4条回答
  • 2021-01-15 04:14

    The answer from "someone" works fine. However, I had problems translating it into Swift code. Here is my translation for anyone who is interested.

    import Foundation   
    
    /// Splits text into a fixed number of lines of near-equal width using a
    /// "minimum raggedness" dynamic program.
    class SplitText {
        /// (total cost of the lines laid out so far, index of the first word on the last line)
        typealias MinRag = (Float, Int)

        /// Breaks `text` into exactly `numberOfLines` lines whose widths are as even as possible.
        ///
        /// Words are separated by spaces or line breaks and are never reordered or split.
        /// The layout minimises the sum over all lines of
        /// `(idealWidth - actualWidth)^2`, so a line may come out empty when there are
        /// fewer words than lines.
        ///
        /// Adapted from
        /// http://stackoverflow.com/questions/6426017/word-wrap-to-x-lines-instead-of-maximum-width-least-raggedness?lq=1
        ///
        /// - Note: Under Swift 3+ labelling rules this is called as
        ///   `SplitText.splitText(text:numberOfLines:)`.
        /// - Parameters:
        ///   - text: The text to break into lines.
        ///   - numberOfLines: How many lines to produce. Values below 1 yield an empty array.
        /// - Returns: The lines in reading order, words joined by single spaces.
        /// - Complexity: O(words² · numberOfLines) time, O(words · numberOfLines) memory.
        class func splitText(text: String, numberOfLines: Int) -> [String] {
            // `1...numberOfLines` below would be an invalid range for values < 1.
            guard numberOfLines > 0 else { return [] }

            // No cap on the number of splits: every separator starts a new word.
            // `isNewline` also covers CRLF, which Swift treats as a single Character.
            let words = text
                .split(whereSeparator: { $0 == " " || $0.isNewline })
                .map(String.init)
            let wordCount = words.count

            // cumulativeWidth[i] is the number of characters in words[0..<i].
            var cumulativeWidth = [0]
            cumulativeWidth.reserveCapacity(wordCount + 1)
            for word in words {
                cumulativeWidth.append(cumulativeWidth[cumulativeWidth.count - 1] + word.count)
            }

            // Width of the whole text on one line: all characters plus (wordCount - 1) spaces.
            let totalWidth = cumulativeWidth[wordCount] + wordCount - 1
            // Ideal width of one line once (numberOfLines - 1) spaces have become line breaks.
            let idealWidth = Float(totalWidth - (numberOfLines - 1)) / Float(numberOfLines)

            // Cost of putting words[i..<j] on one line: squared deviation from the ideal width.
            func cost(_ i: Int, _ j: Int) -> Float {
                let actualWidth = max(j - i - 1, 0) + (cumulativeWidth[j] - cumulativeWidth[i])
                let slack = idealWidth - Float(actualWidth)
                return slack * slack
            }

            // best[l][j] describes the cheapest way to lay out words[0..<j] on l lines.
            // With zero lines only the empty prefix is reachable (cost 0); everything
            // else starts at infinity so it can never be chosen as a predecessor.
            var firstRow: [MinRag] = Array(repeating: (Float.infinity, -1), count: wordCount + 1)
            firstRow[0] = (0, -1)
            var best: [[MinRag]] = [firstRow]
            best.reserveCapacity(numberOfLines + 1)

            for line in 1...numberOfLines {
                let previous = best[line - 1]
                var row = [MinRag]()
                row.reserveCapacity(wordCount + 1)
                for j in 0...wordCount {
                    var minimum: MinRag = (previous[0].0 + cost(0, j), 0)
                    // k ascends, so a strict `<` keeps the smallest k among equal costs.
                    for k in 0...j {
                        let candidate = previous[k].0 + cost(k, j)
                        if candidate < minimum.0 {
                            minimum = (candidate, k)
                        }
                    }
                    row.append(minimum)
                }
                best.append(row)
            }

            // Walk the table backwards from the full text to recover the line starts.
            var lines = [String]()
            lines.reserveCapacity(numberOfLines)
            var end = wordCount
            for line in stride(from: numberOfLines, to: 0, by: -1) {
                let start = best[line][end].1
                // Half-open range: stays valid when the line is empty (start == end).
                lines.append(words[start..<end].joined(separator: " "))
                end = start
            }
            lines.reverse()
            return lines
        }
    }
    
    0 讨论(0)
  • 2021-01-15 04:23

    You can try the following simple heuristic for starters: place two iterators at n/3 and 2n/3 and search for the closest space near each of them.

    0 讨论(0)
  • 2021-01-15 04:24

    The "minimum raggedness" dynamic program, also from the Wikipedia article on word wrap, can be adapted to your needs. Set LineWidth = len(text)/n - 1 and ignore the comment about infinite penalties for exceeding the line width; use the definition of c(i, j) as is with P = 2.


    Code. I took the liberty of modifying the DP always to return exactly n lines, at the cost of increasing the running time from O(#words ** 2) to O(#words ** 2 * n).
    
    def minragged(text, n=3):
        """
        Split ``text`` into exactly ``n`` lines of near-equal width.

        Words are obtained with ``text.split()`` and keep their order. The
        layout minimises the sum over all lines of
        ``(linewidth - actual line width) ** 2``, so some lines may be empty
        when there are fewer words than lines. Runs in
        O(#words ** 2 * n) time.

        ``n`` is expected to be at least 1 (``n == 0`` divides by zero).
        Returns a list of ``n`` strings, each the words of one line joined by
        single spaces.

        >>> minragged('Just testing to see how this works.')
        ['Just testing', 'to see how', 'this works.']
        >>> minragged('Just testing to see how this works.', 10)
        ['', '', 'Just', 'testing', 'to', 'see', 'how', 'this', 'works.', '']
        """
        words = text.split()
        cumwordwidth = [0]
        # cumwordwidth[-1] is the last element
        for word in words:
            cumwordwidth.append(cumwordwidth[-1] + len(word))
        totalwidth = cumwordwidth[-1] + len(words) - 1  # len(words) - 1 spaces
        linewidth = float(totalwidth - (n - 1)) / float(n)  # n - 1 line breaks

        def cost(i, j):
            """
            cost of a line words[i], ..., words[j - 1] (words[i:j])
            """
            actuallinewidth = max(j - i - 1, 0) + (cumwordwidth[j] - cumwordwidth[i])
            return (linewidth - float(actuallinewidth)) ** 2

        # best[l][k][0] is the min total cost for words 0, ..., k - 1 on l lines
        # best[l][k][1] is a minimizing index for the start of the last line
        best = [[(0.0, None)] + [(float('inf'), None)] * len(words)]
        # range (not xrange) so the code runs on both Python 2 and Python 3;
        # range(upper) is the interval 0, 1, ..., upper - 1
        for l in range(1, n + 1):
            best.append([])
            for j in range(len(words) + 1):
                # min over (cost, k) tuples: ties on cost resolve to the smallest k
                best[l].append(min((best[l - 1][k][0] + cost(k, j), k) for k in range(j + 1)))
        lines = []
        b = len(words)
        # range(upper, 0, -1) is the interval upper, upper - 1, ..., 1
        for l in range(n, 0, -1):
            a = best[l][b][1]
            lines.append(' '.join(words[a:b]))
            b = a
        lines.reverse()
        return lines
    
    if __name__ == '__main__':
        # When executed as a script, run the doctests embedded in this module.
        from doctest import testmod
        testmod()
    
    0 讨论(0)
  • 2021-01-15 04:35

    From http://en.wikipedia.org/wiki/Word_wrap:

    SpaceLeft := LineWidth
    for each Word in Text
        if Width(Word) > SpaceLeft
            insert line break before Word in Text
            SpaceLeft := LineWidth - Width(Word)
        else
            SpaceLeft := SpaceLeft - (Width(Word) + SpaceWidth)
    

    This method is used by many modern word processors, such as OpenOffice.org Writer and Microsoft Word. This algorithm is optimal in that it always puts the text on the minimum number of lines.

    0 讨论(0)
提交回复
热议问题