问题
I am rendering text atop a base image.
One core requirement is for the string to wrap to the next line(s) whenever the total width of characters exceeds the width of the base image. I accomplish this via the following correctly-working snippet:
# Character-by-character wrapping: a line is closed as soon as the summed
# character widths exceed the width of the base image. The character that
# caused the overflow stays on the line it overflowed, and words may be cut
# in the middle.
base_width, base_height = base_img.size
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)

lines = []       # finished lines, in reading order
line_count = 1   # incremented each time a line is closed
string = ""      # characters collected for the line being built
line_width = 0   # summed width of the characters in `string`

for c in text:
    line_width += font.getsize(c)[0]
    string += str(c)
    if line_width <= base_width:
        continue
    # Too wide: close the current line and start over with an empty one.
    lines.append(string)
    line_count += 1
    string, line_width = "", 0

# Whatever is left after the last overflow forms the final line.
if string:
    lines.append(string)
The result, `lines`, is a list of substrings obtained by breaking up the original string.
Now I need to improve this algorithm.
The problem is that it breaks the line mid-word. E.g. the string `lorem ipsum` could end up as `lines = ['lorem ip','sum']`. Instead, the ideal break-up for me is the much more human-readable `lines = ['lorem ','ipsum']`, or `lines = ['lorem',' ipsum']`.
In other words, I want to break the lines along white spaces, and not mid-word. Can someone give me an illustrative example of how I can accomplish that? Can't seem to wrap my head around it.
回答1:
Here's an attempt to get your code working with minimal changes, and lots of debug output:
#!python3
#coding=utf-8
""" Line break demo 2 """
# Demo metric: every character is this many pixels wide. With a real font,
# replace the two uses below with a measurement such as font.getsize(c)[0].
CHAR_WIDTH = 5

text = "lorem ipsum dolor sit amet blablah"

for wmax in [10, 25, 55, 80, 100, 120]:
    print(wmax)
    base_width, base_height = (wmax, None)  # stands in for base_img.size
    line_width = 0
    line_count = 1
    lines = []
    string = ""
    for c in text:
        line_width += CHAR_WIDTH
        string += c
        if line_width > base_width:
            print("text  ", text)
            print("string", string)
            # Cut at the last space seen so far. Everything before it closes
            # the current line; everything after it is carried over.
            s = string.rsplit(" ", 1)
            print("split ", s)
            string = s[0]
            lines.append(string)
            try:
                string = s[1]
            except IndexError:
                # No space in the overflowing text (a single word wider than
                # the line): the word is cut right here and nothing is
                # carried over to the next line.
                string = ""
            line_width = len(string) * CHAR_WIDTH
            print("lines ", lines)
            print("string", string)
            line_count += 1
            print()
    # Text left over after the last overflow becomes the final line.
    if string:
        lines.append(string)
    print(lines)
    print()
Output:
10
text lorem ipsum dolor sit amet blablah
string lor
split ['lor']
lines ['lor']
string
text lorem ipsum dolor sit amet blablah
string em
split ['em', '']
lines ['lor', 'em']
string
text lorem ipsum dolor sit amet blablah
string ips
split ['ips']
lines ['lor', 'em', 'ips']
string
text lorem ipsum dolor sit amet blablah
string um
split ['um', '']
lines ['lor', 'em', 'ips', 'um']
string
text lorem ipsum dolor sit amet blablah
string dol
split ['dol']
lines ['lor', 'em', 'ips', 'um', 'dol']
string
text lorem ipsum dolor sit amet blablah
string or
split ['or', '']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or']
string
text lorem ipsum dolor sit amet blablah
string sit
split ['sit']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit']
string
text lorem ipsum dolor sit amet blablah
string am
split ['', 'am']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '']
string am
text lorem ipsum dolor sit amet blablah
string ame
split ['ame']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame']
string
text lorem ipsum dolor sit amet blablah
string t b
split ['t', 'b']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't']
string b
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla']
string
text lorem ipsum dolor sit amet blablah
string bla
split ['bla']
lines ['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla']
string
['lor', 'em', 'ips', 'um', 'dol', 'or', 'sit', '', 'ame', 't', 'bla', 'bla', 'h']
25
text lorem ipsum dolor sit amet blablah
string lorem
split ['lorem', '']
lines ['lorem']
string
text lorem ipsum dolor sit amet blablah
string ipsum
split ['ipsum', '']
lines ['lorem', 'ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor
split ['dolor', '']
lines ['lorem', 'ipsum', 'dolor']
string
text lorem ipsum dolor sit amet blablah
string sit am
split ['sit', 'am']
lines ['lorem', 'ipsum', 'dolor', 'sit']
string am
text lorem ipsum dolor sit amet blablah
string amet b
split ['amet', 'b']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet']
string b
text lorem ipsum dolor sit amet blablah
string blabla
split ['blabla']
lines ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla']
string
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blabla', 'h']
55
text lorem ipsum dolor sit amet blablah
string lorem ipsum
split ['lorem ipsum', '']
lines ['lorem ipsum']
string
text lorem ipsum dolor sit amet blablah
string dolor sit am
split ['dolor sit', 'am']
lines ['lorem ipsum', 'dolor sit']
string am
text lorem ipsum dolor sit amet blablah
string amet blablah
split ['amet', 'blablah']
lines ['lorem ipsum', 'dolor sit', 'amet']
string blablah
['lorem ipsum', 'dolor sit', 'amet', 'blablah']
80
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor
split ['lorem ipsum', 'dolor']
lines ['lorem ipsum']
string dolor
text lorem ipsum dolor sit amet blablah
string dolor sit amet bl
split ['dolor sit amet', 'bl']
lines ['lorem ipsum', 'dolor sit amet']
string bl
['lorem ipsum', 'dolor sit amet', 'blablah']
100
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit
split ['lorem ipsum dolor', 'sit']
lines ['lorem ipsum dolor']
string sit
['lorem ipsum dolor', 'sit amet blablah']
120
text lorem ipsum dolor sit amet blablah
string lorem ipsum dolor sit ame
split ['lorem ipsum dolor sit', 'ame']
lines ['lorem ipsum dolor sit']
string ame
['lorem ipsum dolor sit', 'amet blablah']
回答2:
There is a python module textwrap specifically for this:
In [1]: import textwrap
In [2]: textwrap.wrap('x lorem ipsum', width=5)
Out[2]: ['x', 'lorem', 'ipsum']
edit:
I misunderstood the author's purpose. The problem is that the width of a line is not defined as the number of characters but as the width of the image containing the rendered text. I came up with a hacky method: implement a custom string class with the correct width definition and modify the TextWrapper class slightly:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import textwrap
class MyTextWrapper(textwrap.TextWrapper):
    """TextWrapper whose chunks are measured with ``len(StringWithWidth)``.

    The wrapping algorithm is the one inherited from ``textwrap``; chunks are
    wrapped in ``StringWithWidth`` so that every ``len(chunk)`` in the
    algorithm yields a measured width instead of a character count.

    NOTE: ``len(indent)`` and ``len(self.placeholder)`` are still plain
    character counts, because those values are ordinary ``str`` objects.
    """

    def _split(self, text):
        """Split *text* into chunks, each wrapped as a StringWithWidth.

        Accepts either a StringWithWidth or a plain ``str``.
        """
        return [StringWithWidth(i) for i in super()._split(_as_str(text))]

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """Move the part of an over-wide chunk that fits onto *cur_line*.

        The inherited implementation slices the chunk by character count
        (and, on newer Python versions, calls ``chunk.rfind``), neither of
        which works for width-measured chunks. This version cuts the chunk
        at the longest prefix whose measured width fits in the space left.
        Hyphen-aware breaking is not attempted.
        """
        space_left = 1 if width < 1 else width - cur_len

        if not self.break_long_words:
            # Same policy as the base class: an unbreakable word gets a line
            # to itself, even though that line ends up too wide.
            if not cur_line:
                cur_line.append(reversed_chunks.pop())
            return

        word = _as_str(reversed_chunks[-1])
        end = 0
        used = 0
        for char in word:
            char_len = len(StringWithWidth(char))
            if used + char_len > space_left:
                break
            used += char_len
            end += 1

        if end == 0:
            if cur_line:
                # Nothing more fits here; the word is retried on a fresh line.
                return
            # Even one character is too wide for an empty line: take it
            # anyway so the wrapping loop always makes progress.
            end = 1

        cur_line.append(StringWithWidth(word[:end]))
        if end < len(word):
            reversed_chunks[-1] = StringWithWidth(word[end:])
        else:
            reversed_chunks.pop()

    def _wrap_chunks(self, chunks):
        # Copy of textwrap.TextWrapper._wrap_chunks; only the places where
        # chunks are joined back into a line differ from the original.
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Every returned line is a StringWithWidth.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(StringWithWidth(
                        indent + ''.join(map(_as_str, cur_line))))
                else:
                    # Last permitted line, with text left over: drop chunks
                    # from its end until the placeholder fits.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(StringWithWidth(
                                indent + ''.join(map(_as_str, cur_line))))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        # Keep the element type uniform: wrap the
                        # placeholder-only line like every other line.
                        lines.append(StringWithWidth(
                            indent + self.placeholder.lstrip()))
                    break

        return lines


def _make_str_fwd(name):
    """Build a method that calls ``str.<name>`` on the wrapped string.

    The ``str`` result is wrapped in a new StringWithWidth.
    """
    def func(self, *args, **kwargs):
        return StringWithWidth(getattr(self._str, name)(*args, **kwargs))
    func.__name__ = name
    return func


def _as_str(val):
    """Return the plain ``str`` behind *val* (a ``str`` or StringWithWidth).

    Raises:
        TypeError: if *val* is neither of the two.
    """
    if isinstance(val, StringWithWidth):
        val = val._str
    if not isinstance(val, str):
        raise TypeError(
            "expected str or StringWithWidth, got %s" % type(val).__name__)
    return val


class StringWithWidth:
    """String wrapper whose ``len()`` is the summed width of its characters.

    Only the ``str`` methods needed by the wrapper above are forwarded.
    """

    # Demo metrics: width of each supported character. ``len()`` raises
    # KeyError for a character that has no entry here.
    char_width = {
        'x': 1,
        's': 2,
        ' ': 1
    }

    def __init__(self, s):
        # Accept an already wrapped string too, without double wrapping.
        self._str = _as_str(s)

    expandtabs = _make_str_fwd('expandtabs')
    translate = _make_str_fwd('translate')
    strip = _make_str_fwd('strip')
    lstrip = _make_str_fwd('lstrip')
    rstrip = _make_str_fwd('rstrip')
    __getitem__ = _make_str_fwd('__getitem__')

    def __eq__(self, rhs):
        if isinstance(rhs, StringWithWidth):
            return self._str == rhs._str
        if isinstance(rhs, str):
            return self._str == rhs
        return NotImplemented

    def __hash__(self):
        # Consistent with __eq__: equal to a str, so hash like that str.
        return hash(self._str)

    def __add__(self, rhs):
        return StringWithWidth(self._str + _as_str(rhs))

    def __len__(self):
        return sum(self.char_width[char] for char in self._str)

    def __repr__(self):
        return repr(self._str)


def main():
    """Wrap a small sample to a width of 8 units and print the lines."""
    print(MyTextWrapper(width=8).wrap(StringWithWidth('x ss s')))


if __name__ == '__main__':
    main()
回答3:
Maybe this simple approach helps, although I assume recursion may be more elegant. Note that the fixed value for character width needs to be replaced with the corresponding function call.
#!python3
#coding=utf-8
""" Line break demo """
def char_width(char):
    """Return the width of *char*.

    Demo metric: every character is 5 units wide. Replace the body with a
    real measurement to wrap actual rendered text.
    """
    return 5


text = "lorem ipsum dolor sit amet blablah"
print("breaking", text)
words = text.split()
# Width of each word on its own; the joining spaces are not counted.
widths = [sum(char_width(c) for c in word) for word in words]
print(len(words), "words")

for wmax in [10, 25, 55, 80, 100, 120]:
    print("\nmax line width:", wmax)
    lines = []
    li = 0  # index of the first word on the line being filled
    w = 0   # summed width of words[li..i]
    for i, (word, width) in enumerate(zip(words, widths)):
        w += width
        # `i > 0`: the very first word never triggers a break, so no empty
        # line is emitted even when that word alone reaches the limit.
        if w >= wmax and i > 0:
            lines.append(" ".join(words[li:i]))
            li = i
            print(" ---")
            w = width  # the new line starts with the current word
        print(" ", i, word, width, w)
    # remainder
    lines.append(" ".join(words[li:]))
    print(lines)
Output:
breaking lorem ipsum dolor sit amet blablah
6 words
max line width: 10
0 lorem 25 25
---
1 ipsum 25 25
---
2 dolor 25 25
---
3 sit 15 15
---
4 amet 20 20
---
5 blablah 35 35
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah']
max line width: 25
0 lorem 25 25
---
1 ipsum 25 25
---
2 dolor 25 25
---
3 sit 15 15
---
4 amet 20 20
---
5 blablah 35 35
['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'blablah']
max line width: 55
0 lorem 25 25
1 ipsum 25 50
---
2 dolor 25 25
3 sit 15 40
---
4 amet 20 20
---
5 blablah 35 35
['lorem ipsum', 'dolor sit', 'amet', 'blablah']
max line width: 80
0 lorem 25 25
1 ipsum 25 50
2 dolor 25 75
---
3 sit 15 15
4 amet 20 35
5 blablah 35 70
['lorem ipsum dolor', 'sit amet blablah']
max line width: 100
0 lorem 25 25
1 ipsum 25 50
2 dolor 25 75
3 sit 15 90
---
4 amet 20 20
5 blablah 35 55
['lorem ipsum dolor sit', 'amet blablah']
max line width: 120
0 lorem 25 25
1 ipsum 25 50
2 dolor 25 75
3 sit 15 90
4 amet 20 110
---
5 blablah 35 35
['lorem ipsum dolor sit amet', 'blablah']
回答4:
I couldn't get this out of my head, so here it is a little more compact:
#!python3
#coding=utf-8
""" Line break demo 3 """
def charwidth(char):
    """Return the width of one character (fixed at 5 for this demo)."""
    return 5


def stringwidth(string):
    """Return the summed width of all characters in *string*."""
    return sum(charwidth(char) for char in string)


def wrap_words(text, limit):
    """Split *text* on whitespace and group the words into lines.

    A word joins the current line when the line, including the separating
    space, is still no wider than *limit*. A word that is too wide on its
    own is placed on a line by itself; words are never cut.

    Returns:
        A list of lines, each a list of words. Empty for blank *text*.
    """
    lines = []
    for word in text.split():
        # Measure the candidate line as it would be rendered, so the
        # separating space is counted with its real width.
        if lines and stringwidth(" ".join(lines[-1] + [word])) <= limit:
            lines[-1].append(word)
        else:
            lines.append([word])
    return lines


text = "lorem ipsum dolor sit amet blablah"
limit = 60
words = text.split()
lines = wrap_words(text, limit)
print([" ".join(line) for line in lines])
回答5:
Here's a modified version of @handle's third answer that also allows splitting long words.
This function also allows one to specify a maximum number of lines and truncate with "..." when over the limit.
def text_width(text, font):
    """Return the rendered width of *text* for *font*.

    Uses ``font.getlength`` when the font object provides it and falls back
    to ``font.getsize(text)[0]`` otherwise, so both newer and older Pillow
    font objects work.
    """
    measure = getattr(font, "getlength", None)
    if measure is not None:
        return measure(text)
    return font.getsize(text)[0]


# Set max_lines to 0 for no limit
def wrap_text(text, font, max_width, max_lines=0):
    """Wrap *text* so that no line is wider than *max_width*.

    Lines are broken at whitespace. A word that is too wide for a line of
    its own is cut into pieces that fit (at least one character per piece).

    Args:
        text: The text to wrap; runs of whitespace separate words.
        font: Font object passed to ``text_width``.
        max_width: Maximum line width, in the units ``text_width`` returns.
        max_lines: Maximum number of lines; 0 means no limit. When the text
            needs more lines, the last kept line is shortened until it fits
            together with a trailing "...".

    Returns:
        The wrapped lines joined with newlines ("" for blank text).
    """
    lines = []  # each line is a list of words
    for word in text.split():
        # Measure the candidate line as rendered, so the separating space
        # is counted with its real width.
        if lines and text_width(" ".join(lines[-1] + [word]), font) <= max_width:
            lines[-1].append(word)
            continue
        # The word starts a new line. Brute force: peel off the longest
        # prefix that fits until the whole word has been placed.
        while word:
            chunk = len(word)
            while chunk > 1 and text_width(word[:chunk], font) > max_width:
                chunk -= 1
            lines.append([word[:chunk]])
            word = word[chunk:]

    rendered = [" ".join(line) for line in lines]
    if max_lines and len(rendered) > max_lines:
        rendered = rendered[:max_lines]
        last = rendered[-1]
        # Make room for the ellipsis instead of letting it overflow.
        while last and text_width(last + "...", font) > max_width:
            last = last[:-1]
        rendered[-1] = last.rstrip() + "..."
    return "\n".join(rendered)
回答6:
Here's how I've approached the problem's solution (re-wrote my original code):
def break_lines(img, text, font_size):
    """Break *text* at whitespace into lines that fit the width of *img*.

    Args:
        img: Image whose ``size[0]`` is the available width.
        text: Text to break; runs of whitespace separate the tokens.
        font_size: Size passed to ``ImageFont.truetype`` for DejaVuSans.

    Returns:
        ``(lines, number_of_lines)``. Every token in a line is followed by
        one space, so each line ends with a trailing space. A token wider
        than the image is placed on a line of its own; it is not cut.
    """
    base_width = img.size[0]
    font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)

    # Prefer getlength(); getsize() is not available in newer Pillow
    # releases. Fall back to it for font objects that lack getlength().
    if hasattr(font, "getlength"):
        measure = font.getlength
    else:
        def measure(s):
            return font.getsize(s)[0]

    line = ""
    lines = []
    width_of_line = 0
    # break string into multi-lines that fit base_width
    for token in text.split():
        token += ' '
        token_width = measure(token)
        # Close the current line only if it has content; otherwise an
        # over-wide first token would leave an empty line in front of it.
        if line and width_of_line + token_width >= base_width:
            lines.append(line)
            line = ""
            width_of_line = 0
        line += token
        width_of_line += token_width
    if line:
        lines.append(line)
    return lines, len(lines)
来源:https://stackoverflow.com/questions/43828154/breaking-a-string-along-whitespace-once-certain-width-is-exceeded-python