diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index dfbc2b93dfc0d6..276bc6cce14288 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -475,6 +475,30 @@ def test_narrow_non_breaking_space(self): 'non-breaking\N{NARROW NO-BREAK SPACE}space.'], break_on_hyphens=False) + def test_wrap_stability(self): + def wrap(s): + return fill(s, width=30) + + # original is 31 characters long: + # 0 1 2 3 + # 1234567890123456789012345678901 + original = "xxxx xxxx xxxx xxxx xxxx. xxxx" + wrapped = wrap(original) + wrapped2 = wrap(wrapped) + self.assertEqual(wrapped, wrapped2) + + def test_wrap_stability_with_fix_sentence_endings(self): + def wrap(s): + return fill(s, width=30, fix_sentence_endings=True) + + # original is 31 characters long: + # 0 1 2 3 + # 1234567890123456789012345678901 + original = "xxx xxxx xxxx xxxx xxxx. xxxx" + wrapped = wrap(original) + wrapped2 = wrap(wrapped) + self.assertEqual(wrapped, wrapped2) + class MaxLinesTestCase(BaseTestCase): text = "Hello there, how are you this fine day? I'm glad to hear it!" diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 98bedd27ea3a11..171769933af952 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -159,15 +159,17 @@ def _split(self, text): Split the text to wrap into indivisible chunks. Chunks are not quite the same as words; see _wrap_chunks() for full - details. As an example, the text + details. + + As an example, the text Look, goof-ball -- use the -b option! - breaks into the following chunks: + breaks into the following chunks if break_on_hyphens is True: 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', 'use', ' ', 'the', ' ', '-b', ' ', 'option!' - if break_on_hyphens is True, or in: + + If break_on_hyphens is False, it instead breaks into these chunks: 'Look,', ' ', 'goof-ball', ' ', '--', ' ', 'use', ' ', 'the', ' ', '-b', ' ', option!' - otherwise. 
""" if self.break_on_hyphens is True: chunks = self.wordsep_re.split(text) @@ -296,6 +298,35 @@ def _wrap_chunks(self, chunks): else: break + # If the last chunk on this line is all whitespace, + # and drop_whitespace is on, + # see if substituting a one-space chunk here leaves enough + # room to add the next non-whitespace chunk to the end. + # This fixes bpo-32397, where running wrap() twice on + # a paragraph might not be stable--the second run might + # produce different results than the first. (If the first + # wrap() turned "foo. bar" into "foo.\nbar", the second + # wrap() might turn it back into "foo. bar". In that + # scenario, wrap() will now produce "foo. bar".) + # + # This is all complicated slightly by fix_sentence_endings, + # where the chunk we add back in might need to be two spaces + # instead of one. + if (chunks and self.drop_whitespace + and cur_line and cur_line[-1].strip() == ''): + spacer = " " + if (self.fix_sentence_endings + and (len(cur_line) > 1) + and self.sentence_end_re.search(cur_line[-2])): + spacer = " " + new_len = cur_len - len(cur_line[-1]) + len(spacer) + len(chunks[-1]) + if new_len <= width: + cur_line.pop() + cur_line.append(spacer) + cur_line.append(chunks[-1]) + chunks.pop() + cur_len = new_len + # The current line is full, and the next chunk is too big to # fit on *any* line (not just this one). if chunks and len(chunks[-1]) > width: diff --git a/Misc/NEWS.d/next/Library/2018-02-10-17-28-53.bpo-32397.kNJ3Oa.rst b/Misc/NEWS.d/next/Library/2018-02-10-17-28-53.bpo-32397.kNJ3Oa.rst new file mode 100644 index 00000000000000..a800214ef02e2d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-02-10-17-28-53.bpo-32397.kNJ3Oa.rst @@ -0,0 +1,5 @@ +Fix ``textwrap.wrap()`` so it's stable. In certain fiddly circumstances, +``textwrap.fill(x)`` wasn't the same as ``textwrap.fill(textwrap.fill(x))``, +which was surprising. 
This happened when a line was wrapped at a whitespace +blob that was longer than one character, but the following word *would* have +fit if that whitespace blob were only one character long.