From bc300fb76c8a3458d97baa26d4fb339bdda7a976 Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Thu, 30 Apr 2026 17:41:43 -0400 Subject: [PATCH] gh-148518 fix index error in local part attribute (GH-148522) As part of fixing bpo-27931, code was introduced to get_bare_quoted_string that added an empty Terminal if the quoted string was empty. This isn't the best answer in terms of the parse tree; we really want the token list to be empty in that case. But having it be empty resulted in local_part raising the index error. We find that same problem if we try to parse an address consisting of a single dquote. By fixing local_part to not raise on an empty token list, we can have the bare_quoted_string code correctly return an empty token list for the empty string cases (two dquotes or a single dquote as the entire addr-spec, at the end of a line). (cherry picked from commit bdbb55c403d2ab6b4b0a3e994d21b623fee4a544) Co-authored-by: R. David Murray --- Lib/email/_header_value_parser.py | 7 ++-- Lib/test/test_email/test_headerregistry.py | 41 ++++++++++++++++++- ...-04-13-15-59-44.gh-issue-148518.RQdvsu.rst | 4 ++ 3 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-13-15-59-44.gh-issue-148518.RQdvsu.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 35e71e04c49358..1367e34195bfdd 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -639,11 +639,11 @@ def local_part(self): for tok in self[0] + [DOT]: if tok.token_type == 'cfws': continue - if (last_is_tl and tok.token_type == 'dot' and + if (last_is_tl and tok.token_type == 'dot' and last and last[-1].token_type == 'cfws'): res[-1] = TokenList(last[:-1]) is_tl = isinstance(tok, TokenList) - if (is_tl and last.token_type == 'dot' and + if (is_tl and last.token_type == 'dot' and tok and tok[0].token_type == 'cfws'): res.append(TokenList(tok[1:])) else: @@ -1245,8 +1245,7 @@ def 
get_bare_quoted_string(value): bare_quoted_string = BareQuotedString() value = value[1:] if value and value[0] == '"': - token, value = get_qcontent(value) - bare_quoted_string.append(token) + return bare_quoted_string, value[1:] while value and value[0] != '"': if value[0] in WSP: token, value = get_fws(value) diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index b2c31b12ee57db..ae29ce3426c016 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1263,12 +1263,12 @@ class TestAddressHeader(TestHeaderBase): 'example.com', None), - } - # XXX: Need many more examples, and in particular some with names in # trailing comments, which aren't currently handled. comments in # general are not handled yet. + } + def example_as_address(self, source, defects, decoded, display_name, addr_spec, username, domain, comment): h = self.make_header('sender', source) @@ -1286,6 +1286,43 @@ def example_as_address(self, source, defects, decoded, display_name, # XXX: we have no comment support yet. 
#self.assertEqual(a.comment, comment) + example_broken_header_params = { + + 'just_dquote': + ('"', + [errors.InvalidHeaderDefect]*2, + '<>', + '', + '<>', + '', + '', + ), + + } + + def example_broken_header_as_address( + self, + source, + defects, + decoded, + display_name, + addr_spec, + username, + domain, + ): + h = self.make_header('sender', source) + self.assertEqual(h, decoded) + self.assertDefectsEqual(h.defects, defects) + a = h.address + self.assertEqual(str(a), decoded) + self.assertEqual(len(h.groups), 1) + self.assertEqual([a], list(h.groups[0].addresses)) + self.assertEqual([a], list(h.addresses)) + self.assertEqual(a.display_name, display_name) + self.assertEqual(a.addr_spec, addr_spec) + self.assertEqual(a.username, username) + self.assertEqual(a.domain, domain) + def example_as_group(self, source, defects, decoded, display_name, addr_spec, username, domain, comment): source = 'foo: {};'.format(source) diff --git a/Misc/NEWS.d/next/Library/2026-04-13-15-59-44.gh-issue-148518.RQdvsu.rst b/Misc/NEWS.d/next/Library/2026-04-13-15-59-44.gh-issue-148518.RQdvsu.rst new file mode 100644 index 00000000000000..994e4ad7446670 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-13-15-59-44.gh-issue-148518.RQdvsu.rst @@ -0,0 +1,4 @@ +If an email containing an address header that ended in an open double quote +was parsed with a non-``compat32`` policy, accessing the ``username`` attribute +of the mailbox accessed through that header object would result in an +``IndexError``. It now correctly returns an empty string as the result.