Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ See the [Contributing Guide](contributing.md) for details.
### Fixed

* Fixed dropped content in `md_in_html` (#1526).
* Fixed HTML handling corner case that prevented some content from being rendered (#1528).

## [3.8.0] - 2025-04-09

Expand Down
9 changes: 9 additions & 0 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
spec.loader.exec_module(htmlparser)
sys.modules['htmlparser'] = htmlparser

# This is a hack. We are sneaking in `</>` so we can capture it without the HTML parser
# throwing it away. When we see it, we will process it as data.
# The pattern matches either `<` followed by an ASCII letter or the literal text `</>`.
# `parse_starttag` checks for `</>` at the current position and hands those three
# characters to `handle_data` instead of parsing them as a tag.
htmlparser.starttagopen = re.compile('<[a-zA-Z]|</>')

# Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
htmlparser.piclose = re.compile(r'\?>')
# Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
Expand Down Expand Up @@ -297,6 +301,11 @@ def get_starttag_text(self) -> str:
return self.__starttag_text

def parse_starttag(self, i: int) -> int: # pragma: no cover
# Treat `</>` as normal data as it is not a real tag.
if self.rawdata[i:i + 3] == '</>':
self.handle_data(self.rawdata[i:i + 3])
return i + 3

self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
Expand Down
18 changes: 18 additions & 0 deletions tests/test_syntax/blocks/test_html_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1643,3 +1643,21 @@ def test_placeholder_in_source(self):
placeholder = md.htmlStash.get_placeholder(md.htmlStash.html_counter + 1)
result = md.postprocessors['raw_html'].run(placeholder)
self.assertEqual(placeholder, result)

def test_noname_tag(self):
    # A nameless closing tag (`</>`) inside a raw HTML block is not a real tag;
    # the block must come out exactly as it went in, with `</>` preserved.
    source = self.dedent(
        """
        <div>
        </>
        </div>
        """
    )
    expected = self.dedent(
        """
        <div>
        </>
        </div>
        """
    )
    self.assertMarkdownRenders(source, expected)
18 changes: 18 additions & 0 deletions tests/test_syntax/extensions/test_md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -1538,6 +1538,24 @@ def test_trailing_content_after_tag_in_md_block(self):
extensions=['md_in_html']
)

def test_noname_tag(self):
    # Inside a `markdown` block the nameless tag `</>` is treated as plain text:
    # it is wrapped in a paragraph and its angle brackets are escaped.
    self.assertMarkdownRenders(
        self.dedent(
            """
            <div markdown>
            </>
            </div>
            """
        ),
        self.dedent(
            """
            <div>
            <p>&lt;/&gt;</p>
            </div>
            """
        ),
        # Request the extension explicitly, as the neighboring test does, so this
        # test does not depend on defaults configured elsewhere.
        extensions=['md_in_html']
    )


def load_tests(loader, tests, pattern):
""" Ensure `TestHTMLBlocks` doesn't get run twice by excluding it here. """
Expand Down
17 changes: 17 additions & 0 deletions tests/test_syntax/inline/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,20 @@ def test_code_html(self):
"""
)
)

def test_noname_tag(self):
    # Browsers drop `</>`, but a Markdown parser must keep it: it is data, not a tag.
    # Inside a code span it should therefore be rendered with escaped angle brackets.
    source = self.dedent(
        """
        `</>`
        """
    )
    expected = self.dedent(
        """
        <p><code>&lt;/&gt;</code></p>
        """
    )
    self.assertMarkdownRenders(source, expected)
3 changes: 3 additions & 0 deletions tests/test_syntax/inline/test_raw_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ def test_inline_html_angle_brackets(self):

def test_inline_html_backslashes(self):
self.assertMarkdownRenders('<img src="..\\..\\foo.png">', '<p><img src="..\\..\\foo.png"></p>')

def test_noname_tag(self):
    # `</>` between inline tags is kept as escaped text; the surrounding `<span>` is untouched.
    source = '<span></></span>'
    expected = '<p><span>&lt;/&gt;</span></p>'
    self.assertMarkdownRenders(source, expected)