From 0ad5b0abf9560255cf9d24234f7de64c0411ab98 Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Wed, 12 Mar 2025 08:36:59 -0600 Subject: [PATCH 01/24] Abbr should respect AtomicStrings Fixes #1512 --- .pyspelling.yml | 1 + docs/changelog.md | 2 ++ markdown/extensions/abbr.py | 24 ++++++++++++----------- tests/test_syntax/extensions/test_abbr.py | 12 ++++++++++++ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index bede97116..62ad0266b 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -20,6 +20,7 @@ matrix: - alt ignores: - 'code, pre' + - '.autorefs-internal[title]' captures: - '[role=main] *|*:not(script,style)' - pyspelling.filters.context: diff --git a/docs/changelog.md b/docs/changelog.md index 34817bddf..29ff8d313 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * `md_in_html` handle tags within inline code blocks better (#1075). * `md_in_html` fix handling of one-liner block HTML handling (#1074). * Ensure `
` is treated like a block-level element (#1481). +* Ensure that `abbr` extension respects `AtomicString` and does not process + perceived abbreviations in these strings (#1512). ## [3.7] -- 2024-08-16 diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 36c1d91c8..ab7c37437 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -98,20 +98,22 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) - for child in reversed(el): self.iter_element(child, el) if text := el.text: - for m in reversed(list(self.RE.finditer(text))): - if self.abbrs[m.group(0)]: - abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), text[m.end():]) - el.insert(0, abbr) - text = text[:m.start()] - el.text = text + if not isinstance(text, AtomicString): + for m in reversed(list(self.RE.finditer(text))): + if self.abbrs[m.group(0)]: + abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), text[m.end():]) + el.insert(0, abbr) + text = text[:m.start()] + el.text = text if parent is not None and el.tail: tail = el.tail index = list(parent).index(el) + 1 - for m in reversed(list(self.RE.finditer(tail))): - abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), tail[m.end():]) - parent.insert(index, abbr) - tail = tail[:m.start()] - el.tail = tail + if not isinstance(tail, AtomicString): + for m in reversed(list(self.RE.finditer(tail))): + abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), tail[m.end():]) + parent.insert(index, abbr) + tail = tail[:m.start()] + el.tail = tail def run(self, root: etree.Element) -> etree.Element | None: ''' Step through tree to find known abbreviations. 
''' diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py index 9d3ebb27c..32016e1c7 100644 --- a/tests/test_syntax/extensions/test_abbr.py +++ b/tests/test_syntax/extensions/test_abbr.py @@ -30,6 +30,18 @@ class TestAbbr(TestCase): default_kwargs = {'extensions': ['abbr']} + def test_ignore_atomic(self): + self.assertMarkdownRenders( + self.dedent( + """ + This + + *[YAFR]: Yet Another Feature Request + """ + ), + '

This https://example.com/{YAFR}

' + ) + def test_abbr_upper(self): self.assertMarkdownRenders( self.dedent( From 7aae61bea185a4a5c0c48be2619ccc1d294aa381 Mon Sep 17 00:00:00 2001 From: Chris Mayfield Date: Thu, 20 Mar 2025 10:40:08 -0400 Subject: [PATCH 02/24] Add special case for closing nested quotes Fixes #1514. --- docs/changelog.md | 1 + markdown/extensions/smarty.py | 6 +++++- tests/test_syntax/extensions/test_smarty.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 29ff8d313..fe5aca58e 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -27,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Ensure `
` is treated like a block-level element (#1481). * Ensure that `abbr` extension respects `AtomicString` and does not process perceived abbreviations in these strings (#1512). +* The `smarty` extension correctly renders nested closing quotes (#1514). ## [3.7] -- 2024-08-16 diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index d669e69b0..c1817e634 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -134,6 +134,8 @@ #

He said, "'Quoted' words in a larger quote."

doubleQuoteSetsRe = r""""'(?=\w)""" singleQuoteSetsRe = r"""'"(?=\w)""" +doubleQuoteSetsRe2 = r'(?<=%s)\'"' % closeClass +singleQuoteSetsRe2 = r"(?<=%s)\"'" % closeClass # Special case for decade abbreviations (the '80s): decadeAbbrRe = r"(? None: (doubleQuoteStartRe, (rdquo,)), (doubleQuoteSetsRe, (ldquo + lsquo,)), (singleQuoteSetsRe, (lsquo + ldquo,)), + (doubleQuoteSetsRe2, (rsquo + rdquo,)), + (singleQuoteSetsRe2, (rdquo + rsquo,)), (decadeAbbrRe, (rsquo,)), (openingSingleQuotesRegex, (1, lsquo)), (closingSingleQuotesRegex, (rsquo,)), diff --git a/tests/test_syntax/extensions/test_smarty.py b/tests/test_syntax/extensions/test_smarty.py index 0228ddf02..035855170 100644 --- a/tests/test_syntax/extensions/test_smarty.py +++ b/tests/test_syntax/extensions/test_smarty.py @@ -44,6 +44,22 @@ def test_basic(self): '\'Quoted "words" in a larger quote.\'', '

‘Quoted “words” in a larger quote.’

' ) + self.assertMarkdownRenders( + '"Quoted words at the \'end.\'"', + '

“Quoted words at the ‘end.’”

' + ) + self.assertMarkdownRenders( + '\'Quoted words at the "end."\'', + '

‘Quoted words at the “end.”’

' + ) + self.assertMarkdownRenders( + '(He replied, "She said \'Hello.\'")', + '

(He replied, “She said ‘Hello.’”)

' + ) + self.assertMarkdownRenders( + 'He replied, "She said \'Hello.\'"', + '

He replied, “She said ‘Hello.’”

' + ) self.assertMarkdownRenders( '"quoted" text and **bold "quoted" text**', '

“quoted” text and bold “quoted” text

' From 9c6e39ace5e928f37854b3bf1010b209a79e2d63 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Tue, 25 Mar 2025 13:03:22 -0400 Subject: [PATCH 03/24] Add Python 3.13 and drop Python 3.8 --- .github/workflows/tox.yml | 9 ++++----- pyproject.toml | 4 ++-- tox.ini | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index bf7e529d5..bf6dc4d11 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -20,10 +20,8 @@ jobs: fail-fast: false max-parallel: 4 matrix: - tox-env: [py38, py39, py310, py311, py312, pypy38, pypy39, pypy310, pygments] + tox-env: [py39, py310, py311, py312, py313, pypy39, pypy310, pygments] include: - - tox-env: py38 - python-version: '3.8' - tox-env: py39 python-version: '3.9' - tox-env: py310 @@ -32,8 +30,8 @@ jobs: python-version: '3.11' - tox-env: py312 python-version: '3.12' - - tox-env: pypy38 - python-version: pypy-3.8 + - tox-env: py313 + python-version: '3.13' - tox-env: pypy39 python-version: pypy-3.9 - tox-env: pypy310 @@ -50,6 +48,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true - name: Install dependencies run: | sudo apt-get install libtidy-dev diff --git a/pyproject.toml b/pyproject.toml index 088e242af..f26857b84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ maintainers = [ {name = 'Isaac Muse'} ] license = {file = 'LICENSE.md'} -requires-python = '>=3.8' +requires-python = '>=3.9' dependencies = [ "importlib-metadata>=4.4;python_version<'3.10'" ] @@ -29,11 +29,11 @@ classifiers = [ 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', 'Programming 
Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', diff --git a/tox.ini b/tox.ini index 768e76bfe..2cea38e38 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{39, 310, 311, 312, 313}, pypy{39, 310}, pygments, flake8, checkspelling, pep517check, checklinks isolated_build = True [testenv] From f6cfc5cca3dd2c313d2fa547b7c88dac656ae506 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Tue, 25 Mar 2025 18:12:33 +0100 Subject: [PATCH 04/24] Use PEP 639 license expressions in project metadata --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f26857b84..e350a2376 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] # Minimum requirements for the build system to execute. 
-requires = ["setuptools>=61.2"] +requires = ["setuptools>=77.0"] build-backend = "setuptools.build_meta" [project] @@ -17,7 +17,8 @@ maintainers = [ {name = 'Waylan Limberg', email = 'python.markdown@gmail.com'}, {name = 'Isaac Muse'} ] -license = {file = 'LICENSE.md'} +license = "BSD-3-Clause" +license-files = ["LICENSE.md"] requires-python = '>=3.9' dependencies = [ "importlib-metadata>=4.4;python_version<'3.10'" @@ -25,7 +26,6 @@ dependencies = [ keywords = ['markdown', 'markdown-parser', 'python-markdown', 'markdown-to-html'] classifiers = [ 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', From 1caf02892487cead069cb0c4bcdd1e876ca6b590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 27 Mar 2025 13:44:01 +0100 Subject: [PATCH 05/24] Optimize raw HTML post-processor (#1510) Don't precompute placeholder replacements in raw HTML post-processor. Fixes #1507. Previously, the raw HTML post-processor would precompute all possible replacements for placeholders in a string, based on the HTML stash. It would then apply a regular expression substitution using these replacements. Finally, if the text changed, it would recurse, and do all that again. This was inefficient because placeholders were re-computed each time it recursed, and because only a few replacements would be used anyway. This change moves the recursion into the regular expression substitution, so that: 1. the regular expression does minimal work on the text (contrary to re-scanning text already scanned in previous frames); 2. but more importantly, replacements aren't computed ahead of time anymore (and even less *several times*), and only fetched from the HTML stash as placeholders are found in the text. The substitution function relies on the regular expression groups ordering: we make sure to match `

PLACEHOLDER

` first, before `PLACEHOLDER`. The presence of a wrapping `p` tag indicates whether the substitution result should be wrapped again (this also depends on whether the substituted HTML is a block-level tag). --- docs/changelog.md | 1 + markdown/postprocessors.py | 40 +++++++++++++------------------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index fe5aca58e..cd427f612 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * DRY fix in `abbr` extension by introducing method `create_element` (#1483). * Clean up test directory some removing some redundant tests and port non-redundant cases to the newer test framework. +* Improved performance of the raw HTML post-processor (#1510). ### Fixed diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90c..d4b0e1fdc 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -28,7 +28,6 @@ from __future__ import annotations -from collections import OrderedDict from typing import TYPE_CHECKING, Any from . import util import re @@ -73,37 +72,26 @@ class RawHtmlPostprocessor(Postprocessor): def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ - replacements = OrderedDict() - for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) - if self.isblocklevel(html): - replacements["

{}

".format( - self.md.htmlStash.get_placeholder(i))] = html - replacements[self.md.htmlStash.get_placeholder(i)] = html - def substitute_match(m: re.Match[str]) -> str: - key = m.group(0) - - if key not in replacements: - if key[3:-4] in replacements: - return f'

{ replacements[key[3:-4]] }

' - else: - return key - - return replacements[key] - - if replacements: + if key := m.group(1): + wrapped = True + else: + key = m.group(2) + wrapped = False + if (key := int(key)) >= self.md.htmlStash.html_counter: + return m.group(0) + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[key]) + if not wrapped or self.isblocklevel(html): + return pattern.sub(substitute_match, html) + return pattern.sub(substitute_match, f"

{html}

") + + if self.md.htmlStash.html_counter: base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)' pattern = re.compile(f'

{ base_placeholder }

|{ base_placeholder }') - processed_text = pattern.sub(substitute_match, text) + return pattern.sub(substitute_match, text) else: return text - if processed_text == text: - return processed_text - else: - return self.run(processed_text) - def isblocklevel(self, html: str) -> bool: """ Check is block of HTML is block-level. """ m = self.BLOCK_LEVEL_REGEX.match(html) From 42d4b4336e97485b96c1e4e9fcdb4e8df2501217 Mon Sep 17 00:00:00 2001 From: Jan Brasna <1784648+janbrasna@users.noreply.github.com> Date: Wed, 2 Apr 2025 16:09:18 +0200 Subject: [PATCH 06/24] Fix CI badge in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eeea0c239..38c838bf5 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ [![BSD License][bsdlicense-button]][bsdlicense] [![Code of Conduct][codeofconduct-button]][Code of Conduct] -[build-button]: https://github.com/Python-Markdown/markdown/workflows/CI/badge.svg?event=push -[build]: https://github.com/Python-Markdown/markdown/actions?query=workflow%3ACI+event%3Apush +[build-button]: https://github.com/Python-Markdown/markdown/actions/workflows/tox.yml/badge.svg +[build]: https://github.com/Python-Markdown/markdown/actions/workflows/tox.yml [codecov-button]: https://codecov.io/gh/Python-Markdown/markdown/branch/master/graph/badge.svg [codecov]: https://codecov.io/gh/Python-Markdown/markdown [mdversion-button]: https://img.shields.io/pypi/v/Markdown.svg From e912575a903215ebafaeb0fecbdad079d998b9ba Mon Sep 17 00:00:00 2001 From: Vladyslav Prudius Date: Wed, 2 Apr 2025 13:03:45 +0300 Subject: [PATCH 07/24] Fix incorrect TOC list structure in docs --- docs/extensions/toc.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/extensions/toc.md b/docs/extensions/toc.md index d1c64a9de..6db62226f 100644 --- a/docs/extensions/toc.md +++ b/docs/extensions/toc.md @@ -49,10 +49,12 @@ would generate the following output: ```html

Header 1

@@ -121,10 +123,12 @@ would generate the following output: ```html

Functions

From bd67d4862b388c7c2dd1ae90635c633472c2c77c Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 9 Apr 2025 12:22:38 -0400 Subject: [PATCH 08/24] Improve changelog validation * Cleanup versions in changelog * Refactor changelog to validate. Use nested lists for contents of each version. * Fix internal link in changelog * Refactor action to use 2 jobs --- .github/workflows/changelog-validator.yml | 29 +- docs/changelog.md | 742 +++++++++++----------- 2 files changed, 392 insertions(+), 379 deletions(-) diff --git a/.github/workflows/changelog-validator.yml b/.github/workflows/changelog-validator.yml index 2dff4708d..bf05b0072 100644 --- a/.github/workflows/changelog-validator.yml +++ b/.github/workflows/changelog-validator.yml @@ -7,14 +7,41 @@ on: - 'docs/changelog.md' jobs: - validate: + validate-unreleased: + # Validates changelog and confirms that an Unreleased entry exists. + # Only run when the `release` label is not set on a PR. + if: ${{ ! contains(github.event.pull_request.labels.*.name, 'release') }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Validate Changelog + id: changelog_reader + uses: mindsers/changelog-reader-action@v2 + with: + validation_level: error + path: docs/changelog.md + version: Unreleased + validate-release: + # Validates changelog and confirms an entry exists for version in code. + # Only run when the `release` label is set on a PR. + if: ${{ contains(github.event.pull_request.labels.*.name, 'release') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Install Markdown + run: python -m pip install semver . 
+ - name: Get Markdown Version + id: markdown + run: echo "version=$(python -c 'import markdown, semver; print(semver.Version.parse(markdown.__version__, optional_minor_and_patch=True))')" >> $GITHUB_OUTPUT - name: Validate Changelog id: changelog_reader uses: mindsers/changelog-reader-action@v2 with: validation_level: error path: docs/changelog.md + version: ${{ steps.markdown.outputs.version }} diff --git a/docs/changelog.md b/docs/changelog.md index cd427f612..272ca0f54 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -6,88 +6,88 @@ toc_depth: 2 All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [Contributing Guide](contributing.md) for details. +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +See the [Contributing Guide](contributing.md) for details. ## [Unreleased] ### Changed * DRY fix in `abbr` extension by introducing method `create_element` (#1483). -* Clean up test directory some removing some redundant tests and port +* Clean up test directory by removing some redundant tests and port non-redundant cases to the newer test framework. * Improved performance of the raw HTML post-processor (#1510). ### Fixed * Backslash Unescape IDs set via `attr_list` on `toc` (#1493). -* `md_in_html` will process content inside "markdown" blocks a similar way - as they are parsed outside of "markdown" blocks giving a more consistent - expectation to external extensions (#1503). +* Ensure `md_in_html` processes content inside "markdown" blocks as they are + parsed outside of "markdown" blocks to keep things more consistent for + third-party extensions (#1503). * `md_in_html` handle tags within inline code blocks better (#1075). * `md_in_html` fix handling of one-liner block HTML handling (#1074). * Ensure `
` is treated like a block-level element (#1481). * Ensure that `abbr` extension respects `AtomicString` and does not process perceived abbreviations in these strings (#1512). -* The `smarty` extension correctly renders nested closing quotes (#1514). +* Ensure `smarty` extension correctly renders nested closing quotes (#1514). -## [3.7] -- 2024-08-16 +## [3.7.0] - 2024-08-16 ### Changed -#### Refactor `abbr` Extension +* Refactor `abbr` Extension -A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated -`AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists, -avoiding a conflict between the two extensions (#1460). + A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated + `AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists, + avoiding a conflict between the two extensions (#1460). -The `AbbrPreprocessor` class has been renamed to `AbbrBlockprocessor`, which -better reflects what it is. `AbbrPreprocessor` has been deprecated. + The `AbbrPreprocessor` class has been renamed to `AbbrBlockprocessor`, which + better reflects what it is. `AbbrPreprocessor` has been deprecated. -A call to `Markdown.reset()` now clears all previously defined abbreviations. + A call to `Markdown.reset()` now clears all previously defined abbreviations. -Abbreviations are now sorted by length before executing `AbbrTreeprocessor` -to ensure that multi-word abbreviations are implemented even if an abbreviation -exists for one of those component words. (#1465) + Abbreviations are now sorted by length before executing `AbbrTreeprocessor` + to ensure that multi-word abbreviations are implemented even if an abbreviation + exists for one of those component words. (#1465) -Abbreviations without a definition are now ignored. This avoids applying -abbr tags to text without a title value. + Abbreviations without a definition are now ignored. 
This avoids applying + abbr tags to text without a title value. -Added an optional `glossary` configuration option to the abbreviations extension. -This provides a simple and efficient way to apply a dictionary of abbreviations -to every page. - -Abbreviations can now be disabled by setting their definition to `""` or `''`. -This can be useful when using the `glossary` option. + Added an optional `glossary` configuration option to the abbreviations extension. + This provides a simple and efficient way to apply a dictionary of abbreviations + to every page. + Abbreviations can now be disabled by setting their definition to `""` or `''`. + This can be useful when using the `glossary` option. ### Fixed * Fixed links to source code on GitHub from the documentation (#1453). -## [3.6] -- 2024-03-14 +## [3.6.0] - 2024-03-14 ### Changed -#### Refactor TOC Sanitation - -* All postprocessors are now run on heading content. -* Footnote references are now stripped from heading content. Fixes #660. -* A more robust `striptags` is provided to convert headings to plain text. - Unlike, the `markupsafe` implementation, HTML entities are not unescaped. -* The plain text `name`, rich `html`, and unescaped raw `data-toc-label` are - saved to `toc_tokens`, allowing users to access the full rich text content of - the headings directly from `toc_tokens`. -* The value of `data-toc-label` is sanitized separate from heading content - before being written to `name`. This fixes a bug which allowed markup through - in certain circumstances. To access the raw unsanitized data, retrieve the - value from `token['data-toc-label']` directly. -* An `html.unescape` call is made just prior to calling `slugify` so that - `slugify` only operates on Unicode characters. Note that `html.unescape` is - not run on `name`, `html`, or `data-toc-label`. -* The functions `get_name` and `stashedHTML2text` defined in the `toc` extension - are both **deprecated**. 
Instead, third party extensions should use some - combination of the new functions `run_postprocessors`, `render_inner_html` and - `striptags`. +* Refactor TOC Sanitation + + * All postprocessors are now run on heading content. + * Footnote references are now stripped from heading content. Fixes #660. + * A more robust `striptags` is provided to convert headings to plain text. + Unlike the `markupsafe` implementation, HTML entities are not unescaped. + * The plain text `name`, rich `html`, and unescaped raw `data-toc-label` are + saved to `toc_tokens`, allowing users to access the full rich text content of + the headings directly from `toc_tokens`. + * The value of `data-toc-label` is sanitized separately from heading content + before being written to `name`. This fixes a bug which allowed markup through + in certain circumstances. To access the raw unsanitized data, retrieve the + value from `token['data-toc-label']` directly. + * An `html.unescape` call is made just prior to calling `slugify` so that + `slugify` only operates on Unicode characters. Note that `html.unescape` is + not run on `name`, `html`, or `data-toc-label`. + * The functions `get_name` and `stashedHTML2text` defined in the `toc` extension + are both **deprecated**. Instead, third party extensions should use some + combination of the new functions `run_postprocessors`, `render_inner_html` and + `striptags`. ### Fixed @@ -99,7 +99,7 @@ This can be useful when using the `glossary` option. * In attribute lists (`attr_list`, `fenced_code`), quoted attribute values are now allowed to contain curly braces (`}`) (#1414). -## [3.5.2] -- 2024-01-10 +## [3.5.2] - 2024-01-10 ### Fixed @@ -113,7 +113,7 @@ This can be useful when using the `glossary` option. * Improve and expand type annotations in the code base (#1401). * Fix handling of bogus comments (#1425). -## [3.5.1] -- 2023-10-31 +## [3.5.1] - 2023-10-31 ### Fixed @@ -121,17 +121,17 @@ This can be useful when using the `glossary` option. 
trigger quadratic line counting behavior (#1392). * Improve and expand type annotations in the code base (#1394). -## [3.5] -- 2023-10-06 +## [3.5.0] - 2023-10-06 ### Added -#### Add `permalink_leading` configuration option to the toc extension (#1339) +* Add `permalink_leading` configuration option to the toc extension (#1339) -A new boolean option `permalink_leading` controls the position of the permanent -link anchors generated with `permalink`. Setting `permalink_leading` to `True` -will cause the links to be inserted at the start of the header, before any other -header content. The default behavior for `permalink` is to append permanent -links to the header, placing them after all other header content. + A new boolean option `permalink_leading` controls the position of the permanent + link anchors generated with `permalink`. Setting `permalink_leading` to `True` + will cause the links to be inserted at the start of the header, before any other + header content. The default behavior for `permalink` is to append permanent + links to the header, placing them after all other header content. ### Changed @@ -148,7 +148,7 @@ links to the header, placing them after all other header content. * Fix a corner case in admonitions where if an indented code block was provided as the first block, the output would be malformed (#1329). -## [3.4.4] -- 2023-07-25 +## [3.4.4] - 2023-07-25 ### Fixed @@ -156,13 +156,13 @@ links to the header, placing them after all other header content. * Unescape any backslash escaped inline raw HTML (#1358). * Unescape backslash escaped TOC token names (#1360). -## [3.4.3] -- 2023-03-23 +## [3.4.3] - 2023-03-23 ### Fixed * Restore console script (#1327). -## [3.4.2] -- 2023-03-22 +## [3.4.2] - 2023-03-22 ### Fixed * Officially support Python 3.11. @@ -170,72 +170,70 @@ links to the header, placing them after all other header content. * Consider `` HTML tag a block-level element (#1309). * Switch from `setup.py` to `pyproject.toml`. 
-## [3.4.1] -- 2022-07-15 +## [3.4.1] - 2022-07-15 ### Fixed * Fix an import issue with `importlib.util` (#1274). -## [3.4] -- 2022-07-15 +## [3.4.0] - 2022-07-15 ### Changed -#### The `tables` extension now uses a `style` attribute instead of an `align` attribute for alignment. +* The `tables` extension now uses a `style` attribute instead of an `align` attribute for alignment. -The [HTML4 spec](https://www.w3.org/TR/html4/present/graphics.html#h-15.1.2) -specifically deprecates the use of the `align` attribute and it does not appear -at all in the [HTML5 -spec](https://www.w3.org/TR/html53/tabular-data.html#attributes-common-to-td-and-th-elements). -Therefore, by default, the [tables](extensions/tables.md) extension will now use -the `style` attribute (setting just the `text-align` property) in `td` and `th` -blocks. + The [HTML4 spec](https://www.w3.org/TR/html4/present/graphics.html#h-15.1.2) + specifically deprecates the use of the `align` attribute and it does not appear + at all in the [HTML5 + spec](https://www.w3.org/TR/html53/tabular-data.html#attributes-common-to-td-and-th-elements). + Therefore, by default, the [tables](extensions/tables.md) extension will now use + the `style` attribute (setting just the `text-align` property) in `td` and `th` + blocks. -The former behavior is available by setting the `use_align_attribute` -configuration option to `True` when enabling the extension. + The former behavior is available by setting the `use_align_attribute` + configuration option to `True` when enabling the extension. 
-For example, to configure the old `align` behavior: + For example, to configure the old `align` behavior: -```python -from markdown.extensions.tables import TableExtension + from markdown.extensions.tables import TableExtension -markdown.markdown(src, extensions=[TableExtension(use_align_attribute=True)]) -``` + markdown.markdown(src, extensions=[TableExtension(use_align_attribute=True)]) -#### Backslash unescaping moved to Treeprocessor (#1131). +* Backslash unescaping moved to Treeprocessor (#1131). -Unescaping backslash escapes has been moved to a Treeprocessor, which enables -proper HTML escaping during serialization. However, it is recognized that -various third-party extensions may be calling the old class at -`postprocessors.UnescapePostprocessor`. Therefore, the old class remains in the -code base, but has been deprecated and will be removed in a future release. The -new class `treeprocessors.UnescapeTreeprocessor` should be used instead. + Unescaping backslash escapes has been moved to a Treeprocessor, which enables + proper HTML escaping during serialization. However, it is recognized that + various third-party extensions may be calling the old class at + `postprocessors.UnescapePostprocessor`. Therefore, the old class remains in the + code base, but has been deprecated and will be removed in a future release. The + new class `treeprocessors.UnescapeTreeprocessor` should be used instead. -#### Previously deprecated objects have been removed +* Previously deprecated objects have been removed -Various objects were deprecated in version 3.0 and began raising deprecation -warnings (see the [version 3.0 release notes](#30-2018-09-21) for details). Any of those objects -which remained in version 3.3 have been removed from the code base in version 3.4 -and will now raise errors. The relevant objects are listed below. 
+ Various objects were deprecated in version 3.0 and began raising deprecation + warnings (see the [version 3.0 release notes](#300-2018-09-21) for details). Any of those objects + which remained in version 3.3 have been removed from the code base in version 3.4 + and will now raise errors. The relevant objects are listed below. -| Deprecated Object | Replacement Object | -|----------------------------------------|-------------------------------------| -| `markdown.version` | `markdown.__version__` | -| `markdown.version_info` | `markdown.__version_info__` | -| `markdown.util.etree` | `xml.etree.ElementTree` | -| `markdown.util.string_type` | `str` | -| `markdown.util.text_type` | `str` | -| `markdown.util.int2str` | `chr` | -| `markdown.util.iterrange` | `range` | -| `markdown.util.isBlockLevel` | `markdown.Markdown().is_block_level`| -| `markdown.util.Processor().markdown` | `markdown.util.Processor().md` | -| `markdown.util.Registry().__setitem__` | `markdown.util.Registry().register` | -| `markdown.util.Registry().__delitem__` |`markdown.util.Registry().deregister`| -| `markdown.util.Registry().add` | `markdown.util.Registry().register` | + | Deprecated Object | Replacement Object | + |----------------------------------------|-------------------------------------| + | `markdown.version` | `markdown.__version__` | + | `markdown.version_info` | `markdown.__version_info__` | + | `markdown.util.etree` | `xml.etree.ElementTree` | + | `markdown.util.string_type` | `str` | + | `markdown.util.text_type` | `str` | + | `markdown.util.int2str` | `chr` | + | `markdown.util.iterrange` | `range` | + | `markdown.util.isBlockLevel` | `markdown.Markdown().is_block_level`| + | `markdown.util.Processor().markdown` | `markdown.util.Processor().md` | + | `markdown.util.Registry().__setitem__` | `markdown.util.Registry().register` | + | `markdown.util.Registry().__delitem__` |`markdown.util.Registry().deregister`| + | `markdown.util.Registry().add` | 
`markdown.util.Registry().register` | -In addition, the `md_globals` parameter of -`Markdown.extensions.Extension.extendMarkdown()` is no longer recognized as a -valid parameter and will raise an error if provided. + In addition, the `md_globals` parameter of + `Markdown.extensions.Extension.extendMarkdown()` is no longer recognized as a + valid parameter and will raise an error if provided. ### Added @@ -279,7 +277,7 @@ valid parameter and will raise an error if provided. `PrettifyTreeprocessor` (#1261, #1263). * Fix XML deprecation warnings. -## [3.3.7] -- 2022-05-05 +## [3.3.7] - 2022-05-05 ### Fixed @@ -287,13 +285,13 @@ valid parameter and will raise an error if provided. * Retain configured `pygments_style` after first code block (#1240). * Ensure fenced code attributes are properly escaped (#1247). -## [3.3.6] -- 2021-11-17 +## [3.3.6] - 2021-11-17 ### Fixed * Fix a dependency issue (#1195, #1196). -## [3.3.5] -- 2021-11-16 +## [3.3.5] - 2021-11-16 ### Fixed @@ -305,7 +303,7 @@ valid parameter and will raise an error if provided. * Ensure `` tags are parsed correctly (#1079). * Support Python 3.10 (#1124). -## [3.3.4] -- 2021-02-24 +## [3.3.4] - 2021-02-24 ### Fixed @@ -317,7 +315,7 @@ valid parameter and will raise an error if provided. * Ensure `permalinks` and `anchorlinks` are not restricted by `toc_depth` (#1107). * Fix corner cases with lists under admonitions (#1102). -## [3.3.3] -- 2020-10-25 +## [3.3.3] - 2020-10-25 ### Fixed @@ -326,71 +324,69 @@ valid parameter and will raise an error if provided. * Avoid catastrophic backtracking in `hr` regex (#1055). * Fix `hr` HTML handling (#1053). -## [3.3.2] -- 2020-10-19 +## [3.3.2] - 2020-10-19 ### Fixed * Properly parse inline HTML in md_in_html (#1040 & #1045). * Avoid crashing when md_in_html fails (#1040). -## [3.3.1] -- 2020-10-12 +## [3.3.1] - 2020-10-12 ### Fixed * Correctly parse raw `script` and `style` tags (#1036). * Ensure consistent class handling by `fenced_code` and `codehilite` (#1032). 
-## [3.3] -- 2020-10-06 +## [3.3.0] - 2020-10-06 ### Changed -#### The prefix `language-` is now prepended to all language classes by default on code blocks. +* The prefix `language-` is now prepended to all language classes by default on code blocks. -The [HTML5 -spec](https://www.w3.org/TR/html5/text-level-semantics.html#the-code-element) -recommends that the class defining the language of a code block be prefixed with -`language-`. Therefore, by default, both the -[fenced_code](extensions/fenced_code_blocks.md) and -[codehilite](extensions/code_hilite.md) extensions now prepend the prefix when -code highlighting is disabled. + The [HTML5 + spec](https://www.w3.org/TR/html5/text-level-semantics.html#the-code-element) + recommends that the class defining the language of a code block be prefixed with + `language-`. Therefore, by default, both the + [fenced_code](extensions/fenced_code_blocks.md) and + [codehilite](extensions/code_hilite.md) extensions now prepend the prefix when + code highlighting is disabled. -If you have previously been including the prefix manually in your fenced code blocks, then you will not want a second -instance of the prefix. Similarly, if you are using a third party syntax highlighting tool which does not recognize -the prefix, or requires a different prefix, then you will want to redefine the prefix globally using the `lang_prefix` -configuration option of either the `fenced_code` or `codehilite` extensions. + If you have previously been including the prefix manually in your fenced code blocks, then you will not want a second + instance of the prefix. Similarly, if you are using a third party syntax highlighting tool which does not recognize + the prefix, or requires a different prefix, then you will want to redefine the prefix globally using the `lang_prefix` + configuration option of either the `fenced_code` or `codehilite` extensions. 
-For example, to configure `fenced_code` to not apply any prefix (the previous behavior), set the option to an empty string: + For example, to configure `fenced_code` to not apply any prefix (the previous behavior), set the option to an empty string: -```python -from markdown.extensions.fenced_code import FencedCodeExtension + from markdown.extensions.fenced_code import FencedCodeExtension -markdown.markdown(src, extensions=[FencedCodeExtension(lang_prefix='')]) -``` + markdown.markdown(src, extensions=[FencedCodeExtension(lang_prefix='')]) -!!! note - When code highlighting is - [enabled](extensions/fenced_code_blocks.md#enabling-syntax-highlighting), - the output from Pygments is used unaltered. Currently, Pygments does not - provide an option to include the language class in the output, let alone - prefix it. Therefore, any language prefix is only applied when syntax - highlighting is disabled. + !!! note + When code highlighting is + [enabled](extensions/fenced_code_blocks.md#enabling-syntax-highlighting), + the output from Pygments is used unaltered. Currently, Pygments does not + provide an option to include the language class in the output, let alone + prefix it. Therefore, any language prefix is only applied when syntax + highlighting is disabled. -#### Attribute Lists are more strict (#898). +* Attribute Lists are more strict (#898). -Empty curly braces are now completely ignored by the [Attribute List](extensions/attr_list.md) extension. Previously, the extension would -recognize them as attribute lists and remove them from the document. Therefore, it is no longer necessary to backslash -escape a set of curly braces which are empty or only contain whitespace. + Empty curly braces are now completely ignored by the [Attribute List](extensions/attr_list.md) extension. Previously, the extension would + recognize them as attribute lists and remove them from the document. 
Therefore, it is no longer necessary to backslash + escape a set of curly braces which are empty or only contain whitespace. -Despite not being documented, previously an attribute list could be defined anywhere within a table cell and get -applied to the cell (`<td>` element). Now the attribute list must be defined at the end of the cell content and must -be separated from the rest of the content by at least one space. This makes it easy to differentiate between attribute -lists defined on inline elements within a cell and the attribute list for the cell itself. It is also more consistent -with how attribute lists are defined on other types of elements. + Despite not being documented, previously an attribute list could be defined anywhere within a table cell and get + applied to the cell (`<td>` element). Now the attribute list must be defined at the end of the cell content and must + be separated from the rest of the content by at least one space. This makes it easy to differentiate between attribute + lists defined on inline elements within a cell and the attribute list for the cell itself. It is also more consistent + with how attribute lists are defined on other types of elements. -The extension has also added support for defining attribute lists on table header cells (`<th>` elements) in the same -manner as data cells (`<td>` elements). + The extension has also added support for defining attribute lists on table header cells (`<th>` elements) in the same + manner as data cells (`<td>` elements). -In addition, the documentation for the extensions received an overhaul. The features (#987) and limitations (#965) of the extension are now fully documented. + In addition, the documentation for the extensions received an overhaul. The features (#987) and limitations (#965) of the extension are now fully documented. ### Added @@ -443,7 +439,7 @@ In addition, the documentation for the extensions received an overhaul. The feat * Fix complex scenarios involving lists and admonitions (#1004). 
* Fix complex scenarios with nested ordered and unordered lists in a definition list (#918). -## [3.2.2] -- 2020-05-08 +## [3.2.2] - 2020-05-08 ### Fixed @@ -457,72 +453,72 @@ In addition, the documentation for the extensions received an overhaul. The feat * Remove import of `packaging` (or `pkg_resources` fallback) entirely. * Remove `setuptools` as a run-time dependency (`install_required`). -## [3.2.1] -- 2020-02-12 +## [3.2.1] - 2020-02-12 ### Fixed * The `name` property in `toc_tokens` from the TOC extension now escapes HTML special characters (`<`, `>`, and `&`). -## [3.2] -- 2020-02-07 +## [3.2.0] - 2020-02-07 ### Changed -#### Drop support for Python 2.7 +* Drop support for Python 2.7 -Python 2.7 reaches end-of-life on 2020-01-01 and Python-Markdown 3.2 has dropped -support for it. Please upgrade to Python 3, or use Python-Markdown 3.1. + Python 2.7 reaches end-of-life on 2020-01-01 and Python-Markdown 3.2 has dropped + support for it. Please upgrade to Python 3, or use Python-Markdown 3.1. -#### `em` and `strong` inline processor changes +* `em` and `strong` inline processor changes -In order to fix issue #792, `em`/`strong` inline processors were refactored. This -translated into removing many of the existing inline processors that handled this -logic: + In order to fix issue #792, `em`/`strong` inline processors were refactored. This + translated into removing many of the existing inline processors that handled this + logic: -* `em_strong` -* `strong` -* `emphasis` -* `strong2` -* `emphasis` + * `em_strong` + * `strong` + * `emphasis` + * `strong2` + * `emphasis2` -These processors were replaced with two new ones: + These processors were replaced with two new ones: -* `em_strong` -* `em_strong2` + * `em_strong` + * `em_strong2` -The [`legacy_em`](extensions/legacy_em.md) extension was also modified with new, -refactored logic and simply overrides the `em_strong2` inline processor. 
+ The [`legacy_em`](extensions/legacy_em.md) extension was also modified with new, + refactored logic and simply overrides the `em_strong2` inline processor. -#### CodeHilite now always wraps with `<code>` tags +* CodeHilite now always wraps with `<code>` tags -Before, the HTML generated by CodeHilite looked like: -- `<pre><code>foo = 'bar'</code></pre>` if you **were not** using Pygments. -- `<pre>foo = 'bar'</pre>` if you **were** using Pygments. + Before, the HTML generated by CodeHilite looked like: + - `<pre><code>foo = 'bar'</code></pre>` if you **were not** using Pygments. + - `<pre>foo = 'bar'</pre>` if you **were** using Pygments. -To make the cases more consistent (and adhere to many Markdown specifications and -HTML code block markup suggestions), CodeHilite will now always additionally wrap -code with `<code>` tags. See #862 for more details. + To make the cases more consistent (and adhere to many Markdown specifications and + HTML code block markup suggestions), CodeHilite will now always additionally wrap + code with `<code>` tags. See #862 for more details. -This change does not alter the Python-Markdown API, but users relying on the old -markup will find their output now changed. + This change does not alter the Python-Markdown API, but users relying on the old + markup will find their output now changed. -Internally, this change relies on the Pygments 2.4, so you must be using at least -that version to see this effect. Users with earlier Pygments versions will -continue to see the old behavior. + Internally, this change relies on Pygments 2.4, so you must be using at least + that version to see this effect. Users with earlier Pygments versions will + continue to see the old behavior. -#### `markdown.util.etree` deprecated +* `markdown.util.etree` deprecated -Previously, Python-Markdown was using either the `xml.etree.cElementTree` module -or the `xml.etree.ElementTree` module, based on their availability. In modern -Python versions, the former is a deprecated alias for the latter. Thus, the -compatibility layer is deprecated and extensions are advised to use -`xml.etree.ElementTree` directly. Importing `markdown.util.etree` will raise -a `DeprecationWarning` beginning in version 3.2 and may be removed in a future -release. + Previously, Python-Markdown was using either the `xml.etree.cElementTree` module + or the `xml.etree.ElementTree` module, based on their availability. In modern + Python versions, the former is a deprecated alias for the latter. 
Thus, the + compatibility layer is deprecated and extensions are advised to use + `xml.etree.ElementTree` directly. Importing `markdown.util.etree` will raise + a `DeprecationWarning` beginning in version 3.2 and may be removed in a future + release. -Therefore, extension developers are encouraged to replace -`from markdown.util import etree` with -`import xml.etree.ElementTree as etree` in their code. + Therefore, extension developers are encouraged to replace + `from markdown.util import etree` with + `import xml.etree.ElementTree as etree` in their code. ### Added @@ -552,7 +548,7 @@ Therefore, extension developers are encouraged to replace * Refactor bold and italic logic in order to solve complex nesting issues (#792). * Always wrap CodeHilite code in `code` tags (#862). -## [3.1.1] -- 2019-05-20 +## [3.1.1] - 2019-05-20 ### Fixed @@ -561,18 +557,18 @@ Therefore, extension developers are encouraged to replace * Prefer public `packaging` module to pkg_resources' private copy of it (#825). -## [3.1] -- 2019-03-25 +## [3.1.0] - 2019-03-25 ### Changed -#### `markdown.version` and `markdown.version_info` deprecated +* `markdown.version` and `markdown.version_info` deprecated -Historically, version numbers were acquired via the attributes -`markdown.version` and `markdown.version_info`. As of 3.0, a more standardized -approach is being followed and versions are acquired via the -`markdown.__version__` and `markdown.__version_info__` attributes. As of 3.1 -the legacy attributes will raise a `DeprecationWarning` if they are accessed. In -a future release the legacy attributes will be removed. + Historically, version numbers were acquired via the attributes + `markdown.version` and `markdown.version_info`. As of 3.0, a more standardized + approach is being followed and versions are acquired via the + `markdown.__version__` and `markdown.__version_info__` attributes. As of 3.1 + the legacy attributes will raise a `DeprecationWarning` if they are accessed. 
In + a future release the legacy attributes will be removed. ### Added @@ -601,206 +597,196 @@ a future release the legacy attributes will be removed. * Problems with newlines in references has been fixed (#742). * Escaped `#` are now handled in header syntax (#762). -## [3.0.1] -- 2018-09-28 +## [3.0.1] - 2018-09-28 ### Fixed * Brought back the `version` and `version_info` variables (#709). * Added support for hexadecimal HTML entities (#712). -## [3.0] -- 2018-09-21 +## [3.0.0] - 2018-09-21 ### Changed -#### `enable_attributes` keyword deprecated +* `enable_attributes` keyword deprecated + + The `enable_attributes` keyword is deprecated in version 3.0 and will be + ignored. Previously the keyword was `True` by default and enabled an + undocumented way to define attributes on document elements. The feature has been + removed from version 3.0. As most users did not use the undocumented feature, it + should not affect most users. For the few who did use the feature, it can be + enabled by using the [Legacy Attributes](extensions/legacy_attrs.md) + extension. + +* `smart_emphasis` keyword and `smart_strong` extension deprecated + + The `smart_emphasis` keyword is deprecated in version 3.0 and will be ignored. + Previously the keyword was `True` by default and caused the parser to ignore + middle-word emphasis. Additionally, the optional `smart_strong` extension + provided the same behavior for strong emphasis. Both of those features are now + part of the default behavior, and the [Legacy + Emphasis](extensions/legacy_em.md) extension is available to disable that + behavior. + +* `output_formats` simplified to `html` and `xhtml`. + + The `output_formats` keyword now only accepts two options: `html` and `xhtml`. + Note that if `(x)html1`, `(x)html4` or `(x)html5` are passed in, the number is + stripped and ignored. 
+ +* `safe_mode` and `html_replacement_text` keywords deprecated + + Both `safe_mode` and the associated `html_replacement_text` keywords are + deprecated in version 3.0 and will be ignored. The so-called "safe mode" was + never actually "safe" which has resulted in many people having a false sense of + security when using it. As an alternative, the developers of Python-Markdown + recommend that any untrusted content be passed through an HTML sanitizer (like + [Bleach](https://bleach.readthedocs.io/)) after being converted to HTML by + markdown. In fact, [Bleach + Whitelist](https://github.com/yourcelf/bleach-whitelist) provides a curated list + of tags, attributes, and styles suitable for filtering user-provided HTML using + bleach. + + If your code previously looked like this: + + html = markdown.markdown(text, safe_mode=True) + + Then it is recommended that you change your code to read something like this: + + import bleach + from bleach_whitelist import markdown_tags, markdown_attrs + html = bleach.clean(markdown.markdown(text), markdown_tags, markdown_attrs) + + If you are not interested in sanitizing untrusted text, but simply desire to + escape raw HTML, then that can be accomplished through an extension which + removes HTML parsing: + + from markdown.extensions import Extension + + class EscapeHtml(Extension): + def extendMarkdown(self, md): + md.preprocessors.deregister('html_block') + md.inlinePatterns.deregister('html') + + html = markdown.markdown(text, extensions=[EscapeHtml()]) + + As the HTML would not be parsed with the above Extension, then the serializer + will escape the raw HTML, which is exactly what happened in previous versions + with `safe_mode="escape"`. + +* Positional arguments deprecated + + Positional arguments on the `markdown.Markdown()` class are deprecated as are + all except the `text` argument on the `markdown.markdown()` wrapper function. + Using positional arguments will raise an error. Only keyword arguments should be + used. 
For example, if your code previously looked like this: + + html = markdown.markdown(text, [SomeExtension()]) + + Then it is recommended that you change it to read something like this: + + html = markdown.markdown(text, extensions=[SomeExtension()]) + + !!! Note + This change is being made as a result of deprecating `"safe_mode"` as the + `safe_mode` argument was one of the positional arguments. When that argument + is removed, the two arguments following it will no longer be at the correct + position. It is recommended that you always use keywords when they are + supported for this reason. + +* Extension name behavior has changed + + In previous versions of Python-Markdown, the built-in extensions received + special status and did not require the full path to be provided. Additionally, + third party extensions whose name started with `"mdx_"` received the same + special treatment. This is no longer the case. + + Support has been added for extensions to define an [entry + point](extensions/api.md#entry_point). An entry point is a string name which + can be used to point to an `Extension` class. The built-in extensions now have + entry points which match the old short names. And any third-party extensions + which define entry points can now get the same behavior. See the documentation + for each specific extension to find the assigned name. + + If an extension does not define an entry point, then the full path to the + extension must be used. See the [documentation](reference.md#extensions) for + a full explanation of the current behavior. + +* Extension configuration as part of extension name deprecated + + The previously documented method of appending the extension configuration + options as a string to the extension name is deprecated and will raise an error. + The [`extension_configs`](reference.md#extension_configs) keyword should be + used instead. See the [documentation](reference.md#extension_configs) for a + full explanation of the current behavior. 
+ +* HeaderId extension deprecated + + The HeaderId Extension is deprecated and will raise an error if specified. Use + the [Table of Contents](extensions/toc.md) Extension instead, which offers + most of the features of the HeaderId Extension and more (support for meta data + is missing). -The `enable_attributes` keyword is deprecated in version 3.0 and will be -ignored. Previously the keyword was `True` by default and enabled an -undocumented way to define attributes on document elements. The feature has been -removed from version 3.0. As most users did not use the undocumented feature, it -should not affect most users. For the few who did use the feature, it can be -enabled by using the [Legacy Attributes](extensions/legacy_attrs.md) -extension. + Extension authors who have been using the `slugify` and `unique` functions + defined in the HeaderId Extension should note that those functions are now + defined in the Table of Contents extension and should adjust their import + statements accordingly (`from markdown.extensions.toc import slugify, unique`). -#### `smart_emphasis` keyword and `smart_strong` extension deprecated - -The `smart_emphasis` keyword is deprecated in version 3.0 and will be ignored. -Previously the keyword was `True` by default and caused the parser to ignore -middle-word emphasis. Additionally, the optional `smart_strong` extension -provided the same behavior for strong emphasis. Both of those features are now -part of the default behavior, and the [Legacy -Emphasis](extensions/legacy_em.md) extension is available to disable that -behavior. - -#### `output_formats` simplified to `html` and `xhtml`. +* Homegrown `OrderedDict` has been replaced with a purpose-built `Registry` -The `output_formats` keyword now only accepts two options: `html` and `xhtml` -Note that if `(x)html1`, `(x)html4` or `(x)html5` are passed in, the number is -stripped and ignored. 
+ All processors and patterns now get "registered" to a + [Registry](extensions/api.md#registry). A backwards compatible shim is + included so that existing simple extensions should continue to work. + A `DeprecationWarning` will be raised for any code which calls the old API. -#### `safe_mode` and `html_replacement_text` keywords deprecated - -Both `safe_mode` and the associated `html_replacement_text` keywords are -deprecated in version 3.0 and will be ignored. The so-called "safe mode" was -never actually "safe" which has resulted in many people having a false sense of -security when using it. As an alternative, the developers of Python-Markdown -recommend that any untrusted content be passed through an HTML sanitizer (like -[Bleach](https://bleach.readthedocs.io/)) after being converted to HTML by -markdown. In fact, [Bleach -Whitelist](https://github.com/yourcelf/bleach-whitelist) provides a curated list -of tags, attributes, and styles suitable for filtering user-provided HTML using -bleach. - -If your code previously looked like this: +* Markdown class instance references. -```python -html = markdown.markdown(text, safe_mode=True) -``` + Previously, instances of the `Markdown` class were represented as any one of + `md`, `md_instance`, or `markdown`. This inconsistency made it difficult when + developing extensions, or just maintaining the existing code. Now, all instances + are consistently represented as `md`. -Then it is recommended that you change your code to read something like this: - -```python -import bleach -from bleach_whitelist import markdown_tags, markdown_attrs -html = bleach.clean(markdown.markdown(text), markdown_tags, markdown_attrs) -``` + The old attributes on class instances still exist, but raise a + `DeprecationWarning` when accessed. Also on classes where the instance was + optional, the attribute always exists now and is simply `None` if no instance + was provided (previously the attribute would not exist). 
-If you are not interested in sanitizing untrusted text, but simply desire to -escape raw HTML, then that can be accomplished through an extension which -removes HTML parsing: +* `markdown.util.isBlockLevel` deprecated -```python -from markdown.extensions import Extension - -class EscapeHtml(Extension): - def extendMarkdown(self, md): - md.preprocessors.deregister('html_block') - md.inlinePatterns.deregister('html') + The `markdown.util.isBlockLevel` function is deprecated and will raise a + `DeprecationWarning`. Instead, extensions should use the `isBlockLevel` method + of the `Markdown` class instance. Additionally, a list of block level elements + is defined in the `block_level_elements` attribute of the `Markdown` class which + extensions can access to alter the list of elements which are treated as block + level elements. -html = markdown.markdown(text, extensions=[EscapeHtml()]) -``` - -As the HTML would not be parsed with the above Extension, then the serializer -will escape the raw HTML, which is exactly what happened in previous versions -with `safe_mode="escape"`. - -#### Positional arguments deprecated +* `md_globals` keyword deprecated from extension API -Positional arguments on the `markdown.Markdown()` class are deprecated as are -all except the `text` argument on the `markdown.markdown()` wrapper function. -Using positional arguments will raise an error. Only keyword arguments should be -used. For example, if your code previously looked like this: - -```python -html = markdown.markdown(text, [SomeExtension()]) -``` - -Then it is recommended that you change it to read something like this: - -```python -html = markdown.markdown(text, extensions=[SomeExtension()]) -``` - -!!! Note - This change is being made as a result of deprecating `"safe_mode"` as the - `safe_mode` argument was one of the positional arguments. When that argument - is removed, the two arguments following it will no longer be at the correct - position. 
It is recommended that you always use keywords when they are - supported for this reason. - -#### Extension name behavior has changed - -In previous versions of Python-Markdown, the built-in extensions received -special status and did not require the full path to be provided. Additionally, -third party extensions whose name started with `"mdx_"` received the same -special treatment. This is no longer the case. - -Support has been added for extensions to define an [entry -point](extensions/api.md#entry_point). An entry point is a string name which -can be used to point to an `Extension` class. The built-in extensions now have -entry points which match the old short names. And any third-party extensions -which define entry points can now get the same behavior. See the documentation -for each specific extension to find the assigned name. - -If an extension does not define an entry point, then the full path to the -extension must be used. See the [documentation](reference.md#extensions) for -a full explanation of the current behavior. - -#### Extension configuration as part of extension name deprecated - -The previously documented method of appending the extension configuration -options as a string to the extension name is deprecated and will raise an error. -The [`extension_configs`](reference.md#extension_configs) keyword should be -used instead. See the [documentation](reference.md#extension_configs) for a -full explanation of the current behavior. - -#### HeaderId extension deprecated - -The HeaderId Extension is deprecated and will raise an error if specified. Use -the [Table of Contents](extensions/toc.md) Extension instead, which offers -most of the features of the HeaderId Extension and more (support for meta data -is missing). 
- -Extension authors who have been using the `slugify` and `unique` functions -defined in the HeaderId Extension should note that those functions are now -defined in the Table of Contents extension and should adjust their import -statements accordingly (`from markdown.extensions.toc import slugify, unique`). - -#### Homegrown `OrderedDict` has been replaced with a purpose-built `Registry` + Previously, the `extendMarkdown` method of a `markdown.extensions.Extension` + subclasses accepted an `md_globals` keyword, which contained the value returned + by Python's `globals()` built-in function. As all of the configuration is now + held within the `Markdown` class instance, access to the globals is no longer + necessary and any extensions which expect the keyword will raise a + `DeprecationWarning`. A future release will raise an error. -All processors and patterns now get "registered" to a -[Registry](extensions/api.md#registry). A backwards compatible shim is -included so that existing simple extensions should continue to work. -A `DeprecationWarning` will be raised for any code which calls the old API. +* `markdown.version` and `markdown.version_info` deprecated -#### Markdown class instance references. - -Previously, instances of the `Markdown` class were represented as any one of -`md`, `md_instance`, or `markdown`. This inconsistency made it difficult when -developing extensions, or just maintaining the existing code. Now, all instances -are consistently represented as `md`. - -The old attributes on class instances still exist, but raise a -`DeprecationWarning` when accessed. Also on classes where the instance was -optional, the attribute always exists now and is simply `None` if no instance -was provided (previously the attribute would not exist). + Historically, version numbers were acquired via the attributes + `markdown.version` and `markdown.version_info`. 
Moving forward, a more + standardized approach is being followed and versions are acquired via the + `markdown.__version__` and `markdown.__version_info__` attributes. The legacy + attributes are still available to allow distinguishing versions between the + legacy Markdown 2.0 series and the Markdown 3.0 series, but in the future the + legacy attributes will be removed. -#### `markdown.util.isBlockLevel` deprecated - -The `markdown.util.isBlockLevel` function is deprecated and will raise a -`DeprecationWarning`. Instead, extensions should use the `isBlockLevel` method -of the `Markdown` class instance. Additionally, a list of block level elements -is defined in the `block_level_elements` attribute of the `Markdown` class which -extensions can access to alter the list of elements which are treated as block -level elements. - -#### `md_globals` keyword deprecated from extension API +* Added new, more flexible `InlineProcessor` class -Previously, the `extendMarkdown` method of a `markdown.extensions.Extension` -subclasses accepted an `md_globals` keyword, which contained the value returned -by Python's `globals()` built-in function. As all of the configuration is now -held within the `Markdown` class instance, access to the globals is no longer -necessary and any extensions which expect the keyword will raise a -`DeprecationWarning`. A future release will raise an error. - -#### `markdown.version` and `markdown.version_info` deprecated - -Historically, version numbers were acquired via the attributes -`markdown.version` and `markdown.version_info`. Moving forward, a more -standardized approach is being followed and versions are acquired via the -`markdown.__version__` and `markdown.__version_info__` attributes. The legacy -attributes are still available to allow distinguishing versions between the -legacy Markdown 2.0 series and the Markdown 3.0 series, but in the future the -legacy attributes will be removed. 
- -#### Added new, more flexible `InlineProcessor` class - -A new `InlineProcessor` class handles inline processing much better and allows -for more flexibility. The new `InlineProcessor` classes no longer utilize -unnecessary pretext and post-text captures. New class can accept the buffer that -is being worked on and manually process the text without regular expressions and -return new replacement bounds. This helps us to handle links in a better way and -handle nested brackets and logic that is too much for regular expression. + A new `InlineProcessor` class handles inline processing much better and allows + for more flexibility. The new `InlineProcessor` classes no longer utilize + unnecessary pretext and post-text captures. New class can accept the buffer that + is being worked on and manually process the text without regular expressions and + return new replacement bounds. This helps us to handle links in a better way and + handle nested brackets and logic that is too much for regular expression. ### Added @@ -825,7 +811,7 @@ handle nested brackets and logic that is too much for regular expression. * Additional CSS class names can be appended to [Admonitions](extensions/admonition.md). 
-## Previous Releases +# Previous Releases For information on prior releases, see their changelogs: From e6b71632726c049ab9a7a1b42e816a67f422064b Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 9 Apr 2025 13:17:32 -0400 Subject: [PATCH 09/24] Update deploy workflow to normalize version --- .github/workflows/deploy.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 342ea1711..37ee5df1d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: python-version: 3.11 - name: Install dependencies run: | - python -m pip install --upgrade pip setuptools wheel build + python -m pip install --upgrade pip setuptools wheel build semver - name: Build run: | python -m build @@ -27,19 +27,22 @@ jobs: with: user: __token__ password: ${{ secrets.PYPI_PASSWORD }} + - name: Normalize Version + id: normalize + run: echo "version=$(python -c 'import semver; print(semver.Version.parse("${{ github.ref_name }}", optional_minor_and_patch=True))')" >> $GITHUB_OUTPUT - name: Get Changelog Entry if: success() id: changelog_reader uses: mindsers/changelog-reader-action@v2 with: - version: ${{ github.ref_name }} + version: ${{ steps.normalize.outputs.version }} path: ./docs/changelog.md - name: Release to GitHub if: success() uses: ncipollo/release-action@v1 with: - tag: ${{ steps.changelog_reader.outputs.version }} - name: Release ${{ steps.changelog_reader.outputs.version }} + tag: ${{ github.ref_name }} + name: Release ${{ github.ref_name }} body: ${{ steps.changelog_reader.outputs.changes }} artifacts: dist/** prerelease: ${{ steps.changelog_reader.outputs.status == 'prereleased' }} From b34e1d03387be771aa626241fe56f8f0c34243f2 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 9 Apr 2025 13:31:31 -0400 Subject: [PATCH 10/24] Bump version to 3.8 --- docs/changelog.md | 2 +- markdown/__meta__.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 272ca0f54..930199fe3 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [Contributing Guide](contributing.md) for details. -## [Unreleased] +## [3.8.0] - 2025-04-09 ### Changed diff --git a/markdown/__meta__.py b/markdown/__meta__.py index 640d23fb5..78b470ea4 100644 --- a/markdown/__meta__.py +++ b/markdown/__meta__.py @@ -28,7 +28,7 @@ from __future__ import annotations -__version_info__ = (3, 7, 0, 'final', 0) +__version_info__ = (3, 8, 0, 'final', 0) def _get_version(version_info): From 513de8a0db81c840f917488af7078a45f74542bf Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Thu, 10 Apr 2025 10:23:57 +0300 Subject: [PATCH 11/24] Update pypa/gh-action-pypi-publish to v1.12.4 This is the first tag with support for metadata format 2.4. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 37ee5df1d..814e1bed7 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -23,7 +23,7 @@ jobs: python -m build - name: Publish to PyPI if: success() - uses: pypa/gh-action-pypi-publish@v1.1.0 + uses: pypa/gh-action-pypi-publish@v1.12.4 with: user: __token__ password: ${{ secrets.PYPI_PASSWORD }} From f2b9fd10f1fc7b0683f226e6a82da8e8375bccf5 Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Mon, 21 Apr 2025 13:57:02 -0600 Subject: [PATCH 12/24] Ensure `md_in_html` does not drop content Fixes #1526. 
Co-authored-by: Dmitry Shachnev --- docs/changelog.md | 6 ++++++ markdown/extensions/md_in_html.py | 6 +++++- .../test_syntax/extensions/test_md_in_html.py | 21 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 930199fe3..7c96a6f46 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,6 +9,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [Contributing Guide](contributing.md) for details. +## [Unreleased] + +### Fixed + +* Fixed dropped content in `md_in_html` (#1526). + ## [3.8.0] - 2025-04-09 ### Changed diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index d1fbd7af5..ba73c9425 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -387,12 +387,16 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool: element = self.parser.md.htmlStash.rawHtmlBlocks[index] if isinstance(element, etree.Element): # We have a matched element. Process it. - blocks.pop(0) + block = blocks.pop(0) parent.append(element) self.parse_element_content(element) # Cleanup stash. Replace element with empty string to avoid confusing postprocessor. self.parser.md.htmlStash.rawHtmlBlocks.pop(index) self.parser.md.htmlStash.rawHtmlBlocks.insert(index, '') + content = block[m.end(0):] + # Ensure the rest of the content gets handled + if content: + blocks.insert(0, content) # Confirm the match to the `blockparser`. return True # No match found. 
diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index 1bdca393d..5ef860d56 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -1517,6 +1517,27 @@ def test_md1_code_cdata(self): extensions=['md_in_html'] ) + def test_trailing_content_after_tag_in_md_block(self): + + # It should be noted that this is not the way `md_in_html` is intended to be used. + # What we are specifically testing is an edge case where content was previously lost. + # Lost content should not happen. + self.assertMarkdownRenders( + self.dedent( + """ +
+
AAAAA
+
+ """ + ), + '
\n' + '
\n' + '

AAAAA

\n' + '
\n' + '
', + extensions=['md_in_html'] + ) + def load_tests(loader, tests, pattern): """ Ensure `TestHTMLBlocks` doesn't get run twice by excluding it here. """ From 64a3c0fbc00327fbfee1fd6b44da0e5453287fe4 Mon Sep 17 00:00:00 2001 From: facelessuser Date: Sat, 26 Apr 2025 09:02:09 -0600 Subject: [PATCH 13/24] Fix HTML handling of `` Fixes #1528 --- docs/changelog.md | 1 + markdown/htmlparser.py | 9 +++++++++ tests/test_syntax/blocks/test_html_blocks.py | 18 ++++++++++++++++++ .../test_syntax/extensions/test_md_in_html.py | 18 ++++++++++++++++++ tests/test_syntax/inline/test_code.py | 17 +++++++++++++++++ tests/test_syntax/inline/test_raw_html.py | 3 +++ 6 files changed, 66 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 7c96a6f46..9f972a35b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -14,6 +14,7 @@ See the [Contributing Guide](contributing.md) for details. ### Fixed * Fixed dropped content in `md_in_html` (#1526). +* Fixed HTML handling corner case that prevented some content from not being rendered (#1528). ## [3.8.0] - 2025-04-09 diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 33b918d54..1528325c5 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -41,6 +41,10 @@ spec.loader.exec_module(htmlparser) sys.modules['htmlparser'] = htmlparser +# This is a hack. We are sneaking in `` so we can capture it without the HTML parser +# throwing it away. When we see it, we will process it as data. +htmlparser.starttagopen = re.compile('<[a-zA-Z]|') + # Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions. htmlparser.piclose = re.compile(r'\?>') # Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon. @@ -297,6 +301,11 @@ def get_starttag_text(self) -> str: return self.__starttag_text def parse_starttag(self, i: int) -> int: # pragma: no cover + # Treat `` as normal data as it is not a real tag. 
+ if self.rawdata[i:i + 3] == '': + self.handle_data(self.rawdata[i:i + 3]) + return i + 3 + self.__starttag_text = None endpos = self.check_for_whole_start_tag(i) if endpos < 0: diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 33375d3ad..8ea3904c0 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -1643,3 +1643,21 @@ def test_placeholder_in_source(self): placeholder = md.htmlStash.get_placeholder(md.htmlStash.html_counter + 1) result = md.postprocessors['raw_html'].run(placeholder) self.assertEqual(placeholder, result) + + def test_noname_tag(self): + self.assertMarkdownRenders( + self.dedent( + """ +
+ +
+ """ + ), + self.dedent( + """ +
+ +
+ """ + ) + ) diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index 5ef860d56..903766fca 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -1538,6 +1538,24 @@ def test_trailing_content_after_tag_in_md_block(self): extensions=['md_in_html'] ) + def test_noname_tag(self): + self.assertMarkdownRenders( + self.dedent( + """ +
+ +
+ """ + ), + self.dedent( + """ +
+

</>

+
+ """ + ) + ) + def load_tests(loader, tests, pattern): """ Ensure `TestHTMLBlocks` doesn't get run twice by excluding it here. """ diff --git a/tests/test_syntax/inline/test_code.py b/tests/test_syntax/inline/test_code.py index 9fc379008..c7ccc4bf2 100644 --- a/tests/test_syntax/inline/test_code.py +++ b/tests/test_syntax/inline/test_code.py @@ -62,3 +62,20 @@ def test_code_html(self): """ ) ) + + def test_noname_tag(self): + # Browsers ignore ``, but a Markdown parser should not, and should treat it as data + # but not a tag. + + self.assertMarkdownRenders( + self.dedent( + """ + `` + """ + ), + self.dedent( + """ +

</>

+ """ + ) + ) diff --git a/tests/test_syntax/inline/test_raw_html.py b/tests/test_syntax/inline/test_raw_html.py index f165f750a..57d725e7e 100644 --- a/tests/test_syntax/inline/test_raw_html.py +++ b/tests/test_syntax/inline/test_raw_html.py @@ -31,3 +31,6 @@ def test_inline_html_angle_brackets(self): def test_inline_html_backslashes(self): self.assertMarkdownRenders('', '

') + + def test_noname_tag(self): + self.assertMarkdownRenders('', '

</>

') From 820721485c928c6f97f3d74f37afb6d2450aef9e Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Jun 2025 10:29:03 -0400 Subject: [PATCH 14/24] Ensure incomplete markup declaration in raw HTML doesn't crash parser. See Python bug report at gh-77057 for details. Until we drop support for Python < 3.13 (where this was fixed upstream), we need to avoid the unwanted error by checking for it explicitly. Fixes #1534. --- docs/changelog.md | 1 + markdown/extensions/md_in_html.py | 4 ++++ markdown/htmlparser.py | 4 ++++ tests/test_syntax/blocks/test_html_blocks.py | 7 +++++++ 4 files changed, 16 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 9f972a35b..0b00d9df7 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -13,6 +13,7 @@ See the [Contributing Guide](contributing.md) for details. ### Fixed +* Ensure incomplete markup declaration in raw HTML doesn't crash parser (#1534). * Fixed dropped content in `md_in_html` (#1526). * Fixed HTML handling corner case that prevented some content from not being rendered (#1528). diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index ba73c9425..5256e9046 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -280,6 +280,10 @@ def parse_pi(self, i: int) -> int: def parse_html_declaration(self, i: int) -> int: if self.at_line_start() or self.intail or self.mdstack: + if self.rawdata[i:i+3] == ' int: def parse_html_declaration(self, i: int) -> int: if self.at_line_start() or self.intail: + if self.rawdata[i:i+3] == '<![

' + ) + def test_raw_cdata_code_span(self): self.assertMarkdownRenders( self.dedent( From 3870f20ba1dd214609b380e921dffc5f115730f5 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Jun 2025 10:37:55 -0400 Subject: [PATCH 15/24] Bump version to 3.8.1 --- docs/changelog.md | 2 +- markdown/__meta__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 0b00d9df7..7b79973ee 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [Contributing Guide](contributing.md) for details. -## [Unreleased] +## [3.8.1] - 2025-06-18 ### Fixed diff --git a/markdown/__meta__.py b/markdown/__meta__.py index 78b470ea4..476a19044 100644 --- a/markdown/__meta__.py +++ b/markdown/__meta__.py @@ -28,7 +28,7 @@ from __future__ import annotations -__version_info__ = (3, 8, 0, 'final', 0) +__version_info__ = (3, 8, 1, 'final', 0) def _get_version(version_info): From 3bb9d42b93dae519d4f5a6eea970a571232e05e2 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Jun 2025 15:17:56 -0400 Subject: [PATCH 16/24] Update documentation for release process --- docs/contributing.md | 62 ++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index c48080752..704b48014 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -474,24 +474,37 @@ following steps: 3. Update the version defined in [`markdown/__meta__.py`][markdown/__meta__.py]. -4. Build a local copy of the documentation, browse through the pages and +4. Build a local copy of the documentation and browse through the pages to confirm that no obvious issues exist with the documentation. -5. Create a pull request with a commit message in the following format: +5. 
Create a pull request labeled `release`{ .label .release } with a commit + message in the following format: Bump version to X.X.X + !!! note + + For the checks to run properly the `release`{ .label .release } label + must be set on GitHub before creating the pull request. If the label + is added to the pull request later, additional changes will need to + be pushed to the pull request for the checks to acknowledge the + label. The relevant check verifies that the version defined in the + code matches the latest version in the changelog and that the + changelog no longer lists an `unreleased` entry. This check is + necessary to ensure deployment will not fail later. + 6. After all checks have passed, merge the pull request. -7. Create a git tag with the new version as the tag name and push to the - [Python-Markdown/markdown] repository. The new tag should trigger a GitHub - workflow which will automatically deploy the release to PyPI and update the - documentation. +7. Create a git tag with the new version as the tag name (in the format X.X.X + with no prefixes or suffixes) and push to the [Python-Markdown/markdown] + repository. The new tag should trigger a GitHub workflow which will + automatically deploy the release to PyPI and update the documentation. In the event that the deployment fails, the following steps can be taken to deploy manually: - - Deploy the release to [PyPI] with the command `make deploy`. + - Deploy the release to [PyPI] with the command `make deploy` (a valid + authentication token will need to be provided). - Deploy an update to the documentation using [MkDocs]. The following example assumes that local clones of the [Python-Markdown/markdown] and @@ -504,7 +517,7 @@ following steps: ## Issue and Pull Request Labels -Below are the labels used to track and manages issues and pull requests. The +Below are the labels used to track and manage issues and pull requests. 
The labels are loosely grouped by their purpose, but it is not necessary for every issue to have a label from every group, and an issue may have more than one label from the same group. @@ -551,6 +564,10 @@ label from the same group. | `approved`{ .label .approved } | The pull request is ready to be merged. | | `rejected`{ .label .rejected } | The pull request is rejected for the stated reasons. | +One additional label exists named `release`{ .label .release }. This label should only be +assigned to pull requests which bump the version. See the [Release Process](#release-process) +for details. + [Python-Markdown Organization]: https://github.com/Python-Markdown [Python-Markdown Code of Conduct]: https://github.com/Python-Markdown/markdown/blob/master/CODE_OF_CONDUCT.md [Python-Markdown/markdown]: https://github.com/Python-Markdown/markdown @@ -596,41 +613,46 @@ label from the same group. font-weight: 600; line-height: 15px; display: inline-block; - padding: 4px 6px; + padding: 0 8px; + margin: 4px 0; + border-radius: 999px; } code.bug { - background-color: #c45b46; + background-color: #c45b46 !important; } code.feature { - background-color: #7b17d8; + background-color: #7b17d8 !important; color: #ffffff; } code.support { - background-color: #efbe62; + background-color: #efbe62 !important; } code.process { - background-color: #eec9ff; + background-color: #eec9ff !important; } code.core { - background-color: #0b02e1; + background-color: #0b02e1 !important; color: #ffffff; } code.extension { - background-color: #709ad8; + background-color: #709ad8 !important; } code.docs { - background-color: #b2ffeb; + background-color: #b2ffeb !important; } code.approved { - background-color: #beed6d; + background-color: #beed6d !important; } code.low { - background-color: #dddddd; + background-color: #dddddd !important; } code.pending { - background-color: #f0f49a; + background-color: #f0f49a !important; } code.rejected { - background-color: #f7c7be; + background-color: #f7c7be 
!important; + } + code.release { + background-color: #d4c5f9 !important; } From 3561310d30dac10c47f0b9fc404b167fc65331b0 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Jun 2025 15:41:26 -0400 Subject: [PATCH 17/24] Properly document version specification. The docs previously incorrectly linked to the Semantic Versioning specification. However, this project has never used that specification. Instead it uses the Python Version Specification which was originally defined in PEP 440. The specification explicitly identifies differences between the two (See https://packaging.python.org/en/latest/specifications/version-specifiers/#semantic-versioning). --- docs/changelog.md | 5 ++++- docs/contributing.md | 39 ++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 7b79973ee..9770c47ac 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -6,9 +6,12 @@ toc_depth: 2 All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +and this project adheres to the +[Python Version Specification]: https://packaging.python.org/en/latest/specifications/version-specifiers/. See the [Contributing Guide](contributing.md) for details. +## [unreleased] + ## [3.8.1] - 2025-06-18 ### Fixed diff --git a/docs/contributing.md b/docs/contributing.md index 704b48014..7927847f0 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -23,7 +23,7 @@ The [Python-Markdown/markdown] project is organized as follows: * Branch `master` should generally be stable and release-ready at all times. * Version branches should be used for bug-fixes back-ported to the most recent - PATCH release. + MICRO release. * No other branches should be created. Any other branches which exist are preserved for historical reasons only. 
@@ -256,8 +256,8 @@ that version, or is otherwise expressly deemed appropriate by the project maintainers. The current changelog should only document the changes for one MAJOR release and -its various MINOR and PATCH releases (see [Versions](#versions) for an -explanation of MAJOR, MINOR, and PATCH releases). Older versions from previous +its various MINOR and MICRO releases (see [Versions](#versions) for an +explanation of MAJOR, MINOR, and MICRO releases). Older versions from previous series of releases can be found in the archive at `docs/change_log/` and may follow a different format. Note that the archived changelogs are not in the site navigation and are only linked from the [Previous @@ -416,22 +416,23 @@ with no arguments. See help (`tox -h`) for more options. ## Versions -Python-Markdown follows [Semantic Versioning] and uses the -`MAJOR.MINOR.PATCH[.dev#|a#|b#|rc#]` format for identifying releases. The status -of the `master` branch should always be identified in the `__version_info__` -tuple defined in [`markdown/__meta__.py`][markdown/__meta__.py]. The contents of -that tuple will automatically be converted into a normalized version which -conforms to [PEP 440]. Each time the version is changed, the continuous -integration server will run a test to ensure that the current version is in a -valid normalized format. +Python-Markdown follows the [Python Version Specification] (originally defined +in [PEP 440]) and uses the `MAJOR.MINOR.MICRO[.dev#|a#|b#|rc#]` format for +identifying releases. The status of the `master` branch should always be +identified in the `__version_info__` tuple defined in [`markdown/__meta__.py`] +[markdown/__meta__.py]. The contents of that tuple will automatically be +converted into a normalized version string which conforms to the +[Python Version Specification]. Each time the version is changed, the +continuous integration server will run a test to ensure that the current +version is in a valid normalized format.
### Version Status A MAJOR version is in development status when the MINOR version is `0`, the -PATCH version is `0`, and the version includes a `dev` segment. +MICRO version is `0`, and the version includes a `dev` segment. A MINOR version is in development status when the MINOR version is not `0`, the -PATCH version is `0`, and the version includes a `dev` segment. +MICRO version is `0`, and the version includes a `dev` segment. At all other times, the code is considered stable and release-ready. @@ -446,7 +447,7 @@ failed prior to the change. New features and backward incompatible changes may only be merged to the `master` branch when the MAJOR and/or MINOR version is in development status -pursuant to [Semantic Versioning]. +pursuant to the [Python Version Specification]. A separate commit to the `master` branch should be made to bump up the MAJOR and/or MINOR version and set development status. Only then will any pull @@ -459,7 +460,7 @@ request back-porting the fix made against that branch. The version branch should be named with the most recently released MINOR version. For example, if the `master` branch is at `3.1.dev0` and the most recent MINOR release was `3.0.4`, then the version branch would be named `3.0` and any releases from that branch -would increment the PATCH version only (`3.0.5`, `3.0.6`...). +would increment the MICRO version only (`3.0.5`, `3.0.6`...). ## Release Process @@ -491,12 +492,12 @@ following steps: label.The relevant check verifies that the version defined in the code matches the latest version in the changelog and that the changelog no longer lists an `unreleased` entry. This check is - nessecary to ensure deployment will not fail later. + necessary to ensure deployment will not fail later. 6. After all checks have passed, merge the pull request. 7. 
Create a git tag with the new version as the tag name (in the format X.X.X - with no prefixes or sufixes) and push to the [Python-Markdown/markdown] + with no prefixes or suffixes) and push to the [Python-Markdown/markdown] repository. The new tag should trigger a GitHub workflow which will automatically deploy the release to PyPI and update the documentation. @@ -599,8 +600,8 @@ for details. [tox]: https://tox.readthedocs.io/en/latest/ [aspell]: http://aspell.net/ [test tools]: test_tools.md -[Semantic Versioning]: https://semver.org/spec/v2.0.0.html -[markdown/__meta__.py]: https://github.com/Python-Markdown/markdown/blob/master/markdown/__meta__.py#L29 +[Python Version Specification]: https://packaging.python.org/en/latest/specifications/version-specifiers/ +[markdown/__meta__.py]: https://github.com/Python-Markdown/markdown/blob/master/markdown/__meta__.py#L31 [PEP 440]: https://www.python.org/dev/peps/pep-0440/ [PyPI]: https://pypi.org/project/Markdown/ [Python-Markdown/Python-Markdown.github.io]: https://github.com/Python-Markdown/Python-Markdown.github.io From 9980cb5b27b07ff48283178d98213e41543701ec Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Thu, 19 Jun 2025 09:46:13 -0600 Subject: [PATCH 18/24] Fixes for Python 3.14 - Fix codecs deprecation - Fix issue with unclosed ` int: if self.rawdata[i:i+3] == '': + self.handle_data('<') + self.override_comment_update = True + return self.handle_empty_tag(''.format(data), is_block=True) + def updatepos(self, i: int, j: int) -> int: + if self.override_comment_update: + self.override_comment_update = False + i = 0 + j = 1 + return super().updatepos(i, j) + def handle_decl(self, data: str): self.handle_empty_tag(''.format(data), is_block=True) @@ -278,7 +293,11 @@ def parse_html_declaration(self, i: int) -> int: if self.rawdata[i:i+3] == ' int: # pragma: no cover self.__starttag_text = None endpos = self.check_for_whole_start_tag(i) if endpos < 0: - return endpos + self.handle_data(self.rawdata[i:i + 1]) + return i 
+ 1 rawdata = self.rawdata self.__starttag_text = rawdata[i:endpos] diff --git a/tox.ini b/tox.ini index 2cea38e38..7bc4f8db4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{39, 310, 311, 312, 313}, pypy{39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{39, 310, 311, 312, 313, 314}, pypy{39, 310}, pygments, flake8, checkspelling, pep517check, checklinks isolated_build = True [testenv] From d9c8431e404d614812e39a11109afbe9981bba13 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 19 Jun 2025 11:59:46 -0400 Subject: [PATCH 19/24] Bump version to 3.8.2 --- docs/changelog.md | 2 +- markdown/__meta__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 60dc3ea66..aa5ca6e25 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,7 +10,7 @@ and this project adheres to the [Python Version Specification]: https://packaging.python.org/en/latest/specifications/version-specifiers/. See the [Contributing Guide](contributing.md) for details. -## [Unreleased] +## [3.8.2] - 2025-06-19 ### Fixed diff --git a/markdown/__meta__.py b/markdown/__meta__.py index 476a19044..c5302997a 100644 --- a/markdown/__meta__.py +++ b/markdown/__meta__.py @@ -28,7 +28,7 @@ from __future__ import annotations -__version_info__ = (3, 8, 1, 'final', 0) +__version_info__ = (3, 8, 2, 'final', 0) def _get_version(version_info): From 4669a09894d4a35cd5f5d2106b0da95e48d1a3f9 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 19 Jun 2025 13:18:24 -0400 Subject: [PATCH 20/24] fix typo --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index aa5ca6e25..d81e66ae3 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,7 +7,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to the -[Python Version Specification]: https://packaging.python.org/en/latest/specifications/version-specifiers/. +[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/). See the [Contributing Guide](contributing.md) for details. ## [3.8.2] - 2025-06-19 From 23c301de28e12426408656efdfa153b11d4ff558 Mon Sep 17 00:00:00 2001 From: facelessuser Date: Mon, 21 Jul 2025 08:56:43 -0600 Subject: [PATCH 21/24] Fix failing cases for Python 3.14 New change requires us to monkey patch `locatetagend` to prevent capturing incomplete tags in code spans. --- docs/changelog.md | 6 ++++++ markdown/htmlparser.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index d81e66ae3..3d54e6e2f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,6 +10,12 @@ and this project adheres to the [Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/). See the [Contributing Guide](contributing.md) for details. +## [Unreleased] + +### Fixed + +* Fix handling of incomplete HTML tags in code spans in Python 3.14. + ## [3.8.2] - 2025-06-19 ### Fixed diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 478e70216..63e5df31b 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -69,6 +69,20 @@ )? \s* # trailing whitespace """, re.VERBOSE) +htmlparser.locatetagend = re.compile(r""" + [a-zA-Z][^`\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:= # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? 
+""", re.VERBOSE) # Match a blank line at the start of a block of text (two newlines). # The newlines may be preceded by additional whitespace. From 07bf2076623be5de9952e1f35bfb8c218b699300 Mon Sep 17 00:00:00 2001 From: Anders Eskildsen <22001464+aeskildsen@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:38:41 +0200 Subject: [PATCH 22/24] Order footnotes by reference * Alter footnote ordering so footnotes definitions are listed in the order in which their references appear in the document. * Add config option, USE_DEFINITION_ORDER, to support previous behavior. * Add comprehensive tests for extension. * Change to the behavior of inlinepatterns by ensuring that inlinepatterns iterate through elements in document order. Previously, in some specific case, elements with nested children had their inline content parsed in reverse order. Resolves #1367. --- docs/changelog.md | 8 + docs/extensions/footnotes.md | 52 ++- markdown/extensions/footnotes.py | 78 +++- markdown/treeprocessors.py | 2 +- .../test_syntax/extensions/test_footnotes.py | 339 ++++++++++++++++++ 5 files changed, 452 insertions(+), 27 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 3d54e6e2f..7afa81bb2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -12,8 +12,16 @@ See the [Contributing Guide](contributing.md) for details. ## [Unreleased] +### Changed + +* Footnotes are now ordered by the occurrence of their references in the + document. A new configuration option for the footnotes extension, + `USE_DEFINITION_ORDER`, has been added to support restoring the previous + behavior of ordering footnotes by the occurrence of definitions. + ### Fixed +* Ensure inline processing iterates through elements in document order. * Fix handling of incomplete HTML tags in code spans in Python 3.14. 
## [3.8.2] - 2025-06-19 diff --git a/docs/extensions/footnotes.md b/docs/extensions/footnotes.md index e841a324d..7d033478f 100644 --- a/docs/extensions/footnotes.md +++ b/docs/extensions/footnotes.md @@ -24,26 +24,33 @@ the output. Example: ```md -Footnotes[^1] have a label[^@#$%] and the footnote's content. +Footnotes have a name, a reference[^1], and a definition[^word]. -[^1]: This is a footnote content. -[^@#$%]: A footnote on the label: "@#$%". +[^1]: This is a footnote definition. +[^word]: A footnote with the name "word". ``` -A footnote label must start with a caret `^` and may contain any inline text -(including spaces) between a set of square brackets `[]`. Only the first -caret has any special meaning. - -A footnote content must start with the label followed by a colon and at least -one space. The label used to define the content must exactly match the label used -in the body (including capitalization and white space). The content would then -follow the label either on the same line or on the next line. The content may -contain multiple lines, paragraphs, code blocks, blockquotes and most any other -markdown syntax. The additional lines must be indented one level (four spaces or -one tab). - -When working with multiple blocks, it may be helpful to start the content on a -separate line from the label which defines the content. This way the entire block +A **footnote name** is a string that uniquely identifies a footnote within the +document. It may contain any character which is valid for an HTML id attribute +(including spaces). Examples: `1` in `[^1]`, `word` in `[^word]`, +and `@#$%` in `[^@#$%]`. + +A **footnote reference** is a link within the text body to a footnote definition. +A footnote reference contains the footnote name prefixed by a caret `^` and enclosed +in square brackets `[]`. Examples: `[^1]` and `[^@#$%]`. In the output, footnote +references are replaced by a superscript number that links to the footnote definition. 
+ +A **footnote definition** must start with the corresponding footnote reference +followed by a colon and at least one space. The reference must exactly match +the reference used in the body (including capitalization and white space). +The content of the definition would then follow either on the same line +(`[^1]: This is a footnote definition.`) or on the next line. +Footnote definitions may contain multiple lines, paragraphs, code blocks, +blockquotes and most any other markdown syntax. The additional lines must be +indented one level (four spaces or one tab). + +When working with multiple blocks, it may be helpful to start the definition on a +separate line from the reference which defines the content. This way the entire block is indented consistently and any errors are more easily discernible by the author. ```md @@ -98,6 +105,15 @@ The following options are provided to configure the output: * **`SEPARATOR`**: The text string used to set the footnote separator. Defaults to `:`. +* **`USE_DEFINITION_ORDER`**: + Whether to order footnotes by the occurrence of footnote definitions + in the document. Defaults to `False`. + + Introduced in version 3.9.0, this option allows footnotes to be ordered + by the occurrence of their definitions in the document, rather than by the + order of their references in the text. This was the behavior of + previous versions of the extension. + A trivial example: ```python @@ -109,7 +125,7 @@ Resetting Instance State Footnote definitions are stored within the `markdown.Markdown` class instance between multiple runs of the class. This allows footnotes from all runs to be included in -output, with links and references that are unique, even though the class has been +output, with links and references that are unique, even though the class has been called multiple times. However, if needed, the definitions can be cleared between runs by calling `reset`. 
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 30c081138..13ecf7c22 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -33,6 +33,7 @@ FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX RE_REF_ID = re.compile(r'(fnref)(\d+)') +RE_REFERENCE = re.compile(r'(? None: """ Clear footnotes on reset, and prepare for distinct document. """ + self.footnote_order: list[str] = [] self.footnotes: OrderedDict[str, str] = OrderedDict() self.unique_prefix += 1 self.found_refs = {} @@ -150,6 +164,11 @@ def setFootnote(self, id: str, text: str) -> None: """ Store a footnote for later retrieval. """ self.footnotes[id] = text + def addFootnoteRef(self, id: str) -> None: + """ Store a footnote reference id in order of appearance. """ + if id not in self.footnote_order: + self.footnote_order.append(id) + def get_separator(self) -> str: """ Get the footnote separator. """ return self.getConfig("SEPARATOR") @@ -180,9 +199,6 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: ol = etree.SubElement(div, "ol") surrogate_parent = etree.Element("div") - # Backward compatibility with old '%d' placeholder - backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}") - for index, id in enumerate(self.footnotes.keys(), start=1): li = etree.SubElement(ol, "li") li.set("id", self.makeFootnoteId(id)) @@ -198,7 +214,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: backlink.set("class", "footnote-backref") backlink.set( "title", - backlink_title.format(index) + self.getConfig('BACKLINK_TITLE').format(index) ) backlink.text = FN_BACKLINK_TEXT @@ -214,7 +230,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: class FootnoteBlockProcessor(BlockProcessor): - """ Find all footnote references and store for later use. 
""" + """ Find footnote definitions and store for later use. """ RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE) @@ -228,6 +244,7 @@ def test(self, parent: etree.Element, block: str) -> bool: def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find, set, and remove footnote definitions. """ block = blocks.pop(0) + m = self.RE.search(block) if m: id = m.group(1) @@ -312,14 +329,21 @@ def __init__(self, pattern: str, footnotes: FootnoteExtension): def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: id = m.group(1) if id in self.footnotes.footnotes.keys(): + self.footnotes.addFootnoteRef(id) + + if not self.footnotes.getConfig("USE_DEFINITION_ORDER"): + # Order by reference + footnote_num = self.footnotes.footnote_order.index(id) + 1 + else: + # Order by definition + footnote_num = list(self.footnotes.footnotes.keys()).index(id) + 1 + sup = etree.Element("sup") a = etree.SubElement(sup, "a") sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True)) a.set('href', '#' + self.footnotes.makeFootnoteId(id)) a.set('class', 'footnote-ref') - a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format( - list(self.footnotes.footnotes.keys()).index(id) + 1 - ) + a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(footnote_num) return sup, m.start(0), m.end(0) else: return None, None, None @@ -401,6 +425,44 @@ def run(self, root: etree.Element) -> None: root.append(footnotesDiv) +class FootnoteReorderingProcessor(Treeprocessor): + """ Reorder list items in the footnotes div. 
""" + + def __init__(self, footnotes: FootnoteExtension): + self.footnotes = footnotes + + def run(self, root: etree.Element) -> None: + if not self.footnotes.footnotes: + return + if self.footnotes.footnote_order != list(self.footnotes.footnotes.keys()): + for div in root.iter('div'): + if div.attrib.get('class', '') == 'footnote': + self.reorder_footnotes(div) + break + + def reorder_footnotes(self, parent: etree.Element) -> None: + old_list = parent.find('ol') + parent.remove(old_list) + items = old_list.findall('li') + + def order_by_id(li) -> int: + id = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1)[-1] + return ( + self.footnotes.footnote_order.index(id) + if id in self.footnotes.footnote_order + else len(self.footnotes.footnotes) + ) + + items = sorted(items, key=order_by_id) + + new_list = etree.SubElement(parent, 'ol') + + for index, item in enumerate(items, start=1): + backlink = item.find('.//a[@class="footnote-backref"]') + backlink.set("title", self.footnotes.getConfig("BACKLINK_TITLE").format(index)) + new_list.append(item) + + class FootnotePostprocessor(Postprocessor): """ Replace placeholders with html entities. """ def __init__(self, footnotes: FootnoteExtension): diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 83630999e..9a27446d4 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -368,7 +368,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. 
stack = [(tree, tree_parents)] while stack: - currElement, parents = stack.pop() + currElement, parents = stack.pop(0) self.ancestors = parents self.__build_ancestors(currElement, self.ancestors) diff --git a/tests/test_syntax/extensions/test_footnotes.py b/tests/test_syntax/extensions/test_footnotes.py index 6f504e39c..070fa27fc 100644 --- a/tests/test_syntax/extensions/test_footnotes.py +++ b/tests/test_syntax/extensions/test_footnotes.py @@ -336,3 +336,342 @@ def test_superscript_text(self): '', extension_configs={'footnotes': {'SUPERSCRIPT_TEXT': '[{}]'}} ) + + def test_footnote_order(self): + """Test that footnotes occur in order of reference appearance.""" + + self.assertMarkdownRenders( + self.dedent( + """ + First footnote reference[^first]. Second footnote reference[^last]. + + [^last]: Second footnote. + [^first]: First footnote. + """ + ), + '

First footnote reference1. Second footnote reference' + '2.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    First footnote. 

    \n' + '
  2. \n' + '
  3. \n' + '

    Second footnote. 

    \n' + '
  4. \n' + '
\n' + '
' + ) + + def test_footnote_order_tricky(self): + """Test a tricky sequence of footnote references.""" + + self.assertMarkdownRenders( + self.dedent( + """ + `Footnote reference in code spans should be ignored[^tricky]`. + A footnote reference[^ordinary]. + Another footnote reference[^tricky]. + + [^ordinary]: This should be the first footnote. + [^tricky]: This should be the second footnote. + """ + ), + '

Footnote reference in code spans should be ignored[^tricky].\n' + 'A footnote reference' + '1.\n' + 'Another footnote reference' + '2.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    This should be the first footnote. 

    \n' + '
  2. \n' + '
  3. \n' + '

    This should be the second footnote. 

    \n' + '
  4. \n' + '
\n' + '
' + ) + + def test_footnote_order_by_definition(self): + """Test that footnotes occur in order of definition occurrence when so configured.""" + + self.assertMarkdownRenders( + self.dedent( + """ + First footnote reference[^last_def]. Second footnote reference[^first_def]. + + [^first_def]: First footnote. + [^last_def]: Second footnote. + """ + ), + '

First footnote reference2. Second footnote reference' + '1.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    First footnote. 

    \n' + '
  2. \n' + '
  3. \n' + '

    Second footnote. 

    \n' + '
  4. \n' + '
\n' + '
', + extension_configs={'footnotes': {'USE_DEFINITION_ORDER': True}} + ) + + def test_footnote_reference_within_code_span(self): + """Test footnote reference within a code span.""" + + self.assertMarkdownRenders( + 'A `code span with a footnote[^1] reference`.', + '

A code span with a footnote[^1] reference.

' + ) + + def test_footnote_reference_within_link(self): + """Test footnote reference within a link.""" + + self.assertMarkdownRenders( + 'A [link with a footnote[^1] reference](http://example.com).', + '

A link with a footnote[^1] reference.

' + ) + + def test_footnote_reference_within_footnote_definition(self): + """Test footnote definition containing another footnote reference.""" + + self.assertMarkdownRenders( + self.dedent( + """ + Main footnote[^main]. + + [^main]: This footnote references another[^nested]. + [^nested]: Nested footnote. + """ + ), + '

Main footnote1.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    This footnote references another2

    \n' + '
  2. \n' + '
  3. \n' + '

    Nested footnote. 

    \n' + '
  4. \n' + '
\n' + '
' + ) + + def test_footnote_reference_within_blockquote(self): + """Test footnote reference within a blockquote.""" + + self.assertMarkdownRenders( + self.dedent( + """ + > This is a quote with a footnote[^quote]. + + [^quote]: Quote footnote. + """ + ), + '
\n' + '

This is a quote with a footnote' + '1.

\n' + '
\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    Quote footnote. 

    \n' + '
  2. \n' + '
\n' + '
' + ) + + def test_footnote_reference_within_list(self): + """Test footnote reference within a list item.""" + + self.assertMarkdownRenders( + self.dedent( + """ + 1. First item with footnote[^note] + 1. Second item + + [^note]: List footnote. + """ + ), + '
    \n' + '
  1. First item with footnote' + '1
  2. \n' + '
  3. Second item
  4. \n' + '
\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    List footnote. 

    \n' + '
  2. \n' + '
\n' + '
' + ) + + def test_footnote_references_within_loose_list(self): + """Test footnote references within loose list items.""" + + self.assertMarkdownRenders( + self.dedent( + ''' + * Reference to [^first] + + * Reference to [^second] + + [^first]: First footnote definition + [^second]: Second footnote definition + ''' + ), + '
    \n' + '
  • \n' + '

    Reference to 1

    \n' + '
  • \n' + '
  • \n' + '

    Reference to 2

    \n' + '
  • \n' + '
\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    First footnote definition 

    \n' + '
  2. \n' + '
  3. \n' + '

    Second footnote definition 

    \n' + '
  4. \n' + '
\n' + '
' + ) + + def test_footnote_reference_within_html(self): + """Test footnote reference within HTML tags.""" + + self.assertMarkdownRenders( + self.dedent( + """ + A footnote reference[^1] within a span element. + + [^1]: The footnote. + """ + ), + '

A footnote reference' + '1' + ' within a span element.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    The footnote. 

    \n' + '
  2. \n' + '
\n' + '
' + ) + + def test_duplicate_footnote_references(self): + """Test multiple references to the same footnote.""" + + self.assertMarkdownRenders( + self.dedent( + """ + First[^dup] and second[^dup] reference. + + [^dup]: Duplicate footnote. + """ + ), + '

First' + '1 and second' + '1 reference.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    Duplicate footnote. ' + '' + '

    \n' + '
  2. \n' + '
\n' + '
' + ) + + def test_footnote_reference_without_definition(self): + """Test footnote reference without corresponding definition.""" + + self.assertMarkdownRenders( + 'This has a missing footnote[^missing].', + '

This has a missing footnote[^missing].

' + ) + + def test_footnote_definition_without_reference(self): + """Test footnote definition without corresponding reference.""" + + self.assertMarkdownRenders( + self.dedent( + """ + No reference here. + + [^orphan]: Orphaned footnote. + """ + ), + '

No reference here.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    Orphaned footnote. 

    \n' + '
  2. \n' + '
\n' + '
' + ) + + def test_footnote_id_with_special_chars(self): + """Test footnote id containing special and Unicode characters.""" + + self.assertMarkdownRenders( + self.dedent( + """ + Special footnote id[^!#¤%/()=?+}{§øé]. + + [^!#¤%/()=?+}{§øé]: The footnote. + """ + ), + '

Special footnote id' + '1.

\n' + '
\n' + '
\n' + '
    \n' + '
  1. \n' + '

    The footnote. 

    \n' + '
  2. \n' + '
\n' + '
' + ) From f39cf84a24124526c1a0efbe52219fa9950774f6 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 4 Sep 2025 15:44:15 -0400 Subject: [PATCH 23/24] Bump version to 3.9 --- docs/changelog.md | 16 ++++++++-------- markdown/__meta__.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 7afa81bb2..00c7b5e1b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,28 +10,28 @@ and this project adheres to the [Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/). See the [Contributing Guide](contributing.md) for details. -## [Unreleased] +## [3.9.0] - 2025-09-04 ### Changed * Footnotes are now ordered by the occurrence of their references in the document. A new configuration option for the footnotes extension, `USE_DEFINITION_ORDER`, has been added to support restoring the previous - behavior of ordering footnotes by the occurrence of definitions. + behavior of ordering footnotes by the occurrence of definitions (#1367). ### Fixed -* Ensure inline processing iterates through elements in document order. -* Fix handling of incomplete HTML tags in code spans in Python 3.14. +* Ensure inline processing iterates through elements in document order (#1546). +* Fix handling of incomplete HTML tags in code spans in Python 3.14 (#1547). ## [3.8.2] - 2025-06-19 ### Fixed -* Fix `codecs` deprecation in Python 3.14. -* Fix issue with unclosed comment parsing in Python 3.14. -* Fix issue with unclosed declarations in Python 3.14. 
-* Fix issue with unclosed HTML tag ` Date: Thu, 25 Sep 2025 14:39:25 -0600 Subject: [PATCH 24/24] Fix an HTML comment parsing case that can cause an infinite loop Fixes #1554 --- docs/changelog.md | 6 ++++++ markdown/htmlparser.py | 19 +++++++++++++++++++ tests/test_syntax/blocks/test_html_blocks.py | 18 +++++++++++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 00c7b5e1b..cd6c8ec82 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -10,6 +10,12 @@ and this project adheres to the [Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/). See the [Contributing Guide](contributing.md) for details. +## [Unreleased] + +### Fixed + +* Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554). + ## [3.9.0] - 2025-09-04 ### Changed diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 63e5df31b..658cd37e0 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -33,6 +33,9 @@ if TYPE_CHECKING: # pragma: no cover from markdown import Markdown +# Included for versions which do not have current comment fix +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. # Users can still do `from html import parser` and get the default behavior. @@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int: self.handle_data(' int: if self.at_line_start() or self.intail: if self.rawdata[i:i+3] == ' + Some content after the bad comment. + """ + ), + self.dedent( + """ +

<!-- This comment is malformed and never closes -- > + Some content after the bad comment.

+ """ + ) + ) + def test_raw_processing_instruction_one_line(self): self.assertMarkdownRenders( "'; ?>",