Skip to content

Commit 7f29f1a

Browse files
authored
More reliable fix for </
Include hack so we don't break MkDocs.
1 parent c438647 commit 7f29f1a

File tree

3 files changed

+82
-20
lines changed

3 files changed

+82
-20
lines changed

docs/changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ See the [Contributing Guide](contributing.md) for details.
1515
### Fixed
1616

1717
* Fix a regression related to comment handling (#1590).
18+
* More reliable fix for the handling of an unclosed end tag opener (`</`).
1819

1920
## [3.10.1] - 2026-01-21
2021

markdown/htmlparser.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535

3636
# Included for versions which do not have current comment fix
3737
commentclose = re.compile(r'--!?>')
38-
commentabruptclose = re.compile(r'-?>')
3938

4039
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
4140
# Users can still do `from html import parser` and get the default behavior.
@@ -48,6 +47,8 @@
4847
# throwing it away. When we see it, we will process it as data.
4948
htmlparser.starttagopen = re.compile('<[a-zA-Z]|</>')
5049

50+
htmlparser.endtagopen = re.compile('</[a-zA-Z]?')
51+
5152
# Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
5253
htmlparser.piclose = re.compile(r'\?>')
5354
# Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
@@ -92,6 +93,30 @@
9293
blank_line_re = re.compile(r'^([ ]*\n){2}')
9394

9495

96+
class _HTMLParser(htmlparser.HTMLParser):
    """
    Handle special start and end tags.

    `parse_endtag` and `parse_starttag` are overridden so that an end tag
    opener (`</`) which is not followed by an ASCII letter, and the literal
    `</>`, are passed to `handle_data` as text rather than parsed as tags.
    """

    def parse_endtag(self, i: int) -> int:
        """
        Parse the end tag which starts at index `i` of `self.rawdata`.

        When `</` is followed by an ASCII letter the work is delegated to the
        parent class. Otherwise (including when fewer than three characters
        remain) the two characters at `i` are passed to `handle_data` and the
        index just past them is returned.
        """
        start = self.rawdata[i:i + 3]
        # Check the length before looking at the character after `</`, so a
        # short (or empty) slice is never indexed.
        if len(start) == 3:
            name_start = start[2]
            if 'a' <= name_start <= 'z' or 'A' <= name_start <= 'Z':
                return super().parse_endtag(i)

        # Not a real end tag: emit `</` as plain data and move past it.
        self.handle_data(self.rawdata[i:i + 2])
        return i + 2

    def parse_starttag(self, i: int) -> int:  # pragma: no cover
        """
        Parse the start tag which starts at index `i` of `self.rawdata`.

        The literal `</>` is passed to `handle_data` and the index just past
        it is returned; anything else is delegated to the parent class.
        """
        # Treat `</>` as normal data as it is not a real tag.
        if self.rawdata[i:i + 3] == '</>':
            self.handle_data(self.rawdata[i:i + 3])
            return i + 3

        return super().parse_starttag(i)
114+
115+
116+
# Overwrite `htmlparser.HTMLParser` with our custom subclass for projects like MkDocs that pull it in
117+
htmlparser.HTMLParser = _HTMLParser
118+
119+
95120
class HTMLExtractor(htmlparser.HTMLParser):
96121
"""
97122
Extract raw HTML from text.
@@ -110,9 +135,6 @@ def __init__(self, md: Markdown, *args, **kwargs):
110135

111136
self.lineno_start_cache = [0]
112137

113-
self.override_comment_update = False
114-
self.override_comment_start = 0
115-
116138
# This calls self.reset
117139
super().__init__(*args, **kwargs)
118140
self.md = md
@@ -125,8 +147,6 @@ def reset(self):
125147
self._cache: list[str] = []
126148
self.cleandoc: list[str] = []
127149
self.lineno_start_cache = [0]
128-
self.override_comment_start = 0
129-
self.override_comment_update = False
130150

131151
super().reset()
132152

@@ -276,22 +296,8 @@ def handle_entityref(self, name: str):
276296

277297
def handle_comment(self, data: str):
278298
# Check if the comment is unclosed, if so, we need to override position
279-
j = self.rawdata.find(data)
280-
i = j - 2
281-
if self.rawdata[i:j] == '</':
282-
self.handle_data('<')
283-
self.override_comment_start = i
284-
self.override_comment_update = True
285-
return
286299
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
287300

288-
def updatepos(self, i: int, j: int) -> int:
289-
if self.override_comment_update:
290-
self.override_comment_update = False
291-
i = self.override_comment_start
292-
j = self.override_comment_start + 1
293-
return super().updatepos(i, j)
294-
295301
def handle_decl(self, data: str):
296302
self.handle_empty_tag('<!{}>'.format(data), is_block=True)
297303

tests/test_syntax/blocks/test_html_blocks.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,3 +1732,58 @@ def test_issue_1590(self):
17321732
'''
17331733
)
17341734
)
1735+
1736+
def test_stress_comment_handling(self):
1737+
"""Stress test the comment handling."""
1738+
1739+
self.assertMarkdownRenders(
1740+
self.dedent(
1741+
'''
1742+
`</` <!-- `<!--[if mso]>` and <!-- </> and `<!--[if mso]>`
1743+
1744+
<!-- and <!-- `<!--[if mso]>` and </> `</` and `<!--[if mso]>`
1745+
1746+
<!-- Real comment -->
1747+
1748+
`<!--[if mso]>` `</` `<!--[if mso]>` and </> <!-- and <!--
1749+
1750+
</> `<!--[if mso]>` `</` <!-- and <!-- and `<!--[if mso]>`
1751+
'''
1752+
),
1753+
self.dedent(
1754+
'''
1755+
<p><code>&lt;/</code> &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;!-- &lt;/&gt; and <code>&lt;!--[if mso]&gt;</code></p>
1756+
<p>&lt;!-- and &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; <code>&lt;/</code> and <code>&lt;!--[if mso]&gt;</code></p>
1757+
<!-- Real comment -->
1758+
<p><code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; &lt;!-- and &lt;!--</p>
1759+
<p>&lt;/&gt; <code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> &lt;!-- and &lt;!-- and <code>&lt;!--[if mso]&gt;</code></p>
1760+
''' # noqa: E501
1761+
)
1762+
)
1763+
1764+
def test_unclosed_endtag(self):
1765+
"""Ensure unclosed end tag does not have side effects."""
1766+
1767+
self.assertMarkdownRenders(
1768+
self.dedent(
1769+
'''
1770+
`</`
1771+
1772+
<div>
1773+
<!--[if mso]>-->
1774+
<p>foo</p>
1775+
<!--<!endif]-->
1776+
</div>
1777+
'''
1778+
),
1779+
self.dedent(
1780+
'''
1781+
<p><code>&lt;/</code></p>
1782+
<div>
1783+
<!--[if mso]>-->
1784+
<p>foo</p>
1785+
<!--<!endif]-->
1786+
</div>
1787+
'''
1788+
)
1789+
)

0 commit comments

Comments
 (0)