More reliable fix for </

facelessuser · web-flow · commit 7f29f1a69d23 · 2026-02-03T11:20:10.000-05:00
Include hack so we don't break MkDocs.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -15,6 +15,7 @@ See the [Contributing Guide](contributing.md) for details.
 ### Fixed
 
 * Fix a regression related to comment handling (#1590).
+* More reliable fix for `</` when it is not followed by a tag name.
 
 ## [3.10.1] - 2026-01-21
 
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
@@ -35,7 +35,6 @@
 
 # Included for versions which do not have current comment fix
 commentclose = re.compile(r'--!?>')
-commentabruptclose = re.compile(r'-?>')
 
 # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
 # Users can still do `from html import parser` and get the default behavior.
@@ -48,6 +47,8 @@
 # throwing it away. When we see it, we will process it as data.
 htmlparser.starttagopen = re.compile('<[a-zA-Z]|</>')
 
+# Monkeypatch the end tag opener to match `</` with or without a following ASCII letter.
+htmlparser.endtagopen = re.compile('</[a-zA-Z]?')
+
 # Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
 htmlparser.piclose = re.compile(r'\?>')
 # Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
@@ -92,6 +93,30 @@
 blank_line_re = re.compile(r'^([ ]*\n){2}')
 
 
+class _HTMLParser(htmlparser.HTMLParser):
+    """Handle special start and end tags."""
+
+    def parse_endtag(self, i):
+        start = self.rawdata[i:i+3]
+        c = ord(start[-1])
+        if len(start) < 3 or not (65 <= c <= 90 or 97 <= c <= 122):
+            self.handle_data(self.rawdata[i:i + 2])
+            return i + 2
+        return super().parse_endtag(i)
+
+    def parse_starttag(self, i: int) -> int:  # pragma: no cover
+        # Treat `</>` as normal data as it is not a real tag.
+        if self.rawdata[i:i + 3] == '</>':
+            self.handle_data(self.rawdata[i:i + 3])
+            return i + 3
+
+        return super().parse_starttag(i)
+
+
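+# Replace `HTMLParser` on our module copy with the custom subclass so that `HTMLExtractor` below,
+# and projects like MkDocs that pull the parser in from here, pick up the overrides.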
+htmlparser.HTMLParser = _HTMLParser
+
+
 class HTMLExtractor(htmlparser.HTMLParser):
     """
     Extract raw HTML from text.
@@ -110,9 +135,6 @@ def __init__(self, md: Markdown, *args, **kwargs):
 
         self.lineno_start_cache = [0]
 
-        self.override_comment_update = False
-        self.override_comment_start = 0
-
         # This calls self.reset
         super().__init__(*args, **kwargs)
         self.md = md
@@ -125,8 +147,6 @@ def reset(self):
         self._cache: list[str] = []
         self.cleandoc: list[str] = []
         self.lineno_start_cache = [0]
-        self.override_comment_start = 0
-        self.override_comment_update = False
 
         super().reset()
 
@@ -276,22 +296,8 @@ def handle_entityref(self, name: str):
 
     def handle_comment(self, data: str):
         # Check if the comment is unclosed, if so, we need to override position
-        j = self.rawdata.find(data)
-        i = j - 2
-        if self.rawdata[i:j] == '</':
-            self.handle_data('<')
-            self.override_comment_start = i
-            self.override_comment_update = True
-            return
         self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
 
-    def updatepos(self, i: int, j: int) -> int:
-        if self.override_comment_update:
-            self.override_comment_update = False
-            i = self.override_comment_start
-            j = self.override_comment_start + 1
-        return super().updatepos(i, j)
-
     def handle_decl(self, data: str):
         self.handle_empty_tag('<!{}>'.format(data), is_block=True)
 
diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py
@@ -1732,3 +1732,58 @@ def test_issue_1590(self):
                 '''
             )
         )
+
+    def test_stress_comment_handling(self):
+        """
+        Stress test the comment handling.
+
+        Each paragraph mixes unclosed `<!--` openers and `</>` in running text
+        with code spans containing `</` and `<!--[if mso]>`. All of them are
+        expected to render as escaped text; only the properly closed comment
+        is expected to pass through unchanged.
+        """
+
+        # First argument is the Markdown source, second is the expected HTML.
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                `</` <!-- `<!--[if mso]>` and <!-- </> and `<!--[if mso]>`
+
+                <!-- and <!-- `<!--[if mso]>` and </> `</` and `<!--[if mso]>`
+
+                <!-- Real comment -->
+
+                `<!--[if mso]>` `</` `<!--[if mso]>` and </> <!-- and <!--
+
+                </> `<!--[if mso]>` `</` <!--  and <!--  and `<!--[if mso]>`
+                '''
+            ),
+            self.dedent(
+                '''
+                <p><code>&lt;/</code> &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;!-- &lt;/&gt; and <code>&lt;!--[if mso]&gt;</code></p>
+                <p>&lt;!-- and &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; <code>&lt;/</code> and <code>&lt;!--[if mso]&gt;</code></p>
+                <!-- Real comment -->
+                <p><code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; &lt;!-- and &lt;!--</p>
+                <p>&lt;/&gt; <code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> &lt;!--  and &lt;!--  and <code>&lt;!--[if mso]&gt;</code></p>
+                '''  # noqa: E501
+            )
+        )
+
+    def test_unclosed_endtag(self):
+        """
+        Ensure unclosed end tag does not have side effects.
+
+        A code span holding a bare `</` precedes a raw `<div>` block that
+        contains comments; the block is expected to be emitted unchanged.
+        """
+
+        # First argument is the Markdown source, second is the expected HTML.
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                `</`
+
+                <div>
+                <!--[if mso]>-->
+                <p>foo</p>
+                <!--<!endif]-->
+                </div>
+                '''
+            ),
+            self.dedent(
+                '''
+                <p><code>&lt;/</code></p>
+                <div>
+                <!--[if mso]>-->
+                <p>foo</p>
+                <!--<!endif]-->
+                </div>
+                '''
+            )
+        )