Skip to content

Commit ada40c6

Browse files
authored
TOC fix for AtomicString handling (#934)
Fixes #931.
1 parent 7c595e2 commit ada40c6

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

docs/change_log/index.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ title: Change Log
33
Python-Markdown Change Log
44
=========================
55

6-
Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
6+
Under development: version 3.2.2 (a bug-fix release).
7+
8+
* Fixed issue where double escaped entities could end up in TOC.
9+
10+
Feb 12, 2020: Released version 3.2.1 (a bug-fix release).
711

812
* The `name` property in `toc_tokens` from the TOC extension now
913
escapes HTML special characters (`<`, `>`, and `&`).

markdown/extensions/toc.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515

1616
from . import Extension
1717
from ..treeprocessors import Treeprocessor
18-
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
18+
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
1919
from ..postprocessors import UnescapePostprocessor
2020
import re
21+
import html
2122
import unicodedata
2223
import xml.etree.ElementTree as etree
2324

@@ -44,6 +45,18 @@ def unique(id, ids):
4445
return id
4546

4647

48+
def get_name(el):
    """Return the plain-text title of the element `el`.

    Every text fragment yielded by `el.itertext()` is collected in document
    order. Fragments that are `AtomicString` instances are passed through
    `html.unescape` first; all other fragments are used unchanged. The
    fragments are then joined and surrounding whitespace is stripped.
    """
    # NOTE(review): presumably AtomicString text already carries escaped
    # entities, and unescaping here keeps them from being escaped a second
    # time when the TOC is built -- confirm against the caller.
    fragments = (
        html.unescape(chunk) if isinstance(chunk, AtomicString) else chunk
        for chunk in el.itertext()
    )
    return ''.join(fragments).strip()
58+
59+
4760
def stashedHTML2text(text, md, strip_entities=True):
4861
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
4962
def _html_sub(m):
@@ -253,7 +266,7 @@ def run(self, doc):
253266
self.set_level(el)
254267
if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
255268
continue
256-
text = ''.join(el.itertext()).strip()
269+
text = get_name(el)
257270

258271
# Do not override pre-existing ids
259272
if "id" not in el.attrib:

tests/test_syntax/extensions/test_toc.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,28 @@ class TestTOC(TestCase):
2727

2828
# TODO: Move the rest of the TOC tests here.
2929

30+
def test_escaped_code(self):
31+
self.assertMarkdownRenders(
32+
self.dedent(
33+
'''
34+
[TOC]
35+
36+
# `<test>`
37+
'''
38+
),
39+
self.dedent(
40+
'''
41+
<div class="toc">
42+
<ul>
43+
<li><a href="#test">&lt;test&gt;</a></li>
44+
</ul>
45+
</div>
46+
<h1 id="test"><code>&lt;test&gt;</code></h1>
47+
'''
48+
),
49+
extensions=['toc']
50+
)
51+
3052
def test_escaped_char_in_id(self):
3153
self.assertMarkdownRenders(
3254
r'# escaped\_character',

0 commit comments

Comments
 (0)