-
-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathdescriptionparser.py
127 lines (100 loc) · 3.93 KB
/
descriptionparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import html
import string
from typing import Iterable
import docutils.nodes as nodes
class DescriptionParser(nodes.NodeVisitor):
    """
    Finds the title and creates a description from a doctree.

    The visitor is meant to be driven by ``doctree.walkabout(...)``. While
    walking, the text of leaf nodes is appended to ``self.description``
    (HTML-escaped and whitespace-normalised) and punctuation is inserted when
    titles, list items and lists are left. Once the accumulated text exceeds
    ``desc_len`` characters it is truncated and the traversal is stopped.
    """

    def __init__(
        self,
        desc_len: int,
        known_titles: Iterable[str] = None,
        document: nodes.document = None,
    ):
        """
        :param desc_len: maximum length of the generated description.
        :param known_titles: titles that are already known for the page; the
            first title encountered is skipped when its text is one of these.
            Defaults to no known titles.
        :param document: document handed to ``nodes.NodeVisitor``. When
            omitted, a minimal stand-in with a no-op ``reporter.debug`` is
            used.
        """
        # Hack to prevent requirement for the doctree to be passed in.
        # It's only used by doctree.walk(...) to print debug messages.
        if document is None:

            class document_cls:
                class reporter:
                    @staticmethod
                    def debug(*args, **kwargs):
                        pass

            document = document_cls()

        if known_titles is None:
            known_titles = []

        super().__init__(document)

        self.description = ""
        self.desc_len = desc_len
        # Nesting depth of the list containers currently being visited.
        self.list_level = 0
        self.known_titles = known_titles
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Element) -> None:
        """
        Append the text of ``node`` to the description if it is a leaf node.

        :raises nodes.StopTraversal: when a previous departure flagged that
            the description is already full.
        :raises nodes.SkipNode: for invisible nodes, admonitions, raw nodes,
            children of literal blocks, and a first title that is one of the
            known titles.
        """
        if self.stop:
            raise nodes.StopTraversal

        # Skip comments
        if isinstance(node, nodes.Invisible):
            raise nodes.SkipNode

        # Skip all admonitions
        if isinstance(node, nodes.Admonition):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += "-"

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
            raise nodes.SkipNode

        # Only include leaf nodes in the description
        if not node.children:
            text = node.astext().replace("\r", "").replace("\n", " ").strip()

            # Ensure string contains HTML-safe characters
            text = html.escape(text, True)

            # Collapse every run of spaces into a single space. Splitting on
            # the space character and dropping the empty pieces always
            # terminates, unlike a find/replace loop whose search string and
            # replacement are the same.
            text = " ".join(piece for piece in text.split(" ") if piece)

            # Put a space between elements if one does not already exist.
            if (
                self.description
                and text
                and self.description[-1] not in string.whitespace
                and text[0] not in string.whitespace + string.punctuation
            ):
                self.description += " "

            self.description += text

    def dispatch_departure(self, node: nodes.Element) -> None:
        """
        Add separators when leaving ``node`` and enforce the length limit.

        When the description has grown past ``desc_len`` it is cut down to
        ``desc_len`` characters (ending in ``...`` when ``desc_len`` is at
        least 3) and ``self.stop`` is set so that the next
        ``dispatch_visit()`` aborts the traversal.
        """
        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ":"

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ","

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            if self.description and self.description[-1] == ",":
                self.description = self.description[:-1]
            self.description += "."
            self.list_level -= 1

        # Check for length
        # NOTE(review): the text was HTML-escaped before being appended, so
        # this cut can land in the middle of an entity such as "&amp;".
        if len(self.description) > self.desc_len:
            self.description = self.description[: self.desc_len]
            if self.desc_len >= 3:
                self.description = self.description[:-3] + "..."

            self.stop = True
def get_description(
    doctree: nodes.document,
    description_length: int,
    known_titles: Iterable[str] = None,
    document: nodes.document = None,
):
    """
    Build a description string for ``doctree``.

    A ``DescriptionParser`` is created from the given arguments, the doctree
    is walked with it, and the text it collected is returned.

    :param doctree: the document tree to walk.
    :param description_length: forwarded to the parser as ``desc_len``.
    :param known_titles: forwarded to the parser unchanged.
    :param document: forwarded to the parser unchanged.
    :return: the parser's ``description`` after the walk.
    """
    visitor = DescriptionParser(
        desc_len=description_length,
        known_titles=known_titles,
        document=document,
    )
    doctree.walkabout(visitor)
    return visitor.description