Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-127833: Docs: Add a grammar-snippet directive & replace productionlist #127835

Merged
merged 20 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
'sphinx.ext.coverage',
'sphinx.ext.doctest',
'sphinx.ext.extlinks',
'grammar_snippet',
]

# Skip if downstream redistributors haven't installed them
Expand Down
8 changes: 6 additions & 2 deletions Doc/reference/toplevel_components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ File input

All input read from non-interactive files has the same form:

.. productionlist:: python-grammar
.. grammar-snippet::
:group: python-grammar
encukou marked this conversation as resolved.
Show resolved Hide resolved

file_input: (NEWLINE | `statement`)*

This syntax is used in the following situations:
Expand All @@ -85,7 +87,9 @@ Interactive input

Input in interactive mode is parsed using the following grammar:

.. productionlist:: python-grammar
.. grammar-snippet::
:group: python-grammar

interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE

Note that a (top-level) compound statement must be followed by a blank line in
Expand Down
192 changes: 192 additions & 0 deletions Doc/tools/extensions/grammar_snippet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import re

from docutils import nodes
from docutils.parsers.rst import directives
from sphinx import addnodes
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import make_id


class GrammarSnippetDirective(SphinxDirective):
    """Transform a grammar-snippet directive to a Sphinx literal_block

    That is, turn something like:

        .. grammar-snippet:: file
           :group: python-grammar

           file: (NEWLINE | statement)*

    into something similar to Sphinx productionlist, but better suited
    for our needs:
    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
    - Show the listing almost as is, with no auto-alignment.
      The only special character is the backtick, which marks tokens.

    Unlike Sphinx's productionlist, this directive supports options.
    The "group" must be given as a named option.
    The content must be preceded by a blank line (like with most ReST
    directives).
    """

    has_content = True
    option_spec = {
        'group': directives.unchanged,
    }

    # We currently ignore arguments.
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True

    def run(self):
        """Build the snippet nodes for this directive.

        Returns the node list produced by ``make_snippet``.

        Raises a docutils ``DirectiveError`` (via ``self.error``) when the
        mandatory ``group`` option is missing, so that the problem is
        reported against the directive's source location instead of
        surfacing as a bare ``KeyError`` from ``make_snippet``.
        """
        if 'group' not in self.options:
            raise self.error(
                f'Error in "{self.name}" directive: '
                'the "group" option is required.'
            )
        return make_snippet(self, self.options, self.content)


def make_snippet(directive, options, content):
    """Build the docutils nodes for one grammar snippet.

    This is the shared implementation behind GrammarSnippetDirective
    and CompatProductionList.

    *directive* is the running directive; its ``env`` and
    ``state.document`` are used to register rule names as link targets.
    *options* is a mapping that must contain the ``'group'`` key.
    *content* is an iterable of grammar source lines.

    Returns a one-element list holding a paragraph node that wraps the
    generated literal block.
    """

    group_name = options['group']

    # Docutils elements carry a `rawsource` attribute meant to hold the
    # original ReST source.  Sphinx treats it as follows:
    # - an empty value is replaced by `self.astext()`;
    # - if it equals `self.astext()` at output time, syntax highlighting
    #   is applied.  Highlighting works on the plain text, so it would
    #   throw away internal formatting such as references.
    # A non-empty dummy value that never matches avoids both.
    placeholder_source = 'You should not see this.'

    literal = nodes.literal_block(
        placeholder_source,
        '',
        # TODO: Use a dedicated CSS class here and for strings.
        # and add it to the theme too
        classes=['highlight'],
    )

    token_re = re.compile(
        """
            (?P<rule_name>^[a-zA-Z0-9_]+)     # identifier at start of line
            (?=:)                             # ... followed by a colon
        |
            [`](?P<rule_ref>[a-zA-Z0-9_]+)[`] # identifier in backquotes
        |
            (?P<single_quoted>'[^']*')        # string in 'quotes'
        |
            (?P<double_quoted>"[^"]*")        # string in "quotes"
        """,
        re.VERBOSE,
    )

    for line in content:
        consumed = 0
        for found in token_re.finditer(line):
            start, end = found.span()

            # Plain text between the previous token and this one
            if consumed < start:
                literal += nodes.Text(line[consumed:start])
            consumed = end

            # Exactly one named alternative matches, so `lastgroup`
            # identifies which kind of token this is.
            kind = found.lastgroup
            if kind == 'rule_name':
                rule = found['rule_name']
                target = addnodes.literal_strong()

                # Turn `target` into a link target the same way Sphinx
                # does for `production` nodes.  Object names and IDs must
                # stay identical to Sphinx's so existing links keep
                # working.
                std_domain = directive.env.domains['std']
                document = directive.state.document
                target_id = make_id(
                    directive.env,
                    document,
                    f'grammar-token-{group_name}',
                    rule,
                )
                target['ids'].append(target_id)
                document.note_implicit_target(target, target)
                std_domain.note_object(
                    'token',
                    f'{group_name}:{rule}',
                    target_id,
                    location=target,
                )

                target += nodes.Text(rule)
                literal += target
            elif kind == 'rule_ref':
                referenced = found['rule_ref']
                xref = addnodes.pending_xref(
                    referenced,
                    reftype='token',
                    refdomain='std',
                    reftarget=f'{group_name}:{referenced}',
                )
                xref += nodes.Text(referenced)
                literal += xref
            elif kind in ('single_quoted', 'double_quoted'):
                quoted = nodes.inline(classes=['nb'])
                quoted += nodes.Text(found[kind])
                literal += quoted
            else:
                raise ValueError('unhandled match')

        # Remainder of the line, plus the line break
        literal += nodes.Text(line[consumed:] + '\n')

    return [nodes.paragraph('', '', literal)]


class CompatProductionList(SphinxDirective):
    """Create grammar snippets from ReST productionlist syntax

    This is intended to be a transitional directive, used while we switch
    from productionlist to grammar-snippet.
    It makes existing docs that use the ReST syntax look like grammar-snippet,
    as much as possible.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {}

    def run(self):
        """Convert the productionlist argument into grammar-snippet nodes.

        Returns the node list produced by ``make_snippet``.

        Raises a docutils ``DirectiveError`` (via ``self.error``) if a rule
        line contains no colon, since such a line can be neither aligned
        nor split into a rule name and its definition.
        """
        # The "content" of a productionlist is actually the first and only
        # argument. The first line is the group; the rest is the content lines.
        lines = self.arguments[0].splitlines()
        group = lines[0].strip()
        rule_lines = lines[1:]
        options = {'group': group}

        # Every rule line needs a colon; we align on it.  Report a missing
        # colon as a directive error rather than letting `str.index` fail
        # with an opaque ValueError.
        colon_columns = []
        for line in rule_lines:
            column = line.find(':')
            if column < 0:
                raise self.error(
                    f'Error in "{self.name}" directive: '
                    f'no colon in production line {line.strip()!r}.'
                )
            colon_columns.append(column)
        # `default` covers a productionlist that has a group but no rules.
        align_column = max(colon_columns, default=-1) + 1

        content = []
        for line in rule_lines:
            rule_name, _colon, text = line.partition(':')
            rule_name = rule_name.strip()
            # Continuation lines start with a bare colon: they get padding
            # only, so their text lines up under the first line's text.
            name_part = f'{rule_name}:' if rule_name else ''
            content.append(f'{name_part:<{align_column}}{text}')
        return make_snippet(self, options, content)


def setup(app):
    """Sphinx extension entry point.

    Registers the ``grammar-snippet`` directive and overrides the ``std``
    domain's ``productionlist`` directive with CompatProductionList.
    Returns the extension metadata dictionary.
    """
    app.add_directive('grammar-snippet', GrammarSnippetDirective)
    app.add_directive_to_domain(
        'std',
        'productionlist',
        CompatProductionList,
        override=True,
    )
    metadata = {'version': '1.0', 'parallel_read_safe': True}
    return metadata
6 changes: 0 additions & 6 deletions Doc/tools/extensions/pyspecific.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@
Body.enum.converters['lowerroman'] = \
Body.enum.converters['upperroman'] = lambda x: None

# monkey-patch the productionlist directive to allow hyphens in group names
# https://github.com/sphinx-doc/sphinx/issues/11854
from sphinx.domains import std

std.token_re = re.compile(r'`((~?[\w-]*:)?\w+)`')

# backport :no-index:
PyModule.option_spec['no-index'] = directives.flag

Expand Down
Loading