Skip to content

Commit eab8d5b

Browse files
authored
🔀 MERGE: Improve Math Parsing (#217)
2 parents 9c4ddac + 2054b9f commit eab8d5b

File tree

18 files changed

+309
-154
lines changed

18 files changed

+309
-154
lines changed

codecov.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
coverage:
2+
status:
3+
project:
4+
default:
5+
target: 90%
6+
threshold: 0.5%
7+
patch:
8+
default:
9+
target: 85%
10+
threshold: 0.5%

docs/conf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@
6767

6868
myst_amsmath_enable = True
6969
myst_admonition_enable = True
70-
myst_html_img = True
70+
myst_html_img_enable = True
71+
myst_dmath_enable = True
7172

7273

7374
def run_apidoc(app):

docs/using/intro.md

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -224,20 +224,46 @@ To do so, use the keywords beginning `myst_`.
224224
- `None`
225225
- [URI schemes](https://en.wikipedia.org/wiki/List_of_URI_schemes) that will be recognised as external URLs in `[](scheme:loc)` syntax, or set `None` to recognise all.
226226
Other links will be resolved as internal cross-references.
227-
* - `myst_html_img`
227+
* - `myst_html_img_enable`
228228
- `False`
229229
- Convert HTML `<img>` elements to sphinx image nodes, see the [image syntax](syntax/images) for details
230-
* - `myst_math_delimiters`
231-
- "dollars"
232-
- Delimiters for parsing math, see the [Math syntax](syntax/math) for details
233-
* - `myst_amsmath_enable`
234-
- `False`
235-
- Enable direct parsing of [amsmath LaTeX environments](https://ctan.org/pkg/amsmath), [see here](syntax/amsmath) for details.
236230
* - `myst_admonition_enable`
237231
- `False`
238232
- Enable admonition style directives, [see here](syntax/admonitions) for details.
239233
`````
240234

235+
Math specific, see the [Math syntax](syntax/math) for more details:
236+
237+
`````{list-table}
238+
:header-rows: 1
239+
240+
* - Option
241+
- Default
242+
- Description
243+
* - `myst_dmath_enable`
244+
- `True`
245+
- Enable parsing of dollar `$` and `$$` encapsulated math
246+
* - `myst_dmath_allow_labels`
247+
- `True`
248+
- Parse `$$...$$ (label)` syntax (if dmath enabled)
249+
* - `myst_dmath_allow_space`
250+
- `True`
251+
- If False then inline math will only be parsed if there are no initial/final spaces,
252+
e.g. `$a$` but not `$ a$` or `$a $`
253+
* - `myst_dmath_allow_digits`
254+
- `True`
255+
- If False then inline math will only be parsed if there are no initial/final digits,
256+
e.g. `$a$` but not `1$a$` or `$a$2` (this is useful for using `$` as currency)
257+
* - `myst_amsmath_enable`
258+
- `False`
259+
- Enable direct parsing of [amsmath LaTeX environments](https://ctan.org/pkg/amsmath)
260+
* - `myst_override_mathjax`
261+
- `True`
262+
- If using [sphinx.ext.mathjax](https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax) (the default) then `mathjax_config` will be overridden,
263+
to ignore `$` delimiters and LaTeX environments, which should instead be handled by
264+
`myst_dmath_enable` and `myst_amsmath_enable` respectively.
265+
`````
266+
241267
### Disable markdown syntax for the parser
242268

243269
If you'd like to either enable or disable custom markdown syntax, use `myst_disable_syntax`.

docs/using/syntax.md

Lines changed: 91 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ described in the [CommonMark Spec](https://spec.commonmark.org/0.29/), which the
3131

3232
Block tokens span multiple lines of content. They are broken down into two sections:
3333

34-
* {ref}`extended-block-tokens` contains *extra* tokens that are not in CommonMark.
35-
* {ref}`commonmark-block-tokens` contains CommonMark tokens that also work, for reference.
34+
- {ref}`extended-block-tokens` contains *extra* tokens that are not in CommonMark.
35+
- {ref}`commonmark-block-tokens` contains CommonMark tokens that also work, for reference.
3636

3737
In addition to these summaries of block-level syntax, see {ref}`extra-markdown-syntax`.
3838

@@ -181,8 +181,8 @@ we have shown equivalent rST syntax for many MyST markdown features below.
181181
Span (or inline) tokens are defined on a single line of content. They are broken down into two
182182
sections below:
183183

184-
* {ref}`extended-span-tokens` contains *extra* tokens that are not in CommonMark.
185-
* {ref}`commonmark-span-tokens` contains CommonMark tokens that also work, for reference.
184+
- {ref}`extended-span-tokens` contains *extra* tokens that are not in CommonMark.
185+
- {ref}`commonmark-span-tokens` contains CommonMark tokens that also work, for reference.
186186

187187
In addition to these summaries of inline syntax, see {ref}`extra-markdown-syntax`.
188188

@@ -648,19 +648,25 @@ header-rows: 1
648648

649649
(syntax/math)=
650650

651-
### Math shortcuts
651+
## Math shortcuts
652652

653-
The style of math parsing is governed by the `myst_math_delimiters` option set in the sphinx `conf.py` [configuration file](https://www.sphinx-doc.org/en/master/usage/configuration.html).
654-
The two common settings are:
653+
Math is parsed by setting, in the sphinx `conf.py` [configuration file](https://www.sphinx-doc.org/en/master/usage/configuration.html), one or both of:
655654

656-
- `myst_math_delimiters = "dollars"` (default)
657-
- inline: `$...$` or `$$...$$`
658-
- display: `$$...$$`
659-
- display + equation label: `$$...$$ (1)`
660-
- `myst_math_delimiters = "brackets"`
661-
- inline: `\(...\)`
662-
- display: `\[...\]`
663-
- display + equation label: `\[...\] (1)`
655+
- `myst_dmath_enable=True` (the default) for parsing of dollar `$` and `$$` encapsulated math.
656+
- `myst_amsmath_enable=True` (off by default) for direct parsing of [amsmath LaTeX environments](https://ctan.org/pkg/amsmath).
657+
658+
These options enable their respective Markdown parser plugins, as detailed in the [markdown-it plugin guide](markdown_it:md/plugins).
659+
660+
### Dollar delimited math
661+
662+
Enabling dollar math will parse the following syntax:
663+
664+
- Inline math: `$...$`
665+
- Display (block) math: `$$...$$`
666+
667+
Additionally if `myst_dmath_allow_labels=True` is set (the default):
668+
669+
- Display (block) math with equation label: `$$...$$ (1)`
664670

665671
For example, `$x_{hey}=it+is^{math}$` renders as $x_{hey}=it+is^{math}$.
666672
This is equivalent to writing:
@@ -669,11 +675,14 @@ This is equivalent to writing:
669675
{math}`x_{hey}=it+is^{math}`
670676
```
671677

672-
```{tip}
673-
Math can be escaped (negated) by adding a `\` before the first symbol, e.g. `\$a$` renders as \$a$.
674-
```
678+
:::{admonition,tip} Escaping Dollars
679+
Math can be escaped (negated) by adding a `\` before the first symbol, e.g. `\$a$` renders as \$a\$.
680+
Escaping can also be used inside math, e.g. `$a=\$3$` renders as $a=\$3$.
675681

676-
Block-level math can then be provided with `$$` signs that wrap the math block you'd like to parse.
682+
Conversely `\\` will negate the escaping, so `\\$a$` renders as \\$a$.
683+
:::
684+
685+
Block-level math can be specified with `$$` signs that wrap the math block you'd like to parse.
677686
For example:
678687

679688
```latex
@@ -721,9 +730,17 @@ $$ (eqn:best)
721730
722731
This is the best equation {eq}`eqn:best`
723732
733+
There are a few other options available to control dollar math parsing:
734+
735+
`myst_dmath_allow_space=False` will cause inline math to only be parsed if there are no initial / final spaces, e.g. `$a$` but not `$ a$` or `$a $`.
736+
737+
`myst_dmath_allow_digits=False` will cause inline math to only be parsed if there are no initial / final digits, e.g. `$a$` but not `1$a$` or `$a$2`.
738+
739+
These options can both be useful if you also wish to use `$` as a unit of currency.
740+
724741
(syntax/amsmath)=
725742
726-
### Direct LaTeX Math (optional)
743+
### Direct LaTeX Math
727744
728745
You can enable direct parsing of [amsmath](https://ctan.org/pkg/amsmath) LaTeX equations by setting `myst_amsmath_enable = True` in your sphinx `conf.py`.
729746
These top-level math environments will then be directly parsed:
@@ -763,9 +780,56 @@ a_{21}& =b_{21}&
763780
We hope to implement this in a future update (see [executablebooks/MyST-Parser#202](https://github.com/executablebooks/MyST-Parser/issues/202))!
764781
:::
765782
783+
### Math in other block elements
784+
785+
Math will also work when nested in other block elements, like lists or quotes:
786+
787+
```md
788+
- $$ a = 1 $$
789+
- \begin{gather*}
790+
a_1=b_1+c_1\\a_2=b_2+c_2-d_2+e_2
791+
\end{gather*}
792+
793+
> $$ a = 1 $$
794+
> \begin{gather*}
795+
a_1=b_1+c_1\\a_2=b_2+c_2-d_2+e_2
796+
\end{gather*}
797+
```
798+
799+
- $$ a = 1 $$
800+
- \begin{gather*}
801+
a_1=b_1+c_1\\a_2=b_2+c_2-d_2+e_2
802+
\end{gather*}
803+
804+
> $$ a = 1 $$
805+
> \begin{gather*}
806+
a_1=b_1+c_1\\a_2=b_2+c_2-d_2+e_2
807+
\end{gather*}
808+
809+
### Mathjax and math parsing
810+
811+
When building HTML using the [sphinx.ext.mathjax](https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax) extension (enabled by default), its default configuration is to also search for `$` delimiters and LaTeX environments (see [the tex2jax preprocessor](https://docs.mathjax.org/en/v2.7-latest/options/preprocessors/tex2jax.html#configure-tex2jax)).
812+
813+
Since such parsing is already covered by the plugins above, MyST-Parser disables this behaviour by overriding the `mathjax_config` option with:
814+
815+
```python
816+
mathjax_config = {
817+
"tex2jax": {
818+
"inlineMath": [["\\(", "\\)"]],
819+
"displayMath": [["\\[", "\\]"]],
820+
"processRefs": False,
821+
"processEnvironments": False,
822+
}
823+
}
824+
```
825+
826+
These delimiters are used because they are how `sphinx.ext.mathjax` wraps the math content in the built HTML documents.
827+
828+
To inhibit this override, set `myst_override_mathjax=False`.
829+
766830
(syntax/frontmatter)=
767831

768-
### Front Matter
832+
## Front Matter
769833

770834
This is a YAML block at the start of the document, as used for example in
771835
[jekyll](https://jekyllrb.com/docs/front-matter/). Sphinx intercepts these data and
@@ -786,7 +850,7 @@ This is an orphan document, not specified in any toctrees.
786850

787851
(syntax/comments)=
788852

789-
### Comments
853+
## Comments
790854

791855
You may add comments by putting the `%` character at the beginning of a line. This will
792856
prevent the line from being parsed into the output document.
@@ -819,7 +883,7 @@ another line
819883

820884
(syntax/blockbreaks)=
821885

822-
### Block Breaks
886+
## Block Breaks
823887

824888
You may add a block break by putting `+++` at the beginning of a line.
825889
This construct's intended use case is for mapping to cell based document formats,
@@ -839,7 +903,7 @@ Is below, but it won't be parsed into the document.
839903

840904
(syntax/targets)=
841905

842-
### Targets and Cross-Referencing
906+
## Targets and Cross-Referencing
843907

844908
Targets are used to define custom anchors that you can refer to elsewhere in your
845909
documentation. They generally go before section titles so that you can easily refer
@@ -897,7 +961,7 @@ markdown: `[](syntax.md)` will result in: [](syntax.md).
897961

898962
(syntax/images)=
899963

900-
### Images
964+
## Images
901965

902966
MyST provides a few different syntaxes for including images in your documentation, as explained below.
903967

@@ -935,7 +999,7 @@ The final option is directly using HTML, which is also parsed by MyST.
935999
This is usually a bad option, because the HTML is treated as raw text during the build process and so sphinx will not recognise that the image file is to be copied, and will not output the HTML into non-HTML output formats.
9361000

9371001
HTML parsing to the rescue!
938-
By setting `myst_html_img = True` in the sphinx `conf.py` configuration file, MySt-Parser will attempt to convert any isolated `img` tags (i.e. not wrapped in any other HTML) to the internal representation used in sphinx.
1002+
By setting `myst_html_img_enable = True` in the sphinx `conf.py` configuration file, MyST-Parser will attempt to convert any isolated `img` tags (i.e. not wrapped in any other HTML) to the internal representation used in sphinx.
9391003

9401004
```md
9411005
<img src="img/fun-fish.png" alt="fishy" class="bg-primary" width="200px">
@@ -947,7 +1011,7 @@ Allowed attributes are equivalent to the `image` directive: src, alt, class, wid
9471011
Any other attributes will be dropped.
9481012

9491013
(syntax/footnotes)=
950-
### Footnotes
1014+
## Footnotes
9511015

9521016
Footnote labels **start with `^`** and can then be any alpha-numeric string (no spaces),
9531017
which is case-insensitive.

myst_parser/__init__.py

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,42 +19,45 @@ def setup_sphinx(app):
1919
# so that it can be called by external packages like myst_nb
2020
from myst_parser.myst_refs import MystReferenceResolver
2121
from myst_parser.myst_amsmath import MystAmsMathTransform
22+
from myst_parser.main import MdParserConfig
2223

2324
app.add_post_transform(MystReferenceResolver)
2425
app.add_post_transform(MystAmsMathTransform)
2526

26-
app.add_config_value("myst_disable_syntax", (), "env")
27-
# see https://en.wikipedia.org/wiki/List_of_URI_schemes
28-
app.add_config_value("myst_url_schemes", None, "env")
29-
app.add_config_value("myst_math_delimiters", "dollars", "env")
30-
app.add_config_value("myst_amsmath_enable", False, "env")
31-
app.add_config_value("myst_admonition_enable", False, "env")
32-
app.add_config_value("myst_html_img", False, "env")
27+
for name, default in MdParserConfig().as_dict().items():
28+
if not name == "renderer":
29+
app.add_config_value(f"myst_{name}", default, "env")
3330

34-
app.connect("config-inited", validate_config)
31+
app.connect("builder-inited", create_myst_config)
3532

3633

37-
def validate_config(app, config):
34+
def create_myst_config(app):
3835
from sphinx.util import logging
36+
from sphinx.util.console import bold
37+
from myst_parser.main import MdParserConfig
3938

4039
logger = logging.getLogger(__name__)
4140

42-
# TODO raise errors or log error with sphinx?
41+
values = {
42+
name: app.config[f"myst_{name}"]
43+
for name in MdParserConfig().as_dict().keys()
44+
if name != "renderer"
45+
}
46+
4347
try:
44-
for s in config.myst_disable_syntax:
45-
assert isinstance(s, str)
46-
except (AssertionError, TypeError):
47-
logger.error("myst_disable_syntax config option not of type List[str]")
48-
49-
allowed_delimiters = ["brackets", "kramdown", "dollars", "julia"]
50-
if config.myst_math_delimiters not in allowed_delimiters:
51-
logger.error(
52-
"myst_math_delimiters config option not an allowed name: "
53-
+ f"{allowed_delimiters}"
54-
)
55-
56-
if not isinstance(config.myst_amsmath_enable, bool):
57-
logger.error("myst_amsmath_enable config option not of type boolean")
58-
59-
if not isinstance(config.myst_admonition_enable, bool):
60-
logger.error("myst_admonition_enable config option not of type boolean")
48+
app.env.myst_config = MdParserConfig(**values)
49+
logger.info(bold("myst v%s:") + " %s", __version__, app.env.myst_config)
50+
except (TypeError, ValueError) as error:
51+
logger.error("myst configuration invalid: %s", error.args[0])
52+
app.env.myst_config = MdParserConfig()
53+
54+
# https://docs.mathjax.org/en/v2.7-latest/options/preprocessors/tex2jax.html#configure-tex2jax
55+
if app.env.myst_config.override_mathjax:
56+
app.config.mathjax_config = {
57+
"tex2jax": {
58+
"inlineMath": [["\\(", "\\)"]],
59+
"displayMath": [["\\[", "\\]"]],
60+
"processRefs": False,
61+
"processEnvironments": False,
62+
}
63+
}

myst_parser/cli/benchmark.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,16 @@ def run_myst_parser_html(package, text):
7777
@benchmark("myst_parser.main")
7878
def run_myst_parser_docutils(package, text):
7979
package.to_docutils(
80-
text, renderer="docutils", options={"ignore_missing_refs": True}
80+
text,
81+
package.MdParserConfig(renderer="docutils"),
82+
options={"ignore_missing_refs": True},
8183
)
8284

8385

8486
@benchmark("myst_parser.main")
8587
def run_myst_parser_sphinx(package, text):
8688
package.to_docutils(
87-
text,
88-
renderer="sphinx",
89-
options={"ignore_missing_refs": True},
90-
in_sphinx_env=True,
89+
text, options={"ignore_missing_refs": True}, in_sphinx_env=True,
9190
)
9291

9392

myst_parser/docutils_renderer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ def render_html_inline(self, token):
470470

471471
def render_html_block(self, token):
472472
node = None
473-
if self.config.get("myst_html_img", False):
473+
if self.config.get("enable_html_img", False):
474474
node = HTMLImgParser().parse(token.content, self.document, token.map[0])
475475
if node is None:
476476
node = nodes.raw("", token.content, format="html")

0 commit comments

Comments
 (0)