Skip to content

Commit d24d2c8

Browse files
committed
Formatting fixes in markdown conversion for formulas.
1 parent 84dcf64 commit d24d2c8

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

notion/markdown.py

+20 -9
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,11 @@ def _extract_text_and_format_from_ast(item):
8181
if item["type"] == "html_inline":
8282
if item.get("literal", "") == "<s>":
8383
return "", ("s",)
84-
if item.get("literal", "").startswith('<latex'):
85-
elem = minidom.parseString(item.get("literal", "") + '</latex>').documentElement
86-
equation = elem.attributes['equation'].value
84+
if item.get("literal", "").startswith("<latex"):
85+
elem = minidom.parseString(
86+
item.get("literal", "") + "</latex>"
87+
).documentElement
88+
equation = elem.attributes["equation"].value
8789
return "", ("e", equation)
8890

8991
if item["type"] == "emph":
@@ -126,8 +128,15 @@ def markdown_to_notion(markdown):
126128

127129
# commonmark doesn't support latex blocks, so we need to handle it ourselves
128130
def handle_latex(match):
129-
return f'<latex equation="{html.escape(match.group(0)[2:-2])}">\u204d</latex>'
130-
markdown = re.sub(r'(?<!\\\\|\$\$)(?:\\\\)*((\$\$)+)(?!(\$\$))(.+?)(?<!(\$\$))\1(?!(\$\$))', handle_latex, markdown)
131+
return '<latex equation="{}">\u204d</latex>'.format(
132+
html.escape(match.group(0)[2:-2])
133+
)
134+
135+
markdown = re.sub(
136+
r"(?<!\\\\|\$\$)(?:\\\\)*((\$\$)+)(?!(\$\$))(.+?)(?<!(\$\$))\1(?!(\$\$))",
137+
handle_latex,
138+
markdown,
139+
)
131140

132141
# we don't want to touch dashes, so temporarily replace them here
133142
markdown = markdown.replace("-", "⸻")
@@ -160,7 +169,7 @@ def handle_latex(match):
160169
literal = ""
161170

162171
if item["type"] == "html_inline" and literal == "</latex>":
163-
for f in filter(lambda f: f[0] == 'e', format):
172+
for f in filter(lambda f: f[0] == "e", format):
164173
format.remove(f)
165174
break
166175
literal = ""
@@ -195,16 +204,18 @@ def handle_latex(match):
195204

196205
return cleanup_dashes(consolidated)
197206

207+
198208
def cleanup_dashes(thing):
199-
regex_pattern = re.compile('⸻|%E2%B8%BB')
209+
regex_pattern = re.compile("⸻|%E2%B8%BB")
200210
if type(thing) is list:
201211
for counter, value in enumerate(thing):
202212
thing[counter] = cleanup_dashes(value)
203213
elif type(thing) is str:
204-
return regex_pattern.sub('-', thing)
214+
return regex_pattern.sub("-", thing)
205215

206216
return thing
207217

218+
208219
def notion_to_markdown(notion):
209220

210221
markdown_chunks = []
@@ -247,7 +258,7 @@ def notion_to_markdown(notion):
247258
# Check whether a format modifies the content
248259
content_changed = False
249260
for f in sorted_format:
250-
if f[0] == 'e':
261+
if f[0] == "e":
251262
markdown += f[1]
252263
content_changed = True
253264

0 commit comments

Comments
 (0)