|
1 | 1 | import { LINE_BOUNDARIES } from 'enso-common/src/utilities/data/string' |
2 | | -import { ensoMarkdownParser } from './ensoMarkdown' |
| 2 | +import * as Y from 'yjs' |
| 3 | +import { ensoMarkdownParser, ensoStandardMarkdownParser } from './ensoMarkdown' |
3 | 4 | import { xxHash128 } from './ffi' |
4 | 5 | import type { ConcreteChild, RawConcreteChild } from './print' |
5 | 6 | import { ensureUnspaced, firstChild, preferUnspaced, unspaced } from './print' |
6 | 7 | import { Token, TokenType } from './token' |
7 | 8 | import type { ConcreteRefs, DeepReadonly, DocLine, TextToken } from './tree' |
8 | 9 |
|
| 10 | +// === AST logic === |
| 11 | + |
9 | 12 | /** Render a documentation line to concrete tokens. */ |
10 | 13 | export function* docLineToConcrete( |
11 | 14 | docLine: DeepReadonly<DocLine>, |
@@ -33,49 +36,48 @@ export function* docLineToConcrete( |
33 | 36 | for (const newline of docLine.newlines) yield preferUnspaced(newline) |
34 | 37 | } |
35 | 38 |
|
36 | | -// === Markdown === |
37 | | - |
38 | 39 | /** |
39 | | - * Render function documentation to concrete tokens. If the `markdown` content has the same value as when `docLine` was |
40 | | - * parsed (as indicated by `hash`), the `docLine` will be used (preserving concrete formatting). If it is different, the |
41 | | - * `markdown` text will be converted to source tokens. |
| 40 | + * Render function documentation to concrete tokens. If the `markdown` content has the same value as |
| 41 | + * when `docLine` was parsed (as indicated by `hash`), the `docLine` will be used (preserving |
| 42 | + * concrete formatting). If it is different, the `markdown` text will be converted to source tokens. |
42 | 43 | */ |
43 | 44 | export function functionDocsToConcrete( |
44 | | - markdown: string, |
| 45 | + markdown: DeepReadonly<Y.Text>, |
45 | 46 | hash: string | undefined, |
46 | 47 | docLine: DeepReadonly<DocLine> | undefined, |
47 | 48 | indent: string | null, |
48 | 49 | ): Iterable<RawConcreteChild> | undefined { |
49 | | - return ( |
50 | | - hash && docLine && xxHash128(markdown) === hash ? docLineToConcrete(docLine, indent) |
51 | | - : markdown ? markdownYTextToTokens(markdown, (indent || '') + ' ') |
52 | | - : undefined |
53 | | - ) |
54 | | -} |
55 | | - |
56 | | -function markdownYTextToTokens(yText: string, indent: string): Iterable<ConcreteChild<Token>> { |
57 | | - const tokensBuilder = new DocTokensBuilder(indent) |
58 | | - standardizeMarkdown(yText, tokensBuilder) |
| 50 | + const markdownText = markdown.toString() |
| 51 | + if (hash && docLine && xxHash128(markdownText) === hash) return docLineToConcrete(docLine, indent) |
| 52 | + if (!markdownText) return |
| 53 | + const tokensBuilder = new DocTokensBuilder((indent || '') + ' ') |
| 54 | + standardizeMarkdown(markdownText, tokensBuilder) |
59 | 55 | return tokensBuilder.build() |
60 | 56 | } |
61 | 57 |
|
62 | 58 | /** |
63 | | - * Given Enso documentation comment tokens, returns a model of their Markdown content. This model abstracts away details |
64 | | - * such as the locations of line breaks that are not paragraph breaks (e.g. lone newlines denoting hard-wrapping of the |
65 | | - * source code). |
| 59 | + * Given Enso documentation comment tokens, returns a model of their Markdown content. This model |
| 60 | + * abstracts away details such as the locations of line breaks that are not paragraph breaks (e.g. |
| 61 | + * lone newlines denoting hard-wrapping of the source code). |
66 | 62 | */ |
67 | | -export function abstractMarkdown(elements: undefined | TextToken<ConcreteRefs>[]) { |
| 63 | +export function abstractMarkdown(elements: undefined | TextToken<ConcreteRefs>[]): { |
| 64 | + markdown: Y.Text |
| 65 | + hash: string |
| 66 | +} { |
68 | 67 | const { tags, rawMarkdown } = toRawMarkdown(elements) |
69 | | - const markdown = [...tags, normalizeMarkdown(rawMarkdown)].join('\n') |
| 68 | + const markdown = [...tags, prerenderMarkdown(rawMarkdown)].join('\n') |
70 | 69 | const hash = xxHash128(markdown) |
71 | | - return { markdown, hash } |
| 70 | + return { markdown: new Y.Text(markdown), hash } |
72 | 71 | } |
73 | 72 |
|
74 | 73 | function indentLevel(whitespace: string) { |
75 | 74 | return whitespace.length + whitespace.split('\t').length - 1 |
76 | 75 | } |
77 | 76 |
|
78 | | -function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]) { |
| 77 | +function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]): { |
| 78 | + tags: string[] |
| 79 | + rawMarkdown: string |
| 80 | +} { |
79 | 81 | const tags: string[] = [] |
80 | 82 | let readingTags = true |
81 | 83 | const tokenWhitespace = ({ token: { whitespace } }: TextToken<ConcreteRefs>) => whitespace |
@@ -113,73 +115,55 @@ function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]) { |
113 | 115 | return { tags, rawMarkdown } |
114 | 116 | } |
115 | 117 |
|
| 118 | +// === Markdown === |
| 119 | + |
116 | 120 | /** |
117 | | - * Convert the Markdown input to a format with rendered-style linebreaks: Hard-wrapped lines within a paragraph will be |
118 | | - * joined, and only a single linebreak character is used to separate paragraphs. |
| 121 | + * Convert the Markdown input to a format with "prerendered" linebreaks: Hard-wrapped lines within |
| 122 | + * a paragraph will be joined, and only a single linebreak character is used to separate paragraphs. |
119 | 123 | */ |
120 | | -export function normalizeMarkdown(rawMarkdown: string): string { |
121 | | - let normalized = '' |
| 124 | +export function prerenderMarkdown(markdown: string): string { |
| 125 | + let prerendered = '' |
122 | 126 | let prevTo = 0 |
123 | 127 | let prevName: string | undefined = undefined |
124 | | - const cursor = ensoMarkdownParser.parse(rawMarkdown).cursor() |
| 128 | + const cursor = ensoStandardMarkdownParser.parse(markdown).cursor() |
125 | 129 | cursor.firstChild() |
126 | 130 | do { |
127 | 131 | if (prevTo < cursor.from) { |
128 | | - const textBetween = rawMarkdown.slice(prevTo, cursor.from) |
129 | | - normalized += |
| 132 | + const textBetween = markdown.slice(prevTo, cursor.from) |
| 133 | + prerendered += |
130 | 134 | cursor.name === 'Paragraph' && prevName !== 'Table' ? textBetween.slice(0, -1) : textBetween |
131 | 135 | } |
132 | | - const text = rawMarkdown.slice(cursor.from, cursor.to) |
133 | | - normalized += cursor.name === 'Paragraph' ? text.replaceAll(/ *\n */g, ' ') : text |
| 136 | + const text = markdown.slice(cursor.from, cursor.to) |
| 137 | + prerendered += cursor.name === 'Paragraph' ? text.replaceAll(/ *\n */g, ' ') : text |
134 | 138 | prevTo = cursor.to |
135 | 139 | prevName = cursor.name |
136 | 140 | } while (cursor.nextSibling()) |
137 | | - return normalized |
138 | | -} |
139 | | - |
140 | | -function stringCollector() { |
141 | | - let output = '' |
142 | | - const collector = { |
143 | | - text: (text: string) => (output += text), |
144 | | - wrapText: (text: string) => (output += text), |
145 | | - newline: () => (output += '\n'), |
146 | | - } |
147 | | - return { collector, output } |
| 141 | + return prerendered |
148 | 142 | } |
149 | 143 |
|
150 | 144 | /** |
151 | | - * Convert from "normalized" Markdown (with hard line-breaks removed) to the standard format, with paragraphs separated |
152 | | - * by blank lines. |
| 145 | + * Convert from our internal "prerendered" Markdown to the (more standard-compatible) on-disk |
| 146 | + * representation, with paragraphs hard-wrapped and separated by blank lines. |
153 | 147 | */ |
154 | | -export function normalizedMarkdownToStandard(normalizedMarkdown: string) { |
155 | | - const { collector, output } = stringCollector() |
156 | | - standardizeMarkdown(normalizedMarkdown, collector) |
157 | | - return output |
158 | | -} |
159 | | - |
160 | | -/** |
161 | | - * Convert from "normalized" Markdown to the on-disk representation, with paragraphs hard-wrapped and separated by blank |
162 | | - * lines. |
163 | | - */ |
164 | | -function standardizeMarkdown(normalizedMarkdown: string, textConsumer: TextConsumer) { |
| 148 | +function standardizeMarkdown(prerenderedMarkdown: string, textConsumer: TextConsumer): void { |
165 | 149 | let printingTags = true |
166 | | - const cursor = ensoMarkdownParser.parse(normalizedMarkdown).cursor() |
| 150 | + const cursor = ensoMarkdownParser.parse(prerenderedMarkdown).cursor() |
167 | 151 |
|
168 | 152 | function standardizeDocument() { |
169 | 153 | let prevTo = 0 |
170 | 154 | let prevName: string | undefined = undefined |
171 | 155 | cursor.firstChild() |
172 | 156 | do { |
173 | 157 | if (prevTo < cursor.from) { |
174 | | - const betweenText = normalizedMarkdown.slice(prevTo, cursor.from) |
| 158 | + const betweenText = prerenderedMarkdown.slice(prevTo, cursor.from) |
175 | 159 | for (const _match of betweenText.matchAll(LINE_BOUNDARIES)) { |
176 | 160 | textConsumer.newline() |
177 | 161 | } |
178 | | - if (cursor.name === 'Paragraph' && prevName !== 'Table') { |
| 162 | + if (cursor.name === 'Paragraph' && prevName === 'Paragraph' && !printingTags) { |
179 | 163 | textConsumer.newline() |
180 | 164 | } |
181 | 165 | } |
182 | | - const lines = normalizedMarkdown.slice(cursor.from, cursor.to).split(LINE_BOUNDARIES) |
| 166 | + const lines = prerenderedMarkdown.slice(cursor.from, cursor.to).split(LINE_BOUNDARIES) |
183 | 167 | if (cursor.name === 'Paragraph') { |
184 | 168 | standardizeParagraph(lines) |
185 | 169 | } else { |
@@ -218,6 +202,8 @@ function standardizeMarkdown(normalizedMarkdown: string, textConsumer: TextConsu |
218 | 202 | standardizeDocument() |
219 | 203 | } |
220 | 204 |
|
| 205 | +// === AST utilities === |
| 206 | + |
221 | 207 | interface TextConsumer { |
222 | 208 | text: (text: string) => void |
223 | 209 | wrapText: (text: string) => void |
|
0 commit comments