|
1 | 1 | import { LINE_BOUNDARIES } from 'enso-common/src/utilities/data/string'
|
2 |
| -import { ensoMarkdownParser } from './ensoMarkdown' |
| 2 | +import * as Y from 'yjs' |
| 3 | +import { ensoMarkdownParser, ensoStandardMarkdownParser } from './ensoMarkdown' |
3 | 4 | import { xxHash128 } from './ffi'
|
4 | 5 | import type { ConcreteChild, RawConcreteChild } from './print'
|
5 | 6 | import { ensureUnspaced, firstChild, preferUnspaced, unspaced } from './print'
|
6 | 7 | import { Token, TokenType } from './token'
|
7 | 8 | import type { ConcreteRefs, DeepReadonly, DocLine, TextToken } from './tree'
|
8 | 9 |
|
| 10 | +// === AST logic === |
| 11 | + |
9 | 12 | /** Render a documentation line to concrete tokens. */
|
10 | 13 | export function* docLineToConcrete(
|
11 | 14 | docLine: DeepReadonly<DocLine>,
|
@@ -33,49 +36,48 @@ export function* docLineToConcrete(
|
33 | 36 | for (const newline of docLine.newlines) yield preferUnspaced(newline)
|
34 | 37 | }
|
35 | 38 |
|
36 |
| -// === Markdown === |
37 |
| - |
/**
 * Render function documentation to concrete tokens.
 *
 * If the `markdown` content has the same value as when `docLine` was parsed (as indicated by
 * `hash`), the `docLine` will be used (preserving concrete formatting). If it is different, the
 * `markdown` text will be converted to source tokens.
 * @param markdown - Documentation text; it is read once, via `toString()`.
 * @param hash - Hash of the documentation text as of when `docLine` was parsed, if available.
 * @param docLine - The previously-parsed documentation line, if available.
 * @param indent - Indentation forwarded to `docLineToConcrete`; when tokens are regenerated,
 *   `null` is treated as the empty string and the result is extended before being handed to the
 *   token builder.
 * @returns The concrete tokens, or `undefined` when the tokens cannot be reused and the
 *   documentation text is empty.
 */
export function functionDocsToConcrete(
  markdown: DeepReadonly<Y.Text>,
  hash: string | undefined,
  docLine: DeepReadonly<DocLine> | undefined,
  indent: string | null,
): Iterable<RawConcreteChild> | undefined {
  const markdownText = markdown.toString()
  // The text is unchanged since parsing: reuse the original tokens so formatting is preserved.
  if (hash && docLine && xxHash128(markdownText) === hash) return docLineToConcrete(docLine, indent)
  // Nothing to render.
  if (!markdownText) return undefined
  // The text was edited: regenerate tokens from the Markdown.
  const tokensBuilder = new DocTokensBuilder((indent || '') + ' ')
  standardizeMarkdown(markdownText, tokensBuilder)
  return tokensBuilder.build()
}
|
61 | 57 |
|
/**
 * Given Enso documentation comment tokens, returns a model of their Markdown content. This model
 * abstracts away details such as the locations of line breaks that are not paragraph breaks (e.g.
 * lone newlines denoting hard-wrapping of the source code).
 * @param elements - The documentation comment tokens, if any.
 * @returns The Markdown content wrapped in a new `Y.Text`, together with the hash of that content.
 *   The hash is computed from the string before it is wrapped.
 */
export function abstractMarkdown(elements: undefined | TextToken<ConcreteRefs>[]): {
  markdown: Y.Text
  hash: string
} {
  const { tags, rawMarkdown } = toRawMarkdown(elements)
  // Tags come first, one per line, followed by the prerendered Markdown body.
  const markdown = [...tags, prerenderMarkdown(rawMarkdown)].join('\n')
  const hash = xxHash128(markdown)
  return { markdown: new Y.Text(markdown), hash }
}
|
73 | 72 |
|
/**
 * Compute the indentation level of a whitespace string. Every character counts as one, and each
 * tab character counts as one more (so a tab is worth two).
 * @param whitespace - The leading whitespace to measure.
 * @returns The length of `whitespace` plus the number of tab characters it contains.
 */
function indentLevel(whitespace: string) {
  // Splitting on tabs yields one more piece than there are tabs.
  const tabCount = whitespace.split('\t').length - 1
  return whitespace.length + tabCount
}
|
77 | 76 |
|
78 |
| -function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]) { |
| 77 | +function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]): { |
| 78 | + tags: string[] |
| 79 | + rawMarkdown: string |
| 80 | +} { |
79 | 81 | const tags: string[] = []
|
80 | 82 | let readingTags = true
|
81 | 83 | const tokenWhitespace = ({ token: { whitespace } }: TextToken<ConcreteRefs>) => whitespace
|
@@ -113,73 +115,55 @@ function toRawMarkdown(elements: undefined | TextToken<ConcreteRefs>[]) {
|
113 | 115 | return { tags, rawMarkdown }
|
114 | 116 | }
|
115 | 117 |
|
| 118 | +// === Markdown === |
| 119 | + |
/**
 * Convert the Markdown input to a format with "prerendered" linebreaks: Hard-wrapped lines within
 * a paragraph will be joined, and only a single linebreak character is used to separate paragraphs.
 * @param markdown - The Markdown text to convert.
 * @returns The prerendered text, assembled from the parsed nodes and the text between them. Text
 *   following the last visited node is not copied.
 */
export function prerenderMarkdown(markdown: string): string {
  let prerendered = ''
  // End offset and node name of the previously visited node.
  let prevTo = 0
  let prevName: string | undefined = undefined
  const cursor = ensoStandardMarkdownParser.parse(markdown).cursor()
  // NOTE(review): the result of `firstChild()` is ignored, so if the tree has no children the loop
  // below runs once on the node the cursor starts at — confirm this is intended.
  cursor.firstChild()
  do {
    if (prevTo < cursor.from) {
      // Text separating the previous node from this one. Before a paragraph (unless it follows a
      // table), the final character of the separator is dropped.
      const textBetween = markdown.slice(prevTo, cursor.from)
      prerendered +=
        cursor.name === 'Paragraph' && prevName !== 'Table' ? textBetween.slice(0, -1) : textBetween
    }
    const text = markdown.slice(cursor.from, cursor.to)
    // Join hard-wrapped paragraph lines; all other node types are copied verbatim.
    prerendered += cursor.name === 'Paragraph' ? text.replaceAll(/ *\n */g, ' ') : text
    prevTo = cursor.to
    prevName = cursor.name
  } while (cursor.nextSibling())
  return prerendered
}
|
149 | 143 |
|
150 | 144 | /**
|
151 |
| - * Convert from "normalized" Markdown (with hard line-breaks removed) to the standard format, with paragraphs separated |
152 |
| - * by blank lines. |
| 145 | + * Convert from our internal "prerendered" Markdown to the (more standard-compatible) on-disk |
| 146 | + * representation, with paragraphs hard-wrapped and separated by blank lines. |
153 | 147 | */
|
154 |
| -export function normalizedMarkdownToStandard(normalizedMarkdown: string) { |
155 |
| - const { collector, output } = stringCollector() |
156 |
| - standardizeMarkdown(normalizedMarkdown, collector) |
157 |
| - return output |
158 |
| -} |
159 |
| - |
160 |
| -/** |
161 |
| - * Convert from "normalized" Markdown to the on-disk representation, with paragraphs hard-wrapped and separated by blank |
162 |
| - * lines. |
163 |
| - */ |
164 |
| -function standardizeMarkdown(normalizedMarkdown: string, textConsumer: TextConsumer) { |
| 148 | +function standardizeMarkdown(prerenderedMarkdown: string, textConsumer: TextConsumer): void { |
165 | 149 | let printingTags = true
|
166 |
| - const cursor = ensoMarkdownParser.parse(normalizedMarkdown).cursor() |
| 150 | + const cursor = ensoMarkdownParser.parse(prerenderedMarkdown).cursor() |
167 | 151 |
|
168 | 152 | function standardizeDocument() {
|
169 | 153 | let prevTo = 0
|
170 | 154 | let prevName: string | undefined = undefined
|
171 | 155 | cursor.firstChild()
|
172 | 156 | do {
|
173 | 157 | if (prevTo < cursor.from) {
|
174 |
| - const betweenText = normalizedMarkdown.slice(prevTo, cursor.from) |
| 158 | + const betweenText = prerenderedMarkdown.slice(prevTo, cursor.from) |
175 | 159 | for (const _match of betweenText.matchAll(LINE_BOUNDARIES)) {
|
176 | 160 | textConsumer.newline()
|
177 | 161 | }
|
178 |
| - if (cursor.name === 'Paragraph' && prevName !== 'Table') { |
| 162 | + if (cursor.name === 'Paragraph' && prevName === 'Paragraph' && !printingTags) { |
179 | 163 | textConsumer.newline()
|
180 | 164 | }
|
181 | 165 | }
|
182 |
| - const lines = normalizedMarkdown.slice(cursor.from, cursor.to).split(LINE_BOUNDARIES) |
| 166 | + const lines = prerenderedMarkdown.slice(cursor.from, cursor.to).split(LINE_BOUNDARIES) |
183 | 167 | if (cursor.name === 'Paragraph') {
|
184 | 168 | standardizeParagraph(lines)
|
185 | 169 | } else {
|
@@ -218,6 +202,8 @@ function standardizeMarkdown(normalizedMarkdown: string, textConsumer: TextConsu
|
218 | 202 | standardizeDocument()
|
219 | 203 | }
|
220 | 204 |
|
| 205 | +// === AST utilities === |
| 206 | + |
221 | 207 | interface TextConsumer {
|
222 | 208 | text: (text: string) => void
|
223 | 209 | wrapText: (text: string) => void
|
|
0 commit comments