generated from ryansonshine/typescript-npm-package-template
-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathtransform.ts
269 lines (240 loc) · 9.3 KB
/
transform.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
import chalk from "chalk";
import {
IDocuNotionContext,
IRegexMarkdownModification,
} from "./plugins/pluginTypes";
import { error, info, logDebug, logDebugFn, verbose, warning } from "./log";
import { NotionPage } from "./NotionPage";
import { IDocuNotionConfig } from "./config/configuration";
import { NotionBlock } from "./types";
/**
 * Reads one Notion page and renders it as a complete markdown document.
 *
 * Logs which page is being converted (together with a short description of
 * its slug situation), fetches the page's child blocks through
 * `context.getBlockChildren`, converts those blocks to markdown, and puts the
 * plugin-generated front matter in front of the result.
 *
 * @param config - configuration handed on to the conversion steps
 * @param context - conversion context; must provide `getBlockChildren`
 * @param page - the Notion page being converted
 * @returns the front matter, a newline, and then the markdown body
 */
export async function getMarkdownForPage(
  config: IDocuNotionConfig,
  context: IDocuNotionContext,
  page: NotionPage
): Promise<string> {
  const location = `${page.layoutContext}/${page.nameOrTitle}`;

  // Describe the slug: the explicit one if present, otherwise why there is none.
  let slugLabel = "NO SLUG";
  if (page.hasExplicitSlug) {
    slugLabel = page.slug;
  } else if (page.foundDirectlyInOutline) {
    slugLabel = "Descendant of Outline, not Database";
  }
  info(`Reading & converting page ${location} (${chalk.blue(slugLabel)})`);

  const blocks = await context.getBlockChildren(page.pageId);
  // The JSON dump is wrapped in a function so the logger decides whether to build it.
  logDebugFn("markdown from page", () => JSON.stringify(blocks, null, 2));

  const body = await getMarkdownFromNotionBlocks(context, config, blocks);
  const frontMatter = getMarkdownFrontMatter(context, config, page);
  return frontMatter + "\n" + body;
}
/**
 * Converts already-fetched Notion blocks into markdown.
 *
 * This is separate from `getMarkdownForPage` so that unit tests can supply
 * the block contents directly.
 *
 * Steps, in order:
 *  1. plugin modifications to the blocks received from the Notion API,
 *  2. registration of plugin overrides for the default notion-to-markdown
 *     conversions,
 *  3. the main conversion to markdown using the notion-to-md library,
 *  4. link corrections on the resulting markdown,
 *  5. regex-based tweaks (usually related to docusaurus), which may also
 *     contribute import lines.
 *
 * @param context - conversion context shared by all steps
 * @param config - configuration whose plugins drive each step
 * @param blocks - the Notion blocks to convert
 * @returns the collected imports, a newline, and then the markdown body
 */
export async function getMarkdownFromNotionBlocks(
  context: IDocuNotionContext,
  config: IDocuNotionConfig,
  blocks: Array<NotionBlock>
): Promise<string> {
  // Steps 1 and 2: prepare the blocks and the converter.
  doNotionBlockTransforms(blocks, config);
  registerNotionToMarkdownCustomTransforms(config, context);

  // Step 3: raw conversion.
  const rawMarkdown = await doNotionToMarkdown(context, blocks);

  // Step 4: fix links now that they are in markdown form.
  const linkedMarkdown = doLinkFixes(context, rawMarkdown, config);

  // Step 5: regex tweaks, which hand back the body and any imports separately.
  const transformed = await doTransformsOnMarkdown(
    context,
    config,
    linkedMarkdown
  );
  return `${transformed.imports}\n${transformed.body}`;
}
/**
 * Lets plugins modify Notion blocks before they are converted to markdown.
 *
 * For each block in turn, every plugin that declares
 * `notionBlockModifications` has each of its modifications applied to that
 * block, in plugin order. The blocks are handed to `modify` as-is, so any
 * change a plugin makes is made to the objects in `blocks`.
 *
 * @param blocks - the Notion blocks to run through the plugins
 * @param config - configuration whose plugins are consulted
 */
function doNotionBlockTransforms(
  blocks: Array<NotionBlock>,
  config: IDocuNotionConfig
) {
  for (const block of blocks) {
    for (const plugin of config.plugins) {
      const modifications = plugin.notionBlockModifications;
      if (!modifications) {
        continue; // this plugin does not touch notion blocks
      }
      for (const modification of modifications) {
        logDebug("transforming block with plugin", plugin.name);
        modification.modify(block);
      }
    }
  }
}
/**
 * Applies every plugin's regex-based markdown modifications to `input`.
 *
 * Modifications run one after another, in plugin order; each one scans the
 * markdown as left by the previous one. For every match the replacement comes
 * from `getReplacement` (when the modification has one) or else from
 * `replacementPattern`, in which the first `$1` is replaced by the first
 * capture group of the plugin's regex. A modification whose replacement is
 * `undefined` leaves that match alone. Fenced code blocks are skipped unless
 * the modification sets `includeCodeBlocks`.
 *
 * Replacement text is always inserted literally: `$$`, `$&`, `$1` and the like
 * in plugin output are NOT interpreted as replacement patterns.
 *
 * @param context - conversion context, passed through to `getReplacement`
 * @param config - configuration whose plugins supply the modifications
 * @param input - the markdown to transform
 * @returns `body`, the transformed markdown, and `imports`, the distinct
 *   import lines of every modification that made a replacement, joined with
 *   newlines
 */
async function doTransformsOnMarkdown(
  context: IDocuNotionContext,
  config: IDocuNotionConfig,
  input: string
) {
  const regexMods: IRegexMarkdownModification[] = config.plugins
    .filter(plugin => !!plugin.regexMarkdownModifications)
    .map(plugin => {
      const mods = plugin.regexMarkdownModifications!;
      // stick the name of the plugin into each mod for logging
      return mods.map(m => ({ name: plugin.name, ...m }));
    })
    .flat();

  // regex that matches markdown code blocks
  const codeBlocks = /```.*\n[\s\S]*?\n```/;
  const imports = new Set<string>();
  let body = input;

  for (const mod of regexMods) {
    // Code blocks are matched first so that the plugin's pattern cannot fire
    // inside one. Wrapping the plugin's pattern in a group means that group 1
    // is the plugin's whole match and the plugin's own groups start at 2.
    // NOTE(review): only the "g" flag is used here, so flags on the plugin's
    // regex (e.g. "i") do not affect which text is found — confirm intended.
    const combined = new RegExp(
      `${codeBlocks.source}|(${mod.regex.source})`,
      "g"
    );
    // regex.exec is stateful for global/sticky regexes, so the plugin's view
    // of each match is produced by a private copy without those flags rather
    // than by the plugin's own regex object.
    const pluginRegex = new RegExp(
      mod.regex.source,
      mod.regex.flags.replace(/[gy]/g, "")
    );

    // Match offsets refer to `snapshot`. The new body is assembled from slices
    // of it, so replacements that change the length cannot throw off the
    // position of later matches.
    const snapshot = body;
    const pieces: string[] = [];
    let cursor = 0;
    let match: RegExpExecArray | null;

    while ((match = combined.exec(snapshot)) !== null) {
      const original = match[0];
      if (original === "") {
        // exec() does not advance past an empty match; do it by hand so a
        // pattern that can match nothing does not loop forever.
        combined.lastIndex++;
        continue;
      }
      if (
        original.startsWith("```") &&
        original.endsWith("```") &&
        !mod.includeCodeBlocks
      ) {
        continue; // code block, and they didn't say to include them
      }
      // NOTE(review): with includeCodeBlocks set, a fenced block is matched
      // (and replaced) as a whole, not match-by-match inside it — confirm
      // that this is what plugins expect.

      let replacement: string | undefined = undefined;
      if (mod.getReplacement) {
        // our match here has an extra group, which is an implementation detail
        // that shouldn't be made visible to the plugin
        const matchAsThePluginWouldExpectIt = pluginRegex.exec(original);
        if (matchAsThePluginWouldExpectIt === null) {
          // e.g. the pattern relies on text outside of the matched span
          verbose(
            `[${(mod as any).name}] could not re-match ${original}; skipping`
          );
          continue;
        }
        replacement = await mod.getReplacement(
          context,
          matchAsThePluginWouldExpectIt
        );
      } else if (mod.replacementPattern) {
        const firstGroup = match[2];
        logDebug(
          "doTransformsOnMarkdown",
          `mod.replacementPattern.replace("$1", ${firstGroup})`
        );
        // a replacer function makes the captured text go in literally
        replacement = mod.replacementPattern.replace("$1", () => firstGroup);
      }

      if (replacement !== undefined) {
        verbose(`[${(mod as any).name}] ${original} --> ${replacement}`);
        pieces.push(snapshot.substring(cursor, match.index), replacement);
        cursor = match.index + original.length;
        // add any library imports
        mod.imports?.forEach(imp => imports.add(imp));
      }
    }

    pieces.push(snapshot.substring(cursor));
    body = pieces.join("");
  }

  logDebug("doTransformsOnMarkdown", "body after regex: " + body);
  return { body, imports: [...imports].join("\n") };
}
/**
 * Runs the main blocks-to-markdown conversion.
 *
 * Hands the blocks to `notionToMarkdown.blocksToMarkdown` on the context and
 * then turns the result into a single string with
 * `notionToMarkdown.toMarkdownString`.
 *
 * @param docunotionContext - conversion context holding the `notionToMarkdown`
 *   converter
 * @param blocks - the Notion blocks to convert
 * @returns whatever `toMarkdownString` produces for the converted blocks
 */
async function doNotionToMarkdown(
  docunotionContext: IDocuNotionContext,
  blocks: Array<NotionBlock>
) {
  const converted = await docunotionContext.notionToMarkdown.blocksToMarkdown(
    blocks
  );
  return docunotionContext.notionToMarkdown.toMarkdownString(converted);
}
/**
 * Corrects links after they have been converted to markdown.
 *
 * Every markdown link in `markdown` is offered to the plugins that have a
 * `linkModifier`, in plugin order. The first plugin whose `match` regex matches
 * the link AND whose `convert` returns something different wins; its output
 * replaces exactly that occurrence of the link. Links that no plugin changes
 * are left as they are.
 *
 * Note: from notion (or notion-md?) we get slightly different hrefs depending
 * on whether the link is "inline" (has some other text that's been turned
 * into a link) or "raw".
 * Raw links come in without a leading slash, e.g. [link_to_page](4a6de8c0-b90b-444b-8a7b-d534d6ec71a4)
 * Inline links come in with a leading slash, e.g. [pointer to the introduction](/4a6de8c0b90b444b8a7bd534d6ec71a4)
 *
 * @param context - conversion context, passed through to `convert`
 * @param markdown - the markdown whose links should be checked
 * @param config - configuration whose plugins may modify links
 * @returns the markdown with the converted links in place
 */
function doLinkFixes(
  context: IDocuNotionContext,
  markdown: string,
  config: IDocuNotionConfig
): string {
  // The link text is matched lazily so that several links on the same line
  // are found one by one instead of as one big "link" that runs from the
  // first "[" to the last ")".
  const linkRegExp = /\[.*?\]\([^)]*\)/g;
  logDebug("markdown before link fixes", markdown);

  // The result is assembled from slices of the untouched input, so a change
  // to one link cannot disturb the position of the next, and converted text
  // is inserted literally ("$&", "$$" etc. have no special meaning).
  const pieces: string[] = [];
  let cursor = 0;
  let match: RegExpExecArray | null;

  // The key to understanding this `while` is that linkRegExp actually has
  // state, and it gives you a new match each time.
  // https://stackoverflow.com/a/1520853/723299
  while ((match = linkRegExp.exec(markdown)) !== null) {
    const originalLinkMarkdown = match[0];
    const linkStart = match.index;
    verbose(
      `Checking to see if a plugin wants to modify "${originalLinkMarkdown}" `
    );
    // We only use the first plugin that matches and makes a change to the link.
    // Enhance: we could take the time to see if multiple plugins match, and
    // point this out in verbose logging mode.
    config.plugins.some(plugin => {
      if (!plugin.linkModifier) return false;

      // A plugin regex with the "g" or "y" flag remembers where its last
      // match ended; start from the beginning for every link.
      const matcher = plugin.linkModifier.match;
      matcher.lastIndex = 0;
      if (matcher.exec(originalLinkMarkdown) === null) {
        verbose(`plugin "${plugin.name}" did not match this url`);
        return false;
      }

      const newMarkdown = plugin.linkModifier.convert(
        context,
        originalLinkMarkdown
      );
      if (newMarkdown === originalLinkMarkdown) {
        verbose(`plugin "${plugin.name}" did not change this url`);
        return false;
      }

      pieces.push(markdown.substring(cursor, linkStart), newMarkdown);
      cursor = linkStart + originalLinkMarkdown.length;
      verbose(
        `plugin "${plugin.name}" transformed link: ${originalLinkMarkdown}-->${newMarkdown}`
      );
      return true; // the first plugin that matches and does something wins
    });
  }

  pieces.push(markdown.substring(cursor));
  return pieces.join("");
}
/**
 * Registers plugin overrides for the conversions that notion-to-md does.
 *
 * For every transform listed in a plugin's `notionToMarkdownTransforms`, a
 * custom transformer is registered for that transform's block type with
 * `notionToMarkdown.setCustomTransformer`. When the registered transformer is
 * called, it logs the conversion and delegates to the transform's
 * `getStringFromBlock`, passing the context along with the block.
 *
 * @param config - configuration whose plugins are consulted
 * @param docunotionContext - conversion context holding the `notionToMarkdown`
 *   converter on which the transformers are registered
 */
function registerNotionToMarkdownCustomTransforms(
  config: IDocuNotionConfig,
  docunotionContext: IDocuNotionContext
) {
  for (const plugin of config.plugins) {
    const transforms = plugin.notionToMarkdownTransforms;
    if (!transforms) {
      continue; // this plugin does not override any conversion
    }
    for (const transform of transforms) {
      logDebug(
        "registering custom transform",
        `${plugin.name} for ${transform.type}`
      );
      const convertBlock = (block: any) => {
        logDebug(
          "notion to MD conversion of ",
          `${transform.type} with plugin: ${plugin.name}`
        );
        return transform.getStringFromBlock(docunotionContext, block);
      };
      docunotionContext.notionToMarkdown.setCustomTransformer(
        transform.type,
        convertBlock
      );
    }
  }
}
/**
 * Builds the front matter section that goes at the top of a page's markdown.
 *
 * The output of every plugin's `frontMatterGenerator.getFrontMatter`, in
 * plugin order, is concatenated as-is between an opening and a closing
 * `---` line. Plugins without a generator contribute nothing.
 *
 * @param context - conversion context, passed through to the generators
 * @param config - configuration whose plugins are consulted
 * @param page - the page for which front matter is generated
 * @returns the front matter block, including both `---` lines
 */
function getMarkdownFrontMatter(
  context: IDocuNotionContext,
  config: IDocuNotionConfig,
  page: NotionPage
): string {
  const fence = "---\n";
  let generated = "";
  for (const plugin of config.plugins) {
    const generator = plugin.frontMatterGenerator;
    if (!generator) {
      continue; // this plugin has nothing to say about front matter
    }
    logDebug("transforming page with plugin", plugin.name);
    generated += generator.getFrontMatter(context, page);
  }
  return fence + generated + fence;
}