-
-
Notifications
You must be signed in to change notification settings - Fork 222
/
Copy pathblock-text-builder.js
485 lines (443 loc) · 15.3 KB
/
block-text-builder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
import {
// eslint-disable-next-line no-unused-vars
StackItem, BlockStackItem,
TableCellStackItem, TableRowStackItem, TableStackItem,
TransformerStackItem, ListStackItem, ListItemStackItem
} from './stack-item';
import { trimCharacter } from './util';
import { WhitespaceProcessor } from './whitespace-processor';
// eslint-disable-next-line import/no-unassigned-import
import './typedefs';
/**
 * Assembles output text from inline pieces and nested block-level
 * structures (blocks, lists, list items, tables, rows and cells).
 *
 * The builder keeps a stack of "stack items"; `open*` methods push a new item,
 * `close*` methods pop it and merge the produced text into the parent item.
 *
 * @class BlockTextBuilder
 */
class BlockTextBuilder {

  /**
   * Set up a builder whose stack contains a single root block.
   *
   * @param { Options } options HtmlToText options.
   * @param { import('selderee').Picker<DomNode, TagDefinition> } picker Selectors decision tree picker.
   * @param { any } [metadata] Optional metadata for HTML document, for use in formatters.
   */
  constructor (options, picker, metadata = undefined) {
    this.options = options;
    this.picker = picker;
    this.metadata = metadata;
    this.whitespaceProcessor = new WhitespaceProcessor(options);
    /** @type { StackItem } */
    this._stackItem = new BlockStackItem(options);
    /** @type { TransformerStackItem } */
    this._wordTransformer = undefined;
  }

  /**
   * Push a word-by-word transform function on top of the transformations stack.
   *
   * Mainly used for uppercasing. Callers can bypass the stack
   * (see `noWordTransform` of `addInline`) to add unformatted text such as URLs.
   *
   * Word transformations are applied before wrapping.
   *
   * @param { (str: string) => string } wordTransform Word transformation function.
   */
  pushWordTransform (wordTransform) {
    const previous = this._wordTransformer;
    this._wordTransformer = new TransformerStackItem(previous, wordTransform);
  }

  /**
   * Take the most recently pushed function off the word transformations stack.
   *
   * @returns { (str: string) => string }
   * The removed function, or `undefined` when the stack was already empty.
   */
  popWordTransform () {
    const top = this._wordTransformer;
    if (!top) { return undefined; }
    const { transform, next } = top;
    this._wordTransformer = next;
    return transform;
  }

  /**
   * Mark the current stack item so that followup inline additions
   * are passed to the whitespace processor with the no-wrap flag set.
   */
  startNoWrap () {
    this._stackItem.isNoWrap = true;
  }

  /**
   * Clear the no-wrap flag of the current stack item,
   * returning wrapping to the behavior defined by options.
   */
  stopNoWrap () {
    this._stackItem.isNoWrap = false;
  }

  /**
   * Compose the current word transformers chain with `options.encodeCharacters`.
   *
   * The encoder (when present) runs on the result of the word transformers.
   * The transformers chain is looked up at call time of the returned function.
   *
   * @returns { (str: string) => string }
   * Combined function, or whatever `options.encodeCharacters` holds
   * when there are no word transformers.
   */
  _getCombinedWordTransformer () {
    const encode = this.options.encodeCharacters;
    if (!this._wordTransformer) { return encode; }
    const transformWords = (str) => applyTransformer(str, this._wordTransformer);
    if (!encode) { return transformWords; }
    return (str) => encode(transformWords(str));
  }

  /**
   * Remove the current item from the stack and make its `next` item current.
   *
   * @returns { StackItem } The removed item.
   */
  _popStackItem () {
    const popped = this._stackItem;
    this._stackItem = popped.next;
    return popped;
  }

  /**
   * Add a line break into the currently built block.
   *
   * Does nothing unless the current item is a block, a list item or a table cell.
   */
  addLineBreak () {
    const current = this._stackItem;
    const holdsText = current instanceof BlockStackItem
      || current instanceof ListItemStackItem
      || current instanceof TableCellStackItem;
    if (!holdsText) { return; }

    if (current.isPre) {
      // Preformatted content is accumulated as raw text.
      current.rawText += '\n';
      return;
    }
    current.inlineTextBuilder.startNewLine();
  }

  /**
   * Allow to break the line in case directly following text will not fit.
   *
   * Does nothing unless the current item is a block, a list item or a table cell.
   */
  addWordBreakOpportunity () {
    const current = this._stackItem;
    const holdsText = current instanceof BlockStackItem
      || current instanceof ListItemStackItem
      || current instanceof TableCellStackItem;
    if (!holdsText) { return; }

    current.inlineTextBuilder.wordBreakOpportunity = true;
  }

  /**
   * Add text content of a node inline into the currently built block.
   *
   * Does nothing unless the current item is a block, a list item or a table cell.
   *
   * @param { string } str
   * Text content of a node to add.
   *
   * @param { object } [param1]
   * Object holding the parameters of the operation.
   *
   * @param { boolean } [param1.noWordTransform]
   * Ignore word transformers if there are any.
   * Don't encode characters as well.
   * (Use this for things like URL addresses).
   */
  addInline (str, { noWordTransform = false } = {}) {
    const current = this._stackItem;
    const holdsText = current instanceof BlockStackItem
      || current instanceof ListItemStackItem
      || current instanceof TableCellStackItem;
    if (!holdsText) { return; }

    if (current.isPre) {
      // Preformatted content bypasses whitespace processing and transforms.
      current.rawText += str;
      return;
    }

    // Nothing to add for an empty string.
    if (str.length === 0) { return; }

    // Stashed line breaks make whitespace irrelevant,
    // so a string without words adds nothing.
    if (current.stashedLineBreaks && !this.whitespaceProcessor.testContainsWords(str)) { return; }

    if (this.options.preserveNewlines) {
      const newlinesCount = this.whitespaceProcessor.countNewlinesNoWords(str);
      if (newlinesCount > 0) {
        current.inlineTextBuilder.startNewLine(newlinesCount);
        // stashedLineBreaks is deliberately left unchanged here.
        return;
      }
    }

    if (current.stashedLineBreaks) {
      current.inlineTextBuilder.startNewLine(current.stashedLineBreaks);
    }

    const transform = (noWordTransform) ? undefined : this._getCombinedWordTransformer();
    this.whitespaceProcessor.shrinkWrapAdd(
      str,
      current.inlineTextBuilder,
      transform,
      current.isNoWrap
    );

    // Inline text doesn't introduce line breaks.
    current.stashedLineBreaks = 0;
  }

  /**
   * Add a string inline into the currently built block.
   *
   * Use this for markup elements that don't have to adhere
   * to text layout rules.
   *
   * Does nothing unless the current item is a block, a list item or a table cell.
   *
   * @param { string } str Text to add.
   */
  addLiteral (str) {
    const current = this._stackItem;
    const holdsText = current instanceof BlockStackItem
      || current instanceof ListItemStackItem
      || current instanceof TableCellStackItem;
    if (!holdsText) { return; }

    if (str.length === 0) { return; }

    if (current.isPre) {
      current.rawText += str;
      return;
    }

    if (current.stashedLineBreaks) {
      current.inlineTextBuilder.startNewLine(current.stashedLineBreaks);
    }

    this.whitespaceProcessor.addLiteral(
      str,
      current.inlineTextBuilder,
      current.isNoWrap
    );

    current.stashedLineBreaks = 0;
  }

  /**
   * Start building a new block.
   *
   * @param { object } [param0]
   * Object holding the parameters of the block.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This block should have at least this number of line breaks to separate it from any preceding block.
   *
   * @param { number } [param0.reservedLineLength]
   * Reserve this number of characters on each line for block markup.
   * The resulting line length never goes below 20.
   *
   * @param { boolean } [param0.isPre]
   * Should HTML whitespace be preserved inside this block.
   */
  openBlock ({ leadingLineBreaks = 1, reservedLineLength = 0, isPre = false } = {}) {
    const parent = this._stackItem;
    const availableLength = parent.inlineTextBuilder.maxLineLength - reservedLineLength;
    const block = new BlockStackItem(
      this.options,
      parent,
      leadingLineBreaks,
      Math.max(20, availableLength)
    );
    // Only ever switch the flag on - the value set by the constructor is kept otherwise.
    if (isPre) { block.isPre = true; }
    this._stackItem = block;
  }

  /**
   * Finalize the currently built block, add its content to the parent block.
   *
   * @param { object } [param0]
   * Object holding the parameters of the block.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This block should have at least this number of line breaks to separate it from any following block.
   *
   * @param { (str: string) => string } [param0.blockTransform]
   * A function to transform the block text before adding to the parent block.
   * This happens after word wrap and should be used in combination with reserved line length
   * in order to keep line lengths correct.
   * Used for whole block markup.
   */
  closeBlock ({ trailingLineBreaks = 1, blockTransform = undefined } = {}) {
    const block = this._popStackItem();
    let blockText = getText(block);
    if (blockTransform) {
      blockText = blockTransform(blockText);
    }
    const trailing = Math.max(block.stashedLineBreaks, trailingLineBreaks);
    addText(this._stackItem, blockText, block.leadingLineBreaks, trailing);
  }

  /**
   * Start building a new list.
   *
   * @param { object } [param0]
   * Object holding the parameters of the list.
   *
   * @param { number } [param0.maxPrefixLength]
   * Length of the longest list item prefix.
   * If not supplied or too small then list items won't be aligned properly.
   *
   * @param { 'left' | 'right' } [param0.prefixAlign]
   * Specify how prefixes of different lengths have to be aligned
   * within a column.
   *
   * @param { number } [param0.interRowLineBreaks]
   * Minimum number of line breaks between list items.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This list should have at least this number of line breaks to separate it from any preceding block.
   */
  openList ({ maxPrefixLength = 0, prefixAlign = 'left', interRowLineBreaks = 1, leadingLineBreaks = 2 } = {}) {
    const parent = this._stackItem;
    this._stackItem = new ListStackItem(this.options, parent, {
      interRowLineBreaks,
      leadingLineBreaks,
      maxLineLength: parent.inlineTextBuilder.maxLineLength,
      maxPrefixLength,
      prefixAlign
    });
  }

  /**
   * Start building a new list item.
   *
   * @param { object } [param0]
   * Object holding the parameters of the list item.
   *
   * @param { string } [param0.prefix]
   * Prefix for this list item (item number, bullet point, etc).
   *
   * @throws { Error } When the current stack item is not a list.
   */
  openListItem ({ prefix = '' } = {}) {
    const list = this._stackItem;
    if (!(list instanceof ListStackItem)) {
      throw new Error('Can\'t add a list item to something that is not a list! Check the formatter.');
    }
    // The prefix column takes space away from the item text (but never below 20 characters).
    const prefixLength = Math.max(prefix.length, list.maxPrefixLength);
    const maxLineLength = Math.max(20, list.inlineTextBuilder.maxLineLength - prefixLength);
    this._stackItem = new ListItemStackItem(this.options, list, {
      prefix,
      maxLineLength,
      leadingLineBreaks: list.interRowLineBreaks
    });
  }

  /**
   * Finalize the currently built list item, add its content to the parent list.
   */
  closeListItem () {
    const listItem = this._popStackItem();
    const list = listItem.next;

    const prefixLength = Math.max(listItem.prefix.length, list.maxPrefixLength);
    const paddedPrefix = (list.prefixAlign === 'right')
      ? listItem.prefix.padStart(prefixLength)
      : listItem.prefix.padEnd(prefixLength);

    // Continuation lines are indented to line up with the text after the prefix.
    const continuation = '\n' + ' '.repeat(prefixLength);
    const itemText = paddedPrefix + getText(listItem).replace(/\n/g, continuation);

    addText(
      list,
      itemText,
      listItem.leadingLineBreaks,
      Math.max(listItem.stashedLineBreaks, list.interRowLineBreaks)
    );
  }

  /**
   * Finalize the currently built list, add its content to the parent block.
   *
   * A list that produced no text adds nothing to the parent.
   *
   * @param { object } [param0]
   * Object holding the parameters of the list.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This list should have at least this number of line breaks to separate it from any following block.
   */
  closeList ({ trailingLineBreaks = 2 } = {}) {
    const list = this._popStackItem();
    const listText = getText(list);
    if (!listText) { return; }
    addText(this._stackItem, listText, list.leadingLineBreaks, trailingLineBreaks);
  }

  /**
   * Start building a table.
   */
  openTable () {
    const parent = this._stackItem;
    this._stackItem = new TableStackItem(parent);
  }

  /**
   * Start building a table row.
   *
   * @throws { Error } When the current stack item is not a table.
   */
  openTableRow () {
    const table = this._stackItem;
    if (!(table instanceof TableStackItem)) {
      throw new Error('Can\'t add a table row to something that is not a table! Check the formatter.');
    }
    this._stackItem = new TableRowStackItem(table);
  }

  /**
   * Start building a table cell.
   *
   * @param { object } [param0]
   * Object holding the parameters of the cell.
   *
   * @param { number } [param0.maxColumnWidth]
   * Wrap cell content to this width. Fall back to global wordwrap value if undefined.
   *
   * @throws { Error } When the current stack item is not a table row.
   */
  openTableCell ({ maxColumnWidth = undefined } = {}) {
    const row = this._stackItem;
    if (!(row instanceof TableRowStackItem)) {
      throw new Error('Can\'t add a table cell to something that is not a table row! Check the formatter.');
    }
    this._stackItem = new TableCellStackItem(this.options, row, maxColumnWidth);
  }

  /**
   * Finalize the currently built table cell and add it to the parent table row's cells.
   *
   * Newline characters are trimmed from both ends of the cell text.
   *
   * @param { object } [param0]
   * Object holding the parameters of the cell.
   *
   * @param { number } [param0.colspan] How many columns this cell should occupy.
   * @param { number } [param0.rowspan] How many rows this cell should occupy.
   */
  closeTableCell ({ colspan = 1, rowspan = 1 } = {}) {
    const cell = this._popStackItem();
    const text = trimCharacter(getText(cell), '\n');
    const row = cell.next;
    row.cells.push({ colspan, rowspan, text });
  }

  /**
   * Finalize the currently built table row and add it to the parent table's rows.
   */
  closeTableRow () {
    const row = this._popStackItem();
    const table = row.next;
    table.rows.push(row.cells);
  }

  /**
   * Finalize the currently built table and add the rendered text to the parent block.
   *
   * A table rendered into an empty value adds nothing to the parent.
   *
   * @param { object } param0
   * Object holding the parameters of the table.
   *
   * @param { TablePrinter } param0.tableToString
   * A function to convert a table of stringified cells into a complete table.
   *
   * @param { number } [param0.leadingLineBreaks]
   * This table should have at least this number of line breaks to separate it from any preceding block.
   *
   * @param { number } [param0.trailingLineBreaks]
   * This table should have at least this number of line breaks to separate it from any following block.
   */
  closeTable ({ tableToString, leadingLineBreaks = 2, trailingLineBreaks = 2 }) {
    const table = this._popStackItem();
    const rendered = tableToString(table.rows);
    if (!rendered) { return; }
    addText(this._stackItem, rendered, leadingLineBreaks, trailingLineBreaks);
  }

  /**
   * Return the rendered text content of this builder.
   *
   * @returns { string }
   */
  toString () {
    // There should only be the root item if everything is closed properly.
    const root = this._stackItem.getRoot();
    return getText(root);
  }

}
/**
 * Get the text accumulated so far in a text-holding stack item:
 * its raw text followed by the content of its inline text builder (if any).
 *
 * @param { StackItem } stackItem A block, a list item or a table cell.
 * @returns { string }
 * @throws { Error } When the given item is not a block, a list item or a table cell.
 */
function getText (stackItem) {
  const holdsText = [BlockStackItem, ListItemStackItem, TableCellStackItem]
    .some((itemClass) => stackItem instanceof itemClass);
  if (!holdsText) {
    throw new Error('Only blocks, list items and table cells can be requested for text contents.');
  }

  const inlineBuilder = stackItem.inlineTextBuilder;
  if (inlineBuilder.isEmpty()) {
    return stackItem.rawText;
  }
  return stackItem.rawText + inlineBuilder.toString();
}
/**
 * Append a finished piece of text to a text-holding stack item.
 *
 * Everything the item accumulated so far (raw and inline text) is folded into
 * its raw text, and the inline text builder is cleared.
 * When the item already has text, the new text is separated from it by the larger of
 * the item's stashed line breaks and the requested leading line breaks.
 * When the item is still empty, that number becomes the item's own leading line breaks instead.
 *
 * @param { StackItem } stackItem A block, a list item or a table cell to add the text to.
 * @param { string } text The text to add.
 * @param { number } leadingLineBreaks Minimum number of line breaks before the text.
 * @param { number } trailingLineBreaks Number of line breaks to stash after the text.
 * @throws { Error } When the given item is not a block, a list item or a table cell.
 */
function addText (stackItem, text, leadingLineBreaks, trailingLineBreaks) {
  const acceptsText = stackItem instanceof BlockStackItem
    || stackItem instanceof ListItemStackItem
    || stackItem instanceof TableCellStackItem;
  if (!acceptsText) {
    throw new Error('Only blocks, list items and table cells can contain text.');
  }

  // Capture existing content before the inline builder is wiped.
  const existingText = getText(stackItem);
  const separatorLength = Math.max(stackItem.stashedLineBreaks, leadingLineBreaks);
  stackItem.inlineTextBuilder.clear();

  if (existingText) {
    const separator = '\n'.repeat(separatorLength);
    stackItem.rawText = existingText + separator + text;
  } else {
    stackItem.rawText = text;
    stackItem.leadingLineBreaks = separatorLength;
  }

  stackItem.stashedLineBreaks = trailingLineBreaks;
}
/**
 * Run a string through a chain of transformer items.
 *
 * The given item's transform is applied first, then the transform of its `next` item
 * and so on, until the chain ends. Without a transformer the string is returned as is.
 *
 * @param { string } str A string to transform.
 * @param { TransformerStackItem } transformer A transformer item (with possible continuation).
 * @returns { string }
 */
function applyTransformer (str, transformer) {
  let result = str;
  for (let item = transformer; item; item = item.next) {
    result = item.transform(result);
  }
  return result;
}
export { BlockTextBuilder };