forked from chipsalliance/verible
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalign.h
544 lines (473 loc) · 21.8 KB
/
align.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_COMMON_FORMATTING_ALIGN_H_
#define VERIBLE_COMMON_FORMATTING_ALIGN_H_
#include <functional>
#include <vector>
#include "absl/strings/string_view.h"
#include "common/formatting/format_token.h"
#include "common/formatting/token_partition_tree.h"
#include "common/strings/position.h" // for ByteOffsetSet
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h" // for TokenRange
#include "common/text/tree_context_visitor.h"
#include "common/text/tree_utils.h" // for GetRightmostLeaf
#include "common/util/logging.h"
#include "common/util/vector_tree.h"
namespace verible {
// Attributes of columns of text alignment (controlled by developer).
struct AlignmentColumnProperties {
static constexpr int kNoBorderOverride = -1;
// If true format cell with padding to the right: |text |
// else format cell with padding to the left: | text|
bool flush_left = true;
// When set, ignores tokens' left_border and uses this value instead.
// This is propagated to a leftmost subcolumn if the subcolumn's
// left_border_override is lower.
int left_border_override = kNoBorderOverride;
bool contains_delimiter = false;
AlignmentColumnProperties() = default;
explicit constexpr AlignmentColumnProperties(bool flush_left)
: flush_left(flush_left) {}
constexpr AlignmentColumnProperties(bool flush_left, int left_border_override)
: flush_left(flush_left), left_border_override(left_border_override) {}
};
// This object represents a bid for a new column as a row of tokens is scanned.
struct ColumnPositionEntry {
// Establishes total ordering among columns.
// This is used as a key for determining column uniqueness.
SyntaxTreePath path;
// Identifies the token that starts each sparse cell.
TokenInfo starting_token;
// Properties of alignment columns (controlled by developer).
AlignmentColumnProperties properties;
};
using ColumnPositionTree = VectorTree<ColumnPositionEntry>;
std::ostream& operator<<(std::ostream&, const ColumnPositionTree&);
// ColumnSchemaScanner traverses syntax subtrees of similar types and
// collects the positions that wish to register columns for alignment
// consideration.
// This serves as a base class for scanners that mark new columns
// for alignment.
// Subclasses are expected to implement the Visit({node, leaf}) virtual methods
// and call ReserveNewColumn() in locations that want a new columns.
class ColumnSchemaScanner : public TreeContextPathVisitor {
public:
ColumnSchemaScanner()
: sparse_columns_(ColumnPositionTree({{}, TokenInfo::EOFToken(), {}})) {}
// Returns the collection of column position entries.
const ColumnPositionTree& SparseColumns() const { return sparse_columns_; }
protected:
// Returns subpath relative to base_path
static SyntaxTreePath GetSubpath(
const SyntaxTreePath& base_path,
std::initializer_list<SyntaxTreePath::value_type> subpositions) {
auto subpath = base_path;
subpath.insert(subpath.end(), subpositions);
return subpath;
}
// Mark the start of a new column for alignment.
// 'parent_column' is a pointer to the parent column.
// 'symbol' is a reference to the original source syntax subtree.
// 'properties' contains alignment configuration for the column.
// 'path' represents relative position within the enclosing syntax subtree,
// and is used as a key for ordering and matching columns.
// Returns pointer to a created column or nullptr if column was not created.
static ColumnPositionTree* ReserveNewColumn(
ColumnPositionTree* parent_column, const Symbol& symbol,
const AlignmentColumnProperties& properties, const SyntaxTreePath& path);
ColumnPositionTree* ReserveNewColumn(
const Symbol& symbol, const AlignmentColumnProperties& properties,
const SyntaxTreePath& path) {
return ReserveNewColumn(&sparse_columns_, symbol, properties, path);
}
// Reserve a column using the current path as the key.
ColumnPositionTree* ReserveNewColumn(
const Symbol& symbol, const AlignmentColumnProperties& properties) {
return ReserveNewColumn(&sparse_columns_, symbol, properties, Path());
}
// Reserve a subcolumn using subcolumn number appended to the parent's path
// as the key.
static ColumnPositionTree* ReserveNewColumn(
ColumnPositionTree* parent_column, const Symbol& symbol,
const AlignmentColumnProperties& properties) {
CHECK_NOTNULL(parent_column);
const SyntaxTreePath::value_type subindex =
parent_column->Children().size();
const auto subpath = GetSubpath(parent_column->Value().path, {subindex});
return ReserveNewColumn(parent_column, symbol, properties, subpath);
}
private:
// Keeps track of unique positions where new columns are desired.
// This is a tree root and its value is not actually used.
ColumnPositionTree sparse_columns_;
};
// This enum signals to the GetPartitionAlignmentSubranges() function
// how a token partition should be included or excluded in partition groups.
enum class AlignmentGroupAction {
kIgnore, // This does not influence the current matching range.
kMatch, // Include this partition in the current matching range.
kNoMatch, // Close the current matching range (if any).
};
// This struct drives partition sub-range selection in the
// GetPartitionAlignmentSubranges() function.
struct AlignedPartitionClassification {
AlignmentGroupAction action;
// Matches that differ in subtype will also mark alignment group boundaries.
// These values are up to the user's interpretation. They are only checked
// for equality in decision-making.
// For example, when scanning a token partition that contains different
// syntax groups, like assignments and function calls, the caller could
// enumerate these subtypes 1 and 2, respectively, to distinguish the
// alignable groups that are returned.
// In situations where there can be only one alignment group subtype,
// this value does not matter.
int match_subtype;
};
struct TaggedTokenPartitionRange {
TokenPartitionRange range;
// Same as that in AlignedPartitionClassification::match_subtype.
int match_subtype;
TaggedTokenPartitionRange(TokenPartitionRange r, int subtype)
: range(r), match_subtype(subtype) {}
TaggedTokenPartitionRange(TokenPartitionIterator begin,
TokenPartitionIterator end, int subtype)
: range(begin, end), match_subtype(subtype) {}
};
// Adapter function for extracting ranges of tokens that represent the same type
// of group to align (same syntax).
std::vector<TaggedTokenPartitionRange>
GetSubpartitionsBetweenBlankLinesSingleTag(
const TokenPartitionRange& full_range, int subtype);
// From a range of token 'partitions', this selects sub-ranges to align.
// 'partition_selector' decides which partitions qualify for alignment.
// When there is a match, the AlignedPartitionClassification::match_subtype
// is also compared: if it matches, continue to grow the current
// TaggedTokenPartitionRange, and if it doesn't match, start a new one.
// 'min_match_count' sets the minimum sub-range size to return.
//
// Visualization from 'partition_selector's perspective:
//
// case 1:
// nomatch
// match // not enough matches to yield a group for min_match_count=2
// nomatch
//
// case 2:
// nomatch
// match // an alignment group starts here
// match // continues as long as subtype is the same
// match // ends here, inclusively
// nomatch
//
// case 3:
// nomatch
// match // an alignment group starts here
// ignore // ... continues ...
// match // ends here, inclusively
// nomatch
//
// case 4:
// nomatch
// match (A) // start group subtype A
// match (A) // continue group
// match (B) // finish previous group, start group subtype B
// match (B) // continue group
//
std::vector<TaggedTokenPartitionRange> GetPartitionAlignmentSubranges(
const TokenPartitionRange& partitions,
const std::function<AlignedPartitionClassification(
const TokenPartitionTree&)>& partition_selector,
int min_match_count = 2);
// This is the interface used to extract alignment cells from ranges of tokens.
// Note that it is not required to use a ColumnSchemaScanner.
using AlignmentCellScannerFunction =
std::function<ColumnPositionTree(const TokenPartitionTree&)>;
// For sections of code that are deemed alignable, this enum controls
// the formatter behavior.
enum class AlignmentPolicy {
// Preserve text as-is.
kPreserve,
// No-align: flush text to left while obeying spacing constraints
kFlushLeft,
// Attempt tabular alignment.
kAlign,
// Infer whether user wanted flush-left or alignment, based on original
// spacing.
kInferUserIntent,
};
std::ostream& operator<<(std::ostream&, AlignmentPolicy);
bool AbslParseFlag(absl::string_view text, AlignmentPolicy* policy,
std::string* error);
std::string AbslUnparseFlag(const AlignmentPolicy& policy);
// This represents one unit of alignable work, which is usually a filtered
// subset of partitions within a contiguous range of partitions.
class AlignablePartitionGroup {
public:
AlignablePartitionGroup(const std::vector<TokenPartitionIterator>& rows,
const AlignmentCellScannerFunction& scanner,
AlignmentPolicy policy)
: alignable_rows_(rows),
alignment_cell_scanner_(scanner),
alignment_policy_(policy) {}
bool IsEmpty() const { return alignable_rows_.empty(); }
TokenPartitionRange Range() const {
return TokenPartitionRange(alignable_rows_.front(),
alignable_rows_.back() + 1);
}
// This executes alignment, depending on the alignment_policy.
// 'column_limit' is the maximum text width allowed post-alignment.
void Align(int column_limit) const;
private:
struct GroupAlignmentData;
static GroupAlignmentData CalculateAlignmentSpacings(
const std::vector<TokenPartitionIterator>& rows,
const AlignmentCellScannerFunction& cell_scanner_gen, int column_limit);
void ApplyAlignment(const GroupAlignmentData& align_data) const;
private:
// The set of partitions to treat as rows for tabular alignment.
const std::vector<TokenPartitionIterator> alignable_rows_;
// This function scans each row to identify column positions and properties of
// alignable cells (containing token ranges).
const AlignmentCellScannerFunction alignment_cell_scanner_;
// Controls how this group should be aligned or flushed or preserved.
const AlignmentPolicy alignment_policy_;
};
// This is the interface used to sub-divide a range of token partitions into
// a sequence of sub-ranges for the purposes of formatting aligned groups.
using ExtractAlignmentGroupsFunction =
std::function<std::vector<AlignablePartitionGroup>(
const TokenPartitionRange&)>;
// This predicate function is used to select partitions to be ignored within
// an alignment group. For example, one may wish to ignore comment-only lines.
using IgnoreAlignmentRowPredicate =
std::function<bool(const TokenPartitionTree&)>;
// Select subset of iterators inside a partition range that are not ignored
// by the predicate.
std::vector<TokenPartitionIterator> FilterAlignablePartitions(
const TokenPartitionRange& range,
const IgnoreAlignmentRowPredicate& ignore_partition_predicate);
// This adapter composes several functions for alignment (legacy interface) into
// one used in the current interface. This exists to help migrate existing code
// to the new interface.
// This is only useful when all of the AlignablePartitionGroups want to be
// handled the same way using the same AlignmentCellScannerFunction and
// AlignmentPolicy.
// TODO(fangism): phase this out this interface by rewriting
// TabularAlignTokens().
ExtractAlignmentGroupsFunction ExtractAlignmentGroupsAdapter(
const std::function<std::vector<TaggedTokenPartitionRange>(
const TokenPartitionRange&)>& legacy_extractor,
const IgnoreAlignmentRowPredicate& legacy_ignore_predicate,
const AlignmentCellScannerFunction& alignment_cell_scanner,
AlignmentPolicy alignment_policy);
// Instantiates a ScannerType (implements ColumnSchemaScanner) and extracts
// column alignment information, suitable as an AlignmentCellScannerFunction.
// A 'row' corresponds to a range of format tokens over which spacing is to be
// adjusted to achieve alignment.
// Returns a sequence of column entries that will be uniquified and ordered
// for alignment purposes.
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells(
const TokenPartitionTree& row,
const std::function<ScannerType(void)>& scanner_factory) {
const UnwrappedLine& unwrapped_line = row.Value();
// Walk the original syntax tree that spans a subset of the tokens spanned by
// this 'row', and detect the sparse set of columns found by the scanner.
ScannerType scanner = scanner_factory();
const Symbol* origin = unwrapped_line.Origin();
if (origin != nullptr) origin->Accept(&scanner);
return scanner.SparseColumns();
}
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells(
const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(
row, [] { return ScannerType(); });
}
using NonTreeTokensScannerFunction = std::function<void(
FormatTokenRange, FormatTokenRange, ColumnPositionTree*)>;
// Similarly to the function above this function creates an instance of
// ScannerType and extracts column alignment information. Firstly it reuses
// existing column scanner for extracting column alignment information from
// SyntaxTree (which doesn't contain delimiters and comments) and after that it
// examines TokenPartitionTree to detect comments and delimiters and extract
// information for alignment with non_tree_column_scanner function.
// A 'row' corresponds to a range of format tokens over which spacing is to be
// adjusted to achieve alignment.
// A 'non_tree_column_scanner' is a function that scans non-tree tokens in a
// line (like comments) and creates columns for them.
// Returns a concatenated sequence of column entries for tokens from SyntaxTree
// and TokenPartitionTree that will be uniquified and ordered for alignment
// purposes.
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens(
const TokenPartitionTree& row,
const std::function<ScannerType(void)>& scanner_factory,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
// re-use existing scanner
ColumnPositionTree column_entries =
ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory);
const UnwrappedLine& unwrapped_line = row.Value();
const auto ftokens = unwrapped_line.TokensRange();
const Symbol* origin = unwrapped_line.Origin();
FormatTokenRange leading_tokens(ftokens.begin(), ftokens.begin());
FormatTokenRange trailing_tokens(ftokens.end(), ftokens.end());
if (origin != nullptr) {
// Identify the last token covered by the origin tree.
const SyntaxTreeLeaf* first_leaf = GetLeftmostLeaf(*origin);
const SyntaxTreeLeaf* last_leaf = GetRightmostLeaf(*origin);
CHECK_NOTNULL(first_leaf);
CHECK_NOTNULL(last_leaf);
const TokenInfo& first_tree_token = first_leaf->get();
const TokenInfo& last_tree_token = last_leaf->get();
// Collect tokens excluded from SyntaxTree (delimiters and comments)
CHECK(!ftokens.empty());
CHECK_LE(ftokens.front().Text().begin(), first_tree_token.text().begin());
CHECK_GE(ftokens.back().Text().end(), last_tree_token.text().end());
FormatTokenRange::const_iterator ftoken_it = ftokens.begin();
// Find leading non-tree tokens range end
while (*(ftoken_it->token) != first_tree_token) ++ftoken_it;
leading_tokens.set_end(ftoken_it);
const auto first_tree_token_it = ftoken_it;
// Skip tree tokens. Non-tree tokens located between tree tokens (e.g. block
// comments) are also skipped.
while (*(ftoken_it->token) != last_tree_token) ++ftoken_it;
// Use next token as begining of trailing non-tree tokens
trailing_tokens.set_begin(ftoken_it + 1);
// Breaking following condition leads to e.g. concatenation of EOL comment
// and code in a single line. To fix situation that lead to this, flatten
// token partitions that contain EOL comment subpartition just before a
// subpartition that starts with the same token as Origin(). Example of a
// partition that needs flattening:
//
// { (>>[...], (origin: "input bit second"))
// { (>>[// comment] }
// { (>>[input bit second], (origin: "input")) }
// }
CHECK(leading_tokens.empty() || first_tree_token_it == ftokens.end() ||
first_tree_token_it->before.break_decision !=
SpacingOptions::MustWrap);
} else {
// All tokens are passed as leading
leading_tokens.set_end(ftokens.end());
}
non_tree_column_scanner(leading_tokens, trailing_tokens, &column_entries);
return column_entries;
}
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens(
const TokenPartitionTree& row,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, [] { return ScannerType(); }, non_tree_column_scanner);
}
// Convenience function for generating alignment cell scanners.
// This can be useful for constructing maps of scanners based on type.
//
// Example:
// static const auto* kAlignHandlers =
// new std::map<NodeEnum, verible::AlignmentCellScannerFunction>{
// {NodeEnum::kTypeA,
// AlignmentCellScannerGenerator<TypeA_ColumnSchemaScanner>()},
// {NodeEnum::kTypeB,
// AlignmentCellScannerGenerator<TypeB_ColumnSchemaScanner>()},
// ...
// };
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator() {
return [](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(row);
};
}
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const std::function<ScannerType(void)>& scanner_factory) {
return [scanner_factory](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory);
};
}
// Overloaded function for generating alignment cell scanners. This adapter
// accepts a trailing token scanner function for aligning delimiters and
// comments.
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return [non_tree_column_scanner](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, non_tree_column_scanner);
};
}
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const std::function<ScannerType(void)> scanner_factory,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return [scanner_factory,
non_tree_column_scanner](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, scanner_factory, non_tree_column_scanner);
};
}
// Converts partitions from 'partition_range' into partitions with
// kAlreadyFormatted/kInline policies that emulate original spacing of token
// range spanned by them.
void FormatUsingOriginalSpacing(TokenPartitionRange partition_range);
// This aligns sections of text by modifying the spacing between tokens.
// 'partition_ptr' is a partition that can span one or more sections of
// code to align. The partitions themselves are not reshaped, however,
// the inter-token spacing of tokens spanned by these partitions can be
// modified.
// 'extract_alignment_groups' is a function that returns groups of token
// partitions to align along with their column extraction functions.
// (See AlignablePartitionGroup.)
//
// How it works:
// Let a 'line' be a unit of text to be aligned.
// Groups of lines are aligned together, as if their contents were table cells.
// Vertical alignment is achieved by sizing each column in the table to
// the max cell width in each column, and padding spaces as necessary.
//
// Other parameters:
// 'full_text' is the string_view buffer of whole text being formatted, not just
// the text spanned by 'partition_ptr'.
// 'ftokens' points to the array of PreFormatTokens that spans 'full_text'.
// 'disabled_byte_ranges' contains information about which ranges of text
// are to preserve their original spacing (no-formatting).
// 'column_limit' is the column width beyond which the aligner should fallback
// to a safer action, e.g. refusing to align and leaving spacing untouched.
//
// Illustrated example:
// The following text:
//
// aaa bb[11][22]
// ccc[33] dd[444]
//
// could be arranged into a table (| for column delimiters):
//
// aaa | bb | [11] |[22]
// ccc[33] | dd | [444] |
//
// and assuming one space of padding between columns,
// and with every column flushed-left, result in:
//
// aaa bb [11] [22]
// ccc[33] dd [444]
//
void TabularAlignTokens(
int column_limit, absl::string_view full_text,
const ByteOffsetSet& disabled_byte_ranges,
const ExtractAlignmentGroupsFunction& extract_alignment_groups,
TokenPartitionTree* partition_ptr);
} // namespace verible
#endif // VERIBLE_COMMON_FORMATTING_ALIGN_H_