forked from chipsalliance/verible
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstate_node.cc
401 lines (369 loc) · 16.2 KB
/
state_node.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/formatting/state_node.h"
#include <cstddef>
#include <iterator>
#include <memory>
#include <stack>
#include <vector>
#include "absl/strings/string_view.h"
#include "common/formatting/basic_format_style.h"
#include "common/formatting/format_token.h"
#include "common/formatting/unwrapped_line.h"
#include "common/strings/position.h"
#include "common/strings/range.h"
#include "common/text/token_info.h"
#include "common/util/iterator_adaptors.h"
#include "common/util/iterator_range.h"
#include "common/util/logging.h"
namespace verible {
static const char kNotForAlignment[] =
"Aligned tokens should never use line-wrap optimization!";
static SpacingDecision FrontTokenSpacing(const FormatTokenRange range) {
if (range.empty()) return SpacingDecision::Append;
const SpacingOptions opt = range.front().before.break_decision;
// Treat first token as appended, unless explicitly preserving spaces.
switch (opt) {
case SpacingOptions::Preserve:
return SpacingDecision::Preserve;
case SpacingOptions::AppendAligned:
LOG(FATAL) << kNotForAlignment;
break;
default:
break;
}
return SpacingDecision::Append;
}
StateNode::StateNode(const UnwrappedLine& uwline, const BasicFormatStyle& style)
: prev_state(nullptr),
undecided_path(uwline.TokensRange().begin(), uwline.TokensRange().end()),
spacing_choice(FrontTokenSpacing(uwline.TokensRange())),
// Kludge: This leaks into the resulting FormattedExcerpt, which means
// additional logic is needed to handle preservation of (vertical) spacing
// between formatted token partitions.
current_column(uwline.IndentationSpaces()) {
// The starting column is relative to the current indentation level.
VLOG(4) << "initial column position: " << current_column;
wrap_column_positions.push(current_column + style.wrap_spaces);
if (!uwline.TokensRange().empty()) {
VLOG(4) << "token.text: \'" << undecided_path.front().token->text() << '\'';
// Point undecided_path past the first token.
undecided_path.pop_front();
// Place first token on unwrapped line.
_UpdateColumnPosition();
CHECK_EQ(cumulative_cost, 0);
_OpenGroupBalance(style);
}
VLOG(4) << "root: " << *this;
}
StateNode::StateNode(const std::shared_ptr<const StateNode>& parent,
const BasicFormatStyle& style,
SpacingDecision spacing_choice)
: prev_state(ABSL_DIE_IF_NULL(parent)),
undecided_path(prev_state->undecided_path.begin() + 1, // pop_front()
prev_state->undecided_path.end()),
spacing_choice(spacing_choice),
// current_column to be computed, depending on spacing_choice
cumulative_cost(prev_state->cumulative_cost), // will be adjusted below
wrap_column_positions(prev_state->wrap_column_positions) {
CHECK(!prev_state->Done());
const PreFormatToken& current_format_token(GetCurrentToken());
VLOG(4) << "token.text: \'" << current_format_token.token->text() << '\'';
bool called_open_group_balance = false;
bool called_close_group_balance = false;
if (spacing_choice == SpacingDecision::Wrap) {
// When wrapping and closing a balance group, adjust wrap column stack
// first.
if (current_format_token.balancing == GroupBalancing::Close) {
_CloseGroupBalance();
called_close_group_balance = true;
}
// When wrapping after opening a balance group, adjust wrap column stack
// first.
if (prev_state->spacing_choice == SpacingDecision::Wrap) {
_OpenGroupBalance(style);
called_open_group_balance = true;
}
}
// Update column position and add penalty to the cumulative cost.
const int column_for_penalty = _UpdateColumnPosition();
_UpdateCumulativeCost(style, column_for_penalty);
// Adjusting for open-group is done after updating current column position,
// and is based on the *previous* open-group token, and the
// spacing_choice for *this* token.
if (!called_open_group_balance) {
_OpenGroupBalance(style);
}
// When appending and closing a balance group, adjust wrap column stack last.
if (!called_close_group_balance &&
(current_format_token.balancing == GroupBalancing::Close)) {
_CloseGroupBalance();
}
VLOG(4) << "new state_node: " << *this;
}
const PreFormatToken& StateNode::_GetPreviousToken() const {
CHECK(!ABSL_DIE_IF_NULL(prev_state)->Done());
return prev_state->GetCurrentToken();
}
// Returns the effective column position that should be used for determining
// penalty for going over the column limit. This could be different from
// current_column for multi-line tokens.
int StateNode::_UpdateColumnPosition() {
VLOG(4) << __FUNCTION__ << " spacing decision: " << spacing_choice;
const PreFormatToken& current_format_token(GetCurrentToken());
const int token_length = current_format_token.Length();
{
// Special handling for multi-line tokens.
// Account for the length of text *before* the first newline that might
// overflow the previous line (and should be penalized accordingly).
const auto text = current_format_token.Text();
const auto last_newline_pos = text.find_last_of('\n');
if (last_newline_pos != absl::string_view::npos) {
// There was a newline, it doesn't matter what the wrapping decision was.
// The position is the length of the text after the last newline.
current_column = text.length() - last_newline_pos - 1;
const auto first_newline_pos = text.find_first_of('\n');
if (spacing_choice == SpacingDecision::Wrap) {
// Record the number of spaces preceding this format token because
// it cannot be simply inferred based on current column and
// raw text length.
wrap_multiline_token_spaces_before = wrap_column_positions.top();
return current_column;
}
// Penalize based on the column position that resulted in appending
// text up to the first newline.
if (IsRootState()) {
return first_newline_pos;
} else {
return prev_state->current_column +
current_format_token.before.spaces_required + first_newline_pos;
}
}
}
switch (spacing_choice) {
case SpacingDecision::Align:
LOG(FATAL) << kNotForAlignment;
break;
case SpacingDecision::Wrap:
// If wrapping, new column position is based on the wrap_column_positions
// top-of-stack.
current_column = wrap_column_positions.top() + token_length;
VLOG(4) << "current wrap_position = " << wrap_column_positions.top();
VLOG(4) << "wrapping, current_column is now " << current_column;
break;
case SpacingDecision::Append:
// If appending, new column position is added to previous state's column
// position.
if (!IsRootState()) {
VLOG(4) << " previous column position: " << prev_state->current_column;
current_column = prev_state->current_column +
current_format_token.before.spaces_required +
token_length;
} else {
VLOG(4) << " old column position: " << current_column;
// current_column was already initialized, so just add token length.
current_column += token_length;
}
break;
case SpacingDecision::Preserve: {
const absl::string_view original_spacing_text =
current_format_token.OriginalLeadingSpaces();
// prev_state is null when the first token of the unwrapped line was
// marked as SpacingOptions::Preserve, which indicates that formatting
// was disabled in this range. In this case, we don't really care about
// column position accuracy since we are using original spacing.
current_column =
prev_state ? AdvancingTextNewColumnPosition(
prev_state->current_column, original_spacing_text)
: 0;
current_column += token_length;
VLOG(4) << " new column position (preserved): " << current_column;
break;
}
}
return current_column;
}
void StateNode::_UpdateCumulativeCost(const BasicFormatStyle& style,
int column_for_penalty) {
// This must be called after _UpdateColumnPosition() to account for
// the updated current_column.
// column_for_penalty can be different than current_column in the
// case of multi-line tokens.
// Penalize based on column for penalty.
if (!IsRootState()) {
CHECK_EQ(cumulative_cost, prev_state->cumulative_cost);
}
const PreFormatToken& current_format_token(GetCurrentToken());
if (spacing_choice == SpacingDecision::Wrap) {
// Only incur the penalty for breaking before this token.
// Newly wrapped, so don't bother checking line length and suppress
// penalty if the first token on a line happens to exceed column limit.
cumulative_cost += current_format_token.before.break_penalty;
} else if (spacing_choice == SpacingDecision::Append) {
// Check for line length violation of column_for_penalty, and penalize
// more for each column over the limit.
if (column_for_penalty > style.column_limit) {
cumulative_cost += style.over_column_limit_penalty + column_for_penalty -
style.column_limit;
}
}
// no additional cost if Spacing::Preserve
}
void StateNode::_OpenGroupBalance(const BasicFormatStyle& style) {
VLOG(4) << __FUNCTION__;
// The adjustment to the wrap_column_positions stack based on a token's
// balance type is delayed until we see the token *after*.
// If previous token was an open-group, then update indentation of
// subsequent tokens to line up with the column of the open-group operator.
// Otherwise, it should wrap to the previous state's column position.
//
// Illustrated:
//
// [append-open-group, wrap-next-token]
// ...... (
// ^--- next wrap should line up here
//
// [append-open-group, append-next-token]
// ...... ( ...something...
// ^--- next wrap should line up here
//
// [wrap-open-group, wrap-next-token]
// ......
// (
// ^--- next wrap should line up here
//
// [wrap-open-group, append-next-token]
// ......
// ( ...something...
// ^--- next wrap should line up here
//
// TODO(fangism): what if previous token is open, and new token is close?
// Suppress?
CHECK(!wrap_column_positions.empty());
if (!IsRootState()) {
const PreFormatToken& prev_format_token(_GetPreviousToken());
if (prev_format_token.balancing == GroupBalancing::Open) {
VLOG(4) << "previous token is open-group";
switch (spacing_choice) {
case SpacingDecision::Wrap:
VLOG(4) << "current token is wrapped";
wrap_column_positions.push(prev_state->wrap_column_positions.top() +
style.wrap_spaces);
break;
case SpacingDecision::Align:
LOG(FATAL) << kNotForAlignment;
break;
case SpacingDecision::Append:
VLOG(4) << "current token is appended or aligned";
wrap_column_positions.push(prev_state->current_column);
break;
case SpacingDecision::Preserve:
// TODO(b/134711965): calculate column position using original spaces
break;
}
}
}
// TODO(fangism): what if first token on unwrapped line is open-group?
}
void StateNode::_CloseGroupBalance() {
if (wrap_column_positions.size() > 1) {
// Always maintain at least one element on column position stack.
wrap_column_positions.pop();
}
// TODO(fangism): Align with the corresponding open-group operator,
// assuming its string length is 1, but only when the open-group operator
// has text that follows on the same line.
// This will appear like:
// ... (...
// ...
// ) <-- aligned with (
}
std::shared_ptr<const StateNode> StateNode::AppendIfItFits(
const std::shared_ptr<const StateNode>& current_state,
const verible::BasicFormatStyle& style) {
if (current_state->Done()) return current_state;
const auto& token = current_state->GetNextToken();
// It seems little wasteful to always create both states when only one is
// returned, but compiler optimization should be able to leverage this.
// In any case, this is not a critical path operation, so we're not going to
// worry about it.
const auto wrapped =
std::make_shared<StateNode>(current_state, style, SpacingDecision::Wrap);
const auto appended = std::make_shared<StateNode>(current_state, style,
SpacingDecision::Append);
if (token.before.break_decision == SpacingOptions::MustWrap ||
appended->current_column > style.column_limit) {
return wrapped;
} else {
return appended;
}
}
std::shared_ptr<const StateNode> StateNode::QuickFinish(
const std::shared_ptr<const StateNode>& current_state,
const verible::BasicFormatStyle& style) {
std::shared_ptr<const StateNode> latest(current_state);
// Construct a chain of reference-counted states where the returned pointer
// "holds on" to all of its ancestors like a singly-linked-list.
while (!latest->Done()) {
latest = AppendIfItFits(latest, style);
}
return latest;
}
void StateNode::ReconstructFormatDecisions(FormattedExcerpt* result) const {
// Find all wrap decisions from the greatest ancestor state to this state.
// This is allowed to work on any intermediate state in the search process,
// so the depth can be less than the number of format tokens in the
// UnwrappedLine.
const size_t depth = Depth();
CHECK_LE(depth, result->Tokens().size());
const StateNode* reverse_iter = this;
auto& format_tokens = result->MutableTokens();
const auto format_tokens_slice =
make_range(format_tokens.begin(), format_tokens.begin() + depth);
for (auto& format_token : reversed_view(format_tokens_slice)) {
const auto text = format_token.token->text();
VLOG(3) << "reconstructing: " << text;
// Apply decision at reverse_iter to (formatted) FormatToken.
format_token.before.action = ABSL_DIE_IF_NULL(reverse_iter)->spacing_choice;
if (reverse_iter->wrap_multiline_token_spaces_before >= 0) {
VLOG(3) << " wrapped a multi-line token, leading spaces was: "
<< reverse_iter->wrap_multiline_token_spaces_before;
// This is a special case where a multi-line token was wrapped.
// This number of spaces can only be inferred if the token that was
// wrapped did not contain multi-line text.
// In this case that spacing is not deducible, and had to be recorded.
CHECK_EQ(reverse_iter->spacing_choice, SpacingDecision::Wrap);
format_token.before.spaces =
reverse_iter->wrap_multiline_token_spaces_before;
} else if (reverse_iter->spacing_choice == SpacingDecision::Wrap) {
// Mark as inserting a line break.
// Immediately after a line break, print out the amount of spaces
// required to honor the indentation and wrapping.
format_token.before.spaces = reverse_iter->current_column - text.length();
VLOG(3) << " wrapped, with " << format_token.before.spaces
<< " leading spaces.";
CHECK_GE(format_token.before.spaces, 0);
} // else: no need to calculate before.spaces.
reverse_iter = reverse_iter->next();
}
}
std::ostream& operator<<(std::ostream& stream, const StateNode& state) {
// Omit information about remaining decisions and parent state.
CHECK(!state.wrap_column_positions.empty());
return stream << "spacing:" << state.spacing_choice << // noformat
", col@" << state.current_column << // noformat
", cost=" << state.cumulative_cost << // noformat
", [..." << state.wrap_column_positions.top() << ']';
}
} // namespace verible