Format multi-line strings and string interpolation. (#1362)

Format multi-line strings and string interpolation. In the old style, the formatter has some special code to discard line splits that occur inside string interpolation expressions. That's largely for historical reasons because the formatter initially didn't support formatting of string interpolation expressions *at all* and I didn't want too much churn when adding support for formatting them. In the new style here, we don't do that: The contents of a string interpolation expression are split like any other expression. In practice, it doesn't matter much since users generally reorganize their code to avoid long strings and splits in string interpolation. This way leads to less special case code in the formatter. This change is somewhat large because I also reorganized how newlines inside lexemes are handled in general. Previously, TextPiece stored a list of "lines" to handle things like line comments preceding or following a token. But it was also possible for a single "line" string in that list to internally contain newline characters because of multi-line strings or block comments. But those internal newlines also need to force surrounding code to split, so there was this "_containsNewline" bit that had to be plumbed through and tracked. Even so, there were latent bugs where the column calculation in CodeWriter would be incorrect if a line contained internal newlines because it just used the length of the entire "line" string. With this change, the "_lines" list in TextPiece really is a list of lines. We eagerly split any incoming lexeme into multiple lines before writing it to the TextPiece. I think the resulting code is simpler, it fixes the column calculation in CodeWriter, and it means the formatter will correctly normalize line endings even when they occur inside block comments or multiline strings. This was a good time to test the line ending code, so I copied those existing tests over from short_format_test.dart.
I went ahead and copied all of the unit tests from that file, even the ones not related to line endings, since they're all working and passing now. This PR does *not* handle adjacent strings. Those have a decent amount of special handling not related to what's going on here, so I'll do those separately.
dart-lang · Jan 23, 2024 · 94f81dd · 94f81dd
1 parent b1b0481
commit 94f81dd
Show file tree

Hide file tree

Showing 12 changed files with 606 additions and 73 deletions.
diff --git a/lib/src/ast_extensions.dart b/lib/src/ast_extensions.dart
@@ -156,14 +156,6 @@ extension ExpressionExtensions on Expression {
       expression = expression.expression;
     }
 
-    // TODO(tall): We should also allow multi-line strings to be formatted
-    // like block arguments, at least in some cases like:
-    //
-    //     function('''
-    //       Lots of
-    //       text
-    //     ''');
-
     // TODO(tall): Consider whether immediately-invoked function expressions
     // should be block argument candidates, like:
     //
@@ -177,16 +169,26 @@ extension ExpressionExtensions on Expression {
         parameters.parameters.canSplit(parameters.rightParenthesis) ||
             (body is BlockFunctionBody &&
                 body.block.statements.canSplit(body.block.rightBracket)),
+
+      // Non-empty collection literals can block split.
       ListLiteral(:var elements, :var rightBracket) ||
       SetOrMapLiteral(:var elements, :var rightBracket) =>
         elements.canSplit(rightBracket),
       RecordLiteral(:var fields, :var rightParenthesis) =>
         fields.canSplit(rightParenthesis),
       SwitchExpression(:var cases, :var rightBracket) =>
         cases.canSplit(rightBracket),
+
+      // Function calls can block split if their argument lists can.
       InstanceCreationExpression(:var argumentList) ||
       MethodInvocation(:var argumentList) =>
         argumentList.arguments.canSplit(argumentList.rightParenthesis),
+
+      // Multi-line strings can.
+      StringInterpolation(isMultiline: true) => true,
+      SimpleStringLiteral(isMultiline: true) => true,
+
+      // Parenthesized expressions can if the inner one can.
       ParenthesizedExpression(:var expression) => expression.canBlockSplit,
       _ => false,
     };

diff --git a/lib/src/back_end/code_writer.dart b/lib/src/back_end/code_writer.dart
@@ -128,22 +128,6 @@ class CodeWriter {
         isValid: !_hasInvalidNewline, overflow: _overflow, cost: _cost);
   }
 
-  /// Notes that a newline has been written.
-  ///
-  /// If this occurs in a place where newlines are prohibited, then invalidates
-  /// the solution.
-  ///
-  /// This is called externally by [TextPiece] to let the writer know some of
-  /// the raw text contains a newline, which can happen in multi-line block
-  /// comments and multi-line string literals.
-  void handleNewline() {
-    if (!_options.allowNewlines) _hasInvalidNewline = true;
-
-    // Note that this piece contains a newline so that we can propagate that
-    // up to containing pieces too.
-    _options.hasNewline = true;
-  }
-
   /// Appends [text] to the output.
   ///
   /// If [text] contains any internal newlines, the caller is responsible for
@@ -206,16 +190,30 @@ class CodeWriter {
   ///
   /// If [indent] is given, set the indentation of the new line (and all
   /// subsequent lines) to that indentation relative to the containing piece.
-  void newline({bool blank = false, int? indent}) {
+  ///
+  /// If [flushLeft] is `true`, then the new line begins at column 1 and ignores
+  /// any surrounding indentation. This is used for multi-line block comments
+  /// and multi-line strings.
+  void newline({bool blank = false, int? indent, bool flushLeft = false}) {
     if (indent != null) setIndent(indent);
 
-    whitespace(blank ? Whitespace.blankLine : Whitespace.newline);
+    whitespace(blank ? Whitespace.blankLine : Whitespace.newline,
+        flushLeft: flushLeft);
   }
 
-  void whitespace(Whitespace whitespace) {
+  /// Queues [whitespace] to be written to the output.
+  ///
+  /// If any non-whitespace is written after this call, then this whitespace
+  /// will be written first. Also handles merging multiple kinds of whitespace
+  /// intelligently together.
+  ///
+  /// If [flushLeft] is `true`, then the new line begins at column 1 and ignores
+  /// any surrounding indentation. This is used for multi-line block comments
+  /// and multi-line strings.
+  void whitespace(Whitespace whitespace, {bool flushLeft = false}) {
     if (whitespace case Whitespace.newline || Whitespace.blankLine) {
-      handleNewline();
-      _pendingIndent = _options.indent;
+      _handleNewline();
+      _pendingIndent = flushLeft ? 0 : _options.indent;
     }
 
     _pendingWhitespace = _pendingWhitespace.collapse(whitespace);
@@ -248,9 +246,7 @@ class CodeWriter {
     var childOptions = _pieceOptions.removeLast();
 
     // If the child [piece] contains a newline then this one transitively does.
-    // TODO(tall): At some point, we may want to provide an API so that pieces
-    // can block this from propagating outward.
-    if (childOptions.hasNewline) handleNewline();
+    if (childOptions.hasNewline) _handleNewline();
   }
 
   /// Format [piece] if not null.
@@ -274,6 +270,18 @@ class CodeWriter {
     _selectionEnd = _buffer.length + end;
   }
 
+  /// Notes that a newline has been written.
+  ///
+  /// If this occurs in a place where newlines are prohibited, then invalidates
+  /// the solution.
+  void _handleNewline() {
+    if (!_options.allowNewlines) _hasInvalidNewline = true;
+
+    // Note that this piece contains a newline so that we can propagate that
+    // up to containing pieces too.
+    _options.hasNewline = true;
+  }
+
   /// Write any pending whitespace.
   ///
   /// This is called before non-whitespace text is about to be written, or

diff --git a/lib/src/front_end/ast_node_visitor.dart b/lib/src/front_end/ast_node_visitor.dart
@@ -1079,12 +1079,17 @@ class AstNodeVisitor extends ThrowingAstVisitor<Piece> with PieceFactory {
 
   @override
   Piece visitInterpolationExpression(InterpolationExpression node) {
-    throw UnimplementedError();
+    return buildPiece((b) {
+      b.token(node.leftBracket);
+      b.visit(node.expression);
+      b.token(node.rightBracket);
+    });
   }
 
   @override
   Piece visitInterpolationString(InterpolationString node) {
-    throw UnimplementedError();
+    return pieces.stringLiteralPiece(node.contents,
+        isMultiline: (node.parent as StringInterpolation).isMultiline);
   }
 
   @override
@@ -1530,7 +1535,8 @@ class AstNodeVisitor extends ThrowingAstVisitor<Piece> with PieceFactory {
 
   @override
   Piece visitSimpleStringLiteral(SimpleStringLiteral node) {
-    return tokenPiece(node.literal);
+    return pieces.stringLiteralPiece(node.literal,
+        isMultiline: node.isMultiline);
   }
 
   @override
@@ -1543,7 +1549,11 @@ class AstNodeVisitor extends ThrowingAstVisitor<Piece> with PieceFactory {
 
   @override
   Piece visitStringInterpolation(StringInterpolation node) {
-    throw UnimplementedError();
+    return buildPiece((b) {
+      for (var element in node.elements) {
+        b.visit(element);
+      }
+    });
   }
 
   @override

diff --git a/lib/src/front_end/piece_writer.dart b/lib/src/front_end/piece_writer.dart
@@ -11,6 +11,9 @@ import '../piece/piece.dart';
 import '../source_code.dart';
 import 'comment_writer.dart';
 
+/// RegExp that matches any valid Dart line terminator.
+final _lineTerminatorPattern = RegExp(r'\r\n?|\n');
+
 /// Builds [TextPiece]s for [Token]s and comments.
 ///
 /// Handles updating selection markers and attaching comments to the tokens
@@ -69,6 +72,22 @@ class PieceWriter {
     return tokenPiece;
   }
 
+  /// Creates a piece for a simple or interpolated string [literal].
+  ///
+  /// Handles splitting it into multiple lines in the resulting [TextPiece] if
+  /// [isMultiline] is `true`.
+  Piece stringLiteralPiece(Token literal, {required bool isMultiline}) {
+    if (!isMultiline) return tokenPiece(literal);
+
+    if (!_writeCommentsBefore(literal)) {
+      // We want this token to be in its own TextPiece, so if the comments
+      // didn't already lead to ending the previous TextPiece then do so now.
+      _currentText = TextPiece();
+    }
+
+    return _writeMultiLine(literal.lexeme, offset: literal.offset);
+  }
+
   // TODO(tall): Much of the comment handling code in CommentWriter got moved
   // into here, so there isn't great separation of concerns anymore. Can we
   // organize this code better? Or just combine CommentWriter with this class
@@ -95,9 +114,7 @@ class PieceWriter {
   Piece writeComment(SourceComment comment) {
     _currentText = TextPiece();
 
-    _write(comment.text,
-        offset: comment.offset, containsNewline: comment.text.contains('\n'));
-    return _currentText;
+    return _writeMultiLine(comment.text, offset: comment.offset);
   }
 
   /// Writes all of the comments that appear between [token] and the previous
@@ -146,8 +163,7 @@ class PieceWriter {
         _currentText.newline();
       }
 
-      _write(comment.text,
-          offset: comment.offset, containsNewline: comment.text.contains('\n'));
+      _write(comment.text, offset: comment.offset);
     }
 
     // Output a trailing newline after the last comment if it needs one.
@@ -180,14 +196,34 @@ class PieceWriter {
       _currentText = TextPiece();
     }
 
-    _write(lexeme ?? token.lexeme, offset: token.offset);
+    lexeme ??= token.lexeme;
+
+    _write(lexeme, offset: token.offset);
+  }
+
+  /// Writes multi-line [text] to the current [TextPiece].
+  ///
+  /// Splits [text] at every line terminator and appends each line, separated
+  /// by flush-left newlines. [offset] is the offset of [text] in the original
+  /// source; each line's offset counts the terminators preceding it (one or
+  /// two characters each) so selection markers stay aligned with the source.
+  Piece _writeMultiLine(String text, {required int offset}) {
+    var start = 0;
+    for (var match in _lineTerminatorPattern.allMatches(text)) {
+      _write(text.substring(start, match.start), offset: offset + start);
+      _currentText.newline(flushLeft: true);
+      start = match.end;
+    }
+
+    _write(text.substring(start), offset: offset + start);
+    return _currentText;
   }
 
   /// Writes [text] to the current [TextPiece].
   ///
   /// If [offset] is given and it contains any selection markers, then attaches
   /// those markers to the [TextPiece].
-  void _write(String text, {bool containsNewline = false, int? offset}) {
+  void _write(String text, {int? offset}) {
     if (offset != null) {
       // If this text contains any of the selection endpoints, note their
       // relative locations in the text piece.
@@ -200,7 +236,7 @@ class PieceWriter {
       }
     }
 
-    _currentText.append(text, containsNewline: containsNewline);
+    _currentText.append(text);
   }
 
   /// Finishes writing and returns a [SourceCode] containing the final output