Skip to content

Commit 5aa5558

Browse files
authored
Merge pull request #3023 from rintaro/parser-contetual-keyword-prefixed
[Parser] Add 'atContextualKeywordPrefixedSyntax' method
2 parents d9fabf7 + ebf79c3 commit 5aa5558

File tree

4 files changed

+149
-140
lines changed

4 files changed

+149
-140
lines changed

Sources/SwiftParser/Expressions.swift

+6-40
Original file line numberDiff line numberDiff line change
@@ -381,20 +381,6 @@ extension Parser {
381381
}
382382
}
383383

384-
/// Whether the current token is a valid contextual expression modifier like
385-
/// `copy`, `consume`.
386-
///
387-
/// `copy` etc. are only contextually a keyword if they are followed by an
388-
/// identifier or keyword on the same line. We do this to ensure that we do
389-
/// not break any copy functions defined by users.
390-
private mutating func atContextualExpressionModifier() -> Bool {
391-
return self.peek(
392-
isAt: TokenSpec(.identifier, allowAtStartOfLine: false),
393-
TokenSpec(.dollarIdentifier, allowAtStartOfLine: false),
394-
TokenSpec(.self, allowAtStartOfLine: false)
395-
)
396-
}
397-
398384
/// Parse an expression sequence element.
399385
mutating func parseSequenceExpressionElement(
400386
flavor: ExprFlavor,
@@ -445,27 +431,7 @@ extension Parser {
445431
)
446432
)
447433
case (.unsafe, let handle)?:
448-
if self.peek().isAtStartOfLine
449-
// Closing paired syntax
450-
|| self.peek(isAt: .rightParen, .rightSquare, .rightBrace)
451-
// Assignment
452-
|| self.peek(isAt: .equal)
453-
// As an argument label or in a list context.
454-
|| self.peek(isAt: .colon, .comma)
455-
// Start of a closure in a context where it should be interpreted as
456-
// being part of a statement.
457-
|| (flavor == .stmtCondition && self.peek(isAt: .leftBrace))
458-
// Avoid treating as an "unsafe" expression when there is no trivia
459-
// following the "unsafe" and the following token could either be a
460-
// postfix expression or a subexpression:
461-
// - Member access vs. leading .
462-
// - Call vs. tuple expression.
463-
// - Subscript vs. array or dictionary expression
464-
|| (self.peek(isAt: .period, .leftParen, .leftSquare) && self.peek().leadingTriviaByteLength == 0
465-
&& self.currentToken.trailingTriviaByteLength == 0)
466-
// End of file
467-
|| self.peek(isAt: .endOfFile)
468-
{
434+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor, acceptClosure: true, preferPostfixExpr: false) {
469435
break EXPR_PREFIX
470436
}
471437

@@ -486,7 +452,7 @@ extension Parser {
486452
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
487453
fallthrough
488454
case (.borrow, let handle)?:
489-
if !atContextualExpressionModifier() {
455+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
490456
break EXPR_PREFIX
491457
}
492458
let borrowTok = self.eat(handle)
@@ -503,7 +469,7 @@ extension Parser {
503469
)
504470

505471
case (.copy, let handle)?:
506-
if !atContextualExpressionModifier() {
472+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
507473
break EXPR_PREFIX
508474
}
509475

@@ -524,7 +490,7 @@ extension Parser {
524490
assert(self.experimentalFeatures.contains(.oldOwnershipOperatorSpellings))
525491
fallthrough
526492
case (.consume, let handle)?:
527-
if !atContextualExpressionModifier() {
493+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
528494
break EXPR_PREFIX
529495
}
530496

@@ -546,7 +512,7 @@ extension Parser {
546512
return RawExprSyntax(parsePackExpansionExpr(repeatHandle: handle, flavor: flavor, pattern: pattern))
547513

548514
case (.each, let handle)?:
549-
if !atContextualExpressionModifier() {
515+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) {
550516
break EXPR_PREFIX
551517
}
552518

@@ -561,7 +527,7 @@ extension Parser {
561527
)
562528

563529
case (.any, _)?:
564-
if !atContextualExpressionModifier() && !self.peek().isContextualPunctuator("~") {
530+
if !atContextualKeywordPrefixedSyntax(exprFlavor: flavor) && !self.peek().isContextualPunctuator("~") {
565531
break EXPR_PREFIX
566532
}
567533

Sources/SwiftParser/Statements.swift

+133-79
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,128 @@ extension Parser {
900900
}
901901
}
902902

903+
extension TokenConsumer {
904+
/// Determine whether the word at the cursor looks like the start of a keyword-prefixed syntax.
905+
///
906+
/// - Parameters:
907+
/// - exprFlavor: The expression context. When using this function for a statement, e.g. 'yield',
908+
/// use `.basic`.
909+
/// - acceptClosure: When the next token is '{' and it looks like a closure, use this value as the result.
910+
/// - preferPostfixExpr: When the next token is '.', '(', or '[' and there is a space between the word and that token,
911+
/// use `!preferPostfixExpr` as the result.
912+
/// - allowNextLineOperand: Whether the keyword-prefixed syntax accepts the operand on the next line.
913+
mutating func atContextualKeywordPrefixedSyntax(
914+
exprFlavor: Parser.ExprFlavor,
915+
acceptClosure: Bool = false,
916+
preferPostfixExpr: Bool = true,
917+
allowNextLineOperand: Bool = false
918+
) -> Bool {
919+
let next = peek()
920+
921+
// The next token must be on the same line.
922+
if next.isAtStartOfLine && !allowNextLineOperand {
923+
return false
924+
}
925+
926+
switch next.rawTokenKind {
927+
928+
case .identifier, .dollarIdentifier, .wildcard:
929+
// E.g. <word> foo
930+
return true
931+
932+
case .integerLiteral, .floatLiteral,
933+
.stringQuote, .multilineStringQuote, .singleQuote, .rawStringPoundDelimiter,
934+
.regexSlash, .regexPoundDelimiter:
935+
// E.g. <word> 1
936+
return true
937+
938+
case .prefixAmpersand, .prefixOperator, .atSign, .backslash, .pound:
939+
// E.g. <word> !<expr>
940+
return true
941+
942+
case .keyword:
943+
// Some lexer-classified keywords can start expressions.
944+
switch Keyword(next.tokenText) {
945+
case .Any, .Self, .self, .super, .`init`, .true, .false, .nil:
946+
return true
947+
case .repeat, .try:
948+
return true
949+
case .if, .switch:
950+
return true
951+
case .do where self.experimentalFeatures.contains(.doExpressions):
952+
return true
953+
954+
default:
955+
return false
956+
}
957+
958+
case .binaryOperator, .equal, .arrow, .infixQuestionMark:
959+
// E.g. <word> != <expr>
960+
return false
961+
case .postfixOperator, .postfixQuestionMark, .exclamationMark, .ellipsis:
962+
// E.g. <word>++
963+
return false
964+
case .rightBrace, .rightParen, .rightSquare:
965+
// E.g. <word>]
966+
return false
967+
case .colon, .comma:
968+
// E.g. <word>,
969+
return false
970+
case .semicolon, .endOfFile, .poundElse, .poundElseif, .poundEndif:
971+
return false
972+
973+
case .leftAngle, .rightAngle:
974+
// The lexer never produces these token kinds.
975+
return false
976+
977+
case .stringSegment, .regexLiteralPattern:
978+
// Calling this function inside a string/regex literal?
979+
return false
980+
981+
case .backtick, .poundAvailable, .poundUnavailable,
982+
.poundSourceLocation, .poundIf, .shebang, .unknown:
983+
// These are invalid for both cases
984+
// E.g. <word> #available
985+
return false
986+
987+
case .period, .leftParen, .leftSquare:
988+
// These are truly ambiguous. They can be both start of postfix expression
989+
// suffix or start of primary expression:
990+
//
991+
// - Member access vs. implicit member expression
992+
// - Call vs. tuple expression
993+
// - Subscript vs. collection literal
994+
//
995+
if preferPostfixExpr {
996+
return false
997+
}
998+
999+
// If there's no space between the tokens, consider it an expression.
1000+
// Otherwise, it looks like a keyword followed by an expression.
1001+
return (next.leadingTriviaByteLength + currentToken.trailingTriviaByteLength) != 0
1002+
1003+
case .leftBrace:
1004+
// E.g. <word> { ... }
1005+
// Trailing closure is also ambiguous:
1006+
//
1007+
// - Trailing closure vs. immediately-invoked closure
1008+
//
1009+
if !acceptClosure {
1010+
return false
1011+
}
1012+
1013+
// Checking for whitespace after the word does not help here because people
1014+
// usually put a space before trailing closures. Even though that is source
1015+
// breaking, we prefer parsing it as a keyword if the syntax accepts
1016+
// expressions starting with a closure. E.g. 'unsafe { ... }()'
1017+
return self.withLookahead {
1018+
$0.consumeAnyToken()
1019+
return $0.atValidTrailingClosure(flavor: exprFlavor)
1020+
}
1021+
}
1022+
}
1023+
}
1024+
9031025
// MARK: Lookahead
9041026

9051027
extension Parser.Lookahead {
@@ -949,91 +1071,23 @@ extension Parser.Lookahead {
9491071
// FIXME: 'repeat' followed by '{' could be a pack expansion
9501072
// with a closure pattern.
9511073
return self.peek().rawTokenKind == .leftBrace
952-
case .yield?:
953-
switch self.peek().rawTokenKind {
954-
case .prefixAmpersand:
955-
// "yield &" always denotes a yield statement.
956-
return true
957-
case .leftParen:
958-
// "yield (", by contrast, must be disambiguated with additional
959-
// context. We always consider it an apply expression of a function
960-
// called `yield` for the purposes of the parse.
961-
return false
962-
case .binaryOperator:
963-
// 'yield &= x' treats yield as an identifier.
964-
return false
965-
default:
966-
// "yield" followed immediately by any other token is likely a
967-
// yield statement of some singular expression.
968-
return !self.peek().isAtStartOfLine
969-
}
970-
case .discard?:
971-
let next = peek()
972-
// The thing to be discarded must be on the same line as `discard`.
973-
if next.isAtStartOfLine {
974-
return false
975-
}
976-
switch next.rawTokenKind {
977-
case .identifier, .keyword:
978-
// Since some identifiers like "self" are classified as keywords,
979-
// we want to recognize those too, to handle "discard self". We also
980-
// accept any identifier since we want to emit a nice error message
981-
// later on during type checking.
982-
return true
983-
default:
984-
// any other token following "discard" means it's not the statement.
985-
// For example, could be the function call "discard()".
986-
return false
987-
}
988-
989-
case .then:
990-
return atStartOfThenStatement(preferExpr: preferExpr)
1074+
case .yield?, .discard?:
1075+
return atContextualKeywordPrefixedSyntax(
1076+
exprFlavor: .basic,
1077+
preferPostfixExpr: true
1078+
)
1079+
case .then?:
1080+
return atContextualKeywordPrefixedSyntax(
1081+
exprFlavor: .basic,
1082+
preferPostfixExpr: false,
1083+
allowNextLineOperand: !preferExpr
1084+
)
9911085

9921086
case nil:
9931087
return false
9941088
}
9951089
}
9961090

997-
/// Whether we're currently at a `then` token that should be parsed as a
998-
/// `then` statement.
999-
mutating func atStartOfThenStatement(preferExpr: Bool) -> Bool {
1000-
guard self.at(.keyword(.then)) else {
1001-
return false
1002-
}
1003-
1004-
// If we prefer an expr and aren't at the start of a newline, then don't
1005-
// parse a ThenStmt.
1006-
if preferExpr && !self.atStartOfLine {
1007-
return false
1008-
}
1009-
1010-
// If 'then' is followed by a binary or postfix operator, prefer to parse as
1011-
// an expr.
1012-
if peek(isAtAnyIn: BinaryOperatorLike.self) != nil || peek(isAtAnyIn: PostfixOperatorLike.self) != nil {
1013-
return false
1014-
}
1015-
1016-
switch PrepareForKeywordMatch(peek()) {
1017-
case TokenSpec(.is), TokenSpec(.as):
1018-
// Treat 'is' and 'as' like the binary operator case, and parse as an
1019-
// expr.
1020-
return false
1021-
1022-
case .leftBrace:
1023-
// This is a trailing closure.
1024-
return false
1025-
1026-
case .leftParen, .leftSquare, .period:
1027-
// These are handled based on whether there is trivia between the 'then'
1028-
// and the token. If so, it's a 'then' statement. Otherwise it should
1029-
// be treated as an expression, e.g `then(...)`, `then[...]`, `then.foo`.
1030-
return !self.currentToken.trailingTriviaText.isEmpty || !peek().leadingTriviaText.isEmpty
1031-
default:
1032-
break
1033-
}
1034-
return true
1035-
}
1036-
10371091
/// Returns whether the parser's current position is the start of a switch case,
10381092
/// given that we're in the middle of a switch already.
10391093
mutating func atStartOfSwitchCase(allowRecovery: Bool = false) -> Bool {

Tests/SwiftParserTest/StatementTests.swift

+6-5
Original file line numberDiff line numberDiff line change
@@ -609,21 +609,22 @@ final class StatementTests: ParserTestCase {
609609

610610
assertParse(
611611
"""
612-
discard 1️⃣case
612+
discard1️⃣ 2️⃣case
613613
""",
614614
diagnostics: [
615615
DiagnosticSpec(
616616
locationMarker: "1️⃣",
617-
message: "expected expression in 'discard' statement",
618-
fixIts: ["insert expression"]
617+
message: "consecutive statements on a line must be separated by newline or ';'",
618+
fixIts: ["insert newline", "insert ';'"]
619619
),
620620
DiagnosticSpec(
621-
locationMarker: "1️⃣",
621+
locationMarker: "2️⃣",
622622
message: "'case' can only appear inside a 'switch' statement or 'enum' declaration"
623623
),
624624
],
625625
fixedSource: """
626-
discard <#expression#>case
626+
discard
627+
case
627628
"""
628629
)
629630

Tests/SwiftParserTest/ThenStatementTests.swift

+4-16
Original file line numberDiff line numberDiff line change
@@ -295,30 +295,18 @@ final class ThenStatementTests: ParserTestCase {
295295
func testThenStmt22() {
296296
assertParse(
297297
"""
298-
then1️⃣
298+
then
299299
""",
300-
diagnostics: [
301-
DiagnosticSpec(
302-
message: "expected expression in 'then' statement",
303-
fixIts: ["insert expression"]
304-
)
305-
],
306-
fixedSource: "then <#expression#>"
300+
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
307301
)
308302
}
309303

310304
func testThenStmt23() {
311305
assertParse(
312306
"""
313-
then1️⃣;
307+
then;
314308
""",
315-
diagnostics: [
316-
DiagnosticSpec(
317-
message: "expected expression in 'then' statement",
318-
fixIts: ["insert expression"]
319-
)
320-
],
321-
fixedSource: "then <#expression#>;"
309+
substructure: DeclReferenceExprSyntax(baseName: .identifier("then"))
322310
)
323311
}
324312

0 commit comments

Comments
 (0)