Skip to content

Commit 073a271

Browse files
committed
[Parser] Remove redundant UTF-16 surrogates check logic
1 parent 2186cc6 commit 073a271

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

Sources/SwiftParser/Lexer/UnicodeScalarExtensions.swift

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -212,11 +212,6 @@ extension Unicode.Scalar {
212 212
_ = advance()
213 213
}
214 214

215-
// UTF-16 surrogate pair values are not valid code points.
216-
if (charValue >= 0xD800 && charValue <= 0xDFFF) {
217-
return nil
218-
}
219-
220 215
// If we got here, we read the appropriate number of accumulated bytes.
221 216
// Verify that the encoding was actually minimal.
222 217
// Number of bits in the value, ignoring leading zeros.

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1083,6 +1083,41 @@ public class LexerTests: ParserTestCase {
1083 1083
}
1084 1084
}
1085 1085

1086+
func testUTF16Surrogates1() {
1087+
// U+D800 <= (UTF16 surrogates code point) <= U+DFFF
1088+
let sourceBytes: [UInt8] = [0xED, 0xA0, 0x80] // The bytes represent the code point U+D800
1089+
1090+
lex(sourceBytes) { lexemes in
1091+
guard lexemes.count == 1 else {
1092+
return XCTFail("Expected 1 lexemes, got \(lexemes.count)")
1093+
}
1094+
assertRawBytesLexeme(
1095+
lexemes[0],
1096+
kind: .endOfFile,
1097+
leadingTrivia: [0xED, 0xA0, 0x80],
1098+
text: [],
1099+
error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
1100+
)
1101+
}
1102+
}
1103+
1104+
func testUTF16Surrogates2() {
1105+
let sourceBytes: [UInt8] = [0xED, 0xBF, 0xBF] // The bytes represent the code point U+DFFF
1106+
1107+
lex(sourceBytes) { lexemes in
1108+
guard lexemes.count == 1 else {
1109+
return XCTFail("Expected 1 lexemes, got \(lexemes.count)")
1110+
}
1111+
assertRawBytesLexeme(
1112+
lexemes[0],
1113+
kind: .endOfFile,
1114+
leadingTrivia: [0xED, 0xBF, 0xBF],
1115+
text: [],
1116+
error: TokenDiagnostic(.invalidUtf8, byteOffset: 0)
1117+
)
1118+
}
1119+
}
1120+
1086 1121
func testInvalidUTF8RegexLiteral() {
1087 1122
let slashByte = UInt8(UnicodeScalar("/").value)
1088 1123
let sourceBytes: [UInt8] = [slashByte, 0xfd, slashByte]

0 commit comments

Comments
 (0)