Skip to content

Commit acb6690

Browse files
committed
Auto merge of #66670 - crlf0710:normalize_ident, r=Dylan-DPC
Normalize ident: perform Unicode normalization on identifiers. This resolves the first bullet point in #55467.
2 parents c0b16b4 + 27e7a1b commit acb6690

File tree

7 files changed

+35
-6
lines changed

7 files changed

+35
-6
lines changed

Cargo.lock

+7-2
Original file line numberDiff line numberDiff line change
@@ -3333,6 +3333,7 @@ dependencies = [
33333333
"serde",
33343334
"serde_json",
33353335
"smallvec 0.6.10",
3336+
"smallvec 1.0.0",
33363337
"syn 0.15.35",
33373338
"url 2.1.0",
33383339
"winapi 0.3.8",
@@ -3696,6 +3697,7 @@ dependencies = [
36963697
"smallvec 1.0.0",
36973698
"syntax",
36983699
"syntax_pos",
3700+
"unicode-normalization",
36993701
]
37003702

37013703
[[package]]
@@ -4913,9 +4915,12 @@ dependencies = [
49134915

49144916
[[package]]
49154917
name = "unicode-normalization"
4916-
version = "0.1.7"
4918+
version = "0.1.11"
49174919
source = "registry+https://github.com/rust-lang/crates.io-index"
4918-
checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
4920+
checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
4921+
dependencies = [
4922+
"smallvec 1.0.0",
4923+
]
49194924

49204925
[[package]]
49214926
name = "unicode-segmentation"

src/librustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
2020
smallvec = { version = "1.0", features = ["union", "may_dangle"] }
2121
syntax_pos = { path = "../libsyntax_pos" }
2222
syntax = { path = "../libsyntax" }
23+
unicode-normalization = "0.1.11"

src/librustc_parse/lexer/mod.rs

+15-2
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,7 @@ impl<'a> StringReader<'a> {
220220
if is_raw_ident {
221221
ident_start = ident_start + BytePos(2);
222222
}
223-
// FIXME: perform NFKC normalization here. (Issue #2253)
224-
let sym = self.symbol_from(ident_start);
223+
let sym = self.nfc_symbol_from(ident_start);
225224
if is_raw_ident {
226225
let span = self.mk_sp(start, self.pos);
227226
if !sym.can_be_raw() {
@@ -470,6 +469,20 @@ impl<'a> StringReader<'a> {
470469
Symbol::intern(self.str_from_to(start, end))
471470
}
472471

472+
/// As symbol_from, with the text normalized into Unicode NFC form.
473+
fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
474+
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
475+
debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
476+
let sym = self.str_from(start);
477+
match is_nfc_quick(sym.chars()) {
478+
IsNormalized::Yes => Symbol::intern(sym),
479+
_ => {
480+
let sym_str: String = sym.chars().nfc().collect();
481+
Symbol::intern(&sym_str)
482+
}
483+
}
484+
}
485+
473486
/// Slice of the source text spanning from `start` up to but excluding `end`.
474487
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
475488
&self.src[self.src_index(start)..self.src_index(end)]

src/test/ui/codemap_tests/unicode_2.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ LL | let _ = ("아あ", 1i42);
1414
|
1515
= help: valid widths are 8, 16, 32, 64 and 128
1616

17-
error[E0425]: cannot find value `a̐é` in this scope
17+
error[E0425]: cannot find value `a̐é` in this scope
1818
--> $DIR/unicode_2.rs:6:13
1919
|
2020
LL | let _ = a̐é;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// check-pass
2+
#![feature(non_ascii_idents)]
3+
4+
struct Résumé; // ['LATIN SMALL LETTER E WITH ACUTE']
5+
6+
fn main() {
7+
let _ = Résumé; // ['LATIN SMALL LETTER E', 'COMBINING ACUTE ACCENT']
8+
}

src/tools/rustc-workspace-hack/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ curl-sys = { version = "0.4.13", features = ["http2", "libnghttp2-sys"], optiona
6262
crossbeam-utils = { version = "0.6.5", features = ["nightly"] }
6363
serde = { version = "1.0.82", features = ['derive'] }
6464
serde_json = { version = "1.0.31", features = ["raw_value"] }
65-
smallvec = { version = "0.6", features = ['union', 'may_dangle'] }
65+
smallvec-0_6 = { package = "smallvec", version = "0.6", features = ['union', 'may_dangle'] }
66+
smallvec = { version = "1.0", features = ['union', 'may_dangle'] }
6667
url = { version = "2.0", features = ['serde'] }
6768
syn = { version = "0.15", features = ['full'] }
6869

src/tools/tidy/src/deps.rs

+1
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ const WHITELIST: &[Crate<'_>] = &[
170170
Crate("term_size"),
171171
Crate("thread_local"),
172172
Crate("ucd-util"),
173+
Crate("unicode-normalization"),
173174
Crate("unicode-width"),
174175
Crate("unicode-xid"),
175176
Crate("unreachable"),

0 commit comments

Comments
 (0)