Skip to content

Commit 206fe8e

Browse files
committed
flatten rustc_lexer::character_properties module
At the call site, `rustc_lexer::is_whitespace` reads much better than `character_properties::is_whitespace`.
1 parent a0c186c commit 206fe8e

File tree

7 files changed

+82
-86
lines changed

7 files changed

+82
-86
lines changed

src/libfmt_macros/lib.rs

+2-3
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ use std::string;
2323
use std::iter;
2424

2525
use syntax_pos::{InnerSpan, Symbol};
26-
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
2726

2827
#[derive(Copy, Clone)]
2928
struct InnerOffset(usize);
@@ -602,7 +601,7 @@ impl<'a> Parser<'a> {
602601
/// Rust identifier, except that it can't start with the `_` character.
603602
fn word(&mut self) -> &'a str {
604603
let start = match self.cur.peek() {
605-
Some(&(pos, c)) if c != '_' && is_id_start(c) => {
604+
Some(&(pos, c)) if c != '_' && rustc_lexer::is_id_start(c) => {
606605
self.cur.next();
607606
pos
608607
}
@@ -611,7 +610,7 @@ impl<'a> Parser<'a> {
611610
}
612611
};
613612
while let Some(&(pos, c)) = self.cur.peek() {
614-
if is_id_continue(c) {
613+
if rustc_lexer::is_id_continue(c) {
615614
self.cur.next();
616615
} else {
617616
return &self.input[start..pos];

src/librustc_lexer/src/lib.rs

+70-65
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,62 @@ pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
102102
})
103103
}
104104

105+
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
106+
// classes.
107+
108+
/// True if `c` is considered whitespace according to the Rust language definition.
109+
pub fn is_whitespace(c: char) -> bool {
110+
// This is Pattern_White_Space.
111+
//
112+
// Note that this set is stable (i.e., it doesn't change with different
113+
// Unicode versions), so it's ok to just hard-code the values.
114+
115+
match c {
116+
// Usual ASCII suspects
117+
| '\u{0009}' // \t
118+
| '\u{000A}' // \n
119+
| '\u{000B}' // vertical tab
120+
| '\u{000C}' // form feed
121+
| '\u{000D}' // \r
122+
| '\u{0020}' // space
123+
124+
// NEXT LINE from latin1
125+
| '\u{0085}'
126+
127+
// Bidi markers
128+
| '\u{200E}' // LEFT-TO-RIGHT MARK
129+
| '\u{200F}' // RIGHT-TO-LEFT MARK
130+
131+
// Dedicated whitespace characters from Unicode
132+
| '\u{2028}' // LINE SEPARATOR
133+
| '\u{2029}' // PARAGRAPH SEPARATOR
134+
=> true,
135+
_ => false,
136+
}
137+
}
138+
139+
/// True if `c` is valid as a first character of an identifier.
140+
pub fn is_id_start(c: char) -> bool {
141+
// This is XID_Start OR '_' (which formally is not in XID_Start).
142+
// We also add a fast path for ASCII identifiers.
143+
('a' <= c && c <= 'z')
144+
|| ('A' <= c && c <= 'Z')
145+
|| c == '_'
146+
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
147+
}
148+
149+
/// True if `c` is valid as a non-first character of an identifier.
150+
pub fn is_id_continue(c: char) -> bool {
151+
// This is exactly XID_Continue.
152+
// We also add a fast path for ASCII identifiers.
153+
('a' <= c && c <= 'z')
154+
|| ('A' <= c && c <= 'Z')
155+
|| ('0' <= c && c <= '9')
156+
|| c == '_'
157+
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
158+
}
159+
160+
105161
impl Cursor<'_> {
106162
fn advance_token(&mut self) -> Token {
107163
let first_char = self.bump().unwrap();
@@ -111,9 +167,9 @@ impl Cursor<'_> {
111167
'*' => self.block_comment(),
112168
_ => Slash,
113169
},
114-
c if character_properties::is_whitespace(c) => self.whitespace(),
170+
c if is_whitespace(c) => self.whitespace(),
115171
'r' => match (self.nth_char(0), self.nth_char(1)) {
116-
('#', c1) if character_properties::is_id_start(c1) => self.raw_ident(),
172+
('#', c1) if is_id_start(c1) => self.raw_ident(),
117173
('#', _) | ('"', _) => {
118174
let (n_hashes, started, terminated) = self.raw_double_quoted_string();
119175
let suffix_start = self.len_consumed();
@@ -158,7 +214,7 @@ impl Cursor<'_> {
158214
}
159215
_ => self.ident(),
160216
},
161-
c if character_properties::is_id_start(c) => self.ident(),
217+
c if is_id_start(c) => self.ident(),
162218
c @ '0'..='9' => {
163219
let literal_kind = self.number(c);
164220
let suffix_start = self.len_consumed();
@@ -246,8 +302,8 @@ impl Cursor<'_> {
246302
}
247303

248304
fn whitespace(&mut self) -> TokenKind {
249-
debug_assert!(character_properties::is_whitespace(self.prev()));
250-
while character_properties::is_whitespace(self.nth_char(0)) {
305+
debug_assert!(is_whitespace(self.prev()));
306+
while is_whitespace(self.nth_char(0)) {
251307
self.bump();
252308
}
253309
Whitespace
@@ -257,19 +313,19 @@ impl Cursor<'_> {
257313
debug_assert!(
258314
self.prev() == 'r'
259315
&& self.nth_char(0) == '#'
260-
&& character_properties::is_id_start(self.nth_char(1))
316+
&& is_id_start(self.nth_char(1))
261317
);
262318
self.bump();
263319
self.bump();
264-
while character_properties::is_id_continue(self.nth_char(0)) {
320+
while is_id_continue(self.nth_char(0)) {
265321
self.bump();
266322
}
267323
RawIdent
268324
}
269325

270326
fn ident(&mut self) -> TokenKind {
271-
debug_assert!(character_properties::is_id_start(self.prev()));
272-
while character_properties::is_id_continue(self.nth_char(0)) {
327+
debug_assert!(is_id_start(self.prev()));
328+
while is_id_continue(self.nth_char(0)) {
273329
self.bump();
274330
}
275331
Ident
@@ -314,7 +370,7 @@ impl Cursor<'_> {
314370
// integer literal followed by field/method access or a range pattern
315371
// (`0..2` and `12.foo()`)
316372
'.' if self.nth_char(1) != '.'
317-
&& !character_properties::is_id_start(self.nth_char(1)) =>
373+
&& !is_id_start(self.nth_char(1)) =>
318374
{
319375
// might have stuff after the ., and if it does, it needs to start
320376
// with a number
@@ -344,15 +400,15 @@ impl Cursor<'_> {
344400
fn lifetime_or_char(&mut self) -> TokenKind {
345401
debug_assert!(self.prev() == '\'');
346402
let mut starts_with_number = false;
347-
if (character_properties::is_id_start(self.nth_char(0))
403+
if (is_id_start(self.nth_char(0))
348404
|| self.nth_char(0).is_digit(10) && {
349405
starts_with_number = true;
350406
true
351407
})
352408
&& self.nth_char(1) != '\''
353409
{
354410
self.bump();
355-
while character_properties::is_id_continue(self.nth_char(0)) {
411+
while is_id_continue(self.nth_char(0)) {
356412
self.bump();
357413
}
358414

@@ -494,64 +550,13 @@ impl Cursor<'_> {
494550
}
495551

496552
fn eat_literal_suffix(&mut self) {
497-
if !character_properties::is_id_start(self.nth_char(0)) {
553+
if !is_id_start(self.nth_char(0)) {
498554
return;
499555
}
500556
self.bump();
501557

502-
while character_properties::is_id_continue(self.nth_char(0)) {
558+
while is_id_continue(self.nth_char(0)) {
503559
self.bump();
504560
}
505561
}
506562
}
507-
508-
pub mod character_properties {
509-
// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
510-
// classes.
511-
512-
// This is Pattern_White_Space.
513-
//
514-
// Note that this set is stable (ie, it doesn't change with different
515-
// Unicode versions), so it's ok to just hard-code the values.
516-
pub fn is_whitespace(c: char) -> bool {
517-
match c {
518-
// Usual ASCII suspects
519-
| '\u{0009}' // \t
520-
| '\u{000A}' // \n
521-
| '\u{000B}' // vertical tab
522-
| '\u{000C}' // form feed
523-
| '\u{000D}' // \r
524-
| '\u{0020}' // space
525-
526-
// NEXT LINE from latin1
527-
| '\u{0085}'
528-
529-
// Bidi markers
530-
| '\u{200E}' // LEFT-TO-RIGHT MARK
531-
| '\u{200F}' // RIGHT-TO-LEFT MARK
532-
533-
// Dedicated whitespace characters from Unicode
534-
| '\u{2028}' // LINE SEPARATOR
535-
| '\u{2029}' // PARAGRAPH SEPARATOR
536-
=> true,
537-
_ => false,
538-
}
539-
}
540-
541-
// This is XID_Start OR '_' (which formally is not a XID_Start).
542-
pub fn is_id_start(c: char) -> bool {
543-
('a' <= c && c <= 'z')
544-
|| ('A' <= c && c <= 'Z')
545-
|| c == '_'
546-
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c))
547-
}
548-
549-
// This is XID_Continue.
550-
pub fn is_id_continue(c: char) -> bool {
551-
('a' <= c && c <= 'z')
552-
|| ('A' <= c && c <= 'Z')
553-
|| ('0' <= c && c <= '9')
554-
|| c == '_'
555-
|| (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c))
556-
}
557-
}

src/librustc_mir/borrow_check/move_errors.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
use rustc::mir::*;
22
use rustc::ty;
33
use rustc_errors::{DiagnosticBuilder,Applicability};
4-
use rustc_lexer::character_properties::is_whitespace;
54
use syntax_pos::Span;
65

76
use crate::borrow_check::MirBorrowckCtxt;
@@ -525,7 +524,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, 'tcx> {
525524
let suggestion;
526525
let to_remove;
527526
if pat_snippet.starts_with("mut")
528-
&& pat_snippet["mut".len()..].starts_with(is_whitespace)
527+
&& pat_snippet["mut".len()..].starts_with(rustc_lexer::is_whitespace)
529528
{
530529
suggestion = pat_snippet["mut".len()..].trim_start();
531530
to_remove = "&mut";

src/librustc_mir/borrow_check/mutability_errors.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
use rustc_lexer::character_properties::is_whitespace;
21
use rustc::hir;
32
use rustc::hir::Node;
43
use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body};
@@ -715,7 +714,7 @@ fn annotate_struct_field(
715714
fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option<String> {
716715
let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?;
717716
if hi_src.starts_with("ref")
718-
&& hi_src["ref".len()..].starts_with(is_whitespace)
717+
&& hi_src["ref".len()..].starts_with(rustc_lexer::is_whitespace)
719718
{
720719
let replacement = format!("ref mut{}", &hi_src["ref".len()..]);
721720
Some(replacement)

src/librustdoc/test.rs

+2-3
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ use rustc::hir;
44
use rustc::hir::intravisit;
55
use rustc::session::{self, config, DiagnosticOutput};
66
use rustc::util::common::ErrorReported;
7-
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
87
use syntax::ast;
98
use syntax::with_globals;
109
use syntax::source_map::SourceMap;
@@ -764,8 +763,8 @@ impl Tester for Collector {
764763
// We use these headings as test names, so it's good if
765764
// they're valid identifiers.
766765
let name = name.chars().enumerate().map(|(i, c)| {
767-
if (i == 0 && is_id_start(c)) ||
768-
(i != 0 && is_id_continue(c)) {
766+
if (i == 0 && rustc_lexer::is_id_start(c)) ||
767+
(i != 0 && rustc_lexer::is_id_continue(c)) {
769768
c
770769
} else {
771770
'_'

src/libsyntax/ext/proc_macro_server.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ use crate::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint}
66

77
use errors::{Diagnostic, DiagnosticBuilder};
88
use rustc_data_structures::sync::Lrc;
9-
use rustc_lexer::character_properties::{is_id_start, is_id_continue};
109
use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span};
1110
use syntax_pos::symbol::{kw, sym, Symbol};
1211

@@ -323,7 +322,7 @@ impl Ident {
323322
fn is_valid(string: &str) -> bool {
324323
let mut chars = string.chars();
325324
if let Some(start) = chars.next() {
326-
is_id_start(start) && chars.all(is_id_continue)
325+
rustc_lexer::is_id_start(start) && chars.all(rustc_lexer::is_id_continue)
327326
} else {
328327
false
329328
}

src/libsyntax/tests.rs

+5-9
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
6363
(None, None) => return true,
6464
(None, _) => return false,
6565
(Some(&a), None) => {
66-
if is_pattern_whitespace(a) {
66+
if rustc_lexer::is_whitespace(a) {
6767
break // trailing whitespace check is out of loop for borrowck
6868
} else {
6969
return false
@@ -72,11 +72,11 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
7272
(Some(&a), Some(&b)) => (a, b)
7373
};
7474

75-
if is_pattern_whitespace(a) && is_pattern_whitespace(b) {
75+
if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) {
7676
// skip whitespace for a and b
7777
scan_for_non_ws_or_end(&mut a_iter);
7878
scan_for_non_ws_or_end(&mut b_iter);
79-
} else if is_pattern_whitespace(a) {
79+
} else if rustc_lexer::is_whitespace(a) {
8080
// skip whitespace for a
8181
scan_for_non_ws_or_end(&mut a_iter);
8282
} else if a == b {
@@ -88,20 +88,16 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
8888
}
8989

9090
// check if a has *only* trailing whitespace
91-
a_iter.all(is_pattern_whitespace)
91+
a_iter.all(rustc_lexer::is_whitespace)
9292
}
9393

9494
/// Advances the given peekable `Iterator` until it reaches a non-whitespace character
9595
fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
96-
while iter.peek().copied().map(|c| is_pattern_whitespace(c)) == Some(true) {
96+
while iter.peek().copied().map(|c| rustc_lexer::is_whitespace(c)) == Some(true) {
9797
iter.next();
9898
}
9999
}
100100

101-
fn is_pattern_whitespace(c: char) -> bool {
102-
rustc_lexer::character_properties::is_whitespace(c)
103-
}
104-
105101
/// Identify a position in the text by the Nth occurrence of a string.
106102
struct Position {
107103
string: &'static str,

0 commit comments

Comments
 (0)