From 97376bb68b8ef38c5891b206aedd7024189ab172 Mon Sep 17 00:00:00 2001 From: Yongqian Li Date: Mon, 22 Jun 2015 21:30:06 -0700 Subject: [PATCH 1/4] added is_superscript() to `char` --- src/librustc_unicode/char.rs | 7 +++++++ src/librustc_unicode/tables.rs | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index 0ad5141c5bee6..f80f35341aefa 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -385,6 +385,13 @@ impl char { #[inline] pub fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } + /// Returns whether the specified `char` is a superscript character in the + /// ['Superscripts and Subscripts' unicode block](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts) + #[unstable(feature = "unicode", + reason = "mainly needed for compiler internals")] + #[inline] + pub fn is_superscript(self) -> bool { derived_property::Superscript(self) } + /// Indicates whether a character is in lowercase. 
/// /// This is defined according to the terms of the Unicode Derived Core diff --git a/src/librustc_unicode/tables.rs b/src/librustc_unicode/tables.rs index 4ebb6a70d291c..f6d2015e06b91 100644 --- a/src/librustc_unicode/tables.rs +++ b/src/librustc_unicode/tables.rs @@ -1147,6 +1147,13 @@ pub mod derived_property { super::bsearch_range_table(c, XID_Start_table) } + pub const Superscript_table: &'static [(char, char)] = &[ + ('\u{2070}', '\u{2071}'), ('\u{2074}', '\u{207f}') + ]; + + pub fn Superscript(c: char) -> bool { + super::bsearch_range_table(c, Superscript_table) + } } pub mod property { From c3c21f1366a24e726da6c8b44cdaec124fd228ff Mon Sep 17 00:00:00 2001 From: Yongqian Li Date: Tue, 23 Jun 2015 04:22:43 -0700 Subject: [PATCH 2/4] allow superscripts to continue identifiers --- src/doc/grammar.md | 4 ++-- src/libfmt_macros/lib.rs | 2 +- src/libsyntax/parse/lexer/mod.rs | 2 +- src/test/run-pass/unicode-superscripts.rs | 25 +++++++++++++++++++++++ 4 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 src/test/run-pass/unicode-superscripts.rs diff --git a/src/doc/grammar.md b/src/doc/grammar.md index 3aa89cba0314a..a6e705fc6526a 100644 --- a/src/doc/grammar.md +++ b/src/doc/grammar.md @@ -108,7 +108,7 @@ the following form: gated. This is expected to improve soon. - The first character has property `XID_start` -- The remaining characters have property `XID_continue` +- The remaining characters have property `XID_continue` or are superscripts that does _not_ occur in the set of [keywords](#keywords). @@ -281,7 +281,7 @@ type_path_tail : '<' type_expr [ ',' type_expr ] + '>' ## Macros ```antlr -expr_macro_rules : "macro_rules" '!' ident '(' macro_rule * ')' ';' +expr_macro_rules : "macro_rules" '!' ident '(' macro_rule * ')' ';' | "macro_rules" '!' 
ident '{' macro_rule * '}' ; macro_rule : '(' matcher * ')' "=>" '(' transcriber * ')' ';' ; matcher : '(' matcher * ')' | '[' matcher * ']' diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index c2b28bd134d47..ef21c9356e52a 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -402,7 +402,7 @@ impl<'a> Parser<'a> { let mut end; loop { match self.cur.clone().next() { - Some((_, c)) if c.is_xid_continue() => { + Some((_, c)) if c.is_xid_continue() || c.is_superscript() => { self.cur.next(); } Some((pos, _)) => { end = pos; break } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b6b5ac5c01eee..7fba078657959 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1396,7 +1396,7 @@ fn ident_continue(c: Option<char>) -> bool { || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' - || (c > '\x7f' && c.is_xid_continue()) + || (c > '\x7f' && (c.is_xid_continue() || c.is_superscript())) } #[cfg(test)] diff --git a/src/test/run-pass/unicode-superscripts.rs b/src/test/run-pass/unicode-superscripts.rs new file mode 100644 index 0000000000000..56d5f82e0111b --- /dev/null +++ b/src/test/run-pass/unicode-superscripts.rs @@ -0,0 +1,25 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +#![allow(unused_variable)] +#![feature(non_ascii_idents)] + +pub fn main() { + let σ⁰ = 2; + let σ¹ = 2; + let σ² = 1; + let σ³ = 2; + let σ⁴ = 2; + let σ⁵ = 2; + let σ⁶ = 2; + let σ⁷ = 2; + let σ⁸ = 2; + let σ⁹ = 2; +} From b5f05643f43556135eebb6c5a538434022c84000 Mon Sep 17 00:00:00 2001 From: Yongqian Li Date: Tue, 23 Jun 2015 05:03:23 -0700 Subject: [PATCH 3/4] small fix to superscripts --- src/librustc_unicode/tables.rs | 3 ++- src/test/run-pass/unicode-superscripts.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/librustc_unicode/tables.rs b/src/librustc_unicode/tables.rs index f6d2015e06b91..70395c4467d84 100644 --- a/src/librustc_unicode/tables.rs +++ b/src/librustc_unicode/tables.rs @@ -1148,7 +1148,8 @@ pub mod derived_property { } pub const Superscript_table: &'static [(char, char)] = &[ - ('\u{2070}', '\u{2071}'), ('\u{2074}', '\u{207f}') + ('\u{b2}', '\u{b3}'), ('\u{b9}', '\u{b9}'), ('\u{2070}', '\u{2071}'), + ('\u{2074}', '\u{207f}') ]; pub fn Superscript(c: char) -> bool { diff --git a/src/test/run-pass/unicode-superscripts.rs b/src/test/run-pass/unicode-superscripts.rs index 56d5f82e0111b..f98746088d56a 100644 --- a/src/test/run-pass/unicode-superscripts.rs +++ b/src/test/run-pass/unicode-superscripts.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#![allow(unused_variable)] +#![allow(unused_variables)] #![feature(non_ascii_idents)] pub fn main() { From 72f04e89d54b14dfe874016d92013df95d20b46c Mon Sep 17 00:00:00 2001 From: Yongqian Li Date: Tue, 23 Jun 2015 06:26:03 -0700 Subject: [PATCH 4/4] fix line that was too long --- src/librustc_unicode/char.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index f80f35341aefa..c6dbcdb1736b0 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -386,7 +386,8 @@ impl char { pub fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } /// Returns whether the specified `char` is a superscript character in the - /// ['Superscripts and Subscripts' unicode block](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts) + /// ['Superscripts and Subscripts' unicode + /// block](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts) #[unstable(feature = "unicode", reason = "mainly needed for compiler internals")] #[inline]