diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 411d6be9c44f4..176c77ca6edc6 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -11,13 +11,14 @@ use rustc_parse::lexer::nfc_normalize; use rustc_parse::parse_stream_from_source_str; use rustc_session::parse::ParseSess; use rustc_span::def_id::CrateNum; -use rustc_span::symbol::{self, kw, sym, Symbol}; +use rustc_span::symbol::{self, sym, Symbol}; use rustc_span::{BytePos, FileName, Pos, SourceFile, Span}; -use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree}; +use pm::bridge::{ + server, DelimSpan, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree, +}; use pm::{Delimiter, Level, LineColumn}; use std::ops::Bound; -use std::{ascii, panic}; trait FromInternal { fn from_internal(x: T) -> Self; @@ -49,9 +50,40 @@ impl ToInternal for Delimiter { } } -impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> - for Vec> -{ +impl FromInternal for LitKind { + fn from_internal(kind: token::LitKind) -> Self { + match kind { + token::Byte => LitKind::Byte, + token::Char => LitKind::Char, + token::Integer => LitKind::Integer, + token::Float => LitKind::Float, + token::Str => LitKind::Str, + token::StrRaw(n) => LitKind::StrRaw(n), + token::ByteStr => LitKind::ByteStr, + token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + token::Err => LitKind::Err, + token::Bool => unreachable!(), + } + } +} + +impl ToInternal for LitKind { + fn to_internal(self) -> token::LitKind { + match self { + LitKind::Byte => token::Byte, + LitKind::Char => token::Char, + LitKind::Integer => token::Integer, + LitKind::Float => token::Float, + LitKind::Str => token::Str, + LitKind::StrRaw(n) => token::StrRaw(n), + LitKind::ByteStr => token::ByteStr, + LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::Err => token::Err, + } + } +} + +impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec> { fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self { use rustc_ast::token::*; @@ -135,16 +167,22 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> Question => op("?"), SingleQuote => op("'"), - Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))), - Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))), + Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })), Lifetime(name) => { let ident = symbol::Ident::new(name, span).without_first_quote(); trees.extend([ TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), - TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)), + TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), ]); } - Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })), + Literal(token::Lit { kind, symbol, suffix }) => { + trees.push(TokenTree::Literal(self::Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span, + })); + } DocComment(_, attr_style, data) => { let mut escaped = String::new(); for ch in data.as_str().chars() { @@ -170,7 +208,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> } Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => { - trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span))) + trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span })) } Interpolated(nt) => { @@ -200,11 +238,12 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> } } -impl ToInternal for TokenTree { +impl ToInternal for (TokenTree, &mut Rustc<'_, '_>) { fn to_internal(self) -> TokenStream { use rustc_ast::token::*; - let (ch, joint, span) = match self { + let (tree, rustc) = self; + let (ch, joint, span) = match tree { TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span), TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => { return tokenstream::TokenTree::Delimited( @@ -215,10 +254,13 @@ impl ToInternal for TokenTree { .into(); } TokenTree::Ident(self::Ident { sym, is_raw, span }) => { + rustc.sess().symbol_gallery.insert(sym, span); return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into(); } TokenTree::Literal(self::Literal { - lit: token::Lit { kind: token::Integer, symbol, suffix }, + kind: self::LitKind::Integer, + symbol, + suffix, span, }) if symbol.as_str().starts_with('-') => { let minus = BinOp(BinOpToken::Minus); @@ -229,7 +271,9 @@ impl ToInternal for TokenTree { return [a, b].into_iter().collect(); } TokenTree::Literal(self::Literal { - lit: token::Lit { kind: token::Float, symbol, suffix }, + kind: self::LitKind::Float, + symbol, + suffix, span, }) if symbol.as_str().starts_with('-') => { let minus = BinOp(BinOpToken::Minus); @@ -239,8 +283,12 @@ impl ToInternal for TokenTree { let b = tokenstream::TokenTree::token(float, span); return [a, b].into_iter().collect(); } - TokenTree::Literal(self::Literal { lit, span }) => { - return tokenstream::TokenTree::token(Literal(lit), span).into(); + TokenTree::Literal(self::Literal { kind, symbol, suffix, span }) => { + return tokenstream::TokenTree::token( + TokenKind::lit(kind.to_internal(), symbol, suffix), + span, + ) + .into(); } }; @@ -289,40 +337,6 @@ impl ToInternal for Level { pub struct FreeFunctions; -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct Ident { - sym: Symbol, - is_raw: bool, - span: Span, -} - -impl Ident { - fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident { - let sym = nfc_normalize(sym.as_str()); - let string = sym.as_str(); - if !rustc_lexer::is_ident(string) { - panic!("`{:?}` is not a valid identifier", string) - } - if is_raw && !sym.can_be_raw() { - panic!("`{}` cannot be a raw identifier", string); - } - sess.symbol_gallery.insert(sym, span); - Ident { sym, is_raw, span } - } - - fn dollar_crate(span: Span) -> Ident { - // `$crate` is accepted as an ident only if it comes from the compiler. - Ident { sym: kw::DollarCrate, is_raw: false, span } - } -} - -// FIXME(eddyb) `Literal` should not expose internal `Debug` impls. -#[derive(Clone, Debug)] -pub struct Literal { - lit: token::Lit, - span: Span, -} - pub(crate) struct Rustc<'a, 'b> { ecx: &'a mut ExtCtxt<'b>, def_site: Span, @@ -348,21 +362,16 @@ impl<'a, 'b> Rustc<'a, 'b> { fn sess(&self) -> &ParseSess { self.ecx.parse_sess() } - - fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option) -> Literal { - Literal { lit: token::Lit::new(kind, symbol, suffix), span: self.call_site } - } } impl server::Types for Rustc<'_, '_> { type FreeFunctions = FreeFunctions; type TokenStream = TokenStream; - type Ident = Ident; - type Literal = Literal; type SourceFile = Lrc; type MultiSpan = Vec; type Diagnostic = Diagnostic; type Span = Span; + type Symbol = Symbol; } impl server::FreeFunctions for Rustc<'_, '_> { @@ -376,6 +385,57 @@ impl server::FreeFunctions for Rustc<'_, '_> { fn track_path(&mut self, path: &str) { self.sess().file_depinfo.borrow_mut().insert(Symbol::intern(path)); } + + fn literal_from_str(&mut self, s: &str) -> Result, ()> { + let name = FileName::proc_macro_source_code(s); + let mut parser = rustc_parse::new_parser_from_source_str(self.sess(), name, s.to_owned()); + + let first_span = parser.token.span.data(); + let minus_present = parser.eat(&token::BinOp(token::Minus)); + + let lit_span = parser.token.span.data(); + let token::Literal(mut lit) = parser.token.kind else { + return Err(()); + }; + + // Check no comment or whitespace surrounding the (possibly negative) + // literal, or more tokens after it. + if (lit_span.hi.0 - first_span.lo.0) as usize != s.len() { + return Err(()); + } + + if minus_present { + // If minus is present, check no comment or whitespace in between it + // and the literal token. + if first_span.hi.0 != lit_span.lo.0 { + return Err(()); + } + + // Check literal is a kind we allow to be negated in a proc macro token. + match lit.kind { + token::LitKind::Bool + | token::LitKind::Byte + | token::LitKind::Char + | token::LitKind::Str + | token::LitKind::StrRaw(_) + | token::LitKind::ByteStr + | token::LitKind::ByteStrRaw(_) + | token::LitKind::Err => return Err(()), + token::LitKind::Integer | token::LitKind::Float => {} + } + + // Synthesize a new symbol that includes the minus sign. + let symbol = Symbol::intern(&s[..1 + lit.symbol.as_str().len()]); + lit = token::Lit::new(lit.kind, symbol, lit.suffix); + } + let token::Lit { kind, symbol, suffix } = lit; + Ok(Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span: self.call_site, + }) + } } impl server::TokenStream for Rustc<'_, '_> { @@ -453,22 +513,22 @@ impl server::TokenStream for Rustc<'_, '_> { fn from_token_tree( &mut self, - tree: TokenTree, + tree: TokenTree, ) -> Self::TokenStream { - tree.to_internal() + (tree, &mut *self).to_internal() } fn concat_trees( &mut self, base: Option, - trees: Vec>, + trees: Vec>, ) -> Self::TokenStream { let mut builder = tokenstream::TokenStreamBuilder::new(); if let Some(base) = base { builder.push(base); } for tree in trees { - builder.push(tree.to_internal()); + builder.push((tree, &mut *self).to_internal()); } builder.build() } @@ -491,178 +551,11 @@ impl server::TokenStream for Rustc<'_, '_> { fn into_trees( &mut self, stream: Self::TokenStream, - ) -> Vec> { + ) -> Vec> { FromInternal::from_internal((stream, self)) } } -impl server::Ident for Rustc<'_, '_> { - fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident { - Ident::new(self.sess(), Symbol::intern(string), is_raw, span) - } - - fn span(&mut self, ident: Self::Ident) -> Self::Span { - ident.span - } - - fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident { - Ident { span, ..ident } - } -} - -impl server::Literal for Rustc<'_, '_> { - fn from_str(&mut self, s: &str) -> Result { - let name = FileName::proc_macro_source_code(s); - let mut parser = rustc_parse::new_parser_from_source_str(self.sess(), name, s.to_owned()); - - let first_span = parser.token.span.data(); - let minus_present = parser.eat(&token::BinOp(token::Minus)); - - let lit_span = parser.token.span.data(); - let token::Literal(mut lit) = parser.token.kind else { - return Err(()); - }; - - // Check no comment or whitespace surrounding the (possibly negative) - // literal, or more tokens after it. - if (lit_span.hi.0 - first_span.lo.0) as usize != s.len() { - return Err(()); - } - - if minus_present { - // If minus is present, check no comment or whitespace in between it - // and the literal token. - if first_span.hi.0 != lit_span.lo.0 { - return Err(()); - } - - // Check literal is a kind we allow to be negated in a proc macro token. - match lit.kind { - token::LitKind::Bool - | token::LitKind::Byte - | token::LitKind::Char - | token::LitKind::Str - | token::LitKind::StrRaw(_) - | token::LitKind::ByteStr - | token::LitKind::ByteStrRaw(_) - | token::LitKind::Err => return Err(()), - token::LitKind::Integer | token::LitKind::Float => {} - } - - // Synthesize a new symbol that includes the minus sign. - let symbol = Symbol::intern(&s[..1 + lit.symbol.as_str().len()]); - lit = token::Lit::new(lit.kind, symbol, lit.suffix); - } - - Ok(Literal { lit, span: self.call_site }) - } - - fn to_string(&mut self, literal: &Self::Literal) -> String { - literal.lit.to_string() - } - - fn debug_kind(&mut self, literal: &Self::Literal) -> String { - format!("{:?}", literal.lit.kind) - } - - fn symbol(&mut self, literal: &Self::Literal) -> String { - literal.lit.symbol.to_string() - } - - fn suffix(&mut self, literal: &Self::Literal) -> Option { - literal.lit.suffix.as_ref().map(Symbol::to_string) - } - - fn integer(&mut self, n: &str) -> Self::Literal { - self.lit(token::Integer, Symbol::intern(n), None) - } - - fn typed_integer(&mut self, n: &str, kind: &str) -> Self::Literal { - self.lit(token::Integer, Symbol::intern(n), Some(Symbol::intern(kind))) - } - - fn float(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), None) - } - - fn f32(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), Some(sym::f32)) - } - - fn f64(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), Some(sym::f64)) - } - - fn string(&mut self, string: &str) -> Self::Literal { - let quoted = format!("{:?}", string); - assert!(quoted.starts_with('"') && quoted.ends_with('"')); - let symbol = "ed[1..quoted.len() - 1]; - self.lit(token::Str, Symbol::intern(symbol), None) - } - - fn character(&mut self, ch: char) -> Self::Literal { - let quoted = format!("{:?}", ch); - assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); - let symbol = "ed[1..quoted.len() - 1]; - self.lit(token::Char, Symbol::intern(symbol), None) - } - - fn byte_string(&mut self, bytes: &[u8]) -> Self::Literal { - let string = bytes - .iter() - .cloned() - .flat_map(ascii::escape_default) - .map(Into::::into) - .collect::(); - self.lit(token::ByteStr, Symbol::intern(&string), None) - } - - fn span(&mut self, literal: &Self::Literal) -> Self::Span { - literal.span - } - - fn set_span(&mut self, literal: &mut Self::Literal, span: Self::Span) { - literal.span = span; - } - - fn subspan( - &mut self, - literal: &Self::Literal, - start: Bound, - end: Bound, - ) -> Option { - let span = literal.span; - let length = span.hi().to_usize() - span.lo().to_usize(); - - let start = match start { - Bound::Included(lo) => lo, - Bound::Excluded(lo) => lo.checked_add(1)?, - Bound::Unbounded => 0, - }; - - let end = match end { - Bound::Included(hi) => hi.checked_add(1)?, - Bound::Excluded(hi) => hi, - Bound::Unbounded => length, - }; - - // Bounds check the values, preventing addition overflow and OOB spans. - if start > u32::MAX as usize - || end > u32::MAX as usize - || (u32::MAX - start as u32) < span.lo().to_u32() - || (u32::MAX - end as u32) < span.lo().to_u32() - || start >= end - || end > length - { - return None; - } - - let new_lo = span.lo() + BytePos::from_usize(start); - let new_hi = span.lo() + BytePos::from_usize(end); - Some(span.with_lo(new_lo).with_hi(new_hi)) - } -} - impl server::SourceFile for Rustc<'_, '_> { fn eq(&mut self, file1: &Self::SourceFile, file2: &Self::SourceFile) -> bool { Lrc::ptr_eq(file1, file2) @@ -767,6 +660,42 @@ impl server::Span for Rustc<'_, '_> { Some(first.to(second)) } + fn subspan( + &mut self, + span: Self::Span, + start: Bound, + end: Bound, + ) -> Option { + let length = span.hi().to_usize() - span.lo().to_usize(); + + let start = match start { + Bound::Included(lo) => lo, + Bound::Excluded(lo) => lo.checked_add(1)?, + Bound::Unbounded => 0, + }; + + let end = match end { + Bound::Included(hi) => hi.checked_add(1)?, + Bound::Excluded(hi) => hi, + Bound::Unbounded => length, + }; + + // Bounds check the values, preventing addition overflow and OOB spans. + if start > u32::MAX as usize + || end > u32::MAX as usize + || (u32::MAX - start as u32) < span.lo().to_u32() + || (u32::MAX - end as u32) < span.lo().to_u32() + || start >= end + || end > length + { + return None; + } + + let new_lo = span.lo() + BytePos::from_usize(start); + let new_hi = span.lo() + BytePos::from_usize(end); + Some(span.with_lo(new_lo).with_hi(new_hi)) + } + fn resolved_at(&mut self, span: Self::Span, at: Self::Span) -> Self::Span { span.with_ctxt(at.ctxt()) } @@ -812,6 +741,13 @@ impl server::Span for Rustc<'_, '_> { } } +impl server::Symbol for Rustc<'_, '_> { + fn normalize_and_validate_ident(&mut self, string: &str) -> Result { + let sym = nfc_normalize(string); + if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) } + } +} + impl server::Server for Rustc<'_, '_> { fn globals(&mut self) -> ExpnGlobals { ExpnGlobals { @@ -820,4 +756,12 @@ impl server::Server for Rustc<'_, '_> { mixed_site: self.mixed_site, } } + + fn intern_symbol(string: &str) -> Self::Symbol { + Symbol::intern(string) + } + + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + f(&symbol.as_str()) + } } diff --git a/library/proc_macro/src/bridge/arena.rs b/library/proc_macro/src/bridge/arena.rs new file mode 100644 index 0000000000000..fa72d2816ebfe --- /dev/null +++ b/library/proc_macro/src/bridge/arena.rs @@ -0,0 +1,113 @@ +//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +//! +//! This is unfortunately a minimal re-implementation rather than a dependency +//! as it is difficult to depend on crates from within `proc_macro`, due to it +//! being built at the same time as `std`. + +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ptr; +use std::slice; +use std::str; + +// The arenas start with PAGE-sized chunks, and then each new chunk is twice as +// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon +// we stop growing. This scales well, from arenas that are barely used up to +// arenas that are used for 100s of MiBs. Note also that the chosen sizes match +// the usual sizes of pages and huge pages on Linux. +const PAGE: usize = 4096; +const HUGE_PAGE: usize = 2 * 1024 * 1024; + +/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +/// +/// This is unfortunately a complete re-implementation rather than a dependency +/// as it is difficult to depend on crates from within `proc_macro`, due to it +/// being built at the same time as `std`. +/// +/// This arena doesn't have support for allocating anything other than byte +/// slices, as that is all that is necessary. +pub(crate) struct Arena { + start: Cell<*mut MaybeUninit>, + end: Cell<*mut MaybeUninit>, + chunks: RefCell]>>>, +} + +impl Arena { + pub(crate) fn new() -> Self { + Arena { + start: Cell::new(ptr::null_mut()), + end: Cell::new(ptr::null_mut()), + chunks: RefCell::new(Vec::new()), + } + } + + /// Add a new chunk with at least `additional` free bytes. + #[inline(never)] + #[cold] + fn grow(&self, additional: usize) { + let mut chunks = self.chunks.borrow_mut(); + let mut new_cap; + if let Some(last_chunk) = chunks.last_mut() { + // If the previous chunk's len is less than HUGE_PAGE + // bytes, then this chunk will be least double the previous + // chunk's size. + new_cap = last_chunk.len().min(HUGE_PAGE / 2); + new_cap *= 2; + } else { + new_cap = PAGE; + } + // Also ensure that this chunk can fit `additional`. + new_cap = cmp::max(additional, new_cap); + + let mut chunk = Box::new_uninit_slice(new_cap); + let Range { start, end } = chunk.as_mut_ptr_range(); + self.start.set(start); + self.end.set(end); + chunks.push(chunk); + } + + /// Allocates a byte slice with specified size from the current memory + /// chunk. Returns `None` if there is no free space left to satisfy the + /// request. + fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit]> { + let start = self.start.get().addr(); + let old_end = self.end.get(); + let end = old_end.addr(); + + let new_end = end.checked_sub(bytes)?; + if start <= new_end { + let new_end = old_end.with_addr(new_end); + self.end.set(new_end); + // SAFETY: `bytes` bytes starting at `new_end` were just reserved. + Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) }) + } else { + None + } + } + + fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit] { + if bytes == 0 { + return &mut []; + } + + loop { + if let Some(a) = self.alloc_raw_without_grow(bytes) { + break a; + } + // No free space left. Allocate a new chunk to satisfy the request. + // On failure the grow will panic or abort. + self.grow(bytes); + } + } + + pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str { + let alloc = self.alloc_raw(string.len()); + let bytes = MaybeUninit::write_slice(alloc, string.as_bytes()); + + // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, + // and immediately convert the clone back to `&str`. + unsafe { str::from_utf8_unchecked_mut(bytes) } + } +} diff --git a/library/proc_macro/src/bridge/client.rs b/library/proc_macro/src/bridge/client.rs index 8254bd6e5024a..1516f084ab8b6 100644 --- a/library/proc_macro/src/bridge/client.rs +++ b/library/proc_macro/src/bridge/client.rs @@ -175,13 +175,11 @@ define_handles! { 'owned: FreeFunctions, TokenStream, - Literal, SourceFile, MultiSpan, Diagnostic, 'interned: - Ident, Span, } @@ -197,25 +195,6 @@ impl Clone for TokenStream { } } -impl Clone for Literal { - fn clone(&self) -> Self { - self.clone() - } -} - -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Literal") - // format the kind without quotes, as in `kind: Float` - .field("kind", &format_args!("{}", &self.debug_kind())) - .field("symbol", &self.symbol()) - // format `Some("...")` on one line even in {:#?} mode - .field("suffix", &format_args!("{:?}", &self.suffix())) - .field("span", &self.span()) - .finish() - } -} - impl Clone for SourceFile { fn clone(&self) -> Self { self.clone() @@ -242,6 +221,8 @@ impl fmt::Debug for Span { } } +pub(crate) use super::symbol::Symbol; + macro_rules! define_client_side { ($($name:ident { $(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)* @@ -405,6 +386,9 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( panic::catch_unwind(panic::AssertUnwindSafe(|| { maybe_install_panic_hook(force_show_panics); + // Make sure the symbol store is empty before decoding inputs. + Symbol::invalidate_all(); + let reader = &mut &buf[..]; let (globals, input) = <(ExpnGlobals, A)>::decode(reader, &mut ()); @@ -438,6 +422,10 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( buf.clear(); Err::<(), _>(e).encode(&mut buf, &mut ()); }); + + // Now that a response has been serialized, invalidate all symbols + // registered with the interner. + Symbol::invalidate_all(); buf } diff --git a/library/proc_macro/src/bridge/fxhash.rs b/library/proc_macro/src/bridge/fxhash.rs new file mode 100644 index 0000000000000..4b1e412e24b70 --- /dev/null +++ b/library/proc_macro/src/bridge/fxhash.rs @@ -0,0 +1,117 @@ +//! This is a copy of the `rustc_hash` crate, adapted to work as a module. +//! +//! If in the future it becomes more reasonable to add dependencies to +//! `proc_macro`, this module should be removed and replaced with a dependency +//! on the `rustc_hash` crate. + +use std::collections::HashMap; +use std::convert::TryInto; +use std::default::Default; +use std::hash::BuildHasherDefault; +use std::hash::Hasher; +use std::mem::size_of; +use std::ops::BitXor; + +/// Type alias for a hashmap using the `fx` hash algorithm. +pub type FxHashMap = HashMap>; + +/// A speedy hash algorithm for use within rustc. The hashmap in liballoc +/// by default uses SipHash which isn't quite as speedy as we want. In the +/// compiler we're not really worried about DOS attempts, so we use a fast +/// non-cryptographic hash. +/// +/// This is the same as the algorithm used by Firefox -- which is a homespun +/// one not based on any widely-known algorithm -- though modified to produce +/// 64-bit hash values instead of 32-bit hash values. It consistently +/// out-performs an FNV-based hash within rustc itself -- the collision rate is +/// similar or slightly worse than FNV, but the speed of the hash function +/// itself is much higher because it works on up to 8 bytes at a time. +pub struct FxHasher { + hash: usize, +} + +#[cfg(target_pointer_width = "32")] +const K: usize = 0x9e3779b9; +#[cfg(target_pointer_width = "64")] +const K: usize = 0x517cc1b727220a95; + +impl Default for FxHasher { + #[inline] + fn default() -> FxHasher { + FxHasher { hash: 0 } + } +} + +impl FxHasher { + #[inline] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + } +} + +impl Hasher for FxHasher { + #[inline] + fn write(&mut self, mut bytes: &[u8]) { + #[cfg(target_pointer_width = "32")] + let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap()); + #[cfg(target_pointer_width = "64")] + let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap()); + + let mut hash = FxHasher { hash: self.hash }; + assert!(size_of::() <= 8); + while bytes.len() >= size_of::() { + hash.add_to_hash(read_usize(bytes) as usize); + bytes = &bytes[size_of::()..]; + } + if (size_of::() > 4) && (bytes.len() >= 4) { + hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize); + bytes = &bytes[4..]; + } + if (size_of::() > 2) && bytes.len() >= 2 { + hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize); + bytes = &bytes[2..]; + } + if (size_of::() > 1) && bytes.len() >= 1 { + hash.add_to_hash(bytes[0] as usize); + } + self.hash = hash.hash; + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.add_to_hash(i as usize); + } + + #[cfg(target_pointer_width = "32")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + self.add_to_hash((i >> 32) as usize); + } + + #[cfg(target_pointer_width = "64")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.add_to_hash(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash as u64 + } +} diff --git a/library/proc_macro/src/bridge/handle.rs b/library/proc_macro/src/bridge/handle.rs index c219a9465d39f..00954107b7769 100644 --- a/library/proc_macro/src/bridge/handle.rs +++ b/library/proc_macro/src/bridge/handle.rs @@ -1,11 +1,13 @@ //! Server-side handles and storage for per-handle data. -use std::collections::{BTreeMap, HashMap}; -use std::hash::{BuildHasher, Hash}; +use std::collections::BTreeMap; +use std::hash::Hash; use std::num::NonZeroU32; use std::ops::{Index, IndexMut}; use std::sync::atomic::{AtomicUsize, Ordering}; +use super::fxhash::FxHashMap; + pub(super) type Handle = NonZeroU32; /// A store that associates values of type `T` with numeric handles. A value can @@ -51,31 +53,15 @@ impl IndexMut for OwnedStore { } } -// HACK(eddyb) deterministic `std::collections::hash_map::RandomState` replacement -// that doesn't require adding any dependencies to `proc_macro` (like `rustc-hash`). -#[derive(Clone)] -struct NonRandomState; - -impl BuildHasher for NonRandomState { - type Hasher = std::collections::hash_map::DefaultHasher; - #[inline] - fn build_hasher(&self) -> Self::Hasher { - Self::Hasher::new() - } -} - /// Like `OwnedStore`, but avoids storing any value more than once. pub(super) struct InternedStore { owned: OwnedStore, - interner: HashMap, + interner: FxHashMap, } impl InternedStore { pub(super) fn new(counter: &'static AtomicUsize) -> Self { - InternedStore { - owned: OwnedStore::new(counter), - interner: HashMap::with_hasher(NonRandomState), - } + InternedStore { owned: OwnedStore::new(counter), interner: FxHashMap::default() } } pub(super) fn alloc(&mut self, x: T) -> Handle { diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 048ba3a8fdb79..5cde966bf173d 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -56,6 +56,7 @@ macro_rules! with_api { fn drop($self: $S::FreeFunctions); fn track_env_var(var: &str, value: Option<&str>); fn track_path(path: &str); + fn literal_from_str(s: &str) -> Result, ()>; }, TokenStream { fn drop($self: $S::TokenStream); @@ -65,11 +66,11 @@ macro_rules! with_api { fn from_str(src: &str) -> $S::TokenStream; fn to_string($self: &$S::TokenStream) -> String; fn from_token_tree( - tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>, + tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol>, ) -> $S::TokenStream; fn concat_trees( base: Option<$S::TokenStream>, - trees: Vec>, + trees: Vec>, ) -> $S::TokenStream; fn concat_streams( base: Option<$S::TokenStream>, @@ -77,36 +78,7 @@ macro_rules! with_api { ) -> $S::TokenStream; fn into_trees( $self: $S::TokenStream - ) -> Vec>; - }, - Ident { - fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident; - fn span($self: $S::Ident) -> $S::Span; - fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident; - }, - Literal { - fn drop($self: $S::Literal); - fn clone($self: &$S::Literal) -> $S::Literal; - fn from_str(s: &str) -> Result<$S::Literal, ()>; - fn to_string($self: &$S::Literal) -> String; - fn debug_kind($self: &$S::Literal) -> String; - fn symbol($self: &$S::Literal) -> String; - fn suffix($self: &$S::Literal) -> Option; - fn integer(n: &str) -> $S::Literal; - fn typed_integer(n: &str, kind: &str) -> $S::Literal; - fn float(n: &str) -> $S::Literal; - fn f32(n: &str) -> $S::Literal; - fn f64(n: &str) -> $S::Literal; - fn string(string: &str) -> $S::Literal; - fn character(ch: char) -> $S::Literal; - fn byte_string(bytes: &[u8]) -> $S::Literal; - fn span($self: &$S::Literal) -> $S::Span; - fn set_span($self: &mut $S::Literal, span: $S::Span); - fn subspan( - $self: &$S::Literal, - start: Bound, - end: Bound, - ) -> Option<$S::Span>; + ) -> Vec>; }, SourceFile { fn drop($self: $S::SourceFile); @@ -141,11 +113,15 @@ macro_rules! with_api { fn before($self: $S::Span) -> $S::Span; fn after($self: $S::Span) -> $S::Span; fn join($self: $S::Span, other: $S::Span) -> Option<$S::Span>; + fn subspan($self: $S::Span, start: Bound, end: Bound) -> Option<$S::Span>; fn resolved_at($self: $S::Span, at: $S::Span) -> $S::Span; fn source_text($self: $S::Span) -> Option; fn save_span($self: $S::Span) -> usize; fn recover_proc_macro_span(id: usize) -> $S::Span; }, + Symbol { + fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>; + }, } }; } @@ -170,6 +146,8 @@ macro_rules! reverse_decode { } } +#[allow(unsafe_code)] +mod arena; #[allow(unsafe_code)] mod buffer; #[forbid(unsafe_code)] @@ -177,6 +155,8 @@ pub mod client; #[allow(unsafe_code)] mod closure; #[forbid(unsafe_code)] +mod fxhash; +#[forbid(unsafe_code)] mod handle; #[macro_use] #[forbid(unsafe_code)] @@ -187,6 +167,8 @@ mod scoped_cell; mod selfless_reify; #[forbid(unsafe_code)] pub mod server; +#[allow(unsafe_code)] +mod symbol; use buffer::Buffer; pub use rpc::PanicMessage; @@ -328,6 +310,7 @@ mark_noop! { u8, usize, Delimiter, + LitKind, Level, LineColumn, Spacing, @@ -357,6 +340,33 @@ rpc_encode_decode!( } ); +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(u8), + ByteStr, + ByteStrRaw(u8), + Err, +} + +rpc_encode_decode!( + enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(n), + ByteStr, + ByteStrRaw(n), + Err, + } +); + macro_rules! mark_compound { (struct $name:ident <$($T:ident),+> { $($field:ident),* $(,)? }) => { impl<$($T: Mark),+> Mark for $name <$($T),+> { @@ -464,16 +474,35 @@ pub struct Punct { compound_traits!(struct Punct { ch, joint, span }); +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Ident { + pub sym: Symbol, + pub is_raw: bool, + pub span: Span, +} + +compound_traits!(struct Ident { sym, is_raw, span }); + +#[derive(Clone, Eq, PartialEq)] +pub struct Literal { + pub kind: LitKind, + pub symbol: Symbol, + pub suffix: Option, + pub span: Span, +} + +compound_traits!(struct Literal { kind, symbol, suffix, span }); + #[derive(Clone)] -pub enum TokenTree { +pub enum TokenTree { Group(Group), Punct(Punct), - Ident(Ident), - Literal(Literal), + Ident(Ident), + Literal(Literal), } compound_traits!( - enum TokenTree { + enum TokenTree { Group(tt), Punct(tt), Ident(tt), diff --git a/library/proc_macro/src/bridge/server.rs b/library/proc_macro/src/bridge/server.rs index ea8b833b48fde..d46e325951d72 100644 --- a/library/proc_macro/src/bridge/server.rs +++ b/library/proc_macro/src/bridge/server.rs @@ -8,12 +8,11 @@ use super::client::HandleStore; pub trait Types { type FreeFunctions: 'static; type TokenStream: 'static + Clone; - type Ident: 'static + Copy + Eq + Hash; - type Literal: 'static + Clone; type SourceFile: 'static + Clone; type MultiSpan: 'static; type Diagnostic: 'static; type Span: 'static + Copy + Eq + Hash; + type Symbol: 'static; } /// Declare an associated fn of one of the traits below, adding necessary @@ -38,6 +37,12 @@ macro_rules! declare_server_traits { pub trait Server: Types $(+ $name)* { fn globals(&mut self) -> ExpnGlobals; + + /// Intern a symbol received from RPC + fn intern_symbol(ident: &str) -> Self::Symbol; + + /// Recover the string value of a symbol, and invoke a callback with it. + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)); } } } @@ -49,6 +54,12 @@ impl Server for MarkedTypes { fn globals(&mut self) -> ExpnGlobals { <_>::mark(Server::globals(&mut self.0)) } + fn intern_symbol(ident: &str) -> Self::Symbol { + <_>::mark(S::intern_symbol(ident)) + } + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + S::with_symbol_string(symbol.unmark(), f) + } } macro_rules! define_mark_types_impls { @@ -81,11 +92,13 @@ macro_rules! define_dispatcher_impl { pub trait DispatcherTrait { // HACK(eddyb) these are here to allow `Self::$name` to work below. $(type $name;)* + fn dispatch(&mut self, buf: Buffer) -> Buffer; } impl DispatcherTrait for Dispatcher> { $(type $name = as Types>::$name;)* + fn dispatch(&mut self, mut buf: Buffer) -> Buffer { let Dispatcher { handle_store, server } = self; diff --git a/library/proc_macro/src/bridge/symbol.rs b/library/proc_macro/src/bridge/symbol.rs new file mode 100644 index 0000000000000..930c111455df0 --- /dev/null +++ b/library/proc_macro/src/bridge/symbol.rs @@ -0,0 +1,205 @@ +//! Client-side interner used for symbols. +//! +//! This is roughly based on the symbol interner from `rustc_span` and the +//! DroplessArena from `rustc_arena`. It is unfortunately a complete +//! copy/re-implementation rather than a dependency as it is difficult to depend +//! on crates from within `proc_macro`, due to it being built at the same time +//! as `std`. +//! +//! If at some point in the future it becomes easier to add dependencies to +//! proc_macro, this module should probably be removed or simplified. + +use std::cell::RefCell; +use std::num::NonZeroU32; +use std::str; + +use super::*; + +/// Handle for a symbol string stored within the Interner. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Symbol(NonZeroU32); + +impl !Send for Symbol {} +impl !Sync for Symbol {} + +impl Symbol { + /// Intern a new `Symbol` + pub(crate) fn new(string: &str) -> Self { + INTERNER.with_borrow_mut(|i| i.intern(string)) + } + + /// Create a new `Symbol` for an identifier. + /// + /// Validates and normalizes before converting it to a symbol. + pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self { + // Fast-path: check if this is a valid ASCII identifier + if Self::is_valid_ascii_ident(string.as_bytes()) { + if is_raw && !Self::can_be_raw(string) { + panic!("`{}` cannot be a raw identifier", string); + } + return Self::new(string); + } + + // Slow-path: If the string is already ASCII we're done, otherwise ask + // our server to do this for us over RPC. + // We don't need to check for identifiers which can't be raw here, + // because all of them are ASCII. + if string.is_ascii() { + Err(()) + } else { + client::Symbol::normalize_and_validate_ident(string) + } + .unwrap_or_else(|_| panic!("`{:?}` is not a valid identifier", string)) + } + + /// Run a callback with the symbol's string value. + pub(crate) fn with(self, f: impl FnOnce(&str) -> R) -> R { + INTERNER.with_borrow(|i| f(i.get(self))) + } + + /// Clear out the thread-local symbol interner, making all previously + /// created symbols invalid such that `with` will panic when called on them. + pub(crate) fn invalidate_all() { + INTERNER.with_borrow_mut(|i| i.clear()); + } + + /// Check if the ident is a valid ASCII identifier. + /// + /// This is a short-circuit which is cheap to implement within the + /// proc-macro client to avoid RPC when creating simple idents, but may + /// return `false` for a valid identifier if it contains non-ASCII + /// characters. + fn is_valid_ascii_ident(bytes: &[u8]) -> bool { + matches!(bytes.first(), Some(b'_' | b'a'..=b'z' | b'A'..=b'Z')) + && bytes[1..] + .iter() + .all(|b| matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) + } + + // Mimics the behaviour of `Symbol::can_be_raw` from `rustc_span` + fn can_be_raw(string: &str) -> bool { + match string { + "_" | "super" | "self" | "Self" | "crate" => false, + _ => true, + } + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Debug::fmt(s, f)) + } +} + +impl ToString for Symbol { + fn to_string(&self) -> String { + self.with(|s| s.to_owned()) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Display::fmt(s, f)) + } +} + +impl Encode for Symbol { + fn encode(self, w: &mut Writer, s: &mut S) { + self.with(|sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, client::HandleStore>> + for Marked +{ + fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore>) -> Self { + Mark::mark(S::intern_symbol(<&str>::decode(r, s))) + } +} + +impl Encode>> + for Marked +{ + fn encode(self, w: &mut Writer, s: &mut client::HandleStore>) { + S::with_symbol_string(&self.unmark(), |sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, S> for Symbol { + fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { + Symbol::new(<&str>::decode(r, s)) + } +} + +thread_local! { + static INTERNER: RefCell = RefCell::new(Interner { + arena: arena::Arena::new(), + names: fxhash::FxHashMap::default(), + strings: Vec::new(), + // Start with a base of 1 to make sure that `NonZeroU32` works. + sym_base: NonZeroU32::new(1).unwrap(), + }); +} + +/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`. +struct Interner { + arena: arena::Arena, + // SAFETY: These `'static` lifetimes are actually references to data owned + // by the Arena. This is safe, as we never return them as static references + // from `Interner`. + names: fxhash::FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, + // The offset to apply to symbol names stored in the interner. This is used + // to ensure that symbol names are not re-used after the interner is + // cleared. + sym_base: NonZeroU32, +} + +impl Interner { + fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol( + self.sym_base + .checked_add(self.strings.len() as u32) + .expect("`proc_macro` symbol name overflow"), + ); + + let string: &str = self.arena.alloc_str(string); + + // SAFETY: we can extend the arena allocation to `'static` because we + // only access these while the arena is still alive. + let string: &'static str = unsafe { &*(string as *const str) }; + self.strings.push(string); + self.names.insert(string, name); + name + } + + /// Read a symbol's value from the store while it is held. + fn get(&self, symbol: Symbol) -> &str { + // NOTE: Subtract out the offset which was added to make the symbol + // nonzero and prevent symbol name re-use. + let name = symbol + .0 + .get() + .checked_sub(self.sym_base.get()) + .expect("use-after-free of `proc_macro` symbol"); + self.strings[name as usize] + } + + /// Clear all symbols from the store, invalidating them such that `get` will + /// panic if they are accessed in the future. + fn clear(&mut self) { + // NOTE: Be careful not to panic here, as we may be called on the client + // when a `catch_unwind` isn't installed. + self.sym_base = self.sym_base.saturating_add(self.strings.len() as u32); + self.names.clear(); + self.strings.clear(); + + // SAFETY: This is cleared after the names and strings tables are + // cleared out, so no references into the arena should remain. + self.arena = arena::Arena::new(); + } +} diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 9ab5061c66807..5cf16bdd08cdd 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -24,10 +24,14 @@ #![feature(staged_api)] #![feature(allow_internal_unstable)] #![feature(decl_macro)] +#![feature(local_key_cell_methods)] +#![feature(maybe_uninit_write_slice)] #![feature(negative_impls)] +#![feature(new_uninit)] #![feature(restricted_std)] #![feature(rustc_attrs)] #![feature(min_specialization)] +#![feature(strict_provenance)] #![recursion_limit = "256"] #[unstable(feature = "proc_macro_internals", issue = "27812")] @@ -211,12 +215,7 @@ pub use quote::{quote, quote_span}; fn tree_to_bridge_tree( tree: TokenTree, -) -> bridge::TokenTree< - bridge::client::TokenStream, - bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, -> { +) -> bridge::TokenTree { match tree { TokenTree::Group(tt) => bridge::TokenTree::Group(tt.0), TokenTree::Punct(tt) => bridge::TokenTree::Punct(tt.0), @@ -240,8 +239,7 @@ struct ConcatTreesHelper { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, + bridge::client::Symbol, >, >, } @@ -367,8 +365,7 @@ pub mod token_stream { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, + bridge::client::Symbol, >, >, ); @@ -1004,6 +1001,13 @@ impl Punct { } } +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] +impl ToString for Punct { + fn to_string(&self) -> String { + self.as_char().to_string() + } +} + /// Prints the punctuation character as a string that should be losslessly convertible /// back into the same character. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] @@ -1041,7 +1045,7 @@ impl PartialEq for char { /// An identifier (`ident`). #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Ident(bridge::client::Ident); +pub struct Ident(bridge::Ident); impl Ident { /// Creates a new `Ident` with the given `string` as well as the specified @@ -1065,7 +1069,11 @@ impl Ident { /// tokens, requires a `Span` to be specified at construction. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn new(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, false)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, false), + is_raw: false, + span: span.0, + }) } /// Same as `Ident::new`, but creates a raw identifier (`r#ident`). @@ -1074,38 +1082,45 @@ impl Ident { /// (e.g. `self`, `super`) are not supported, and will cause a panic. #[stable(feature = "proc_macro_raw_ident", since = "1.47.0")] pub fn new_raw(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, true)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, true), + is_raw: true, + span: span.0, + }) } /// Returns the span of this `Ident`, encompassing the entire string returned - /// by [`to_string`](Self::to_string). + /// by [`to_string`](ToString::to_string). #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span of this `Ident`, possibly changing its hygiene context. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0 = self.0.with_span(span.0); + self.0.span = span.0; } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). -#[stable(feature = "proc_macro_lib", since = "1.15.0")] +/// Converts the identifier to a string that should be losslessly convertible +/// back into the same identifier. +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Ident { fn to_string(&self) -> String { - TokenStream::from(TokenTree::from(self.clone())).to_string() + self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() }) } } -/// Prints the identifier as a string that should be losslessly convertible -/// back into the same identifier. +/// Prints the identifier as a string that should be losslessly convertible back +/// into the same identifier. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + if self.0.is_raw { + f.write_str("r#")?; + } + fmt::Display::fmt(&self.0.sym, f) } } @@ -1125,7 +1140,7 @@ impl fmt::Debug for Ident { /// Boolean literals like `true` and `false` do not belong here, they are `Ident`s. #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Literal(bridge::client::Literal); +pub struct Literal(bridge::Literal); macro_rules! suffixed_int_literals { ($($name:ident => $kind:ident,)*) => ($( @@ -1142,7 +1157,12 @@ macro_rules! suffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::typed_integer(&n.to_string(), stringify!($kind))) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: Some(bridge::client::Symbol::new(stringify!($kind))), + span: Span::call_site().0, + }) } )*) } @@ -1164,12 +1184,26 @@ macro_rules! unsuffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::integer(&n.to_string())) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: None, + span: Span::call_site().0, + }) } )*) } impl Literal { + fn new(kind: bridge::LitKind, value: &str, suffix: Option<&str>) -> Self { + Literal(bridge::Literal { + kind, + symbol: bridge::client::Symbol::new(value), + suffix: suffix.map(bridge::client::Symbol::new), + span: Span::call_site().0, + }) + } + suffixed_int_literals! { u8_suffixed => u8, u16_suffixed => u16, @@ -1221,7 +1255,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. @@ -1242,7 +1276,7 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f32(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f32")) } /// Creates a new unsuffixed floating-point literal. @@ -1266,7 +1300,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. @@ -1287,37 +1321,49 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f64(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f64")) } /// String literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn string(string: &str) -> Literal { - Literal(bridge::client::Literal::string(string)) + let quoted = format!("{:?}", string); + assert!(quoted.starts_with('"') && quoted.ends_with('"')); + let symbol = "ed[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Str, symbol, None) } /// Character literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn character(ch: char) -> Literal { - Literal(bridge::client::Literal::character(ch)) + let quoted = format!("{:?}", ch); + assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); + let symbol = "ed[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Char, symbol, None) } /// Byte string literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn byte_string(bytes: &[u8]) -> Literal { - Literal(bridge::client::Literal::byte_string(bytes)) + let string = bytes + .iter() + .cloned() + .flat_map(std::ascii::escape_default) + .map(Into::::into) + .collect::(); + Literal::new(bridge::LitKind::ByteStr, &string, None) } /// Returns the span encompassing this literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span associated for this literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0.set_span(span.0); + self.0.span = span.0; } /// Returns a `Span` that is a subset of `self.span()` containing only the @@ -1333,7 +1379,50 @@ impl Literal { // was 'c' or whether it was '\u{63}'. #[unstable(feature = "proc_macro_span", issue = "54725")] pub fn subspan>(&self, range: R) -> Option { - self.0.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) + self.0.span.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) + } + + fn with_symbol_and_suffix(&self, f: impl FnOnce(&str, &str) -> R) -> R { + self.0.symbol.with(|symbol| match self.0.suffix { + Some(suffix) => suffix.with(|suffix| f(symbol, suffix)), + None => f(symbol, ""), + }) + } + + /// Invokes the callback with a `&[&str]` consisting of each part of the + /// literal's representation. This is done to allow the `ToString` and + /// `Display` implementations to borrow references to symbol values, and + /// both be optimized to reduce overhead. + fn with_stringify_parts(&self, f: impl FnOnce(&[&str]) -> R) -> R { + /// Returns a string containing exactly `num` '#' characters. + /// Uses a 256-character source string literal which is always safe to + /// index with a `u8` index. + fn get_hashes_str(num: u8) -> &'static str { + const HASHES: &str = "\ + ################################################################\ + ################################################################\ + ################################################################\ + ################################################################\ + "; + const _: () = assert!(HASHES.len() == 256); + &HASHES[..num as usize] + } + + self.with_symbol_and_suffix(|symbol, suffix| match self.0.kind { + bridge::LitKind::Byte => f(&["b'", symbol, "'", suffix]), + bridge::LitKind::Char => f(&["'", symbol, "'", suffix]), + bridge::LitKind::Str => f(&["\"", symbol, "\"", suffix]), + bridge::LitKind::StrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["r", hashes, "\"", symbol, "\"", hashes, suffix]) + } + bridge::LitKind::ByteStr => f(&["b\"", symbol, "\"", suffix]), + bridge::LitKind::ByteStrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["br", hashes, "\"", symbol, "\"", hashes, suffix]) + } + _ => f(&[symbol, suffix]), + }) } } @@ -1352,19 +1441,17 @@ impl FromStr for Literal { type Err = LexError; fn from_str(src: &str) -> Result { - match bridge::client::Literal::from_str(src) { + match bridge::client::FreeFunctions::literal_from_str(src) { Ok(literal) => Ok(Literal(literal)), Err(()) => Err(LexError), } } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). -#[stable(feature = "proc_macro_lib", since = "1.15.0")] +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Literal { fn to_string(&self) -> String { - self.0.to_string() + self.with_stringify_parts(|parts| parts.concat()) } } @@ -1373,14 +1460,26 @@ impl ToString for Literal { #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + self.with_stringify_parts(|parts| { + for part in parts { + fmt::Display::fmt(part, f)?; + } + Ok(()) + }) } } #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Debug for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) + f.debug_struct("Literal") + // format the kind on one line even in {:#?} mode + .field("kind", &format_args!("{:?}", &self.0.kind)) + .field("symbol", &self.0.symbol) + // format `Some("...")` on one line even in {:#?} mode + .field("suffix", &format_args!("{:?}", &self.0.suffix)) + .field("span", &self.0.span) + .finish() } } diff --git a/src/test/ui/proc-macro/invalid-punct-ident-2.rs b/src/test/ui/proc-macro/invalid-punct-ident-2.rs index 04a0a8733115a..151f6203439f7 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-2.rs +++ b/src/test/ui/proc-macro/invalid-punct-ident-2.rs @@ -1,17 +1,9 @@ // aux-build:invalid-punct-ident.rs -// rustc-env:RUST_BACKTRACE=0 - -// FIXME https://github.com/rust-lang/rust/issues/59998 -// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" -// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" -// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> "" -// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> "" -// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> "" -// normalize-stderr-test "note: compiler flags.*\n\n" -> "" -// normalize-stderr-test "note: rustc.*running on.*\n\n" -> "" -// normalize-stderr-test "query stack during panic:\n" -> "" -// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> "" -// normalize-stderr-test "end of query stack\n" -> "" +// ignore-stage1 +// only-linux +// +// FIXME: This should be a normal (stage1, all platforms) test in +// src/test/ui/proc-macro once issue #59998 is fixed. #[macro_use] extern crate invalid_punct_ident; diff --git a/src/test/ui/proc-macro/invalid-punct-ident-2.stderr b/src/test/ui/proc-macro/invalid-punct-ident-2.stderr index f7e1f4bc7d361..0bd07bd649e47 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-2.stderr +++ b/src/test/ui/proc-macro/invalid-punct-ident-2.stderr @@ -1,5 +1,5 @@ error: proc macro panicked - --> $DIR/invalid-punct-ident-2.rs:19:1 + --> $DIR/invalid-punct-ident-2.rs:11:1 | LL | invalid_ident!(); | ^^^^^^^^^^^^^^^^ diff --git a/src/test/ui/proc-macro/invalid-punct-ident-3.rs b/src/test/ui/proc-macro/invalid-punct-ident-3.rs index aebba341625ae..7c22a56b6fbe9 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-3.rs +++ b/src/test/ui/proc-macro/invalid-punct-ident-3.rs @@ -1,17 +1,9 @@ // aux-build:invalid-punct-ident.rs -// rustc-env:RUST_BACKTRACE=0 - -// FIXME https://github.com/rust-lang/rust/issues/59998 -// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" -// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" -// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> "" -// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> "" -// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> "" -// normalize-stderr-test "note: compiler flags.*\n\n" -> "" -// normalize-stderr-test "note: rustc.*running on.*\n\n" -> "" -// normalize-stderr-test "query stack during panic:\n" -> "" -// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> "" -// normalize-stderr-test "end of query stack\n" -> "" +// ignore-stage1 +// only-linux +// +// FIXME: This should be a normal (stage1, all platforms) test in +// src/test/ui/proc-macro once issue #59998 is fixed. #[macro_use] extern crate invalid_punct_ident; diff --git a/src/test/ui/proc-macro/invalid-punct-ident-3.stderr b/src/test/ui/proc-macro/invalid-punct-ident-3.stderr index 541c71d74db53..a0cc5ef6e2d62 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-3.stderr +++ b/src/test/ui/proc-macro/invalid-punct-ident-3.stderr @@ -1,5 +1,5 @@ error: proc macro panicked - --> $DIR/invalid-punct-ident-3.rs:19:1 + --> $DIR/invalid-punct-ident-3.rs:11:1 | LL | invalid_raw_ident!(); | ^^^^^^^^^^^^^^^^^^^^