From 51c5700f15a23ac55c51e8e6a3f478a7fd77fcc6 Mon Sep 17 00:00:00 2001 From: Marijn Schouten Date: Tue, 28 Jan 2025 09:38:54 +0100 Subject: [PATCH 1/5] clarify BufRead::{fill_buf, consume} docs Fixes #85394 --- library/std/src/io/mod.rs | 41 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index cfd03b8e3d6d0..767e6e18cca8d 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2250,24 +2250,18 @@ fn skip_until(r: &mut R, delim: u8) -> Result { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub trait BufRead: Read { - /// Returns the contents of the internal buffer, filling it with more data - /// from the inner reader if it is empty. + /// Returns the contents of the internal buffer, filling it with more data, via Read methods, if empty. /// - /// This function is a lower-level call. It needs to be paired with the - /// [`consume`] method to function properly. When calling this - /// method, none of the contents will be "read" in the sense that later - /// calling `read` may return the same contents. As such, [`consume`] must - /// be called with the number of bytes that are consumed from this buffer to - /// ensure that the bytes are never returned twice. + /// This is a lower-level method and is meant to be used together with [`consume`], + /// which can be used to mark bytes that should not be returned by subsequent calls to `read`. /// /// [`consume`]: BufRead::consume /// - /// An empty buffer returned indicates that the stream has reached EOF. + /// Returns an empty buffer to indicate that the stream has reached EOF. /// /// # Errors /// - /// This function will return an I/O error if the underlying reader was - /// read, but returned an error. + /// Passes on I/O errors from Read. /// /// # Examples /// @@ -2285,7 +2279,7 @@ pub trait BufRead: Read { /// // work with buffer /// println!("{buffer:?}"); /// - /// // ensure the bytes we worked with aren't returned again later + /// // mark the bytes we worked with as read /// let length = buffer.len(); /// stdin.consume(length); /// # std::io::Result::Ok(()) @@ -2293,18 +2287,13 @@ pub trait BufRead: Read { #[stable(feature = "rust1", since = "1.0.0")] fn fill_buf(&mut self) -> Result<&[u8]>; - /// Tells this buffer that `amt` bytes have been consumed from the buffer, - /// so they should no longer be returned in calls to `read`. + /// Marks the given `amount` of additional bytes from the internal buffer as having been read. + /// Subsequent calls to `read` return bytes that have not yet been so marked. /// - /// This function is a lower-level call. It needs to be paired with the - /// [`fill_buf`] method to function properly. This function does - /// not perform any I/O, it simply informs this object that some amount of - /// its buffer, returned from [`fill_buf`], has been consumed and should - /// no longer be returned. As such, this function may do odd things if - /// [`fill_buf`] isn't called before calling it. + /// This is a lower-level method and is meant to be used together with [`fill_buf`], + /// which can be used to fill the internal buffer via Read methods. /// - /// The `amt` must be `<=` the number of bytes in the buffer returned by - /// [`fill_buf`]. + /// It is a logic error if `amount` exceeds the number of unread bytes in the internal buffer. /// /// # Examples /// @@ -2313,9 +2302,9 @@ pub trait BufRead: Read { /// /// [`fill_buf`]: BufRead::fill_buf #[stable(feature = "rust1", since = "1.0.0")] - fn consume(&mut self, amt: usize); + fn consume(&mut self, amount: usize); - /// Checks if the underlying `Read` has any data left to be read. + /// Checks if there is any data left to be `read`. /// /// This function may fill the buffer to check for data, /// so this functions returns `Result`, not `bool`. @@ -2324,6 +2313,10 @@ pub trait BufRead: Read { /// returned slice is empty (which means that there is no data left, /// since EOF is reached). /// + /// # Errors + /// + /// Passes on I/O errors from Read. + /// /// Examples /// /// ``` From 7b5e847ae5b8d2da27455ffd3fe55cc2e2dccf6d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 30 Jan 2025 22:15:26 +0100 Subject: [PATCH 2/5] exit: document interaction with C --- library/std/src/env.rs | 2 +- library/std/src/process.rs | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/library/std/src/env.rs b/library/std/src/env.rs index 4a071b4e1faec..c0415eafb0510 100644 --- a/library/std/src/env.rs +++ b/library/std/src/env.rs @@ -330,7 +330,7 @@ impl Error for VarError { /// /// Discussion of this unsafety on Unix may be found in: /// -/// - [Austin Group Bugzilla](https://austingroupbugs.net/view.php?id=188) +/// - [Austin Group Bugzilla (for POSIX)](https://austingroupbugs.net/view.php?id=188) /// - [GNU C library Bugzilla](https://sourceware.org/bugzilla/show_bug.cgi?id=15607#c2) /// /// To pass an environment variable to a child process, you can instead use [`Command::env`]. diff --git a/library/std/src/process.rs b/library/std/src/process.rs index bdd4844b6511a..2ae93d84ba4b6 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -2003,9 +2003,9 @@ impl ExitCode { /// /// Note that this has the same caveats as [`process::exit()`][exit], namely that this function /// terminates the process immediately, so no destructors on the current stack or any other - /// thread's stack will be run. If a clean shutdown is needed, it is recommended to simply - /// return this ExitCode from the `main` function, as demonstrated in the [type - /// documentation](#examples). + /// thread's stack will be run. Also see those docs for some important notes on interop with C + /// code. If a clean shutdown is needed, it is recommended to simply return this ExitCode from + /// the `main` function, as demonstrated in the [type documentation](#examples). /// /// # Differences from `process::exit()` /// @@ -2297,6 +2297,34 @@ impl Child { /// considered undesirable. Note that returning from `main` also calls `exit`, so making `exit` an /// unsafe operation is not an option.) /// +/// ## Safe interop with C code +/// +/// This function is safe to call as long as `exit` is only ever invoked from Rust. However, on some +/// platforms this function is implemented by calling the C function [`exit`][C-exit]. As of C23, +/// the C standard does not permit multiple threads to call `exit` concurrently. Rust mitigates this +/// with a lock, but if C code calls `exit`, that can still cause undefined behavior. Note that +/// returning from `main` is equivalent to calling `exit`. +/// +/// Therefore, it is undefined behavior to have two concurrent threads perform the following +/// without synchronization: +/// - One thread calls Rust's `exit` function or returns from Rust's `main` function +/// - Another thread calls the C function `exit` or `quick_exit`, or returns from C's `main` function +/// +/// Note that if a binary contains multiple copies of the Rust runtime (e.g., when combining +/// multiple `cdylib` or `staticlib`), they each have their own separate lock, so from the +/// perspective of code running in one of the Rust runtimes, the "outside" Rust code is basically C +/// code, and concurrent `exit` again causes undefined behavior. +/// +/// Individual C implementations might provide more guarantees than the standard and permit concurrent +/// calls to `exit`; consult the documentation of your C implementation for details. +/// +/// For some of the on-going discussion to make `exit` thread-safe in C, see: +/// - [Rust issue #126600](https://github.com/rust-lang/rust/issues/126600) +/// - [Austin Group Bugzilla (for POSIX)](https://austingroupbugs.net/view.php?id=1845) +/// - [GNU C library Bugzilla](https://sourceware.org/bugzilla/show_bug.cgi?id=31997) +/// +/// [C-exit]: https://en.cppreference.com/w/c/program/exit +/// /// ## Platform-specific behavior /// /// **Unix**: On Unix-like platforms, it is unlikely that all 32 bits of `exit` From c133e22f7c3ba129185a500896fabb7befef7926 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 17 Mar 2025 17:56:22 +0100 Subject: [PATCH 3/5] move new section into platform-specific behavior, as it is unix-specific --- library/std/src/process.rs | 41 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/library/std/src/process.rs b/library/std/src/process.rs index 2ae93d84ba4b6..f1ee65e4648a1 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -2297,13 +2297,27 @@ impl Child { /// considered undesirable. Note that returning from `main` also calls `exit`, so making `exit` an /// unsafe operation is not an option.) /// -/// ## Safe interop with C code +/// ## Platform-specific behavior +/// +/// **Unix**: On Unix-like platforms, it is unlikely that all 32 bits of `exit` +/// will be visible to a parent process inspecting the exit code. On most +/// Unix-like platforms, only the eight least-significant bits are considered. +/// +/// For example, the exit code for this example will be `0` on Linux, but `256` +/// on Windows: +/// +/// ```no_run +/// use std::process; +/// +/// process::exit(0x0100); +/// ``` +/// +/// ### Safe interop with C code /// -/// This function is safe to call as long as `exit` is only ever invoked from Rust. However, on some -/// platforms this function is implemented by calling the C function [`exit`][C-exit]. As of C23, -/// the C standard does not permit multiple threads to call `exit` concurrently. Rust mitigates this -/// with a lock, but if C code calls `exit`, that can still cause undefined behavior. Note that -/// returning from `main` is equivalent to calling `exit`. +/// On Unix, this function is currently implemented using the `exit` C function [`exit`][C-exit]. As +/// of C23, the C standard does not permit multiple threads to call `exit` concurrently. Rust +/// mitigates this with a lock, but if C code calls `exit`, that can still cause undefined behavior. +/// Note that returning from `main` is equivalent to calling `exit`. /// /// Therefore, it is undefined behavior to have two concurrent threads perform the following /// without synchronization: @@ -2324,21 +2338,6 @@ impl Child { /// - [GNU C library Bugzilla](https://sourceware.org/bugzilla/show_bug.cgi?id=31997) /// /// [C-exit]: https://en.cppreference.com/w/c/program/exit -/// -/// ## Platform-specific behavior -/// -/// **Unix**: On Unix-like platforms, it is unlikely that all 32 bits of `exit` -/// will be visible to a parent process inspecting the exit code. On most -/// Unix-like platforms, only the eight least-significant bits are considered. -/// -/// For example, the exit code for this example will be `0` on Linux, but `256` -/// on Windows: -/// -/// ```no_run -/// use std::process; -/// -/// process::exit(0x0100); -/// ``` #[stable(feature = "rust1", since = "1.0.0")] #[cfg_attr(not(test), rustc_diagnostic_item = "process_exit")] pub fn exit(code: i32) -> ! { From 7fdb7bd92b90a2716f6f9bb6eef71220d30f3e9e Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 17 Mar 2025 13:06:34 +0100 Subject: [PATCH 4/5] Slim `rustc_parse_format` dependencies down `rustc_index` is only used for its size assertion macro, so demote it to a dev-dependency gated under testing instead --- compiler/rustc_parse_format/Cargo.toml | 6 +++++- compiler/rustc_parse_format/src/lib.rs | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index e63ed9e16f2e8..ee70de9952dfc 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -6,6 +6,10 @@ edition = "2024" [dependencies] # tidy-alphabetical-start literal-escaper = { path = "../../library/literal-escaper" } -rustc_index = { path = "../rustc_index", default-features = false } rustc_lexer = { path = "../rustc_lexer" } # tidy-alphabetical-end + +[dev-dependencies] +# tidy-alphabetical-start +rustc_index = { path = "../rustc_index", default-features = false } +# tidy-alphabetical-end diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 5780daf303437..5bb823992cc02 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -1106,7 +1106,7 @@ fn unescape_string(string: &str) -> Option { } // Assert a reasonable size for `Piece` -#[cfg(target_pointer_width = "64")] +#[cfg(all(test, target_pointer_width = "64"))] rustc_index::static_assert_size!(Piece<'_>, 16); #[cfg(test)] From e5b86e2c73f0c3f09e70dfaa7bfed778585574bf Mon Sep 17 00:00:00 2001 From: Marijn Schouten Date: Tue, 18 Mar 2025 11:16:18 +0100 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Ibraheem Ahmed --- library/std/src/io/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 767e6e18cca8d..a78ce7031d39e 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2250,18 +2250,18 @@ fn skip_until(r: &mut R, delim: u8) -> Result { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub trait BufRead: Read { - /// Returns the contents of the internal buffer, filling it with more data, via Read methods, if empty. + /// Returns the contents of the internal buffer, filling it with more data, via `Read` methods, if empty. /// /// This is a lower-level method and is meant to be used together with [`consume`], /// which can be used to mark bytes that should not be returned by subsequent calls to `read`. /// /// [`consume`]: BufRead::consume /// - /// Returns an empty buffer to indicate that the stream has reached EOF. + /// Returns an empty buffer when the stream has reached EOF. /// /// # Errors /// - /// Passes on I/O errors from Read. + /// This function will return an I/O error if a `Read` method was called, but returned an error. /// /// # Examples /// @@ -2288,12 +2288,12 @@ pub trait BufRead: Read { fn fill_buf(&mut self) -> Result<&[u8]>; /// Marks the given `amount` of additional bytes from the internal buffer as having been read. - /// Subsequent calls to `read` return bytes that have not yet been so marked. + /// Subsequent calls to `read` only return bytes that have not been marked as read. /// /// This is a lower-level method and is meant to be used together with [`fill_buf`], - /// which can be used to fill the internal buffer via Read methods. + /// which can be used to fill the internal buffer via `Read` methods. /// - /// It is a logic error if `amount` exceeds the number of unread bytes in the internal buffer. + /// It is a logic error if `amount` exceeds the number of unread bytes in the internal buffer, which is returned by [`fill_buf`]. /// /// # Examples /// @@ -2315,7 +2315,7 @@ pub trait BufRead: Read { /// /// # Errors /// - /// Passes on I/O errors from Read. + /// This function will return an I/O error if a `Read` method was called, but returned an error. /// /// Examples ///