Auto merge of #81858 - ijackson:fork-no-unwind, r=m-ou-se

bors · bors · commit d565c7488749 · 2021-05-15T22:27:09.000Z
Do not allocate or unwind after fork ### Objective scenarios * Make (simple) panics safe in `Command::pre_exec_hook`, including most `panic!` calls, `Option::unwrap`, and array bounds check failures. * Make it possible to `libc::fork` and then safely panic in the child (needed for the above, but this requirement means exposing the new raw hook API which the `Command` implementation needs). * In singlethreaded programs, where panic in `pre_exec_hook` is already memory-safe, prevent the double-unwinding malfunction #79740. I think we want to make panic after fork safe even though the post-fork child environment is only experienced by users of `unsafe`, beause the subset of Rust in which any panic is UB is really far too hazardous and unnatural. #### Approach * Provide a way for a program to, at runtime, switch to having panics abort. This makes it possible to panic without making *any* heap allocations, which is needed because on some platforms malloc is UB in a child forked from a multithreaded program (see #80263 (comment), and maybe also the SuS [spec](https://pubs.opengroup.org/onlinepubs/9699919799/functions/fork.html)). * Make that change in the child spawned by `Command`. * Document the rules comprehensively enough that a programmer has a fighting chance of writing correct code. * Test that this all works as expected (and in particular, that there aren't any heap allocations we missed) Fixes #79740 #### Rejected (or previously attempted) approaches * Change the panic machinery to be able to unwind without allocating, at least when the payload and message are both `'static`. This seems like it would be even more subtle. Also that is a potentially-hot path which I don't want to mess with. * Change the existing panic hook mechanism to not convert the message to a `String` before calling the hook. This would be a surprising change for existing code and would not be detected by the type system. * Provide a `raw_panic_hook` function to intercept panics in a way that doesn't allocate. (That was an earlier version of this MR.) ### History This MR could be considered a v2 of #80263. Thanks to everyone who commented there. In particular, thanks to `@m-ou-se,` `@Mark-Simulacrum` and `@hyd-dev.` (Tagging you since I think you might be interested in this new MR.) Compared to #80263, this MR has very substantial changes and additions. Additionally, I have recently (2021-04-20) completely revised this series following very helpful comments from `@m-ou-se.` r? `@m-ou-se`
diff --git a/library/std/src/os/unix/process.rs b/library/std/src/os/unix/process.rs
@@ -75,6 +75,12 @@ pub trait CommandExt: Sealed {
     /// sure that the closure does not violate library invariants by making
     /// invalid use of these duplicates.
     ///
+    /// Panicking in the closure is safe only if all the format arguments for the
+    /// panic message can be safely formatted; this is because although
+    /// `Command` calls [`std::panic::always_abort`](crate::panic::always_abort)
+    /// before calling the pre_exec hook, panic will still try to format the
+    /// panic message.
+    ///
     /// When this closure is run, aspects such as the stdio file descriptors and
     /// working directory have successfully been changed, so output to these
     /// locations may not appear where intended.
diff --git a/library/std/src/panic.rs b/library/std/src/panic.rs
@@ -463,5 +463,41 @@ pub fn resume_unwind(payload: Box<dyn Any + Send>) -> ! {
     panicking::rust_panic_without_hook(payload)
 }
 
+/// Make all future panics abort directly without running the panic hook or unwinding.
+///
+/// There is no way to undo this; the effect lasts until the process exits or
+/// execs (or the equivalent).
+///
+/// # Use after fork
+///
+/// This function is particularly useful for calling after `libc::fork`.  After `fork`, in a
+/// multithreaded program it is (on many platforms) not safe to call the allocator.  It is also
+/// generally highly undesirable for an unwind to unwind past the `fork`, because that results in
+/// the unwind propagating to code that was only ever expecting to run in the parent.
+///
+/// `panic::always_abort()` helps avoid both of these.  It directly avoids any further unwinding,
+/// and if there is a panic, the abort will occur without allocating provided that the arguments to
+/// panic can be formatted without allocating.
+///
+/// Examples
+///
+/// ```no_run
+/// #![feature(panic_always_abort)]
+/// use std::panic;
+///
+/// panic::always_abort();
+///
+/// let _ = panic::catch_unwind(|| {
+///     panic!("inside the catch");
+/// });
+///
+/// // We will have aborted already, due to the panic.
+/// unreachable!();
+/// ```
+#[unstable(feature = "panic_always_abort", issue = "84438")]
+pub fn always_abort() {
+    crate::panicking::panic_count::set_always_abort();
+}
+
 #[cfg(test)]
 mod tests;
diff --git a/library/std/src/panicking.rs b/library/std/src/panicking.rs
@@ -180,7 +180,7 @@ pub fn take_hook() -> Box<dyn Fn(&PanicInfo<'_>) + 'static + Sync + Send> {
 fn default_hook(info: &PanicInfo<'_>) {
     // If this is a double panic, make sure that we print a backtrace
     // for this panic. Otherwise only print it if logging is enabled.
-    let backtrace_env = if panic_count::get() >= 2 {
+    let backtrace_env = if panic_count::get_count() >= 2 {
         RustBacktrace::Print(crate::backtrace_rs::PrintFmt::Full)
     } else {
         backtrace::rust_backtrace_env()
@@ -233,6 +233,8 @@ pub mod panic_count {
     use crate::cell::Cell;
     use crate::sync::atomic::{AtomicUsize, Ordering};
 
+    pub const ALWAYS_ABORT_FLAG: usize = 1 << (usize::BITS - 1);
+
     // Panic count for the current thread.
     thread_local! { static LOCAL_PANIC_COUNT: Cell<usize> = Cell::new(0) }
 
@@ -241,33 +243,53 @@ pub mod panic_count {
     // thread, if that thread currently views `GLOBAL_PANIC_COUNT` as being zero,
     // then `LOCAL_PANIC_COUNT` in that thread is zero. This invariant holds before
     // and after increase and decrease, but not necessarily during their execution.
+    //
+    // Additionally, the top bit of GLOBAL_PANIC_COUNT (GLOBAL_ALWAYS_ABORT_FLAG)
+    // records whether panic::always_abort() has been called.  This can only be
+    // set, never cleared.
+    //
+    // This could be viewed as a struct containing a single bit and an n-1-bit
+    // value, but if we wrote it like that it would be more than a single word,
+    // and even a newtype around usize would be clumsy because we need atomics.
+    // But we use such a tuple for the return type of increase().
+    //
+    // Stealing a bit is fine because it just amounts to assuming that each
+    // panicking thread consumes at least 2 bytes of address space.
     static GLOBAL_PANIC_COUNT: AtomicUsize = AtomicUsize::new(0);
 
-    pub fn increase() -> usize {
-        GLOBAL_PANIC_COUNT.fetch_add(1, Ordering::Relaxed);
-        LOCAL_PANIC_COUNT.with(|c| {
-            let next = c.get() + 1;
-            c.set(next);
-            next
-        })
+    pub fn increase() -> (bool, usize) {
+        (
+            GLOBAL_PANIC_COUNT.fetch_add(1, Ordering::Relaxed) & ALWAYS_ABORT_FLAG != 0,
+            LOCAL_PANIC_COUNT.with(|c| {
+                let next = c.get() + 1;
+                c.set(next);
+                next
+            }),
+        )
     }
 
-    pub fn decrease() -> usize {
+    pub fn decrease() {
         GLOBAL_PANIC_COUNT.fetch_sub(1, Ordering::Relaxed);
         LOCAL_PANIC_COUNT.with(|c| {
             let next = c.get() - 1;
             c.set(next);
             next
-        })
+        });
+    }
+
+    pub fn set_always_abort() {
+        GLOBAL_PANIC_COUNT.fetch_or(ALWAYS_ABORT_FLAG, Ordering::Relaxed);
     }
 
-    pub fn get() -> usize {
+    // Disregards ALWAYS_ABORT_FLAG
+    pub fn get_count() -> usize {
         LOCAL_PANIC_COUNT.with(|c| c.get())
     }
 
+    // Disregards ALWAYS_ABORT_FLAG
     #[inline]
-    pub fn is_zero() -> bool {
-        if GLOBAL_PANIC_COUNT.load(Ordering::Relaxed) == 0 {
+    pub fn count_is_zero() -> bool {
+        if GLOBAL_PANIC_COUNT.load(Ordering::Relaxed) & !ALWAYS_ABORT_FLAG == 0 {
             // Fast path: if `GLOBAL_PANIC_COUNT` is zero, all threads
             // (including the current one) will have `LOCAL_PANIC_COUNT`
             // equal to zero, so TLS access can be avoided.
@@ -410,7 +432,7 @@ pub unsafe fn r#try<R, F: FnOnce() -> R>(f: F) -> Result<R, Box<dyn Any + Send>>
 /// Determines whether the current thread is unwinding because of panic.
 #[inline]
 pub fn panicking() -> bool {
-    !panic_count::is_zero()
+    !panic_count::count_is_zero()
 }
 
 /// The entry point for panicking with a formatted message.
@@ -563,15 +585,27 @@ fn rust_panic_with_hook(
     message: Option<&fmt::Arguments<'_>>,
     location: &Location<'_>,
 ) -> ! {
-    let panics = panic_count::increase();
+    let (must_abort, panics) = panic_count::increase();
 
     // If this is the third nested call (e.g., panics == 2, this is 0-indexed),
     // the panic hook probably triggered the last panic, otherwise the
     // double-panic check would have aborted the process. In this case abort the
     // process real quickly as we don't want to try calling it again as it'll
     // probably just panic again.
-    if panics > 2 {
-        util::dumb_print(format_args!("thread panicked while processing panic. aborting.\n"));
+    if must_abort || panics > 2 {
+        if panics > 2 {
+            // Don't try to print the message in this case
+            // - perhaps that is causing the recursive panics.
+            util::dumb_print(format_args!("thread panicked while processing panic. aborting.\n"));
+        } else {
+            // Unfortunately, this does not print a backtrace, because creating
+            // a `Backtrace` will allocate, which we must to avoid here.
+            let panicinfo = PanicInfo::internal_constructor(message, location);
+            util::dumb_print(format_args!(
+                "{}\npanicked after panic::always_abort(), aborting.\n",
+                panicinfo
+            ));
+        }
         intrinsics::abort()
     }
 
diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs
@@ -54,6 +54,7 @@ impl Command {
         let (env_lock, pid) = unsafe { (sys::os::env_read_lock(), cvt(libc::fork())?) };
 
         if pid == 0 {
+            crate::panic::always_abort();
             mem::forget(env_lock);
             drop(input);
             let Err(err) = unsafe { self.do_exec(theirs, envp.as_ref()) };
diff --git a/library/std/src/sys/unix/process/process_unix/tests.rs b/library/std/src/sys/unix/process/process_unix/tests.rs
@@ -1,3 +1,10 @@
+use crate::os::unix::process::{CommandExt, ExitStatusExt};
+use crate::panic::catch_unwind;
+use crate::process::Command;
+
+// Many of the other aspects of this situation, including heap alloc concurrency
+// safety etc., are tested in src/test/ui/process/process-panic-after-fork.rs
+
 #[test]
 fn exitstatus_display_tests() {
     // In practice this is the same on every Unix.
@@ -28,3 +35,23 @@ fn exitstatus_display_tests() {
         t(0x000ff, "unrecognised wait status: 255 0xff");
     }
 }
+
+#[test]
+#[cfg_attr(target_os = "emscripten", ignore)]
+fn test_command_fork_no_unwind() {
+    let got = catch_unwind(|| {
+        let mut c = Command::new("echo");
+        c.arg("hi");
+        unsafe {
+            c.pre_exec(|| panic!("{}", "crash now!"));
+        }
+        let st = c.status().expect("failed to get command status");
+        dbg!(st);
+        st
+    });
+    dbg!(&got);
+    let status = got.expect("panic unexpectedly propagated");
+    dbg!(status);
+    let signal = status.signal().expect("expected child process to die of signal");
+    assert!(signal == libc::SIGABRT || signal == libc::SIGILL || signal == libc::SIGTRAP);
+}
diff --git a/src/test/ui/panics/abort-on-panic.rs b/src/test/ui/panics/abort-on-panic.rs
@@ -2,6 +2,7 @@
 
 #![allow(unused_must_use)]
 #![feature(unwind_attributes)]
+#![feature(panic_always_abort)]
 // Since we mark some ABIs as "nounwind" to LLVM, we must make sure that
 // we never unwind through them.
 
@@ -11,7 +12,9 @@
 use std::{env, panic};
 use std::io::prelude::*;
 use std::io;
-use std::process::{Command, Stdio};
+use std::process::{exit, Command, Stdio};
+use std::sync::{Arc, Barrier};
+use std::thread;
 
 #[unwind(aborts)] // FIXME(#58794) should work even without the attribute
 extern "C" fn panic_in_ffi() {
@@ -23,41 +26,77 @@ extern "Rust" fn panic_in_rust_abi() {
     panic!("TestRust");
 }
 
-fn test() {
-    let _ = panic::catch_unwind(|| { panic_in_ffi(); });
-    // The process should have aborted by now.
+fn should_have_aborted() {
     io::stdout().write(b"This should never be printed.\n");
     let _ = io::stdout().flush();
 }
 
+fn bomb_out_but_not_abort(msg: &str) {
+    eprintln!("bombing out: {}", msg);
+    exit(1);
+}
+
+fn test() {
+    let _ = panic::catch_unwind(|| { panic_in_ffi(); });
+    should_have_aborted();
+}
+
 fn testrust() {
     let _ = panic::catch_unwind(|| { panic_in_rust_abi(); });
-    // The process should have aborted by now.
-    io::stdout().write(b"This should never be printed.\n");
-    let _ = io::stdout().flush();
+    should_have_aborted();
+}
+
+fn test_always_abort() {
+    panic::always_abort();
+    let _ = panic::catch_unwind(|| { panic!(); });
+    should_have_aborted();
+}
+
+fn test_always_abort_thread() {
+    let barrier = Arc::new(Barrier::new(2));
+    let thr = {
+        let barrier = barrier.clone();
+        thread::spawn(move ||{
+            barrier.wait();
+            panic!("in thread");
+        })
+    };
+    panic::always_abort();
+    barrier.wait();
+    let _ = thr.join();
+    bomb_out_but_not_abort("joined - but we were supposed to panic!");
 }
 
 fn main() {
+    let tests: &[(_, fn())] = &[
+        ("test", test),
+        ("testrust", testrust),
+        ("test_always_abort", test_always_abort),
+        ("test_always_abort_thread", test_always_abort_thread),
+    ];
+
     let args: Vec<String> = env::args().collect();
     if args.len() > 1 {
         // This is inside the self-executed command.
-        match &*args[1] {
-            "test" => return test(),
-            "testrust" => return testrust(),
-            _ => panic!("bad test"),
+        for (a,f) in tests {
+            if &args[1] == a { return f() }
         }
+        bomb_out_but_not_abort("bad test");
     }
 
-    // These end up calling the self-execution branches above.
-    let mut p = Command::new(&args[0])
-                        .stdout(Stdio::piped())
-                        .stdin(Stdio::piped())
-                        .arg("test").spawn().unwrap();
-    assert!(!p.wait().unwrap().success());
-
-    let mut p = Command::new(&args[0])
-                        .stdout(Stdio::piped())
-                        .stdin(Stdio::piped())
-                        .arg("testrust").spawn().unwrap();
-    assert!(!p.wait().unwrap().success());
+    let execute_self_expecting_abort = |arg| {
+        let mut p = Command::new(&args[0])
+                            .stdout(Stdio::piped())
+                            .stdin(Stdio::piped())
+                            .arg(arg).spawn().unwrap();
+        let status = p.wait().unwrap();
+        assert!(!status.success());
+        // Any reasonable platform can distinguish a process which
+        // called exit(1) from one which panicked.
+        assert_ne!(status.code(), Some(1));
+    };
+
+    for (a,_f) in tests {
+        execute_self_expecting_abort(a);
+    }
 }
diff --git a/src/test/ui/process/process-panic-after-fork.rs b/src/test/ui/process/process-panic-after-fork.rs