From ca68d4d7a2975ffd09f0125ac03c6700e728e0c8 Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 17 Oct 2023 17:33:13 +0200 Subject: [PATCH 1/4] std: begin unifying TLS destructor lists --- .../sys/pal/common/thread_local/fast_local.rs | 85 +++++++++++++++---- .../src/sys/pal/common/thread_local/mod.rs | 23 +---- library/std/src/thread/mod.rs | 2 +- 3 files changed, 69 insertions(+), 41 deletions(-) diff --git a/library/std/src/sys/pal/common/thread_local/fast_local.rs b/library/std/src/sys/pal/common/thread_local/fast_local.rs index 04c0dd6f75090..629e0ff0a57f3 100644 --- a/library/std/src/sys/pal/common/thread_local/fast_local.rs +++ b/library/std/src/sys/pal/common/thread_local/fast_local.rs @@ -1,6 +1,5 @@ use super::lazy::LazyKeyInner; -use crate::cell::Cell; -use crate::sys::thread_local_dtor::register_dtor; +use crate::cell::{Cell, RefCell}; use crate::{fmt, mem, panic}; #[doc(hidden)] @@ -39,13 +38,11 @@ pub macro thread_local_inner { // Safety: Performs `drop_in_place(ptr as *mut $t)`, and requires // all that comes with it. - unsafe extern "C" fn destroy(ptr: *mut $crate::primitive::u8) { - $crate::thread::local_impl::abort_on_dtor_unwind(|| { - let old_state = STATE.replace(2); - $crate::debug_assert_eq!(old_state, 1); - // Safety: safety requirement is passed on to caller. - unsafe { $crate::ptr::drop_in_place(ptr.cast::<$t>()); } - }); + unsafe fn destroy(ptr: *mut $crate::primitive::u8) { + let old_state = STATE.replace(2); + $crate::debug_assert_eq!(old_state, 1); + // Safety: safety requirement is passed on to caller. + unsafe { $crate::ptr::drop_in_place(ptr.cast::<$t>()); } } unsafe { @@ -155,8 +152,8 @@ impl Key { // note that this is just a publicly-callable function only for the // const-initialized form of thread locals, basically a way to call the - // free `register_dtor` function defined elsewhere in std. - pub unsafe fn register_dtor(a: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { + // free `register_dtor` function. 
+ pub unsafe fn register_dtor(a: *mut u8, dtor: unsafe fn(*mut u8)) { unsafe { register_dtor(a, dtor); } @@ -220,7 +217,7 @@ impl Key { } } -unsafe extern "C" fn destroy_value(ptr: *mut u8) { +unsafe fn destroy_value(ptr: *mut u8) { let ptr = ptr as *mut Key; // SAFETY: @@ -233,14 +230,66 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) { // `Option` to `None`, and `dtor_state` to `RunningOrHasRun`. This // causes future calls to `get` to run `try_initialize_drop` again, // which will now fail, and return `None`. - // - // Wrap the call in a catch to ensure unwinding is caught in the event - // a panic takes place in a destructor. - if let Err(_) = panic::catch_unwind(panic::AssertUnwindSafe(|| unsafe { + unsafe { let value = (*ptr).inner.take(); (*ptr).dtor_state.set(DtorState::RunningOrHasRun); drop(value); - })) { - rtabort!("thread local panicked on drop"); + } +} + +#[thread_local] +static DTORS: RefCell<Vec<(*mut u8, unsafe fn(*mut u8))>> = RefCell::new(Vec::new()); + +// Ensure this can never be inlined on Windows because otherwise this may break +// in dylibs. See #44391. +#[cfg_attr(windows, inline(never))] +unsafe fn register_dtor(t: *mut u8, dtor: unsafe fn(*mut u8)) { + // Ensure that destructors are run on thread exit. + crate::sys::thread_local_guard::activate(); + + let mut dtors = match DTORS.try_borrow_mut() { + Ok(dtors) => dtors, + // The only place this function can be called reentrantly is inside the + // heap allocator. This is currently forbidden. + Err(_) => rtabort!("the global allocator may not register TLS destructors"), + }; + dtors.push((t, dtor)); +} + +/// Called by the platform on thread exit to run all registered destructors. +/// The signature was chosen so that this function may be passed as a callback +/// to platform functions. The argument is ignored. +/// +/// # Safety +/// May only be called on thread exit. In particular, no thread locals may +/// currently be referenced. 
+pub unsafe extern "C" fn run_dtors(_unused: *mut u8) { + // This function must not unwind. This is ensured by the `extern "C"` ABI, + // but by catching the unwind, we can print a more helpful message. + + match panic::catch_unwind(|| { + let dtors = &DTORS; + + loop { + // Ensure that the `RefMut` guard is not held while the destructor is + // executed to allow initializing TLS variables in destructors. + let (t, dtor) = { + let mut dtors = dtors.borrow_mut(); + match dtors.pop() { + Some(entry) => entry, + None => break, + } + }; + + unsafe { + (dtor)(t); + } + } + + // All destructors were run, deallocate the list. + drop(dtors.replace(Vec::new())); + }) { + Ok(()) => {} + Err(_) => rtabort!("thread local panicked on drop"), } } diff --git a/library/std/src/sys/pal/common/thread_local/mod.rs b/library/std/src/sys/pal/common/thread_local/mod.rs index 8b2c839f837d4..ac6ad4ff889e4 100644 --- a/library/std/src/sys/pal/common/thread_local/mod.rs +++ b/library/std/src/sys/pal/common/thread_local/mod.rs @@ -15,7 +15,7 @@ cfg_if::cfg_if! { #[doc(hidden)] mod fast_local; #[doc(hidden)] - pub use fast_local::{Key, thread_local_inner}; + pub use fast_local::{Key, thread_local_inner, run_dtors}; } else { #[doc(hidden)] mod os_local; @@ -101,24 +101,3 @@ mod lazy { } } } - -/// Run a callback in a scenario which must not unwind (such as a `extern "C" -/// fn` declared in a user crate). If the callback unwinds anyway, then -/// `rtabort` with a message about thread local panicking on drop. -#[inline] -pub fn abort_on_dtor_unwind(f: impl FnOnce()) { - // Using a guard like this is lower cost. - let guard = DtorUnwindGuard; - f(); - core::mem::forget(guard); - - struct DtorUnwindGuard; - impl Drop for DtorUnwindGuard { - #[inline] - fn drop(&mut self) { - // This is not terribly descriptive, but it doesn't need to be as we'll - // already have printed a panic message at this point. 
- rtabort!("thread local panicked on drop"); - } - } -} diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 4f0f010984ab9..2cc2bf3f90a96 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -205,7 +205,7 @@ cfg_if::cfg_if! { #[doc(hidden)] #[unstable(feature = "thread_local_internals", issue = "none")] pub mod local_impl { - pub use crate::sys::common::thread_local::{thread_local_inner, Key, abort_on_dtor_unwind}; + pub use crate::sys::common::thread_local::{thread_local_inner, Key}; } } } From 13cc6af2b548ba4c57700cd21c2d407e903931dc Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 17 Oct 2023 17:37:22 +0200 Subject: [PATCH 2/4] std: move UNIX to new destructor list implementation --- library/std/src/sys/pal/unix/mod.rs | 2 +- .../std/src/sys/pal/unix/thread_local_dtor.rs | 97 +++++++------- .../src/sys/pal/unix/thread_local_guard.rs | 118 ++++++++++++++++++ library/std/src/sys_common/mod.rs | 1 - .../std/src/sys_common/thread_local_dtor.rs | 56 --------- 5 files changed, 163 insertions(+), 111 deletions(-) create mode 100644 library/std/src/sys/pal/unix/thread_local_guard.rs delete mode 100644 library/std/src/sys_common/thread_local_dtor.rs diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs index 976a437c17ff9..bd970d1190b0e 100644 --- a/library/std/src/sys/pal/unix/mod.rs +++ b/library/std/src/sys/pal/unix/mod.rs @@ -33,7 +33,7 @@ pub mod rand; pub mod stack_overflow; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; +pub mod thread_local_guard; pub mod thread_local_key; pub mod thread_parking; pub mod time; diff --git a/library/std/src/sys/pal/unix/thread_local_dtor.rs b/library/std/src/sys/pal/unix/thread_local_dtor.rs index 8857f96501c19..03651174bc9fc 100644 --- a/library/std/src/sys/pal/unix/thread_local_dtor.rs +++ b/library/std/src/sys/pal/unix/thread_local_dtor.rs @@ -1,8 +1,10 @@ +//! Ensures that thread-local destructors are run on thread exit. 
+ #![cfg(target_thread_local)] #![unstable(feature = "thread_local_internals", issue = "none")] -//! Provides thread-local destructors without an associated "key", which -//! can be more efficient. +use crate::ptr; +use crate::sys::common::thread_local::run_dtors; // Since what appears to be glibc 2.18 this symbol has been shipped which // GCC and clang both use to invoke destructors in thread_local globals, so @@ -23,9 +25,10 @@ // FIXME: The Rust compiler currently omits weakly function definitions (i.e., // __cxa_thread_atexit_impl) and its metadata from LLVM IR. #[no_sanitize(cfi, kcfi)] -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { +pub fn activate() { + use crate::cell::Cell; use crate::mem; - use crate::sys_common::thread_local_dtor::register_dtor_fallback; + use crate::sys_common::thread_local_key::StaticKey; /// This is necessary because the __cxa_thread_atexit_impl implementation /// std links to by default may be a C or C++ implementation that was not @@ -50,64 +53,47 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { >; } - if let Some(f) = __cxa_thread_atexit_impl { - unsafe { - f( - mem::transmute::< - unsafe extern "C" fn(*mut u8), - unsafe extern "C" fn(*mut libc::c_void), - >(dtor), - t.cast(), - &__dso_handle as *const _ as *mut _, - ); + unsafe { + if let Some(atexit) = __cxa_thread_atexit_impl { + #[thread_local] + static REGISTERED: Cell = Cell::new(false); + if !REGISTERED.get() { + atexit( + mem::transmute::< + unsafe extern "C" fn(*mut u8), + unsafe extern "C" fn(*mut libc::c_void), + >(run_dtors), + ptr::null_mut(), + &__dso_handle as *const _ as *mut _, + ); + REGISTERED.set(true); + } + } else { + static KEY: StaticKey = StaticKey::new(Some(run_dtors)); + + KEY.set(ptr::invalid_mut(1)); } - return; } - register_dtor_fallback(t, dtor); } -// This implementation is very similar to register_dtor_fallback in -// sys_common/thread_local.rs. 
The main difference is that we want to hook into -// macOS's analog of the above linux function, _tlv_atexit. OSX will run the -// registered dtors before any TLS slots get freed, and when the main thread +// We hook into macOS's analog of the above linux function, _tlv_atexit. OSX +// will run `run_dtors` before any TLS slots get freed, and when the main thread // exits. -// -// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The -// workaround below is to register, via _tlv_atexit, a custom DTOR list once per -// thread. thread_local dtors are pushed to the DTOR list without calling -// _tlv_atexit. #[cfg(any(target_os = "macos", target_os = "ios", target_os = "watchos", target_os = "tvos"))] -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::cell::{Cell, RefCell}; - use crate::ptr; - - #[thread_local] - static REGISTERED: Cell = Cell::new(false); - - #[thread_local] - static DTORS: RefCell> = RefCell::new(Vec::new()); - - if !REGISTERED.get() { - _tlv_atexit(run_dtors, ptr::null_mut()); - REGISTERED.set(true); - } +pub fn activate() { + use crate::cell::Cell; extern "C" { fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8); } - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } + #[thread_local] + static REGISTERED: Cell = Cell::new(false); - unsafe extern "C" fn run_dtors(_: *mut u8) { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - dtor(ptr); - } - list = DTORS.take(); + if !REGISTERED.get() { + unsafe { + _tlv_atexit(run_dtors, ptr::null_mut()); + REGISTERED.set(true); } } } @@ -120,7 +106,12 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { target_os = "freebsd", ))] #[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. 
wasm32-unknown-emscripten) -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - use crate::sys_common::thread_local_dtor::register_dtor_fallback; - register_dtor_fallback(t, dtor); +pub fn activate() { + use crate::sys_common::thread_local_key::StaticKey; + + static KEY: StaticKey = StaticKey::new(Some(run_dtors)); + + unsafe { + KEY.set(ptr::invalid_mut(1)); + } } diff --git a/library/std/src/sys/pal/unix/thread_local_guard.rs b/library/std/src/sys/pal/unix/thread_local_guard.rs new file mode 100644 index 0000000000000..9ce13c19ead42 --- /dev/null +++ b/library/std/src/sys/pal/unix/thread_local_guard.rs @@ -0,0 +1,118 @@ +//! Ensures that thread-local destructors are run on thread exit. + +#![cfg(target_thread_local)] +#![unstable(feature = "thread_local_internals", issue = "none")] + +use crate::ptr; +use crate::sys::common::thread_local::run_dtors; + +// Since what appears to be glibc 2.18 this symbol has been shipped which +// GCC and clang both use to invoke destructors in thread_local globals, so +// let's do the same! +// +// Note, however, that we run on lots of older Linuxes, as well as cross +// compiling from a newer Linux to an older Linux, so we also have a +// fallback implementation to use as well. +#[cfg_attr(bootstrap, allow(unexpected_cfgs))] +#[cfg(any( + target_os = "linux", + target_os = "android", + target_os = "fuchsia", + target_os = "redox", + target_os = "hurd", + target_os = "freebsd", + target_os = "netbsd", + target_os = "dragonfly" +))] +// FIXME: The Rust compiler currently omits weakly linked function definitions +// (i.e., __cxa_thread_atexit_impl) and their metadata from LLVM IR. 
+#[no_sanitize(cfi, kcfi)] +pub fn activate() { + use crate::cell::Cell; + use crate::mem; + use crate::sys_common::thread_local_key::StaticKey; + + /// This is necessary because the __cxa_thread_atexit_impl implementation + /// std links to by default may be a C or C++ implementation that was not + /// compiled using the Clang integer normalization option. + #[cfg(sanitizer_cfi_normalize_integers)] + use core::ffi::c_int; + #[cfg(not(sanitizer_cfi_normalize_integers))] + #[cfi_encoding = "i"] + #[repr(transparent)] + pub struct c_int(pub libc::c_int); + + extern "C" { + #[linkage = "extern_weak"] + static __dso_handle: *mut u8; + #[linkage = "extern_weak"] + static __cxa_thread_atexit_impl: Option< + extern "C" fn( + unsafe extern "C" fn(*mut libc::c_void), + *mut libc::c_void, + *mut libc::c_void, + ) -> c_int, + >; + } + + unsafe { + if let Some(atexit) = __cxa_thread_atexit_impl { + #[thread_local] + static REGISTERED: Cell<bool> = Cell::new(false); + if !REGISTERED.get() { + atexit( + mem::transmute::< + unsafe extern "C" fn(*mut u8), + unsafe extern "C" fn(*mut libc::c_void), + >(run_dtors), + ptr::null_mut(), + &__dso_handle as *const _ as *mut _, + ); + REGISTERED.set(true); + } + } else { + static KEY: StaticKey = StaticKey::new(Some(run_dtors)); + + KEY.set(ptr::invalid_mut(1)); + } + } +} + +// We hook into macOS's analog of the above linux function, _tlv_atexit. OSX +// will run `run_dtors` before any TLS slots get freed, and when the main thread +// exits. 
+#[cfg(any(target_os = "macos", target_os = "ios", target_os = "watchos", target_os = "tvos"))] +pub fn activate() { + use crate::cell::Cell; + + extern "C" { + fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8); + } + + #[thread_local] + static REGISTERED: Cell<bool> = Cell::new(false); + + if !REGISTERED.get() { + unsafe { + _tlv_atexit(run_dtors, ptr::null_mut()); + REGISTERED.set(true); + } + } +} + +#[cfg(any( + target_os = "vxworks", + target_os = "horizon", + target_os = "emscripten", + target_os = "aix" +))] +#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. wasm32-unknown-emscripten) +pub fn activate() { + use crate::sys_common::thread_local_key::StaticKey; + + static KEY: StaticKey = StaticKey::new(Some(run_dtors)); + + unsafe { + KEY.set(ptr::invalid_mut(1)); + } +} diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs index 01f83ecb41452..3e74e7f19d55e 100644 --- a/library/std/src/sys_common/mod.rs +++ b/library/std/src/sys_common/mod.rs @@ -29,7 +29,6 @@ pub mod once; pub mod process; pub mod thread; pub mod thread_info; -pub mod thread_local_dtor; pub mod thread_parking; pub mod wstr; pub mod wtf8; diff --git a/library/std/src/sys_common/thread_local_dtor.rs b/library/std/src/sys_common/thread_local_dtor.rs deleted file mode 100644 index 98382fc6acc23..0000000000000 --- a/library/std/src/sys_common/thread_local_dtor.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! Thread-local destructor -//! -//! Besides thread-local "keys" (pointer-sized non-addressable thread-local store -//! with an associated destructor), many platforms also provide thread-local -//! destructors that are not associated with any particular data. These are -//! often more efficient. -//! -//! This module provides a fallback implementation for that interface, based -//! on the less efficient thread-local "keys". Each platform provides -//! 
a `thread_local_dtor` module which will either re-export the fallback, -//! or implement something more efficient. - -#![unstable(feature = "thread_local_internals", issue = "none")] -#![allow(dead_code)] - -use crate::cell::RefCell; -use crate::ptr; -use crate::sys_common::thread_local_key::StaticKey; - -pub unsafe fn register_dtor_fallback(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - // The fallback implementation uses a vanilla OS-based TLS key to track - // the list of destructors that need to be run for this thread. The key - // then has its own destructor which runs all the other destructors. - // - // The destructor for DTORS is a little special in that it has a `while` - // loop to continuously drain the list of registered destructors. It - // *should* be the case that this loop always terminates because we - // provide the guarantee that a TLS key cannot be set after it is - // flagged for destruction. - - static DTORS: StaticKey = StaticKey::new(Some(run_dtors)); - // FIXME(joboet): integrate RefCell into pointer to avoid infinite recursion - // when the global allocator tries to register a destructor and just panic - // instead. 
- type List = RefCell>; - if DTORS.get().is_null() { - let v: Box = Box::new(RefCell::new(Vec::new())); - DTORS.set(Box::into_raw(v) as *mut u8); - } - let list = &*(DTORS.get() as *const List); - match list.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } - - unsafe extern "C" fn run_dtors(mut ptr: *mut u8) { - while !ptr.is_null() { - let list = Box::from_raw(ptr as *mut List).into_inner(); - for (ptr, dtor) in list.into_iter() { - dtor(ptr); - } - ptr = DTORS.get(); - DTORS.set(ptr::null_mut()); - } - } -} From 2e425cfd0eb7965fb2b17177e024689291125f2b Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 17 Oct 2023 17:44:17 +0200 Subject: [PATCH 3/4] std: refactor Windows TLS destructor support Now that the fallback code has been removed, there are no users of `StaticKey` left for targets with native TLS. Therefore, move the at-exit hack to the thread-local guard module, where it can be shared by both implementations, and cfg-out the key-based TLS when it's not needed. 
--- library/std/src/sys/pal/windows/c.rs | 1 + library/std/src/sys/pal/windows/mod.rs | 2 +- .../src/sys/pal/windows/thread_local_dtor.rs | 7 - .../src/sys/pal/windows/thread_local_guard.rs | 120 +++++++++++++ .../src/sys/pal/windows/thread_local_key.rs | 169 +----------------- library/std/src/sys_common/mod.rs | 1 + src/tools/miri/src/shims/tls.rs | 2 +- 7 files changed, 130 insertions(+), 172 deletions(-) delete mode 100644 library/std/src/sys/pal/windows/thread_local_dtor.rs create mode 100644 library/std/src/sys/pal/windows/thread_local_guard.rs diff --git a/library/std/src/sys/pal/windows/c.rs b/library/std/src/sys/pal/windows/c.rs index 1a59ac9a9cadf..12c2a4989c2aa 100644 --- a/library/std/src/sys/pal/windows/c.rs +++ b/library/std/src/sys/pal/windows/c.rs @@ -55,6 +55,7 @@ pub const EXIT_FAILURE: u32 = 1; pub const CONDITION_VARIABLE_INIT: CONDITION_VARIABLE = CONDITION_VARIABLE { Ptr: ptr::null_mut() }; pub const SRWLOCK_INIT: SRWLOCK = SRWLOCK { Ptr: ptr::null_mut() }; +#[cfg(not(target_thread_local))] // Only used by key-based TLS. pub const INIT_ONCE_STATIC_INIT: INIT_ONCE = INIT_ONCE { Ptr: ptr::null_mut() }; // Some windows_sys types have different signs than the types we use. diff --git a/library/std/src/sys/pal/windows/mod.rs b/library/std/src/sys/pal/windows/mod.rs index 726a4509f280f..20a0ccda525e1 100644 --- a/library/std/src/sys/pal/windows/mod.rs +++ b/library/std/src/sys/pal/windows/mod.rs @@ -28,7 +28,7 @@ pub mod process; pub mod rand; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; +pub mod thread_local_guard; pub mod thread_local_key; pub mod thread_parking; pub mod time; diff --git a/library/std/src/sys/pal/windows/thread_local_dtor.rs b/library/std/src/sys/pal/windows/thread_local_dtor.rs deleted file mode 100644 index cf542d2bfb838..0000000000000 --- a/library/std/src/sys/pal/windows/thread_local_dtor.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Implements thread-local destructors that are not associated with any -//! 
particular data. - -#![unstable(feature = "thread_local_internals", issue = "none")] -#![cfg(target_thread_local)] - -pub use super::thread_local_key::register_keyless_dtor as register_dtor; diff --git a/library/std/src/sys/pal/windows/thread_local_guard.rs b/library/std/src/sys/pal/windows/thread_local_guard.rs new file mode 100644 index 0000000000000..712e502a2e3f0 --- /dev/null +++ b/library/std/src/sys/pal/windows/thread_local_guard.rs @@ -0,0 +1,120 @@ +//! A TLS destructor system. +//! +//! Turns out, like pretty much everything, Windows is pretty close to the +//! functionality that Unix provides, but slightly different! In the case of +//! TLS, Windows does not provide an API to provide a destructor for a TLS +//! variable. This ends up being pretty crucial to this implementation, so we +//! need a way around this. +//! +//! The solution here ended up being a little obscure, but fear not, the +//! internet has informed me [1][2] that this solution is not unique (no way +//! I could have thought of it as well!). The key idea is to insert some hook +//! somewhere to run arbitrary code on thread termination. With this in place +//! we'll be able to run anything we like, including all TLS destructors! +//! +//! If you're looking at this code, and wondering "what is this doing?", +//! you're not alone! I'll try to break this down step by step: +//! +//! # What's up with CRT$XLB? +//! +//! For anything about TLS destructors to work on Windows, we have to be able +//! to run *something* when a thread exits. To do so, we place a very special +//! static in a very special location. If this is encoded in just the right +//! way, the kernel's loader is apparently nice enough to run some function +//! of ours whenever a thread exits! How nice of the kernel! +//! +//! Lots of detailed information can be found in source [1] above, but the +//! gist of it is that this is leveraging a feature of Microsoft's PE format +//! 
(executable format) which is not actually used by any compilers today. +//! This apparently translates to any callbacks in the ".CRT$XLB" section +//! being run on certain events. +//! +//! So after all that, we use the compiler's #[link_section] feature to place +//! a callback pointer into the magic section so it ends up being called. +//! +//! # What's up with this callback? +//! +//! The callback specified receives a number of parameters from... someone! +//! (the kernel? the runtime? I'm not quite sure!) There are a few events that +//! this gets invoked for, but we're currently only interested in when a +//! thread or a process "detaches" (exits). The process part happens for the +//! last thread and the thread part happens for any normal thread. +//! +//! # The article mentions weird stuff about "/INCLUDE"? +//! +//! It sure does! Specifically we're talking about this quote: +//! +//! > The Microsoft run-time library facilitates this process by defining a +//! > memory image of the TLS Directory and giving it the special name +//! > “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The +//! > linker looks for this memory image and uses the data there to create the +//! > TLS Directory. Other compilers that support TLS and work with the +//! > Microsoft linker must use this same technique. +//! +//! Basically what this means is that if we want support for our TLS +//! destructors/our hook being called then we need to make sure the linker does +//! not omit this symbol. Otherwise it will omit it and our callback won't be +//! wired up. +//! +//! We don't actually use the `/INCLUDE` linker flag here like the article +//! mentions because the Rust compiler doesn't propagate linker flags, but +//! instead we use a shim function which performs a volatile 1-byte load from +//! the address of the symbol to ensure it sticks around. +//! +//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way +//! 
[2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 + +#![unstable(feature = "thread_local_internals", issue = "none")] + +use crate::ptr; +use crate::sync::atomic::{ + AtomicBool, + Ordering::{Acquire, Relaxed}, +}; +use crate::sys::c; + +// If the target uses native TLS, run its destructors. +#[cfg(target_thread_local)] +use crate::sys::common::thread_local::run_dtors; +// Otherwise, run the destructors for the key-based variant. +#[cfg(not(target_thread_local))] +use super::thread_local_key::run_dtors; + +/// An optimization hint. The compiler is often smart enough to know if an atomic +/// is never set and can remove dead code based on that fact. +static HAS_DTORS: AtomicBool = AtomicBool::new(false); + +/// Ensure that thread-locals are destroyed when the thread exits. +pub fn activate() { + HAS_DTORS.store(true, Relaxed); +} + +#[link_section = ".CRT$XLB"] +#[allow(dead_code, unused_variables)] +#[used] // we don't want LLVM eliminating this symbol for any reason, and +// when the symbol makes it to the linker the linker will take over +pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) = + on_tls_callback; + +#[allow(dead_code, unused_variables)] +unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) { + if !HAS_DTORS.load(Acquire) { + return; + } + if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH { + run_dtors(ptr::null_mut()); + } + + // See comments above for what this is doing. Note that we don't need this + // trickery on GNU windows, just on MSVC. 
+ reference_tls_used(); + #[cfg(target_env = "msvc")] + unsafe fn reference_tls_used() { + extern "C" { + static _tls_used: u8; + } + crate::intrinsics::volatile_load(&_tls_used); + } + #[cfg(not(target_env = "msvc"))] + unsafe fn reference_tls_used() {} +} diff --git a/library/std/src/sys/pal/windows/thread_local_key.rs b/library/std/src/sys/pal/windows/thread_local_key.rs index 5eee4a9667ba4..a62ead43d494a 100644 --- a/library/std/src/sys/pal/windows/thread_local_key.rs +++ b/library/std/src/sys/pal/windows/thread_local_key.rs @@ -1,7 +1,9 @@ +#![cfg(not(target_thread_local))] + use crate::cell::UnsafeCell; use crate::ptr; use crate::sync::atomic::{ - AtomicBool, AtomicPtr, AtomicU32, + AtomicPtr, AtomicU32, Ordering::{AcqRel, Acquire, Relaxed, Release}, }; use crate::sys::c; @@ -9,81 +11,9 @@ use crate::sys::c; #[cfg(test)] mod tests; -/// An optimization hint. The compiler is often smart enough to know if an atomic -/// is never set and can remove dead code based on that fact. -static HAS_DTORS: AtomicBool = AtomicBool::new(false); - -// Using a per-thread list avoids the problems in synchronizing global state. -#[thread_local] -#[cfg(target_thread_local)] -static DESTRUCTORS: crate::cell::RefCell> = - crate::cell::RefCell::new(Vec::new()); - -// Ensure this can never be inlined because otherwise this may break in dylibs. -// See #44391. -#[inline(never)] -#[cfg(target_thread_local)] -pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - match DESTRUCTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } - - HAS_DTORS.store(true, Relaxed); -} - -#[inline(never)] // See comment above -#[cfg(target_thread_local)] -/// Runs destructors. This should not be called until thread exit. -unsafe fn run_keyless_dtors() { - // Drop all the destructors. 
- // - // Note: While this is potentially an infinite loop, it *should* be - // the case that this loop always terminates because we provide the - // guarantee that a TLS key cannot be set after it is flagged for - // destruction. - loop { - // Use a let-else binding to ensure the `RefCell` guard is dropped - // immediately. Otherwise, a panic would occur if a TLS destructor - // tries to access the list. - let Some((ptr, dtor)) = DESTRUCTORS.borrow_mut().pop() else { - break; - }; - (dtor)(ptr); - } - // We're done so free the memory. - DESTRUCTORS.replace(Vec::new()); -} - type Key = c::DWORD; type Dtor = unsafe extern "C" fn(*mut u8); -// Turns out, like pretty much everything, Windows is pretty close the -// functionality that Unix provides, but slightly different! In the case of -// TLS, Windows does not provide an API to provide a destructor for a TLS -// variable. This ends up being pretty crucial to this implementation, so we -// need a way around this. -// -// The solution here ended up being a little obscure, but fear not, the -// internet has informed me [1][2] that this solution is not unique (no way -// I could have thought of it as well!). The key idea is to insert some hook -// somewhere to run arbitrary code on thread termination. With this in place -// we'll be able to run anything we like, including all TLS destructors! -// -// To accomplish this feat, we perform a number of threads, all contained -// within this module: -// -// * All TLS destructors are tracked by *us*, not the Windows runtime. This -// means that we have a global list of destructors for each TLS key that -// we know about. -// * When a thread exits, we run over the entire list and run dtors for all -// non-null keys. This attempts to match Unix semantics in this regard. -// -// For more details and nitty-gritty, see the code sections below! 
-// -// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way -// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 - pub struct StaticKey { /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX /// is not a valid key value, this allows us to use zero as sentinel value @@ -215,41 +145,10 @@ unsafe fn register_dtor(key: &'static StaticKey) { Err(new) => head = new, } } - HAS_DTORS.store(true, Release); + super::thread_local_guard::activate(); } -// ------------------------------------------------------------------------- -// Where the Magic (TM) Happens -// -// If you're looking at this code, and wondering "what is this doing?", -// you're not alone! I'll try to break this down step by step: -// -// # What's up with CRT$XLB? -// -// For anything about TLS destructors to work on Windows, we have to be able -// to run *something* when a thread exits. To do so, we place a very special -// static in a very special location. If this is encoded in just the right -// way, the kernel's loader is apparently nice enough to run some function -// of ours whenever a thread exits! How nice of the kernel! -// -// Lots of detailed information can be found in source [1] above, but the -// gist of it is that this is leveraging a feature of Microsoft's PE format -// (executable format) which is not actually used by any compilers today. -// This apparently translates to any callbacks in the ".CRT$XLB" section -// being run on certain events. -// -// So after all that, we use the compiler's #[link_section] feature to place -// a callback pointer into the magic section so it ends up being called. -// -// # What's up with this callback? -// -// The callback specified receives a number of parameters from... someone! -// (the kernel? the runtime? I'm not quite sure!) 
There are a few events that -// this gets invoked for, but we're currently only interested on when a -// thread or a process "detaches" (exits). The process part happens for the -// last thread and the thread part happens for any normal thread. -// -// # Ok, what's up with running all these destructors? +// What's up with running all these destructors? // // This will likely need to be improved over time, but this function // attempts a "poor man's" destructor callback system. Once we've got a list @@ -258,63 +157,7 @@ unsafe fn register_dtor(key: &'static StaticKey) { // beforehand). We do this a few times in a loop to basically match Unix // semantics. If we don't reach a fixed point after a short while then we just // inevitably leak something most likely. -// -// # The article mentions weird stuff about "/INCLUDE"? -// -// It sure does! Specifically we're talking about this quote: -// -// The Microsoft run-time library facilitates this process by defining a -// memory image of the TLS Directory and giving it the special name -// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The -// linker looks for this memory image and uses the data there to create the -// TLS Directory. Other compilers that support TLS and work with the -// Microsoft linker must use this same technique. -// -// Basically what this means is that if we want support for our TLS -// destructors/our hook being called then we need to make sure the linker does -// not omit this symbol. Otherwise it will omit it and our callback won't be -// wired up. -// -// We don't actually use the `/INCLUDE` linker flag here like the article -// mentions because the Rust compiler doesn't propagate linker flags, but -// instead we use a shim function which performs a volatile 1-byte load from -// the address of the symbol to ensure it sticks around. 
- -#[link_section = ".CRT$XLB"] -#[allow(dead_code, unused_variables)] -#[used] // we don't want LLVM eliminating this symbol for any reason, and -// when the symbol makes it to the linker the linker will take over -pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) = - on_tls_callback; - -#[allow(dead_code, unused_variables)] -unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) { - if !HAS_DTORS.load(Acquire) { - return; - } - if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH { - #[cfg(not(target_thread_local))] - run_dtors(); - #[cfg(target_thread_local)] - run_keyless_dtors(); - } - - // See comments above for what this is doing. Note that we don't need this - // trickery on GNU windows, just on MSVC. - reference_tls_used(); - #[cfg(target_env = "msvc")] - unsafe fn reference_tls_used() { - extern "C" { - static _tls_used: u8; - } - crate::intrinsics::volatile_load(&_tls_used); - } - #[cfg(not(target_env = "msvc"))] - unsafe fn reference_tls_used() {} -} - -#[allow(dead_code)] // actually called below -unsafe fn run_dtors() { +pub(super) unsafe fn run_dtors(_ptr: *mut u8) { for _ in 0..5 { let mut any_run = false; diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs index 3e74e7f19d55e..9fe4e1f38d098 100644 --- a/library/std/src/sys_common/mod.rs +++ b/library/std/src/sys_common/mod.rs @@ -35,6 +35,7 @@ pub mod wtf8; cfg_if::cfg_if! 
{ if #[cfg(target_os = "windows")] { + #[cfg(not(target_thread_local))] pub use crate::sys::thread_local_key; } else { pub mod thread_local_key; diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index b319516c25b9e..ffbae1456471f 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -298,7 +298,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { return Ok(()); } let thread_callback = - this.eval_windows("thread_local_key", "p_thread_callback").to_pointer(this)?; + this.eval_windows("thread_local_guard", "p_thread_callback").to_pointer(this)?; let thread_callback = this.get_ptr_fn(thread_callback)?.as_instance()?; // FIXME: Technically, the reason should be `DLL_PROCESS_DETACH` when the main thread exits From 5e1727c7387618b4a8b722ad9f7cfb8d3b6d2d9f Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 17 Oct 2023 17:45:22 +0200 Subject: [PATCH 4/4] std: complete unifying the TLS destructor list implementations --- library/std/src/sys/pal/hermit/mod.rs | 6 +-- library/std/src/sys/pal/hermit/thread.rs | 3 +- .../src/sys/pal/hermit/thread_local_dtor.rs | 29 ------------- .../src/sys/pal/hermit/thread_local_guard.rs | 6 +++ library/std/src/sys/pal/itron/thread.rs | 6 +-- library/std/src/sys/pal/solid/mod.rs | 2 +- .../src/sys/pal/solid/thread_local_dtor.rs | 43 ------------------- .../src/sys/pal/solid/thread_local_guard.rs | 21 +++++++++ 8 files changed, 36 insertions(+), 80 deletions(-) delete mode 100644 library/std/src/sys/pal/hermit/thread_local_dtor.rs create mode 100644 library/std/src/sys/pal/hermit/thread_local_guard.rs delete mode 100644 library/std/src/sys/pal/solid/thread_local_dtor.rs create mode 100644 library/std/src/sys/pal/solid/thread_local_guard.rs diff --git a/library/std/src/sys/pal/hermit/mod.rs b/library/std/src/sys/pal/hermit/mod.rs index 57cc656e266a1..c0956499bc85f 100644 --- a/library/std/src/sys/pal/hermit/mod.rs +++ 
b/library/std/src/sys/pal/hermit/mod.rs @@ -34,7 +34,7 @@ pub mod pipe; pub mod process; pub mod stdio; pub mod thread; -pub mod thread_local_dtor; +pub mod thread_local_guard; #[path = "../unsupported/thread_local_key.rs"] pub mod thread_local_key; pub mod time; @@ -109,7 +109,7 @@ pub unsafe extern "C" fn runtime_entry( argv: *const *const c_char, env: *const *const c_char, ) -> ! { - use thread_local_dtor::run_dtors; + use crate::sys::common::thread_local::run_dtors; extern "C" { fn main(argc: isize, argv: *const *const c_char) -> i32; } @@ -119,7 +119,7 @@ pub unsafe extern "C" fn runtime_entry( let result = main(argc as isize, argv); - run_dtors(); + run_dtors(crate::ptr::null_mut()); abi::exit(result); } diff --git a/library/std/src/sys/pal/hermit/thread.rs b/library/std/src/sys/pal/hermit/thread.rs index fee80c02d4a6f..5928d0941fa3f 100644 --- a/library/std/src/sys/pal/hermit/thread.rs +++ b/library/std/src/sys/pal/hermit/thread.rs @@ -7,6 +7,7 @@ use crate::io; use crate::mem; use crate::num::NonZero; use crate::ptr; +use crate::sys::common::thread_local::run_dtors; use crate::time::Duration; pub type Tid = abi::Tid; @@ -50,7 +51,7 @@ impl Thread { Box::from_raw(ptr::from_exposed_addr::>(main).cast_mut())(); // run all destructors - run_dtors(); + run_dtors(ptr::null_mut()); } } } diff --git a/library/std/src/sys/pal/hermit/thread_local_dtor.rs b/library/std/src/sys/pal/hermit/thread_local_dtor.rs deleted file mode 100644 index 98adaf4bff1aa..0000000000000 --- a/library/std/src/sys/pal/hermit/thread_local_dtor.rs +++ /dev/null @@ -1,29 +0,0 @@ -#![cfg(target_thread_local)] -#![unstable(feature = "thread_local_internals", issue = "none")] - -// Simplify dtor registration by using a list of destructors. 
-// The this solution works like the implementation of macOS and -// doesn't additional OS support - -use crate::cell::RefCell; - -#[thread_local] -static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new()); - -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } -} - -// every thread call this function to run through all possible destructors -pub unsafe fn run_dtors() { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - dtor(ptr); - } - list = DTORS.take(); - } -} diff --git a/library/std/src/sys/pal/hermit/thread_local_guard.rs b/library/std/src/sys/pal/hermit/thread_local_guard.rs new file mode 100644 index 0000000000000..16d4cb1b91056 --- /dev/null +++ b/library/std/src/sys/pal/hermit/thread_local_guard.rs @@ -0,0 +1,6 @@ +#![cfg(target_thread_local)] +#![unstable(feature = "thread_local_internals", issue = "none")] + +pub fn activate() { + // run_dtors is always executed by the threading support. +} diff --git a/library/std/src/sys/pal/itron/thread.rs b/library/std/src/sys/pal/itron/thread.rs index 9c1387bf4083a..4aec8c9130510 100644 --- a/library/std/src/sys/pal/itron/thread.rs +++ b/library/std/src/sys/pal/itron/thread.rs @@ -12,9 +12,9 @@ use crate::{ hint, io, mem::ManuallyDrop, num::NonZero, - ptr::NonNull, + ptr::{self, NonNull}, sync::atomic::{AtomicUsize, Ordering}, - sys::thread_local_dtor::run_dtors, + sys::common::thread_local::run_dtors, time::Duration, }; @@ -116,7 +116,7 @@ impl Thread { // Run TLS destructors now because they are not // called automatically for terminated tasks.
- unsafe { run_dtors() }; + unsafe { run_dtors(ptr::null_mut()) }; let old_lifecycle = inner .lifecycle diff --git a/library/std/src/sys/pal/solid/mod.rs b/library/std/src/sys/pal/solid/mod.rs index be8e00339021f..fc5d55c758c23 100644 --- a/library/std/src/sys/pal/solid/mod.rs +++ b/library/std/src/sys/pal/solid/mod.rs @@ -36,7 +36,7 @@ pub mod process; pub mod stdio; pub use self::itron::thread; pub mod memchr; -pub mod thread_local_dtor; +pub mod thread_local_guard; pub mod thread_local_key; pub use self::itron::thread_parking; pub mod time; diff --git a/library/std/src/sys/pal/solid/thread_local_dtor.rs b/library/std/src/sys/pal/solid/thread_local_dtor.rs deleted file mode 100644 index 26918a4fcb012..0000000000000 --- a/library/std/src/sys/pal/solid/thread_local_dtor.rs +++ /dev/null @@ -1,43 +0,0 @@ -#![cfg(target_thread_local)] -#![unstable(feature = "thread_local_internals", issue = "none")] - -// Simplify dtor registration by using a list of destructors. - -use super::{abi, itron::task}; -use crate::cell::{Cell, RefCell}; - -#[thread_local] -static REGISTERED: Cell<bool> = Cell::new(false); - -#[thread_local] -static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new()); - -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - if !REGISTERED.get() { - let tid = task::current_task_id_aborting(); - // Register `tls_dtor` to make sure the TLS destructors are called - // for tasks created by other means than `std::thread` - unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, tls_dtor) }; - REGISTERED.set(true); - } - - match DTORS.try_borrow_mut() { - Ok(mut dtors) => dtors.push((t, dtor)), - Err(_) => rtabort!("global allocator may not use TLS"), - } -} - -pub unsafe fn run_dtors() { - let mut list = DTORS.take(); - while !list.is_empty() { - for (ptr, dtor) in list { - unsafe { dtor(ptr) }; - } - - list = DTORS.take(); - } -} - -unsafe extern "C" fn tls_dtor(_unused: *mut u8) { - unsafe { run_dtors() }; -} diff --git
a/library/std/src/sys/pal/solid/thread_local_guard.rs b/library/std/src/sys/pal/solid/thread_local_guard.rs new file mode 100644 index 0000000000000..986d40aa5fc75 --- /dev/null +++ b/library/std/src/sys/pal/solid/thread_local_guard.rs @@ -0,0 +1,21 @@ +//! Ensures that thread-local destructors are run on thread exit. + +#![cfg(target_thread_local)] +#![unstable(feature = "thread_local_internals", issue = "none")] + +use super::{abi, itron::task}; +use crate::cell::Cell; +use crate::sys::common::thread_local::run_dtors; + +#[thread_local] +static REGISTERED: Cell<bool> = Cell::new(false); + +pub fn activate() { + if !REGISTERED.get() { + let tid = task::current_task_id_aborting(); + // Register `run_dtors` to make sure the TLS destructors are called + // for tasks created by other means than `std::thread` + unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, run_dtors) }; + REGISTERED.set(true); + } +}