1
+ #![ cfg( not( target_thread_local) ) ]
2
+
1
3
use crate :: cell:: UnsafeCell ;
2
4
use crate :: ptr;
3
5
use crate :: sync:: atomic:: {
4
- AtomicBool , AtomicPtr , AtomicU32 ,
6
+ AtomicPtr , AtomicU32 ,
5
7
Ordering :: { AcqRel , Acquire , Relaxed , Release } ,
6
8
} ;
7
9
use crate :: sys:: c;
8
10
9
11
#[ cfg( test) ]
10
12
mod tests;
11
13
12
- /// An optimization hint. The compiler is often smart enough to know if an atomic
13
- /// is never set and can remove dead code based on that fact.
14
- static HAS_DTORS : AtomicBool = AtomicBool :: new ( false ) ;
15
-
16
- // Using a per-thread list avoids the problems in synchronizing global state.
17
- #[ thread_local]
18
- #[ cfg( target_thread_local) ]
19
- static DESTRUCTORS : crate :: cell:: RefCell < Vec < ( * mut u8 , unsafe extern "C" fn ( * mut u8 ) ) > > =
20
- crate :: cell:: RefCell :: new ( Vec :: new ( ) ) ;
21
-
22
- // Ensure this can never be inlined because otherwise this may break in dylibs.
23
- // See #44391.
24
- #[ inline( never) ]
25
- #[ cfg( target_thread_local) ]
26
- pub unsafe fn register_keyless_dtor ( t : * mut u8 , dtor : unsafe extern "C" fn ( * mut u8 ) ) {
27
- match DESTRUCTORS . try_borrow_mut ( ) {
28
- Ok ( mut dtors) => dtors. push ( ( t, dtor) ) ,
29
- Err ( _) => rtabort ! ( "global allocator may not use TLS" ) ,
30
- }
31
-
32
- HAS_DTORS . store ( true , Relaxed ) ;
33
- }
34
-
35
- #[ inline( never) ] // See comment above
36
- #[ cfg( target_thread_local) ]
37
- /// Runs destructors. This should not be called until thread exit.
38
- unsafe fn run_keyless_dtors ( ) {
39
- // Drop all the destructors.
40
- //
41
- // Note: While this is potentially an infinite loop, it *should* be
42
- // the case that this loop always terminates because we provide the
43
- // guarantee that a TLS key cannot be set after it is flagged for
44
- // destruction.
45
- loop {
46
- // Use a let-else binding to ensure the `RefCell` guard is dropped
47
- // immediately. Otherwise, a panic would occur if a TLS destructor
48
- // tries to access the list.
49
- let Some ( ( ptr, dtor) ) = DESTRUCTORS . borrow_mut ( ) . pop ( ) else {
50
- break ;
51
- } ;
52
- ( dtor) ( ptr) ;
53
- }
54
- // We're done so free the memory.
55
- DESTRUCTORS . replace ( Vec :: new ( ) ) ;
56
- }
57
-
58
14
type Key = c:: DWORD ;
59
15
type Dtor = unsafe extern "C" fn ( * mut u8 ) ;
60
16
61
- // Turns out, like pretty much everything, Windows is pretty close to the
62
- // functionality that Unix provides, but slightly different! In the case of
63
- // TLS, Windows does not provide an API to provide a destructor for a TLS
64
- // variable. This ends up being pretty crucial to this implementation, so we
65
- // need a way around this.
66
- //
67
- // The solution here ended up being a little obscure, but fear not, the
68
- // internet has informed me [1][2] that this solution is not unique (no way
69
- // I could have thought of it as well!). The key idea is to insert some hook
70
- // somewhere to run arbitrary code on thread termination. With this in place
71
- // we'll be able to run anything we like, including all TLS destructors!
72
- //
73
- // To accomplish this feat, we perform a number of tricks, all contained
74
- // within this module:
75
- //
76
- // * All TLS destructors are tracked by *us*, not the Windows runtime. This
77
- // means that we have a global list of destructors for each TLS key that
78
- // we know about.
79
- // * When a thread exits, we run over the entire list and run dtors for all
80
- // non-null keys. This attempts to match Unix semantics in this regard.
81
- //
82
- // For more details and nitty-gritty, see the code sections below!
83
- //
84
- // [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
85
- // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
86
-
87
17
pub struct StaticKey {
88
18
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
89
19
/// is not a valid key value, this allows us to use zero as sentinel value
@@ -215,41 +145,10 @@ unsafe fn register_dtor(key: &'static StaticKey) {
215
145
Err ( new) => head = new,
216
146
}
217
147
}
218
- HAS_DTORS . store ( true , Release ) ;
148
+ super :: thread_local_guard :: activate ( ) ;
219
149
}
220
150
221
- // -------------------------------------------------------------------------
222
- // Where the Magic (TM) Happens
223
- //
224
- // If you're looking at this code, and wondering "what is this doing?",
225
- // you're not alone! I'll try to break this down step by step:
226
- //
227
- // # What's up with CRT$XLB?
228
- //
229
- // For anything about TLS destructors to work on Windows, we have to be able
230
- // to run *something* when a thread exits. To do so, we place a very special
231
- // static in a very special location. If this is encoded in just the right
232
- // way, the kernel's loader is apparently nice enough to run some function
233
- // of ours whenever a thread exits! How nice of the kernel!
234
- //
235
- // Lots of detailed information can be found in source [1] above, but the
236
- // gist of it is that this is leveraging a feature of Microsoft's PE format
237
- // (executable format) which is not actually used by any compilers today.
238
- // This apparently translates to any callbacks in the ".CRT$XLB" section
239
- // being run on certain events.
240
- //
241
- // So after all that, we use the compiler's #[link_section] feature to place
242
- // a callback pointer into the magic section so it ends up being called.
243
- //
244
- // # What's up with this callback?
245
- //
246
- // The callback specified receives a number of parameters from... someone!
247
- // (the kernel? the runtime? I'm not quite sure!) There are a few events that
248
- // this gets invoked for, but we're currently only interested in when a
249
- // thread or a process "detaches" (exits). The process part happens for the
250
- // last thread and the thread part happens for any normal thread.
251
- //
252
- // # Ok, what's up with running all these destructors?
151
+ // What's up with running all these destructors?
253
152
//
254
153
// This will likely need to be improved over time, but this function
255
154
// attempts a "poor man's" destructor callback system. Once we've got a list
@@ -258,63 +157,7 @@ unsafe fn register_dtor(key: &'static StaticKey) {
258
157
// beforehand). We do this a few times in a loop to basically match Unix
259
158
// semantics. If we don't reach a fixed point after a short while then we just
260
159
// inevitably leak something most likely.
261
- //
262
- // # The article mentions weird stuff about "/INCLUDE"?
263
- //
264
- // It sure does! Specifically we're talking about this quote:
265
- //
266
- // The Microsoft run-time library facilitates this process by defining a
267
- // memory image of the TLS Directory and giving it the special name
268
- // “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
269
- // linker looks for this memory image and uses the data there to create the
270
- // TLS Directory. Other compilers that support TLS and work with the
271
- // Microsoft linker must use this same technique.
272
- //
273
- // Basically what this means is that if we want support for our TLS
274
- // destructors/our hook being called then we need to make sure the linker does
275
- // not omit this symbol. Otherwise it will omit it and our callback won't be
276
- // wired up.
277
- //
278
- // We don't actually use the `/INCLUDE` linker flag here like the article
279
- // mentions because the Rust compiler doesn't propagate linker flags, but
280
- // instead we use a shim function which performs a volatile 1-byte load from
281
- // the address of the symbol to ensure it sticks around.
282
-
283
- #[ link_section = ".CRT$XLB" ]
284
- #[ allow( dead_code, unused_variables) ]
285
- #[ used] // we don't want LLVM eliminating this symbol for any reason, and
286
- // when the symbol makes it to the linker the linker will take over
287
- pub static p_thread_callback: unsafe extern "system" fn ( c:: LPVOID , c:: DWORD , c:: LPVOID ) =
288
- on_tls_callback;
289
-
290
- #[ allow( dead_code, unused_variables) ]
291
- unsafe extern "system" fn on_tls_callback ( h : c:: LPVOID , dwReason : c:: DWORD , pv : c:: LPVOID ) {
292
- if !HAS_DTORS . load ( Acquire ) {
293
- return ;
294
- }
295
- if dwReason == c:: DLL_THREAD_DETACH || dwReason == c:: DLL_PROCESS_DETACH {
296
- #[ cfg( not( target_thread_local) ) ]
297
- run_dtors ( ) ;
298
- #[ cfg( target_thread_local) ]
299
- run_keyless_dtors ( ) ;
300
- }
301
-
302
- // See comments above for what this is doing. Note that we don't need this
303
- // trickery on GNU windows, just on MSVC.
304
- reference_tls_used ( ) ;
305
- #[ cfg( target_env = "msvc" ) ]
306
- unsafe fn reference_tls_used ( ) {
307
- extern "C" {
308
- static _tls_used: u8 ;
309
- }
310
- crate :: intrinsics:: volatile_load ( & _tls_used) ;
311
- }
312
- #[ cfg( not( target_env = "msvc" ) ) ]
313
- unsafe fn reference_tls_used ( ) { }
314
- }
315
-
316
- #[ allow( dead_code) ] // actually called below
317
- unsafe fn run_dtors ( ) {
160
+ pub ( super ) unsafe fn run_dtors ( _ptr : * mut u8 ) {
318
161
for _ in 0 ..5 {
319
162
let mut any_run = false ;
320
163
0 commit comments