src/libstd/sys/windows/thread_local.rs - rust - Git at Google

 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 use ptr;
 use sys::c;
 use sys_common::mutex::Mutex;
 use sys_common;

 pub type Key = c::DWORD;
 pub type Dtor = unsafe extern fn(*mut u8);

 // Turns out, like pretty much everything, Windows is pretty close the
 // functionality that Unix provides, but slightly different! In the case of
 // TLS, Windows does not provide an API to provide a destructor for a TLS
 // variable. This ends up being pretty crucial to this implementation, so we
 // need a way around this.
 //
 // The solution here ended up being a little obscure, but fear not, the
 // internet has informed me [1][2] that this solution is not unique (no way
 // I could have thought of it as well!). The key idea is to insert some hook
 // somewhere to run arbitrary code on thread termination. With this in place
 // we'll be able to run anything we like, including all TLS destructors!
 //
 // To accomplish this feat, we perform a number of threads, all contained
 // within this module:
 //
 // * All TLS destructors are tracked by *us*, not the windows runtime. This
 //   means that we have a global list of destructors for each TLS key that
 //   we know about.
 // * When a TLS key is destroyed, we're sure to remove it from the dtor list
 //   if it's in there.
 // * When a thread exits, we run over the entire list and run dtors for all
 //   non-null keys. This attempts to match Unix semantics in this regard.
 //
 // This ends up having the overhead of using a global list, having some
 // locks here and there, and in general just adding some more code bloat. We
 // attempt to optimize runtime by forgetting keys that don't have
 // destructors, but this only gets us so far.
 //
 // For more details and nitty-gritty, see the code sections below!
 //
 // [1]: http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
 //                        /threading/thread_local_storage_win.cc#L42

 // NB these are specifically not types from `std::sync` as they currently rely
 // on poisoning and this module needs to operate at a lower level than requiring
 // the thread infrastructure to be in place (useful on the borders of
 // initialization/destruction).
 static DTOR_LOCK: Mutex = Mutex::new();
 static mut DTORS: *mut Vec<(Key, Dtor)> = ptr::null_mut();

 // -------------------------------------------------------------------------
 // Native bindings
 //
 // This section is just raw bindings to the native functions that Windows
 // provides, There's a few extra calls to deal with destructors.

 #[inline]
 pub unsafe fn create(dtor: Option<Dtor>) -> Key {
     let key = c::TlsAlloc();
     assert!(key != c::TLS_OUT_OF_INDEXES);
     if let Some(f) = dtor {
         register_dtor(key, f);
     }
     return key;
 }

 #[inline]
 pub unsafe fn set(key: Key, value: *mut u8) {
     let r = c::TlsSetValue(key, value as c::LPVOID);
     debug_assert!(r != 0);
 }

 #[inline]
 pub unsafe fn get(key: Key) -> *mut u8 {
     c::TlsGetValue(key) as *mut u8
 }

 #[inline]
 pub unsafe fn destroy(key: Key) {
     if unregister_dtor(key) {
         // FIXME: Currently if a key has a destructor associated with it we
         // can't actually ever unregister it. If we were to
         // unregister it, then any key destruction would have to be
         // serialized with respect to actually running destructors.
         //
         // We want to avoid a race where right before run_dtors runs
         // some destructors TlsFree is called. Allowing the call to
         // TlsFree would imply that the caller understands that *all
         // known threads* are not exiting, which is quite a difficult
         // thing to know!
         //
         // For now we just leak all keys with dtors to "fix" this.
         // Note that source [2] above shows precedent for this sort
         // of strategy.
     } else {
         let r = c::TlsFree(key);
         debug_assert!(r != 0);
     }
 }

 // -------------------------------------------------------------------------
 // Dtor registration
 //
 // These functions are associated with registering and unregistering
 // destructors. They're pretty simple, they just push onto a vector and scan
 // a vector currently.
 //
 // FIXME: This could probably be at least a little faster with a BTree.

 unsafe fn init_dtors() {
     if !DTORS.is_null() { return }

     let dtors = box Vec::<(Key, Dtor)>::new();

     let res = sys_common::at_exit(move|| {
         DTOR_LOCK.lock();
         let dtors = DTORS;
         DTORS = 1 as *mut _;
         Box::from_raw(dtors);
         assert!(DTORS as usize == 1); // can't re-init after destructing
         DTOR_LOCK.unlock();
     });
     if res.is_ok() {
         DTORS = Box::into_raw(dtors);
     } else {
         DTORS = 1 as *mut _;
     }
 }

 unsafe fn register_dtor(key: Key, dtor: Dtor) {
     DTOR_LOCK.lock();
     init_dtors();
     assert!(DTORS as usize != 0);
     assert!(DTORS as usize != 1,
             "cannot create new TLS keys after the main thread has exited");
     (*DTORS).push((key, dtor));
     DTOR_LOCK.unlock();
 }

 unsafe fn unregister_dtor(key: Key) -> bool {
     DTOR_LOCK.lock();
     init_dtors();
     assert!(DTORS as usize != 0);
     assert!(DTORS as usize != 1,
             "cannot unregister destructors after the main thread has exited");
     let ret = {
         let dtors = &mut *DTORS;
         let before = dtors.len();
         dtors.retain(|&(k, _)| k != key);
         dtors.len() != before
     };
     DTOR_LOCK.unlock();
     ret
 }

 // -------------------------------------------------------------------------
 // Where the Magic (TM) Happens
 //
 // If you're looking at this code, and wondering "what is this doing?",
 // you're not alone! I'll try to break this down step by step:
 //
 // # What's up with CRT$XLB?
 //
 // For anything about TLS destructors to work on Windows, we have to be able
 // to run *something* when a thread exits. To do so, we place a very special
 // static in a very special location. If this is encoded in just the right
 // way, the kernel's loader is apparently nice enough to run some function
 // of ours whenever a thread exits! How nice of the kernel!
 //
 // Lots of detailed information can be found in source [1] above, but the
 // gist of it is that this is leveraging a feature of Microsoft's PE format
 // (executable format) which is not actually used by any compilers today.
 // This apparently translates to any callbacks in the ".CRT$XLB" section
 // being run on certain events.
 //
 // So after all that, we use the compiler's #[link_section] feature to place
 // a callback pointer into the magic section so it ends up being called.
 //
 // # What's up with this callback?
 //
 // The callback specified receives a number of parameters from... someone!
 // (the kernel? the runtime? I'm not quite sure!) There are a few events that
 // this gets invoked for, but we're currently only interested on when a
 // thread or a process "detaches" (exits). The process part happens for the
 // last thread and the thread part happens for any normal thread.
 //
 // # Ok, what's up with running all these destructors?
 //
 // This will likely need to be improved over time, but this function
 // attempts a "poor man's" destructor callback system. To do this we clone a
 // local copy of the dtor list to start out with. This is our fudgy attempt
 // to not hold the lock while destructors run and not worry about the list
 // changing while we're looking at it.
 //
 // Once we've got a list of what to run, we iterate over all keys, check
 // their values, and then run destructors if the values turn out to be non
 // null (setting them to null just beforehand). We do this a few times in a
 // loop to basically match Unix semantics. If we don't reach a fixed point
 // after a short while then we just inevitably leak something most likely.
 //
 // # The article mentions crazy stuff about "/INCLUDE"?
 //
 // It sure does! Specifically we're talking about this quote:
 //
 //      The Microsoft run-time library facilitates this process by defining a
 //      memory image of the TLS Directory and giving it the special name
 //      “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
 //      linker looks for this memory image and uses the data there to create the
 //      TLS Directory. Other compilers that support TLS and work with the
 //      Microsoft linker must use this same technique.
 //
 // Basically what this means is that if we want support for our TLS
 // destructors/our hook being called then we need to make sure the linker does
 // not omit this symbol. Otherwise it will omit it and our callback won't be
 // wired up.
 //
 // We don't actually use the `/INCLUDE` linker flag here like the article
 // mentions because the Rust compiler doesn't propagate linker flags, but
 // instead we use a shim function which performs a volatile 1-byte load from
 // the address of the symbol to ensure it sticks around.

 #[link_section = ".CRT$XLB"]
 #[linkage = "external"]
 #[allow(dead_code, unused_variables)]
 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD,
                                                         c::LPVOID) =
         on_tls_callback;

 #[allow(dead_code, unused_variables)]
 unsafe extern "system" fn on_tls_callback(h: c::LPVOID,
                                           dwReason: c::DWORD,
                                           pv: c::LPVOID) {
     if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
         run_dtors();
     }

     // See comments above for what this is doing. Note that we don't need this
     // trickery on GNU windows, just on MSVC.
     reference_tls_used();
     #[cfg(target_env = "msvc")]
     unsafe fn reference_tls_used() {
         extern { static _tls_used: u8; }
         ::intrinsics::volatile_load(&_tls_used);
     }
     #[cfg(not(target_env = "msvc"))]
     unsafe fn reference_tls_used() {}
 }

 #[allow(dead_code)] // actually called above
 unsafe fn run_dtors() {
     let mut any_run = true;
     for _ in 0..5 {
         if !any_run { break }
         any_run = false;
         let dtors = {
             DTOR_LOCK.lock();
             let ret = if DTORS as usize <= 1 {
                 Vec::new()
             } else {
                 (*DTORS).iter().map(|s| *s).collect()
             };
             DTOR_LOCK.unlock();
             ret
         };
         for &(key, dtor) in &dtors {
             let ptr = c::TlsGetValue(key);
             if !ptr.is_null() {
                 c::TlsSetValue(key, ptr::null_mut());
                 dtor(ptr as *mut _);
                 any_run = true;
             }
         }
     }
 }
	// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
	// file at the top-level directory of this distribution and at
	// http://rust-lang.org/COPYRIGHT.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	use ptr;
	use sys::c;
	use sys_common::mutex::Mutex;
	use sys_common;

	pub type Key = c::DWORD;
	pub type Dtor = unsafe extern fn(*mut u8);

	// Turns out, like pretty much everything, Windows is pretty close the
	// functionality that Unix provides, but slightly different! In the case of
	// TLS, Windows does not provide an API to provide a destructor for a TLS
	// variable. This ends up being pretty crucial to this implementation, so we
	// need a way around this.
	//
	// The solution here ended up being a little obscure, but fear not, the
	// internet has informed me [1][2] that this solution is not unique (no way
	// I could have thought of it as well!). The key idea is to insert some hook
	// somewhere to run arbitrary code on thread termination. With this in place
	// we'll be able to run anything we like, including all TLS destructors!
	//
	// To accomplish this feat, we perform a number of threads, all contained
	// within this module:
	//
	// * All TLS destructors are tracked by us, not the windows runtime. This
	// means that we have a global list of destructors for each TLS key that
	// we know about.
	// * When a TLS key is destroyed, we're sure to remove it from the dtor list
	// if it's in there.
	// * When a thread exits, we run over the entire list and run dtors for all
	// non-null keys. This attempts to match Unix semantics in this regard.
	//
	// This ends up having the overhead of using a global list, having some
	// locks here and there, and in general just adding some more code bloat. We
	// attempt to optimize runtime by forgetting keys that don't have
	// destructors, but this only gets us so far.
	//
	// For more details and nitty-gritty, see the code sections below!
	//
	// [1]: http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
	// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
	// /threading/thread_local_storage_win.cc#L42

	// NB these are specifically not types from `std::sync` as they currently rely
	// on poisoning and this module needs to operate at a lower level than requiring
	// the thread infrastructure to be in place (useful on the borders of
	// initialization/destruction).
	static DTOR_LOCK: Mutex = Mutex::new();
	static mut DTORS: *mut Vec<(Key, Dtor)> = ptr::null_mut();

	// -------------------------------------------------------------------------
	// Native bindings
	//
	// This section is just raw bindings to the native functions that Windows
	// provides, There's a few extra calls to deal with destructors.

	#[inline]
	pub unsafe fn create(dtor: Option<Dtor>) -> Key {
	let key = c::TlsAlloc();
	assert!(key != c::TLS_OUT_OF_INDEXES);
	if let Some(f) = dtor {
	register_dtor(key, f);
	}
	return key;
	}

	#[inline]
	pub unsafe fn set(key: Key, value: *mut u8) {
	let r = c::TlsSetValue(key, value as c::LPVOID);
	debug_assert!(r != 0);
	}

	#[inline]
	pub unsafe fn get(key: Key) -> *mut u8 {
	c::TlsGetValue(key) as *mut u8
	}

	#[inline]
	pub unsafe fn destroy(key: Key) {
	if unregister_dtor(key) {
	// FIXME: Currently if a key has a destructor associated with it we
	// can't actually ever unregister it. If we were to
	// unregister it, then any key destruction would have to be
	// serialized with respect to actually running destructors.
	//
	// We want to avoid a race where right before run_dtors runs
	// some destructors TlsFree is called. Allowing the call to
	// TlsFree would imply that the caller understands that *all
	// known threads* are not exiting, which is quite a difficult
	// thing to know!
	//
	// For now we just leak all keys with dtors to "fix" this.
	// Note that source [2] above shows precedent for this sort
	// of strategy.
	} else {
	let r = c::TlsFree(key);
	debug_assert!(r != 0);
	}
	}

	// -------------------------------------------------------------------------
	// Dtor registration
	//
	// These functions are associated with registering and unregistering
	// destructors. They're pretty simple, they just push onto a vector and scan
	// a vector currently.
	//
	// FIXME: This could probably be at least a little faster with a BTree.

	unsafe fn init_dtors() {
	if !DTORS.is_null() { return }

	let dtors = box Vec::<(Key, Dtor)>::new();

	let res = sys_common::at_exit(move\|\| {
	DTOR_LOCK.lock();
	let dtors = DTORS;
	DTORS = 1 as *mut _;
	Box::from_raw(dtors);
	assert!(DTORS as usize == 1); // can't re-init after destructing
	DTOR_LOCK.unlock();
	});
	if res.is_ok() {
	DTORS = Box::into_raw(dtors);
	} else {
	DTORS = 1 as *mut _;
	}
	}

	unsafe fn register_dtor(key: Key, dtor: Dtor) {
	DTOR_LOCK.lock();
	init_dtors();
	assert!(DTORS as usize != 0);
	assert!(DTORS as usize != 1,
	"cannot create new TLS keys after the main thread has exited");
	(*DTORS).push((key, dtor));
	DTOR_LOCK.unlock();
	}

	unsafe fn unregister_dtor(key: Key) -> bool {
	DTOR_LOCK.lock();
	init_dtors();
	assert!(DTORS as usize != 0);
	assert!(DTORS as usize != 1,
	"cannot unregister destructors after the main thread has exited");
	let ret = {
	let dtors = &mut *DTORS;
	let before = dtors.len();
	dtors.retain(\|&(k, _)\| k != key);
	dtors.len() != before
	};
	DTOR_LOCK.unlock();
	ret
	}

	// -------------------------------------------------------------------------
	// Where the Magic (TM) Happens
	//
	// If you're looking at this code, and wondering "what is this doing?",
	// you're not alone! I'll try to break this down step by step:
	//
	// # What's up with CRT$XLB?
	//
	// For anything about TLS destructors to work on Windows, we have to be able
	// to run something when a thread exits. To do so, we place a very special
	// static in a very special location. If this is encoded in just the right
	// way, the kernel's loader is apparently nice enough to run some function
	// of ours whenever a thread exits! How nice of the kernel!
	//
	// Lots of detailed information can be found in source [1] above, but the
	// gist of it is that this is leveraging a feature of Microsoft's PE format
	// (executable format) which is not actually used by any compilers today.
	// This apparently translates to any callbacks in the ".CRT$XLB" section
	// being run on certain events.
	//
	// So after all that, we use the compiler's #[link_section] feature to place
	// a callback pointer into the magic section so it ends up being called.
	//
	// # What's up with this callback?
	//
	// The callback specified receives a number of parameters from... someone!
	// (the kernel? the runtime? I'm not quite sure!) There are a few events that
	// this gets invoked for, but we're currently only interested on when a
	// thread or a process "detaches" (exits). The process part happens for the
	// last thread and the thread part happens for any normal thread.
	//
	// # Ok, what's up with running all these destructors?
	//
	// This will likely need to be improved over time, but this function
	// attempts a "poor man's" destructor callback system. To do this we clone a
	// local copy of the dtor list to start out with. This is our fudgy attempt
	// to not hold the lock while destructors run and not worry about the list
	// changing while we're looking at it.
	//
	// Once we've got a list of what to run, we iterate over all keys, check
	// their values, and then run destructors if the values turn out to be non
	// null (setting them to null just beforehand). We do this a few times in a
	// loop to basically match Unix semantics. If we don't reach a fixed point
	// after a short while then we just inevitably leak something most likely.
	//
	// # The article mentions crazy stuff about "/INCLUDE"?
	//
	// It sure does! Specifically we're talking about this quote:
	//
	// The Microsoft run-time library facilitates this process by defining a
	// memory image of the TLS Directory and giving it the special name
	// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
	// linker looks for this memory image and uses the data there to create the
	// TLS Directory. Other compilers that support TLS and work with the
	// Microsoft linker must use this same technique.
	//
	// Basically what this means is that if we want support for our TLS
	// destructors/our hook being called then we need to make sure the linker does
	// not omit this symbol. Otherwise it will omit it and our callback won't be
	// wired up.
	//
	// We don't actually use the `/INCLUDE` linker flag here like the article
	// mentions because the Rust compiler doesn't propagate linker flags, but
	// instead we use a shim function which performs a volatile 1-byte load from
	// the address of the symbol to ensure it sticks around.

	#[link_section = ".CRT$XLB"]
	#[linkage = "external"]
	#[allow(dead_code, unused_variables)]
	pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD,
	c::LPVOID) =
	on_tls_callback;

	#[allow(dead_code, unused_variables)]
	unsafe extern "system" fn on_tls_callback(h: c::LPVOID,
	dwReason: c::DWORD,
	pv: c::LPVOID) {
	if dwReason == c::DLL_THREAD_DETACH \|\| dwReason == c::DLL_PROCESS_DETACH {
	run_dtors();
	}

	// See comments above for what this is doing. Note that we don't need this
	// trickery on GNU windows, just on MSVC.
	reference_tls_used();
	#[cfg(target_env = "msvc")]
	unsafe fn reference_tls_used() {
	extern { static _tls_used: u8; }
	::intrinsics::volatile_load(&_tls_used);
	}
	#[cfg(not(target_env = "msvc"))]
	unsafe fn reference_tls_used() {}
	}

	#[allow(dead_code)] // actually called above
	unsafe fn run_dtors() {
	let mut any_run = true;
	for _ in 0..5 {
	if !any_run { break }
	any_run = false;
	let dtors = {
	DTOR_LOCK.lock();
	let ret = if DTORS as usize <= 1 {
	Vec::new()
	} else {
	(DTORS).iter().map(\|s\| s).collect()
	};
	DTOR_LOCK.unlock();
	ret
	};
	for &(key, dtor) in &dtors {
	let ptr = c::TlsGetValue(key);
	if !ptr.is_null() {
	c::TlsSetValue(key, ptr::null_mut());
	dtor(ptr as *mut _);
	any_run = true;
	}
	}
	}
	}