| //! Interning of slices, potentially with a header. |
| //! |
| //! See [`crate::intern`] for an explanation of interning modes. Note that slice interning is currently |
| //! available only in GC mode (there is no other need). |
| //! |
| //! [`InternedSlice`] and [`InternedSliceRef`] are essentially [`Interned<(Header, Box<[SliceType]>)>`][crate::Interned] |
| //! and [`InternedRef`][crate::InternedRef] with the same types, but more optimized. There is only one |
| //! allocation and the pointer is thin. |
| |
| use std::{ |
| ffi::c_void, |
| fmt::{self, Debug}, |
| hash::{BuildHasher, Hash, Hasher}, |
| marker::PhantomData, |
| mem::ManuallyDrop, |
| ops::Deref, |
| ptr::{self, NonNull}, |
| sync::OnceLock, |
| }; |
| |
| use dashmap::{DashMap, SharedValue}; |
| use hashbrown::raw::RawTable; |
| use rustc_hash::FxBuildHasher; |
| use triomphe::{HeaderSlice, HeaderWithLength, ThinArc}; |
| |
| type InternMap<T> = DashMap< |
| ThinArc<<T as SliceInternable>::Header, <T as SliceInternable>::SliceType>, |
| (), |
| FxBuildHasher, |
| >; |
| type Guard<T> = dashmap::RwLockWriteGuard< |
| 'static, |
| RawTable<( |
| ThinArc<<T as SliceInternable>::Header, <T as SliceInternable>::SliceType>, |
| SharedValue<()>, |
| )>, |
| >; |
| type Pointee<T> = HeaderSlice< |
| HeaderWithLength<<T as SliceInternable>::Header>, |
| [<T as SliceInternable>::SliceType], |
| >; |
| |
| pub struct InternedSlice<T: SliceInternable> { |
| arc: ThinArc<T::Header, T::SliceType>, |
| } |
| |
| impl<T: SliceInternable> InternedSlice<T> { |
| #[inline] |
| pub fn from_header_and_slice<'a>( |
| header: T::Header, |
| slice: &[T::SliceType], |
| ) -> InternedSliceRef<'a, T> { |
| const { assert!(T::USE_GC) }; |
| |
| let storage = T::storage().get(); |
| let (mut shard, hash) = Self::select(storage, &header, slice); |
| // Atomically, |
| // - check if `obj` is already in the map |
| // - if so, clone its `Arc` and return it |
| // - if not, box it up, insert it, and return a clone |
| // This needs to be atomic (locking the shard) to avoid races with other thread, which could |
| // insert the same object between us looking it up and inserting it. |
| let bucket = match shard.find_or_find_insert_slot( |
| hash, |
| |(other, _)| other.header.header == header && other.slice == *slice, |
| |(x, _)| storage.hasher().hash_one(x), |
| ) { |
| Ok(bucket) => bucket, |
| // SAFETY: The slot came from `find_or_find_insert_slot()`, and the table wasn't modified since then. |
| Err(insert_slot) => unsafe { |
| shard.insert_in_slot( |
| hash, |
| insert_slot, |
| (ThinArc::from_header_and_slice(header, slice), SharedValue::new(())), |
| ) |
| }, |
| }; |
| // SAFETY: We just retrieved/inserted this bucket. |
| // `NonNull::new_unchecked()` is safe because the pointer originates from a `ThinArc`. |
| unsafe { |
| InternedSliceRef { |
| // INVARIANT: We create it from a `ThinArc`. |
| ptr: NonNull::new_unchecked(ThinArc::as_ptr(&bucket.as_ref().0).cast_mut()), |
| _marker: PhantomData, |
| } |
| } |
| } |
| |
| #[inline] |
| fn select( |
| storage: &'static InternMap<T>, |
| header: &T::Header, |
| slice: &[T::SliceType], |
| ) -> (Guard<T>, u64) { |
| let hash = Self::hash(storage, header, slice); |
| let shard_idx = storage.determine_shard(hash as usize); |
| let shard = &storage.shards()[shard_idx]; |
| (shard.write(), hash) |
| } |
| |
| #[inline] |
| fn hash(storage: &'static InternMap<T>, header: &T::Header, slice: &[T::SliceType]) -> u64 { |
| storage.hasher().hash_one(HeaderSlice { |
| header: HeaderWithLength { header, length: slice.len() }, |
| slice, |
| }) |
| } |
| |
| #[inline(always)] |
| fn ptr(&self) -> *const c_void { |
| self.arc.as_ptr() |
| } |
| |
| #[inline] |
| pub fn as_ref(&self) -> InternedSliceRef<'_, T> { |
| InternedSliceRef { |
| // SAFETY: `self.ptr` comes from a valid `ThinArc`, so non null. |
| // INVARIANT: We create it from a `ThinArc`. |
| ptr: unsafe { NonNull::new_unchecked(self.ptr().cast_mut()) }, |
| _marker: PhantomData, |
| } |
| } |
| } |
| |
| /// Compares interned `Ref`s using pointer equality. |
| impl<T: SliceInternable> PartialEq for InternedSlice<T> { |
| // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects. |
| |
| #[inline] |
| fn eq(&self, other: &Self) -> bool { |
| self.arc.as_ptr() == other.arc.as_ptr() |
| } |
| } |
| |
| impl<T: SliceInternable> Eq for InternedSlice<T> {} |
| |
| impl<T: SliceInternable> Hash for InternedSlice<T> { |
| #[inline] |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| state.write_usize(self.ptr().addr()) |
| } |
| } |
| |
| impl<T: SliceInternable> Deref for InternedSlice<T> { |
| type Target = Pointee<T>; |
| |
| #[inline] |
| fn deref(&self) -> &Self::Target { |
| &self.arc |
| } |
| } |
| |
| impl<T: SliceInternable> Clone for InternedSlice<T> { |
| #[inline] |
| fn clone(&self) -> Self { |
| Self { arc: self.arc.clone() } |
| } |
| } |
| |
| impl<T> Debug for InternedSlice<T> |
| where |
| T: SliceInternable, |
| T::SliceType: Debug, |
| T::Header: Debug, |
| { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| (*self.arc).fmt(f) |
| } |
| } |
| |
| #[repr(transparent)] |
| pub struct InternedSliceRef<'a, T> { |
| /// # Invariant |
| /// |
| /// There is no `ThinArcBorrow` unfortunately, so this is basically a `ManuallyDrop<ThinArc>`, |
| /// except that can't be `Copy`, so we store a raw pointer instead. |
| ptr: NonNull<c_void>, |
| _marker: PhantomData<&'a T>, |
| } |
| |
| // SAFETY: This is essentially a `ThinArc`, implemented as a raw pointer because there is no `ThinArcBorrowed`. |
| unsafe impl<T: Send + Sync> Send for InternedSliceRef<'_, T> {} |
| unsafe impl<T: Send + Sync> Sync for InternedSliceRef<'_, T> {} |
| |
| impl<'a, T: SliceInternable> InternedSliceRef<'a, T> { |
| #[inline(always)] |
| fn arc(self) -> ManuallyDrop<ThinArc<T::Header, T::SliceType>> { |
| // SAFETY: `self.ptr`'s invariant. |
| unsafe { ManuallyDrop::new(ThinArc::from_raw(self.ptr.as_ptr())) } |
| } |
| |
| #[inline] |
| pub fn to_owned(self) -> InternedSlice<T> { |
| InternedSlice { arc: (*self.arc()).clone() } |
| } |
| |
| #[inline] |
| pub fn get(self) -> &'a Pointee<T> { |
| // SAFETY: This is a lifetime extension, valid because we live for `'a`. |
| unsafe { &*ptr::from_ref::<Pointee<T>>(&*self.arc()) } |
| } |
| |
| /// # Safety |
| /// |
| /// You have to make sure the data is not referenced after the refcount reaches zero; beware the interning |
| /// map also keeps a reference to the value. |
| #[inline] |
| pub unsafe fn decrement_refcount(self) { |
| drop(ManuallyDrop::into_inner(self.arc())); |
| } |
| |
| #[inline] |
| pub(crate) fn strong_count(self) -> usize { |
| ThinArc::strong_count(&self.arc()) |
| } |
| |
| #[inline] |
| pub(crate) fn as_raw(self) -> *const c_void { |
| self.arc().as_ptr() |
| } |
| |
| /// **Available only on GC mode**. |
| /// |
| /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse |
| /// than actual soundness check. |
| #[inline] |
| pub fn change_lifetime<'b>(self) -> InternedSliceRef<'b, T> { |
| const { assert!(T::USE_GC) }; |
| // SAFETY: The lifetime on `InternedSliceRef` is essentially advisory only for GCed types. |
| unsafe { std::mem::transmute::<InternedSliceRef<'a, T>, InternedSliceRef<'b, T>>(self) } |
| } |
| } |
| |
| impl<T> Clone for InternedSliceRef<'_, T> { |
| #[inline] |
| fn clone(&self) -> Self { |
| *self |
| } |
| } |
| |
| impl<T> Copy for InternedSliceRef<'_, T> {} |
| |
| impl<T: SliceInternable> Hash for InternedSliceRef<'_, T> { |
| #[inline] |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| state.write_usize(self.ptr.as_ptr().addr()); |
| } |
| } |
| |
| impl<T: SliceInternable> PartialEq for InternedSliceRef<'_, T> { |
| #[inline] |
| fn eq(&self, other: &Self) -> bool { |
| self.ptr == other.ptr |
| } |
| } |
| |
| impl<T: SliceInternable> Eq for InternedSliceRef<'_, T> {} |
| |
| impl<T: SliceInternable> Deref for InternedSliceRef<'_, T> { |
| type Target = Pointee<T>; |
| |
| #[inline] |
| fn deref(&self) -> &Self::Target { |
| self.get() |
| } |
| } |
| |
| impl<T> Debug for InternedSliceRef<'_, T> |
| where |
| T: SliceInternable, |
| T::SliceType: Debug, |
| T::Header: Debug, |
| { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| (**self).fmt(f) |
| } |
| } |
| |
| pub struct InternSliceStorage<T: SliceInternable> { |
| map: OnceLock<InternMap<T>>, |
| } |
| |
| #[allow( |
| clippy::new_without_default, |
| reason = "this a const fn, so it can't be default yet. See <https://github.com/rust-lang/rust/issues/63065>" |
| )] |
| impl<T: SliceInternable> InternSliceStorage<T> { |
| pub const fn new() -> Self { |
| Self { map: OnceLock::new() } |
| } |
| } |
| |
| impl<T: SliceInternable> InternSliceStorage<T> { |
| pub(crate) fn get(&self) -> &InternMap<T> { |
| self.map.get_or_init(|| { |
| DashMap::with_capacity_and_hasher( |
| (64 * 1024) / std::mem::size_of::<T::SliceType>(), |
| Default::default(), |
| ) |
| }) |
| } |
| } |
| |
| pub trait SliceInternable: Sized + 'static { |
| const USE_GC: bool; |
| type Header: Eq + Hash + Send + Sync; |
| type SliceType: Eq + Hash + Send + Sync + Copy + 'static; |
| fn storage() -> &'static InternSliceStorage<Self>; |
| } |
| |
| /// Implements `SliceInternable` for a given list of types, making them usable with `InternedSlice`. |
| #[macro_export] |
| #[doc(hidden)] |
| macro_rules! _impl_slice_internable { |
| ( gc; $tag:ident, $h:ty, $t:ty $(,)? ) => { |
| #[allow(unreachable_pub)] |
| pub struct $tag; |
| impl $crate::SliceInternable for $tag { |
| const USE_GC: bool = true; |
| type Header = $h; |
| type SliceType = $t; |
| fn storage() -> &'static $crate::InternSliceStorage<Self> { |
| static STORAGE: $crate::InternSliceStorage<$tag> = |
| $crate::InternSliceStorage::new(); |
| &STORAGE |
| } |
| } |
| }; |
| } |
| pub use crate::_impl_slice_internable as impl_slice_internable; |