blob: 8c5b3529979d7fd81b49861eab571839df5fca5a [file]
//! This module helps you efficiently store and retrieve values using interning.
//!
//! Interning is a neat trick that keeps only one copy of identical values, saving memory
//! and making comparisons super fast. Here, we provide the `Interned<T>` struct and the `Internable` trait
//! to make interning easy for different data types.
//!
//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec<String>`,
//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results.
//!
//! # Thread Safety
//!
//! We use `Mutex` to make sure interning and retrieval are thread-safe. But keep in mind—once a value is
//! interned, it sticks around for the entire lifetime of the program.
use std::any::{Any, TypeId};
use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use std::ops::Deref;
use std::sync::{LazyLock, Mutex};
use std::{fmt, mem};
use crate::core::builder::Step;
/// Represents an interned value of type `T`, allowing for efficient comparisons and retrieval.
///
/// This struct stores a unique index referencing the interned value within an internal cache.
pub struct Interned<T>(usize, PhantomData<*const T>);
impl<T: Internable + Default> Default for Interned<T> {
fn default() -> Self {
T::default().intern()
}
}
impl<T> Copy for Interned<T> {}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Interned<T> {
*self
}
}
impl<T> PartialEq for Interned<T> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T> Eq for Interned<T> {}
impl PartialEq<&str> for Interned<String> {
fn eq(&self, other: &&str) -> bool {
**self == **other
}
}
unsafe impl<T> Send for Interned<T> {}
unsafe impl<T> Sync for Interned<T> {}
impl fmt::Display for Interned<String> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s: &str = self;
f.write_str(s)
}
}
impl<T, U: ?Sized + fmt::Debug> fmt::Debug for Interned<T>
where
Self: Deref<Target = U>,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s: &U = self;
f.write_fmt(format_args!("{s:?}"))
}
}
impl<T: Internable + Hash> Hash for Interned<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
let l = T::intern_cache().lock().unwrap();
l.get(*self).hash(state)
}
}
impl<T: Internable + Deref> Deref for Interned<T> {
type Target = T::Target;
fn deref(&self) -> &Self::Target {
let l = T::intern_cache().lock().unwrap();
unsafe { mem::transmute::<&Self::Target, &Self::Target>(l.get(*self)) }
}
}
impl<T: Internable + AsRef<U>, U: ?Sized> AsRef<U> for Interned<T> {
fn as_ref(&self) -> &U {
let l = T::intern_cache().lock().unwrap();
unsafe { mem::transmute::<&U, &U>(l.get(*self).as_ref()) }
}
}
impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
let l = T::intern_cache().lock().unwrap();
l.get(*self).partial_cmp(l.get(*other))
}
}
impl<T: Internable + Ord> Ord for Interned<T> {
fn cmp(&self, other: &Self) -> Ordering {
let l = T::intern_cache().lock().unwrap();
l.get(*self).cmp(l.get(*other))
}
}
/// A structure for managing the interning of values of type `T`.
///
/// `TyIntern<T>` maintains a mapping between values and their interned representations,
/// ensuring that duplicate values are not stored multiple times.
struct TyIntern<T: Clone + Eq> {
items: Vec<T>,
set: HashMap<T, Interned<T>>,
}
impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
fn default() -> Self {
TyIntern { items: Vec::new(), set: Default::default() }
}
}
impl<T: Hash + Clone + Eq> TyIntern<T> {
/// Interns a borrowed value, ensuring it is stored uniquely.
///
/// If the value has been previously interned, the same `Interned<T>` instance is returned.
fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
where
B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
T: Borrow<B>,
{
if let Some(i) = self.set.get(item) {
return *i;
}
let item = item.to_owned();
let interned = Interned(self.items.len(), PhantomData::<*const T>);
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}
/// Interns an owned value, storing it uniquely.
///
/// If the value has been previously interned, the existing `Interned<T>` is returned.
fn intern(&mut self, item: T) -> Interned<T> {
if let Some(i) = self.set.get(&item) {
return *i;
}
let interned = Interned(self.items.len(), PhantomData::<*const T>);
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}
/// Retrieves a reference to the interned value associated with the given `Interned<T>` instance.
fn get(&self, i: Interned<T>) -> &T {
&self.items[i.0]
}
}
/// A global interner for managing interned values of common types.
///
/// This structure maintains caches for `String`, `PathBuf`, and `Vec<String>`, ensuring efficient storage
/// and retrieval of frequently used values.
#[derive(Default)]
pub struct Interner {
strs: Mutex<TyIntern<String>>,
}
/// Defines the behavior required for a type to be internable.
///
/// Types implementing this trait must provide access to a static cache and define an `intern` method
/// that ensures values are stored uniquely.
trait Internable: Clone + Eq + Hash + 'static {
fn intern_cache() -> &'static Mutex<TyIntern<Self>>;
fn intern(self) -> Interned<Self> {
Self::intern_cache().lock().unwrap().intern(self)
}
}
impl Internable for String {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.strs
}
}
impl Interner {
/// Interns a string reference, ensuring it is stored uniquely.
///
/// If the string has been previously interned, the same `Interned<String>` instance is returned.
pub fn intern_str(&self, s: &str) -> Interned<String> {
self.strs.lock().unwrap().intern_borrow(s)
}
}
/// A global instance of `Interner` that caches common interned values.
pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);
/// This is essentially a `HashMap` which allows storing any type in its input and
/// any type in its output. It is a write-once cache; values are never evicted,
/// which means that references to the value can safely be returned from the
/// `get()` method.
#[derive(Debug, Default)]
pub struct Cache {
cache: RefCell<
HashMap<
TypeId,
Box<dyn Any>, // actually a HashMap<Step, Interned<Step::Output>>
>,
>,
#[cfg(test)]
/// Contains step metadata of executed steps (in the same order in which they were executed).
/// Useful for tests.
executed_steps: RefCell<Vec<ExecutedStep>>,
}
#[cfg(test)]
#[derive(Debug)]
pub struct ExecutedStep {
pub metadata: Option<crate::core::builder::StepMetadata>,
}
impl Cache {
/// Creates a new empty cache.
pub fn new() -> Cache {
Cache::default()
}
/// Stores the result of a computation step in the cache.
pub fn put<S: Step>(&self, step: S, value: S::Output) {
let mut cache = self.cache.borrow_mut();
let type_id = TypeId::of::<S>();
let stepcache = cache
.entry(type_id)
.or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
.downcast_mut::<HashMap<S, S::Output>>()
.expect("invalid type mapped");
assert!(!stepcache.contains_key(&step), "processing {step:?} a second time");
#[cfg(test)]
{
let metadata = step.metadata();
self.executed_steps.borrow_mut().push(ExecutedStep { metadata });
}
stepcache.insert(step, value);
}
/// Retrieves a cached result for the given step, if available.
pub fn get<S: Step>(&self, step: &S) -> Option<S::Output> {
let mut cache = self.cache.borrow_mut();
let type_id = TypeId::of::<S>();
let stepcache = cache
.entry(type_id)
.or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
.downcast_mut::<HashMap<S, S::Output>>()
.expect("invalid type mapped");
stepcache.get(step).cloned()
}
}
#[cfg(test)]
impl Cache {
pub(crate) fn inspect_all_steps_of_type<S: Step, T: Ord>(
&self,
map_fn: impl Fn(&S, &S::Output) -> T,
) -> Vec<T> {
let cache = self.cache.borrow();
let mut values = cache
.get(&TypeId::of::<S>())
.map(|any| any.downcast_ref::<HashMap<S, S::Output>>().expect("correct type"))
.map(|m| m.iter().map(|(step, output)| map_fn(step, output)).collect::<Vec<_>>())
.unwrap_or_default();
values.sort();
values
}
pub fn contains<S: Step>(&self) -> bool {
self.cache.borrow().contains_key(&TypeId::of::<S>())
}
#[cfg(test)]
pub fn into_executed_steps(mut self) -> Vec<ExecutedStep> {
mem::take(&mut self.executed_steps.borrow_mut())
}
}
#[cfg(test)]
mod tests;