blob: 7b84f7a4a5aa2e8c15dbc275a442b1f5aed74def [file]
//! Numeric traits used for internal implementations.
#![doc(hidden)]
#![unstable(
feature = "num_internals",
reason = "internal routines only exposed for testing",
issue = "none"
)]
use crate::num::FpCategory;
use crate::{f64, fmt, ops};
/// Lossy `as` casting between two types.
pub trait CastInto<T: Copy>: Copy {
fn cast(self) -> T;
}
/// Collection of traits that allow us to be generic over integer size.
pub trait Int:
Sized
+ Clone
+ Copy
+ fmt::Debug
+ ops::Shr<u32, Output = Self>
+ ops::Shl<u32, Output = Self>
+ ops::BitAnd<Output = Self>
+ ops::BitOr<Output = Self>
+ PartialEq
+ CastInto<i16>
{
const ZERO: Self;
const ONE: Self;
}
macro_rules! int {
($($ty:ty),+) => {
$(
impl CastInto<i16> for $ty {
fn cast(self) -> i16 {
self as i16
}
}
impl Int for $ty {
const ZERO: Self = 0;
const ONE: Self = 1;
}
)+
}
}
int!(u16, u32, u64);
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
#[doc(hidden)]
pub trait Float:
Sized
+ ops::Div<Output = Self>
+ ops::Neg<Output = Self>
+ ops::Mul<Output = Self>
+ ops::Add<Output = Self>
+ fmt::Debug
+ PartialEq
+ PartialOrd
+ Default
+ Clone
+ Copy
{
/// The unsigned integer with the same size as the float
type Int: Int + Into<u64>;
/* general constants */
const INFINITY: Self;
const NEG_INFINITY: Self;
const NAN: Self;
const NEG_NAN: Self;
/// Bit width of the float
const BITS: u32;
/// The number of bits in the significand, *including* the hidden bit.
const SIG_TOTAL_BITS: u32;
const EXP_MASK: Self::Int;
const SIG_MASK: Self::Int;
/// The number of bits in the significand, *excluding* the hidden bit.
const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
/// Number of bits in the exponent.
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
/// representation.
///
/// This shifted fully right, use `EXP_MASK` for the shifted value.
const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
/// Signed version of `EXP_SAT` since we convert a lot.
const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
/// The exponent bias value. This is also the maximum value of the exponent.
const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
/// Minimum exponent value of normal values.
const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
/// Round-to-even only happens for negative values of q
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
/// the 32-bit case.
///
/// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
///
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
/// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
///
/// Thus we have that we only need to round ties to even when
/// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
/// (in the 32-bit case). In both cases,the power of five(5^|q|)
/// fits in a 64-bit word.
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
/// Largest decimal exponent for a non-infinite value.
///
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
const LARGEST_POWER_OF_TEN: i32 = {
let largest_pow2 = Self::EXP_BIAS + 1;
pow2_to_pow10(largest_pow2 as i64) as i32
};
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
/// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
///
/// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
/// the smallest power of two. The `2^64 - 1)` denominator comes from the number of values
/// that are representable by the intermediate storage format. I don't actually know _why_
/// the storage format is relevant here.
///
/// The values may be calculated using the formula. Unfortunately we cannot calculate them at
/// compile time since intermediates exceed the range of an `f64`.
const SMALLEST_POWER_OF_TEN: i32;
/// Returns the category that this number falls into.
fn classify(self) -> FpCategory;
/// Transmute to the integer representation
fn to_bits(self) -> Self::Int;
}
/// Items that ideally would be on `Float`, but don't apply to all float types because they
/// rely on the mantissa fitting into a `u64` (which isn't true for `f128`).
#[doc(hidden)]
pub trait FloatExt: Float {
/// Performs a raw transmutation from an integer.
fn from_u64_bits(v: u64) -> Self;
/// Returns the mantissa, exponent and sign as integers.
///
/// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
/// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
/// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
///
/// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
/// with the explicit bit set but otherwise unshifted
///
/// `s` is only ever +/-1.
fn integer_decode(self) -> (u64, i16, i8) {
let bits = self.to_bits();
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
let mantissa = if exponent == 0 {
(bits & Self::SIG_MASK) << 1
} else {
(bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
};
// Exponent bias + mantissa shift
exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
(mantissa.into(), exponent, sign)
}
}
/// Solve for `b` in `10^b = 2^a`
const fn pow2_to_pow10(a: i64) -> i64 {
let res = (a as f64) / f64::consts::LOG2_10;
res as i64
}
#[cfg(target_has_reliable_f16)]
impl Float for f16 {
type Int = u16;
const INFINITY: Self = Self::INFINITY;
const NEG_INFINITY: Self = Self::NEG_INFINITY;
const NAN: Self = Self::NAN;
const NEG_NAN: Self = -Self::NAN;
const BITS: u32 = 16;
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
const EXP_MASK: Self::Int = Self::EXP_MASK;
const SIG_MASK: Self::Int = Self::MAN_MASK;
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
const SMALLEST_POWER_OF_TEN: i32 = -27;
fn to_bits(self) -> Self::Int {
self.to_bits()
}
fn classify(self) -> FpCategory {
self.classify()
}
}
#[cfg(target_has_reliable_f16)]
impl FloatExt for f16 {
#[inline]
fn from_u64_bits(v: u64) -> Self {
Self::from_bits((v & 0xFFFF) as u16)
}
}
impl Float for f32 {
type Int = u32;
const INFINITY: Self = f32::INFINITY;
const NEG_INFINITY: Self = f32::NEG_INFINITY;
const NAN: Self = f32::NAN;
const NEG_NAN: Self = -f32::NAN;
const BITS: u32 = 32;
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
const EXP_MASK: Self::Int = Self::EXP_MASK;
const SIG_MASK: Self::Int = Self::MAN_MASK;
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
const SMALLEST_POWER_OF_TEN: i32 = -65;
fn to_bits(self) -> Self::Int {
self.to_bits()
}
fn classify(self) -> FpCategory {
self.classify()
}
}
impl FloatExt for f32 {
#[inline]
fn from_u64_bits(v: u64) -> Self {
f32::from_bits((v & 0xFFFFFFFF) as u32)
}
}
impl Float for f64 {
type Int = u64;
const INFINITY: Self = Self::INFINITY;
const NEG_INFINITY: Self = Self::NEG_INFINITY;
const NAN: Self = Self::NAN;
const NEG_NAN: Self = -Self::NAN;
const BITS: u32 = 64;
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
const EXP_MASK: Self::Int = Self::EXP_MASK;
const SIG_MASK: Self::Int = Self::MAN_MASK;
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
const SMALLEST_POWER_OF_TEN: i32 = -342;
fn to_bits(self) -> Self::Int {
self.to_bits()
}
fn classify(self) -> FpCategory {
self.classify()
}
}
impl FloatExt for f64 {
#[inline]
fn from_u64_bits(v: u64) -> Self {
f64::from_bits(v)
}
}