| #![cfg_attr( |
| feature = "as_crate", |
| feature(core_intrinsics), |
| feature(portable_simd), |
| allow(internal_features) |
| )] |
| #[cfg(not(feature = "as_crate"))] |
| use core::simd; |
| #[cfg(feature = "as_crate")] |
| use core_simd::simd; |
| |
| use core::intrinsics::simd as intrinsics; |
| |
| use simd::{LaneCount, Simd, SupportedLaneCount}; |
| |
// Only built with the `as_crate` feature. This file re-exports the module
// under the name `sealed` (see the `use experimental as sealed;` line), so
// that `crate::sealed::Sealed` resolves in that configuration.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker trait used as a supertrait bound of `StdFloat`.
    ///
    /// It lives in a non-`pub` module, so code that cannot name this module
    /// cannot implement it.
    pub trait Sealed {}
}
| |
| #[cfg(feature = "as_crate")] |
| use experimental as sealed; |
| |
| use crate::sealed::Sealed; |
| |
| /// This trait provides a possibly-temporary implementation of float functions |
| /// that may, in the absence of hardware support, canonicalize to calling an |
| /// operating system's `math.h` dynamically-loaded library (also known as a |
| /// shared object). As these conditionally require runtime support, they |
| /// should only appear in binaries built assuming OS support: `std`. |
| /// |
| /// However, there is no reason SIMD types, in general, need OS support, |
| /// as for many architectures an embedded binary may simply configure that |
| /// support itself. This means these types must be visible in `core` |
| /// but have these functions available in `std`. |
| /// |
| /// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but |
| /// due to compiler limitations, it is harder to implement this approach for |
| /// abstract data types like [`Simd`]. From that need, this trait is born. |
| /// |
| /// It is possible this trait will be replaced in some manner in the future, |
| /// when either the compiler or its supporting runtime functions are improved. |
| /// For now this trait is available to permit experimentation with SIMD float |
| /// operations that may lack hardware support, such as `mul_add`. |
| pub trait StdFloat: Sealed + Sized { |
| /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error, |
| /// yielding a more accurate result than an unfused multiply-add. |
| /// |
| /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target |
| /// architecture has a dedicated `fma` CPU instruction. However, this is not always |
| /// true, and will be heavily dependent on designing algorithms with specific target |
| /// hardware in mind. |
| #[inline] |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn mul_add(self, a: Self, b: Self) -> Self { |
| unsafe { intrinsics::simd_fma(self, a, b) } |
| } |
| |
| /// Produces a vector where every element has the square root value |
| /// of the equivalently-indexed element in `self` |
| #[inline] |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn sqrt(self) -> Self { |
| unsafe { intrinsics::simd_fsqrt(self) } |
| } |
| |
| /// Produces a vector where every element has the sine of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn sin(self) -> Self; |
| |
| /// Produces a vector where every element has the cosine of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn cos(self) -> Self; |
| |
| /// Produces a vector where every element has the exponential (base e) of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn exp(self) -> Self; |
| |
| /// Produces a vector where every element has the exponential (base 2) of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn exp2(self) -> Self; |
| |
| /// Produces a vector where every element has the natural logarithm of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn ln(self) -> Self; |
| |
| /// Produces a vector where every element has the logarithm with respect to an arbitrary |
| /// in the equivalently-indexed elements in `self` and `base`. |
| #[inline] |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn log(self, base: Self) -> Self { |
| unsafe { intrinsics::simd_div(self.ln(), base.ln()) } |
| } |
| |
| /// Produces a vector where every element has the base-2 logarithm of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn log2(self) -> Self; |
| |
| /// Produces a vector where every element has the base-10 logarithm of the value |
| /// in the equivalently-indexed element in `self`. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn log10(self) -> Self; |
| |
| /// Returns the smallest integer greater than or equal to each element. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| #[inline] |
| fn ceil(self) -> Self { |
| unsafe { intrinsics::simd_ceil(self) } |
| } |
| |
| /// Returns the largest integer value less than or equal to each element. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| #[inline] |
| fn floor(self) -> Self { |
| unsafe { intrinsics::simd_floor(self) } |
| } |
| |
| /// Rounds to the nearest integer value. Ties round toward zero. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| #[inline] |
| fn round(self) -> Self { |
| unsafe { intrinsics::simd_round(self) } |
| } |
| |
| /// Returns the floating point's integer value, with its fractional part removed. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| #[inline] |
| fn trunc(self) -> Self { |
| unsafe { intrinsics::simd_trunc(self) } |
| } |
| |
| /// Returns the floating point's fractional value, with its integer part removed. |
| #[must_use = "method returns a new vector and does not mutate the original value"] |
| fn fract(self) -> Self; |
| } |
| |
| impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {} |
| impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {} |
| |
// Generates the `StdFloat` impls for `Simd<f32, N>` and `Simd<f64, N>`.
//
// Input is a comma-terminated list of `method: intrinsic` pairs. For each pair
// a unary method named `method` is emitted that forwards to
// `intrinsics::intrinsic`. `fract` is written out by hand in both impls as
// `self - self.trunc()`.
macro_rules! impl_float {
    {
        $($method:ident: $op:ident,)*
    } => {
        impl<const N: usize> StdFloat for Simd<f32, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // SAFETY: `self` is a `Simd<f32, N>`, i.e. a float vector.
                    unsafe { intrinsics::$op(self) }
                }
            )*
        }

        impl<const N: usize> StdFloat for Simd<f64, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // On aarch64 the vector intrinsic is avoided for `f64`; each
                    // lane is computed with the scalar `f64` method instead. See
                    // https://github.com/llvm/llvm-project/issues/83729
                    #[cfg(target_arch = "aarch64")]
                    {
                        Self::from_array(self.to_array().map(|lane| lane.$method()))
                    }

                    #[cfg(not(target_arch = "aarch64"))]
                    {
                        // SAFETY: `self` is a `Simd<f64, N>`, i.e. a float vector.
                        unsafe { intrinsics::$op(self) }
                    }
                }
            )*
        }
    }
}
| |
| impl_float! { |
| sin: simd_fsin, |
| cos: simd_fcos, |
| exp: simd_fexp, |
| exp2: simd_fexp2, |
| ln: simd_flog, |
| log2: simd_flog2, |
| log10: simd_flog10, |
| } |