blob: 25834943f009ba80e9fb665f9ab7ee45e516b172 [file] [log] [blame]
//! Internal `#[repr(simd)]` types
#![allow(non_camel_case_types)]
/// Defines a crate-private `#[repr(simd)]` vector type wrapping a fixed-size
/// array, plus its constructors, lane accessors, and `PartialEq`/`Debug`
/// impls (both of which go through the array view of the vector).
///
/// Usage: `simd_ty!(Name[Elem; LANES]: a, b, ...)`, where the trailing
/// identifiers become the parameter names of `new`, one per lane.
macro_rules! simd_ty {
    ($ty_name:ident [$elem:ty ; $lanes:literal]: $($arg:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $ty_name([$elem; $lanes]);

        #[allow(clippy::use_self)]
        impl $ty_name {
            /// A value of this type where all elements are zeroed out.
            // SAFETY: the wrapped array is plain data for every element type
            // this file instantiates the macro with (primitive integers and
            // floats), so the all-zero bit pattern is a valid value.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($arg: $elem),*) -> Self {
                Self([$($arg),*])
            }

            /// Wraps an existing array of lanes without reordering them.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem; $lanes]) -> Self {
                Self(elements)
            }

            /// Builds a vector whose lanes all hold `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $elem) -> Self {
                // One-lane helper vector used as the shuffle source.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem; 1]);

                let single = Single([value]);
                // SAFETY: every shuffle index is 0, which is always in-bounds
                // because the shuffled simd type has exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $lanes]) }
            }

            /// Extract the element at position `index`.
            /// `index` is not a constant so this is not efficient!
            /// Use for testing only.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(&self, index: usize) -> $elem {
                let elements = self.as_array();
                elements[index]
            }

            /// Borrows the vector as a plain array of its lanes.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem; $lanes] {
                let this: *const Self = self;
                // SAFETY: We can always read the prefix of a simd type as an
                // array. There might be more padding afterwards for some
                // widths, but that's not a problem for reading less than that.
                unsafe { &*this.cast::<[$elem; $lanes]>() }
            }
        }

        impl core::cmp::PartialEq for $ty_name {
            /// Lane-wise equality, delegated to array comparison.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let (lhs, rhs) = (self.as_array(), other.as_array());
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $ty_name {
            /// Formats the vector via `debug_simd_finish`, passing the type
            /// name and the array view of the lanes.
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let type_name = stringify!($ty_name);
                debug_simd_finish(formatter, type_name, self.as_array())
            }
        }
    };
}
/// Defines a crate-private `#[repr(simd)]` mask vector type wrapping a
/// fixed-size array of integers. Lanes are supplied as `bool`s and stored as
/// all-zero bits (`false`) or all-one bits (`true`) of the element type.
///
/// Usage: `simd_m_ty!(Name[Elem; LANES]: a, b, ...)`, where the trailing
/// identifiers become the parameter names of `new`, one per lane.
macro_rules! simd_m_ty {
    ($ty_name:ident [$elem:ident ; $lanes:literal]: $($arg:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $ty_name([$elem; $lanes]);

        #[allow(clippy::use_self)]
        impl $ty_name {
            /// Maps `false` to all-zero bits and `true` to all-one bits.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem {
                if x {
                    !(0 as $elem)
                } else {
                    0 as $elem
                }
            }

            /// Builds a mask from one `bool` per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($arg: bool),*) -> Self {
                Self([$(Self::bool_to_internal($arg)),*])
            }

            /// Builds a mask whose lanes all encode `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                // One-lane helper vector used as the shuffle source.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem; 1]);

                let single = Single([Self::bool_to_internal(value)]);
                // SAFETY: every shuffle index is 0, which is always in-bounds
                // because the shuffled simd type has exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $lanes]) }
            }

            /// Borrows the mask as a plain array of its internal lane values.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem; $lanes] {
                let this: *const Self = self;
                // SAFETY: We can always read the prefix of a simd type as an
                // array. There might be more padding afterwards for some
                // widths, but that's not a problem for reading less than that.
                unsafe { &*this.cast::<[$elem; $lanes]>() }
            }
        }

        impl core::cmp::PartialEq for $ty_name {
            /// Lane-wise equality, delegated to array comparison.
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let (lhs, rhs) = (self.as_array(), other.as_array());
                lhs == rhs
            }
        }

        impl core::fmt::Debug for $ty_name {
            /// Formats the mask via `debug_simd_finish`, passing the type
            /// name and the array view of the internal lane values.
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                let type_name = stringify!($ty_name);
                debug_simd_finish(formatter, type_name, self.as_array())
            }
        }
    };
}
// 16-bit wide types: two 8-bit lanes each.
simd_ty!(u8x2[u8; 2]: x0, x1);
simd_ty!(i8x2[i8; 2]: x0, x1);
// 32-bit wide types: unsigned first, then signed.
simd_ty!(u8x4[u8; 4]: x0, x1, x2, x3);
simd_ty!(u16x2[u16; 2]: x0, x1);

simd_ty!(i8x4[i8; 4]: x0, x1, x2, x3);
simd_ty!(i16x2[i16; 2]: x0, x1);
// 64-bit wide types.
//
// The identifiers after the colon become the parameter names of each type's
// `new` constructor, one per lane. Lanes are named with zero-based indices;
// single-lane vectors therefore call their only lane `x0`, matching every
// other type declared in this file.

// Unsigned integer vectors.
simd_ty!(u8x8[u8; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(u16x4[u16; 4]: x0, x1, x2, x3);
simd_ty!(u32x2[u32; 2]: x0, x1);
simd_ty!(u64x1[u64; 1]: x0);

// Signed integer vectors.
simd_ty!(i8x8[i8; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i16x4[i16; 4]: x0, x1, x2, x3);
simd_ty!(i32x2[i32; 2]: x0, x1);
simd_ty!(i64x1[i64; 1]: x0);

// Floating-point vectors.
simd_ty!(f32x2[f32; 2]: x0, x1);
simd_ty!(f64x1[f64; 1]: x0);
// 128-bit wide types:

// Unsigned integer vectors.
simd_ty!(
    u8x16[u8; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(u16x8[u16; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(u32x4[u32; 4]: x0, x1, x2, x3);
simd_ty!(u64x2[u64; 2]: x0, x1);

// Signed integer vectors.
simd_ty!(
    i8x16[i8; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(i16x8[i16; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i32x4[i32; 4]: x0, x1, x2, x3);
simd_ty!(i64x2[i64; 2]: x0, x1);

// Floating-point vectors.
// NOTE: `f16x4` is only 64 bits wide (4 lanes of 16 bits); it is declared in
// this section alongside the other `f16` vector.
simd_ty!(f16x4[f16; 4]: x0, x1, x2, x3);
simd_ty!(f16x8[f16; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(f32x4[f32; 4]: x0, x1, x2, x3);
simd_ty!(f64x2[f64; 2]: x0, x1);

// Mask vectors: built from `bool`s, stored as signed integers of the lane width.
simd_m_ty!(
    m8x16[i8; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_m_ty!(m16x8[i16; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_m_ty!(m32x4[i32; 4]: x0, x1, x2, x3);
simd_m_ty!(m64x2[i64; 2]: x0, x1);
// 256-bit wide types:

// Unsigned integer vectors.
simd_ty!(
    u8x32[u8; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_ty!(
    u16x16[u16; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(u32x8[u32; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(u64x4[u64; 4]: x0, x1, x2, x3);

// Signed integer vectors.
simd_ty!(
    i8x32[i8; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_ty!(
    i16x16[i16; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(i32x8[i32; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i64x4[i64; 4]: x0, x1, x2, x3);

// Floating-point vectors.
simd_ty!(
    f16x16[f16; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(f32x8[f32; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(f64x4[f64; 4]: x0, x1, x2, x3);

// Mask vectors: built from `bool`s, stored as signed integers of the lane width.
simd_m_ty!(
    m8x32[i8; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_m_ty!(
    m16x16[i16; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_m_ty!(m32x8[i32; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
// 512-bit wide types:

// 8-bit lanes.
simd_ty!(
    i8x64[i8; 64]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
    x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
    x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63
);
simd_ty!(
    u8x64[u8; 64]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
    x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
    x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63
);

// 16-bit integer lanes.
simd_ty!(
    i16x32[i16; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_ty!(
    u16x32[u16; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);

// 32-bit integer lanes.
simd_ty!(
    i32x16[i32; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);
simd_ty!(
    u32x16[u32; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);

// 16- and 32-bit floating-point lanes.
simd_ty!(
    f16x32[f16; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_ty!(
    f32x16[f32; 16]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
);

// 64-bit lanes.
simd_ty!(i64x8[i64; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(u64x8[u64; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(f64x8[f64; 8]: x0, x1, x2, x3, x4, x5, x6, x7);
// 1024-bit wide types:

// 16-bit lanes.
simd_ty!(
    u16x64[u16; 64]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
    x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
    x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63
);

// 32-bit lanes.
simd_ty!(
    i32x32[i32; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
simd_ty!(
    u32x32[u32; 32]:
    x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31
);
/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
/// were before moving to array-based simd.
///
/// Every element of `array` is borrowed as a `&dyn Debug` trait object, and
/// the resulting fields are handed, together with `type_name`, to
/// `Formatter::debug_tuple_fields_finish`, whose result is returned as-is.
#[inline]
pub(crate) fn debug_simd_finish<T, const N: usize>(
    formatter: &mut crate::fmt::Formatter<'_>,
    type_name: &str,
    array: &[T; N],
) -> crate::fmt::Result
where
    T: crate::fmt::Debug,
{
    // One trait-object reference per lane, in lane order.
    let fields = crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|lane| &array[lane]);
    formatter.debug_tuple_fields_finish(type_name, &fields)
}