blob: 4c3dd078e034f512b48daf5f54b0925f2cb455db [file] [edit]
use itertools::Itertools as _;
use crate::common::{
PASSES,
intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind},
};
/// Maximum size, in bits, of an SVE vector.
///
/// `test_values_array_length` divides this by `ty.inner_size()` to obtain the lane count it
/// assumes for vectors whose `simd_len` is not `SimdLen::Fixed`.
pub const MAX_SVE_BITS: u32 = 2048;
/// Emits to `w` the definition of a static array holding the test values shared by every
/// intrinsic argument of type `ty`.
///
/// The item is named by `test_values_array_name`, sized by `test_values_array_length` and
/// populated by `test_values_array`. Its shape is (illustrative only):
///
/// ```rust,ignore
/// static U8_20: [u8; 20] = [0x0,0x1,0x2,0x3, /* ... */ 0x13];
/// ```
///
/// The statement is followed by one blank line. Any error from the underlying writer is
/// returned unchanged.
pub fn test_values_array_static<T: IntrinsicTypeDefinition>(
    w: &mut impl std::io::Write,
    ty: &T,
) -> std::io::Result<()> {
    let name = test_values_array_name(ty);
    let scalar = ty.rust_scalar_type();
    let load_size = test_values_array_length(ty);
    let values = test_values_array(ty);
    // The explicit `\n` together with `writeln!`'s own newline leaves a blank line after the item.
    writeln!(w, "static {name}: [{scalar}; {load_size}] = {values};\n")
}
/// Builds the identifier of the static array that holds the test values for intrinsic arguments
/// of type `ty`: the upper-cased Rust scalar type, an underscore, then the array length reported
/// by `test_values_array_length`.
pub fn test_values_array_name<T: IntrinsicTypeDefinition>(ty: &T) -> String {
    let prefix = ty.rust_scalar_type().to_uppercase();
    let load_size = test_values_array_length(ty);
    format!("{prefix}_{load_size}")
}
/// Renders the bracketed, comma-separated element list used for the test value arrays in
/// `gen_arg_rust`. The number of elements comes from `test_values_array_length`, which is what
/// `test_values_array_static` declares and `ArgumentList::load_values_rust` relies on.
///
/// Every element starts life as a bit pattern from `bit_pattern_for_test_values_array` and is
/// then rendered as:
///
/// * `fN::from_bits(0x..)` for floats;
/// * `-0x..` (the magnitude of the two's complement value) for signed integers and vectors whose
///   sign bit is set;
/// * plain `0x..` otherwise.
///
/// Vector-kind types are always treated as 32 bits wide.
///
/// # Panics
///
/// Panics via `unimplemented!` when `ty` is neither a vector nor a type with a known bit length,
/// and via `assert!` if a bit pattern does not fit in the element width.
pub fn test_values_array(ty: &IntrinsicType) -> String {
    let (bit_len, kind) = match (ty.kind, ty.bit_len) {
        (TypeKind::Float, Some(bits)) => (bits, TypeKind::Float),
        (TypeKind::Vector, _) => (32, TypeKind::Vector),
        (other, Some(bits)) => (bits, other),
        _ => unimplemented!(),
    };

    let elements = (0..test_values_array_length(ty)).format_with(",", |index, emit| {
        let pattern = bit_pattern_for_test_values_array(bit_len, index);
        // The pattern must be representable in `bit_len` bits.
        assert!(pattern == 0 || pattern.ilog2() < bit_len);

        let may_be_negative = matches!(kind, TypeKind::Vector | TypeKind::Int(Sign::Signed));
        if matches!(kind, TypeKind::Float) {
            emit(&format_args!("f{bit_len}::from_bits({pattern:#x})"))
        } else if may_be_negative && (pattern >> (bit_len - 1)) != 0 {
            // The sign bit is set, so `pattern` encodes a negative value in two's complement:
            // flip the low `bit_len` bits and add one to recover its magnitude.
            let low_bits = !0u64 >> (64 - bit_len);
            let magnitude = (pattern ^ low_bits) + 1;
            emit(&format_args!("-{magnitude:#x}"))
        } else {
            emit(&format_args!("{pattern:#x}"))
        }
    });

    format!("[{elements}]")
}
/// Computes how many test values an array must hold so that `PASSES` distinct, overlapping
/// windows can be loaded from it for a vector of type `ty`.
///
/// One load consumes `simd_len * vec_len` values and each subsequent window starts one element
/// later, so the array needs `(simd_len * vec_len) + PASSES - 1` entries. For instance a
/// `uint32x2x2_t` loads four values (`2 x 2`); with ten windows that is `(2 x 2) + 10 - 1`:
///
/// ```text
/// [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD]
///  ^^^^^^^^^^^^^^^^^^ first window
///       ^^^^^^^^^^^^^^^^^^ second window
///                                  10th window ^^^^^^^^^^^^^^^^^^
/// ```
///
/// A missing `simd_len` or `vec_len` counts as one. Scalable vectors (only SVE is currently
/// supported) are assumed to be as long as the architecture allows, i.e.
/// `MAX_SVE_BITS / ty.inner_size()` lanes.
pub fn test_values_array_length(ty: &IntrinsicType) -> u32 {
    let lanes = match ty.simd_len {
        None => 1,
        Some(SimdLen::Fixed(n)) => n,
        // Anything that is not a fixed lane count is sized for the largest possible vector.
        Some(_) => MAX_SVE_BITS / ty.inner_size(),
    };
    let vectors = ty.vec_len.unwrap_or(1);

    (lanes * vectors) + PASSES - 1
}
/// Looks up the bit pattern for position `index` of an array of test values whose elements are
/// `bits` wide.
///
/// Patterns come from the constant table matching `bits` (`BIT_PATTERNS_8` for widths of one to
/// eight bits, otherwise `BIT_PATTERNS_16`, `BIT_PATTERNS_32` or `BIT_PATTERNS_64`), widened to
/// `u64`. Callers invoke this with successive values of `index` to fill a whole array. The index
/// wraps around the table (or around `2^bits` for the sub-byte widths), so each table should
/// ideally be at least as long as the largest array requested (e.g. 51 for a `poly8x8x4` when
/// `PASSES=20`: `(8 * 4) + 20 - 1`); otherwise values repeat.
///
/// # Panics
///
/// Panics via `unimplemented!` for any other value of `bits`.
pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 {
    let slot = index as usize;
    match bits {
        // Restrict sub-byte widths to the values that fit in `bits` bits.
        1..=8 => u64::from(BIT_PATTERNS_8[slot % (1usize << bits)]),
        16 => u64::from(BIT_PATTERNS_16[slot % BIT_PATTERNS_16.len()]),
        32 => u64::from(BIT_PATTERNS_32[slot % BIT_PATTERNS_32.len()]),
        64 => BIT_PATTERNS_64[slot % BIT_PATTERNS_64.len()],
        _ => unimplemented!("bit_pattern_for_test_values_array(bits: {bits}, ..)"),
    }
}
// Backing storage for `BIT_PATTERNS_8`, filled at compile time so that entry `i` holds `i`.
const BIT_PATTERNS_8_TABLE: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut value = 0usize;
    while value < 256 {
        // `value` is at most 255 here, so the cast cannot truncate.
        table[value] = value as u8;
        value += 1;
    }
    table
};

/// Every possible 8-bit value in ascending order (`0x00` through `0xff`).
pub const BIT_PATTERNS_8: &[u8] = &BIT_PATTERNS_8_TABLE;
/// Bit patterns used as 16-bit test values.
///
/// The per-entry comments describe each pattern when read as an IEEE 754 half-precision float;
/// `test_values_array` feeds the same bits to integer types as well. Entries are selected by
/// `bit_pattern_for_test_values_array`, which wraps around the end of the table.
#[rustfmt::skip]
pub const BIT_PATTERNS_16: &[u16] = &[
// Simple values:
// 0.0
0x0000,
// The smallest normal value
0x0400,
// The value just below 0.5
0x37ff,
// 0.5
0x3800,
// The value just above 0.5
0x3801,
// The value just below 1.0
0x3bff,
// 1.0
0x3c00,
// The value just above 1.0
0x3c01,
// 1.5
0x3e00,
// 10
0x4900,
// The largest finite value
0x7bff,
// Infinity
0x7c00,
// NaNs:
// Quiet NaNs (most significant mantissa bit set)
0x7f23,
0x7e00,
// Signalling NaNs (most significant mantissa bit clear, mantissa non-zero)
0x7d23,
0x7c01,
// Subnormals:
// A recognisable bit pattern
0x0012,
// The largest subnormal value
0x03ff,
// The smallest subnormal value
0x0001,
// Other values:
// The values above with the sign bit set, in the same order
0x8000, 0x8400, 0xb7ff, 0xb800, 0xb801, 0xbbff, 0xbc00, 0xbc01, 0xbe00, 0xc900, 0xfbff, 0xfc00,
0xff23, 0xfe00, 0xfd23, 0xfc01, 0x8012, 0x83ff, 0x8001,
// Random values (the first entry, 0xfc00, repeats negative infinity from the list above)
0xfc00, 0xc000, 0x5140, 0x5800, 0x63d2, 0x5630, 0x3560, 0x9191, 0x4178, 0x6212, 0x67d0, 0x3312,
0x4cef, 0x4973, 0x3ecc, 0x5166, 0x4d80, 0x6248, 0x46fd, 0x39c4, 0x39c5, 0x4866, 0x6050, 0x498e,
0x4a0f,
// Previous values in a different order. This list also includes 0x3555, which is only
// introduced under "Specific values" at the end of the table.
0x3555, 0xfc00, 0xc000, 0x9191, 0x5140, 0x5800, 0x8001, 0x83ff, 0x63d2, 0x5630, 0x3560, 0x4178,
0x7d23, 0x7c01, 0x0012, 0xb800, 0x03ff, 0x0001, 0x7e00, 0x7f23, 0x8000, 0x8400, 0xb7ff, 0xb801,
0x3312, 0x4cef, 0x4973, 0x39c4, 0x3ecc, 0x5166, 0x67d0, 0x6212, 0x4d80, 0x6248, 0x46fd, 0x39c5,
0xbc01, 0xbe00, 0xc900, 0xfc01, 0xfbff, 0xfc00, 0xbc00, 0xbbff, 0xff23, 0xfe00, 0xfd23, 0x8012,
0x37ff, 0x3800, 0x3801, 0x7bff, 0x3bff, 0x3c00, 0x0400, 0x0000, 0x3c01, 0x3e00, 0x4900, 0x7c00,
0x498e, 0x4a0f, 0x6050, 0x4866,
// Specific values:
// As close to 1/3 as possible.
0x3555,
];
/// Bit patterns used as 32-bit test values.
///
/// The per-entry comments describe each pattern when read as an IEEE 754 single-precision
/// float; `test_values_array` feeds the same bits to integer types as well. Entries are selected
/// by `bit_pattern_for_test_values_array`, which wraps around the end of the table.
#[rustfmt::skip]
pub const BIT_PATTERNS_32: &[u32] = &[
    // Simple values:
    // 0.0
    0x00000000,
    // The smallest normal value
    0x00800000,
    // The value just below 0.5
    0x3effffff,
    // 0.5
    0x3f000000,
    // The value just above 0.5
    0x3f000001,
    // The value just below 1.0
    0x3f7fffff,
    // 1.0
    0x3f800000,
    // The value just above 1.0
    0x3f800001,
    // 1.5
    0x3fc00000,
    // 10
    0x41200000,
    // The largest finite value (`f32::MAX`). An exponent field of all ones, as in `0x7f8fffff`,
    // would encode a NaN rather than a finite number.
    0x7f7fffff,
    // Infinity
    0x7f800000,
    // NaNs:
    // Quiet NaNs (most significant mantissa bit set)
    0x7fd23456,
    0x7fc00000,
    // Signalling NaNs (most significant mantissa bit clear, mantissa non-zero)
    0x7f923456,
    0x7f800001,
    // Subnormals:
    // A recognisable bit pattern
    0x00123456,
    // The largest subnormal value
    0x007fffff,
    // The smallest subnormal value
    0x00000001,
    // Other values:
    // The values above with the sign bit set, in the same order. The three negated subnormals
    // appear twice; the repetition is kept so that the table length and the position of every
    // later entry stay as they were.
    0x80000000, 0x80800000, 0xbeffffff, 0xbf000000, 0xbf000001, 0xbf7fffff, 0xbf800000, 0xbf800001,
    0xbfc00000, 0xc1200000, 0xff7fffff, 0xff800000, 0xffd23456, 0xffc00000, 0xff923456, 0xff800001,
    0x80123456, 0x807fffff, 0x80000001, 0x80123456, 0x807fffff, 0x80000001,
    // Random values
    0x4205cccd, 0x4229178D, 0x42C6A0C5, 0x3B3302F7, 0x3F9DF45E, 0x41DAA3D7, 0x47C3501D, 0xC3889333,
    0xC2C675C3, 0xC69C449A, 0xC341FD71, 0xC502DFD7, 0xBBB43958, 0x3EE24DD3, 0x42B1C28F, 0x42F06666,
    0x45D379C3, 0x44637148, 0x3CBBECAB, 0x4113EDFA, 0x444B22F2, 0x1FD93A96, 0x9921055F, 0xFF626925,
    // Specific values:
    // Approximately Pi
    0x40490fdb,
    // Approximately 1/3
    0x3eaaaaab,
];
/// Bit patterns used as 64-bit test values.
///
/// The per-entry comments describe each pattern when read as an IEEE 754 double-precision
/// float; `test_values_array` feeds the same bits to integer types as well. Entries are selected
/// by `bit_pattern_for_test_values_array`, which wraps around the end of the table.
#[rustfmt::skip]
pub const BIT_PATTERNS_64: &[u64] = &[
    // Simple values:
    // 0.0
    0x0000000000000000,
    // The smallest normal value
    0x0010000000000000,
    // The value just below 0.5
    0x3fdfffffffffffff,
    // 0.5
    0x3fe0000000000000,
    // The value just above 0.5
    0x3fe0000000000001,
    // The value just below 1.0
    0x3fefffffffffffff,
    // 1.0
    0x3ff0000000000000,
    // The value just above 1.0
    0x3ff0000000000001,
    // 1.5
    0x3ff8000000000000,
    // 10
    0x4024000000000000,
    // The largest finite value
    0x7fefffffffffffff,
    // Infinity
    0x7ff0000000000000,
    // NaNs:
    // Quiet NaNs (most significant mantissa bit set)
    0x7ff923456789abcd, 0x7ff8000000000000,
    // Signalling NaNs (most significant mantissa bit clear, mantissa non-zero). The second
    // entry needs a non-zero mantissa: with a zero mantissa the pattern would be Infinity.
    0x7ff123456789abcd, 0x7ff0000000000001,
    // Subnormals:
    // A recognisable bit pattern
    0x000123456789abcd,
    // The largest subnormal value
    0x000fffffffffffff,
    // The smallest subnormal value
    0x0000000000000001,
    // Other values:
    // The values above with the sign bit set, in the same order
    0x8000000000000000, 0x8010000000000000, 0xbfdfffffffffffff, 0xbfe0000000000000,
    0xbfe0000000000001, 0xbfefffffffffffff, 0xbff0000000000000, 0xbff0000000000001,
    0xbff8000000000000, 0xc024000000000000, 0xffefffffffffffff, 0xfff0000000000000,
    0xfff923456789abcd, 0xfff8000000000000, 0xfff123456789abcd, 0xfff0000000000001,
    0x800123456789abcd, 0x800fffffffffffff, 0x8000000000000001,
    // Specific values:
    // Pi
    0x400921FB54442D18,
    // Approximately 1/3
    0x3fd5555555555555,
];