crates/core_arch/src/x86/f16c.rs - rust-lang/stdarch - Git at Google

 //! [F16C intrinsics].
 //!
 //! [F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769

 use crate::{
     core_arch::{simd::*, x86::*},
     hint::unreachable_unchecked,
     mem::transmute,
 };

 #[cfg(test)]
 use stdarch_test::assert_instr;

 // Raw LLVM intrinsics behind the F16C conversions in this module.
 // NOTE(review): the allow is presumably needed because the SIMD vector types
 // in these signatures trip the `improper_ctypes` lint - confirm.
 #[allow(improper_ctypes)]
 extern "unadjusted" {
     /// 128-bit half -> single conversion: takes an `i16x8`, yields an `f32x4`.
     #[link_name = "llvm.x86.vcvtph2ps.128"]
     fn llvm_vcvtph2ps_128(halves: i16x8) -> f32x4;
     /// 256-bit half -> single conversion: takes an `i16x8`, yields an `f32x8`.
     #[link_name = "llvm.x86.vcvtph2ps.256"]
     fn llvm_vcvtph2ps_256(halves: i16x8) -> f32x8;
     /// 128-bit single -> half conversion with an integer rounding operand.
     #[link_name = "llvm.x86.vcvtps2ph.128"]
     fn llvm_vcvtps2ph_128(singles: f32x4, imm8: i32) -> i16x8;
     /// 256-bit single -> half conversion with an integer rounding operand.
     #[link_name = "llvm.x86.vcvtps2ph.256"]
     fn llvm_vcvtps2ph_256(singles: f32x8, imm8: i32) -> i16x8;
 }

 /// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
 /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
 /// vector.
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
     transmute(llvm_vcvtph2ps_128(transmute(a)))
 }

 /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
 /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
     transmute(llvm_vcvtph2ps_256(transmute(a)))
 }

 /// Turns a runtime-looking rounding immediate into a literal operand.
 ///
 /// Expands to a `match` on the low three bits of `$rounding` and invokes the
 /// macro named by `$call` with the matching integer literal (`0` through `7`),
 /// so the callee always sees a constant.
 ///
 /// * `$rounding` - identifier of an integer variable holding the immediate.
 /// * `$call` - name of a macro in scope at the call site that accepts one
 ///   expression argument.
 ///
 /// The expansion contains a call to `unreachable_unchecked`, so it must be
 /// used in an unsafe context with that function in scope.
 macro_rules! dispatch_rounding {
     ($rounding:ident, $call:ident) => {{
         // Mask to three bits so every input lands on one of the eight arms.
         // Without the mask, flag combinations such as
         // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` would reach the
         // wildcard arm and be undefined behaviour.
         // NOTE(review): assumes `_MM_FROUND_NO_EXC` is 0x08 and that
         // VCVTPS2PH ignores imm8[7:3] - confirm against the Intel SDM.
         match $rounding & 0b111 {
             0 => $call!(0),
             1 => $call!(1),
             2 => $call!(2),
             3 => $call!(3),
             4 => $call!(4),
             5 => $call!(5),
             6 => $call!(6),
             7 => $call!(7),
             // The masked scrutinee is always in 0..=7; this arm only
             // satisfies exhaustiveness checking.
             _ => unreachable_unchecked(),
         }
     }};
 }

 /// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
 /// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
 /// vector.
 ///
 /// Rounding is done according to the `imm_rounding` parameter, which can be one of:
 ///
 /// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
 /// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
 /// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
 /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
 /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
 #[inline]
 #[target_feature(enable = "f16c")]
 #[rustc_args_required_const(1)]
 #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
 pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
     let a = transmute(a);
     macro_rules! call {
         ($rounding:expr) => {
             llvm_vcvtps2ph_128(a, $rounding)
         };
     }
     transmute(dispatch_rounding!(imm_rounding, call))
 }

 /// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
 /// 16-bit half-precision float values stored in a 128-bit wide vector.
 ///
 /// Rounding is done according to the `imm_rounding` parameter, which can be one of:
 ///
 /// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
 /// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions,
 /// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions,
 /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
 /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
 #[inline]
 #[target_feature(enable = "f16c")]
 #[rustc_args_required_const(1)]
 #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
 pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
     let a = transmute(a);
     macro_rules! call {
         ($rounding:expr) => {
             llvm_vcvtps2ph_256(a, $rounding)
         };
     }
     transmute(dispatch_rounding!(imm_rounding, call))
 }

 #[cfg(test)]
 mod tests {
     use crate::{core_arch::x86::*, mem::transmute};
     use stdarch_test::simd_test;

     /// Round-trips four floats through `_mm_cvtps_ph` and `_mm_cvtph_ps`
     /// and expects the original values back.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm_cvtph_ps() {
         let expected: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
         let singles: __m128 = transmute(expected);
         let packed: __m128i = _mm_cvtps_ph(singles, 0);
         let round_tripped: [f32; 4] = transmute(_mm_cvtph_ps(packed));
         assert_eq!(round_tripped, expected);
     }

     /// Round-trips eight floats through `_mm256_cvtps_ph` and
     /// `_mm256_cvtph_ps` and expects the original values back.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm256_cvtph_ps() {
         let expected: [f32; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
         let singles: __m256 = transmute(expected);
         let packed: __m128i = _mm256_cvtps_ph(singles, 0);
         let round_tripped: [f32; 8] = transmute(_mm256_cvtph_ps(packed));
         assert_eq!(round_tripped, expected);
     }
 }
	//! [F16C intrinsics].
	//!
	//! [F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769

	use crate::{
	core_arch::{simd::, x86::},
	hint::unreachable_unchecked,
	mem::transmute,
	};

	#[cfg(test)]
	use stdarch_test::assert_instr;

	// Raw LLVM intrinsics behind the F16C conversions in this module.
	// NOTE(review): the allow is presumably needed because the SIMD vector types
	// in these signatures trip the `improper_ctypes` lint - confirm.
	#[allow(improper_ctypes)]
	extern "unadjusted" {
	    /// 128-bit half -> single conversion: takes an `i16x8`, yields an `f32x4`.
	    #[link_name = "llvm.x86.vcvtph2ps.128"]
	    fn llvm_vcvtph2ps_128(halves: i16x8) -> f32x4;
	    /// 256-bit half -> single conversion: takes an `i16x8`, yields an `f32x8`.
	    #[link_name = "llvm.x86.vcvtph2ps.256"]
	    fn llvm_vcvtph2ps_256(halves: i16x8) -> f32x8;
	    /// 128-bit single -> half conversion with an integer rounding operand.
	    #[link_name = "llvm.x86.vcvtps2ph.128"]
	    fn llvm_vcvtps2ph_128(singles: f32x4, imm8: i32) -> i16x8;
	    /// 256-bit single -> half conversion with an integer rounding operand.
	    #[link_name = "llvm.x86.vcvtps2ph.256"]
	    fn llvm_vcvtps2ph_256(singles: f32x8, imm8: i32) -> i16x8;
	}

	/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
	/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
	/// vector.
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
	pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
	transmute(llvm_vcvtph2ps_128(transmute(a)))
	}

	/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
	/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
	pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
	transmute(llvm_vcvtph2ps_256(transmute(a)))
	}

	/// Turns a runtime-looking rounding immediate into a literal operand.
	///
	/// Expands to a `match` on the low three bits of `$rounding` and invokes the
	/// macro named by `$call` with the matching integer literal (`0` through `7`),
	/// so the callee always sees a constant.
	///
	/// * `$rounding` - identifier of an integer variable holding the immediate.
	/// * `$call` - name of a macro in scope at the call site that accepts one
	///   expression argument.
	///
	/// The expansion contains a call to `unreachable_unchecked`, so it must be
	/// used in an unsafe context with that function in scope.
	macro_rules! dispatch_rounding {
	    ($rounding:ident, $call:ident) => {{
	        // Mask to three bits so every input lands on one of the eight arms.
	        // Without the mask, flag combinations such as
	        // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` would reach the
	        // wildcard arm and be undefined behaviour.
	        // NOTE(review): assumes `_MM_FROUND_NO_EXC` is 0x08 and that
	        // VCVTPS2PH ignores imm8[7:3] - confirm against the Intel SDM.
	        match $rounding & 0b111 {
	            0 => $call!(0),
	            1 => $call!(1),
	            2 => $call!(2),
	            3 => $call!(3),
	            4 => $call!(4),
	            5 => $call!(5),
	            6 => $call!(6),
	            7 => $call!(7),
	            // The masked scrutinee is always in 0..=7; this arm only
	            // satisfies exhaustiveness checking.
	            _ => unreachable_unchecked(),
	        }
	    }};
	}

	/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
	/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
	/// vector.
	///
	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
	///
	/// * `_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
	/// * `_MM_FROUND_TO_NEG_INF \| _MM_FROUND_NO_EXC`: round down and suppress exceptions,
	/// * `_MM_FROUND_TO_POS_INF \| _MM_FROUND_NO_EXC`: round up and suppress exceptions,
	/// * `_MM_FROUND_TO_ZERO \| _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
	/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
	#[inline]
	#[target_feature(enable = "f16c")]
	#[rustc_args_required_const(1)]
	#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
	pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
	let a = transmute(a);
	macro_rules! call {
	($rounding:expr) => {
	llvm_vcvtps2ph_128(a, $rounding)
	};
	}
	transmute(dispatch_rounding!(imm_rounding, call))
	}

	/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
	/// 16-bit half-precision float values stored in a 128-bit wide vector.
	///
	/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
	///
	/// * `_MM_FROUND_TO_NEAREST_INT \| _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions,
	/// * `_MM_FROUND_TO_NEG_INF \| _MM_FROUND_NO_EXC`: round down and suppress exceptions,
	/// * `_MM_FROUND_TO_POS_INF \| _MM_FROUND_NO_EXC`: round up and suppress exceptions,
	/// * `_MM_FROUND_TO_ZERO \| _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
	/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
	#[inline]
	#[target_feature(enable = "f16c")]
	#[rustc_args_required_const(1)]
	#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
	pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
	let a = transmute(a);
	macro_rules! call {
	($rounding:expr) => {
	llvm_vcvtps2ph_256(a, $rounding)
	};
	}
	transmute(dispatch_rounding!(imm_rounding, call))
	}

	#[cfg(test)]
	mod tests {
	    use crate::{core_arch::x86::*, mem::transmute};
	    use stdarch_test::simd_test;

	    /// Round-trips four floats through `_mm_cvtps_ph` and `_mm_cvtph_ps`
	    /// and expects the original values back.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm_cvtph_ps() {
	        let expected: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
	        let singles: __m128 = transmute(expected);
	        let packed: __m128i = _mm_cvtps_ph(singles, 0);
	        let round_tripped: [f32; 4] = transmute(_mm_cvtph_ps(packed));
	        assert_eq!(round_tripped, expected);
	    }

	    /// Round-trips eight floats through `_mm256_cvtps_ph` and
	    /// `_mm256_cvtph_ps` and expects the original values back.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm256_cvtph_ps() {
	        let expected: [f32; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
	        let singles: __m256 = transmute(expected);
	        let packed: __m128i = _mm256_cvtps_ph(singles, 0);
	        let round_tripped: [f32; 8] = transmute(_mm256_cvtph_ps(packed));
	        assert_eq!(round_tripped, expected);
	    }
	}