crates/core_arch/src/x86/f16c.rs - rust-lang/stdarch - Git at Google

 //! [F16C intrinsics].
 //!
 //! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769

 use crate::core_arch::{simd::*, x86::*};

 #[cfg(test)]
 use stdarch_test::assert_instr;

 // Raw bindings to the LLVM x86 F16C conversion intrinsics, resolved through
 // `link_name`. The public `_mm*_cvt*` wrappers below are the callers visible
 // in this file.
 // NOTE(review): `improper_ctypes` is presumably silenced because the
 // signatures use SIMD vector types — confirm.
 #[allow(improper_ctypes)]
 unsafe extern "unadjusted" {
     // Half -> single precision, 4 x f32 result; wrapped by `_mm_cvtph_ps`.
     #[link_name = "llvm.x86.vcvtph2ps.128"]
     fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
     // Half -> single precision, 8 x f32 result; wrapped by `_mm256_cvtph_ps`.
     #[link_name = "llvm.x86.vcvtph2ps.256"]
     fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
     // Single -> half precision from 4 x f32; `rounding` receives the
     // wrapper's `IMM_ROUNDING` const argument. Wrapped by `_mm_cvtps_ph`.
     #[link_name = "llvm.x86.vcvtps2ph.128"]
     fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
     // Single -> half precision from 8 x f32; `rounding` receives the
     // wrapper's `IMM_ROUNDING` const argument. Wrapped by `_mm256_cvtps_ph`.
     #[link_name = "llvm.x86.vcvtps2ph.256"]
     fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
 }

 /// Widens four half-precision (16-bit) floats to single precision.
 ///
 /// The inputs are taken from the lowest 64 bits of the 128-bit vector `a`;
 /// the four resulting 32-bit floats are returned in a 128-bit wide vector.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
 pub fn _mm_cvtph_ps(a: __m128i) -> __m128 {
     unsafe {
         // Reinterpret `a` as the lane type the LLVM binding takes, convert,
         // then reinterpret the result as the public return type.
         let widened = llvm_vcvtph2ps_128(transmute(a));
         transmute(widened)
     }
 }

 /// Widens eight half-precision (16-bit) floats to single precision.
 ///
 /// All eight inputs come from the 128-bit vector `a`; the eight resulting
 /// 32-bit floats are returned in a 256-bit wide vector.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
 pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
     unsafe {
         // Reinterpret `a` as the lane type the LLVM binding takes, convert,
         // then reinterpret the result as the public return type.
         let widened = llvm_vcvtph2ps_256(transmute(a));
         transmute(widened)
     }
 }

 /// Narrows the four single-precision floats in the 128-bit vector `a` to
 /// half precision, storing the four 16-bit results in the lowest 64 bits of
 /// the returned 128-bit vector.
 ///
 /// The rounding behaviour is selected by the `IMM_ROUNDING` const parameter,
 /// which can be one of:
 ///
 /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
 /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
 /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
 /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
 pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
     // Compile-time check on the immediate (presumably: it must fit in 3
     // unsigned bits).
     // NOTE(review): if `_MM_FROUND_NO_EXC` sets a bit above bit 2, the
     // `| _MM_FROUND_NO_EXC` combinations listed in the docs would be
     // rejected here — confirm against the constant's definition.
     static_assert_uimm_bits!(IMM_ROUNDING, 3);
     // View `a` as f32 lanes, convert with the requested rounding immediate,
     // and reinterpret the i16 lanes as the public integer vector type.
     unsafe { transmute(llvm_vcvtps2ph_128(a.as_f32x4(), IMM_ROUNDING)) }
 }

 /// Narrows the eight single-precision floats in the 256-bit vector `a` to
 /// half precision, returning the eight 16-bit results in a 128-bit wide
 /// vector.
 ///
 /// The rounding behaviour is selected by the `IMM_ROUNDING` const parameter,
 /// which can be one of:
 ///
 /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
 /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
 /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
 /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
 #[inline]
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
 pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
     // Compile-time check on the immediate (presumably: it must fit in 3
     // unsigned bits).
     // NOTE(review): if `_MM_FROUND_NO_EXC` sets a bit above bit 2, the
     // `| _MM_FROUND_NO_EXC` combinations listed in the docs would be
     // rejected here — confirm against the constant's definition.
     static_assert_uimm_bits!(IMM_ROUNDING, 3);
     // View `a` as f32 lanes, convert with the requested rounding immediate,
     // and reinterpret the i16 lanes as the public integer vector type.
     unsafe { transmute(llvm_vcvtps2ph_256(a.as_f32x8(), IMM_ROUNDING)) }
 }

 #[cfg(test)]
 mod tests {
     use crate::{core_arch::x86::*, mem::transmute};
     use stdarch_test::simd_test;

     // Raw 16-bit patterns used as half-precision inputs/outputs; the tests
     // below pair them with the f32 values 1.0 through 8.0.
     const F16_ONE: i16 = 0x3c00;
     const F16_TWO: i16 = 0x4000;
     const F16_THREE: i16 = 0x4200;
     const F16_FOUR: i16 = 0x4400;
     const F16_FIVE: i16 = 0x4500;
     const F16_SIX: i16 = 0x4600;
     const F16_SEVEN: i16 = 0x4700;
     const F16_EIGHT: i16 = 0x4800;

     // Half -> single, 128-bit result: four half values widen to 1.0..=4.0.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm_cvtph_ps() {
         let halves = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
         let expected = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let actual = _mm_cvtph_ps(halves);
         assert_eq_m128(actual, expected);
     }

     // Half -> single, 256-bit result: eight half values widen to 1.0..=8.0.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm256_cvtph_ps() {
         let halves = _mm_set_epi16(
             F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
         );
         let expected = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let actual = _mm256_cvtph_ps(halves);
         assert_eq_m256(actual, expected);
     }

     // Single -> half from a 128-bit input, using the current rounding direction.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm_cvtps_ph() {
         let singles = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let expected = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
         let actual = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
         assert_eq_m128i(actual, expected);
     }

     // Single -> half from a 256-bit input, using the current rounding direction.
     #[simd_test(enable = "f16c")]
     unsafe fn test_mm256_cvtps_ph() {
         let singles = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let expected = _mm_set_epi16(
             F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
         );
         let actual = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(singles);
         assert_eq_m128i(actual, expected);
     }
 }
	//! [F16C intrinsics].
	//!
	//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769

	use crate::core_arch::{simd::, x86::};

	#[cfg(test)]
	use stdarch_test::assert_instr;

	// Raw bindings to the LLVM x86 F16C conversion intrinsics, resolved through
	// `link_name`. The public `_mm*_cvt*` wrappers below are the callers visible
	// in this file.
	// NOTE(review): `improper_ctypes` is presumably silenced because the
	// signatures use SIMD vector types — confirm.
	#[allow(improper_ctypes)]
	unsafe extern "unadjusted" {
	// Half -> single precision, 4 x f32 result; wrapped by `_mm_cvtph_ps`.
	#[link_name = "llvm.x86.vcvtph2ps.128"]
	fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
	// Half -> single precision, 8 x f32 result; wrapped by `_mm256_cvtph_ps`.
	#[link_name = "llvm.x86.vcvtph2ps.256"]
	fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
	// Single -> half precision from 4 x f32; `rounding` receives the
	// wrapper's `IMM_ROUNDING` const argument. Wrapped by `_mm_cvtps_ph`.
	#[link_name = "llvm.x86.vcvtps2ph.128"]
	fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
	// Single -> half precision from 8 x f32; `rounding` receives the
	// wrapper's `IMM_ROUNDING` const argument. Wrapped by `_mm256_cvtps_ph`.
	#[link_name = "llvm.x86.vcvtps2ph.256"]
	fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
	}

	/// Widens four half-precision (16-bit) floats to single precision.
	///
	/// The inputs are taken from the lowest 64 bits of the 128-bit vector `a`;
	/// the four resulting 32-bit floats are returned in a 128-bit wide vector.
	///
	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
	pub fn _mm_cvtph_ps(a: __m128i) -> __m128 {
	    unsafe {
	        // Reinterpret `a` as the lane type the LLVM binding takes, convert,
	        // then reinterpret the result as the public return type.
	        let singles = llvm_vcvtph2ps_128(transmute(a));
	        transmute(singles)
	    }
	}

	/// Widens eight half-precision (16-bit) floats to single precision.
	///
	/// All eight inputs come from the 128-bit vector `a`; the eight resulting
	/// 32-bit floats are returned in a 256-bit wide vector.
	///
	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtph2ps"))]
	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
	pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
	    unsafe {
	        // Reinterpret `a` as the lane type the LLVM binding takes, convert,
	        // then reinterpret the result as the public return type.
	        let singles = llvm_vcvtph2ps_256(transmute(a));
	        transmute(singles)
	    }
	}

	/// Narrows the four single-precision floats in the 128-bit vector `a` to
	/// half precision, storing the four 16-bit results in the lowest 64 bits of
	/// the returned 128-bit vector.
	///
	/// The rounding behaviour is selected by the `IMM_ROUNDING` const parameter,
	/// which can be one of:
	///
	/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
	/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
	/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
	/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
	///
	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
	#[rustc_legacy_const_generics(1)]
	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
	pub fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
	    // Compile-time check on the immediate (presumably: it must fit in 3
	    // unsigned bits).
	    // NOTE(review): if `_MM_FROUND_NO_EXC` sets a bit above bit 2, the
	    // `| _MM_FROUND_NO_EXC` combinations listed in the docs would be
	    // rejected here — confirm against the constant's definition.
	    static_assert_uimm_bits!(IMM_ROUNDING, 3);
	    // View `a` as f32 lanes, convert with the requested rounding immediate,
	    // and reinterpret the i16 lanes as the public integer vector type.
	    unsafe { transmute(llvm_vcvtps2ph_128(a.as_f32x4(), IMM_ROUNDING)) }
	}

	/// Narrows the eight single-precision floats in the 256-bit vector `a` to
	/// half precision, returning the eight 16-bit results in a 128-bit wide
	/// vector.
	///
	/// The rounding behaviour is selected by the `IMM_ROUNDING` const parameter,
	/// which can be one of:
	///
	/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
	/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
	/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
	/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
	/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
	///
	/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
	#[inline]
	#[target_feature(enable = "f16c")]
	#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
	#[rustc_legacy_const_generics(1)]
	#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
	pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
	    // Compile-time check on the immediate (presumably: it must fit in 3
	    // unsigned bits).
	    // NOTE(review): if `_MM_FROUND_NO_EXC` sets a bit above bit 2, the
	    // `| _MM_FROUND_NO_EXC` combinations listed in the docs would be
	    // rejected here — confirm against the constant's definition.
	    static_assert_uimm_bits!(IMM_ROUNDING, 3);
	    // View `a` as f32 lanes, convert with the requested rounding immediate,
	    // and reinterpret the i16 lanes as the public integer vector type.
	    unsafe { transmute(llvm_vcvtps2ph_256(a.as_f32x8(), IMM_ROUNDING)) }
	}

	#[cfg(test)]
	mod tests {
	    use crate::{core_arch::x86::*, mem::transmute};
	    use stdarch_test::simd_test;

	    // Raw 16-bit patterns used as half-precision inputs/outputs; the tests
	    // below pair them with the f32 values 1.0 through 8.0.
	    const F16_ONE: i16 = 0x3c00;
	    const F16_TWO: i16 = 0x4000;
	    const F16_THREE: i16 = 0x4200;
	    const F16_FOUR: i16 = 0x4400;
	    const F16_FIVE: i16 = 0x4500;
	    const F16_SIX: i16 = 0x4600;
	    const F16_SEVEN: i16 = 0x4700;
	    const F16_EIGHT: i16 = 0x4800;

	    // Half -> single, 128-bit result: four half values widen to 1.0..=4.0.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm_cvtph_ps() {
	        let src = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
	        let want = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
	        let got = _mm_cvtph_ps(src);
	        assert_eq_m128(got, want);
	    }

	    // Half -> single, 256-bit result: eight half values widen to 1.0..=8.0.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm256_cvtph_ps() {
	        let src = _mm_set_epi16(
	            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
	        );
	        let want = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
	        let got = _mm256_cvtph_ps(src);
	        assert_eq_m256(got, want);
	    }

	    // Single -> half from a 128-bit input, using the current rounding direction.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm_cvtps_ph() {
	        let src = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
	        let want = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
	        let got = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(src);
	        assert_eq_m128i(got, want);
	    }

	    // Single -> half from a 256-bit input, using the current rounding direction.
	    #[simd_test(enable = "f16c")]
	    unsafe fn test_mm256_cvtps_ph() {
	        let src = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
	        let want = _mm_set_epi16(
	            F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
	        );
	        let got = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(src);
	        assert_eq_m128i(got, want);
	    }
	}