| use crate::{ |
| core_arch::{simd::*, x86::*}, |
| intrinsics::simd::*, |
| mem::transmute, |
| }; |
| |
// And
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let and = _mm_and_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let and = _mm_and_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, and, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let and = _mm256_and_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let and = _mm256_and_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, and, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let and = _mm512_and_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let and = _mm512_and_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, and, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let and = _mm_and_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let and = _mm_and_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, and, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let and = _mm256_and_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let and = _mm256_and_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, and, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_and( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let and = _mm512_and_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let and = _mm512_and_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, and, f32x16::ZERO)) |
| } |
| } |
| |
| // Andnot |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let andnot = _mm_andnot_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let andnot = _mm_andnot_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let andnot = _mm256_andnot_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let andnot = _mm256_andnot_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let andnot = _mm512_andnot_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let andnot = _mm512_andnot_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let andnot = _mm_andnot_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let andnot = _mm_andnot_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let andnot = _mm256_andnot_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let andnot = _mm256_andnot_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let andnot = _mm512_andnot_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let andnot = _mm512_andnot_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) |
| } |
| } |
| |
| // Or |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let or = _mm_or_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let or = _mm_or_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, or, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let or = _mm256_or_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let or = _mm256_or_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, or, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let or = _mm512_or_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let or = _mm512_or_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, or, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let or = _mm_or_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let or = _mm_or_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, or, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let or = _mm256_or_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let or = _mm256_or_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, or, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_or( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let or = _mm512_or_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let or = _mm512_or_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, or, f32x16::ZERO)) |
| } |
| } |
| |
| // Xor |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let xor = _mm_xor_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let xor = _mm_xor_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let xor = _mm256_xor_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let xor = _mm256_xor_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let xor = _mm512_xor_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let xor = _mm512_xor_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let xor = _mm_xor_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let xor = _mm_xor_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let xor = _mm256_xor_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let xor = _mm256_xor_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_xor( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let xor = _mm512_xor_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let xor = _mm512_xor_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) |
| } |
| } |
| |
| // Broadcast |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { |
| unsafe { |
| let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 { |
| unsafe { |
| let b = _mm256_broadcast_f32x2(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { |
| unsafe { |
| let b = _mm256_broadcast_f32x2(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { |
| unsafe { |
| let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x2(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x16())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x2(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, f32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { |
| unsafe { |
| let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x8(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x16())) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x8(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, f32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { |
| unsafe { |
| let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { |
| unsafe { |
| let b = _mm256_broadcast_f64x2(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { |
| unsafe { |
| let b = _mm256_broadcast_f64x2(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { |
| unsafe { |
| let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { |
| unsafe { |
| let b = _mm512_broadcast_f64x2(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { |
| unsafe { |
| let b = _mm512_broadcast_f64x2(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_broadcast_i32x2(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x4())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_broadcast_i32x2(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, b, i32x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i32x2(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x8())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i32x2(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, i32x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x2(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x16())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x2(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, i32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548) |
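| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| ///         // The whole 256-bit source is duplicated into both halves of the 512-bit result. |
| ///         let r = _mm512_broadcast_i32x8(a); |
| ///         let mut out = [0i32; 16]; |
| ///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); |
| ///     } |
| /// } |
| /// ``` |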
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i { |
| unsafe { |
| let a = a.as_i32x8(); |
| let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x8(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x16())) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x8(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, i32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551) |
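| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(20, 10); // lane 0 = 10, lane 1 = 20 |
| ///         // The 128-bit source is duplicated into both halves of the 256-bit result. |
| ///         let r = _mm256_broadcast_i64x2(a); |
| ///         let mut out = [0i64; 4]; |
| ///         _mm256_storeu_si256(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [10, 20, 10, 20]); |
| ///     } |
| /// } |
| /// ``` |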
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i { |
| unsafe { |
| let a = a.as_i64x2(); |
| let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i64x2(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x4())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i64x2(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, i64x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { |
| unsafe { |
| let a = a.as_i64x2(); |
| let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i64x2(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x8())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i64x2(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, i64x8::ZERO)) |
| } |
| } |
| |
| // Extract |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946) |
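| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_setr_ps( |
| ///             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, |
| ///             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, |
| ///         ); |
| ///         // IMM8 = 1 selects the upper 256 bits (elements 8 through 15). |
| ///         let hi = _mm512_extractf32x8_ps::<1>(a); |
| ///         let mut out = [0.0f32; 8]; |
| ///         _mm256_storeu_ps(out.as_mut_ptr(), hi); |
| ///         assert_eq!(out, [8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]); |
| ///     } |
| /// } |
| /// ``` |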
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
| _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
| } |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extractf32x8_ps::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8())) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extractf32x8_ps::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| _ => simd_shuffle!(a, a, [2, 3]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m256d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extractf64x2_pd::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extractf64x2_pd::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952) |
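| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
| ///         // IMM8 = 2 selects the third 128-bit lane (elements 4 and 5). |
| ///         let r = _mm512_extractf64x2_pd::<2>(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [4.0, 5.0]); |
| ///     } |
| /// } |
| /// ``` |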
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| 1 => simd_shuffle!(a, a, [2, 3]), |
| 2 => simd_shuffle!(a, a, [4, 5]), |
| _ => simd_shuffle!(a, a, [6, 7]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i32x16(); |
| let b: i32x8 = match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
| _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
| }; |
| transmute(b) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>( |
| src: __m256i, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x8())) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, i32x8::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968) |
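| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_epi64x(10, 20, 30, 40); |
| ///         // IMM8 = 1 selects the upper 128 bits (elements 2 and 3). |
| ///         let r = _mm256_extracti64x2_epi64::<1>(a); |
| ///         let mut out = [0i64; 2]; |
| ///         _mm_storeu_si128(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [30, 40]); |
| ///     } |
| /// } |
| /// ``` |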
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i64x4(); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| _ => simd_shuffle!(a, a, [2, 3]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>( |
| src: __m128i, |
| k: __mmask8, |
| a: __m256i, |
| ) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let a = a.as_i64x8(); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| 1 => simd_shuffle!(a, a, [2, 3]), |
| 2 => simd_shuffle!(a, a, [4, 5]), |
| _ => simd_shuffle!(a, a, [6, 7]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>( |
| src: __m128i, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| // Insert |
| |
| /// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850) |
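| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_set1_ps(1.0); |
| ///         let b = _mm256_set1_ps(2.0); |
| ///         // IMM8 = 1 replaces the upper 256 bits of `a` with `b`. |
| ///         let r = _mm512_insertf32x8::<1>(a, b); |
| ///         let mut out = [0.0f32; 16]; |
| ///         _mm512_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!( |
| ///             out, |
| ///             [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] |
| ///         ); |
| ///     } |
| /// } |
| /// ``` |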
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_castps256_ps512(b); |
| match IMM8 & 1 { |
| 0 => { |
| simd_shuffle!( |
| a, |
| b, |
| [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
| ) |
| } |
| _ => { |
| simd_shuffle!( |
| a, |
| b, |
| [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
| ) |
| } |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_insertf32x8<const IMM8: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m256, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_insertf32x8::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, c, f32x16::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853) |
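| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); |
| ///         let b = _mm_setr_pd(9.0, 10.0); |
| ///         // IMM8 = 0 replaces the lower 128 bits of `a` with `b`. |
| ///         let r = _mm256_insertf64x2::<0>(a, b); |
| ///         let mut out = [0.0f64; 4]; |
| ///         _mm256_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [9.0, 10.0, 3.0, 4.0]); |
| ///     } |
| /// } |
| /// ``` |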
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_castpd128_pd256(b); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
| _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_insertf64x2<const IMM8: i32>( |
| src: __m256d, |
| k: __mmask8, |
| a: __m256d, |
| b: __m128d, |
| ) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_insertf64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, c, f64x4::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_castpd128_pd512(b); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
| 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
| 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
| _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_insertf64x2<const IMM8: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m128d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_insertf64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, c, f64x8::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i32x16(); |
| let b = _mm512_castsi256_si512(b).as_i32x16(); |
| let r: i32x16 = match IMM8 & 1 { |
| 0 => { |
| simd_shuffle!( |
| a, |
| b, |
| [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
| ) |
| } |
| _ => { |
| simd_shuffle!( |
| a, |
| b, |
| [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
| ) |
| } |
| }; |
| transmute(r) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_inserti32x8<const IMM8: i32>( |
| src: __m512i, |
| k: __mmask16, |
| a: __m512i, |
| b: __m256i, |
| ) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_inserti32x8::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16(); |
| transmute(simd_select_bitmask(k, c, i32x16::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i64x4(); |
| let b = _mm256_castsi128_si256(b).as_i64x4(); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
| _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_inserti64x2<const IMM8: i32>( |
| src: __m256i, |
| k: __mmask8, |
| a: __m256i, |
| b: __m128i, |
| ) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_inserti64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, c, i64x4::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let a = a.as_i64x8(); |
| let b = _mm512_castsi128_si512(b).as_i64x8(); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
| 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
| 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
| _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_inserti64x2<const IMM8: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512i, |
| b: __m128i, |
| ) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_inserti64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, c, i64x8::ZERO)) |
| } |
| } |
| |
| // Convert |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437) |
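| /// |
| /// An illustrative sketch of how the rounding mode affects the result (values chosen |
| /// for demonstration; runtime feature detection is assumed, and the snippet is not |
| /// compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         // 2^53 + 1 is not exactly representable as an f64, so the rounding mode matters. |
| ///         let a = _mm512_set1_epi64(9_007_199_254_740_993); |
| ///         let nearest = |
| ///             _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| ///         let up = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut n = [0.0f64; 8]; |
| ///         let mut u = [0.0f64; 8]; |
| ///         _mm512_storeu_pd(n.as_mut_ptr(), nearest); |
| ///         _mm512_storeu_pd(u.as_mut_ptr(), up); |
| ///         assert_eq!(n[0], 9_007_199_254_740_992.0); // rounded to nearest (even) |
| ///         assert_eq!(u[0], 9_007_199_254_740_994.0); // rounded toward +infinity |
| ///     } |
| /// } |
| /// ``` |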
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705) |
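| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-3, 7); // lane 0 = 7, lane 1 = -3 |
| ///         let r = _mm_cvtepi64_pd(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [7.0, -3.0]); |
| ///     } |
| /// } |
| /// ``` |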
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { |
| unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepi64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepi64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { |
| unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepi64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepi64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { |
| unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepi64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepi64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723) |
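| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-3, 7); // lane 0 = 7, lane 1 = -3 |
| ///         // The two converted values occupy the low two lanes; the upper lanes are zeroed. |
| ///         let r = _mm_cvtepi64_ps(a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [7.0, -3.0, 0.0, 0.0]); |
| ///     } |
| /// } |
| /// ``` |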
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { |
| _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
| unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { |
| _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { |
| unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727) |
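| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation) showing how |
| /// masked-off lanes are taken from `src`; the feature checks and helper |
| /// intrinsics are choices of this sketch: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let src = _mm_set1_ps(9.0); |
| ///         let a = _mm256_set_epi64x(4, 3, 2, 1); |
| ///         // Bits 0 and 2 of k are set: lanes 0 and 2 are converted, lanes 1 and 3 keep `src`. |
| ///         let r = _mm256_mask_cvtepi64_ps(src, 0b0101, a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [1.0, 9.0, 3.0, 9.0]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |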
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepi64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x4())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepi64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, f32x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { |
| unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepi64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepi64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455) |
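| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): 2^53 + 1 has no |
| /// exact `f64` representation, so the chosen rounding mode decides the result. The |
| /// feature checks and helper intrinsics are choices of this sketch. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512dq") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm512_set1_epi64((1 << 53) + 1); |
| ///         // Rounding toward zero truncates 2^53 + 1 down to 2^53. |
| ///         let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut out = [0.0f64; 8]; |
| ///         _mm512_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [(1u64 << 53) as f64; 8]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |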
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827) |
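| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): the bit pattern |
| /// of `-1i64` is read as `u64::MAX`, illustrating the unsigned interpretation. The |
| /// expected value assumes the default round-to-nearest MXCSR mode. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-1, 3); |
| ///         let r = _mm_cvtepu64_pd(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [3.0, u64::MAX as f64]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |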
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d { |
| unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepu64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepu64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { |
| unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepu64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepu64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { |
| unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepu64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepu64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845) |
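| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation), assuming the |
| /// `avx512dq` and `avx512vl` features are detected at runtime: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(7, 5); |
| ///         let r = _mm_cvtepu64_ps(a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         // The upper two lanes of the result are zeroed. |
| ///         assert_eq!(out, [5.0, 7.0, 0.0, 0.0]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |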
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { |
| _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
| unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { |
| _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { |
| unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepu64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x4())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepu64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, f32x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { |
| unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepu64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepu64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472) |
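| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): rounding `2.5` |
| /// toward negative infinity yields `2` in every lane. The feature checks and |
| /// helper intrinsics are choices of this sketch. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512dq") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm512_set1_pd(2.5); |
| ///         let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut out = [0i64; 8]; |
| ///         _mm512_storeu_epi64(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [2i64; 8]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |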
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941) |
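| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation); the expected |
| /// values assume the default round-to-nearest MXCSR mode: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_pd(3.2, -1.7); |
| ///         let r = _mm_cvtpd_epi64(a); |
| ///         assert_eq!(_mm_extract_epi64::<0>(r), -2); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r), 3); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |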
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvtpd2qq_512( |
| a.as_f64x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075) |
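| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): only the two |
| /// lowest single-precision lanes of `a` take part in the conversion. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_ps(9.0, 8.0, 3.0, -2.0); |
| ///         let r = _mm_cvtps_epi64(a); |
| ///         // The upper lanes (8.0 and 9.0) are ignored. |
| ///         assert_eq!(_mm_extract_epi64::<0>(r), -2); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r), 3); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |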
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtps_epi64(a: __m128) -> __m128i { |
| _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvtps2qq_512( |
| a.as_f32x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959) |
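| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): a value above |
| /// `i64::MAX` still converts exactly, because the destination lanes are unsigned. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_pd((1u64 << 63) as f64, 4.0); |
| ///         let r = _mm_cvtpd_epu64(a); |
| ///         assert_eq!(_mm_extract_epi64::<0>(r) as u64, 4); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r) as u64, 1u64 << 63); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |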
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvtpd2uqq_512( |
| a.as_f64x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520) |
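| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm256_set1_ps(2.5); |
| ///     // Round toward positive infinity, so every element becomes 3. |
| ///     let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); |
| /// } |
| /// ``` |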
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093) |
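| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime with the default |
| /// round-to-nearest-even mode in effect: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // Only the two lowest single-precision elements are converted. |
| ///     let a = _mm_setr_ps(1.5, 2.5, 0.0, 0.0); |
| ///     let r = _mm_cvtps_epu64(a); // both converted elements round to 2 |
| /// } |
| /// ``` |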
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtps_epu64(a: __m128) -> __m128i { |
| _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvtps2uqq_512( |
| a.as_f32x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264) |
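| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm512_set1_pd(-3.9); |
| ///     // Truncation is always toward zero; _MM_FROUND_NO_EXC only suppresses exceptions. |
| ///     let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); // every element is -3 |
| /// } |
| /// ``` |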
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329) |
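| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_setr_pd(1.9, -2.9); |
| ///     let r = _mm_cvttpd_epi64(a); // [1, -2]: the fractional part is discarded |
| /// } |
| /// ``` |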
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333) |
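| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values and mask are |
| /// arbitrary, and it assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let src = _mm256_set1_epi64x(-1); |
| ///     let a = _mm256_set1_pd(7.8); |
| ///     // Only element 0 is converted (to 7); elements 1..=3 keep src's value of -1. |
| ///     let r = _mm256_mask_cvttpd_epi64(src, 0b0001, a); |
| /// } |
| /// ``` |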
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvttpd2qq_512( |
| a.as_f64x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420) |
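| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // Only the two lowest single-precision elements participate. |
| ///     let a = _mm_setr_ps(-1.5, 6.25, 0.0, 0.0); |
| ///     let r = _mm_cvttps_epi64(a); // [-1, 6] |
| /// } |
| /// ``` |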
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttps_epi64(a: __m128) -> __m128i { |
| _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvttps2qq_512( |
| a.as_f32x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347) |
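| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_setr_pd(0.9, 200.7); |
| ///     let r = _mm_cvttpd_epu64(a); // [0, 200] |
| /// } |
| /// ``` |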
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvttpd2uqq_512( |
| a.as_f64x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttps_epu64(a: __m128) -> __m128i { |
| _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvttps2uqq_512( |
| a.as_f32x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| // Multiply-Low |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778) |
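| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_set1_epi64x(1i64 << 40); |
| ///     let b = _mm_set1_epi64x(1i64 << 30); |
| ///     // The full product is 2^70, which does not fit in 64 bits; keeping only the |
| ///     // low 64 bits yields 2^70 mod 2^64 = 2^6 = 64 in each element. |
| ///     let r = _mm_mullo_epi64(a, b); |
| /// } |
| /// ``` |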
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { |
| unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_mullo_epi64(a, b).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_mullo_epi64(a, b).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { |
| unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| unsafe { |
| let b = _mm256_mullo_epi64(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x4())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| unsafe { |
| let b = _mm256_mullo_epi64(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, i64x4::ZERO)) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { |
| unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| unsafe { |
| let b = _mm512_mullo_epi64(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x8())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| unsafe { |
| let b = _mm512_mullo_epi64(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, i64x8::ZERO)) |
| } |
| } |
| |
| // Mask Registers |
| |
| /// Convert 8-bit mask a to a 32-bit integer value and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _cvtmask8_u32(a: __mmask8) -> u32 { |
| a as u32 |
| } |
| |
| /// Convert 32-bit integer value a to an 8-bit mask and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467) |
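| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let k = _cvtu32_mask8(0b1010_0001); |
| ///     // Round-trips back to the same low 8 bits. |
| ///     assert_eq!(_cvtmask8_u32(k), 0b1010_0001); |
| /// } |
| /// ``` |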
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _cvtu32_mask8(a: u32) -> __mmask8 { |
| a as __mmask8 |
| } |
| |
| /// Add 16-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { |
| a + b |
| } |
| |
| /// Add 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a + b |
| } |
| |
| /// Bitwise AND of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a & b |
| } |
| |
| /// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916) |
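| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the masks are arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // a is inverted first, then ANDed with b: (!0b1100) & 0b1010 == 0b0010. |
| ///     assert_eq!(_kandn_mask8(0b1100, 0b1010), 0b0010); |
| /// } |
| /// ``` |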
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| _knot_mask8(a) & b |
| } |
| |
| /// Bitwise NOT of 8-bit mask a, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _knot_mask8(a: __mmask8) -> __mmask8 { |
| a ^ 0b11111111 |
| } |
| |
| /// Bitwise OR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a | b |
| } |
| |
| /// Bitwise XNOR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| _knot_mask8(_kxor_mask8(a, b)) |
| } |
| |
| /// Bitwise XOR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a ^ b |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
| /// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931) |
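///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut all_ones = 0u8;
/// // 0xF0 | 0x0F == 0xFF: the OR is not all zeros (return value 0) and is all
/// // ones (all_ones is set to 1).
/// let all_zeros = unsafe { _kortest_mask8_u8(0xF0, 0x0F, &mut all_ones) };
/// assert_eq!((all_zeros, all_ones), (0, 1));
/// ```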
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 { |
| let tmp = _kor_mask8(a, b); |
| *all_ones = (tmp == 0xff) as u8; |
| (tmp == 0) as u8 |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise |
| /// store 0 in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kor_mask8(a, b) == 0xff) as u8 |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
| /// store 0 in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kor_mask8(a, b) == 0) as u8 |
| } |
| |
| /// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945) |
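///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // COUNT is a const generic; bits shifted above bit 7 are dropped.
/// assert_eq!(_kshiftli_mask8::<2>(0b0100_1011), 0b0010_1100);
/// assert_eq!(_kshiftri_mask8::<2>(0b0100_1011), 0b0001_0010);
/// ```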
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
| a << COUNT |
| } |
| |
| /// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
| a >> COUNT |
| } |
| |
/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 { |
| *and_not = (_kandn_mask16(a, b) == 0) as u8; |
| (_kand_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953) |
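///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut and_not = 0u8;
/// let a: __mmask8 = 0b0000_0011;
/// let b: __mmask8 = 0b0000_1100;
/// // a & b == 0, so the return value is 1; !a & b == 0b0000_1100 != 0, so and_not is 0.
/// let zf = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
/// assert_eq!((zf, and_not), (1, 0));
/// ```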
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 { |
| *and_not = (_kandn_mask8(a, b) == 0) as u8; |
| (_kand_mask8(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
| (_kandn_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kandn_mask8(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 16-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
| (_kand_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 8-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kand_mask8(a, b) == 0) as u8 |
| } |
| |
/// Load 8-bit mask from memory into dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 { |
| *mem_addr |
| } |
| |
/// Store 8-bit mask a into memory.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468) |
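///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut slot: __mmask8 = 0;
/// unsafe {
///     _store_mask8(&mut slot, 0b1001_0110);
///     assert_eq!(_load_mask8(&slot), 0b1001_0110);
/// }
/// ```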
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) { |
| *mem_addr = a; |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612) |
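///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // Each mask bit is the sign bit of the corresponding 32-bit lane.
/// // Lanes, low to high: 4, -3, 2, -1  ->  bits 0..=3 are 0, 1, 0, 1.
/// let v = _mm_set_epi32(-1, 2, -3, 4);
/// assert_eq!(_mm_movepi32_mask(v), 0b1010);
/// ```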
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { |
| let zero = _mm_setzero_si128(); |
| _mm_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { |
| let zero = _mm256_setzero_si256(); |
| _mm256_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { |
| let zero = _mm512_setzero_si512(); |
| _mm512_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { |
| let zero = _mm_setzero_si128(); |
| _mm_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { |
| let zero = _mm256_setzero_si256(); |
| _mm256_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { |
| let zero = _mm512_setzero_si512(); |
| _mm512_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625) |
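///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // Bit i of k expands to all ones (-1) or all zeros in 32-bit lane i, so
/// // converting back with _mm_movepi32_mask recovers the original mask.
/// assert_eq!(_mm_movepi32_mask(_mm_movm_epi32(0b0101)), 0b0101);
/// ```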
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movm_epi32(k: __mmask8) -> __m128i { |
| let ones = _mm_set1_epi32(-1); |
| _mm_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i { |
| let ones = _mm256_set1_epi32(-1); |
| _mm256_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i { |
| let ones = _mm512_set1_epi32(-1); |
| _mm512_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movm_epi64(k: __mmask8) -> __m128i { |
| let ones = _mm_set1_epi64x(-1); |
| _mm_maskz_mov_epi64(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i { |
| let ones = _mm256_set1_epi64x(-1); |
| _mm256_maskz_mov_epi64(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i { |
| let ones = _mm512_set1_epi64(-1); |
| _mm512_maskz_mov_epi64(k, ones) |
| } |
| |
| // Range |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangepd_512( |
| a.as_f64x8(), |
| b.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192) |
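///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest. The
/// expected values follow from the control-bit description above.
///
/// ```ignore
/// // IMM8 = 0b1011: lower bits 11 select absolute max, upper bits 10 clear the
/// // sign, so each element becomes max(|a|, |b|).
/// let a = _mm_set_pd(-5.0, 1.5);
/// let b = _mm_set_pd(3.0, -2.5);
/// let r = _mm_range_pd::<0b1011>(a, b);
/// // Lanes, low to high: max(1.5, 2.5) = 2.5 and max(5.0, 3.0) = 5.0.
/// assert_eq!(_mm_cvtsd_f64(r), 2.5);
/// ```
///
/// The `_mm_mask_range_pd` and `_mm_maskz_range_pd` variants below apply the same operation
/// under a writemask or zeromask.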
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_128( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| IMM8, |
| src.as_f64x2(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_range_pd<const IMM8: i32>( |
| src: __m256d, |
| k: __mmask8, |
| a: __m256d, |
| b: __m256d, |
| ) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_256( |
| a.as_f64x4(), |
| b.as_f64x4(), |
| IMM8, |
| src.as_f64x4(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_pd<const IMM8: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_512( |
| a.as_f64x8(), |
| b.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangeps_512( |
| a.as_f32x16(), |
| b.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>( |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_ps<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_128( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| IMM8, |
| src.as_f32x4(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_range_ps<const IMM8: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m256, |
| b: __m256, |
| ) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_256( |
| a.as_f32x8(), |
| b.as_f32x8(), |
| IMM8, |
| src.as_f32x8(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_ps<const IMM8: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_512( |
| a.as_f32x16(), |
| b.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst, and copy the upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216) |
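///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest. The expected values
/// follow from the control-bit description above.
///
/// ```ignore
/// // Lower element: IMM8 = 0b1011 selects the absolute max with the sign cleared,
/// // i.e. max(|-5.0|, |3.0|) = 5.0; the upper element is copied from `a`.
/// let a = _mm_set_pd(10.0, -5.0);
/// let b = _mm_set_pd(99.0, 3.0);
/// let r = _mm_range_round_sd::<0b1011, { _MM_FROUND_NO_EXC }>(a, b);
/// assert_eq!(_mm_cvtsd_f64(r), 5.0); // the upper element of r is 10.0, taken from a
/// ```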
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_sd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst, and copy the upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_ss<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| // Reduce |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438) |
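| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` enabled. The high
| /// nibble of IMM8 is used here as the fraction-bit count from Intel's imm8 layout (an
| /// assumption beyond the list above); SAE suppresses exceptions:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // Keep 4 fraction bits and truncate; _MM_FROUND_NO_EXC suppresses exceptions.
| /// const IMM8: i32 = (4 << 4) | _MM_FROUND_TO_ZERO;
| /// let a = _mm512_set1_pd(1.0 / 3.0);
| /// // Each lane becomes (1/3) minus (1/3) rounded toward zero to 4 fractional bits.
| /// let r = _mm512_reduce_round_pd::<{ IMM8 }, { _MM_FROUND_NO_EXC }>(a);
| /// ```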
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(1, 2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411) |
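| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // With no extra fraction bits requested and truncation as the rounding control,
| /// // each lane becomes `x - trunc(x)`, i.e. the fractional part for positive inputs.
| /// let a = _mm_set1_pd(3.75);
| /// let r = _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a);
| /// assert_eq!(_mm_cvtsd_f64(r), 0.75);
| /// ```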
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_512( |
| a.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(1, 2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
| k: __mmask16, |
| a: __m512, |
| ) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428) |
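| ///
| /// A minimal usage sketch of the zeromask behaviour, assuming the caller already has
| /// `avx512dq` and `avx512vl` enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let a = _mm_set_ps(4.25, 3.5, 2.25, 1.75); // element 0 = 1.75, ..., element 3 = 4.25
| /// // Only mask bits 0 and 2 are set: elements 1 and 3 are zeroed, elements 0 and 2
| /// // become their fractional parts (truncation, no extra fraction bits kept).
| /// let r = _mm_maskz_reduce_ps::<{ _MM_FROUND_TO_ZERO }>(0b0101, a);
| /// assert_eq!(_mm_cvtss_f32(r), 0.75); // element 0: 1.75 - 1.0
| /// ```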
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_512( |
| a.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper element from a to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and
| /// copy the upper element from a to the upper element of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) |
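| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let a = _mm_set_pd(8.0, 1.0);  // upper = 8.0, lower = 1.0
| /// let b = _mm_set_pd(99.0, 2.5); // only the lower element (2.5) is reduced
| /// // Lower lane: 2.5 - trunc(2.5) = 0.5; upper lane is copied from `a` (8.0).
| /// let r = _mm_reduce_sd::<{ _MM_FROUND_TO_ZERO }>(a, b);
| /// assert_eq!(_mm_cvtsd_f64(r), 0.5);
| /// ```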
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_sd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper 3 packed elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper 3 packed elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_ss<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| // FP-Class |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493) |
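| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled; the category flags can be OR'ed together:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // 0x01 (QNaN) | 0x80 (SNaN): flag every NaN element.
| /// let v = _mm_set_pd(f64::NAN, 1.0); // element 1 = NaN, element 0 = 1.0
| /// let k = _mm_fpclass_pd_mask::<0x81>(v);
| /// assert_eq!(k, 0b10); // only bit 1 is set
| /// ```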
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494) |
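| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled; k1 pre-filters which elements can set a bit in the result:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let v = _mm_set_pd(f64::NAN, f64::NAN); // both elements are NaN
| /// // Only bit 0 of k1 is set, so bit 1 of the result stays clear even though
| /// // element 1 is also a NaN.
| /// let k = _mm_mask_fpclass_pd_mask::<0x81>(0b01, v);
| /// assert_eq!(k, 0b01);
| /// ```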
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1)) |
| } |
| } |
| |
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k.
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a) |
| } |
| |
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask
/// bit 0 is not set).
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| vfpclasssd(a.as_f64x2(), IMM8, k1) |
| } |
| } |
| |
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k.
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a) |
| } |
| |
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask
/// bit 0 is not set).
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| vfpclassss(a.as_f32x4(), IMM8, k1) |
| } |
| } |
| |
| #[allow(improper_ctypes)] |
| unsafe extern "C" { |
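    // Bindings to the LLVM intrinsics that back the conversion, range, reduce and fpclass
    // wrappers above. The trailing `rounding`/`sae` parameters take the same `_MM_FROUND_*`
    // constants as the corresponding `_round`/`sae` intrinsic variants.
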
| #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"] |
| fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"] |
| fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"] |
| fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"] |
| fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"] |
| fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"] |
| fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8; |
| |
| #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"] |
| fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"] |
| fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"] |
| fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"] |
| fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"] |
| fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"] |
| fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"] |
| fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"] |
| fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"] |
| fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"] |
| fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"] |
| fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"] |
| fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"] |
| fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"] |
| fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"] |
| fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"] |
| fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"] |
| fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"] |
| fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"] |
| fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"] |
| fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"] |
| fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"] |
| fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"] |
| fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"] |
| fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"] |
| fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"] |
| fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"] |
| fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"] |
| fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"] |
| fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"] |
| fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.pd.128"] |
| fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.range.pd.256"] |
| fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
| #[link_name = "llvm.x86.avx512.mask.range.pd.512"] |
| fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.ps.128"] |
| fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.mask.range.ps.256"] |
| fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
| #[link_name = "llvm.x86.avx512.mask.range.ps.512"] |
| fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) |
| -> f32x16; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.sd"] |
| fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.range.ss"] |
| fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"] |
| fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"] |
| fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"] |
| fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"] |
| fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"] |
| fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"] |
| fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.sd"] |
| fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ss"] |
| fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"] |
| fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"] |
| fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"] |
| fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"] |
| fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"] |
| fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"] |
| fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.sd"] |
| fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ss"] |
| fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| use stdarch_test::simd_test; |
| |
| use crate::core_arch::x86::*; |
| use crate::mem::transmute; |
| |
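    // The operand bit patterns repeat the nibbles 0x3 and 0x5, so every bitwise result is a
    // distinct repeated nibble: 0x3 & 0x5 = 0x1, !0x3 & 0x5 = 0x4 (ANDN), 0x3 | 0x5 = 0x7,
    // 0x3 ^ 0x5 = 0x6. The constants below reinterpret those integer patterns as floats.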
| const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) }; |
| const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) }; |
| |
| const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) }; |
| const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) }; |
| const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) }; |
| const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) }; |
| |
| const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) }; |
| const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) }; |
| |
| const AND_32: f32 = unsafe { transmute(0x11111111_u32) }; |
| const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) }; |
| const OR_32: f32 = unsafe { transmute(0x77777777_u32) }; |
| const XOR_32: f32 = unsafe { transmute(0x66666666_u32) }; |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_and_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_and_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., AND_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_and_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_and_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, AND_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_and_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_and_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., AND_64, 3., AND_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_and_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_and_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_and_pd(a, b); |
| let e = _mm512_set1_pd(AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_and_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_and_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_and_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_and_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., AND_32, 3., AND_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_and_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_and_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_and_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_and_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_and_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_and_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_and_ps(a, b); |
| let e = _mm512_set1_ps(AND_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, |
| 15., AND_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_and_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., |
| AND_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_andnot_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_andnot_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., ANDN_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_andnot_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_andnot_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, ANDN_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_andnot_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_andnot_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_andnot_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_andnot_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_andnot_pd(a, b); |
| let e = _mm512_set1_pd(ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_andnot_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_andnot_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_andnot_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_andnot_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_andnot_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_andnot_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_andnot_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_andnot_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_andnot_ps(a, b); |
| let e = _mm512_set1_ps(ANDN_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., |
| ANDN_32, 15., ANDN_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., |
| ANDN_32, 0., ANDN_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_or_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_or_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., OR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_or_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_or_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, OR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_or_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_or_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., OR_64, 3., OR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_or_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_or_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_or_pd(a, b); |
| let e = _mm512_set1_pd(OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_or_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_or_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_or_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_or_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., OR_32, 3., OR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_or_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_or_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_or_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_or_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_or_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_or_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_or_ps(a, b); |
| let e = _mm512_set1_ps(OR_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15., |
| OR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_or_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_xor_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_xor_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., XOR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_xor_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_xor_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, XOR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_xor_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_xor_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., XOR_64, 3., XOR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_xor_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_xor_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_xor_pd(a, b); |
| let e = _mm512_set1_pd(XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_xor_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_xor_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_xor_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_xor_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., XOR_32, 3., XOR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_xor_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_xor_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_xor_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_xor_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_xor_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_xor_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_xor_ps(a, b); |
| let e = _mm512_set1_ps(XOR_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, |
| 15., XOR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., |
| XOR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_broadcast_f32x2(a); |
| let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.); |
| let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a); |
| let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_broadcast_f32x2(0b01101001, a); |
| let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm512_broadcast_f32x2(a); |
| let e = _mm512_set_ps( |
| 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_broadcast_f32x8(a); |
| let e = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_ps( |
| 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., |
| ); |
| let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm256_broadcast_f64x2(a); |
| let e = _mm256_set_pd(1., 2., 1., 2.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm256_set_pd(3., 4., 5., 6.); |
| let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a); |
| let e = _mm256_set_pd(3., 2., 1., 6.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm256_maskz_broadcast_f64x2(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 1., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm512_broadcast_f64x2(a); |
| let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a); |
| let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm512_maskz_broadcast_f64x2(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm_broadcast_i32x2(a); |
| let e = _mm_set_epi32(3, 4, 3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm_set_epi32(5, 6, 7, 8); |
| let r = _mm_mask_broadcast_i32x2(b, 0b0110, a); |
| let e = _mm_set_epi32(5, 4, 3, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm_maskz_broadcast_i32x2(0b0110, a); |
| let e = _mm_set_epi32(0, 4, 3, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm256_broadcast_i32x2(a); |
| let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12); |
| let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a); |
| let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm256_maskz_broadcast_i32x2(0b01101001, a); |
| let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm512_broadcast_i32x2(a); |
| let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); |
| let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a); |
| let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a); |
| let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_broadcast_i32x8(a); |
| let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi32( |
| 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
| ); |
| let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a); |
| let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a); |
| let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm256_broadcast_i64x2(a); |
| let e = _mm256_set_epi64x(1, 2, 1, 2); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm256_set_epi64x(3, 4, 5, 6); |
| let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a); |
| let e = _mm256_set_epi64x(3, 2, 1, 6); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm256_maskz_broadcast_i64x2(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 1, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm512_broadcast_i64x2(a); |
| let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10); |
| let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a); |
| let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm512_maskz_broadcast_i64x2(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_extractf32x8_ps::<1>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a); |
| let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_extractf64x2_pd::<1>(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a); |
| let e = _mm_set_pd(5., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_extractf64x2_pd::<2>(a); |
| let e = _mm_set_pd(3., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a); |
| let e = _mm_set_pd(9., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a); |
| let e = _mm_set_pd(0., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_extracti32x8_epi32::<1>(a); |
| let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a); |
| let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a); |
| let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_extracti64x2_epi64::<1>(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_extracti64x2_epi64::<2>(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a); |
| let e = _mm_set_epi64x(9, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_insertf32x8::<1>(a, b); |
| let e = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let src = _mm512_set_ps( |
| 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., |
| ); |
| let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_insertf64x2::<1>(a, b); |
| let e = _mm256_set_pd(5., 6., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let src = _mm256_set_pd(7., 8., 9., 10.); |
| let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b); |
| let e = _mm256_set_pd(7., 6., 3., 10.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b); |
| let e = _mm256_set_pd(0., 6., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_insertf64x2::<2>(a, b); |
| let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.); |
| let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b); |
| let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_inserti32x8::<1>(a, b); |
| let e = _mm512_set_epi32( |
| 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let src = _mm512_set_epi32( |
| 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
| ); |
| let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b); |
| let e = _mm512_set_epi32( |
| 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b); |
| let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_inserti64x2::<1>(a, b); |
| let e = _mm256_set_epi64x(5, 6, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let src = _mm256_set_epi64x(7, 8, 9, 10); |
| let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b); |
| let e = _mm256_set_epi64x(7, 6, 3, 10); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b); |
| let e = _mm256_set_epi64x(0, 6, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_inserti64x2::<2>(a, b); |
| let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18); |
| let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b); |
| let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b); |
| let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
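        // 1..=8 convert exactly, so the explicit rounding mode has no observable effect in this test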
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepi64_pd(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_pd(3., 4.); |
| let r = _mm_mask_cvtepi64_pd(b, 0b01, a); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepi64_pd(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepi64_pd(a); |
| let e = _mm256_set_pd(1., 2., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a); |
| let e = _mm256_set_pd(5., 2., 3., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepi64_pd(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepi64_pd(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepi64_pd(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepi64_ps(a); |
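        // the two i64 inputs fill only the low half of the __m128 result; the upper two f32 lanes are zeroed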
| let e = _mm_set_ps(0., 0., 1., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_ps(3., 4., 5., 6.); |
| let r = _mm_mask_cvtepi64_ps(b, 0b01, a); |
| let e = _mm_set_ps(0., 0., 5., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepi64_ps(0b01, a); |
| let e = _mm_set_ps(0., 0., 0., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepi64_ps(a); |
| let e = _mm_set_ps(1., 2., 3., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a); |
| let e = _mm_set_ps(5., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepi64_ps(0b0110, a); |
| let e = _mm_set_ps(0., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepi64_ps(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepi64_ps(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepu64_pd(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_pd(3., 4.); |
| let r = _mm_mask_cvtepu64_pd(b, 0b01, a); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepu64_pd(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepu64_pd(a); |
| let e = _mm256_set_pd(1., 2., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a); |
| let e = _mm256_set_pd(5., 2., 3., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepu64_pd(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepu64_pd(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepu64_pd(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepu64_ps(a); |
| let e = _mm_set_ps(0., 0., 1., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_ps(3., 4., 5., 6.); |
| let r = _mm_mask_cvtepu64_ps(b, 0b01, a); |
| let e = _mm_set_ps(0., 0., 5., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepu64_ps(0b01, a); |
| let e = _mm_set_ps(0., 0., 0., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepu64_ps(a); |
| let e = _mm_set_ps(1., 2., 3., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a); |
| let e = _mm_set_ps(5., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepu64_ps(0b0110, a); |
| let e = _mm_set_ps(0., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepu64_ps(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepu64_ps(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvtpd_epi64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvtpd_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvtpd_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvtpd_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtpd_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtpd_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtpd_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvtps_epi64(a); |
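        // only the two lowest f32 lanes (4. and 3. here) are converted to i64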
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvtps_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvtps_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvtps_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtps_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtps_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtps_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtps_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvtpd_epu64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvtpd_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvtpd_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvtpd_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtpd_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtpd_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtpd_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvtps_epu64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvtps_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvtps_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvtps_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtps_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtps_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtps_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtps_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); |
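        // the truncating (`cvtt`) forms round toward zero, which leaves these integral inputs unchanged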
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvttpd_epi64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvttpd_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvttpd_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvttpd_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttpd_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttpd_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttpd_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvttps_epi64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvttps_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvttps_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvttps_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttps_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttps_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttps_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttps_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvttpd_epu64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvttpd_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvttpd_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvttpd_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttpd_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttpd_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttpd_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvttps_epu64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvttps_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvttps_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvttps_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttps_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttps_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttps_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttps_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mullo_epi64(a, b); |
| let e = _mm_set_epi64x(3, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let c = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_mullo_epi64(c, 0b01, a, b); |
| let e = _mm_set_epi64x(5, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_maskz_mullo_epi64(0b01, a, b); |
| let e = _mm_set_epi64x(0, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mullo_epi64(a, b); |
| let e = _mm256_set_epi64x(5, 12, 21, 32); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let c = _mm256_set_epi64x(9, 10, 11, 12); |
| let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b); |
| let e = _mm256_set_epi64x(9, 12, 21, 12); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_maskz_mullo_epi64(0b0110, a, b); |
| let e = _mm256_set_epi64x(0, 12, 21, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mullo_epi64(a, b); |
| let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b); |
| let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_maskz_mullo_epi64(0b01101001, a, b); |
| let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_cvtmask8_u32() { |
| let a: __mmask8 = 0b01101001; |
| let r = _cvtmask8_u32(a); |
| let e: u32 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_cvtu32_mask8() { |
| let a: u32 = 0b01101001; |
| let r = _cvtu32_mask8(a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kadd_mask16() { |
| let a: __mmask16 = 27549; |
| let b: __mmask16 = 23434; |
| let r = _kadd_mask16(a, b); |
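        // _kadd_mask16 adds the two masks as unsigned integers: 27549 + 23434 == 50983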
| let e: __mmask16 = 50983; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kadd_mask8() { |
| let a: __mmask8 = 98; |
| let b: __mmask8 = 117; |
| let r = _kadd_mask8(a, b); |
| let e: __mmask8 = 215; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kand_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kand_mask8(a, b); |
| let e: __mmask8 = 0b00100001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kandn_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kandn_mask8(a, b); |
| let e: __mmask8 = 0b10010010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_knot_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _knot_mask8(a); |
| let e: __mmask8 = 0b10010110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kor_mask8(a, b); |
| let e: __mmask8 = 0b11111011; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kxnor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kxnor_mask8(a, b); |
| let e: __mmask8 = 0b00100101; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kxor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kxor_mask8(a, b); |
| let e: __mmask8 = 0b11011010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortest_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let mut all_ones: u8 = 0; |
| let r = _kortest_mask8_u8(a, b, &mut all_ones); |
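        // a | b == 0xFF, so `all_ones` (the carry flag) is set; the OR is nonzero, so the returned zero flag is 0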
| assert_eq!(r, 0); |
| assert_eq!(all_ones, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortestc_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let r = _kortestc_mask8_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortestz_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let r = _kortestz_mask8_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kshiftli_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _kshiftli_mask8::<3>(a); |
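        // the shift stays within the 8-bit mask, so bits moved past bit 7 are dropped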
| let e: __mmask8 = 0b01001000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kshiftri_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _kshiftri_mask8::<3>(a); |
| let e: __mmask8 = 0b00001101; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktest_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let mut and_not: u8 = 0; |
| let r = _ktest_mask8_u8(a, b, &mut and_not); |
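        // a & b == 0, so the returned zero flag is 1; !a & b is nonzero, so `and_not` stays 0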
| assert_eq!(r, 1); |
| assert_eq!(and_not, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestc_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let r = _ktestc_mask8_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestz_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let r = _ktestz_mask8_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktest_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let mut and_not: u8 = 0; |
| let r = _ktest_mask16_u8(a, b, &mut and_not); |
| assert_eq!(r, 1); |
| assert_eq!(and_not, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestc_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let r = _ktestc_mask16_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestz_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let r = _ktestz_mask16_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_load_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _load_mask8(&a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_store_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let mut r = 0; |
| _store_mask8(&mut r, a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movepi32_mask() { |
| let a = _mm_set_epi32(0, -2, -3, 4); |
| let r = _mm_movepi32_mask(a); |
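        // each mask bit is the sign bit of the corresponding 32-bit element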
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movepi32_mask() { |
| let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8); |
| let r = _mm256_movepi32_mask(a); |
| let e = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movepi32_mask() { |
| let a = _mm512_set_epi32( |
| 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16, |
| ); |
| let r = _mm512_movepi32_mask(a); |
| let e = 0b0110100100111100; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movepi64_mask() { |
| let a = _mm_set_epi64x(0, -2); |
| let r = _mm_movepi64_mask(a); |
| let e = 0b01; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movepi64_mask() { |
| let a = _mm256_set_epi64x(0, -2, -3, 4); |
| let r = _mm256_movepi64_mask(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movepi64_mask() { |
| let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8); |
| let r = _mm512_movepi64_mask(a); |
| let e = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movm_epi32() { |
| let a = 0b0110; |
| let r = _mm_movm_epi32(a); |
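        // every set mask bit becomes an all-ones (-1) element; clear bits become 0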
| let e = _mm_set_epi32(0, -1, -1, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movm_epi32() { |
| let a = 0b01101001; |
| let r = _mm256_movm_epi32(a); |
| let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movm_epi32() { |
| let a = 0b0110100100111100; |
| let r = _mm512_movm_epi32(a); |
| let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movm_epi64() { |
| let a = 0b01; |
| let r = _mm_movm_epi64(a); |
| let e = _mm_set_epi64x(0, -1); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movm_epi64() { |
| let a = 0b0110; |
| let r = _mm256_movm_epi64(a); |
| let e = _mm256_set_epi64x(0, -1, -1, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movm_epi64() { |
| let a = 0b01101001; |
| let r = _mm512_movm_epi64(a); |
| let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
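        // the low two bits of imm8 (0b01) select max(a, b) per element; the sign-control bits are irrelevant for these positive inputs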
| let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b); |
| let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let r = _mm_range_pd::<0b0101>(a, b); |
| let e = _mm_set_pd(2., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let r = _mm256_range_pd::<0b0101>(a, b); |
| let e = _mm256_set_pd(2., 2., 4., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let c = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b); |
| let e = _mm256_set_pd(5., 2., 4., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b); |
| let e = _mm256_set_pd(0., 2., 4., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_range_pd::<0b0101>(a, b); |
| let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b); |
| let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm512_set_ps( |
| 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let c = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
| ); |
| let r = |
| _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let r = _mm_range_ps::<0b0101>(a, b); |
| let e = _mm_set_ps(2., 2., 4., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b); |
| let e = _mm_set_ps(5., 2., 4., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b); |
| let e = _mm_set_ps(0., 2., 4., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm256_range_ps::<0b0101>(a, b); |
| let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b); |
| let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b); |
| let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_range_ps::<0b0101>(a, b); |
| let e = _mm512_set_ps( |
| 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let c = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
| ); |
| let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
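        // The scalar variants compute only the lowest element; the upper element of the result is
        // copied from `a` (here 0.0, the upper half of _mm_set_sd(1.)).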
| let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_sd(2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let c = _mm_set_sd(3.); |
| let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
| let e = _mm_set_sd(3.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_sd(0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let c = _mm_set_sd(3.); |
| let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b); |
| let e = _mm_set_sd(3.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b); |
| let e = _mm_set_sd(0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_ss(2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let c = _mm_set_ss(3.); |
| let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
| let e = _mm_set_ss(3.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_ss(0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let c = _mm_set_ss(3.); |
| let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b); |
| let e = _mm_set_ss(3.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b); |
| let e = _mm_set_ss(0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
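        // IMM8 = 16 | _MM_FROUND_TO_ZERO: bits 7:4 = 1 keep one fraction bit and rounding is
        // truncation, so each element becomes x - trunc(2 * x) / 2 (e.g. 0.75 -> 0.25, 0.50 -> 0.).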
| let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| src, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm_set_pd(0.25, 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let src = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a); |
| let e = _mm_set_pd(3., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a); |
| let e = _mm_set_pd(0., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm256_set_pd(0.25, 0., 0.25, 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let src = _mm256_set_pd(3., 4., 5., 6.); |
| let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
| let e = _mm256_set_pd(3., 0., 0.25, 6.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
| let e = _mm256_set_pd(0., 0., 0.25, 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
| let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
| let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_ps( |
| 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let src = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| src, |
| 0b0110100100111100, |
| a, |
| ); |
| let e = _mm512_set_ps( |
| 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| 0b0110100100111100, |
| a, |
| ); |
| let e = _mm512_set_ps( |
| 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm_set_ps(0.25, 0., 0.25, 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let src = _mm_set_ps(2., 3., 4., 5.); |
| let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
| let e = _mm_set_ps(2., 0., 0.25, 5.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
| let e = _mm_set_ps(0., 0., 0.25, 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
| let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
| let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm512_set_ps( |
| 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let src = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
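        // The scalar reduce takes its input from the low element of `b`; the upper element of the
        // result is copied from `a`.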
| let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_pd(1., 0.25); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| c, 0b0, a, b, |
| ); |
| let e = _mm_set_pd(1., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = |
| _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_pd(1., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
| let e = _mm_set_pd(1., 0.25); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
| let e = _mm_set_pd(1., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
| let e = _mm_set_pd(1., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.25); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| c, 0b0, a, b, |
| ); |
| let e = _mm_set_ps(1., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = |
| _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.25); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_fpclass_pd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
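        // IMM8 = 0x18 tests the positive-infinity (bit 3) and negative-infinity (bit 4) categories,
        // so only infinite elements set their mask bit.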
| let r = _mm_fpclass_pd_mask::<0x18>(a); |
| let e = 0b01; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_fpclass_pd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
| let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a); |
| let e = 0b00; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_fpclass_pd_mask() { |
| let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
| let r = _mm256_fpclass_pd_mask::<0x18>(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_fpclass_pd_mask() { |
| let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
| let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a); |
| let e = 0b0010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_fpclass_pd_mask() { |
| let a = _mm512_set_pd( |
| 1., |
| f64::INFINITY, |
| f64::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f64::NAN, |
| 1.0e-308, |
| ); |
| let r = _mm512_fpclass_pd_mask::<0x18>(a); |
| let e = 0b01100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_fpclass_pd_mask() { |
| let a = _mm512_set_pd( |
| 1., |
| f64::INFINITY, |
| f64::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f64::NAN, |
| 1.0e-308, |
| ); |
| let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a); |
| let e = 0b00100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_fpclass_ps_mask() { |
| let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
| let r = _mm_fpclass_ps_mask::<0x18>(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_fpclass_ps_mask() { |
| let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
| let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a); |
| let e = 0b0010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_fpclass_ps_mask() { |
| let a = _mm256_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| ); |
| let r = _mm256_fpclass_ps_mask::<0x18>(a); |
| let e = 0b01100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_fpclass_ps_mask() { |
| let a = _mm256_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| ); |
| let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a); |
| let e = 0b00100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_fpclass_ps_mask() { |
| let a = _mm512_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| -1., |
| f32::NEG_INFINITY, |
| f32::INFINITY, |
| -0.0, |
| 0.0, |
| 2.0, |
| f32::NAN, |
| -1.0e-38, |
| ); |
| let r = _mm512_fpclass_ps_mask::<0x18>(a); |
| let e = 0b0110000001100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_fpclass_ps_mask() { |
| let a = _mm512_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| -1., |
| f32::NEG_INFINITY, |
| f32::INFINITY, |
| -0.0, |
| 0.0, |
| 2.0, |
| f32::NAN, |
| -1.0e-38, |
| ); |
| let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a); |
| let e = 0b0010000000100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_fpclass_sd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
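        // The scalar variant classifies only the lowest element (here f64::INFINITY).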
| let r = _mm_fpclass_sd_mask::<0x18>(a); |
| let e = 0b1; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_fpclass_sd_mask() { |
| let a = _mm_set_sd(f64::INFINITY); |
| let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); |
| let e = 0b0; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_fpclass_ss_mask() { |
| let a = _mm_set_ss(f32::INFINITY); |
| let r = _mm_fpclass_ss_mask::<0x18>(a); |
| let e = 0b1; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_fpclass_ss_mask() { |
| let a = _mm_set_ss(f32::INFINITY); |
| let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); |
| let e = 0b0; |
| assert_eq!(r, e); |
| } |
| } |