| use crate::{ |
| core_arch::{simd::*, x86::*}, |
| intrinsics::simd::*, |
| mem::transmute, |
| }; |
| |
// And
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let and = _mm_and_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let and = _mm_and_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, and, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let and = _mm256_and_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let and = _mm256_and_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, and, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let and = _mm512_and_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, and, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let and = _mm512_and_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, and, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let and = _mm_and_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let and = _mm_and_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, and, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let and = _mm256_and_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let and = _mm256_and_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, and, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_and( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let and = _mm512_and_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, and, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let and = _mm512_and_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, and, f32x16::ZERO)) |
| } |
| } |
| |
| // Andnot |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let andnot = _mm_andnot_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let andnot = _mm_andnot_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let andnot = _mm256_andnot_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let andnot = _mm256_andnot_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let andnot = _mm512_andnot_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let andnot = _mm512_andnot_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let andnot = _mm_andnot_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let andnot = _mm_andnot_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let andnot = _mm256_andnot_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let andnot = _mm256_andnot_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let andnot = _mm512_andnot_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then |
| /// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vandnps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let andnot = _mm512_andnot_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) |
| } |
| } |
| |
| // Or |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let or = _mm_or_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let or = _mm_or_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, or, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let or = _mm256_or_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let or = _mm256_or_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, or, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let or = _mm512_or_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, or, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let or = _mm512_or_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, or, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let or = _mm_or_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let or = _mm_or_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, or, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let or = _mm256_or_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let or = _mm256_or_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, or, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_or( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let or = _mm512_or_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, or, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let or = _mm512_or_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, or, f32x16::ZERO)) |
| } |
| } |
| |
| // Xor |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let xor = _mm_xor_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x2())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| unsafe { |
| let xor = _mm_xor_pd(a, b).as_f64x2(); |
| transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let xor = _mm256_xor_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x4())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| unsafe { |
| let xor = _mm256_xor_pd(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorp))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { |
| unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let xor = _mm512_xor_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, xor, src.as_f64x8())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorpd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| unsafe { |
| let xor = _mm512_xor_pd(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let xor = _mm_xor_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x4())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| unsafe { |
| let xor = _mm_xor_ps(a, b).as_f32x4(); |
| transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let xor = _mm256_xor_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x8())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| unsafe { |
| let xor = _mm256_xor_ps(a, b).as_f32x8(); |
| transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| transmute(simd_xor( |
| transmute::<_, u32x16>(a), |
| transmute::<_, u32x16>(b), |
| )) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let xor = _mm512_xor_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, xor, src.as_f32x16())) |
| } |
| } |
| |
| /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and |
| /// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vxorps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| unsafe { |
| let xor = _mm512_xor_ps(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) |
| } |
| } |
| |
| // Broadcast |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { |
| unsafe { |
| let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 { |
| unsafe { |
| let b = _mm256_broadcast_f32x2(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { |
| unsafe { |
| let b = _mm256_broadcast_f32x2(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { |
| unsafe { |
| let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x2(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x16())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcastf32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x2(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, f32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { |
| unsafe { |
| let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x8(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x16())) |
| } |
| } |
| |
| /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { |
| unsafe { |
| let b = _mm512_broadcast_f32x8(a).as_f32x16(); |
| transmute(simd_select_bitmask(k, b, f32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { |
| unsafe { |
| let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { |
| unsafe { |
| let b = _mm256_broadcast_f64x2(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { |
| unsafe { |
| let b = _mm256_broadcast_f64x2(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { |
| unsafe { |
| let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { |
| unsafe { |
| let b = _mm512_broadcast_f64x2(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all |
| /// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { |
| unsafe { |
| let b = _mm512_broadcast_f64x2(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_broadcast_i32x2(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x4())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_broadcast_i32x2(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, b, i32x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i32x2(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x8())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i32x2(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, i32x8::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { |
| unsafe { |
| let a = a.as_i32x4(); |
| let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x2(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x16())) |
| } |
| } |
| |
| /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vbroadcasti32x2))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x2(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, i32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548) |
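| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| ///         // The whole 256-bit source is duplicated into both halves of the 512-bit result. |
| ///         let r = _mm512_broadcast_i32x8(a); |
| ///         let mut out = [0i32; 16]; |
| ///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]); |
| ///     } |
| /// } |
| /// ``` |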
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i { |
| unsafe { |
| let a = a.as_i32x8(); |
| let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x8(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x16())) |
| } |
| } |
| |
| /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i32x8(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, b, i32x16::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551) |
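| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(20, 10); // lane 0 = 10, lane 1 = 20 |
| ///         // The 128-bit source is duplicated into both halves of the 256-bit result. |
| ///         let r = _mm256_broadcast_i64x2(a); |
| ///         let mut out = [0i64; 4]; |
| ///         _mm256_storeu_si256(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [10, 20, 10, 20]); |
| ///     } |
| /// } |
| /// ``` |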
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i { |
| unsafe { |
| let a = a.as_i64x2(); |
| let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i64x2(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x4())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { |
| unsafe { |
| let b = _mm256_broadcast_i64x2(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, i64x4::ZERO)) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { |
| unsafe { |
| let a = a.as_i64x2(); |
| let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); |
| transmute(b) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i64x2(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x8())) |
| } |
| } |
| |
| /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { |
| unsafe { |
| let b = _mm512_broadcast_i64x2(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, i64x8::ZERO)) |
| } |
| } |
| |
| // Extract |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946) |
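| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_setr_ps( |
| ///             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, |
| ///             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, |
| ///         ); |
| ///         // IMM8 = 1 selects the upper 256 bits (elements 8 through 15). |
| ///         let hi = _mm512_extractf32x8_ps::<1>(a); |
| ///         let mut out = [0.0f32; 8]; |
| ///         _mm256_storeu_ps(out.as_mut_ptr(), hi); |
| ///         assert_eq!(out, [8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]); |
| ///     } |
| /// } |
| /// ``` |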
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
| _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
| } |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extractf32x8_ps::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8())) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extractf32x8_ps::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| _ => simd_shuffle!(a, a, [2, 3]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m256d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extractf64x2_pd::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extractf64x2_pd::<IMM8>(a); |
| transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952) |
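| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0); |
| ///         // IMM8 = 2 selects the third 128-bit lane (elements 4 and 5). |
| ///         let r = _mm512_extractf64x2_pd::<2>(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [4.0, 5.0]); |
| ///     } |
| /// } |
| /// ``` |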
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| 1 => simd_shuffle!(a, a, [2, 3]), |
| 2 => simd_shuffle!(a, a, [4, 5]), |
| _ => simd_shuffle!(a, a, [6, 7]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src |
| /// if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, |
| /// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i32x16(); |
| let b: i32x8 = match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), |
| _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), |
| }; |
| transmute(b) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>( |
| src: __m256i, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i32x8())) |
| } |
| } |
| |
| /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, b, i32x8::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968) |
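| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_epi64x(10, 20, 30, 40); |
| ///         // IMM8 = 1 selects the upper 128 bits (elements 2 and 3). |
| ///         let r = _mm256_extracti64x2_epi64::<1>(a); |
| ///         let mut out = [0i64; 2]; |
| ///         _mm_storeu_si128(out.as_mut_ptr().cast(), r); |
| ///         assert_eq!(out, [30, 40]); |
| ///     } |
| /// } |
| /// ``` |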
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i64x4(); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| _ => simd_shuffle!(a, a, [2, 3]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>( |
| src: __m128i, |
| k: __mmask8, |
| a: __m256i, |
| ) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let a = a.as_i64x8(); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, a, [0, 1]), |
| 1 => simd_shuffle!(a, a, [2, 3]), |
| 2 => simd_shuffle!(a, a, [4, 5]), |
| _ => simd_shuffle!(a, a, [6, 7]), |
| } |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>( |
| src: __m128i, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores |
| /// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| // Insert |
| |
| /// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850) |
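| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         let a = _mm512_set1_ps(1.0); |
| ///         let b = _mm256_set1_ps(2.0); |
| ///         // IMM8 = 1 replaces the upper 256 bits of `a` with `b`. |
| ///         let r = _mm512_insertf32x8::<1>(a, b); |
| ///         let mut out = [0.0f32; 16]; |
| ///         _mm512_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!( |
| ///             out, |
| ///             [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0] |
| ///         ); |
| ///     } |
| /// } |
| /// ``` |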
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm512_castps256_ps512(b); |
| match IMM8 & 1 { |
| 0 => { |
| simd_shuffle!( |
| a, |
| b, |
| [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
| ) |
| } |
| _ => { |
| simd_shuffle!( |
| a, |
| b, |
| [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
| ) |
| } |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_insertf32x8<const IMM8: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m256, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_insertf32x8::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16(); |
| transmute(simd_select_bitmask(k, c, f32x16::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853) |
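| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); |
| ///         let b = _mm_setr_pd(9.0, 10.0); |
| ///         // IMM8 = 0 replaces the lower 128 bits of `a` with `b`. |
| ///         let r = _mm256_insertf64x2::<0>(a, b); |
| ///         let mut out = [0.0f64; 4]; |
| ///         _mm256_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [9.0, 10.0, 3.0, 4.0]); |
| ///     } |
| /// } |
| /// ``` |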
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let b = _mm256_castpd128_pd256(b); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
| _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_insertf64x2<const IMM8: i32>( |
| src: __m256d, |
| k: __mmask8, |
| a: __m256d, |
| b: __m128d, |
| ) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_insertf64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4(); |
| transmute(simd_select_bitmask(k, c, f64x4::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into dst at the location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let b = _mm512_castpd128_pd512(b); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
| 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
| 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
| _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k |
| /// (elements are copied from src if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_insertf64x2<const IMM8: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m128d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_insertf64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point |
| /// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k |
| /// (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8(); |
| transmute(simd_select_bitmask(k, c, f64x8::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i32x16(); |
| let b = _mm512_castsi256_si512(b).as_i32x16(); |
| let r: i32x16 = match IMM8 & 1 { |
| 0 => { |
| simd_shuffle!( |
| a, |
| b, |
| [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] |
| ) |
| } |
| _ => { |
| simd_shuffle!( |
| a, |
| b, |
| [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] |
| ) |
| } |
| }; |
| transmute(r) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_inserti32x8<const IMM8: i32>( |
| src: __m512i, |
| k: __mmask16, |
| a: __m512i, |
| b: __m256i, |
| ) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_inserti32x8::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16(); |
| transmute(simd_select_bitmask(k, c, i32x16::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let a = a.as_i64x4(); |
| let b = _mm256_castsi128_si256(b).as_i64x4(); |
| match IMM8 & 1 { |
| 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), |
| _ => simd_shuffle!(a, b, [0, 1, 4, 5]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_inserti64x2<const IMM8: i32>( |
| src: __m256i, |
| k: __mmask8, |
| a: __m256i, |
| b: __m128i, |
| ) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_inserti64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 1); |
| let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, c, i64x4::ZERO)) |
| } |
| } |
| |
| /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the |
| /// location specified by IMM8. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let a = a.as_i64x8(); |
| let b = _mm512_castsi128_si512(b).as_i64x8(); |
| match IMM8 & 3 { |
| 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), |
| 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), |
| 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), |
| _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), |
| } |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_inserti64x2<const IMM8: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512i, |
| b: __m128i, |
| ) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_inserti64x2::<IMM8>(a, b); |
| transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8())) |
| } |
| } |
| |
| /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the |
| /// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 2); |
| let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, c, i64x8::ZERO)) |
| } |
| } |
| |
| // Convert |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437) |
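| /// |
| /// An illustrative sketch of how the rounding mode affects the result (values chosen |
| /// for demonstration; runtime feature detection is assumed, and the snippet is not |
| /// compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") { |
| ///     unsafe { |
| ///         // 2^53 + 1 is not exactly representable as an f64, so the rounding mode matters. |
| ///         let a = _mm512_set1_epi64(9_007_199_254_740_993); |
| ///         let nearest = |
| ///             _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| ///         let up = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut n = [0.0f64; 8]; |
| ///         let mut u = [0.0f64; 8]; |
| ///         _mm512_storeu_pd(n.as_mut_ptr(), nearest); |
| ///         _mm512_storeu_pd(u.as_mut_ptr(), up); |
| ///         assert_eq!(n[0], 9_007_199_254_740_992.0); // rounded to nearest (even) |
| ///         assert_eq!(u[0], 9_007_199_254_740_994.0); // rounded toward +infinity |
| ///     } |
| /// } |
| /// ``` |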
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705) |
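| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-3, 7); // lane 0 = 7, lane 1 = -3 |
| ///         let r = _mm_cvtepi64_pd(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [7.0, -3.0]); |
| ///     } |
| /// } |
| /// ``` |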
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { |
| unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepi64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepi64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { |
| unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepi64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepi64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { |
| unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepi64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepi64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723) |
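| /// |
| /// An illustrative sketch (values chosen for demonstration; runtime feature detection |
| /// is assumed, and the snippet is not compiled as a doctest): |
| /// |
| /// ```ignore |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-3, 7); // lane 0 = 7, lane 1 = -3 |
| ///         // The two converted values occupy the low two lanes; the upper lanes are zeroed. |
| ///         let r = _mm_cvtepi64_ps(a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [7.0, -3.0, 0.0, 0.0]); |
| ///     } |
| /// } |
| /// ``` |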
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { |
| _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
| unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { |
| _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { |
| unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727) |
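| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation) showing how |
| /// masked-off lanes are taken from `src`; the feature checks and helper |
| /// intrinsics are choices of this sketch: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let src = _mm_set1_ps(9.0); |
| ///         let a = _mm256_set_epi64x(4, 3, 2, 1); |
| ///         // Bits 0 and 2 of k are set: lanes 0 and 2 are converted, lanes 1 and 3 keep `src`. |
| ///         let r = _mm256_mask_cvtepi64_ps(src, 0b0101, a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [1.0, 9.0, 3.0, 9.0]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |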
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepi64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x4())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepi64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, f32x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { |
| unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepi64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepi64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455) |
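| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): 2^53 + 1 has no |
| /// exact `f64` representation, so the chosen rounding mode decides the result. The |
| /// feature checks and helper intrinsics are choices of this sketch. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512dq") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm512_set1_epi64((1 << 53) + 1); |
| ///         // Rounding toward zero truncates 2^53 + 1 down to 2^53. |
| ///         let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut out = [0.0f64; 8]; |
| ///         _mm512_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [(1u64 << 53) as f64; 8]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |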
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827) |
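| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): the bit pattern |
| /// of `-1i64` is read as `u64::MAX`, illustrating the unsigned interpretation. The |
| /// expected value assumes the default round-to-nearest MXCSR mode. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(-1, 3); |
| ///         let r = _mm_cvtepu64_pd(a); |
| ///         let mut out = [0.0f64; 2]; |
| ///         _mm_storeu_pd(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [3.0, u64::MAX as f64]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |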
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d { |
| unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepu64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x2())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { |
| unsafe { |
| let b = _mm_cvtepu64_pd(a).as_f64x2(); |
| transmute(simd_select_bitmask(k, b, f64x2::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { |
| unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepu64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x4())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { |
| unsafe { |
| let b = _mm256_cvtepu64_pd(a).as_f64x4(); |
| transmute(simd_select_bitmask(k, b, f64x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { |
| unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepu64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f64x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2pd))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { |
| unsafe { |
| let b = _mm512_cvtepu64_pd(a).as_f64x8(); |
| transmute(simd_select_bitmask(k, b, f64x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m512i, |
| ) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845) |
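| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation), assuming the |
| /// `avx512dq` and `avx512vl` features are detected at runtime: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_epi64x(7, 5); |
| ///         let r = _mm_cvtepu64_ps(a); |
| ///         let mut out = [0.0f32; 4]; |
| ///         _mm_storeu_ps(out.as_mut_ptr(), r); |
| ///         // The upper two lanes of the result are zeroed. |
| ///         assert_eq!(out, [5.0, 7.0, 0.0, 0.0]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |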
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { |
| _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { |
| unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { |
| _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { |
| unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepu64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x4())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { |
| unsafe { |
| let b = _mm256_cvtepu64_ps(a).as_f32x4(); |
| transmute(simd_select_bitmask(k, b, f32x4::ZERO)) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { |
| unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepu64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, src.as_f32x8())) |
| } |
| } |
| |
| /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtuqq2ps))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { |
| unsafe { |
| let b = _mm512_cvtepu64_ps(a).as_f32x8(); |
| transmute(simd_select_bitmask(k, b, f32x8::ZERO)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472) |
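| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): rounding `2.5` |
| /// toward negative infinity yields `2` in every lane. The feature checks and |
| /// helper intrinsics are choices of this sketch. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512dq") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm512_set1_pd(2.5); |
| ///         let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); |
| ///         let mut out = [0i64; 8]; |
| ///         _mm512_storeu_epi64(out.as_mut_ptr(), r); |
| ///         assert_eq!(out, [2i64; 8]); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |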
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941) |
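| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation); the expected |
| /// values assume the default round-to-nearest MXCSR mode: |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_pd(3.2, -1.7); |
| ///         let r = _mm_cvtpd_epi64(a); |
| ///         assert_eq!(_mm_extract_epi64::<0>(r), -2); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r), 3); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |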
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvtpd2qq_512( |
| a.as_f64x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075) |
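| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): only the two |
| /// lowest single-precision lanes of `a` take part in the conversion. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_ps(9.0, 8.0, 3.0, -2.0); |
| ///         let r = _mm_cvtps_epi64(a); |
| ///         // The upper lanes (8.0 and 9.0) are ignored. |
| ///         assert_eq!(_mm_extract_epi64::<0>(r), -2); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r), 3); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |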
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtps_epi64(a: __m128) -> __m128i { |
| _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvtps2qq_512( |
| a.as_f32x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959) |
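| /// |
| /// # Examples |
| /// |
| /// A minimal usage sketch (not taken from Intel's documentation): a value above |
| /// `i64::MAX` still converts exactly, because the destination lanes are unsigned. |
| /// |
| /// ``` |
| /// # #[cfg(target_arch = "x86_64")] |
| /// # { |
| /// use std::arch::x86_64::*; |
| /// |
| /// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") { |
| ///     // SAFETY: the required target features were detected above. |
| ///     unsafe { |
| ///         let a = _mm_set_pd((1u64 << 63) as f64, 4.0); |
| ///         let r = _mm_cvtpd_epu64(a); |
| ///         assert_eq!(_mm_extract_epi64::<0>(r) as u64, 4); |
| ///         assert_eq!(_mm_extract_epi64::<1>(r) as u64, 1u64 << 63); |
| ///     } |
| /// } |
| /// # } |
| /// ``` |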
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvtpd2uqq_512( |
| a.as_f64x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520) |
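| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm256_set1_ps(2.5); |
| ///     // Round toward positive infinity, so every element becomes 3. |
| ///     let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); |
| /// } |
| /// ``` |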
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_rounding!(ROUNDING); |
| transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// Rounding is done according to the ROUNDING parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions |
| /// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions |
| /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions |
| /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_rounding!(ROUNDING); |
| _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093) |
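| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime with the default |
| /// round-to-nearest-even mode in effect: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // Only the two lowest single-precision elements are converted. |
| ///     let a = _mm_setr_ps(1.5, 2.5, 0.0, 0.0); |
| ///     let r = _mm_cvtps_epu64(a); // both converted elements round to 2 |
| /// } |
| /// ``` |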
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvtps_epu64(a: __m128) -> __m128i { |
| _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is |
| /// not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvtps2uqq_512( |
| a.as_f32x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, |
| /// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvtps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264) |
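| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm512_set1_pd(-3.9); |
| ///     // Truncation is always toward zero; _MM_FROUND_NO_EXC only suppresses exceptions. |
| ///     let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); // every element is -3 |
| /// } |
| /// ``` |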
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329) |
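| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_setr_pd(1.9, -2.9); |
| ///     let r = _mm_cvttpd_epi64(a); // [1, -2]: the fractional part is discarded |
| /// } |
| /// ``` |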
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333) |
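| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values and mask are |
| /// arbitrary, and it assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let src = _mm256_set1_epi64x(-1); |
| ///     let a = _mm256_set1_pd(7.8); |
| ///     // Only element 0 is converted (to 7); elements 1..=3 keep src's value of -1. |
| ///     let r = _mm256_mask_cvttpd_epi64(src, 0b0001, a); |
| /// } |
| /// ``` |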
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvttpd2qq_512( |
| a.as_f64x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420) |
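| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // Only the two lowest single-precision elements participate. |
| ///     let a = _mm_setr_ps(-1.5, 6.25, 0.0, 0.0); |
| ///     let r = _mm_cvttps_epi64(a); // [-1, 6] |
| /// } |
| /// ``` |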
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttps_epi64(a: __m128) -> __m128i { |
| _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvttps2qq_512( |
| a.as_f32x8(), |
| src.as_i64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2qq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347) |
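| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_setr_pd(0.9, 200.7); |
| ///     let r = _mm_cvttpd_epu64(a); // [0, 200] |
| /// } |
| /// ``` |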
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { |
| unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { |
| _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { |
| unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { |
| _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { |
| unsafe { |
| transmute(vcvttpd2uqq_512( |
| a.as_f64x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttpd2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { |
| _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC |
| /// to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>( |
| src: __m512i, |
| k: __mmask8, |
| a: __m256, |
| ) -> __m512i { |
| unsafe { |
| static_assert_sae!(SAE); |
| transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE)) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i { |
| static_assert_sae!(SAE); |
| _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_cvttps_epu64(a: __m128) -> __m128i { |
| _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { |
| unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { |
| _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { |
| unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { |
| _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a) |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using writemask k (elements are copied from src if the |
| /// corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { |
| unsafe { |
| transmute(vcvttps2uqq_512( |
| a.as_f32x8(), |
| src.as_u64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers |
| /// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding |
| /// bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vcvttps2uqq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { |
| _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a) |
| } |
| |
| // Multiply-Low |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778) |
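| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the values are arbitrary, and it |
| /// assumes `avx512dq` and `avx512vl` are available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let a = _mm_set1_epi64x(1i64 << 40); |
| ///     let b = _mm_set1_epi64x(1i64 << 30); |
| ///     // The full product is 2^70, which does not fit in 64 bits; keeping only the |
| ///     // low 64 bits yields 2^70 mod 2^64 = 2^6 = 64 in each element. |
| ///     let r = _mm_mullo_epi64(a, b); |
| /// } |
| /// ``` |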
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { |
| unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_mullo_epi64(a, b).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x2())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { |
| unsafe { |
| let b = _mm_mullo_epi64(a, b).as_i64x2(); |
| transmute(simd_select_bitmask(k, b, i64x2::ZERO)) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { |
| unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| unsafe { |
| let b = _mm256_mullo_epi64(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x4())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { |
| unsafe { |
| let b = _mm256_mullo_epi64(a, b).as_i64x4(); |
| transmute(simd_select_bitmask(k, b, i64x4::ZERO)) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst`. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { |
| unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from |
| /// `src` if the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| unsafe { |
| let b = _mm512_mullo_epi64(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, src.as_i64x8())) |
| } |
| } |
| |
| /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store |
| /// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if |
| /// the corresponding bit is not set). |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmullq))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { |
| unsafe { |
| let b = _mm512_mullo_epi64(a, b).as_i64x8(); |
| transmute(simd_select_bitmask(k, b, i64x8::ZERO)) |
| } |
| } |
| |
| // Mask Registers |
| |
| /// Convert 8-bit mask a to a 32-bit integer value and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _cvtmask8_u32(a: __mmask8) -> u32 { |
| a as u32 |
| } |
| |
| /// Convert 32-bit integer value a to an 8-bit mask and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467) |
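| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the value is arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     let k = _cvtu32_mask8(0b1010_0001); |
| ///     // Round-trips back to the same low 8 bits. |
| ///     assert_eq!(_cvtmask8_u32(k), 0b1010_0001); |
| /// } |
| /// ``` |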
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _cvtu32_mask8(a: u32) -> __mmask8 { |
| a as __mmask8 |
| } |
| |
| /// Add 16-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { |
| a + b |
| } |
| |
| /// Add 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a + b |
| } |
| |
| /// Bitwise AND of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a & b |
| } |
| |
| /// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916) |
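| /// |
| /// A minimal illustrative sketch (not compiled as a doctest); the masks are arbitrary, and it |
| /// assumes `avx512dq` is available at runtime: |
| /// |
| /// ```ignore |
| /// use core::arch::x86_64::*; |
| /// |
| /// unsafe { |
| ///     // a is inverted first, then ANDed with b: (!0b1100) & 0b1010 == 0b0010. |
| ///     assert_eq!(_kandn_mask8(0b1100, 0b1010), 0b0010); |
| /// } |
| /// ``` |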
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| _knot_mask8(a) & b |
| } |
| |
| /// Bitwise NOT of 8-bit mask a, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _knot_mask8(a: __mmask8) -> __mmask8 { |
| a ^ 0b11111111 |
| } |
| |
| /// Bitwise OR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a | b |
| } |
| |
| /// Bitwise XNOR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| _knot_mask8(_kxor_mask8(a, b)) |
| } |
| |
| /// Bitwise XOR of 8-bit masks a and b, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { |
| a ^ b |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
| /// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931) |
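///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut all_ones = 0u8;
/// // 0xF0 | 0x0F == 0xFF: the OR is not all zeros (return value 0) and is all
/// // ones (all_ones is set to 1).
/// let all_zeros = unsafe { _kortest_mask8_u8(0xF0, 0x0F, &mut all_ones) };
/// assert_eq!((all_zeros, all_ones), (0, 1));
/// ```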
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 { |
| let tmp = _kor_mask8(a, b); |
| *all_ones = (tmp == 0xff) as u8; |
| (tmp == 0) as u8 |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise |
| /// store 0 in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kor_mask8(a, b) == 0xff) as u8 |
| } |
| |
| /// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise |
| /// store 0 in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kor_mask8(a, b) == 0) as u8 |
| } |
| |
| /// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945) |
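///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // COUNT is a const generic; bits shifted above bit 7 are dropped.
/// assert_eq!(_kshiftli_mask8::<2>(0b0100_1011), 0b0010_1100);
/// assert_eq!(_kshiftri_mask8::<2>(0b0100_1011), 0b0001_0010);
/// ```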
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
| a << COUNT |
| } |
| |
| /// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 { |
| a >> COUNT |
| } |
| |
/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 { |
| *and_not = (_kandn_mask16(a, b) == 0) as u8; |
| (_kand_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953) |
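///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut and_not = 0u8;
/// let a: __mmask8 = 0b0000_0011;
/// let b: __mmask8 = 0b0000_1100;
/// // a & b == 0, so the return value is 1; !a & b == 0b0000_1100 != 0, so and_not is 0.
/// let zf = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
/// assert_eq!((zf, and_not), (1, 0));
/// ```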
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 { |
| *and_not = (_kandn_mask8(a, b) == 0) as u8; |
| (_kand_mask8(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
| (_kandn_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kandn_mask8(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 16-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { |
| (_kand_mask16(a, b) == 0) as u8 |
| } |
| |
/// Compute the bitwise AND of 8-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { |
| (_kand_mask8(a, b) == 0) as u8 |
| } |
| |
/// Load 8-bit mask from memory into dst.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 { |
| *mem_addr |
| } |
| |
/// Store 8-bit mask a into memory.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468) |
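///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// let mut slot: __mmask8 = 0;
/// unsafe {
///     _store_mask8(&mut slot, 0b1001_0110);
///     assert_eq!(_load_mask8(&slot), 0b1001_0110);
/// }
/// ```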
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) { |
| *mem_addr = a; |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612) |
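///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // Each mask bit is the sign bit of the corresponding 32-bit lane.
/// // Lanes, low to high: 4, -3, 2, -1  ->  bits 0..=3 are 0, 1, 0, 1.
/// let v = _mm_set_epi32(-1, 2, -3, 4);
/// assert_eq!(_mm_movepi32_mask(v), 0b1010);
/// ```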
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { |
| let zero = _mm_setzero_si128(); |
| _mm_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { |
| let zero = _mm256_setzero_si256(); |
| _mm256_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { |
| let zero = _mm512_setzero_si512(); |
| _mm512_cmplt_epi32_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { |
| let zero = _mm_setzero_si128(); |
| _mm_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { |
| let zero = _mm256_setzero_si256(); |
| _mm256_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit |
| /// integer in a. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { |
| let zero = _mm512_setzero_si512(); |
| _mm512_cmplt_epi64_mask(a, zero) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625) |
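///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest.
///
/// ```ignore
/// // Bit i of k expands to all ones (-1) or all zeros in 32-bit lane i, so
/// // converting back with _mm_movepi32_mask recovers the original mask.
/// assert_eq!(_mm_movepi32_mask(_mm_movm_epi32(0b0101)), 0b0101);
/// ```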
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movm_epi32(k: __mmask8) -> __m128i { |
| let ones = _mm_set1_epi32(-1); |
| _mm_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i { |
| let ones = _mm256_set1_epi32(-1); |
| _mm256_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmovm2d))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i { |
| let ones = _mm512_set1_epi32(-1); |
| _mm512_maskz_mov_epi32(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_movm_epi64(k: __mmask8) -> __m128i { |
| let ones = _mm_set1_epi64x(-1); |
| _mm_maskz_mov_epi64(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i { |
| let ones = _mm256_set1_epi64x(-1); |
| _mm256_maskz_mov_epi64(k, ones) |
| } |
| |
| /// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding |
| /// bit in k. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vpmovm2q))] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i { |
| let ones = _mm512_set1_epi64(-1); |
| _mm512_maskz_mov_epi64(k, ones) |
| } |
| |
| // Range |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangepd_512( |
| a.as_f64x8(), |
| b.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192) |
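///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` and `avx512vl`
/// are enabled for the caller, so it is marked `ignore` rather than run as a doctest. The
/// expected values follow from the control-bit description above.
///
/// ```ignore
/// // IMM8 = 0b1011: lower bits 11 select absolute max, upper bits 10 clear the
/// // sign, so each element becomes max(|a|, |b|).
/// let a = _mm_set_pd(-5.0, 1.5);
/// let b = _mm_set_pd(3.0, -2.5);
/// let r = _mm_range_pd::<0b1011>(a, b);
/// // Lanes, low to high: max(1.5, 2.5) = 2.5 and max(5.0, 3.0) = 5.0.
/// assert_eq!(_mm_cvtsd_f64(r), 2.5);
/// ```
///
/// The `_mm_mask_range_pd` and `_mm_maskz_range_pd` variants below apply the same operation
/// under a writemask or zeromask.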
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_pd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_128( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| IMM8, |
| src.as_f64x2(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_range_pd<const IMM8: i32>( |
| src: __m256d, |
| k: __mmask8, |
| a: __m256d, |
| b: __m256d, |
| ) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_256( |
| a.as_f64x4(), |
| b.as_f64x4(), |
| IMM8, |
| src.as_f64x4(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_pd<const IMM8: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| b: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangepd_512( |
| a.as_f64x8(), |
| b.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangeps_512( |
| a.as_f32x16(), |
| b.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>( |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_ps<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_128( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| IMM8, |
| src.as_f32x4(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_range_ps<const IMM8: i32>( |
| src: __m256, |
| k: __mmask8, |
| a: __m256, |
| b: __m256, |
| ) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_256( |
| a.as_f32x8(), |
| b.as_f32x8(), |
| IMM8, |
| src.as_f32x8(), |
| k, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// writemask k (elements are copied from src to dst if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_range_ps<const IMM8: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| b: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangeps_512( |
| a.as_f32x16(), |
| b.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed |
| /// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using |
| /// zeromask k (elements are zeroed out if the corresponding mask bit is not set). |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst, and copy the upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216) |
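///
/// Illustrative sketch (not from Intel's documentation); assumes `avx512dq` is enabled for
/// the caller, so it is marked `ignore` rather than run as a doctest. The expected values
/// follow from the control-bit description above.
///
/// ```ignore
/// // Lower element: IMM8 = 0b1011 selects the absolute max with the sign cleared,
/// // i.e. max(|-5.0|, |3.0|) = 5.0; the upper element is copied from `a`.
/// let a = _mm_set_pd(10.0, -5.0);
/// let b = _mm_set_pd(99.0, 3.0);
/// let r = _mm_range_round_sd::<0b1011, { _MM_FROUND_NO_EXC }>(a, b);
/// assert_eq!(_mm_cvtsd_f64(r), 5.0); // the upper element of r is 10.0, taken from a
/// ```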
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_sd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// double-precision (64-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// element from a to the upper element of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst, and copy the upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| transmute(vrangess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| static_assert_sae!(SAE); |
| _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the |
| /// upper 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_range_ss<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 4); |
| transmute(vrangess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower |
| /// single-precision (32-bit) floating-point element in a and b, store the result in the lower element |
| /// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper |
| /// 3 packed elements from a to the upper elements of dst. |
/// Lower 2 bits of IMM8 specify the operation control:
/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
| /// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 4); |
| _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| // Reduce |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438) |
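| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` enabled. The high
| /// nibble of IMM8 is used here as the fraction-bit count from Intel's imm8 layout (an
| /// assumption beyond the list above); SAE suppresses exceptions:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // Keep 4 fraction bits and truncate; _MM_FROUND_NO_EXC suppresses exceptions.
| /// const IMM8: i32 = (4 << 4) | _MM_FROUND_TO_ZERO;
| /// let a = _mm512_set1_pd(1.0 / 3.0);
| /// // Each lane becomes (1/3) minus (1/3) rounded toward zero to 4 fractional bits.
| /// let r = _mm512_reduce_round_pd::<{ IMM8 }, { _MM_FROUND_NO_EXC }>(a);
| /// ```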
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(1, 2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
| src: __m512d, |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m512d, |
| ) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411) |
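| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // With no extra fraction bits requested and truncation as the rounding control,
| /// // each lane becomes `x - trunc(x)`, i.e. the fractional part for positive inputs.
| /// let a = _mm_set1_pd(3.75);
| /// let r = _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a);
| /// assert_eq!(_mm_cvtsd_f64(r), 0.75);
| /// ```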
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducepd_512( |
| a.as_f64x8(), |
| IMM8, |
| src.as_f64x8(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(1, 2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
| src: __m512, |
| k: __mmask16, |
| a: __m512, |
| ) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>( |
| k: __mmask16, |
| a: __m512, |
| ) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428) |
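| ///
| /// A minimal usage sketch of the zeromask behaviour, assuming the caller already has
| /// `avx512dq` and `avx512vl` enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let a = _mm_set_ps(4.25, 3.5, 2.25, 1.75); // element 0 = 1.75, ..., element 3 = 4.25
| /// // Only mask bits 0 and 2 are set: elements 1 and 3 are zeroed, elements 0 and 2
| /// // become their fractional parts (truncation, no extra fraction bits kept).
| /// let r = _mm_maskz_reduce_ps::<{ _MM_FROUND_TO_ZERO }>(0b0101, a);
| /// assert_eq!(_mm_cvtss_f32(r), 0.75); // element 0: 1.75 - 1.0
| /// ```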
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k)) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a) |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using writemask k (elements are |
| /// copied from src to dst if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreduceps_512( |
| a.as_f32x16(), |
| IMM8, |
| src.as_f32x16(), |
| k, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by |
| /// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are |
| /// zeroed out if the corresponding mask bit is not set). |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper element from a to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and
| /// copy the upper element from a to the upper element of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) |
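| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` enabled:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let a = _mm_set_pd(8.0, 1.0);  // upper = 8.0, lower = 1.0
| /// let b = _mm_set_pd(99.0, 2.5); // only the lower element (2.5) is reduced
| /// // Lower lane: 2.5 - trunc(2.5) = 0.5; upper lane is copied from `a` (8.0).
| /// let r = _mm_reduce_sd::<{ _MM_FROUND_TO_ZERO }>(a, b);
| /// assert_eq!(_mm_cvtsd_f64(r), 0.5);
| /// ```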
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_sd<const IMM8: i32>( |
| src: __m128d, |
| k: __mmask8, |
| a: __m128d, |
| b: __m128d, |
| ) -> __m128d { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducesd( |
| a.as_f64x2(), |
| b.as_f64x2(), |
| src.as_f64x2(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a |
| /// to the upper element of dst. |
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper 3 packed elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(2, 3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(4, 5)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| transmute(vreducess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| SAE, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] |
| #[rustc_legacy_const_generics(3, 4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>( |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| static_assert_sae!(SAE); |
| _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst, and copy |
| /// the upper 3 packed elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b) |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using writemask |
| /// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(4)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_reduce_ss<const IMM8: i32>( |
| src: __m128, |
| k: __mmask8, |
| a: __m128, |
| b: __m128, |
| ) -> __m128 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vreducess( |
| a.as_f32x4(), |
| b.as_f32x4(), |
| src.as_f32x4(), |
| k, |
| IMM8, |
| _MM_FROUND_CUR_DIRECTION, |
| )) |
| } |
| } |
| |
| /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b |
| /// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask |
| /// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
| /// elements from a to the upper elements of dst.
| /// Rounding is done according to the imm8 parameter, which can be one of: |
| /// |
| /// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest |
| /// * [`_MM_FROUND_TO_NEG_INF`] : round down |
| /// * [`_MM_FROUND_TO_POS_INF`] : round up |
| /// * [`_MM_FROUND_TO_ZERO`] : truncate |
| /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] |
| #[rustc_legacy_const_generics(3)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b) |
| } |
| |
| // FP-Class |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493) |
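| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled; the category flags can be OR'ed together:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// // 0x01 (QNaN) | 0x80 (SNaN): flag every NaN element.
| /// let v = _mm_set_pd(f64::NAN, 1.0); // element 1 = NaN, element 0 = 1.0
| /// let k = _mm_fpclass_pd_mask::<0x81>(v);
| /// assert_eq!(k, 0b10); // only bit 1 is set
| /// ```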
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494) |
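| ///
| /// A minimal usage sketch, assuming the caller already has `avx512dq` and `avx512vl`
| /// enabled; k1 pre-filters which elements can set a bit in the result:
| ///
| /// ```ignore
| /// use std::arch::x86_64::*;
| ///
| /// let v = _mm_set_pd(f64::NAN, f64::NAN); // both elements are NaN
| /// // Only bit 0 of k1 is set, so bit 1 of the result stays clear even though
| /// // element 1 is also a NaN.
| /// let k = _mm_mask_fpclass_pd_mask::<0x81>(0b01, v);
| /// assert_eq!(k, 0b01);
| /// ```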
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508) |
| #[inline] |
| #[target_feature(enable = "avx512dq,avx512vl")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1)) |
| } |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k. |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a) |
| } |
| |
| /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified |
| /// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the |
| /// corresponding mask bit is not set). |
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1)) |
| } |
| } |
| |
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k.
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a) |
| } |
| |
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask
/// bit 0 is not set).
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| vfpclasssd(a.as_f64x2(), IMM8, k1) |
| } |
| } |
| |
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k.
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 { |
| static_assert_uimm_bits!(IMM8, 8); |
| _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a) |
| } |
| |
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask
/// bit 0 is not set).
| /// imm can be a combination of: |
| /// |
| /// - 0x01 // QNaN |
| /// - 0x02 // Positive Zero |
| /// - 0x04 // Negative Zero |
| /// - 0x08 // Positive Infinity |
| /// - 0x10 // Negative Infinity |
| /// - 0x20 // Denormal |
| /// - 0x40 // Negative |
| /// - 0x80 // SNaN |
| /// |
| /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) |
| #[inline] |
| #[target_feature(enable = "avx512dq")] |
| #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "stdarch_x86_avx512", since = "1.89")] |
| pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 { |
| unsafe { |
| static_assert_uimm_bits!(IMM8, 8); |
| vfpclassss(a.as_f32x4(), IMM8, k1) |
| } |
| } |
| |
| #[allow(improper_ctypes)] |
| unsafe extern "C" { |
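    // Bindings to the LLVM intrinsics that back the conversion, range, reduce and fpclass
    // wrappers above. The trailing `rounding`/`sae` parameters take the same `_MM_FROUND_*`
    // constants as the corresponding `_round`/`sae` intrinsic variants.
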
| #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"] |
| fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"] |
| fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"] |
| fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"] |
| fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"] |
| fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4; |
| #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"] |
| fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8; |
| |
| #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"] |
| fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"] |
| fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"] |
| fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"] |
| fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"] |
| fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4; |
| #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"] |
| fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"] |
| fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"] |
| fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"] |
| fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"] |
| fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"] |
| fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"] |
| fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"] |
| fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"] |
| fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"] |
| fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"] |
| fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"] |
| fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"] |
| fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"] |
| fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"] |
| fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"] |
| fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"] |
| fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"] |
| fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"] |
| fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"] |
| fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"] |
| fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"] |
| fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"] |
| fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"] |
| fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4; |
| #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"] |
| fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.pd.128"] |
| fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.range.pd.256"] |
| fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
| #[link_name = "llvm.x86.avx512.mask.range.pd.512"] |
| fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.ps.128"] |
| fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.mask.range.ps.256"] |
| fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
| #[link_name = "llvm.x86.avx512.mask.range.ps.512"] |
| fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) |
| -> f32x16; |
| |
| #[link_name = "llvm.x86.avx512.mask.range.sd"] |
| fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.range.ss"] |
| fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"] |
| fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"] |
| fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4; |
| #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"] |
| fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"] |
| fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"] |
| fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"] |
| fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16; |
| |
| #[link_name = "llvm.x86.avx512.mask.reduce.sd"] |
| fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2; |
| #[link_name = "llvm.x86.avx512.mask.reduce.ss"] |
| fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"] |
| fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"] |
| fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"] |
| fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"] |
| fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"] |
| fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"] |
| fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16; |
| |
| #[link_name = "llvm.x86.avx512.mask.fpclass.sd"] |
| fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8; |
| #[link_name = "llvm.x86.avx512.mask.fpclass.ss"] |
| fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8; |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| use stdarch_test::simd_test; |
| |
| use crate::core_arch::x86::*; |
| use crate::mem::transmute; |
| |
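    // The operand bit patterns repeat the nibbles 0x3 and 0x5, so every bitwise result is a
    // distinct repeated nibble: 0x3 & 0x5 = 0x1, !0x3 & 0x5 = 0x4 (ANDN), 0x3 | 0x5 = 0x7,
    // 0x3 ^ 0x5 = 0x6. The constants below reinterpret those integer patterns as floats.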
| const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) }; |
| const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) }; |
| |
| const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) }; |
| const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) }; |
| const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) }; |
| const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) }; |
| |
| const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) }; |
| const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) }; |
| |
| const AND_32: f32 = unsafe { transmute(0x11111111_u32) }; |
| const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) }; |
| const OR_32: f32 = unsafe { transmute(0x77777777_u32) }; |
| const XOR_32: f32 = unsafe { transmute(0x66666666_u32) }; |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_and_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_and_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., AND_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_and_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_and_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, AND_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_and_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_and_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., AND_64, 3., AND_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_and_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_and_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_and_pd(a, b); |
| let e = _mm512_set1_pd(AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_and_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_and_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_and_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_and_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_and_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., AND_32, 3., AND_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_and_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_and_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_and_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_and_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_and_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_and_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_and_ps(a, b); |
| let e = _mm512_set1_ps(AND_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, |
| 15., AND_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_and_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_and_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., |
| AND_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_andnot_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_andnot_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., ANDN_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_andnot_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_andnot_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, ANDN_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_andnot_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_andnot_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_andnot_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_andnot_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_andnot_pd(a, b); |
| let e = _mm512_set1_pd(ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_andnot_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_andnot_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_andnot_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_andnot_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_andnot_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_andnot_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_andnot_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_andnot_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_andnot_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_andnot_ps(a, b); |
| let e = _mm512_set1_ps(ANDN_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., |
| ANDN_32, 15., ANDN_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_andnot_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., |
| ANDN_32, 0., ANDN_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_or_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_or_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., OR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_or_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_or_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, OR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_or_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_or_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., OR_64, 3., OR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_or_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_or_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_or_pd(a, b); |
| let e = _mm512_set1_pd(OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_or_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_or_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_or_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_or_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_or_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., OR_32, 3., OR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_or_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_or_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_or_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_or_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_or_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_or_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_or_ps(a, b); |
| let e = _mm512_set1_ps(OR_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15., |
| OR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_or_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_or_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_xor_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let src = _mm_set_pd(1., 2.); |
| let r = _mm_mask_xor_pd(src, 0b01, a, b); |
| let e = _mm_set_pd(1., XOR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_xor_pd() { |
| let a = _mm_set1_pd(OPRND1_64); |
| let b = _mm_set1_pd(OPRND2_64); |
| let r = _mm_maskz_xor_pd(0b01, a, b); |
| let e = _mm_set_pd(0.0, XOR_64); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_xor_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let src = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_mask_xor_pd(src, 0b0101, a, b); |
| let e = _mm256_set_pd(1., XOR_64, 3., XOR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_xor_pd() { |
| let a = _mm256_set1_pd(OPRND1_64); |
| let b = _mm256_set1_pd(OPRND2_64); |
| let r = _mm256_maskz_xor_pd(0b0101, a, b); |
| let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_xor_pd(a, b); |
| let e = _mm512_set1_pd(XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_mask_xor_pd(src, 0b01010101, a, b); |
| let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_xor_pd() { |
| let a = _mm512_set1_pd(OPRND1_64); |
| let b = _mm512_set1_pd(OPRND2_64); |
| let r = _mm512_maskz_xor_pd(0b01010101, a, b); |
| let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_xor_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let src = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_mask_xor_ps(src, 0b0101, a, b); |
| let e = _mm_set_ps(1., XOR_32, 3., XOR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_xor_ps() { |
| let a = _mm_set1_ps(OPRND1_32); |
| let b = _mm_set1_ps(OPRND2_32); |
| let r = _mm_maskz_xor_ps(0b0101, a, b); |
| let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_xor_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm256_mask_xor_ps(src, 0b01010101, a, b); |
| let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_xor_ps() { |
| let a = _mm256_set1_ps(OPRND1_32); |
| let b = _mm256_set1_ps(OPRND2_32); |
| let r = _mm256_maskz_xor_ps(0b01010101, a, b); |
| let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_xor_ps(a, b); |
| let e = _mm512_set1_ps(XOR_32); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let src = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, |
| 15., XOR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_xor_ps() { |
| let a = _mm512_set1_ps(OPRND1_32); |
| let b = _mm512_set1_ps(OPRND2_32); |
| let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b); |
| let e = _mm512_set_ps( |
| 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., |
| XOR_32, |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_broadcast_f32x2(a); |
| let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.); |
| let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a); |
| let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_broadcast_f32x2(0b01101001, a); |
| let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm512_broadcast_f32x2(a); |
| let e = _mm512_set_ps( |
| 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f32x2() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_broadcast_f32x8(a); |
| let e = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_ps( |
| 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., |
| ); |
| let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f32x8() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm256_broadcast_f64x2(a); |
| let e = _mm256_set_pd(1., 2., 1., 2.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm256_set_pd(3., 4., 5., 6.); |
| let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a); |
| let e = _mm256_set_pd(3., 2., 1., 6.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm256_maskz_broadcast_f64x2(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 1., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm512_broadcast_f64x2(a); |
| let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a); |
| let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_f64x2() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm512_maskz_broadcast_f64x2(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm_broadcast_i32x2(a); |
| let e = _mm_set_epi32(3, 4, 3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm_set_epi32(5, 6, 7, 8); |
| let r = _mm_mask_broadcast_i32x2(b, 0b0110, a); |
| let e = _mm_set_epi32(5, 4, 3, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm_maskz_broadcast_i32x2(0b0110, a); |
| let e = _mm_set_epi32(0, 4, 3, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm256_broadcast_i32x2(a); |
| let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12); |
| let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a); |
| let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm256_maskz_broadcast_i32x2(0b01101001, a); |
| let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm512_broadcast_i32x2(a); |
| let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); |
| let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a); |
| let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i32x2() { |
| let a = _mm_set_epi32(1, 2, 3, 4); |
| let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a); |
| let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_broadcast_i32x8(a); |
| let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi32( |
| 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, |
| ); |
| let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a); |
| let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i32x8() { |
| let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a); |
| let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm256_broadcast_i64x2(a); |
| let e = _mm256_set_epi64x(1, 2, 1, 2); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm256_set_epi64x(3, 4, 5, 6); |
| let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a); |
| let e = _mm256_set_epi64x(3, 2, 1, 6); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm256_maskz_broadcast_i64x2(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 1, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm512_broadcast_i64x2(a); |
| let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10); |
| let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a); |
| let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_broadcast_i64x2() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm512_maskz_broadcast_i64x2(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_extractf32x8_ps::<1>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a); |
| let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extractf32x8_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_extractf64x2_pd::<1>(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a); |
| let e = _mm_set_pd(5., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_extractf64x2_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_extractf64x2_pd::<2>(a); |
| let e = _mm_set_pd(3., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a); |
| let e = _mm_set_pd(9., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extractf64x2_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a); |
| let e = _mm_set_pd(0., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_extracti32x8_epi32::<1>(a); |
| let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a); |
| let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extracti32x8_epi32() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a); |
| let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_extracti64x2_epi64::<1>(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_extracti64x2_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_extracti64x2_epi64::<2>(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a); |
| let e = _mm_set_epi64x(9, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_extracti64x2_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_insertf32x8::<1>(a, b); |
| let e = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let src = _mm512_set_ps( |
| 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., |
| ); |
| let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_insertf32x8() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.); |
| let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_insertf64x2::<1>(a, b); |
| let e = _mm256_set_pd(5., 6., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let src = _mm256_set_pd(7., 8., 9., 10.); |
| let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b); |
| let e = _mm256_set_pd(7., 6., 3., 10.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_insertf64x2() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm_set_pd(5., 6.); |
| let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b); |
| let e = _mm256_set_pd(0., 6., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_insertf64x2::<2>(a, b); |
| let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.); |
| let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b); |
| let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_insertf64x2() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm_set_pd(9., 10.); |
| let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_inserti32x8::<1>(a, b); |
| let e = _mm512_set_epi32( |
| 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let src = _mm512_set_epi32( |
| 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
| ); |
| let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b); |
| let e = _mm512_set_epi32( |
| 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_inserti32x8() { |
| let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); |
| let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b); |
| let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_inserti64x2::<1>(a, b); |
| let e = _mm256_set_epi64x(5, 6, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let src = _mm256_set_epi64x(7, 8, 9, 10); |
| let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b); |
| let e = _mm256_set_epi64x(7, 6, 3, 10); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_inserti64x2() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b); |
| let e = _mm256_set_epi64x(0, 6, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_inserti64x2::<2>(a, b); |
| let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18); |
| let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b); |
| let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_inserti64x2() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_set_epi64x(9, 10); |
| let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b); |
| let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
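        // 1..=8 convert exactly, so the explicit rounding mode has no observable effect in this test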
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepi64_pd(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_pd(3., 4.); |
| let r = _mm_mask_cvtepi64_pd(b, 0b01, a); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepi64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepi64_pd(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepi64_pd(a); |
| let e = _mm256_set_pd(1., 2., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a); |
| let e = _mm256_set_pd(5., 2., 3., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepi64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepi64_pd(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepi64_pd(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepi64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepi64_pd(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepi64_ps(a); |
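        // the two i64 inputs fill only the low half of the __m128 result; the upper two f32 lanes are zeroed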
| let e = _mm_set_ps(0., 0., 1., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_ps(3., 4., 5., 6.); |
| let r = _mm_mask_cvtepi64_ps(b, 0b01, a); |
| let e = _mm_set_ps(0., 0., 5., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepi64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepi64_ps(0b01, a); |
| let e = _mm_set_ps(0., 0., 0., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepi64_ps(a); |
| let e = _mm_set_ps(1., 2., 3., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a); |
| let e = _mm_set_ps(5., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepi64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepi64_ps(0b0110, a); |
| let e = _mm_set_ps(0., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepi64_ps(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepi64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepi64_ps(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepu64_pd(a); |
| let e = _mm_set_pd(1., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_pd(3., 4.); |
| let r = _mm_mask_cvtepu64_pd(b, 0b01, a); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepu64_pd() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepu64_pd(0b01, a); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepu64_pd(a); |
| let e = _mm256_set_pd(1., 2., 3., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a); |
| let e = _mm256_set_pd(5., 2., 3., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepu64_pd() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepu64_pd(0b0110, a); |
| let e = _mm256_set_pd(0., 2., 3., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepu64_pd(a); |
| let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a); |
| let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepu64_pd() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepu64_pd(0b01101001, a); |
| let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_cvtepu64_ps(a); |
| let e = _mm_set_ps(0., 0., 1., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_ps(3., 4., 5., 6.); |
| let r = _mm_mask_cvtepu64_ps(b, 0b01, a); |
| let e = _mm_set_ps(0., 0., 5., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtepu64_ps() { |
| let a = _mm_set_epi64x(1, 2); |
| let r = _mm_maskz_cvtepu64_ps(0b01, a); |
| let e = _mm_set_ps(0., 0., 0., 2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_cvtepu64_ps(a); |
| let e = _mm_set_ps(1., 2., 3., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a); |
| let e = _mm_set_ps(5., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtepu64_ps() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let r = _mm256_maskz_cvtepu64_ps(0b0110, a); |
| let e = _mm_set_ps(0., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_cvtepu64_ps(a); |
| let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a); |
| let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtepu64_ps() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let r = _mm512_maskz_cvtepu64_ps(0b01101001, a); |
| let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvtpd_epi64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvtpd_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvtpd_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvtpd_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtpd_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtpd_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtpd_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvtps_epi64(a); |
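        // only the two lowest f32 lanes (4. and 3. here) are converted to i64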
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvtps_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvtps_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvtps_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtps_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtps_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtps_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtps_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvtpd_epu64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvtpd_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvtpd_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvtpd_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtpd_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtpd_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtpd_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| b, 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvtps_epu64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvtps_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvtps_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvtps_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvtps_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvtps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvtps_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtps_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtps_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a); |
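        // the truncating (`cvtt`) forms round toward zero, which leaves these integral inputs unchanged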
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvttpd_epi64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvttpd_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttpd_epi64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvttpd_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvttpd_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttpd_epi64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttpd_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttpd_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttpd_epi64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttpd_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvttps_epi64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvttps_epi64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvttps_epi64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvttps_epi64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttps_epi64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttps_epi64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttps_epi64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttps_epi64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttps_epi64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttps_epi64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_cvttpd_epu64(a); |
| let e = _mm_set_epi64x(1, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mask_cvttpd_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(3, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttpd_epu64() { |
| let a = _mm_set_pd(1., 2.); |
| let r = _mm_maskz_cvttpd_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_cvttpd_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttpd_epu64() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttpd_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttpd_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttpd_epu64() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttpd_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvtt_roundps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_cvttps_epu64(a); |
| let e = _mm_set_epi64x(3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_cvttps_epu64(b, 0b01, a); |
| let e = _mm_set_epi64x(5, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm_maskz_cvttps_epu64(0b01, a); |
| let e = _mm_set_epi64x(0, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_cvttps_epu64(a); |
| let e = _mm256_set_epi64x(1, 2, 3, 4); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mask_cvttps_epu64(b, 0b0110, a); |
| let e = _mm256_set_epi64x(5, 2, 3, 8); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_cvttps_epu64() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let r = _mm256_maskz_cvttps_epu64(0b0110, a); |
| let e = _mm256_set_epi64x(0, 2, 3, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_cvttps_epu64(a); |
| let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a); |
| let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_cvttps_epu64() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let r = _mm512_maskz_cvttps_epu64(0b01101001, a); |
| let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_mullo_epi64(a, b); |
| let e = _mm_set_epi64x(3, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let c = _mm_set_epi64x(5, 6); |
| let r = _mm_mask_mullo_epi64(c, 0b01, a, b); |
| let e = _mm_set_epi64x(5, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_mullo_epi64() { |
| let a = _mm_set_epi64x(1, 2); |
| let b = _mm_set_epi64x(3, 4); |
| let r = _mm_maskz_mullo_epi64(0b01, a, b); |
| let e = _mm_set_epi64x(0, 8); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_mullo_epi64(a, b); |
| let e = _mm256_set_epi64x(5, 12, 21, 32); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let c = _mm256_set_epi64x(9, 10, 11, 12); |
| let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b); |
| let e = _mm256_set_epi64x(9, 12, 21, 12); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_mullo_epi64() { |
| let a = _mm256_set_epi64x(1, 2, 3, 4); |
| let b = _mm256_set_epi64x(5, 6, 7, 8); |
| let r = _mm256_maskz_mullo_epi64(0b0110, a, b); |
| let e = _mm256_set_epi64x(0, 12, 21, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_mullo_epi64(a, b); |
| let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); |
| let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b); |
| let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_mullo_epi64() { |
| let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm512_maskz_mullo_epi64(0b01101001, a, b); |
| let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_cvtmask8_u32() { |
| let a: __mmask8 = 0b01101001; |
| let r = _cvtmask8_u32(a); |
| let e: u32 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_cvtu32_mask8() { |
| let a: u32 = 0b01101001; |
| let r = _cvtu32_mask8(a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kadd_mask16() { |
| let a: __mmask16 = 27549; |
| let b: __mmask16 = 23434; |
| let r = _kadd_mask16(a, b); |
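        // _kadd_mask16 adds the two masks as unsigned integers: 27549 + 23434 == 50983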
| let e: __mmask16 = 50983; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kadd_mask8() { |
| let a: __mmask8 = 98; |
| let b: __mmask8 = 117; |
| let r = _kadd_mask8(a, b); |
| let e: __mmask8 = 215; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kand_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kand_mask8(a, b); |
| let e: __mmask8 = 0b00100001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kandn_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kandn_mask8(a, b); |
| let e: __mmask8 = 0b10010010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_knot_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _knot_mask8(a); |
| let e: __mmask8 = 0b10010110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kor_mask8(a, b); |
| let e: __mmask8 = 0b11111011; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kxnor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kxnor_mask8(a, b); |
| let e: __mmask8 = 0b00100101; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kxor_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110011; |
| let r = _kxor_mask8(a, b); |
| let e: __mmask8 = 0b11011010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortest_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let mut all_ones: u8 = 0; |
| let r = _kortest_mask8_u8(a, b, &mut all_ones); |
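        // a | b == 0xFF, so `all_ones` (the carry flag) is set; the OR is nonzero, so the returned zero flag is 0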
| assert_eq!(r, 0); |
| assert_eq!(all_ones, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortestc_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let r = _kortestc_mask8_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kortestz_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10110110; |
| let r = _kortestz_mask8_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kshiftli_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _kshiftli_mask8::<3>(a); |
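        // the shift stays within the 8-bit mask, so bits moved past bit 7 are dropped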
| let e: __mmask8 = 0b01001000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_kshiftri_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _kshiftri_mask8::<3>(a); |
| let e: __mmask8 = 0b00001101; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktest_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let mut and_not: u8 = 0; |
| let r = _ktest_mask8_u8(a, b, &mut and_not); |
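        // a & b == 0, so the returned zero flag is 1; !a & b is nonzero, so `and_not` stays 0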
| assert_eq!(r, 1); |
| assert_eq!(and_not, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestc_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let r = _ktestc_mask8_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestz_mask8_u8() { |
| let a: __mmask8 = 0b01101001; |
| let b: __mmask8 = 0b10010110; |
| let r = _ktestz_mask8_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktest_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let mut and_not: u8 = 0; |
| let r = _ktest_mask16_u8(a, b, &mut and_not); |
| assert_eq!(r, 1); |
| assert_eq!(and_not, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestc_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let r = _ktestc_mask16_u8(a, b); |
| assert_eq!(r, 0); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_ktestz_mask16_u8() { |
| let a: __mmask16 = 0b0110100100111100; |
| let b: __mmask16 = 0b1001011011000011; |
| let r = _ktestz_mask16_u8(a, b); |
| assert_eq!(r, 1); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_load_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let r = _load_mask8(&a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_store_mask8() { |
| let a: __mmask8 = 0b01101001; |
| let mut r = 0; |
| _store_mask8(&mut r, a); |
| let e: __mmask8 = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movepi32_mask() { |
| let a = _mm_set_epi32(0, -2, -3, 4); |
| let r = _mm_movepi32_mask(a); |
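        // each mask bit is the sign bit of the corresponding 32-bit element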
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movepi32_mask() { |
| let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8); |
| let r = _mm256_movepi32_mask(a); |
| let e = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movepi32_mask() { |
| let a = _mm512_set_epi32( |
| 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16, |
| ); |
| let r = _mm512_movepi32_mask(a); |
| let e = 0b0110100100111100; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movepi64_mask() { |
| let a = _mm_set_epi64x(0, -2); |
| let r = _mm_movepi64_mask(a); |
| let e = 0b01; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movepi64_mask() { |
| let a = _mm256_set_epi64x(0, -2, -3, 4); |
| let r = _mm256_movepi64_mask(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movepi64_mask() { |
| let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8); |
| let r = _mm512_movepi64_mask(a); |
| let e = 0b01101001; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movm_epi32() { |
| let a = 0b0110; |
| let r = _mm_movm_epi32(a); |
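        // every set mask bit becomes an all-ones (-1) element; clear bits become 0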
| let e = _mm_set_epi32(0, -1, -1, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movm_epi32() { |
| let a = 0b01101001; |
| let r = _mm256_movm_epi32(a); |
| let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movm_epi32() { |
| let a = 0b0110100100111100; |
| let r = _mm512_movm_epi32(a); |
| let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_movm_epi64() { |
| let a = 0b01; |
| let r = _mm_movm_epi64(a); |
| let e = _mm_set_epi64x(0, -1); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_movm_epi64() { |
| let a = 0b0110; |
| let r = _mm256_movm_epi64(a); |
| let e = _mm256_set_epi64x(0, -1, -1, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_movm_epi64() { |
| let a = 0b01101001; |
| let r = _mm512_movm_epi64(a); |
| let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
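        // the low two bits of imm8 (0b01) select max(a, b) per element; the sign-control bits are irrelevant for these positive inputs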
| let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b); |
| let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_round_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let r = _mm_range_pd::<0b0101>(a, b); |
| let e = _mm_set_pd(2., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b); |
| let e = _mm_set_pd(3., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_range_pd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_pd(2., 1.); |
| let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b); |
| let e = _mm_set_pd(0., 2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let r = _mm256_range_pd::<0b0101>(a, b); |
| let e = _mm256_set_pd(2., 2., 4., 4.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let c = _mm256_set_pd(5., 6., 7., 8.); |
| let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b); |
| let e = _mm256_set_pd(5., 2., 4., 8.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_range_pd() { |
| let a = _mm256_set_pd(1., 2., 3., 4.); |
| let b = _mm256_set_pd(2., 1., 4., 3.); |
| let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b); |
| let e = _mm256_set_pd(0., 2., 4., 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_range_pd::<0b0101>(a, b); |
| let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b); |
| let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_pd() { |
| let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b); |
| let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm512_set_ps( |
| 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let c = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
| ); |
| let r = |
| _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_round_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let r = _mm_range_ps::<0b0101>(a, b); |
| let e = _mm_set_ps(2., 2., 4., 4.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b); |
| let e = _mm_set_ps(5., 2., 4., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_range_ps() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ps(2., 1., 4., 3.); |
| let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b); |
| let e = _mm_set_ps(0., 2., 4., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm256_range_ps::<0b0101>(a, b); |
| let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.); |
| let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b); |
| let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_range_ps() { |
| let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); |
| let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.); |
| let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b); |
| let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_range_ps::<0b0101>(a, b); |
| let e = _mm512_set_ps( |
| 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let c = _mm512_set_ps( |
| 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., |
| ); |
| let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_range_ps() { |
| let a = _mm512_set_ps( |
| 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., |
| ); |
| let b = _mm512_set_ps( |
| 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15., |
| ); |
| let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b); |
| let e = _mm512_set_ps( |
| 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
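        // The scalar variants compute only the lowest element; the upper element of the result is
        // copied from `a` (here 0.0, the upper half of _mm_set_sd(1.)).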
| let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_sd(2.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let c = _mm_set_sd(3.); |
| let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
| let e = _mm_set_sd(3.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_round_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_sd(0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let c = _mm_set_sd(3.); |
| let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b); |
| let e = _mm_set_sd(3.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_sd() { |
| let a = _mm_set_sd(1.); |
| let b = _mm_set_sd(2.); |
| let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b); |
| let e = _mm_set_sd(0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_ss(2.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let c = _mm_set_ss(3.); |
| let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b); |
| let e = _mm_set_ss(3.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_round_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_ss(0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_range_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let c = _mm_set_ss(3.); |
| let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b); |
| let e = _mm_set_ss(3.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_range_ss() { |
| let a = _mm_set_ss(1.); |
| let b = _mm_set_ss(2.); |
| let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b); |
| let e = _mm_set_ss(0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
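        // IMM8 = 16 | _MM_FROUND_TO_ZERO: bits 7:4 = 1 keep one fraction bit and rounding is
        // truncation, so each element becomes x - trunc(2 * x) / 2 (e.g. 0.75 -> 0.25, 0.50 -> 0.).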
| let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| src, 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_round_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| 0b01101001, a, |
| ); |
| let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm_set_pd(0.25, 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let src = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a); |
| let e = _mm_set_pd(3., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_reduce_pd() { |
| let a = _mm_set_pd(0.25, 0.50); |
| let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a); |
| let e = _mm_set_pd(0., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm256_set_pd(0.25, 0., 0.25, 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let src = _mm256_set_pd(3., 4., 5., 6.); |
| let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
| let e = _mm256_set_pd(3., 0., 0.25, 6.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_reduce_pd() { |
| let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0); |
| let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
| let e = _mm256_set_pd(0., 0., 0.25, 0.); |
| assert_eq_m256d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
| let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_pd() { |
| let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
| let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m512d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a); |
| let e = _mm512_set_ps( |
| 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let src = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| src, |
| 0b0110100100111100, |
| a, |
| ); |
| let e = _mm512_set_ps( |
| 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_round_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| 0b0110100100111100, |
| a, |
| ); |
| let e = _mm512_set_ps( |
| 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm_set_ps(0.25, 0., 0.25, 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let src = _mm_set_ps(2., 3., 4., 5.); |
| let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a); |
| let e = _mm_set_ps(2., 0., 0.25, 5.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_maskz_reduce_ps() { |
| let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0); |
| let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a); |
| let e = _mm_set_ps(0., 0., 0.25, 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.); |
| let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a); |
| let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_maskz_reduce_ps() { |
| let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0); |
| let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a); |
| let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.); |
| assert_eq_m256(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a); |
| let e = _mm512_set_ps( |
| 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let src = _mm512_set_ps( |
| 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., |
| ); |
| let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_maskz_reduce_ps() { |
| let a = _mm512_set_ps( |
| 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75, |
| 4.0, |
| ); |
| let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a); |
| let e = _mm512_set_ps( |
| 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0., |
| ); |
| assert_eq_m512(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
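        // The scalar reduce takes its input from the low element of `b`; the upper element of the
        // result is copied from `a`.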
| let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_pd(1., 0.25); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| c, 0b0, a, b, |
| ); |
| let e = _mm_set_pd(1., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_round_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = |
| _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_pd(1., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
| let e = _mm_set_pd(1., 0.25); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let c = _mm_set_pd(3., 4.); |
| let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
| let e = _mm_set_pd(1., 4.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_sd() { |
| let a = _mm_set_pd(1., 2.); |
| let b = _mm_set_sd(0.25); |
| let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
| let e = _mm_set_pd(1., 0.); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.25); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>( |
| c, 0b0, a, b, |
| ); |
| let e = _mm_set_ps(1., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_round_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = |
| _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.25); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let c = _mm_set_ps(5., 6., 7., 8.); |
| let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 8.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_maskz_reduce_ss() { |
| let a = _mm_set_ps(1., 2., 3., 4.); |
| let b = _mm_set_ss(0.25); |
| let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b); |
| let e = _mm_set_ps(1., 2., 3., 0.); |
| assert_eq_m128(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_fpclass_pd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
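        // IMM8 = 0x18 tests the positive-infinity (bit 3) and negative-infinity (bit 4) categories,
        // so only infinite elements set their mask bit.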
| let r = _mm_fpclass_pd_mask::<0x18>(a); |
| let e = 0b01; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_fpclass_pd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
| let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a); |
| let e = 0b00; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_fpclass_pd_mask() { |
| let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
| let r = _mm256_fpclass_pd_mask::<0x18>(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_fpclass_pd_mask() { |
| let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0); |
| let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a); |
| let e = 0b0010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_fpclass_pd_mask() { |
| let a = _mm512_set_pd( |
| 1., |
| f64::INFINITY, |
| f64::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f64::NAN, |
| 1.0e-308, |
| ); |
| let r = _mm512_fpclass_pd_mask::<0x18>(a); |
| let e = 0b01100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_fpclass_pd_mask() { |
| let a = _mm512_set_pd( |
| 1., |
| f64::INFINITY, |
| f64::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f64::NAN, |
| 1.0e-308, |
| ); |
| let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a); |
| let e = 0b00100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_fpclass_ps_mask() { |
| let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
| let r = _mm_fpclass_ps_mask::<0x18>(a); |
| let e = 0b0110; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm_mask_fpclass_ps_mask() { |
| let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0); |
| let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a); |
| let e = 0b0010; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_fpclass_ps_mask() { |
| let a = _mm256_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| ); |
| let r = _mm256_fpclass_ps_mask::<0x18>(a); |
| let e = 0b01100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq,avx512vl")] |
| unsafe fn test_mm256_mask_fpclass_ps_mask() { |
| let a = _mm256_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| ); |
| let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a); |
| let e = 0b00100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_fpclass_ps_mask() { |
| let a = _mm512_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| -1., |
| f32::NEG_INFINITY, |
| f32::INFINITY, |
| -0.0, |
| 0.0, |
| 2.0, |
| f32::NAN, |
| -1.0e-38, |
| ); |
| let r = _mm512_fpclass_ps_mask::<0x18>(a); |
| let e = 0b0110000001100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm512_mask_fpclass_ps_mask() { |
| let a = _mm512_set_ps( |
| 1., |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| 0.0, |
| -0.0, |
| -2.0, |
| f32::NAN, |
| 1.0e-38, |
| -1., |
| f32::NEG_INFINITY, |
| f32::INFINITY, |
| -0.0, |
| 0.0, |
| 2.0, |
| f32::NAN, |
| -1.0e-38, |
| ); |
| let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a); |
| let e = 0b0010000000100000; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_fpclass_sd_mask() { |
| let a = _mm_set_pd(1., f64::INFINITY); |
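        // The scalar variant classifies only the lowest element (here f64::INFINITY).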
| let r = _mm_fpclass_sd_mask::<0x18>(a); |
| let e = 0b1; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_fpclass_sd_mask() { |
| let a = _mm_set_sd(f64::INFINITY); |
| let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); |
| let e = 0b0; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_fpclass_ss_mask() { |
| let a = _mm_set_ss(f32::INFINITY); |
| let r = _mm_fpclass_ss_mask::<0x18>(a); |
| let e = 0b1; |
| assert_eq!(r, e); |
| } |
| |
| #[simd_test(enable = "avx512dq")] |
| unsafe fn test_mm_mask_fpclass_ss_mask() { |
| let a = _mm_set_ss(f32::INFINITY); |
| let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); |
| let e = 0b0; |
| assert_eq!(r, e); |
| } |
| } |