// coresimd/x86/mmx.rs - rust-lang/stdarch - Git at Google

 //! `i586` MMX instruction set.
 //!
 //! The intrinsics here roughly correspond to those in the `mmintrin.h` C
 //! header.
 //!
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf

 use coresimd::simd::*;
 use coresimd::x86::*;
 use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;

 /// Returns a 64-bit integer vector in which every bit is zero.
 ///
 /// The value is obtained by reinterpreting the integer `0_i64` as a `__m64`.
 #[inline]
 #[target_feature(enable = "mmx")]
 // FIXME: this produces a movl instead of xorps on x86
 // FIXME: this produces a xor intrinsic instead of xorps on x86_64
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
 pub unsafe fn _mm_setzero_si64() -> __m64 {
     mem::transmute::<i64, __m64>(0)
 }

 /// Adds the packed 8-bit integers in `a` to the packed 8-bit integers in `b`,
 /// element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.padd.b` intrinsic; test builds assert that
 /// this lowers to the `paddb` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddb))]
 pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
     paddb(a, b)
 }

 /// Adds the packed 8-bit integers in `a` to the packed 8-bit integers in `b`,
 /// element by element.
 ///
 /// Alias of `_mm_add_pi8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddb))]
 pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 {
     _mm_add_pi8(a, b)
 }

 /// Adds the packed 16-bit integers in `a` to the packed 16-bit integers in
 /// `b`, element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.padd.w` intrinsic; test builds assert that
 /// this lowers to the `paddw` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddw))]
 pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
     paddw(a, b)
 }

 /// Adds the packed 16-bit integers in `a` to the packed 16-bit integers in
 /// `b`, element by element.
 ///
 /// Alias of `_mm_add_pi16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddw))]
 pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 {
     _mm_add_pi16(a, b)
 }

 /// Adds the packed 32-bit integers in `a` to the packed 32-bit integers in
 /// `b`, element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.padd.d` intrinsic; test builds assert that
 /// this lowers to the `paddd` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddd))]
 pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
     paddd(a, b)
 }

 /// Adds the packed 32-bit integers in `a` to the packed 32-bit integers in
 /// `b`, element by element.
 ///
 /// Alias of `_mm_add_pi32`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddd))]
 pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 {
     _mm_add_pi32(a, b)
 }

 /// Adds packed 8-bit integers in `a` and `b` using saturation.
 ///
 /// As exercised by the unit test, a sum that falls outside the `i8` range is
 /// clamped to `i8::min_value()` or `i8::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.padds.b` intrinsic (`paddsb` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsb))]
 pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
     paddsb(a, b)
 }

 /// Adds packed 8-bit integers in `a` and `b` using saturation.
 ///
 /// Alias of `_mm_adds_pi8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsb))]
 pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 {
     _mm_adds_pi8(a, b)
 }

 /// Adds packed 16-bit integers in `a` and `b` using saturation.
 ///
 /// As exercised by the unit test, a sum that falls outside the `i16` range is
 /// clamped to `i16::min_value()` or `i16::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.padds.w` intrinsic (`paddsw` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsw))]
 pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
     paddsw(a, b)
 }

 /// Adds packed 16-bit integers in `a` and `b` using saturation.
 ///
 /// Alias of `_mm_adds_pi16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsw))]
 pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 {
     _mm_adds_pi16(a, b)
 }

 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
 ///
 /// As exercised by the unit test, a sum above `u8::max_value()` is clamped to
 /// `u8::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.paddus.b` intrinsic (`paddusb` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusb))]
 pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
     paddusb(a, b)
 }

 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
 ///
 /// Alias of `_mm_adds_pu8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusb))]
 pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 {
     _mm_adds_pu8(a, b)
 }

 /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
 ///
 /// As exercised by the unit test, a sum above `u16::max_value()` is clamped
 /// to `u16::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.paddus.w` intrinsic (`paddusw` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusw))]
 pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
     paddusw(a, b)
 }

 /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
 ///
 /// Alias of `_mm_adds_pu16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusw))]
 pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 {
     _mm_adds_pu16(a, b)
 }

 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`,
 /// element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.psub.b` intrinsic; test builds assert that
 /// this lowers to the `psubb` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubb))]
 pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
     psubb(a, b)
 }

 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`,
 /// element by element.
 ///
 /// Alias of `_mm_sub_pi8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubb))]
 pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
     _mm_sub_pi8(a, b)
 }

 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
 /// `a`, element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.psub.w` intrinsic; test builds assert that
 /// this lowers to the `psubw` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubw))]
 pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
     psubw(a, b)
 }

 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
 /// `a`, element by element.
 ///
 /// Alias of `_mm_sub_pi16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubw))]
 pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
     _mm_sub_pi16(a, b)
 }

 /// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in
 /// `a`, element by element.
 ///
 /// Forwards to the `llvm.x86.mmx.psub.d` intrinsic; test builds assert that
 /// this lowers to the `psubd` instruction.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubd))]
 pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
     psubd(a, b)
 }

 /// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in
 /// `a`, element by element.
 ///
 /// Alias of `_mm_sub_pi32`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubd))]
 pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
     _mm_sub_pi32(a, b)
 }

 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
 /// using saturation.
 ///
 /// As exercised by the unit test, a difference that falls outside the `i8`
 /// range is clamped to `i8::min_value()` or `i8::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.psubs.b` intrinsic (`psubsb` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubsb))]
 pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
     psubsb(a, b)
 }

 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
 /// using saturation.
 ///
 /// Alias of `_mm_subs_pi8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubsb))]
 pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
     _mm_subs_pi8(a, b)
 }

 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
 /// using saturation.
 ///
 /// As exercised by the unit test, a difference that falls outside the `i16`
 /// range is clamped to `i16::min_value()` or `i16::max_value()`.
 ///
 /// Forwards to the `llvm.x86.mmx.psubs.w` intrinsic (`psubsw` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubsw))]
 pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
     psubsw(a, b)
 }

 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
 /// using saturation.
 ///
 /// Alias of `_mm_subs_pi16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubsw))]
 pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
     _mm_subs_pi16(a, b)
 }

 /// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
 /// integers in `a` using saturation.
 ///
 /// As exercised by the unit test, a difference that would be negative is
 /// clamped to `0`.
 ///
 /// Forwards to the `llvm.x86.mmx.psubus.b` intrinsic (`psubusb` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubusb))]
 pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
     psubusb(a, b)
 }

 /// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
 /// integers in `a` using saturation.
 ///
 /// Alias of `_mm_subs_pu8`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubusb))]
 pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
     _mm_subs_pu8(a, b)
 }

 /// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned
 /// 16-bit integers in `a` using saturation.
 ///
 /// As exercised by the unit test, a difference that would be negative is
 /// clamped to `0`.
 ///
 /// Forwards to the `llvm.x86.mmx.psubus.w` intrinsic (`psubusw` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubusw))]
 pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
     psubusw(a, b)
 }

 /// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned
 /// 16-bit integers in `a` using saturation.
 ///
 /// Alias of `_mm_subs_pu16`: both operands are forwarded unchanged.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(psubusw))]
 pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
     _mm_subs_pu16(a, b)
 }

 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation.
 ///
 /// Values greater than 0x7F (127) are saturated to 0x7F. Values less than
 /// -128 are saturated to 0x80 (the bit pattern of -128 as an 8-bit integer).
 ///
 /// As exercised by the unit test, the narrowed elements of `a` come first in
 /// the result, followed by the narrowed elements of `b`.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(packsswb))]
 pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
     packsswb(a, b)
 }

 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation.
 ///
 /// Values greater than 0x7FFF (32767) are saturated to 0x7FFF. Values less
 /// than -32768 are saturated to 0x8000 (the bit pattern of -32768 as a 16-bit
 /// integer).
 ///
 /// As exercised by the unit test, the narrowed elements of `a` come first in
 /// the result, followed by the narrowed elements of `b`.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(packssdw))]
 pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
     packssdw(a, b)
 }

 /// Compares whether each 8-bit element of `a` is greater than the
 /// corresponding element of `b`, returning `0` for `false` and `-1` for
 /// `true` in the matching element of the result.
 ///
 /// Forwards to the `llvm.x86.mmx.pcmpgt.b` intrinsic (`pcmpgtb` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
 pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
     pcmpgtb(a, b)
 }

 /// Compares whether each 16-bit element of `a` is greater than the
 /// corresponding element of `b`, returning `0` for `false` and `-1` for
 /// `true` in the matching element of the result.
 ///
 /// Forwards to the `llvm.x86.mmx.pcmpgt.w` intrinsic (`pcmpgtw` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
 pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
     pcmpgtw(a, b)
 }

 /// Compares whether each 32-bit element of `a` is greater than the
 /// corresponding element of `b`, returning `0` for `false` and `-1` for
 /// `true` in the matching element of the result.
 ///
 /// Forwards to the `llvm.x86.mmx.pcmpgt.d` intrinsic (`pcmpgtd` instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(pcmpgtd))]
 pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
     pcmpgtd(a, b)
 }

 /// Unpacks the upper two elements from two `i16x4` vectors and interleaves
 /// them into the result: `[a.2, b.2, a.3, b.3]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpckhwd` intrinsic.
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
 pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
     punpckhwd(a, b)
 }

 /// Unpacks the upper four elements from two `i8x8` vectors and interleaves
 /// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpckhbw` intrinsic (`punpckhbw`
 /// instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpckhbw))]
 pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
     punpckhbw(a, b)
 }

 /// Unpacks the lower four elements from two `i8x8` vectors and interleaves
 /// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpcklbw` intrinsic (`punpcklbw`
 /// instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpcklbw))]
 pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
     punpcklbw(a, b)
 }

 /// Unpacks the lower two elements from two `i16x4` vectors and interleaves
 /// them into the result: `[a.0, b.0, a.1, b.1]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpcklwd` intrinsic (`punpcklwd`
 /// instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpcklwd))]
 pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
     punpcklwd(a, b)
 }

 /// Unpacks the upper element from two `i32x2` vectors and interleaves them
 /// into the result: `[a.1, b.1]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpckhdq` intrinsic (`punpckhdq`
 /// instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpckhdq))]
 pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
     punpckhdq(a, b)
 }

 /// Unpacks the lower element from two `i32x2` vectors and interleaves them
 /// into the result: `[a.0, b.0]`.
 ///
 /// Forwards to the `llvm.x86.mmx.punpckldq` intrinsic (`punpckldq`
 /// instruction).
 #[inline]
 #[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(punpckldq))]
 pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
     punpckldq(a, b)
 }

 /// Sets packed 16-bit integers in the result with the supplied values.
 ///
 /// Arguments are listed from the highest-indexed element (`e3`) down to
 /// element 0 (`e0`); they are handed to `_mm_setr_pi16` in the opposite
 /// order.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
     _mm_setr_pi16(e0, e1, e2, e3)
 }

 /// Sets packed 32-bit integers in the result with the supplied values.
 ///
 /// Arguments are listed from the highest-indexed element (`e1`) down to
 /// element 0 (`e0`); they are handed to `_mm_setr_pi32` in the opposite
 /// order.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
     _mm_setr_pi32(e0, e1)
 }

 /// Sets packed 8-bit integers in the result with the supplied values.
 ///
 /// Arguments are listed from the highest-indexed element (`e7`) down to
 /// element 0 (`e0`); they are handed to `_mm_setr_pi8` in the opposite order.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set_pi8(
     e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
 ) -> __m64 {
     _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
 }

 /// Broadcasts the 16-bit integer `a` to all four elements of the result.
 ///
 /// Implemented by passing `a` for every argument of `_mm_setr_pi16`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
     _mm_setr_pi16(a, a, a, a)
 }

 /// Broadcasts the 32-bit integer `a` to both elements of the result.
 ///
 /// Implemented by passing `a` for every argument of `_mm_setr_pi32`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
     _mm_setr_pi32(a, a)
 }

 /// Broadcasts the 8-bit integer `a` to all eight elements of the result.
 ///
 /// Implemented by passing `a` for every argument of `_mm_setr_pi8`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
     _mm_setr_pi8(a, a, a, a, a, a, a, a)
 }

 /// Builds a 64-bit vector from four 16-bit integers supplied in reverse
 /// order, i.e. starting with element 0 (`e0`) and ending with `e3`.
 ///
 /// The values are gathered into an `i16x4` and reinterpreted as `__m64`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
     let lanes = i16x4::new(e0, e1, e2, e3);
     mem::transmute(lanes)
 }

 /// Builds a 64-bit vector from two 32-bit integers supplied in reverse
 /// order, i.e. element 0 (`e0`) first and `e1` second.
 ///
 /// The values are gathered into an `i32x2` and reinterpreted as `__m64`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
     let lanes = i32x2::new(e0, e1);
     mem::transmute(lanes)
 }

 /// Builds a 64-bit vector from eight 8-bit integers supplied in reverse
 /// order, i.e. starting with element 0 (`e0`) and ending with `e7`.
 ///
 /// The values are gathered into an `i8x8` and reinterpreted as `__m64`.
 #[inline]
 #[target_feature(enable = "mmx")]
 pub unsafe fn _mm_setr_pi8(
     e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8,
 ) -> __m64 {
     let lanes = i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
     mem::transmute(lanes)
 }

 // Declarations of the LLVM MMX intrinsics used by the public wrappers in this
 // file. Each `link_name` selects the `llvm.x86.mmx.*` intrinsic that the
 // corresponding Rust-side function name is bound to; every intrinsic takes
 // two `__m64` operands and returns a `__m64`.
 //
 // NOTE(review): `improper_ctypes` is presumably silenced because the lint
 // does not regard `__m64` as FFI-safe - confirm.
 #[allow(improper_ctypes)]
 extern "C" {
     // Element-wise addition (8/16/32-bit).
     #[link_name = "llvm.x86.mmx.padd.b"]
     fn paddb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.padd.w"]
     fn paddw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.padd.d"]
     fn paddd(a: __m64, b: __m64) -> __m64;
     // Saturating addition (signed, then unsigned).
     #[link_name = "llvm.x86.mmx.padds.b"]
     fn paddsb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.padds.w"]
     fn paddsw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.paddus.b"]
     fn paddusb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.paddus.w"]
     fn paddusw(a: __m64, b: __m64) -> __m64;
     // Element-wise subtraction (8/16/32-bit).
     #[link_name = "llvm.x86.mmx.psub.b"]
     fn psubb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.psub.w"]
     fn psubw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.psub.d"]
     fn psubd(a: __m64, b: __m64) -> __m64;
     // Saturating subtraction (signed, then unsigned).
     #[link_name = "llvm.x86.mmx.psubs.b"]
     fn psubsb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.psubs.w"]
     fn psubsw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.psubus.b"]
     fn psubusb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.psubus.w"]
     fn psubusw(a: __m64, b: __m64) -> __m64;
     // Narrowing packs with signed saturation.
     #[link_name = "llvm.x86.mmx.packsswb"]
     fn packsswb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.packssdw"]
     fn packssdw(a: __m64, b: __m64) -> __m64;
     // Greater-than comparisons (8/16/32-bit).
     #[link_name = "llvm.x86.mmx.pcmpgt.b"]
     fn pcmpgtb(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.pcmpgt.w"]
     fn pcmpgtw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.pcmpgt.d"]
     fn pcmpgtd(a: __m64, b: __m64) -> __m64;
     // Interleaving unpacks of the high/low halves.
     #[link_name = "llvm.x86.mmx.punpckhwd"]
     fn punpckhwd(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpcklwd"]
     fn punpcklwd(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpckhbw"]
     fn punpckhbw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpcklbw"]
     fn punpcklbw(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpckhdq"]
     fn punpckhdq(a: __m64, b: __m64) -> __m64;
     #[link_name = "llvm.x86.mmx.punpckldq"]
     fn punpckldq(a: __m64, b: __m64) -> __m64;
 }

 // Unit tests for the MMX intrinsics in this file. Each test builds its
 // operands and the expected vector with the `_mm_setr_*` constructors and
 // compares the result of the intrinsic with `assert_eq_m64`. Where an `_m_*`
 // alias exists it is checked against the same expected value.
 #[cfg(test)]
 mod tests {
     use coresimd::x86::*;
     use stdsimd_test::simd_test;

     // The zero vector must equal a transmuted `0_i64`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_setzero_si64() {
         let r: __m64 = ::std::mem::transmute(0_i64);
         assert_eq_m64(r, _mm_setzero_si64());
     }

     // 8-bit addition; all sums stay inside the `i8` range.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_add_pi8() {
         let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
         let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
         let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
         assert_eq_m64(e, _mm_add_pi8(a, b));
         assert_eq_m64(e, _m_paddb(a, b));
     }

     // 16-bit addition; sums reach but do not exceed the `i16` limits.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_add_pi16() {
         let a = _mm_setr_pi16(-1, -1, 1, 1);
         let b = _mm_setr_pi16(
             i16::min_value() + 1,
             30001,
             -30001,
             i16::max_value() - 1,
         );
         let e =
             _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value());
         assert_eq_m64(e, _mm_add_pi16(a, b));
         assert_eq_m64(e, _m_paddw(a, b));
     }

     // 32-bit addition; sums reach but do not exceed the `i32` limits.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_add_pi32() {
         let a = _mm_setr_pi32(1, -1);
         let b = _mm_setr_pi32(i32::max_value() - 1, i32::min_value() + 1);
         let e = _mm_setr_pi32(i32::max_value(), i32::min_value());
         assert_eq_m64(e, _mm_add_pi32(a, b));
         assert_eq_m64(e, _m_paddd(a, b));
     }

     // Elements 0 and 3 overflow the `i8` range and must clamp to its limits.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_adds_pi8() {
         let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
         let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
         let e =
             _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1);
         assert_eq_m64(e, _mm_adds_pi8(a, b));
         assert_eq_m64(e, _m_paddsb(a, b));
     }

     // Elements 0 and 1 overflow the `i16` range and must clamp to its limits.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_adds_pi16() {
         let a = _mm_setr_pi16(-32000, 32000, 4, 0);
         let b = _mm_setr_pi16(-32000, 32000, -5, 1);
         let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), -1, 1);
         assert_eq_m64(e, _mm_adds_pi16(a, b));
         assert_eq_m64(e, _m_paddsw(a, b));
     }

     // Unsigned values are written as `u8` and cast to `i8` because the
     // constructor takes `i8`; the last element (200 + 200) must clamp to
     // `u8::max_value()`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_adds_pu8() {
         let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
         let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
         let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8);
         assert_eq_m64(e, _mm_adds_pu8(a, b));
         assert_eq_m64(e, _m_paddusb(a, b));
     }

     // The last element (60000 + 60000) must clamp to `u16::max_value()`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_adds_pu16() {
         let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
         let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
         let e = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16);
         assert_eq_m64(e, _mm_adds_pu16(a, b));
         assert_eq_m64(e, _m_paddusw(a, b));
     }

     // 8-bit subtraction; all differences stay inside the `i8` range.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_sub_pi8() {
         let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
         let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
         let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
         assert_eq_m64(e, _mm_sub_pi8(a, b));
         assert_eq_m64(e, _m_psubb(a, b));
     }

     // 16-bit subtraction; all differences stay inside the `i16` range.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_sub_pi16() {
         let a = _mm_setr_pi16(-20000, -20000, 20000, 30000);
         let b = _mm_setr_pi16(-10000, 10000, -10000, 30000);
         let e = _mm_setr_pi16(-10000, -30000, 30000, 0);
         assert_eq_m64(e, _mm_sub_pi16(a, b));
         assert_eq_m64(e, _m_psubw(a, b));
     }

     // 32-bit subtraction.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_sub_pi32() {
         let a = _mm_setr_pi32(500_000, -500_000);
         let b = _mm_setr_pi32(500_000, 500_000);
         let e = _mm_setr_pi32(0, -1_000_000);
         assert_eq_m64(e, _mm_sub_pi32(a, b));
         assert_eq_m64(e, _m_psubd(a, b));
     }

     // The first three elements leave the `i8` range and must clamp; the
     // remaining elements are ordinary differences.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_subs_pi8() {
         let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
         let b = _mm_setr_pi8(100, -100, i8::min_value(), 127, -1, 1, 3, -3);
         let e = _mm_setr_pi8(
             i8::min_value(),
             i8::max_value(),
             i8::max_value(),
             -127,
             1,
             -1,
             -8,
             8,
         );
         assert_eq_m64(e, _mm_subs_pi8(a, b));
         assert_eq_m64(e, _m_psubsb(a, b));
     }

     // Elements 0 and 1 leave the `i16` range and must clamp to its limits.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_subs_pi16() {
         let a = _mm_setr_pi16(-20000, 20000, 0, 0);
         let b = _mm_setr_pi16(20000, -20000, -1, 1);
         let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), 1, -1);
         assert_eq_m64(e, _mm_subs_pi16(a, b));
         assert_eq_m64(e, _m_psubsw(a, b));
     }

     // The first four differences would be negative and must clamp to 0.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_subs_pu8() {
         let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
         let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
         let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
         assert_eq_m64(e, _mm_subs_pu8(a, b));
         assert_eq_m64(e, _m_psubusb(a, b));
     }

     // The first three differences would be negative and must clamp to 0; the
     // last one (44444 - 11111) is an ordinary unsigned difference.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_subs_pu16() {
         let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
         let b = _mm_setr_pi16(20000, 300, 1, 11111);
         let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
         assert_eq_m64(e, _mm_subs_pu16(a, b));
         assert_eq_m64(e, _m_psubusw(a, b));
     }

     // All inputs already fit in 8 bits, so this checks element order (those
     // of `a` first, then those of `b`) rather than saturation.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_packs_pi16() {
         let a = _mm_setr_pi16(-1, 2, -3, 4);
         let b = _mm_setr_pi16(-5, 6, -7, 8);
         let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
         assert_eq_m64(r, _mm_packs_pi16(a, b));
     }

     // All inputs already fit in 16 bits, so this checks element order (those
     // of `a` first, then those of `b`) rather than saturation.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_packs_pi32() {
         let a = _mm_setr_pi32(-1, 2);
         let b = _mm_setr_pi32(-5, 6);
         let r = _mm_setr_pi16(-1, 2, -5, 6);
         assert_eq_m64(r, _mm_packs_pi32(a, b));
     }

     // Only the last three elements of `a` exceed those of `b`; equal
     // elements (4 vs 4) must compare as false.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_cmpgt_pi8() {
         let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
         let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
         assert_eq_m64(r, _mm_cmpgt_pi8(a, b));
     }

     // Only the last element of `a` exceeds that of `b`; equal elements
     // (2 vs 2) must compare as false.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_cmpgt_pi16() {
         let a = _mm_setr_pi16(0, 1, 2, 3);
         let b = _mm_setr_pi16(4, 3, 2, 1);
         let r = _mm_setr_pi16(0, 0, 0, -1);
         assert_eq_m64(r, _mm_cmpgt_pi16(a, b));
     }

     // Checks the comparison in both operand orders.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_cmpgt_pi32() {
         let a = _mm_setr_pi32(0, 3);
         let b = _mm_setr_pi32(1, 2);
         let r0 = _mm_setr_pi32(0, -1);
         let r1 = _mm_setr_pi32(-1, 0);

         assert_eq_m64(r0, _mm_cmpgt_pi32(a, b));
         assert_eq_m64(r1, _mm_cmpgt_pi32(b, a));
     }

     // Expected value interleaves elements 4..=7 of `a` and `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpackhi_pi8() {
         let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
         let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
         let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);

         assert_eq_m64(r, _mm_unpackhi_pi8(a, b));
     }

     // Expected value interleaves elements 0..=3 of `a` and `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpacklo_pi8() {
         let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
         assert_eq_m64(r, _mm_unpacklo_pi8(a, b));
     }

     // Expected value interleaves elements 2 and 3 of `a` and `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpackhi_pi16() {
         let a = _mm_setr_pi16(0, 1, 2, 3);
         let b = _mm_setr_pi16(4, 5, 6, 7);
         let r = _mm_setr_pi16(2, 6, 3, 7);
         assert_eq_m64(r, _mm_unpackhi_pi16(a, b));
     }

     // Expected value interleaves elements 0 and 1 of `a` and `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpacklo_pi16() {
         let a = _mm_setr_pi16(0, 1, 2, 3);
         let b = _mm_setr_pi16(4, 5, 6, 7);
         let r = _mm_setr_pi16(0, 4, 1, 5);
         assert_eq_m64(r, _mm_unpacklo_pi16(a, b));
     }

     // Expected value is element 1 of `a` followed by element 1 of `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpackhi_pi32() {
         let a = _mm_setr_pi32(0, 3);
         let b = _mm_setr_pi32(1, 2);
         let r = _mm_setr_pi32(3, 2);

         assert_eq_m64(r, _mm_unpackhi_pi32(a, b));
     }

     // Expected value is element 0 of `a` followed by element 0 of `b`.
     #[simd_test(enable = "mmx")]
     unsafe fn test_mm_unpacklo_pi32() {
         let a = _mm_setr_pi32(0, 3);
         let b = _mm_setr_pi32(1, 2);
         let r = _mm_setr_pi32(0, 1);

         assert_eq_m64(r, _mm_unpacklo_pi32(a, b));
     }
 }