| //! `i586` MMX instruction set. |
| //! |
| //! The intrinsics here roughly correspond to those in the `mmintrin.h` C |
| //! header. |
| //! |
| //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
| //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
| //! |
| //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
| |
| use coresimd::simd::*; |
| use coresimd::x86::*; |
| use mem; |
| |
| #[cfg(test)] |
| use stdsimd_test::assert_instr; |
| |
| /// Constructs a 64-bit integer vector initialized to zero. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| // FIXME: this produces a movl instead of xorps on x86 |
| // FIXME: this produces a xor intrinsic instead of xorps on x86_64 |
| #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))] |
| pub unsafe fn _mm_setzero_si64() -> __m64 { |
| mem::transmute(0_i64) |
| } |
| |
/// Add packed 8-bit integers in `a` and `b`.
///
/// This is the plain form of the addition; `_mm_adds_pi8` and `_mm_adds_pu8`
/// are the saturating counterparts. The call forwards directly to the
/// `llvm.x86.mmx.padd.b` intrinsic declared in this module's `extern` block.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddb))]
pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
    paddb(a, b)
}
| |
| /// Add packed 8-bit integers in `a` and `b`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddb))] |
| pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { |
| _mm_add_pi8(a, b) |
| } |
| |
/// Add packed 16-bit integers in `a` and `b`.
///
/// This is the plain form of the addition; `_mm_adds_pi16` and
/// `_mm_adds_pu16` are the saturating counterparts. The call forwards
/// directly to the `llvm.x86.mmx.padd.w` intrinsic declared in this module's
/// `extern` block.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddw))]
pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
    paddw(a, b)
}
| |
| /// Add packed 16-bit integers in `a` and `b`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddw))] |
| pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { |
| _mm_add_pi16(a, b) |
| } |
| |
/// Add packed 32-bit integers in `a` and `b`.
///
/// The call forwards directly to the `llvm.x86.mmx.padd.d` intrinsic
/// declared in this module's `extern` block. `_m_paddd` is an alias for this
/// function.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
    paddd(a, b)
}
| |
| /// Add packed 32-bit integers in `a` and `b`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddd))] |
| pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 { |
| _mm_add_pi32(a, b) |
| } |
| |
/// Add packed 8-bit integers in `a` and `b` using saturation.
///
/// The unit test in this module expects `100 + 100` to yield
/// `i8::max_value()` and `-100 + -100` to yield `i8::min_value()`. The call
/// forwards directly to the `llvm.x86.mmx.padds.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
    paddsb(a, b)
}
| |
| /// Add packed 8-bit integers in `a` and `b` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddsb))] |
| pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 { |
| _mm_adds_pi8(a, b) |
| } |
| |
/// Add packed 16-bit integers in `a` and `b` using saturation.
///
/// The unit test in this module expects `32000 + 32000` to yield
/// `i16::max_value()` and `-32000 + -32000` to yield `i16::min_value()`. The
/// call forwards directly to the `llvm.x86.mmx.padds.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
    paddsw(a, b)
}
| |
| /// Add packed 16-bit integers in `a` and `b` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddsw))] |
| pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 { |
| _mm_adds_pi16(a, b) |
| } |
| |
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// The unit test in this module expects `200 + 200` to yield
/// `u8::max_value()`. The call forwards directly to the
/// `llvm.x86.mmx.paddus.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
    paddusb(a, b)
}
| |
| /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddusb))] |
| pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 { |
| _mm_adds_pu8(a, b) |
| } |
| |
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// The unit test in this module expects `60000 + 60000` to yield
/// `u16::max_value()`. The call forwards directly to the
/// `llvm.x86.mmx.paddus.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
    paddusw(a, b)
}
| |
| /// Add packed unsigned 16-bit integers in `a` and `b` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(paddusw))] |
| pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 { |
| _mm_adds_pu16(a, b) |
| } |
| |
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// This is the plain form of the subtraction; `_mm_subs_pi8` and
/// `_mm_subs_pu8` are the saturating counterparts. The call forwards
/// directly to the `llvm.x86.mmx.psub.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
    psubb(a, b)
}
| |
| /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubb))] |
| pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 { |
| _mm_sub_pi8(a, b) |
| } |
| |
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// This is the plain form of the subtraction; `_mm_subs_pi16` and
/// `_mm_subs_pu16` are the saturating counterparts. The call forwards
/// directly to the `llvm.x86.mmx.psub.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
    psubw(a, b)
}
| |
| /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubw))] |
| pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 { |
| _mm_sub_pi16(a, b) |
| } |
| |
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// The call forwards directly to the `llvm.x86.mmx.psub.d` intrinsic
/// declared in this module's `extern` block. `_m_psubd` is an alias for this
/// function.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
    psubd(a, b)
}
| |
| /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubd))] |
| pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 { |
| _mm_sub_pi32(a, b) |
| } |
| |
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// The unit test in this module expects `-100 - 100` to yield
/// `i8::min_value()` and `100 - (-100)` to yield `i8::max_value()`. The call
/// forwards directly to the `llvm.x86.mmx.psubs.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
    psubsb(a, b)
}
| |
| /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` |
| /// using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubsb))] |
| pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 { |
| _mm_subs_pi8(a, b) |
| } |
| |
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// The unit test in this module expects `-20000 - 20000` to yield
/// `i16::min_value()` and `20000 - (-20000)` to yield `i16::max_value()`.
/// The call forwards directly to the `llvm.x86.mmx.psubs.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
    psubsw(a, b)
}
| |
| /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` |
| /// using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubsw))] |
| pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 { |
| _mm_subs_pi16(a, b) |
| } |
| |
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// The unit test in this module expects `50 - 60` to yield `0`. The call
/// forwards directly to the `llvm.x86.mmx.psubus.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
    psubusb(a, b)
}
| |
| /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit |
| /// integers in `a` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubusb))] |
| pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 { |
| _mm_subs_pu8(a, b) |
| } |
| |
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
///
/// The unit test in this module expects `10000 - 20000` to yield `0`. The
/// call forwards directly to the `llvm.x86.mmx.psubus.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
    psubusw(a, b)
}
| |
| /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned |
| /// 16-bit integers in `a` using saturation. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| #[cfg_attr(test, assert_instr(psubusw))] |
| pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { |
| _mm_subs_pu16(a, b) |
| } |
| |
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Values greater than 0x7F are saturated to 0x7F. Values less than -128
/// (bit pattern 0x80 as an 8-bit integer) are saturated to 0x80.
///
/// The unit test in this module expects the four converted elements of `a`
/// first, followed by the four converted elements of `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
    packsswb(a, b)
}
| |
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Values greater than 0x7FFF are saturated to 0x7FFF. Values less than
/// -32768 (bit pattern 0x8000 as a 16-bit integer) are saturated to 0x8000.
///
/// The unit test in this module expects the two converted elements of `a`
/// first, followed by the two converted elements of `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
    packssdw(a, b)
}
| |
/// Compares whether each 8-bit element of `a` is greater than the
/// corresponding element of `b`, returning `0` for `false` and `-1` for
/// `true` in that element of the result.
///
/// The call forwards directly to the `llvm.x86.mmx.pcmpgt.b` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
    pcmpgtb(a, b)
}
| |
/// Compares whether each 16-bit element of `a` is greater than the
/// corresponding element of `b`, returning `0` for `false` and `-1` for
/// `true` in that element of the result.
///
/// The call forwards directly to the `llvm.x86.mmx.pcmpgt.w` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
    pcmpgtw(a, b)
}
| |
/// Compares whether each 32-bit element of `a` is greater than the
/// corresponding element of `b`, returning `0` for `false` and `-1` for
/// `true` in that element of the result.
///
/// The call forwards directly to the `llvm.x86.mmx.pcmpgt.d` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
    pcmpgtd(a, b)
}
| |
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.2, b.2, a.3, b.3]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpckhwd` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
    punpckhwd(a, b)
}
| |
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpckhbw` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
    punpckhbw(a, b)
}
| |
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpcklbw` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
    punpcklbw(a, b)
}
| |
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpcklwd` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
    punpcklwd(a, b)
}
| |
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
/// into the result: `[a.1, b.1]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpckhdq` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
    punpckhdq(a, b)
}
| |
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
/// into the result: `[a.0, b.0]`.
///
/// The call forwards directly to the `llvm.x86.mmx.punpckldq` intrinsic.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
    punpckldq(a, b)
}
| |
| /// Set packed 16-bit integers in dst with the supplied values. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { |
| _mm_setr_pi16(e0, e1, e2, e3) |
| } |
| |
| /// Set packed 32-bit integers in dst with the supplied values. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { |
| _mm_setr_pi32(e0, e1) |
| } |
| |
| /// Set packed 8-bit integers in dst with the supplied values. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set_pi8( |
| e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, |
| ) -> __m64 { |
| _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) |
| } |
| |
| /// Broadcast 16-bit integer a to all all elements of dst. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { |
| _mm_setr_pi16(a, a, a, a) |
| } |
| |
| /// Broadcast 32-bit integer a to all all elements of dst. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { |
| _mm_setr_pi32(a, a) |
| } |
| |
| /// Broadcast 8-bit integer a to all all elements of dst. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { |
| _mm_setr_pi8(a, a, a, a, a, a, a, a) |
| } |
| |
| /// Set packed 16-bit integers in dst with the supplied values in reverse |
| /// order. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { |
| mem::transmute(i16x4::new(e0, e1, e2, e3)) |
| } |
| |
| /// Set packed 32-bit integers in dst with the supplied values in reverse |
| /// order. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { |
| mem::transmute(i32x2::new(e0, e1)) |
| } |
| |
| /// Set packed 8-bit integers in dst with the supplied values in reverse order. |
| #[inline] |
| #[target_feature(enable = "mmx")] |
| pub unsafe fn _mm_setr_pi8( |
| e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8, |
| ) -> __m64 { |
| mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) |
| } |
| |
| #[allow(improper_ctypes)] |
| extern "C" { |
| #[link_name = "llvm.x86.mmx.padd.b"] |
| fn paddb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.padd.w"] |
| fn paddw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.padd.d"] |
| fn paddd(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.padds.b"] |
| fn paddsb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.padds.w"] |
| fn paddsw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.paddus.b"] |
| fn paddusb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.paddus.w"] |
| fn paddusw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psub.b"] |
| fn psubb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psub.w"] |
| fn psubw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psub.d"] |
| fn psubd(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psubs.b"] |
| fn psubsb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psubs.w"] |
| fn psubsw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psubus.b"] |
| fn psubusb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.psubus.w"] |
| fn psubusw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.packsswb"] |
| fn packsswb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.packssdw"] |
| fn packssdw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.pcmpgt.b"] |
| fn pcmpgtb(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.pcmpgt.w"] |
| fn pcmpgtw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.pcmpgt.d"] |
| fn pcmpgtd(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpckhwd"] |
| fn punpckhwd(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpcklwd"] |
| fn punpcklwd(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpckhbw"] |
| fn punpckhbw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpcklbw"] |
| fn punpcklbw(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpckhdq"] |
| fn punpckhdq(a: __m64, b: __m64) -> __m64; |
| #[link_name = "llvm.x86.mmx.punpckldq"] |
| fn punpckldq(a: __m64, b: __m64) -> __m64; |
| } |
| |
#[cfg(test)]
mod tests {
    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    // The zero vector must equal a transmuted zero `i64`.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_setzero_si64() {
        let expected: __m64 = ::std::mem::transmute(0_i64);
        assert_eq_m64(expected, _mm_setzero_si64());
    }

    // Plain 8-bit addition, checked through both the intrinsic and its alias.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi8() {
        let lhs = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
        let rhs = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
        let expected = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
        assert_eq_m64(expected, _mm_add_pi8(lhs, rhs));
        assert_eq_m64(expected, _m_paddb(lhs, rhs));
    }

    // Plain 16-bit addition that lands exactly on the `i16` bounds.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi16() {
        let (min, max) = (i16::min_value(), i16::max_value());
        let lhs = _mm_setr_pi16(-1, -1, 1, 1);
        let rhs = _mm_setr_pi16(min + 1, 30001, -30001, max - 1);
        let expected = _mm_setr_pi16(min, 30000, -30000, max);
        assert_eq_m64(expected, _mm_add_pi16(lhs, rhs));
        assert_eq_m64(expected, _m_paddw(lhs, rhs));
    }

    // Plain 32-bit addition that lands exactly on the `i32` bounds.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi32() {
        let (min, max) = (i32::min_value(), i32::max_value());
        let lhs = _mm_setr_pi32(1, -1);
        let rhs = _mm_setr_pi32(max - 1, min + 1);
        let expected = _mm_setr_pi32(max, min);
        assert_eq_m64(expected, _mm_add_pi32(lhs, rhs));
        assert_eq_m64(expected, _m_paddd(lhs, rhs));
    }

    // Signed saturating 8-bit addition; the first and fourth lanes clamp.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pi8() {
        let (min, max) = (i8::min_value(), i8::max_value());
        let lhs = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
        let rhs = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
        let expected = _mm_setr_pi8(min, 0, 0, max, -1, -1, 1, 1);
        assert_eq_m64(expected, _mm_adds_pi8(lhs, rhs));
        assert_eq_m64(expected, _m_paddsb(lhs, rhs));
    }

    // Signed saturating 16-bit addition; the first two lanes clamp.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pi16() {
        let (min, max) = (i16::min_value(), i16::max_value());
        let lhs = _mm_setr_pi16(-32000, 32000, 4, 0);
        let rhs = _mm_setr_pi16(-32000, 32000, -5, 1);
        let expected = _mm_setr_pi16(min, max, -1, 1);
        assert_eq_m64(expected, _mm_adds_pi16(lhs, rhs));
        assert_eq_m64(expected, _m_paddsw(lhs, rhs));
    }

    // Unsigned saturating 8-bit addition; the last lane clamps.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pu8() {
        let lhs = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
        let rhs = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
        let expected =
            _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8);
        assert_eq_m64(expected, _mm_adds_pu8(lhs, rhs));
        assert_eq_m64(expected, _m_paddusb(lhs, rhs));
    }

    // Unsigned saturating 16-bit addition; the last lane clamps.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pu16() {
        let lhs = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
        let rhs = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
        let expected = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16);
        assert_eq_m64(expected, _mm_adds_pu16(lhs, rhs));
        assert_eq_m64(expected, _m_paddusw(lhs, rhs));
    }

    // Plain 8-bit subtraction.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi8() {
        let lhs = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
        let rhs = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
        let expected = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
        assert_eq_m64(expected, _mm_sub_pi8(lhs, rhs));
        assert_eq_m64(expected, _m_psubb(lhs, rhs));
    }

    // Plain 16-bit subtraction.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi16() {
        let lhs = _mm_setr_pi16(-20000, -20000, 20000, 30000);
        let rhs = _mm_setr_pi16(-10000, 10000, -10000, 30000);
        let expected = _mm_setr_pi16(-10000, -30000, 30000, 0);
        assert_eq_m64(expected, _mm_sub_pi16(lhs, rhs));
        assert_eq_m64(expected, _m_psubw(lhs, rhs));
    }

    // Plain 32-bit subtraction.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi32() {
        let lhs = _mm_setr_pi32(500_000, -500_000);
        let rhs = _mm_setr_pi32(500_000, 500_000);
        let expected = _mm_setr_pi32(0, -1_000_000);
        assert_eq_m64(expected, _mm_sub_pi32(lhs, rhs));
        assert_eq_m64(expected, _m_psubd(lhs, rhs));
    }

    // Signed saturating 8-bit subtraction; the first three lanes clamp.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pi8() {
        let (min, max) = (i8::min_value(), i8::max_value());
        let lhs = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
        let rhs = _mm_setr_pi8(100, -100, min, 127, -1, 1, 3, -3);
        let expected = _mm_setr_pi8(min, max, max, -127, 1, -1, -8, 8);
        assert_eq_m64(expected, _mm_subs_pi8(lhs, rhs));
        assert_eq_m64(expected, _m_psubsb(lhs, rhs));
    }

    // Signed saturating 16-bit subtraction; the first two lanes clamp.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pi16() {
        let (min, max) = (i16::min_value(), i16::max_value());
        let lhs = _mm_setr_pi16(-20000, 20000, 0, 0);
        let rhs = _mm_setr_pi16(20000, -20000, -1, 1);
        let expected = _mm_setr_pi16(min, max, 1, -1);
        assert_eq_m64(expected, _mm_subs_pi16(lhs, rhs));
        assert_eq_m64(expected, _m_psubsw(lhs, rhs));
    }

    // Unsigned saturating 8-bit subtraction; the first four lanes clamp to 0.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pu8() {
        let lhs = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
        let rhs = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
        let expected = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
        assert_eq_m64(expected, _mm_subs_pu8(lhs, rhs));
        assert_eq_m64(expected, _m_psubusb(lhs, rhs));
    }

    // Unsigned saturating 16-bit subtraction; the first three lanes clamp
    // to 0.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pu16() {
        let lhs = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
        let rhs = _mm_setr_pi16(20000, 300, 1, 11111);
        let expected = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
        assert_eq_m64(expected, _mm_subs_pu16(lhs, rhs));
        assert_eq_m64(expected, _m_psubusw(lhs, rhs));
    }

    // 16-bit to 8-bit pack: lanes of the first operand come first.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_packs_pi16() {
        let lhs = _mm_setr_pi16(-1, 2, -3, 4);
        let rhs = _mm_setr_pi16(-5, 6, -7, 8);
        let expected = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
        assert_eq_m64(expected, _mm_packs_pi16(lhs, rhs));
    }

    // 32-bit to 16-bit pack: lanes of the first operand come first.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_packs_pi32() {
        let lhs = _mm_setr_pi32(-1, 2);
        let rhs = _mm_setr_pi32(-5, 6);
        let expected = _mm_setr_pi16(-1, 2, -5, 6);
        assert_eq_m64(expected, _mm_packs_pi32(lhs, rhs));
    }

    // 8-bit greater-than: only the last three lanes compare as true.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi8() {
        let lhs = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
        let rhs = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
        let expected = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
        assert_eq_m64(expected, _mm_cmpgt_pi8(lhs, rhs));
    }

    // 16-bit greater-than: only the last lane compares as true.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi16() {
        let lhs = _mm_setr_pi16(0, 1, 2, 3);
        let rhs = _mm_setr_pi16(4, 3, 2, 1);
        let expected = _mm_setr_pi16(0, 0, 0, -1);
        assert_eq_m64(expected, _mm_cmpgt_pi16(lhs, rhs));
    }

    // 32-bit greater-than, checked with the operands in both orders.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi32() {
        let lhs = _mm_setr_pi32(0, 3);
        let rhs = _mm_setr_pi32(1, 2);
        let expected_forward = _mm_setr_pi32(0, -1);
        let expected_swapped = _mm_setr_pi32(-1, 0);

        assert_eq_m64(expected_forward, _mm_cmpgt_pi32(lhs, rhs));
        assert_eq_m64(expected_swapped, _mm_cmpgt_pi32(rhs, lhs));
    }

    // Interleave of the upper four 8-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi8() {
        let lhs = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
        let rhs = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
        let expected = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);

        assert_eq_m64(expected, _mm_unpackhi_pi8(lhs, rhs));
    }

    // Interleave of the lower four 8-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi8() {
        let lhs = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
        let rhs = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
        let expected = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m64(expected, _mm_unpacklo_pi8(lhs, rhs));
    }

    // Interleave of the upper two 16-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi16() {
        let lhs = _mm_setr_pi16(0, 1, 2, 3);
        let rhs = _mm_setr_pi16(4, 5, 6, 7);
        let expected = _mm_setr_pi16(2, 6, 3, 7);
        assert_eq_m64(expected, _mm_unpackhi_pi16(lhs, rhs));
    }

    // Interleave of the lower two 16-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi16() {
        let lhs = _mm_setr_pi16(0, 1, 2, 3);
        let rhs = _mm_setr_pi16(4, 5, 6, 7);
        let expected = _mm_setr_pi16(0, 4, 1, 5);
        assert_eq_m64(expected, _mm_unpacklo_pi16(lhs, rhs));
    }

    // Pairing of the upper 32-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi32() {
        let lhs = _mm_setr_pi32(0, 3);
        let rhs = _mm_setr_pi32(1, 2);
        let expected = _mm_setr_pi32(3, 2);

        assert_eq_m64(expected, _mm_unpackhi_pi32(lhs, rhs));
    }

    // Pairing of the lower 32-bit lanes.
    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi32() {
        let lhs = _mm_setr_pi32(0, 3);
        let rhs = _mm_setr_pi32(1, 2);
        let expected = _mm_setr_pi32(0, 1);

        assert_eq_m64(expected, _mm_unpacklo_pi32(lhs, rhs));
    }
}