Update `target_feature` syntax (#283) (#299)
(Backport onto 0.0.4)
This commit updates the crate to the latest nightly's syntax, where `#[target_feature =
"+foo"]` is deprecated in favor of `#[target_feature(enable = "foo")]`.
Additionally, `#[target_feature]` can now only be applied to `unsafe` functions,
at least for the time being.
Along the way this removes a few examples that were just left around, and also
disables the `fxsr` modules, since that target feature still needs to land upstream in
rust-lang/rust and is currently unknown to the compiler.
diff --git a/Cargo.toml b/Cargo.toml
index 3864dc4..bede412 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "stdsimd"
-version = "0.0.4"
+version = "0.0.4-1"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "SIMD support in Rust's standard library."
documentation = "https://docs.rs/stdsimd"
@@ -22,7 +22,7 @@
maintenance = { status = "experimental" }
[dependencies]
-coresimd = { version = "0.0.4", path = "coresimd/" }
+coresimd = { version = "0.0.4-1", path = "coresimd/" }
[dev-dependencies]
auxv = "0.3.3"
diff --git a/coresimd/Cargo.toml b/coresimd/Cargo.toml
index 4b02440..c7c4a15 100644
--- a/coresimd/Cargo.toml
+++ b/coresimd/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "coresimd"
-version = "0.0.4"
+version = "0.0.4-1"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "SIMD support in Rust's core library."
documentation = "https://docs.rs/stdsimd"
@@ -21,7 +21,7 @@
[dev-dependencies]
cupid = "0.5.0"
stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
-stdsimd = { version = "0.0.3", path = ".." }
+stdsimd = { version = "0.0.4-1", path = ".." }
[features]
# Internal-usage only: denies all warnings.
diff --git a/coresimd/src/aarch64/neon.rs b/coresimd/src/aarch64/neon.rs
index 55b4ff6..353a598 100644
--- a/coresimd/src/aarch64/neon.rs
+++ b/coresimd/src/aarch64/neon.rs
@@ -9,7 +9,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
pub unsafe fn vadd_f64(a: f64, b: f64) -> f64 {
a + b
@@ -17,7 +17,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
pub unsafe fn vaddq_f64(a: f64x2, b: f64x2) -> f64x2 {
simd_add(a, b)
@@ -25,7 +25,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
a + b
@@ -33,7 +33,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
a + b
diff --git a/coresimd/src/arm/neon.rs b/coresimd/src/arm/neon.rs
index 9103ccc..0c4efae 100644
--- a/coresimd/src/arm/neon.rs
+++ b/coresimd/src/arm/neon.rs
@@ -10,7 +10,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_s8(a: i8x8, b: i8x8) -> i8x8 {
simd_add(a, b)
@@ -18,7 +18,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_s8(a: i8x16, b: i8x16) -> i8x16 {
simd_add(a, b)
@@ -26,7 +26,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_s16(a: i16x4, b: i16x4) -> i16x4 {
simd_add(a, b)
@@ -34,7 +34,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_s16(a: i16x8, b: i16x8) -> i16x8 {
simd_add(a, b)
@@ -42,7 +42,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_s32(a: i32x2, b: i32x2) -> i32x2 {
simd_add(a, b)
@@ -50,7 +50,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_s32(a: i32x4, b: i32x4) -> i32x4 {
simd_add(a, b)
@@ -58,7 +58,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_s64(a: i64x2, b: i64x2) -> i64x2 {
simd_add(a, b)
@@ -66,7 +66,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_u8(a: u8x8, b: u8x8) -> u8x8 {
simd_add(a, b)
@@ -74,7 +74,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_u8(a: u8x16, b: u8x16) -> u8x16 {
simd_add(a, b)
@@ -82,7 +82,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_u16(a: u16x4, b: u16x4) -> u16x4 {
simd_add(a, b)
@@ -90,7 +90,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_u16(a: u16x8, b: u16x8) -> u16x8 {
simd_add(a, b)
@@ -98,7 +98,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vadd_u32(a: u32x2, b: u32x2) -> u32x2 {
simd_add(a, b)
@@ -106,7 +106,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_u32(a: u32x4, b: u32x4) -> u32x4 {
simd_add(a, b)
@@ -114,7 +114,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddq_u64(a: u64x2, b: u64x2) -> u64x2 {
simd_add(a, b)
@@ -122,7 +122,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
pub unsafe fn vadd_f32(a: f32x2, b: f32x2) -> f32x2 {
simd_add(a, b)
@@ -130,7 +130,7 @@
/// Vector add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
pub unsafe fn vaddq_f32(a: f32x4, b: f32x4) -> f32x4 {
simd_add(a, b)
@@ -138,7 +138,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddl))]
pub unsafe fn vaddl_s8(a: i8x8, b: i8x8) -> i16x8 {
let a = a.as_i16x8();
@@ -148,7 +148,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddl))]
pub unsafe fn vaddl_s16(a: i16x4, b: i16x4) -> i32x4 {
let a = a.as_i32x4();
@@ -158,7 +158,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddl))]
pub unsafe fn vaddl_s32(a: i32x2, b: i32x2) -> i64x2 {
let a = a.as_i64x2();
@@ -168,7 +168,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddl))]
pub unsafe fn vaddl_u8(a: u8x8, b: u8x8) -> u16x8 {
let a = a.as_u16x8();
@@ -178,7 +178,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddl))]
pub unsafe fn vaddl_u16(a: u16x4, b: u16x4) -> u32x4 {
let a = a.as_u32x4();
@@ -188,7 +188,7 @@
/// Vector long add.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddl))]
pub unsafe fn vaddl_u32(a: u32x2, b: u32x2) -> u64x2 {
let a = a.as_u64x2();
@@ -206,7 +206,7 @@
/// Reciprocal square-root estimate.
#[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frsqrte))]
pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
frsqrte_v2f32(a)
diff --git a/coresimd/src/arm/v7.rs b/coresimd/src/arm/v7.rs
index 5c0a73b..b620013 100644
--- a/coresimd/src/arm/v7.rs
+++ b/coresimd/src/arm/v7.rs
@@ -36,7 +36,8 @@
/// Reverse the bit order.
#[inline(always)]
#[cfg_attr(test, assert_instr(rbit))]
-#[cfg_attr(target_arch = "arm", target_feature = "+v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
pub unsafe fn _rbit_u32(x: u32) -> u32 {
rbit_u32(x as i32) as u32
}
@@ -73,6 +74,7 @@
}
#[test]
+ #[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
fn _rbit_u32() {
unsafe {
assert_eq!(
diff --git a/coresimd/src/x86/i386/fxsr.rs b/coresimd/src/x86/i386/fxsr.rs
index 95626ad..28c8fb5 100644
--- a/coresimd/src/x86/i386/fxsr.rs
+++ b/coresimd/src/x86/i386/fxsr.rs
@@ -22,7 +22,7 @@
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
#[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxsave))]
pub unsafe fn _fxsave(mem_addr: *mut u8) {
fxsave(mem_addr)
@@ -43,7 +43,7 @@
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
#[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxrstor))]
pub unsafe fn _fxrstor(mem_addr: *const u8) {
fxrstor(mem_addr)
diff --git a/coresimd/src/x86/i386/mod.rs b/coresimd/src/x86/i386/mod.rs
index ed1ad04..9f32390 100644
--- a/coresimd/src/x86/i386/mod.rs
+++ b/coresimd/src/x86/i386/mod.rs
@@ -3,5 +3,8 @@
mod eflags;
pub use self::eflags::*;
+
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
mod fxsr;
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
pub use self::fxsr::*;
diff --git a/coresimd/src/x86/i586/abm.rs b/coresimd/src/x86/i586/abm.rs
index 2ca2cc1..2fb5597 100644
--- a/coresimd/src/x86/i586/abm.rs
+++ b/coresimd/src/x86/i586/abm.rs
@@ -24,7 +24,7 @@
///
/// When the operand is zero, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+lzcnt"]
+#[target_feature(enable = "lzcnt")]
#[cfg_attr(test, assert_instr(lzcnt))]
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
x.leading_zeros()
@@ -34,7 +34,7 @@
///
/// When the operand is zero, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+lzcnt"]
+#[target_feature(enable = "lzcnt")]
#[cfg_attr(test, assert_instr(lzcnt))]
pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
x.leading_zeros() as u64
@@ -42,7 +42,7 @@
/// Counts the bits that are set.
#[inline(always)]
-#[target_feature = "+popcnt"]
+#[target_feature(enable = "popcnt")]
#[cfg_attr(test, assert_instr(popcnt))]
pub unsafe fn _popcnt32(x: i32) -> i32 {
x.count_ones() as i32
@@ -50,7 +50,7 @@
/// Counts the bits that are set.
#[inline(always)]
-#[target_feature = "+popcnt"]
+#[target_feature(enable = "popcnt")]
#[cfg_attr(test, assert_instr(popcnt))]
pub unsafe fn _popcnt64(x: i64) -> i32 {
x.count_ones() as i32
diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs
index 1d7347e..f879d95 100644
--- a/coresimd/src/x86/i586/avx.rs
+++ b/coresimd/src/x86/i586/avx.rs
@@ -26,7 +26,7 @@
/// Add packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 {
a + b
@@ -35,7 +35,7 @@
/// Add packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
a + b
@@ -45,7 +45,7 @@
/// floating-point elements
/// in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// FIXME: Should be 'vandpd' instuction.
// See https://github.com/rust-lang-nursery/stdsimd/issues/71
#[cfg_attr(test, assert_instr(vandps))]
@@ -58,7 +58,7 @@
/// Compute the bitwise AND of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 {
let a: u32x8 = mem::transmute(a);
@@ -69,7 +69,7 @@
/// Compute the bitwise OR packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// FIXME: Should be 'vorpd' instuction.
// See https://github.com/rust-lang-nursery/stdsimd/issues/71
#[cfg_attr(test, assert_instr(vorps))]
@@ -82,7 +82,7 @@
/// Compute the bitwise OR packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 {
let a: u32x8 = mem::transmute(a);
@@ -93,7 +93,7 @@
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit
/// lanes using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -135,7 +135,7 @@
/// Shuffle single-precision (32-bit) floating-point elements in `a` within
/// 128-bit lanes using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
pub unsafe fn _mm256_shuffle_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -186,7 +186,7 @@
/// elements in `a`
/// and then AND with `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// FIXME: Should be 'vandnpd' instruction.
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 {
@@ -199,7 +199,7 @@
/// elements in `a`
/// and then AND with `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
let a: u32x8 = mem::transmute(a);
@@ -210,7 +210,7 @@
/// Compare packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and return packed maximum values
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
maxpd256(a, b)
@@ -219,7 +219,7 @@
/// Compare packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and return packed maximum values
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
maxps256(a, b)
@@ -228,7 +228,7 @@
/// Compare packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and return packed minimum values
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
minpd256(a, b)
@@ -237,7 +237,7 @@
/// Compare packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and return packed minimum values
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm256_min_ps(a: f32x8, b: f32x8) -> f32x8 {
minps256(a, b)
@@ -246,7 +246,7 @@
/// Add packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 {
a * b
@@ -255,7 +255,7 @@
/// Add packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm256_mul_ps(a: f32x8, b: f32x8) -> f32x8 {
a * b
@@ -264,7 +264,7 @@
/// Alternatively add and subtract packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
pub unsafe fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
addsubpd256(a, b)
@@ -273,7 +273,7 @@
/// Alternatively add and subtract packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
pub unsafe fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 {
addsubps256(a, b)
@@ -282,7 +282,7 @@
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
/// from packed elements in `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
a - b
@@ -291,7 +291,7 @@
/// Subtract packed single-precision (32-bit) floating-point elements in `b`
/// from packed elements in `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 {
a - b
@@ -300,7 +300,7 @@
/// Compute the division of each of the 8 packed 32-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm256_div_ps(a: f32x8, b: f32x8) -> f32x8 {
a / b
@@ -309,7 +309,7 @@
/// Compute the division of each of the 4 packed 64-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm256_div_pd(a: f64x4, b: f64x4) -> f64x4 {
a / b
@@ -327,7 +327,7 @@
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, b = 0x3))]
pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
macro_rules! call {
@@ -339,7 +339,7 @@
/// Round packed double-precision (64-bit) floating point elements in `a`
/// toward positive infinity.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
roundpd256(a, 0x02)
@@ -348,7 +348,7 @@
/// Round packed double-precision (64-bit) floating point elements in `a`
/// toward negative infinity.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 {
roundpd256(a, 0x01)
@@ -366,7 +366,7 @@
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 {
macro_rules! call {
@@ -380,7 +380,7 @@
/// Round packed single-precision (32-bit) floating point elements in `a`
/// toward positive infinity.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 {
roundps256(a, 0x02)
@@ -389,7 +389,7 @@
/// Round packed single-precision (32-bit) floating point elements in `a`
/// toward negative infinity.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
pub unsafe fn _mm256_floor_ps(a: f32x8) -> f32x8 {
roundps256(a, 0x01)
@@ -398,7 +398,7 @@
/// Return the square root of packed single-precision (32-bit) floating point
/// elements in `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_sqrt_ps(a: f32x8) -> f32x8 {
sqrtps256(a)
@@ -407,7 +407,7 @@
/// Return the square root of packed double-precision (64-bit) floating point
/// elements in `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_sqrt_pd(a: f64x4) -> f64x4 {
sqrtpd256(a)
@@ -416,7 +416,7 @@
/// Blend packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
pub unsafe fn _mm256_blend_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -458,7 +458,7 @@
/// Blend packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
pub unsafe fn _mm256_blend_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -508,7 +508,7 @@
/// Blend packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
pub unsafe fn _mm256_blendv_pd(a: f64x4, b: f64x4, c: f64x4) -> f64x4 {
vblendvpd(a, b, c)
@@ -517,7 +517,7 @@
/// Blend packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
pub unsafe fn _mm256_blendv_ps(a: f32x8, b: f32x8, c: f32x8) -> f32x8 {
vblendvps(a, b, c)
@@ -528,7 +528,7 @@
/// sum the four products, and conditionally return the sum
/// using the low 4 bits of `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))]
pub unsafe fn _mm256_dp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
macro_rules! call {
@@ -542,7 +542,7 @@
/// In the result, sums of elements from `a` are returned in even locations,
/// while sums of elements from `b` are returned in odd locations.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
pub unsafe fn _mm256_hadd_pd(a: f64x4, b: f64x4) -> f64x4 {
vhaddpd(a, b)
@@ -554,7 +554,7 @@
/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
/// 2, 3, 6, 7.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
pub unsafe fn _mm256_hadd_ps(a: f32x8, b: f32x8) -> f32x8 {
vhaddps(a, b)
@@ -565,7 +565,7 @@
/// In the result, sums of elements from `a` are returned in even locations,
/// while sums of elements from `b` are returned in odd locations.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
pub unsafe fn _mm256_hsub_pd(a: f64x4, b: f64x4) -> f64x4 {
vhsubpd(a, b)
@@ -577,7 +577,7 @@
/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
/// 2, 3, 6, 7.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
pub unsafe fn _mm256_hsub_ps(a: f32x8, b: f32x8) -> f32x8 {
vhsubps(a, b)
@@ -586,7 +586,7 @@
/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// FIXME Should be 'vxorpd' instruction.
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_xor_pd(a: f64x4, b: f64x4) -> f64x4 {
@@ -598,7 +598,7 @@
/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_xor_ps(a: f32x8, b: f32x8) -> f32x8 {
let a: u32x8 = mem::transmute(a);
@@ -675,7 +675,7 @@
/// elements in `a` and `b` based on the comparison operand
/// specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
pub unsafe fn _mm_cmp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
macro_rules! call {
@@ -688,7 +688,7 @@
/// elements in `a` and `b` based on the comparison operand
/// specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
pub unsafe fn _mm256_cmp_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
macro_rules! call {
@@ -701,7 +701,7 @@
/// elements in `a` and `b` based on the comparison operand
/// specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
pub unsafe fn _mm_cmp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
macro_rules! call {
@@ -714,7 +714,7 @@
/// elements in `a` and `b` based on the comparison operand
/// specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
macro_rules! call {
@@ -729,7 +729,7 @@
/// and copy the upper element from `a` to the upper element of returned
/// vector.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
pub unsafe fn _mm_cmp_sd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
macro_rules! call {
@@ -744,7 +744,7 @@
/// and copy the upper 3 packed elements from `a` to the upper elements of
/// returned vector.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss
pub unsafe fn _mm_cmp_ss(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
macro_rules! call {
@@ -756,7 +756,7 @@
/// Convert packed 32-bit integers in `a` to packed double-precision (64-bit)
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm256_cvtepi32_pd(a: i32x4) -> f64x4 {
simd_cast(a)
@@ -765,7 +765,7 @@
/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm256_cvtepi32_ps(a: i32x8) -> f32x8 {
vcvtdq2ps(a)
@@ -774,7 +774,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm256_cvtpd_ps(a: f64x4) -> f32x4 {
vcvtpd2ps(a)
@@ -783,7 +783,7 @@
/// Convert packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm256_cvtps_epi32(a: f32x8) -> i32x8 {
vcvtps2dq(a)
@@ -792,7 +792,7 @@
/// Convert packed single-precision (32-bit) floating-point elements in `a`
/// to packed double-precision (64-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm256_cvtps_pd(a: f32x4) -> f64x4 {
a.as_f64x4()
@@ -801,7 +801,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm256_cvttpd_epi32(a: f64x4) -> i32x4 {
vcvttpd2dq(a)
@@ -810,7 +810,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm256_cvtpd_epi32(a: f64x4) -> i32x4 {
vcvtpd2dq(a)
@@ -819,7 +819,7 @@
/// Convert packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm256_cvttps_epi32(a: f32x8) -> i32x8 {
vcvttps2dq(a)
@@ -828,7 +828,7 @@
/// Extract 128 bits (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from `a`, selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128))]
pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> f32x4 {
match imm8 & 1 {
@@ -840,7 +840,7 @@
/// Extract 128 bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a`, selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128))]
pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> f64x2 {
match imm8 & 1 {
@@ -851,7 +851,7 @@
/// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128))]
pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
let b = i64x4::from(_mm256_undefined_si256());
@@ -864,7 +864,7 @@
/// Zero the contents of all XMM or YMM registers.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
pub unsafe fn _mm256_zeroall() {
vzeroall()
@@ -873,7 +873,7 @@
/// Zero the upper 128 bits of all YMM registers;
/// the lower 128-bits of the registers are unmodified.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
pub unsafe fn _mm256_zeroupper() {
vzeroupper()
@@ -882,7 +882,7 @@
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm256_permutevar_ps(a: f32x8, b: i32x8) -> f32x8 {
vpermilps256(a, b)
@@ -891,7 +891,7 @@
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// using the control in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
vpermilps(a, b)
@@ -900,7 +900,7 @@
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -952,7 +952,7 @@
/// Shuffle single-precision (32-bit) floating-point elements in `a`
/// using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
use x86::i586::sse::_mm_undefined_ps;
@@ -1006,7 +1006,7 @@
/// Shuffle double-precision (64-bit) floating-point elements in `a`
/// within 256-bit lanes using the control in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm256_permutevar_pd(a: f64x4, b: i64x4) -> f64x4 {
vpermilpd256(a, b)
@@ -1015,7 +1015,7 @@
/// Shuffle double-precision (64-bit) floating-point elements in `a`
/// using the control in `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm_permutevar_pd(a: f64x2, b: i64x2) -> f64x2 {
vpermilpd(a, b)
@@ -1024,7 +1024,7 @@
/// Shuffle double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -1066,7 +1066,7 @@
/// Shuffle double-precision (64-bit) floating-point elements in `a`
/// using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 {
use x86::i586::sse2::_mm_undefined_pd;
@@ -1094,7 +1094,7 @@
/// Shuffle 256-bits (composed of 8 packed single-precision (32-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))]
pub unsafe fn _mm256_permute2f128_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
macro_rules! call {
@@ -1106,7 +1106,7 @@
/// Shuffle 256-bits (composed of 4 packed double-precision (64-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
macro_rules! call {
@@ -1118,7 +1118,7 @@
/// Shuffle 258-bits (composed of integer data) selected by `imm8`
/// from `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
pub unsafe fn _mm256_permute2f128_si256(
a: i32x8, b: i32x8, imm8: i32
@@ -1132,7 +1132,7 @@
/// Broadcast a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> f32x8 {
f32x8::splat(*f)
@@ -1141,7 +1141,7 @@
/// Broadcast a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> f32x4 {
f32x4::splat(*f)
@@ -1150,7 +1150,7 @@
/// Broadcast a double-precision (64-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> f64x4 {
f64x4::splat(*f)
@@ -1159,7 +1159,7 @@
/// Broadcast 128 bits from memory (composed of 4 packed single-precision
/// (32-bit) floating-point elements) to all elements of the returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
pub unsafe fn _mm256_broadcast_ps(a: &f32x4) -> f32x8 {
vbroadcastf128ps256(a)
@@ -1168,7 +1168,7 @@
/// Broadcast 128 bits from memory (composed of 2 packed double-precision
/// (64-bit) floating-point elements) to all elements of the returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
pub unsafe fn _mm256_broadcast_pd(a: &f64x2) -> f64x4 {
vbroadcastf128pd256(a)
@@ -1178,7 +1178,7 @@
/// single-precision (32-bit) floating-point elements) from `b` into result
/// at the location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: f32x4, imm8: i32) -> f32x8 {
let b = _mm256_castps128_ps256(b);
@@ -1192,7 +1192,7 @@
/// double-precision (64-bit) floating-point elements) from `b` into result
/// at the location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 {
match imm8 & 1 {
@@ -1204,7 +1204,7 @@
/// Copy `a` to result, then insert 128 bits from `b` into result
/// at the location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
pub unsafe fn _mm256_insertf128_si256(
a: __m256i, b: __m128i, imm8: i32
@@ -1220,7 +1220,7 @@
/// Copy `a` to result, and insert the 8-bit integer `i` into result
/// at the location specified by `index`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_insert_epi8(a: i8x32, i: i8, index: i32) -> i8x32 {
let c = a;
@@ -1230,7 +1230,7 @@
/// Copy `a` to result, and insert the 16-bit integer `i` into result
/// at the location specified by `index`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_insert_epi16(a: i16x16, i: i16, index: i32) -> i16x16 {
let c = a;
@@ -1240,7 +1240,7 @@
/// Copy `a` to result, and insert the 32-bit integer `i` into result
/// at the location specified by `index`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_insert_epi32(a: i32x8, i: i32, index: i32) -> i32x8 {
let c = a;
@@ -1250,7 +1250,7 @@
/// Copy `a` to result, and insert the 64-bit integer `i` into result
/// at the location specified by `index`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 {
let c = a;
@@ -1262,7 +1262,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 {
*(mem_addr as *const f64x4)
@@ -1273,7 +1273,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: f64x4) {
*(mem_addr as *mut f64x4) = a;
@@ -1284,7 +1284,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 {
*(mem_addr as *const f32x8)
@@ -1295,7 +1295,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: f32x8) {
*(mem_addr as *mut f32x8) = a;
@@ -1305,7 +1305,7 @@
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 {
let mut dst = _mm256_undefined_pd();
@@ -1321,7 +1321,7 @@
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: f64x4) {
storeupd256(mem_addr, a);
@@ -1331,7 +1331,7 @@
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 {
let mut dst = _mm256_undefined_ps();
@@ -1347,7 +1347,7 @@
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: f32x8) {
storeups256(mem_addr, a);
@@ -1357,7 +1357,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
*mem_addr
@@ -1367,7 +1367,7 @@
/// `mem_addr` must be aligned on a 32-byte boundary or a
/// general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
*mem_addr = a;
@@ -1376,7 +1376,7 @@
/// Load 256-bits of integer data from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
let mut dst = _mm256_undefined_si256();
@@ -1389,9 +1389,9 @@
}
/// Store 256-bits of integer data from `a` into memory.
-/// `mem_addr` does not need to be aligned on any particular boundary.
+/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
storeudq256(mem_addr as *mut i8, i8x32::from(a));
@@ -1401,7 +1401,7 @@
/// into result using `mask` (elements are zeroed out when the high bit of the
/// corresponding element is not set).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 {
maskloadpd256(mem_addr as *const i8, mask)
@@ -1410,7 +1410,7 @@
/// Store packed double-precision (64-bit) floating-point elements from `a`
/// into memory using `mask`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: i64x4, a: f64x4) {
maskstorepd256(mem_addr as *mut i8, mask, a);
@@ -1420,7 +1420,7 @@
/// into result using `mask` (elements are zeroed out when the high bit of the
/// corresponding element is not set).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: i64x2) -> f64x2 {
maskloadpd(mem_addr as *const i8, mask)
@@ -1429,7 +1429,7 @@
/// Store packed double-precision (64-bit) floating-point elements from `a`
/// into memory using `mask`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) {
maskstorepd(mem_addr as *mut i8, mask, a);
@@ -1439,7 +1439,7 @@
/// into result using `mask` (elements are zeroed out when the high bit of the
/// corresponding element is not set).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 {
maskloadps256(mem_addr as *const i8, mask)
@@ -1448,7 +1448,7 @@
/// Store packed single-precision (32-bit) floating-point elements from `a`
/// into memory using `mask`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: i32x8, a: f32x8) {
maskstoreps256(mem_addr as *mut i8, mask, a);
@@ -1458,7 +1458,7 @@
/// into result using `mask` (elements are zeroed out when the high bit of the
/// corresponding element is not set).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: i32x4) -> f32x4 {
maskloadps(mem_addr as *const i8, mask)
@@ -1467,7 +1467,7 @@
/// Store packed single-precision (32-bit) floating-point elements from `a`
/// into memory using `mask`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: i32x4, a: f32x4) {
maskstoreps(mem_addr as *mut i8, mask, a);
@@ -1476,7 +1476,7 @@
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
/// from `a`, and return the results.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm256_movehdup_ps(a: f32x8) -> f32x8 {
simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
@@ -1485,7 +1485,7 @@
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
/// from `a`, and return the results.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm256_moveldup_ps(a: f32x8) -> f32x8 {
simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
@@ -1494,7 +1494,7 @@
/// Duplicate even-indexed double-precision (64-bit) floating-point elements
/// from "a", and return the results.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 {
simd_shuffle4(a, a, [0, 0, 2, 2])
@@ -1504,7 +1504,7 @@
/// This intrinsic may perform better than `_mm256_loadu_si256` when the
/// data crosses a cache line boundary.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 {
vlddqu(mem_addr as *const i8)
@@ -1514,7 +1514,7 @@
/// aligned memory location. To minimize caching, the data is flagged as
/// non-temporal (unlikely to be used again soon)
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1524,7 +1524,7 @@
/// to a 32-byte aligned memory location. To minimize caching, the data is
/// flagged as non-temporal (unlikely to be used again soon).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: f64x4) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1535,7 +1535,7 @@
/// caching, the data is flagged as non-temporal (unlikely to be used again
/// soon).
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: f32x8) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1545,7 +1545,7 @@
/// floating-point elements in `a`, and return the results. The maximum
/// relative error for this approximation is less than 1.5*2^-12.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 {
vrcpps(a)
@@ -1555,7 +1555,7 @@
/// (32-bit) floating-point elements in `a`, and return the results.
/// The maximum relative error for this approximation is less than 1.5*2^-12.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 {
vrsqrtps(a)
@@ -1564,7 +1564,7 @@
/// Unpack and interleave double-precision (64-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 {
simd_shuffle4(a, b, [1, 5, 3, 7])
@@ -1573,7 +1573,7 @@
/// Unpack and interleave single-precision (32-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 {
simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
@@ -1582,7 +1582,7 @@
/// Unpack and interleave double-precision (64-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm256_unpacklo_pd(a: f64x4, b: f64x4) -> f64x4 {
simd_shuffle4(a, b, [0, 4, 2, 6])
@@ -1591,7 +1591,7 @@
/// Unpack and interleave single-precision (32-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 {
simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
@@ -1602,7 +1602,7 @@
/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 {
ptestz256(a, b)
@@ -1613,7 +1613,7 @@
/// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 {
ptestc256(a, b)
@@ -1625,7 +1625,7 @@
/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
/// `CF` values are zero, otherwise return 0.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
pub unsafe fn _mm256_testnzc_si256(a: i64x4, b: i64x4) -> i32 {
ptestnzc256(a, b)
@@ -1639,7 +1639,7 @@
/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 {
vtestzpd256(a, b)
@@ -1653,7 +1653,7 @@
/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `CF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 {
vtestcpd256(a, b)
@@ -1668,7 +1668,7 @@
/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
/// are zero, otherwise return 0.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 {
vtestnzcpd256(a, b)
@@ -1682,7 +1682,7 @@
/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 {
vtestzpd(a, b)
@@ -1696,7 +1696,7 @@
/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `CF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 {
vtestcpd(a, b)
@@ -1711,7 +1711,7 @@
/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
/// are zero, otherwise return 0.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 {
vtestnzcpd(a, b)
@@ -1725,7 +1725,7 @@
/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm256_testz_ps(a: f32x8, b: f32x8) -> i32 {
vtestzps256(a, b)
@@ -1739,7 +1739,7 @@
/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `CF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm256_testc_ps(a: f32x8, b: f32x8) -> i32 {
vtestcps256(a, b)
@@ -1754,7 +1754,7 @@
/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
/// are zero, otherwise return 0.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 {
vtestnzcps256(a, b)
@@ -1768,7 +1768,7 @@
/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm_testz_ps(a: f32x4, b: f32x4) -> i32 {
vtestzps(a, b)
@@ -1782,7 +1782,7 @@
/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
/// is zero, otherwise set `CF` to 0. Return the `CF` value.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm_testc_ps(a: f32x4, b: f32x4) -> i32 {
vtestcps(a, b)
@@ -1797,7 +1797,7 @@
/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
/// are zero, otherwise return 0.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 {
vtestnzcps(a, b)
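
As a usage sketch of the new spelling on one of these flag tests (the wrapper and the `stdsimd::simd`/`stdsimd::vendor` import paths are assumptions, not part of this patch):

    use stdsimd::simd::f64x4;
    use stdsimd::vendor::_mm256_testz_pd;

    // Non-zero means ZF was set, i.e. `a AND b` has no 64-bit lane with the
    // sign bit set, so the sign bits of `a` and `b` never overlap.
    #[target_feature(enable = "avx")]
    unsafe fn sign_bits_disjoint(a: f64x4, b: f64x4) -> bool {
        _mm256_testz_pd(a, b) != 0
    }
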
@@ -1807,7 +1807,7 @@
/// corresponding packed double-precision (64-bit) floating-point element in
/// `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 {
movmskpd256(a)
@@ -1817,7 +1817,7 @@
/// corresponding packed single-precision (32-bit) floating-point element in
/// `a`.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 {
movmskps256(a)
@@ -1825,7 +1825,7 @@
/// Return vector of type __m256d with all elements set to zero.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
pub unsafe fn _mm256_setzero_pd() -> f64x4 {
f64x4::new(0., 0., 0., 0.)
@@ -1833,7 +1833,7 @@
/// Return vector of type __m256 with all elements set to zero.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_setzero_ps() -> f32x8 {
f32x8::new(0., 0., 0., 0., 0., 0., 0., 0.)
@@ -1841,7 +1841,7 @@
/// Return vector of type __m256i with all elements set to zero.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
pub unsafe fn _mm256_setzero_si256() -> __m256i {
mem::transmute(i64x4::new(0, 0, 0, 0))
@@ -1850,7 +1850,7 @@
/// Set packed double-precision (64-bit) floating-point elements in returned
/// vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
@@ -1860,7 +1860,7 @@
/// Set packed single-precision (32-bit) floating-point elements in returned
/// vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
@@ -1871,7 +1871,7 @@
/// Set packed 8-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set_epi8(
e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
@@ -1890,7 +1890,7 @@
/// Set packed 16-bit integers in returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set_epi16(
e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
@@ -1908,7 +1908,7 @@
/// Set packed 32-bit integers in returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
@@ -1918,7 +1918,7 @@
/// Set packed 64-bit integers in returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 {
@@ -1928,7 +1928,7 @@
/// Set packed double-precision (64-bit) floating-point elements in returned
/// vector with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
f64x4::new(a, b, c, d)
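
A quick sketch of the `set`/`setr` argument-order difference under the new attribute (hypothetical wrapper; import paths assumed as in the earlier sketch, and the `set` ordering stated from the family's usual convention rather than read off this hunk):

    use stdsimd::simd::f64x4;
    use stdsimd::vendor::{_mm256_set_pd, _mm256_setr_pd};

    // `setr` takes elements in memory order, `set` takes them highest
    // element first, so both calls build the same vector.
    #[target_feature(enable = "avx")]
    unsafe fn same_vector() -> (f64x4, f64x4) {
        (_mm256_setr_pd(1., 2., 3., 4.), _mm256_set_pd(4., 3., 2., 1.))
    }
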
@@ -1937,7 +1937,7 @@
/// Set packed single-precision (32-bit) floating-point elements in returned
/// vector with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_setr_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
@@ -1948,7 +1948,7 @@
/// Set packed 8-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_setr_epi8(
e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
@@ -1968,7 +1968,7 @@
/// Set packed 16-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_setr_epi16(
e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
@@ -1987,7 +1987,7 @@
/// Set packed 32-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_setr_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
@@ -1998,7 +1998,7 @@
/// Set packed 64-bit integers in returned vector with the supplied values in
/// reverse order.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 {
@@ -2008,7 +2008,7 @@
/// Broadcast double-precision (64-bit) floating-point value `a` to all
/// elements of returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 {
f64x4::new(a, a, a, a)
@@ -2017,7 +2017,7 @@
/// Broadcast single-precision (32-bit) floating-point value `a` to all
/// elements of returned vector.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 {
f32x8::new(a, a, a, a, a, a, a, a)
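
The `set1` family splats a scalar; a minimal sketch (wrapper and import paths assumed):

    use stdsimd::simd::f32x8;
    use stdsimd::vendor::_mm256_set1_ps;

    // Broadcast one f32 into all eight lanes of a 256-bit vector.
    #[target_feature(enable = "avx")]
    unsafe fn splat8(x: f32) -> f32x8 {
        _mm256_set1_ps(x)
    }
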
@@ -2026,7 +2026,7 @@
/// Broadcast 8-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastb` instruction.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
@@ -2043,7 +2043,7 @@
/// Broadcast 16-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastw` instruction.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
//#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
@@ -2054,7 +2054,7 @@
/// Broadcast 32-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastd` instruction.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 {
i32x8::new(a, a, a, a, a, a, a, a)
@@ -2063,7 +2063,7 @@
/// Broadcast 64-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastq` instruction.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
//#[cfg_attr(test, assert_instr(vmovddup))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
@@ -2073,7 +2073,7 @@
/// Cast vector of type __m256d to type __m256.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
@@ -2082,7 +2082,7 @@
/// Cast vector of type __m256 to type __m256d.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
@@ -2091,7 +2091,7 @@
/// Casts vector of type __m256 to type __m256i.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castps_si256(a: f32x8) -> __m256i {
@@ -2100,7 +2100,7 @@
/// Casts vector of type __m256i to type __m256.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> f32x8 {
@@ -2109,7 +2109,7 @@
/// Casts vector of type __m256d to type __m256i.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castpd_si256(a: f64x4) -> __m256i {
@@ -2118,7 +2118,7 @@
/// Casts vector of type __m256i to type __m256d.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> f64x4 {
@@ -2127,7 +2127,7 @@
/// Casts vector of type __m256 to type __m128.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> f32x4 {
@@ -2136,7 +2136,7 @@
/// Casts vector of type __m256d to type __m128d.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> f64x2 {
@@ -2145,7 +2145,7 @@
/// Casts vector of type __m256i to type __m128i.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
@@ -2157,7 +2157,7 @@
/// Casts vector of type __m128 to type __m256;
/// the upper 128 bits of the result are undefined.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castps128_ps256(a: f32x4) -> f32x8 {
@@ -2168,7 +2168,7 @@
/// Casts vector of type __m128d to type __m256d;
/// the upper 128 bits of the result are undefined.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castpd128_pd256(a: f64x2) -> f64x4 {
@@ -2179,7 +2179,7 @@
/// Casts vector of type __m128i to type __m256i;
/// the upper 128 bits of the result are undefined.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
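
Because these casts are pure reinterpretations, a round trip is a no-op; a sketch (wrapper and import paths assumed):

    use stdsimd::simd::f64x4;
    use stdsimd::vendor::{_mm256_castpd_ps, _mm256_castps_pd};

    // Zero-latency bit reinterpretation there and back; the input bits
    // come out unchanged.
    #[target_feature(enable = "avx")]
    unsafe fn cast_round_trip(a: f64x4) -> f64x4 {
        _mm256_castps_pd(_mm256_castpd_ps(a))
    }
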
@@ -2193,7 +2193,7 @@
/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
/// the value of the source vector. The upper 128 bits are set to zero.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
@@ -2205,7 +2205,7 @@
/// The lower 128 bits contain the value of the source vector. The upper
/// 128 bits are set to zero.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
@@ -2220,7 +2220,7 @@
/// contain the value of the source vector. The upper 128 bits are set
/// to zero.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic is only used for compilation and does not generate any
// instructions, thus it has zero latency.
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
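
The `zext` intrinsics also show the comma-separated feature list in the new spelling; a sketch of calling one (wrapper and import paths assumed):

    use stdsimd::simd::{f64x2, f64x4};
    use stdsimd::vendor::_mm256_zextpd128_pd256;

    // Widen to 256 bits with a zeroed upper half; the wrapper enables
    // both features with the new `enable = "avx,sse2"` form.
    #[target_feature(enable = "avx,sse2")]
    unsafe fn widen_zero_upper(a: f64x2) -> f64x4 {
        _mm256_zextpd128_pd256(a)
    }
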
@@ -2230,7 +2230,7 @@
/// Return vector of type `f32x8` with undefined elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_undefined_ps() -> f32x8 {
f32x8::splat(mem::uninitialized())
@@ -2238,7 +2238,7 @@
/// Return vector of type `f64x4` with undefined elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_undefined_pd() -> f64x4 {
f64x4::splat(mem::uninitialized())
@@ -2246,7 +2246,7 @@
/// Return vector of type __m256i with undefined elements.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_undefined_si256() -> __m256i {
mem::transmute(i64x4::splat(mem::uninitialized()))
@@ -2254,7 +2254,7 @@
/// Set packed __m256 returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
@@ -2262,7 +2262,7 @@
/// Set packed __m256d returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
let hi: f32x4 = mem::transmute(hi);
@@ -2272,7 +2272,7 @@
/// Set packed __m256i returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
let hi: f32x4 = mem::transmute(hi);
@@ -2282,7 +2282,7 @@
/// Set packed __m256 returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_setr_m128(lo: f32x4, hi: f32x4) -> f32x8 {
_mm256_set_m128(hi, lo)
@@ -2290,7 +2290,7 @@
/// Set packed __m256d returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_setr_m128d(lo: f64x2, hi: f64x2) -> f64x4 {
_mm256_set_m128d(hi, lo)
@@ -2298,7 +2298,7 @@
/// Set packed __m256i returned vector with the supplied values.
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
_mm256_set_m128i(hi, lo)
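
A sketch of stitching two 128-bit halves together with the `setr` variant just above (wrapper and import paths assumed):

    use stdsimd::simd::{f32x4, f32x8};
    use stdsimd::vendor::_mm256_setr_m128;

    // `setr_m128` takes the low half first, then the high half.
    #[target_feature(enable = "avx")]
    unsafe fn join_halves(lo: f32x4, hi: f32x4) -> f32x8 {
        _mm256_setr_m128(lo, hi)
    }
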
@@ -2309,7 +2309,7 @@
/// value.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_loadu2_m128(
hiaddr: *const f32, loaddr: *const f32
@@ -2324,7 +2324,7 @@
/// value.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_loadu2_m128d(
hiaddr: *const f64, loaddr: *const f64
@@ -2338,7 +2338,7 @@
/// them into a 256-bit value.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_loadu2_m128i(
hiaddr: *const __m128i, loaddr: *const __m128i
@@ -2353,7 +2353,7 @@
/// different 128-bit locations.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_storeu2_m128(
hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
@@ -2370,7 +2370,7 @@
/// different 128-bit locations.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_storeu2_m128d(
hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
@@ -2386,7 +2386,7 @@
/// `a` into memory two different 128-bit locations.
/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_storeu2_m128i(
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
@@ -2400,7 +2400,7 @@
/// Returns the first element of the input vector of [8 x float].
#[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
//#[cfg_attr(test, assert_instr(movss))] FIXME
pub unsafe fn _mm256_cvtss_f32(a: f32x8) -> f32 {
a.extract(0)
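
Combining the unaligned two-half load above with this extractor; a sketch (wrapper, import paths, and the `f32x8` return type of the load are assumptions):

    use stdsimd::simd::f32x8;
    use stdsimd::vendor::{_mm256_cvtss_f32, _mm256_loadu2_m128};

    // Load eight f32s from two possibly unaligned 4-element halves and
    // return the first element of the low half.
    #[target_feature(enable = "avx,sse")]
    unsafe fn first_of_halves(lo: &[f32; 4], hi: &[f32; 4]) -> f32 {
        let v: f32x8 = _mm256_loadu2_m128(hi.as_ptr(), lo.as_ptr());
        _mm256_cvtss_f32(v)
    }
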
diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs
index 6aa780a..d824519 100644
--- a/coresimd/src/x86/i586/avx2.rs
+++ b/coresimd/src/x86/i586/avx2.rs
@@ -30,7 +30,7 @@
/// Computes the absolute values of packed 32-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_abs_epi32(a: i32x8) -> u32x8 {
pabsd(a)
@@ -38,7 +38,7 @@
/// Computes the absolute values of packed 16-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_abs_epi16(a: i16x16) -> u16x16 {
pabsw(a)
@@ -46,7 +46,7 @@
/// Computes the absolute values of packed 8-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_abs_epi8(a: i8x32) -> u8x32 {
pabsb(a)
@@ -54,7 +54,7 @@
/// Add packed 64-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
a + b
@@ -62,7 +62,7 @@
/// Add packed 32-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
a + b
@@ -70,7 +70,7 @@
/// Add packed 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
a + b
@@ -78,7 +78,7 @@
/// Add packed 8-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
a + b
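
A sketch of the plain AVX2 adds under the new attribute (wrapper and import paths assumed):

    use stdsimd::simd::i32x8;
    use stdsimd::vendor::_mm256_add_epi32;

    // Lane-wise wrapping addition of eight 32-bit integers.
    #[target_feature(enable = "avx2")]
    unsafe fn add_lanes(a: i32x8, b: i32x8) -> i32x8 {
        _mm256_add_epi32(a, b)
    }
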
@@ -86,7 +86,7 @@
/// Add packed 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
paddsb(a, b)
@@ -94,7 +94,7 @@
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
paddsw(a, b)
@@ -102,7 +102,7 @@
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
paddusb(a, b)
@@ -110,7 +110,7 @@
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
paddusw(a, b)
@@ -119,7 +119,7 @@
/// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shift the result right by `n` bytes, and return the low 16 bytes.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, n = 15))]
pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
let n = n as u32;
@@ -182,7 +182,7 @@
/// Compute the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
__m256i::from(i8x32::from(a) & i8x32::from(b))
@@ -191,7 +191,7 @@
/// Compute the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
__m256i::from((!i8x32::from(a)) & i8x32::from(b))
@@ -199,7 +199,7 @@
/// Average packed unsigned 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_avg_epu16(a: u16x16, b: u16x16) -> u16x16 {
pavgw(a, b)
@@ -207,7 +207,7 @@
/// Average packed unsigned 8-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_avg_epu8(a: u8x32, b: u8x32) -> u8x32 {
pavgb(a, b)
@@ -215,7 +215,7 @@
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
pub unsafe fn _mm_blend_epi32(a: i32x4, b: i32x4, imm8: i32) -> i32x4 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -244,7 +244,7 @@
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
pub unsafe fn _mm256_blend_epi32(a: i32x8, b: i32x8, imm8: i32) -> i32x8 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -293,7 +293,7 @@
/// Blend packed 16-bit integers from `a` and `b` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
pub unsafe fn _mm256_blend_epi16(a: i16x16, b: i16x16, imm8: i32) -> i16x16 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -344,7 +344,7 @@
/// Blend packed 8-bit integers from `a` and `b` using `mask`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
pub unsafe fn _mm256_blendv_epi8(a: i8x32, b: i8x32, mask: __m256i) -> i8x32 {
pblendvb(a, b, i8x32::from(mask))
@@ -353,7 +353,7 @@
/// Broadcast the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_broadcastb_epi8(a: i8x16) -> i8x16 {
simd_shuffle16(a, i8x16::splat(0_i8), [0_u32; 16])
@@ -362,7 +362,7 @@
/// Broadcast the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_broadcastb_epi8(a: i8x16) -> i8x32 {
simd_shuffle32(a, i8x16::splat(0_i8), [0_u32; 32])
@@ -373,7 +373,7 @@
/// Broadcast the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm_broadcastd_epi32(a: i32x4) -> i32x4 {
simd_shuffle4(a, i32x4::splat(0_i32), [0_u32; 4])
@@ -384,7 +384,7 @@
/// Broadcast the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm256_broadcastd_epi32(a: i32x4) -> i32x8 {
simd_shuffle8(a, i32x4::splat(0_i32), [0_u32; 8])
@@ -393,7 +393,7 @@
/// Broadcast the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm_broadcastq_epi64(a: i64x2) -> i64x2 {
simd_shuffle2(a, i64x2::splat(0_i64), [0_u32; 2])
@@ -404,7 +404,7 @@
/// Broadcast the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm256_broadcastq_epi64(a: i64x2) -> i64x4 {
simd_shuffle4(a, i64x2::splat(0_i64), [0_u32; 4])
@@ -413,7 +413,7 @@
/// Broadcast the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm_broadcastsd_pd(a: f64x2) -> f64x2 {
simd_shuffle2(a, f64x2::splat(0_f64), [0_u32; 2])
@@ -422,7 +422,7 @@
/// Broadcast the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm256_broadcastsd_pd(a: f64x2) -> f64x4 {
simd_shuffle4(a, f64x2::splat(0_f64), [0_u32; 4])
@@ -433,7 +433,7 @@
/// Broadcast 128 bits of integer data from a to all 128-bit lanes in
/// the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
pub unsafe fn _mm256_broadcastsi128_si256(a: i64x2) -> i64x4 {
simd_shuffle4(a, i64x2::splat(0_i64), [0, 1, 0, 1])
}
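
A sketch of the 128-bit lane broadcast just above (wrapper and import paths assumed):

    use stdsimd::simd::{i64x2, i64x4};
    use stdsimd::vendor::_mm256_broadcastsi128_si256;

    // Repeat one 128-bit block into both halves of a 256-bit vector.
    #[target_feature(enable = "avx2")]
    unsafe fn dup_128(a: i64x2) -> i64x4 {
        _mm256_broadcastsi128_si256(a)
    }
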
@@ -441,7 +441,7 @@
/// Broadcast the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm_broadcastss_ps(a: f32x4) -> f32x4 {
simd_shuffle4(a, f32x4::splat(0_f32), [0_u32; 4])
@@ -450,7 +450,7 @@
/// Broadcast the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm256_broadcastss_ps(a: f32x4) -> f32x8 {
simd_shuffle8(a, f32x4::splat(0_f32), [0_u32; 8])
@@ -459,7 +459,7 @@
/// Broadcast the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_broadcastw_epi16(a: i16x8) -> i16x8 {
simd_shuffle8(a, i16x8::splat(0_i16), [0_u32; 8])
@@ -468,7 +468,7 @@
/// Broadcast the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 {
simd_shuffle16(a, i16x8::splat(0_i16), [0_u32; 16])
@@ -476,7 +476,7 @@
/// Compare packed 64-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
pub unsafe fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
a.eq(b)
@@ -484,7 +484,7 @@
/// Compare packed 32-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
pub unsafe fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
a.eq(b)
@@ -492,7 +492,7 @@
/// Compare packed 16-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
pub unsafe fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
a.eq(b)
@@ -500,7 +500,7 @@
/// Compare packed 8-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
pub unsafe fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
a.eq(b)
@@ -508,7 +508,7 @@
/// Compare packed 64-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
pub unsafe fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
a.gt(b)
@@ -516,7 +516,7 @@
/// Compare packed 32-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
pub unsafe fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
a.gt(b)
@@ -524,7 +524,7 @@
/// Compare packed 16-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
pub unsafe fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
a.gt(b)
@@ -532,7 +532,7 @@
/// Compare packed 8-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
pub unsafe fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
a.gt(b)
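
These comparisons yield lane masks (all-ones or all-zeros per lane) rather than booleans; a sketch using the 32-bit variant above (wrapper and import paths assumed):

    use stdsimd::simd::i32x8;
    use stdsimd::vendor::_mm256_cmpgt_epi32;

    // Each result lane is -1 where a[i] > b[i] (signed) and 0 elsewhere.
    #[target_feature(enable = "avx2")]
    unsafe fn gt_mask(a: i32x8, b: i32x8) -> i32x8 {
        _mm256_cmpgt_epi32(a, b)
    }
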
@@ -540,7 +540,7 @@
/// Sign-extend 16-bit integers to 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_cvtepi16_epi32(a: i16x8) -> i32x8 {
simd_cast(a)
@@ -548,7 +548,7 @@
/// Sign-extend 16-bit integers to 64-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_cvtepi16_epi64(a: i16x8) -> i64x4 {
simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -556,7 +556,7 @@
/// Sign-extend 32-bit integers to 64-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_cvtepi32_epi64(a: i32x4) -> i64x4 {
simd_cast(a)
@@ -564,7 +564,7 @@
/// Sign-extend 8-bit integers to 16-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_cvtepi8_epi16(a: i8x16) -> i16x16 {
simd_cast(a)
@@ -572,7 +572,7 @@
/// Sign-extend 8-bit integers to 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_cvtepi8_epi32(a: i8x16) -> i32x8 {
simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
@@ -580,7 +580,7 @@
/// Sign-extend 8-bit integers to 64-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_cvtepi8_epi64(a: i8x16) -> i64x4 {
simd_cast::<::v32::i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -589,7 +589,7 @@
/// Zero-extend unsigned 16-bit integers in `a` to 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm256_cvtepu16_epi32(a: u16x8) -> i32x8 {
simd_cast(a)
@@ -598,7 +598,7 @@
/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm256_cvtepu16_epi64(a: u16x8) -> i64x4 {
simd_cast::<::v64::u16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -606,7 +606,7 @@
/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm256_cvtepu32_epi64(a: u32x4) -> i64x4 {
simd_cast(a)
@@ -614,7 +614,7 @@
/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm256_cvtepu8_epi16(a: u8x16) -> i16x16 {
simd_cast(a)
@@ -623,7 +623,7 @@
/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm256_cvtepu8_epi32(a: u8x16) -> i32x8 {
simd_cast::<::v64::u8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
@@ -632,7 +632,7 @@
/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm256_cvtepu8_epi64(a: u8x16) -> i64x4 {
simd_cast::<::v32::u8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
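
A sketch of the widening conversions above (wrapper and import paths assumed):

    use stdsimd::simd::{i16x16, u8x16};
    use stdsimd::vendor::_mm256_cvtepu8_epi16;

    // Zero-extend sixteen u8 lanes into sixteen i16 lanes.
    #[target_feature(enable = "avx2")]
    unsafe fn widen_bytes(a: u8x16) -> i16x16 {
        _mm256_cvtepu8_epi16(a)
    }
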
@@ -640,7 +640,7 @@
/// Extract 128 bits (of integer data) from `a` selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
use x86::i586::avx::_mm256_undefined_si256;
@@ -655,7 +655,7 @@
/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
pub unsafe fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
phaddw(a, b)
@@ -663,7 +663,7 @@
/// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
pub unsafe fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
phaddd(a, b)
@@ -672,7 +672,7 @@
/// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
pub unsafe fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
phaddsw(a, b)
@@ -680,7 +680,7 @@
/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
pub unsafe fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
phsubw(a, b)
@@ -688,7 +688,7 @@
/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
pub unsafe fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
phsubd(a, b)
@@ -697,7 +697,7 @@
/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
phsubsw(a, b)
@@ -707,7 +707,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
pub unsafe fn _mm_i32gather_epi32(
slice: *const i32, offsets: i32x4, scale: i32
@@ -723,7 +723,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
pub unsafe fn _mm_mask_i32gather_epi32(
src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i32
@@ -738,7 +738,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
pub unsafe fn _mm256_i32gather_epi32(
slice: *const i32, offsets: i32x8, scale: i32
@@ -754,7 +754,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
pub unsafe fn _mm256_mask_i32gather_epi32(
src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i32
@@ -769,7 +769,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
pub unsafe fn _mm_i32gather_ps(
slice: *const f32, offsets: i32x4, scale: i32
@@ -785,7 +785,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
pub unsafe fn _mm_mask_i32gather_ps(
src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i32
@@ -800,7 +800,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
pub unsafe fn _mm256_i32gather_ps(
slice: *const f32, offsets: i32x8, scale: i32
@@ -816,7 +816,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
pub unsafe fn _mm256_mask_i32gather_ps(
src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i32
@@ -831,7 +831,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
pub unsafe fn _mm_i32gather_epi64(
slice: *const i64, offsets: i32x4, scale: i32
@@ -847,7 +847,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
pub unsafe fn _mm_mask_i32gather_epi64(
src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i32
@@ -862,7 +862,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
pub unsafe fn _mm256_i32gather_epi64(
slice: *const i64, offsets: i32x4, scale: i32
@@ -878,7 +878,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
pub unsafe fn _mm256_mask_i32gather_epi64(
src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i32
@@ -893,7 +893,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
pub unsafe fn _mm_i32gather_pd(
slice: *const f64, offsets: i32x4, scale: i32
@@ -909,7 +909,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
pub unsafe fn _mm_mask_i32gather_pd(
src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i32
@@ -924,7 +924,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
pub unsafe fn _mm256_i32gather_pd(
slice: *const f64, offsets: i32x4, scale: i32
@@ -940,7 +940,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
pub unsafe fn _mm256_mask_i32gather_pd(
src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i32
@@ -955,7 +955,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
pub unsafe fn _mm_i64gather_epi32(
slice: *const i32, offsets: i64x2, scale: i32
@@ -971,7 +971,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
pub unsafe fn _mm_mask_i64gather_epi32(
src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i32
@@ -986,7 +986,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
pub unsafe fn _mm256_i64gather_epi32(
slice: *const i32, offsets: i64x4, scale: i32
@@ -1002,7 +1002,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
pub unsafe fn _mm256_mask_i64gather_epi32(
src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i32
@@ -1017,7 +1017,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
pub unsafe fn _mm_i64gather_ps(
slice: *const f32, offsets: i64x2, scale: i32
@@ -1033,7 +1033,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
pub unsafe fn _mm_mask_i64gather_ps(
src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i32
@@ -1048,7 +1048,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
pub unsafe fn _mm256_i64gather_ps(
slice: *const f32, offsets: i64x4, scale: i32
@@ -1064,7 +1064,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
pub unsafe fn _mm256_mask_i64gather_ps(
src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i32
@@ -1079,7 +1079,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
pub unsafe fn _mm_i64gather_epi64(
slice: *const i64, offsets: i64x2, scale: i32
@@ -1095,7 +1095,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
pub unsafe fn _mm_mask_i64gather_epi64(
src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i32
@@ -1110,7 +1110,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
pub unsafe fn _mm256_i64gather_epi64(
slice: *const i64, offsets: i64x4, scale: i32
@@ -1126,7 +1126,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
pub unsafe fn _mm256_mask_i64gather_epi64(
src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i32
@@ -1141,7 +1141,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
pub unsafe fn _mm_i64gather_pd(
slice: *const f64, offsets: i64x2, scale: i32
@@ -1157,7 +1157,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
pub unsafe fn _mm_mask_i64gather_pd(
src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i32
@@ -1172,7 +1172,7 @@
/// where
/// `scale` is between 1 and 8.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
pub unsafe fn _mm256_i64gather_pd(
slice: *const f64, offsets: i64x4, scale: i32
@@ -1188,7 +1188,7 @@
/// `scale` is between 1 and 8. If mask is set, load the value from `src` in
/// that position instead.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
pub unsafe fn _mm256_mask_i64gather_pd(
src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i32
@@ -1202,7 +1202,7 @@
/// Copy `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
/// location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
pub unsafe fn _mm256_inserti128_si256(
a: __m256i, b: __m128i, imm8: i32
@@ -1221,7 +1221,7 @@
/// intermediate signed 32-bit integers. Horizontally add adjacent pairs
/// of intermediate 32-bit integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
pmaddwd(a, b)
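
For the gather loads further up, a rough usage sketch (wrapper and import paths assumed; the `i32x4` return type is not visible in those hunks and is assumed here):

    use stdsimd::simd::i32x4;
    use stdsimd::vendor::_mm_i32gather_epi32;

    // Gather four i32s at the given element indices; a scale of 4 turns
    // the 32-bit indices into byte offsets. Every index must be in bounds.
    #[target_feature(enable = "avx2")]
    unsafe fn gather4(table: &[i32], idx: i32x4) -> i32x4 {
        _mm_i32gather_epi32(table.as_ptr(), idx, 4)
    }
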
@@ -1232,7 +1232,7 @@
/// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
/// signed 16-bit integers
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
pmaddubsw(a, b)
@@ -1242,7 +1242,7 @@
/// (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: i32x4) -> i32x4 {
maskloadd(mem_addr as *const i8, mask)
@@ -1252,7 +1252,7 @@
/// (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
pub unsafe fn _mm256_maskload_epi32(
mem_addr: *const i32, mask: i32x8
@@ -1264,7 +1264,7 @@
/// (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: i64x2) -> i64x2 {
maskloadq(mem_addr as *const i8, mask)
@@ -1274,7 +1274,7 @@
/// (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
pub unsafe fn _mm256_maskload_epi64(
mem_addr: *const i64, mask: i64x4
@@ -1286,7 +1286,7 @@
/// using `mask` (elements are not stored when the highest bit is not set
/// in the corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: i32x4, a: i32x4) {
maskstored(mem_addr as *mut i8, mask, a)
@@ -1296,7 +1296,7 @@
/// using `mask` (elements are not stored when the highest bit is not set
/// in the corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
pub unsafe fn _mm256_maskstore_epi32(
mem_addr: *mut i32, mask: i32x8, a: i32x8
@@ -1308,7 +1308,7 @@
/// using `mask` (elements are not stored when the highest bit is not set
/// in the corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: i64x2, a: i64x2) {
maskstoreq(mem_addr as *mut i8, mask, a)
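
A sketch pairing the masked load and store above (wrapper and import paths assumed):

    use stdsimd::simd::i32x4;
    use stdsimd::vendor::{_mm_maskload_epi32, _mm_maskstore_epi32};

    // Copy only the lanes whose mask element has its high bit set; the
    // other lanes load as zero and are skipped by the store.
    #[target_feature(enable = "avx2")]
    unsafe fn masked_copy(src: *const i32, dst: *mut i32, mask: i32x4) {
        let v = _mm_maskload_epi32(src, mask);
        _mm_maskstore_epi32(dst, mask, v);
    }
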
@@ -1318,7 +1318,7 @@
/// using `mask` (elements are not stored when the highest bit is not set
/// in the corresponding element).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
pub unsafe fn _mm256_maskstore_epi64(
mem_addr: *mut i64, mask: i64x4, a: i64x4
@@ -1329,7 +1329,7 @@
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
/// maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
pmaxsw(a, b)
@@ -1338,7 +1338,7 @@
/// Compare packed 32-bit integers in `a` and `b`, and return the packed
/// maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
pmaxsd(a, b)
@@ -1347,7 +1347,7 @@
/// Compare packed 8-bit integers in `a` and `b`, and return the packed
/// maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
pmaxsb(a, b)
@@ -1356,7 +1356,7 @@
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return
/// the packed maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
pmaxuw(a, b)
@@ -1365,7 +1365,7 @@
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return
/// the packed maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
pmaxud(a, b)
@@ -1374,7 +1374,7 @@
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return
/// the packed maximum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
pmaxub(a, b)
@@ -1383,7 +1383,7 @@
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
/// minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
pminsw(a, b)
@@ -1392,7 +1392,7 @@
/// Compare packed 32-bit integers in `a` and `b`, and return the packed
/// minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
pminsd(a, b)
@@ -1401,7 +1401,7 @@
/// Compare packed 8-bit integers in `a` and `b`, and return the packed
/// minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
pminsb(a, b)
@@ -1410,7 +1410,7 @@
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return
/// the packed minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
pminuw(a, b)
@@ -1419,7 +1419,7 @@
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return
/// the packed minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
pminud(a, b)
@@ -1428,7 +1428,7 @@
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return
/// the packed minimum values.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 {
pminub(a, b)
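
The min/max pairs compose into a lane-wise clamp; a sketch (wrapper and import paths assumed):

    use stdsimd::simd::i16x16;
    use stdsimd::vendor::{_mm256_max_epi16, _mm256_min_epi16};

    // Clamp each 16-bit lane of `v` into the range [lo, hi].
    #[target_feature(enable = "avx2")]
    unsafe fn clamp16(v: i16x16, lo: i16x16, hi: i16x16) -> i16x16 {
        _mm256_min_epi16(_mm256_max_epi16(v, lo), hi)
    }
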
@@ -1437,7 +1437,7 @@
/// Create mask from the most significant bit of each 8-bit element in `a`,
/// return the result.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
pub unsafe fn _mm256_movemask_epi8(a: i8x32) -> i32 {
pmovmskb(a)
@@ -1451,7 +1451,7 @@
/// quadruplets are formed from sequential 8-bit integers selected from `a`
/// starting at the offset specified in `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
pub unsafe fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
macro_rules! call {
@@ -1465,7 +1465,7 @@
///
/// Return the 64-bit results.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
pmuldq(a, b)
@@ -1476,7 +1476,7 @@
///
/// Return the unsigned 64-bit results.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
pmuludq(a, b)
@@ -1486,7 +1486,7 @@
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
pmulhw(a, b)
@@ -1496,7 +1496,7 @@
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
pmulhuw(a, b)
@@ -1506,7 +1506,7 @@
/// intermediate 32-bit integers, and return the low 16 bits of the
/// intermediate integers
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_mullo_epi16(a: i16x16, b: i16x16) -> i16x16 {
a * b
@@ -1516,7 +1516,7 @@
/// intermediate 64-bit integers, and return the low 32 bits of the
/// intermediate integers
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm256_mullo_epi32(a: i32x8, b: i32x8) -> i32x8 {
a * b
@@ -1527,7 +1527,7 @@
/// integer to the 18 most significant bits, round by adding 1, and
/// return bits [16:1]
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b: i16x16) -> i16x16 {
pmulhrsw(a, b)
@@ -1536,7 +1536,7 @@
/// Compute the bitwise OR of 256 bits (representing integer data) in `a`
/// and `b`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
__m256i::from(i8x32::from(a) | i8x32::from(b))
@@ -1545,7 +1545,7 @@
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
packsswb(a, b)
@@ -1554,7 +1554,7 @@
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
packssdw(a, b)
@@ -1563,7 +1563,7 @@
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
packuswb(a, b)
@@ -1572,7 +1572,7 @@
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
packusdw(a, b)
@@ -1583,7 +1583,7 @@
/// The last 3 bits of each integer of `b` are used as addresses into the 8
/// integers of `a`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm256_permutevar8x32_epi32(a: u32x8, b: u32x8) -> u32x8 {
permd(a, b)
@@ -1591,7 +1591,7 @@
/// Permutes 64-bit integers from `a` using control mask `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermq, imm8 = 9))]
pub unsafe fn _mm256_permute4x64_epi64(a: i64x4, imm8: i32) -> i64x4 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -1640,7 +1640,7 @@
/// Shuffle 128-bits of integer data selected by `imm8` from `a` and `b`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
pub unsafe fn _mm256_permute2x128_si256(
a: __m256i, b: __m256i, imm8: i32
@@ -1656,7 +1656,7 @@
/// Shuffle 64-bit floating-point elements in `a` across lanes using the
/// control in `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
pub unsafe fn _mm256_permute4x64_pd(a: f64x4, imm8: i32) -> f64x4 {
use x86::i586::avx::_mm256_undefined_pd;
@@ -1707,7 +1707,7 @@
/// Shuffle eight 32-bit floating-point elements in `a` across lanes using
/// the corresponding 32-bit integer index in `idx`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm256_permutevar8x32_ps(a: f32x8, idx: i32x8) -> f32x8 {
permps(a, idx)
@@ -1718,7 +1718,7 @@
/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
/// integers in the low 16 bits of each 64-bit element of the return value
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub unsafe fn _mm256_sad_epu8(a: u8x32, b: u8x32) -> u64x4 {
psadbw(a, b)
@@ -1754,7 +1754,7 @@
/// }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 {
pshufb(a, b)
@@ -1771,8 +1771,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i32x8;
/// use stdsimd::vendor::_mm256_shuffle_epi32;
///
@@ -1793,12 +1793,12 @@
/// assert_eq!(c1, expected1);
/// assert_eq!(c2, expected2);
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
pub unsafe fn _mm256_shuffle_epi32(a: i32x8, imm8: i32) -> i32x8 {
// simd_shuffleX requires that its selector parameter be made up of
@@ -1857,7 +1857,7 @@
/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
/// to the output.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
pub unsafe fn _mm256_shufflehi_epi16(a: i16x16, imm8: i32) -> i16x16 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -1912,7 +1912,7 @@
/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
/// to the output.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
pub unsafe fn _mm256_shufflelo_epi16(a: i16x16, imm8: i32) -> i16x16 {
let imm8 = (imm8 & 0xFF) as u8;
@@ -1967,7 +1967,7 @@
/// 16-bit integer in `b` is negative, and return the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
pub unsafe fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
psignw(a, b)
@@ -1977,7 +1977,7 @@
/// 32-bit integer in `b` is negative, and return the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
pub unsafe fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
psignd(a, b)
@@ -1987,7 +1987,7 @@
/// 8-bit integer in `b` is negative, and return the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
pub unsafe fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
psignb(a, b)
@@ -1996,7 +1996,7 @@
/// Shift packed 16-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
psllw(a, count)
@@ -2005,7 +2005,7 @@
/// Shift packed 32-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
pslld(a, count)
@@ -2014,7 +2014,7 @@
/// Shift packed 64-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
psllq(a, count)
@@ -2023,7 +2023,7 @@
/// Shift packed 16-bit integers in `a` left by `imm8` while
/// shifting in zeros, and return the results
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
pslliw(a, imm8)
@@ -2032,7 +2032,7 @@
/// Shift packed 32-bit integers in `a` left by `imm8` while
/// shifting in zeros, and return the results
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
psllid(a, imm8)
@@ -2041,7 +2041,7 @@
/// Shift packed 64-bit integers in `a` left by `imm8` while
/// shifting in zeros, and return the results
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
pslliq(a, imm8)
@@ -2049,7 +2049,7 @@
/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
@@ -2062,7 +2062,7 @@
/// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
_mm256_slli_si256(a, imm8)
@@ -2072,7 +2072,7 @@
/// specified by the corresponding element in `count` while
/// shifting in zeros, and return the result.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
psllvd(a, count)
@@ -2082,7 +2082,7 @@
/// specified by the corresponding element in `count` while
/// shifting in zeros, and return the result.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
psllvd256(a, count)
@@ -2092,7 +2092,7 @@
/// specified by the corresponding element in `count` while
/// shifting in zeros, and return the result.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
psllvq(a, count)
@@ -2102,7 +2102,7 @@
/// specified by the corresponding element in `count` while
/// shifting in zeros, and return the result.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
psllvq256(a, count)
@@ -2111,7 +2111,7 @@
/// Shift packed 16-bit integers in `a` right by `count` while
/// shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
psraw(a, count)
@@ -2120,7 +2120,7 @@
/// Shift packed 32-bit integers in `a` right by `count` while
/// shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
psrad(a, count)
@@ -2129,7 +2129,7 @@
/// Shift packed 16-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
psraiw(a, imm8)
@@ -2138,7 +2138,7 @@
/// Shift packed 32-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
psraid(a, imm8)
@@ -2147,7 +2147,7 @@
/// Shift packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
psravd(a, count)
@@ -2156,7 +2156,7 @@
/// Shift packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
psravd256(a, count)
@@ -2164,7 +2164,7 @@
/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
@@ -2177,7 +2177,7 @@
/// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
_mm256_srli_si256(a, imm8)
@@ -2186,7 +2186,7 @@
/// Shift packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
psrlw(a, count)
@@ -2195,7 +2195,7 @@
/// Shift packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
psrld(a, count)
@@ -2204,7 +2204,7 @@
/// Shift packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
psrlq(a, count)
@@ -2213,7 +2213,7 @@
/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
psrliw(a, imm8)
@@ -2222,7 +2222,7 @@
/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
psrlid(a, imm8)
@@ -2231,7 +2231,7 @@
/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
psrliq(a, imm8)
@@ -2240,7 +2240,7 @@
/// Shift packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
psrlvd(a, count)
@@ -2249,7 +2249,7 @@
/// Shift packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
psrlvd256(a, count)
@@ -2258,7 +2258,7 @@
/// Shift packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
psrlvq(a, count)
@@ -2267,7 +2267,7 @@
/// Shift packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
psrlvq256(a, count)
@@ -2277,7 +2277,7 @@
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
a - b
@@ -2285,7 +2285,7 @@
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
a - b
@@ -2293,7 +2293,7 @@
/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
a - b
@@ -2301,7 +2301,7 @@
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
a - b
@@ -2310,7 +2310,7 @@
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
/// `a` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
psubsw(a, b)
@@ -2319,7 +2319,7 @@
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
/// `a` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
psubsb(a, b)
@@ -2328,7 +2328,7 @@
/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
/// integers in `a` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
psubusw(a, b)
@@ -2337,7 +2337,7 @@
/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
/// integers in `a` using saturation.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
psubusb(a, b)
@@ -2354,8 +2354,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i8x32;
/// use stdsimd::vendor::_mm256_unpackhi_epi8;
///
@@ -2375,12 +2375,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
#[cfg_attr(rustfmt, rustfmt_skip)]
@@ -2403,8 +2403,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i8x32;
/// use stdsimd::vendor::_mm256_unpacklo_epi8;
///
@@ -2423,12 +2423,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
#[cfg_attr(rustfmt, rustfmt_skip)]
@@ -2451,8 +2451,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i16x16;
/// use stdsimd::vendor::_mm256_unpackhi_epi16;
///
@@ -2469,12 +2469,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
simd_shuffle16(
@@ -2495,8 +2495,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i16x16;
/// use stdsimd::vendor::_mm256_unpacklo_epi16;
///
@@ -2513,12 +2513,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 {
simd_shuffle16(
@@ -2539,8 +2539,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i32x8;
/// use stdsimd::vendor::_mm256_unpackhi_epi32;
///
@@ -2556,12 +2556,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm256_unpackhi_epi32(a: i32x8, b: i32x8) -> i32x8 {
simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
@@ -2578,8 +2578,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i32x8;
/// use stdsimd::vendor::_mm256_unpacklo_epi32;
///
@@ -2595,12 +2595,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm256_unpacklo_epi32(a: i32x8, b: i32x8) -> i32x8 {
simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
@@ -2617,8 +2617,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i64x4;
/// use stdsimd::vendor::_mm256_unpackhi_epi64;
///
@@ -2634,12 +2634,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm256_unpackhi_epi64(a: i64x4, b: i64x4) -> i64x4 {
simd_shuffle4(a, b, [1, 5, 3, 7])
@@ -2656,8 +2656,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
-/// # #[target_feature = "+avx2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "avx2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::i64x4;
/// use stdsimd::vendor::_mm256_unpacklo_epi64;
///
@@ -2673,12 +2673,12 @@
/// assert_eq!(c, expected);
///
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm256_unpacklo_epi64(a: i64x4, b: i64x4) -> i64x4 {
simd_shuffle4(a, b, [0, 4, 2, 6])
@@ -2687,7 +2687,7 @@
/// Compute the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
__m256i::from(i8x32::from(a) ^ i8x32::from(b))
@@ -2698,7 +2698,7 @@
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_extract_epi8(a: i8x32, imm8: i32) -> i8 {
let imm8 = (imm8 & 31) as u32;
@@ -2710,7 +2710,7 @@
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_extract_epi16(a: i16x16, imm8: i32) -> i16 {
let imm8 = (imm8 & 15) as u32;
@@ -2719,7 +2719,7 @@
/// Extract a 32-bit integer from `a`, selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_extract_epi32(a: i32x8, imm8: i32) -> i32 {
let imm8 = (imm8 & 7) as u32;
@@ -2728,7 +2728,7 @@
/// Extract a 64-bit integer from `a`, selected with `imm8`.
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
let imm8 = (imm8 & 3) as u32;
@@ -2737,7 +2737,7 @@
/// Returns the first element of the input vector of [4 x double].
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movsd))] FIXME
pub unsafe fn _mm256_cvtsd_f64(a: f64x4) -> f64 {
a.extract(0)
@@ -2745,7 +2745,7 @@
/// Returns the first element of the input vector of [8 x i32].
#[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movd))] FIXME
pub unsafe fn _mm256_cvtsi256_si32(a: i32x8) -> i32 {
a.extract(0)
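
The AVX2 hunks above are purely mechanical: every intrinsic keeps its body and its `unsafe` qualifier, and only the attribute spelling changes. A minimal sketch of the calling pattern the new syntax implies, assuming the `cfg_feature_enabled!` macro and the `stdsimd::simd`/`stdsimd::vendor` paths used in the doc-tests above (the `splat` constructor on the vector types is also assumed):

#[macro_use]
extern crate stdsimd;

use stdsimd::simd::u16x16;
use stdsimd::vendor::_mm256_max_epu16;

// Hypothetical wrapper: run the AVX2 path only after a runtime check.
fn max_u16(a: u16x16, b: u16x16) -> Option<u16x16> {
    if cfg_feature_enabled!("avx2") {
        // Sound: the `avx2` feature was just verified, so calling the
        // `#[target_feature(enable = "avx2")]` intrinsic is allowed.
        Some(unsafe { _mm256_max_epu16(a, b) })
    } else {
        None
    }
}

fn main() {
    if let Some(m) = max_u16(u16x16::splat(1), u16x16::splat(3)) {
        assert_eq!(m, u16x16::splat(3));
    }
}
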
diff --git a/coresimd/src/x86/i586/bmi.rs b/coresimd/src/x86/i586/bmi.rs
index f51a6d2..d713135 100644
--- a/coresimd/src/x86/i586/bmi.rs
+++ b/coresimd/src/x86/i586/bmi.rs
@@ -15,7 +15,7 @@
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(bextr))]
pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32))
@@ -24,7 +24,7 @@
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(bextr))]
#[cfg(not(target_arch = "x86"))]
pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
@@ -37,7 +37,7 @@
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(bextr))]
pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
x86_bmi_bextr_32(a, control)
@@ -49,7 +49,7 @@
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(bextr))]
#[cfg(not(target_arch = "x86"))]
pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
@@ -58,7 +58,7 @@
/// Bitwise logical `AND` of inverted `a` with `b`.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(andn))]
pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 {
!a & b
@@ -66,7 +66,7 @@
/// Bitwise logical `AND` of inverted `a` with `b`.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(andn))]
pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
!a & b
@@ -74,7 +74,7 @@
/// Extract lowest set isolated bit.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsi))]
pub unsafe fn _blsi_u32(x: u32) -> u32 {
x & x.wrapping_neg()
@@ -82,7 +82,7 @@
/// Extract lowest set isolated bit.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsi))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blsi_u64(x: u64) -> u64 {
@@ -91,7 +91,7 @@
/// Get mask up to lowest set bit.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsmsk))]
pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_sub(1_u32))
@@ -99,7 +99,7 @@
/// Get mask up to lowest set bit.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
@@ -110,7 +110,7 @@
///
/// If `x` is zero, sets CF.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsr))]
pub unsafe fn _blsr_u32(x: u32) -> u32 {
x & (x.wrapping_sub(1))
@@ -120,7 +120,7 @@
///
/// If `x` is zero, sets CF.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(blsr))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blsr_u64(x: u64) -> u64 {
@@ -131,7 +131,7 @@
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(tzcnt))]
pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
@@ -141,7 +141,7 @@
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(tzcnt))]
pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
@@ -151,7 +151,7 @@
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(tzcnt))]
pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 {
x.trailing_zeros() as i32
@@ -161,7 +161,7 @@
///
/// When the source operand is 0, it returns its size in bits.
#[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
#[cfg_attr(test, assert_instr(tzcnt))]
pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
x.trailing_zeros() as i64
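
The `bmi` intrinsics follow the same pattern, and several of them have portable scalar equivalents, so a caller can fall back when the feature is missing. A minimal sketch, assuming the same macro and module paths as above:

#[macro_use]
extern crate stdsimd;

use stdsimd::vendor::_tzcnt_u32;

// Count trailing zeros, preferring the TZCNT instruction when `bmi` is present.
fn trailing_zeros(x: u32) -> u32 {
    if cfg_feature_enabled!("bmi") {
        // Guarded by the runtime check above, so the `unsafe` call is sound.
        unsafe { _tzcnt_u32(x) }
    } else {
        // Portable fallback; `u32::trailing_zeros(0)` is also 32, matching
        // TZCNT's behaviour for a zero operand.
        x.trailing_zeros()
    }
}

fn main() {
    assert_eq!(trailing_zeros(0b1000), 3);
    assert_eq!(trailing_zeros(0), 32);
}
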
diff --git a/coresimd/src/x86/i586/bmi2.rs b/coresimd/src/x86/i586/bmi2.rs
index 3eebc41..88f161a 100644
--- a/coresimd/src/x86/i586/bmi2.rs
+++ b/coresimd/src/x86/i586/bmi2.rs
@@ -21,7 +21,7 @@
// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(mulx))]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
let result: u64 = (a as u64) * (b as u64);
*hi = (result >> 32) as u32;
@@ -34,7 +34,7 @@
/// the low half and the high half of the result.
#[inline(always)]
#[cfg_attr(test, assert_instr(mulx))]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg(not(target_arch = "x86"))] // calls an intrinsic
pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
let result: u128 = (a as u128) * (b as u128);
@@ -44,7 +44,7 @@
/// Zero the bits of `a` at positions greater than or equal to `index`.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(bzhi))]
pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 {
x86_bmi2_bzhi_32(a, index)
@@ -52,7 +52,7 @@
/// Zero the bits of `a` at positions greater than or equal to `index`.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(bzhi))]
#[cfg(not(target_arch = "x86"))]
pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
@@ -62,7 +62,7 @@
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pdep))]
pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 {
x86_bmi2_pdep_32(a, mask)
@@ -71,7 +71,7 @@
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pdep))]
#[cfg(not(target_arch = "x86"))]
pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
@@ -81,7 +81,7 @@
/// Gathers the bits of `a` specified by the `mask` into the contiguous low
/// order bit positions of the result.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pext))]
pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 {
x86_bmi2_pext_32(a, mask)
@@ -90,7 +90,7 @@
/// Gathers the bits of `a` specified by the `mask` into the contiguous low
/// order bit positions of the result.
#[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pext))]
#[cfg(not(target_arch = "x86"))]
pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
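
For `bmi2` the story is the same; as one more hedged sketch (same assumptions as the previous examples), PDEP and PEXT compose into a simple gather/scatter round-trip over a bit mask:

#[macro_use]
extern crate stdsimd;

use stdsimd::vendor::{_pdep_u32, _pext_u32};

// Gather the bits of `x` selected by `mask`, then scatter them back into the
// same positions; the round-trip reproduces `x & mask`.
fn roundtrip(x: u32, mask: u32) -> Option<u32> {
    if !cfg_feature_enabled!("bmi2") {
        return None;
    }
    // Sound: `bmi2` availability was checked above.
    unsafe {
        let packed = _pext_u32(x, mask);
        Some(_pdep_u32(packed, mask))
    }
}

fn main() {
    if let Some(r) = roundtrip(0b1011_0110, 0b0000_1111) {
        assert_eq!(r, 0b0000_0110);
    }
}
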
diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
index d332570..ebd1d04 100644
--- a/coresimd/src/x86/i586/sse.rs
+++ b/coresimd/src/x86/i586/sse.rs
@@ -13,7 +13,7 @@
/// Adds the first component of `a` and `b`, the other components are copied
/// from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addss))]
pub unsafe fn _mm_add_ss(a: f32x4, b: f32x4) -> f32x4 {
addss(a, b)
@@ -21,7 +21,7 @@
/// Adds f32x4 vectors.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
pub unsafe fn _mm_add_ps(a: f32x4, b: f32x4) -> f32x4 {
a + b
@@ -30,7 +30,7 @@
/// Subtracts the first component of `b` from `a`, the other components are
/// copied from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subss))]
pub unsafe fn _mm_sub_ss(a: f32x4, b: f32x4) -> f32x4 {
subss(a, b)
@@ -38,7 +38,7 @@
/// Subtracts f32x4 vectors.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
pub unsafe fn _mm_sub_ps(a: f32x4, b: f32x4) -> f32x4 {
a - b
@@ -47,7 +47,7 @@
/// Multiplies the first component of `a` and `b`, the other components are
/// copied from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulss))]
pub unsafe fn _mm_mul_ss(a: f32x4, b: f32x4) -> f32x4 {
mulss(a, b)
@@ -55,7 +55,7 @@
/// Multiplies f32x4 vectors.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
pub unsafe fn _mm_mul_ps(a: f32x4, b: f32x4) -> f32x4 {
a * b
@@ -64,7 +64,7 @@
/// Divides the first component of `a` by `b`, the other components are
/// copied from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divss))]
pub unsafe fn _mm_div_ss(a: f32x4, b: f32x4) -> f32x4 {
divss(a, b)
@@ -72,7 +72,7 @@
/// Divides f32x4 vectors.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
pub unsafe fn _mm_div_ps(a: f32x4, b: f32x4) -> f32x4 {
a / b
@@ -81,7 +81,7 @@
/// Return the square root of the first single-precision (32-bit)
/// floating-point element in `a`, the other elements are unchanged.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtss))]
pub unsafe fn _mm_sqrt_ss(a: f32x4) -> f32x4 {
sqrtss(a)
@@ -90,7 +90,7 @@
/// Return the square root of packed single-precision (32-bit) floating-point
/// elements in `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
pub unsafe fn _mm_sqrt_ps(a: f32x4) -> f32x4 {
sqrtps(a)
@@ -99,7 +99,7 @@
/// Return the approximate reciprocal of the first single-precision
/// (32-bit) floating-point element in `a`, the other elements are unchanged.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
pub unsafe fn _mm_rcp_ss(a: f32x4) -> f32x4 {
rcpss(a)
@@ -108,7 +108,7 @@
/// Return the approximate reciprocal of packed single-precision (32-bit)
/// floating-point elements in `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
pub unsafe fn _mm_rcp_ps(a: f32x4) -> f32x4 {
rcpps(a)
@@ -117,7 +117,7 @@
/// Return the approximate reciprocal square root of the first single-precision
/// (32-bit) floating-point element in `a`, the other elements are unchanged.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
pub unsafe fn _mm_rsqrt_ss(a: f32x4) -> f32x4 {
rsqrtss(a)
@@ -126,7 +126,7 @@
/// Return the approximate reciprocal square root of packed single-precision
/// (32-bit) floating-point elements in `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
pub unsafe fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
rsqrtps(a)
@@ -136,7 +136,7 @@
/// and `b`, and return the minimum value in the first element of the return
/// value, the other elements are copied from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
pub unsafe fn _mm_min_ss(a: f32x4, b: f32x4) -> f32x4 {
minss(a, b)
@@ -145,7 +145,7 @@
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
/// `b`, and return the corresponding minimum values.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
pub unsafe fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
minps(a, b)
@@ -155,7 +155,7 @@
/// and `b`, and return the maximum value in the first element of the return
/// value, the other elements are copied from `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
pub unsafe fn _mm_max_ss(a: f32x4, b: f32x4) -> f32x4 {
maxss(a, b)
@@ -164,7 +164,7 @@
/// Compare packed single-precision (32-bit) floating-point elements in `a` and
/// `b`, and return the corresponding maximum values.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
pub unsafe fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
maxps(a, b)
@@ -172,7 +172,7 @@
/// Bitwise AND of packed single-precision (32-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// i586 only seems to generate plain `and` instructions, so ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(andps))]
@@ -187,7 +187,7 @@
///
/// Computes `!a & b` for each bit in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// i586 only seems to generate plain `not` and `and` instructions, so ignore
// it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
@@ -200,7 +200,7 @@
/// Bitwise OR of packed single-precision (32-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// i586 only seems to generate plain `or` instructions, so we ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(orps))]
@@ -213,7 +213,7 @@
/// Bitwise exclusive OR of packed single-precision (32-bit) floating-point
/// elements.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// i586 only seems to generate plain `xor` instructions, so we ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(xorps))]
@@ -227,7 +227,7 @@
/// the result will be `0xffffffff` if the two inputs are equal, or `0`
/// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
pub unsafe fn _mm_cmpeq_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 0)
@@ -238,7 +238,7 @@
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
/// upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
pub unsafe fn _mm_cmplt_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 1)
@@ -249,7 +249,7 @@
/// or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
/// are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
pub unsafe fn _mm_cmple_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 2)
@@ -260,7 +260,7 @@
/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
/// are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
pub unsafe fn _mm_cmpgt_ss(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3])
@@ -271,7 +271,7 @@
/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
/// of the result are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
pub unsafe fn _mm_cmpge_ss(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3])
@@ -282,7 +282,7 @@
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
/// upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
pub unsafe fn _mm_cmpneq_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 4)
@@ -293,7 +293,7 @@
/// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
/// upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
pub unsafe fn _mm_cmpnlt_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 5)
@@ -304,7 +304,7 @@
/// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
/// of the result are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
pub unsafe fn _mm_cmpnle_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 6)
@@ -315,7 +315,7 @@
/// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are
/// the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
pub unsafe fn _mm_cmpngt_ss(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3])
@@ -326,7 +326,7 @@
/// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96
/// bits of the result are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
pub unsafe fn _mm_cmpnge_ss(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3])
@@ -337,7 +337,7 @@
/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
/// are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
pub unsafe fn _mm_cmpord_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 7)
@@ -348,7 +348,7 @@
/// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
/// are the upper 96 bits of `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
pub unsafe fn _mm_cmpunord_ss(a: f32x4, b: f32x4) -> f32x4 {
cmpss(a, b, 3)
@@ -358,7 +358,7 @@
/// The result in the output vector will be `0xffffffff` if the input elements
/// were equal, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
pub unsafe fn _mm_cmpeq_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 0)
@@ -368,7 +368,7 @@
/// The result in the output vector will be `0xffffffff` if the input element
/// in `a` is less than the corresponding element in `b`, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
pub unsafe fn _mm_cmplt_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 1)
@@ -379,7 +379,7 @@
/// in `a` is less than or equal to the corresponding element in `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
pub unsafe fn _mm_cmple_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 2)
@@ -389,7 +389,7 @@
/// The result in the output vector will be `0xffffffff` if the input element
/// in `a` is greater than the corresponding element in `b`, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
pub unsafe fn _mm_cmpgt_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 1)
@@ -400,7 +400,7 @@
/// in `a` is greater than or equal to the corresponding element in `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
pub unsafe fn _mm_cmpge_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 2)
@@ -410,7 +410,7 @@
/// The result in the output vector will be `0xffffffff` if the input elements
/// are *not* equal, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
pub unsafe fn _mm_cmpneq_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 4)
@@ -421,7 +421,7 @@
/// in `a` is *not* less than the corresponding element in `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
pub unsafe fn _mm_cmpnlt_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 5)
@@ -432,7 +432,7 @@
/// in `a` is *not* less than or equal to the corresponding element in `b`, or
/// `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
pub unsafe fn _mm_cmpnle_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(a, b, 6)
@@ -443,7 +443,7 @@
/// in `a` is *not* greater than the corresponding element in `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
pub unsafe fn _mm_cmpngt_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 5)
@@ -454,7 +454,7 @@
/// in `a` is *not* greater than or equal to the corresponding element in `b`,
/// or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
pub unsafe fn _mm_cmpnge_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 6)
@@ -465,7 +465,7 @@
/// in the output vector will be `0xffffffff` if the input elements in `a` and
/// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
pub unsafe fn _mm_cmpord_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 7)
@@ -476,7 +476,7 @@
/// in the output vector will be `0xffffffff` if the input elements in `a` and
/// `b` are unordered (i.e., at least one of them is a NaN), or 0 otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
pub unsafe fn _mm_cmpunord_ps(a: f32x4, b: f32x4) -> f32x4 {
cmpps(b, a, 3)
@@ -485,7 +485,7 @@
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
/// `1` if they are equal, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comieq_ss(a: f32x4, b: f32x4) -> i32 {
comieq_ss(a, b)
@@ -494,7 +494,7 @@
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
/// `1` if the value from `a` is less than the one from `b`, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comilt_ss(a: f32x4, b: f32x4) -> i32 {
comilt_ss(a, b)
@@ -504,7 +504,7 @@
/// `1` if the value from `a` is less than or equal to the one from `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comile_ss(a: f32x4, b: f32x4) -> i32 {
comile_ss(a, b)
@@ -514,7 +514,7 @@
/// `1` if the value from `a` is greater than the one from `b`, or `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comigt_ss(a: f32x4, b: f32x4) -> i32 {
comigt_ss(a, b)
@@ -524,7 +524,7 @@
/// `1` if the value from `a` is greater than or equal to the one from `b`, or
/// `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comige_ss(a: f32x4, b: f32x4) -> i32 {
comige_ss(a, b)
@@ -533,7 +533,7 @@
/// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
/// `1` if they are *not* equal, or `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
pub unsafe fn _mm_comineq_ss(a: f32x4, b: f32x4) -> i32 {
comineq_ss(a, b)
@@ -543,7 +543,7 @@
/// `1` if they are equal, or `0` otherwise. This instruction will not signal
/// an exception if either argument is a quiet NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomieq_ss(a: f32x4, b: f32x4) -> i32 {
ucomieq_ss(a, b)
@@ -554,7 +554,7 @@
/// This instruction will not signal an exception if either argument is a quiet
/// NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomilt_ss(a: f32x4, b: f32x4) -> i32 {
ucomilt_ss(a, b)
@@ -565,7 +565,7 @@
/// otherwise. This instruction will not signal an exception if either argument
/// is a quiet NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomile_ss(a: f32x4, b: f32x4) -> i32 {
ucomile_ss(a, b)
@@ -576,7 +576,7 @@
/// otherwise. This instruction will not signal an exception if either argument
/// is a quiet NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomigt_ss(a: f32x4, b: f32x4) -> i32 {
ucomigt_ss(a, b)
@@ -587,7 +587,7 @@
/// `0` otherwise. This instruction will not signal an exception if either
/// argument is a quiet NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomige_ss(a: f32x4, b: f32x4) -> i32 {
ucomige_ss(a, b)
@@ -597,7 +597,7 @@
/// `1` if they are *not* equal, or `0` otherwise. This instruction will not
/// signal an exception if either argument is a quiet NaN.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
pub unsafe fn _mm_ucomineq_ss(a: f32x4, b: f32x4) -> i32 {
ucomineq_ss(a, b)
@@ -612,7 +612,7 @@
///
/// This corresponds to the `CVTSS2SI` instruction (with 32 bit output).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
pub unsafe fn _mm_cvtss_si32(a: f32x4) -> i32 {
cvtss2si(a)
@@ -620,7 +620,7 @@
/// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
_mm_cvtss_si32(a)
@@ -637,7 +637,7 @@
///
/// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
pub unsafe fn _mm_cvttss_si32(a: f32x4) -> i32 {
cvttss2si(a)
@@ -645,7 +645,7 @@
/// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
_mm_cvttss_si32(a)
@@ -653,7 +653,7 @@
/// Extract the lowest 32 bit float from the input vector.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// No point in using assert_instrs. In Unix x86_64 calling convention this is a
// no-op, and on Windows it's just a `mov`.
pub unsafe fn _mm_cvtss_f32(a: f32x4) -> f32 {
@@ -666,7 +666,7 @@
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit
/// input).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
pub unsafe fn _mm_cvtsi32_ss(a: f32x4, b: i32) -> f32x4 {
cvtsi2ss(a, b)
@@ -674,7 +674,7 @@
/// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
pub unsafe fn _mm_cvt_si2ss(a: f32x4, b: i32) -> f32x4 {
_mm_cvtsi32_ss(a, b)
@@ -683,7 +683,7 @@
/// Construct a `f32x4` with the lowest element set to `a` and the rest set to
/// zero.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_set_ss(a: f32) -> f32x4 {
f32x4::new(a, 0.0, 0.0, 0.0)
@@ -691,7 +691,7 @@
/// Construct a `f32x4` with all elements set to `a`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
pub unsafe fn _mm_set1_ps(a: f32) -> f32x4 {
f32x4::new(a, a, a, a)
@@ -699,7 +699,7 @@
/// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
_mm_set1_ps(a)
@@ -723,7 +723,7 @@
/// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a));
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> f32x4 {
f32x4::new(d, c, b, a)
@@ -738,7 +738,7 @@
/// assert_eq!(f32x4::new(a, b, c, d), _mm_setr_ps(a, b, c, d));
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))]
// On a 32-bit architecture it just copies the operands from the stack.
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))]
@@ -748,7 +748,7 @@
/// Construct a `f32x4` with all elements initialized to zero.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
pub unsafe fn _mm_setzero_ps() -> f32x4 {
f32x4::new(0.0, 0.0, 0.0, 0.0)
@@ -760,7 +760,7 @@
/// The lower half of the result takes values from `a` and the higher half
/// from `b`. The mask is split into four 2-bit control fields, each of which
/// indexes one element from the inputs.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, mask = 3))]
pub unsafe fn _mm_shuffle_ps(a: f32x4, b: f32x4, mask: u32) -> f32x4 {
let mask = (mask & 0xFF) as u8;
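// A small usage sketch of the mask encoding described above (the low two
// 2-bit fields select lanes of `a` for result lanes 0-1, the high two select
// lanes of `b` for result lanes 2-3); the helper name is hypothetical.
#[cfg(test)]
unsafe fn shuffle_ps_sketch() {
    let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
    let b = f32x4::new(5.0, 6.0, 7.0, 8.0);
    // 0b00_01_10_11 selects a[3], a[2], b[1], b[0]
    let r = _mm_shuffle_ps(a, b, 0b00_01_10_11);
    assert_eq!(r, f32x4::new(4.0, 3.0, 6.0, 5.0));
}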
@@ -811,7 +811,7 @@
/// Unpack and interleave single-precision (32-bit) floating-point elements
/// from the higher half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
pub unsafe fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, b, [2, 6, 3, 7])
@@ -820,7 +820,7 @@
/// Unpack and interleave single-precision (32-bit) floating-point elements
/// from the lower half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
pub unsafe fn _mm_unpacklo_ps(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, b, [0, 4, 1, 5])
@@ -829,7 +829,7 @@
/// Combine the higher halves of `a` and `b`. The higher half of `b` occupies
/// the lower half of the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(windows)), assert_instr(movhlps))]
#[cfg_attr(all(test, windows), assert_instr(unpckhpd))]
pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
@@ -840,7 +840,7 @@
/// Combine the lower halves of `a` and `b`. The lower half of `b` occupies
/// the higher half of the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
@@ -852,7 +852,7 @@
/// The mask is stored in the 4 least significant bits of the return value.
/// All other bits are set to `0`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movmskps))]
pub unsafe fn _mm_movemask_ps(a: f32x4) -> i32 {
movmskps(a)
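// A small sketch of the sign-bit mask above: lanes 1 and 3 are negative, so
// bits 1 and 3 of the returned mask are set; the helper name is hypothetical.
#[cfg(test)]
unsafe fn movemask_ps_sketch() {
    let a = f32x4::new(1.0, -2.0, 3.0, -4.0);
    assert_eq!(_mm_movemask_ps(a), 0b1010);
}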
@@ -873,8 +873,8 @@
/// # // The real main function
/// # fn main() {
/// # if cfg_feature_enabled!("sse") {
-/// # #[target_feature = "+sse"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse")]
+/// # unsafe fn worker() {
/// #
/// # use stdsimd::simd::f32x4;
/// # use stdsimd::vendor::_mm_loadh_pi;
@@ -887,12 +887,12 @@
/// assert_eq!(r, f32x4::new(1.0, 2.0, 5.0, 6.0));
/// #
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// TODO: generates MOVHPD if the CPU supports SSE2.
// #[cfg_attr(test, assert_instr(movhps))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
@@ -925,8 +925,8 @@
/// # // The real main function
/// # fn main() {
/// # if cfg_feature_enabled!("sse") {
-/// # #[target_feature = "+sse"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse")]
+/// # unsafe fn worker() {
/// #
/// # use stdsimd::simd::f32x4;
/// # use stdsimd::vendor::_mm_loadl_pi;
@@ -939,12 +939,12 @@
/// assert_eq!(r, f32x4::new(5.0, 6.0, 3.0, 4.0));
/// #
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// TODO: generates MOVLPD if the CPU supports SSE2.
// #[cfg_attr(test, assert_instr(movlps))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
@@ -967,7 +967,7 @@
///
/// This corresponds to instructions `VMOVSS` / `MOVSS`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_load_ss(p: *const f32) -> f32x4 {
f32x4::new(*p, 0.0, 0.0, 0.0)
@@ -979,7 +979,7 @@
/// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some
/// shuffling.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_load1_ps(p: *const f32) -> f32x4 {
let a = *p;
@@ -988,7 +988,7 @@
/// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html)
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_load_ps1(p: *const f32) -> f32x4 {
_mm_load1_ps(p)
@@ -1003,7 +1003,7 @@
///
/// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_load_ps(p: *const f32) -> f32x4 {
*(p as *const f32x4)
@@ -1017,7 +1017,7 @@
///
/// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movups))]
pub unsafe fn _mm_loadu_ps(p: *const f32) -> f32x4 {
// Note: Using `*p` would require `f32` alignment, but `movups` has no
@@ -1050,7 +1050,7 @@
/// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some
/// shuffling.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_loadr_ps(p: *const f32) -> f32x4 {
let a = _mm_load_ps(p);
@@ -1062,7 +1062,7 @@
/// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
/// choose to generate an equivalent sequence of other instructions.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS; that's
// fine.
// On i586 (no SSE2) it just generates plain MOV instructions.
@@ -1092,7 +1092,7 @@
/// This intrinsic corresponds to the `MOVQ` instruction. The compiler may
/// choose to generate an equivalent sequence of other instructions.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
// On i586 the codegen just generates plain MOVs. No need to test for that.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
not(target_family = "windows")),
@@ -1122,7 +1122,7 @@
///
/// This intrinsic corresponds to the `MOVSS` instruction.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_store_ss(p: *mut f32, a: f32x4) {
*p = a.extract(0)
@@ -1145,7 +1145,7 @@
/// *p.offset(3) = x;
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_store1_ps(p: *mut f32, a: f32x4) {
let b: f32x4 = simd_shuffle4(a, a, [0, 0, 0, 0]);
@@ -1154,7 +1154,7 @@
/// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html)
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_store_ps1(p: *mut f32, a: f32x4) {
_mm_store1_ps(p, a);
@@ -1170,7 +1170,7 @@
///
/// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_store_ps(p: *mut f32, a: f32x4) {
*(p as *mut f32x4) = a;
@@ -1182,7 +1182,7 @@
///
/// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movups))]
pub unsafe fn _mm_storeu_ps(p: *mut f32, a: f32x4) {
ptr::copy_nonoverlapping(
@@ -1207,7 +1207,7 @@
/// *p.offset(3) = a.extract(0);
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_storer_ps(p: *mut f32, a: f32x4) {
let b: f32x4 = simd_shuffle4(a, a, [3, 2, 1, 0]);
@@ -1222,7 +1222,7 @@
/// _mm_move_ss(a, b) == a.replace(0, b.extract(0))
/// ```
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
pub unsafe fn _mm_move_ss(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, b, [4, 1, 2, 3])
@@ -1235,7 +1235,7 @@
/// globally visible before any store instruction which follows the fence in
/// program order.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
pub unsafe fn _mm_sfence() {
sfence()
@@ -1245,7 +1245,7 @@
///
/// For more info see [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(stmxcsr))]
pub unsafe fn _mm_getcsr() -> u32 {
let mut result = 0_i32;
@@ -1379,7 +1379,7 @@
/// ```
///
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ldmxcsr))]
pub unsafe fn _mm_setcsr(val: u32) {
ldmxcsr(&val as *const _ as *const i8);
@@ -1437,7 +1437,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
_mm_getcsr() & _MM_MASK_MASK
}
@@ -1445,7 +1445,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
_mm_getcsr() & _MM_EXCEPT_MASK
}
@@ -1453,7 +1453,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
_mm_getcsr() & _MM_FLUSH_ZERO_MASK
}
@@ -1461,7 +1461,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
_mm_getcsr() & _MM_ROUND_MASK
}
@@ -1469,7 +1469,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
_mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
}
@@ -1477,7 +1477,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
_mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
}
@@ -1485,7 +1485,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
// println!("setting csr={:x}", val);
@@ -1495,7 +1495,7 @@
/// See [`_mm_setcsr`](fn._mm_setcsr.html)
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
_mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x)
}
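// A sketch of the save/modify/restore pattern these wrappers build on top of
// `_mm_getcsr`/`_mm_setcsr`; only names that appear above are used, and the
// helper name is hypothetical.
#[cfg(test)]
unsafe fn mxcsr_sketch() {
    let saved = _mm_getcsr(); // snapshot the whole control/status register
    let rounding = _MM_GET_ROUNDING_MODE();
    assert_eq!(rounding, saved & _MM_ROUND_MASK);
    _MM_SET_ROUNDING_MODE(rounding); // writing the same bits back is a no-op
    _mm_setcsr(saved); // restore the original state
}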
@@ -1549,7 +1549,7 @@
/// resources (e.g., request buffers).
///
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, strategy = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
@@ -1574,7 +1574,7 @@
/// Return vector of type __m128 with undefined elements.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _mm_undefined_ps() -> f32x4 {
f32x4::splat(mem::uninitialized())
}
@@ -1582,7 +1582,7 @@
/// Transpose the 4x4 matrix formed by 4 rows of f32x4 in place.
#[inline(always)]
#[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
pub unsafe fn _MM_TRANSPOSE4_PS(
row0: &mut f32x4, row1: &mut f32x4, row2: &mut f32x4, row3: &mut f32x4
) {
@@ -1680,7 +1680,7 @@
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception _may_ be generated.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: f32x4) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
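// A sketch of the usual pairing of a non-temporal store with `_mm_sfence`, so
// the streamed data is globally visible before any later stores; the function
// and its pointer parameter are hypothetical, and `dst` is assumed to be
// 16-byte aligned.
unsafe fn stream_then_fence(dst: *mut f32) {
    let v = f32x4::new(1.0, 2.0, 3.0, 4.0);
    _mm_stream_ps(dst, v); // non-temporal hint: bypass the cache hierarchy
    _mm_sfence(); // order the streamed store before subsequent stores
}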
@@ -1689,7 +1689,7 @@
/// Store 64-bits of integer data from a into memory using a non-temporal
/// memory hint.
#[inline(always)]
-#[target_feature = "+sse,+mmx"]
+#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(movntq))]
pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
movntdq(mem_addr, a)
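// Under the new syntax, a function that needs several features lists them in
// a single `enable` string (as in `_mm_stream_pi` above), and because
// `#[target_feature]` now requires `unsafe fn`, callers must wrap the call in
// an `unsafe` block; a minimal sketch with a hypothetical wrapper:
#[target_feature(enable = "sse,mmx")]
unsafe fn stream_pi_wrapper(mem_addr: *mut __m64, a: __m64) {
    _mm_stream_pi(mem_addr, a)
}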
diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs
index 44b76d6..d965360 100644
--- a/coresimd/src/x86/i586/sse2.rs
+++ b/coresimd/src/x86/i586/sse2.rs
@@ -16,7 +16,7 @@
/// This can improve the performance and reduce the power consumption of
/// spin-wait loops.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pause))]
pub unsafe fn _mm_pause() {
pause()
@@ -25,7 +25,7 @@
/// Invalidate and flush the cache line that contains `p` from all levels of
/// the cache hierarchy.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
pub unsafe fn _mm_clflush(p: *mut u8) {
clflush(p)
@@ -38,7 +38,7 @@
/// globally visible before any load instruction which follows the fence in
/// program order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
pub unsafe fn _mm_lfence() {
lfence()
@@ -51,7 +51,7 @@
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
pub unsafe fn _mm_mfence() {
mfence()
@@ -59,7 +59,7 @@
/// Add packed 8-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
pub unsafe fn _mm_add_epi8(a: i8x16, b: i8x16) -> i8x16 {
a + b
@@ -67,7 +67,7 @@
/// Add packed 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
pub unsafe fn _mm_add_epi16(a: i16x8, b: i16x8) -> i16x8 {
a + b
@@ -75,7 +75,7 @@
/// Add packed 32-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _mm_add_epi32(a: i32x4, b: i32x4) -> i32x4 {
a + b
@@ -83,7 +83,7 @@
/// Add packed 64-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_epi64(a: i64x2, b: i64x2) -> i64x2 {
a + b
@@ -91,7 +91,7 @@
/// Add packed 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 {
paddsb(a, b)
@@ -99,7 +99,7 @@
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
paddsw(a, b)
@@ -107,7 +107,7 @@
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _mm_adds_epu8(a: u8x16, b: u8x16) -> u8x16 {
paddsub(a, b)
@@ -115,7 +115,7 @@
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _mm_adds_epu16(a: u16x8, b: u16x8) -> u16x8 {
paddsuw(a, b)
@@ -123,7 +123,7 @@
/// Average packed unsigned 8-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_epu8(a: u8x16, b: u8x16) -> u8x16 {
pavgb(a, b)
@@ -131,7 +131,7 @@
/// Average packed unsigned 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_epu16(a: u16x8, b: u16x8) -> u16x8 {
pavgw(a, b)
@@ -143,7 +143,7 @@
/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
/// intermediate 32-bit integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
pub unsafe fn _mm_madd_epi16(a: i16x8, b: i16x8) -> i32x4 {
pmaddwd(a, b)
@@ -152,7 +152,7 @@
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
/// maximum values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_epi16(a: i16x8, b: i16x8) -> i16x8 {
pmaxsw(a, b)
@@ -161,7 +161,7 @@
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
/// packed maximum values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_epu8(a: u8x16, b: u8x16) -> u8x16 {
pmaxub(a, b)
@@ -170,7 +170,7 @@
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
/// minimum values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_epi16(a: i16x8, b: i16x8) -> i16x8 {
pminsw(a, b)
@@ -179,7 +179,7 @@
/// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
/// packed minimum values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_epu8(a: u8x16, b: u8x16) -> u8x16 {
pminub(a, b)
@@ -190,7 +190,7 @@
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
pub unsafe fn _mm_mulhi_epi16(a: i16x8, b: i16x8) -> i16x8 {
pmulhw(a, b)
@@ -201,7 +201,7 @@
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_epu16(a: u16x8, b: u16x8) -> u16x8 {
pmulhuw(a, b)
@@ -212,7 +212,7 @@
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
pub unsafe fn _mm_mullo_epi16(a: i16x8, b: i16x8) -> i16x8 {
a * b
@@ -223,7 +223,7 @@
///
/// Return the unsigned 64-bit results.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_epu32(a: u32x4, b: u32x4) -> u64x2 {
pmuludq(a, b)
@@ -236,7 +236,7 @@
/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
/// the low 16 bits of 64-bit elements returned.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_epu8(a: u8x16, b: u8x16) -> u64x2 {
psadbw(a, b)
@@ -244,7 +244,7 @@
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _mm_sub_epi8(a: i8x16, b: i8x16) -> i8x16 {
a - b
@@ -252,7 +252,7 @@
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _mm_sub_epi16(a: i16x8, b: i16x8) -> i16x8 {
a - b
@@ -260,7 +260,7 @@
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _mm_sub_epi32(a: i32x4, b: i32x4) -> i32x4 {
a - b
@@ -268,7 +268,7 @@
/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_epi64(a: i64x2, b: i64x2) -> i64x2 {
a - b
@@ -277,7 +277,7 @@
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _mm_subs_epi8(a: i8x16, b: i8x16) -> i8x16 {
psubsb(a, b)
@@ -286,7 +286,7 @@
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _mm_subs_epi16(a: i16x8, b: i16x8) -> i16x8 {
psubsw(a, b)
@@ -295,7 +295,7 @@
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _mm_subs_epu8(a: u8x16, b: u8x16) -> u8x16 {
psubusb(a, b)
@@ -304,7 +304,7 @@
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 {
psubusw(a, b)
@@ -312,7 +312,7 @@
/// Shift `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
pub unsafe fn _mm_slli_si128(a: i8x16, imm8: i32) -> i8x16 {
let (zero, imm8) = (i8x16::splat(0), imm8 as u32);
@@ -353,7 +353,7 @@
/// Shift `a` left by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
pub unsafe fn _mm_bslli_si128(a: i8x16, imm8: i32) -> i8x16 {
_mm_slli_si128(a, imm8)
@@ -361,7 +361,7 @@
/// Shift `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
pub unsafe fn _mm_bsrli_si128(a: i8x16, imm8: i32) -> i8x16 {
_mm_srli_si128(a, imm8)
@@ -369,7 +369,7 @@
/// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 {
pslliw(a, imm8)
@@ -378,7 +378,7 @@
/// Shift packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
pub unsafe fn _mm_sll_epi16(a: i16x8, count: i16x8) -> i16x8 {
psllw(a, count)
@@ -386,7 +386,7 @@
/// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
pub unsafe fn _mm_slli_epi32(a: i32x4, imm8: i32) -> i32x4 {
psllid(a, imm8)
@@ -395,7 +395,7 @@
/// Shift packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
pub unsafe fn _mm_sll_epi32(a: i32x4, count: i32x4) -> i32x4 {
pslld(a, count)
@@ -403,7 +403,7 @@
/// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
pub unsafe fn _mm_slli_epi64(a: i64x2, imm8: i32) -> i64x2 {
pslliq(a, imm8)
@@ -412,7 +412,7 @@
/// Shift packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
pub unsafe fn _mm_sll_epi64(a: i64x2, count: i64x2) -> i64x2 {
psllq(a, count)
@@ -421,7 +421,7 @@
/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign
/// bits.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
pub unsafe fn _mm_srai_epi16(a: i16x8, imm8: i32) -> i16x8 {
psraiw(a, imm8)
@@ -430,7 +430,7 @@
/// Shift packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
pub unsafe fn _mm_sra_epi16(a: i16x8, count: i16x8) -> i16x8 {
psraw(a, count)
@@ -439,7 +439,7 @@
/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign
/// bits.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
pub unsafe fn _mm_srai_epi32(a: i32x4, imm8: i32) -> i32x4 {
psraid(a, imm8)
@@ -448,7 +448,7 @@
/// Shift packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 {
psrad(a, count)
@@ -456,7 +456,7 @@
/// Shift `a` right by `imm8` bytes while shifting in zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 {
let (zero, imm8) = (i8x16::splat(0), imm8 as u32);
@@ -498,7 +498,7 @@
/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 {
psrliw(a, imm8)
@@ -507,7 +507,7 @@
/// Shift packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
pub unsafe fn _mm_srl_epi16(a: i16x8, count: i16x8) -> i16x8 {
psrlw(a, count)
@@ -516,7 +516,7 @@
/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
pub unsafe fn _mm_srli_epi32(a: i32x4, imm8: i32) -> i32x4 {
psrlid(a, imm8)
@@ -525,7 +525,7 @@
/// Shift packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
pub unsafe fn _mm_srl_epi32(a: i32x4, count: i32x4) -> i32x4 {
psrld(a, count)
@@ -534,7 +534,7 @@
/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
pub unsafe fn _mm_srli_epi64(a: i64x2, imm8: i32) -> i64x2 {
psrliq(a, imm8)
@@ -543,7 +543,7 @@
/// Shift packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
pub unsafe fn _mm_srl_epi64(a: i64x2, count: i64x2) -> i64x2 {
psrlq(a, count)
@@ -552,7 +552,7 @@
/// Compute the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
__m128i::from(i8x16::from(a) & i8x16::from(b))
@@ -561,7 +561,7 @@
/// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
__m128i::from((!i8x16::from(a)) & i8x16::from(b))
@@ -570,7 +570,7 @@
/// Compute the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
__m128i::from(i8x16::from(a) | i8x16::from(b))
@@ -579,7 +579,7 @@
/// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
__m128i::from(i8x16::from(a) ^ i8x16::from(b))
@@ -587,7 +587,7 @@
/// Compare packed 8-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
pub unsafe fn _mm_cmpeq_epi8(a: i8x16, b: i8x16) -> i8x16 {
a.eq(b)
@@ -595,7 +595,7 @@
/// Compare packed 16-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
pub unsafe fn _mm_cmpeq_epi16(a: i16x8, b: i16x8) -> i16x8 {
a.eq(b)
@@ -603,7 +603,7 @@
/// Compare packed 32-bit integers in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
pub unsafe fn _mm_cmpeq_epi32(a: i32x4, b: i32x4) -> i32x4 {
a.eq(b)
@@ -611,7 +611,7 @@
/// Compare packed 8-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_epi8(a: i8x16, b: i8x16) -> i8x16 {
a.gt(b)
@@ -619,7 +619,7 @@
/// Compare packed 16-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_epi16(a: i16x8, b: i16x8) -> i16x8 {
a.gt(b)
@@ -627,7 +627,7 @@
/// Compare packed 32-bit integers in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_epi32(a: i32x4, b: i32x4) -> i32x4 {
a.gt(b)
@@ -635,7 +635,7 @@
/// Compare packed 8-bit integers in `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmplt_epi8(a: i8x16, b: i8x16) -> i8x16 {
a.lt(b)
@@ -643,7 +643,7 @@
/// Compare packed 16-bit integers in `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmplt_epi16(a: i16x8, b: i16x8) -> i16x8 {
a.lt(b)
@@ -651,7 +651,7 @@
/// Compare packed 32-bit integers in `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 {
a.lt(b)
@@ -660,7 +660,7 @@
/// Convert the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 {
simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1]))
@@ -669,7 +669,7 @@
/// Return `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
pub unsafe fn _mm_cvtsi32_sd(a: f64x2, b: i32) -> f64x2 {
a.replace(0, b as f64)
@@ -678,7 +678,7 @@
/// Convert packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 {
cvtdq2ps(a)
@@ -687,7 +687,7 @@
/// Convert packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 {
cvtps2dq(a)
@@ -696,7 +696,7 @@
/// Return a vector whose lowest element is `a` and all higher elements are
/// `0`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> i32x4 {
i32x4::new(a, 0, 0, 0)
@@ -704,7 +704,7 @@
/// Return the lowest element of `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows
pub unsafe fn _mm_cvtsi128_si32(a: i32x4) -> i32 {
a.extract(0)
@@ -713,7 +713,7 @@
/// Set packed 64-bit integers with the supplied values, from highest to
/// lowest.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> i64x2 {
i64x2::new(e0, e1)
@@ -721,7 +721,7 @@
/// Set packed 32-bit integers with the supplied values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
i32x4::new(e0, e1, e2, e3)
@@ -729,7 +729,7 @@
/// Set packed 16-bit integers with the supplied values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
@@ -739,7 +739,7 @@
/// Set packed 8-bit integers with the supplied values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set_epi8(
e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
@@ -753,7 +753,7 @@
/// Broadcast 64-bit integer `a` to all elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64x(a: i64) -> i64x2 {
i64x2::splat(a)
@@ -761,7 +761,7 @@
/// Broadcast 32-bit integer `a` to all elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi32(a: i32) -> i32x4 {
i32x4::splat(a)
@@ -769,7 +769,7 @@
/// Broadcast 16-bit integer `a` to all elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi16(a: i16) -> i16x8 {
i16x8::splat(a)
@@ -777,7 +777,7 @@
/// Broadcast 8-bit integer `a` to all elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi8(a: i8) -> i8x16 {
i8x16::splat(a)
@@ -785,7 +785,7 @@
/// Set packed 32-bit integers with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
i32x4::new(e3, e2, e1, e0)
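// A sketch of the argument ordering: `_mm_set_epi32` takes values from the
// highest lane down to the lowest, while `_mm_setr_epi32` takes them in lane
// (reversed) order; the helper name is hypothetical.
#[cfg(test)]
unsafe fn set_epi32_sketch() {
    assert_eq!(_mm_set_epi32(3, 2, 1, 0), i32x4::new(0, 1, 2, 3));
    assert_eq!(_mm_setr_epi32(3, 2, 1, 0), i32x4::new(3, 2, 1, 0));
}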
@@ -793,7 +793,7 @@
/// Set packed 16-bit integers with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
@@ -803,7 +803,7 @@
/// Set packed 8-bit integers with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi8(
e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
@@ -817,7 +817,7 @@
/// Returns a vector with all elements set to zero.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
pub unsafe fn _mm_setzero_si128() -> __m128i {
mem::transmute((0_i64, 0_i64))
@@ -825,7 +825,7 @@
/// Load 64-bit integer from memory into first element of returned vector.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(all(test, not(windows),
not(all(target_os = "linux", target_arch = "x86_64")),
@@ -839,7 +839,7 @@
///
/// `mem_addr` must be aligned on a 16-byte boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
*mem_addr
@@ -849,7 +849,7 @@
///
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
let mut dst: __m128i = _mm_undefined_si128();
@@ -870,7 +870,7 @@
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
pub unsafe fn _mm_maskmoveu_si128(a: i8x16, mask: i8x16, mem_addr: *mut i8) {
maskmovdqu(a, mask, mem_addr)
@@ -880,7 +880,7 @@
///
/// `mem_addr` must be aligned on a 16-byte boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
*mem_addr = a;
@@ -890,7 +890,7 @@
///
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
storeudq(mem_addr as *mut i8, a);
@@ -900,7 +900,7 @@
///
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(all(test, not(windows),
not(all(target_os = "linux", target_arch = "x86_64")),
@@ -918,7 +918,7 @@
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
::core::intrinsics::nontemporal_store(mem_addr, a);
@@ -928,7 +928,7 @@
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
::core::intrinsics::nontemporal_store(mem_addr, a);
@@ -937,7 +937,7 @@
/// Return a vector where the low element is extracted from `a` and its upper
/// element is zero.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"),
assert_instr(movq))]
@@ -948,7 +948,7 @@
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_epi16(a: i16x8, b: i16x8) -> i8x16 {
packsswb(a, b)
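// A sketch of the signed saturation above: values outside the `i8` range
// clamp to 127 / -128 when packed; the helper name is hypothetical.
#[cfg(test)]
unsafe fn packs_epi16_sketch() {
    let a = i16x8::splat(300);
    let b = i16x8::splat(-300);
    let r = _mm_packs_epi16(a, b); // `a` fills lanes 0-7, `b` fills lanes 8-15
    assert_eq!(r.extract(0), 127);
    assert_eq!(r.extract(8), -128);
}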
@@ -957,7 +957,7 @@
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_epi32(a: i32x4, b: i32x4) -> i16x8 {
packssdw(a, b)
@@ -966,7 +966,7 @@
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
pub unsafe fn _mm_packus_epi16(a: i16x8, b: i16x8) -> u8x16 {
packuswb(a, b)
@@ -974,7 +974,7 @@
/// Return the `imm8` element of `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
pub unsafe fn _mm_extract_epi16(a: i16x8, imm8: i32) -> i32 {
let imm8 = (imm8 & 7) as u32;
@@ -983,7 +983,7 @@
/// Return a new vector where the `imm8` element of `a` is replaced with `i`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
pub unsafe fn _mm_insert_epi16(a: i16x8, i: i32, imm8: i32) -> i16x8 {
a.replace(imm8 as u32 & 0b111, i as i16)
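// A sketch of the extract/insert pair above; the lane index is taken modulo 8
// (`imm8 & 0b111`), and the helper name is hypothetical.
#[cfg(test)]
unsafe fn extract_insert_epi16_sketch() {
    let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    assert_eq!(_mm_extract_epi16(a, 3), 3);
    assert_eq!(_mm_insert_epi16(a, 42, 3).extract(3), 42);
}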
@@ -991,7 +991,7 @@
/// Return a mask of the most significant bit of each element in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_epi8(a: i8x16) -> i32 {
pmovmskb(a)
@@ -999,7 +999,7 @@
/// Shuffle 32-bit integers in `a` using the control in `imm8`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 {
// simd_shuffleX requires that its selector parameter be made up of
@@ -1060,7 +1060,7 @@
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 {
// See _mm_shuffle_epi32.
@@ -1116,7 +1116,7 @@
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 {
// See _mm_shuffle_epi32.
@@ -1167,7 +1167,7 @@
/// Unpack and interleave 8-bit integers from the high half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_epi8(a: i8x16, b: i8x16) -> i8x16 {
simd_shuffle16(
@@ -1179,7 +1179,7 @@
/// Unpack and interleave 16-bit integers from the high half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
pub unsafe fn _mm_unpackhi_epi16(a: i16x8, b: i16x8) -> i16x8 {
simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
@@ -1187,7 +1187,7 @@
/// Unpack and interleave 32-bit integers from the high half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_epi32(a: i32x4, b: i32x4) -> i32x4 {
simd_shuffle4(a, b, [2, 6, 3, 7])
@@ -1195,7 +1195,7 @@
/// Unpack and interleave 64-bit integers from the high half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhqdq))]
pub unsafe fn _mm_unpackhi_epi64(a: i64x2, b: i64x2) -> i64x2 {
simd_shuffle2(a, b, [1, 3])
@@ -1203,7 +1203,7 @@
/// Unpack and interleave 8-bit integers from the low half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_epi8(a: i8x16, b: i8x16) -> i8x16 {
simd_shuffle16(
@@ -1215,7 +1215,7 @@
/// Unpack and interleave 16-bit integers from the low half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_epi16(a: i16x8, b: i16x8) -> i16x8 {
simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
@@ -1223,7 +1223,7 @@
/// Unpack and interleave 32-bit integers from the low half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_epi32(a: i32x4, b: i32x4) -> i32x4 {
simd_shuffle4(a, b, [0, 4, 1, 5])
@@ -1231,7 +1231,7 @@
/// Unpack and interleave 64-bit integers from the low half of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklqdq))]
pub unsafe fn _mm_unpacklo_epi64(a: i64x2, b: i64x2) -> i64x2 {
simd_shuffle2(a, b, [0, 2])
@@ -1240,7 +1240,7 @@
/// Return a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
pub unsafe fn _mm_add_sd(a: f64x2, b: f64x2) -> f64x2 {
a.replace(0, a.extract(0) + b.extract(0))
@@ -1249,7 +1249,7 @@
/// Add packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
pub unsafe fn _mm_add_pd(a: f64x2, b: f64x2) -> f64x2 {
a + b
@@ -1258,7 +1258,7 @@
/// Return a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
pub unsafe fn _mm_div_sd(a: f64x2, b: f64x2) -> f64x2 {
a.replace(0, a.extract(0) / b.extract(0))
@@ -1267,7 +1267,7 @@
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
pub unsafe fn _mm_div_pd(a: f64x2, b: f64x2) -> f64x2 {
a / b
@@ -1276,7 +1276,7 @@
/// Return a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
pub unsafe fn _mm_max_sd(a: f64x2, b: f64x2) -> f64x2 {
maxsd(a, b)
@@ -1285,7 +1285,7 @@
/// Return a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
pub unsafe fn _mm_max_pd(a: f64x2, b: f64x2) -> f64x2 {
maxpd(a, b)
@@ -1294,7 +1294,7 @@
/// Return a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
pub unsafe fn _mm_min_sd(a: f64x2, b: f64x2) -> f64x2 {
minsd(a, b)
@@ -1303,7 +1303,7 @@
/// Return a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
pub unsafe fn _mm_min_pd(a: f64x2, b: f64x2) -> f64x2 {
minpd(a, b)
@@ -1312,7 +1312,7 @@
/// Return a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
pub unsafe fn _mm_mul_sd(a: f64x2, b: f64x2) -> f64x2 {
a.replace(0, a.extract(0) * b.extract(0))
@@ -1321,7 +1321,7 @@
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
pub unsafe fn _mm_mul_pd(a: f64x2, b: f64x2) -> f64x2 {
a * b
@@ -1330,7 +1330,7 @@
/// Return a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
pub unsafe fn _mm_sqrt_sd(a: f64x2, b: f64x2) -> f64x2 {
a.replace(0, sqrtsd(b).extract(0))
@@ -1338,7 +1338,7 @@
/// Return a new vector with the square root of each of the values in `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
pub unsafe fn _mm_sqrt_pd(a: f64x2) -> f64x2 {
sqrtpd(a)
@@ -1347,7 +1347,7 @@
/// Return a new vector with the low element of `a` replaced by subtracting the
/// low element of `b` from the low element of `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
pub unsafe fn _mm_sub_sd(a: f64x2, b: f64x2) -> f64x2 {
a.replace(0, a.extract(0) - b.extract(0))
@@ -1356,7 +1356,7 @@
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
pub unsafe fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 {
a - b
@@ -1365,7 +1365,7 @@
/// Compute the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
pub unsafe fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 {
let a: u64x2 = mem::transmute(a);
@@ -1375,7 +1375,7 @@
/// Compute the bitwise NOT of `a` and then AND with `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
pub unsafe fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 {
let a: u64x2 = mem::transmute(a);
@@ -1385,7 +1385,7 @@
/// Compute the bitwise OR of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
pub unsafe fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 {
let a: u64x2 = mem::transmute(a);
@@ -1395,7 +1395,7 @@
/// Compute the bitwise XOR of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
pub unsafe fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 {
let a: u64x2 = mem::transmute(a);
@@ -1406,7 +1406,7 @@
/// Return a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
pub unsafe fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 0)
@@ -1415,7 +1415,7 @@
/// Return a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
pub unsafe fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 1)
@@ -1424,7 +1424,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
pub unsafe fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 2)
@@ -1433,7 +1433,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
pub unsafe fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmplt_sd(b, a).replace(1, a.extract(1))
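// Greater-than is implemented as less-than with the operands swapped; since
// the swapped comparison carries `b`'s upper element through, the
// `replace(1, a.extract(1))` restores the upper element from `a`, as the
// `_sd` semantics require.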
@@ -1442,7 +1442,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
pub unsafe fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmple_sd(b, a).replace(1, a.extract(1))
@@ -1453,7 +1453,7 @@
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
pub unsafe fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 7)
@@ -1463,7 +1463,7 @@
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
pub unsafe fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 3)
@@ -1472,7 +1472,7 @@
/// Return a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
pub unsafe fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 4)
@@ -1481,7 +1481,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
pub unsafe fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 5)
@@ -1490,7 +1490,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
pub unsafe fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 {
cmpsd(a, b, 6)
@@ -1499,7 +1499,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
pub unsafe fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmpnlt_sd(b, a).replace(1, a.extract(1))
@@ -1508,7 +1508,7 @@
/// Return a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
pub unsafe fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmpnle_sd(b, a).replace(1, a.extract(1))
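The greater-than wrappers above are built from the swapped less-than compares, and the `.replace(1, a.extract(1))` step is what keeps the upper lane coming from `a`. A minimal sketch of that behaviour, assuming the `stdsimd::simd`/`stdsimd::vendor` paths used by the doc-tests elsewhere in this patch:

```
// Illustrative only: shows why _mm_cmpgt_sd restores the upper lane from `a`.
#[target_feature(enable = "sse2")]
unsafe fn cmpgt_upper_lane_demo() {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::{_mm_cmpgt_sd, _mm_cmplt_sd};

    let a = f64x2::new(2.0, 10.0);
    let b = f64x2::new(1.0, 20.0);

    // The raw swapped compare carries b's upper lane (20.0) through...
    let swapped = _mm_cmplt_sd(b, a);
    assert_eq!(swapped.extract(1), 20.0);

    // ...so the wrapper copies a's upper lane (10.0) back in.
    let gt = _mm_cmpgt_sd(a, b);
    assert_eq!(gt.extract(1), 10.0);
}
```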
@@ -1516,7 +1516,7 @@
/// Compare corresponding elements in `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
pub unsafe fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 0)
@@ -1524,7 +1524,7 @@
/// Compare corresponding elements in `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
pub unsafe fn _mm_cmplt_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 1)
@@ -1532,7 +1532,7 @@
/// Compare corresponding elements in `a` and `b` for less-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
pub unsafe fn _mm_cmple_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 2)
@@ -1540,7 +1540,7 @@
/// Compare corresponding elements in `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
pub unsafe fn _mm_cmpgt_pd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmplt_pd(b, a)
@@ -1548,7 +1548,7 @@
/// Compare corresponding elements in `a` and `b` for greater-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
pub unsafe fn _mm_cmpge_pd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmple_pd(b, a)
@@ -1556,7 +1556,7 @@
/// Compare corresponding elements in `a` and `b` to see if neither is `NaN`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
pub unsafe fn _mm_cmpord_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 7)
@@ -1564,7 +1564,7 @@
/// Compare corresponding elements in `a` and `b` to see if either is `NaN`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
pub unsafe fn _mm_cmpunord_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 3)
@@ -1572,7 +1572,7 @@
/// Compare corresponding elements in `a` and `b` for not-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
pub unsafe fn _mm_cmpneq_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 4)
@@ -1580,7 +1580,7 @@
/// Compare corresponding elements in `a` and `b` for not-less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
pub unsafe fn _mm_cmpnlt_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 5)
@@ -1588,7 +1588,7 @@
/// Compare corresponding elements in `a` and `b` for not-less-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
pub unsafe fn _mm_cmpnle_pd(a: f64x2, b: f64x2) -> f64x2 {
cmppd(a, b, 6)
@@ -1596,7 +1596,7 @@
/// Compare corresponding elements in `a` and `b` for not-greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
pub unsafe fn _mm_cmpngt_pd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmpnlt_pd(b, a)
@@ -1605,7 +1605,7 @@
/// Compare corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
pub unsafe fn _mm_cmpnge_pd(a: f64x2, b: f64x2) -> f64x2 {
_mm_cmpnle_pd(b, a)
@@ -1613,7 +1613,7 @@
/// Compare the lower element of `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comieq_sd(a: f64x2, b: f64x2) -> bool {
comieqsd(a, b) as u8 != 0
@@ -1621,7 +1621,7 @@
/// Compare the lower element of `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comilt_sd(a: f64x2, b: f64x2) -> bool {
comiltsd(a, b) as u8 != 0
@@ -1629,7 +1629,7 @@
/// Compare the lower element of `a` and `b` for less-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comile_sd(a: f64x2, b: f64x2) -> bool {
comilesd(a, b) as u8 != 0
@@ -1637,7 +1637,7 @@
/// Compare the lower element of `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comigt_sd(a: f64x2, b: f64x2) -> bool {
comigtsd(a, b) as u8 != 0
@@ -1645,7 +1645,7 @@
/// Compare the lower element of `a` and `b` for greater-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comige_sd(a: f64x2, b: f64x2) -> bool {
comigesd(a, b) as u8 != 0
@@ -1653,7 +1653,7 @@
/// Compare the lower element of `a` and `b` for not-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
pub unsafe fn _mm_comineq_sd(a: f64x2, b: f64x2) -> bool {
comineqsd(a, b) as u8 != 0
@@ -1661,7 +1661,7 @@
/// Compare the lower element of `a` and `b` for equality.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomieq_sd(a: f64x2, b: f64x2) -> bool {
ucomieqsd(a, b) as u8 != 0
@@ -1669,7 +1669,7 @@
/// Compare the lower element of `a` and `b` for less-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomilt_sd(a: f64x2, b: f64x2) -> bool {
ucomiltsd(a, b) as u8 != 0
@@ -1677,7 +1677,7 @@
/// Compare the lower element of `a` and `b` for less-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomile_sd(a: f64x2, b: f64x2) -> bool {
ucomilesd(a, b) as u8 != 0
@@ -1685,7 +1685,7 @@
/// Compare the lower element of `a` and `b` for greater-than.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomigt_sd(a: f64x2, b: f64x2) -> bool {
ucomigtsd(a, b) as u8 != 0
@@ -1693,7 +1693,7 @@
/// Compare the lower element of `a` and `b` for greater-than-or-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomige_sd(a: f64x2, b: f64x2) -> bool {
ucomigesd(a, b) as u8 != 0
@@ -1701,7 +1701,7 @@
/// Compare the lower element of `a` and `b` for not-equal.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
ucomineqsd(a, b) as u8 != 0
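With the new syntax every one of these intrinsics is an `unsafe fn` gated by `#[target_feature(enable = ...)]`, so callers are expected to pair a runtime check with a small unsafe helper, exactly as the doc-tests further down in this patch now do. A hedged sketch of that caller-side pattern, assuming the usual `cfg_feature_enabled!` macro and nightly feature gates from stdsimd:

```
// Sketch only; crate scaffolding such as
// #![feature(cfg_target_feature, target_feature)] and
// #[macro_use] extern crate stdsimd; is assumed.

#[target_feature(enable = "sse2")]
unsafe fn lower_equal(a: f64, b: f64) -> bool {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::_mm_comieq_sd;
    _mm_comieq_sd(f64x2::new(a, 0.0), f64x2::new(b, 0.0))
}

fn main() {
    if cfg_feature_enabled!("sse2") {
        // Only reached after verifying SSE2 support at runtime.
        assert!(unsafe { lower_equal(1.0, 1.0) });
    }
}
```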
@@ -1710,7 +1710,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in "a" to
/// packed single-precision (32-bit) floating-point elements
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
cvtpd2ps(a)
@@ -1720,7 +1720,7 @@
/// packed
/// double-precision (64-bit) floating-point elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
cvtps2pd(a)
@@ -1729,7 +1729,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 {
cvtpd2dq(a)
@@ -1738,7 +1738,7 @@
/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 32-bit integer.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 {
cvtsd2si(a)
@@ -1749,7 +1749,7 @@
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element of the return value.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
cvtsd2ss(a, b)
@@ -1757,7 +1757,7 @@
/// Return the lower double-precision (64-bit) floating-point element of "a".
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platform
pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 {
a.extract(0)
@@ -1768,7 +1768,7 @@
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element of the return value.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 {
cvtss2sd(a, b)
@@ -1777,7 +1777,7 @@
/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 {
cvttpd2dq(a)
@@ -1786,7 +1786,7 @@
/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 {
cvttsd2si(a)
@@ -1795,7 +1795,7 @@
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
cvttps2dq(a)
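The difference between the `cvtsd2si` and `cvttsd2si` wrappers above is only the rounding behaviour; a small sketch, assuming the default round-to-nearest MXCSR state:

```
#[target_feature(enable = "sse2")]
unsafe fn convert_demo() {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::{_mm_cvtsd_si32, _mm_cvttsd_si32};

    let a = f64x2::new(2.7, 0.0);
    // _mm_cvtsd_si32 rounds according to the current rounding mode
    // (round-to-nearest unless MXCSR has been changed)...
    assert_eq!(_mm_cvtsd_si32(a), 3);
    // ...while _mm_cvttsd_si32 always truncates toward zero.
    assert_eq!(_mm_cvttsd_si32(a), 2);
}
```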
@@ -1804,15 +1804,16 @@
/// Copy double-precision (64-bit) floating-point element `a` to the lower
/// element of the packed 64-bit return value.
#[inline(always)]
-#[target_feature = "+sse2"]
+
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
- f64x2::new(a, 0_f64)
+ f64x2::new(a, 0f64)
}
/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
f64x2::new(a, a)
}
@@ -1820,7 +1821,7 @@
/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
f64x2::new(a, a)
}
@@ -1828,7 +1829,7 @@
/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
f64x2::new(b, a)
}
@@ -1836,7 +1837,7 @@
/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
f64x2::new(a, b)
}
@@ -1844,7 +1845,7 @@
/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
pub unsafe fn _mm_setzero_pd() -> f64x2 {
f64x2::splat(0_f64)
@@ -1855,7 +1856,7 @@
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 {
movmskpd(a)
@@ -1866,7 +1867,7 @@
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
*(mem_addr as *const f64x2)
@@ -1875,7 +1876,7 @@
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit integer vector and clears the upper element.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 {
f64x2::new(*mem_addr, 0.)
@@ -1885,7 +1886,7 @@
/// vector of [2 x double]. The low-order bits are copied from the low-order
/// bits of the first operand.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhpd))]
pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
f64x2::new(a.extract(0), *mem_addr)
@@ -1895,7 +1896,7 @@
/// vector of [2 x double]. The high-order bits are copied from the
/// high-order bits of the first operand.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlpd))]
pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
f64x2::new(*mem_addr, a.extract(1))
@@ -1906,7 +1907,7 @@
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1915,7 +1916,7 @@
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) {
*mem_addr = a.extract(0)
@@ -1925,7 +1926,7 @@
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: f64x2) {
*(mem_addr as *mut f64x2) = a;
@@ -1935,7 +1936,7 @@
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) {
storeupd(mem_addr as *mut i8, a);
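As the doc comments above note, `_mm_store_pd` demands a 16-byte aligned address while `_mm_storeu_pd` does not. A sketch of picking between them, relying on `f64x2` itself being 16-byte aligned:

```
use stdsimd::simd::f64x2;

#[target_feature(enable = "sse2")]
unsafe fn store_demo(a: f64x2) {
    use stdsimd::vendor::{_mm_store_pd, _mm_storeu_pd};

    // An f64x2 value is 16-byte aligned, so its address satisfies _mm_store_pd.
    let mut aligned = f64x2::splat(0.0);
    _mm_store_pd(&mut aligned as *mut f64x2 as *mut f64, a);

    // A plain f64 array only guarantees 8-byte alignment; use the unaligned store.
    let mut buf = [0.0f64; 2];
    _mm_storeu_pd(buf.as_mut_ptr(), a);
}
```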
@@ -1945,7 +1946,7 @@
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
let b: f64x2 = simd_shuffle2(a, a, [0, 0]);
*(mem_addr as *mut f64x2) = b;
@@ -1955,7 +1956,7 @@
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
let b: f64x2 = simd_shuffle2(a, a, [0, 0]);
*(mem_addr as *mut f64x2) = b;
@@ -1966,7 +1967,7 @@
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
let b: f64x2 = simd_shuffle2(a, a, [1, 0]);
*(mem_addr as *mut f64x2) = b;
@@ -1975,7 +1976,7 @@
/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhpd))]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) {
*mem_addr = a.extract(1)
@@ -1984,7 +1985,7 @@
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
/// memory location.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) {
*mem_addr = a.extract(0)
@@ -1993,7 +1994,7 @@
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
//#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 {
let d = *mem_addr;
@@ -2003,7 +2004,7 @@
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
//#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 {
let d = *mem_addr;
@@ -2014,7 +2015,7 @@
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movapd))]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 {
let a = _mm_load_pd(mem_addr);
@@ -2025,7 +2026,7 @@
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
let mut dst = _mm_undefined_pd();
@@ -2041,7 +2042,7 @@
/// 128-bit vector parameters of [2 x double], using the immediate-value
/// parameter as a specifier.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
match imm8 & 0b11 {
@@ -2056,7 +2057,7 @@
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
pub unsafe fn _mm_move_sd(a: f64x2, b: f64x2) -> f64x2 {
f64x2::new(b.extract(0), a.extract(1))
@@ -2065,7 +2066,7 @@
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// floating-point vector of [4 x float].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 {
mem::transmute(a)
}
@@ -2073,7 +2074,7 @@
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// integer vector.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i {
simd_cast(a)
}
@@ -2081,7 +2082,7 @@
/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
/// floating-point vector of [2 x double].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 {
mem::transmute(a)
}
@@ -2089,7 +2090,7 @@
/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
/// integer vector.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i {
mem::transmute(a)
}
@@ -2097,7 +2098,7 @@
/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of [2 x double].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 {
simd_cast(a)
}
@@ -2105,21 +2106,21 @@
/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of [4 x float].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> f32x4 {
mem::transmute(a)
}
/// Return vector of type __m128d with undefined elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_undefined_pd() -> f64x2 {
f64x2::splat(mem::uninitialized())
}
/// Return vector of type __m128i with undefined elements.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
mem::transmute(i32x4::splat(mem::uninitialized()))
}
@@ -2130,7 +2131,7 @@
/// * The [127:64] bits are copied from the [127:64] bits of the second input
/// * The [63:0] bits are copied from the [127:64] bits of the first input
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 {
simd_shuffle2(a, b, [1, 3])
@@ -2142,7 +2143,7 @@
/// * The [127:64] bits are copied from the [63:0] bits of the second input
/// * The [63:0] bits are copied from the [63:0] bits of the first input
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklpd))]
pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 {
simd_shuffle2(a, b, [0, 2])
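The unpack intrinsics at the end of this file follow directly from their doc comments: `unpcklpd` pairs the low lanes of the two inputs and `unpckhpd` pairs the high lanes. A quick sketch:

```
#[target_feature(enable = "sse2")]
unsafe fn unpack_demo() {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::{_mm_unpackhi_pd, _mm_unpacklo_pd};

    let a = f64x2::new(1.0, 2.0);
    let b = f64x2::new(3.0, 4.0);

    let lo = _mm_unpacklo_pd(a, b); // takes the low lane of each input
    assert_eq!((lo.extract(0), lo.extract(1)), (1.0, 3.0));

    let hi = _mm_unpackhi_pd(a, b); // takes the high lane of each input
    assert_eq!((hi.extract(0), hi.extract(1)), (2.0, 4.0));
}
```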
diff --git a/coresimd/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs
index c582bdb..1a4e63d 100644
--- a/coresimd/src/x86/i586/sse3.rs
+++ b/coresimd/src/x86/i586/sse3.rs
@@ -9,7 +9,7 @@
/// Alternatively add and subtract packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubps))]
pub unsafe fn _mm_addsub_ps(a: f32x4, b: f32x4) -> f32x4 {
addsubps(a, b)
@@ -18,7 +18,7 @@
/// Alternatively add and subtract packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubpd))]
pub unsafe fn _mm_addsub_pd(a: f64x2, b: f64x2) -> f64x2 {
addsubpd(a, b)
@@ -27,7 +27,7 @@
/// Horizontally add adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
pub unsafe fn _mm_hadd_pd(a: f64x2, b: f64x2) -> f64x2 {
haddpd(a, b)
@@ -36,7 +36,7 @@
/// Horizontally add adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
pub unsafe fn _mm_hadd_ps(a: f32x4, b: f32x4) -> f32x4 {
haddps(a, b)
@@ -45,7 +45,7 @@
/// Horizontally subtract adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
pub unsafe fn _mm_hsub_pd(a: f64x2, b: f64x2) -> f64x2 {
hsubpd(a, b)
@@ -54,7 +54,7 @@
/// Horizontally subtract adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
pub unsafe fn _mm_hsub_ps(a: f32x4, b: f32x4) -> f32x4 {
hsubps(a, b)
@@ -64,7 +64,7 @@
/// This intrinsic may perform better than `_mm_loadu_si128`
/// when the data crosses a cache line boundary.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(lddqu))]
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
__m128i::from(lddqu(mem_addr as *const _))
@@ -73,7 +73,7 @@
/// Duplicate the low double-precision (64-bit) floating-point element
/// from `a`.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
pub unsafe fn _mm_movedup_pd(a: f64x2) -> f64x2 {
simd_shuffle2(a, a, [0, 0])
@@ -82,7 +82,7 @@
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of return vector.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> f64x2 {
use x86::i586::sse2::_mm_load1_pd;
@@ -92,7 +92,7 @@
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
/// from `a`.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movshdup))]
pub unsafe fn _mm_movehdup_ps(a: f32x4) -> f32x4 {
simd_shuffle4(a, a, [1, 1, 3, 3])
@@ -101,7 +101,7 @@
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
/// from `a`.
#[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movsldup))]
pub unsafe fn _mm_moveldup_ps(a: f32x4) -> f32x4 {
simd_shuffle4(a, a, [0, 0, 2, 2])
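For the SSE3 horizontal and alternating operations above, a short sketch of the lane arithmetic: `haddpd` packs the within-vector sums, while `addsubpd` subtracts in lane 0 and adds in lane 1.

```
#[target_feature(enable = "sse3")]
unsafe fn sse3_demo() {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::{_mm_addsub_pd, _mm_hadd_pd};

    let a = f64x2::new(1.0, 2.0);
    let b = f64x2::new(10.0, 20.0);

    // haddpd packs the within-vector sums: [a0 + a1, b0 + b1]
    let h = _mm_hadd_pd(a, b);
    assert_eq!((h.extract(0), h.extract(1)), (3.0, 30.0));

    // addsubpd subtracts in the even lane and adds in the odd lane
    let m = _mm_addsub_pd(a, b);
    assert_eq!((m.extract(0), m.extract(1)), (-9.0, 22.0));
}
```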
diff --git a/coresimd/src/x86/i586/sse41.rs b/coresimd/src/x86/i586/sse41.rs
index 60f972f..4f3c20e 100644
--- a/coresimd/src/x86/i586/sse41.rs
+++ b/coresimd/src/x86/i586/sse41.rs
@@ -47,7 +47,7 @@
/// If the high bit is set the element of `a` is selected. The element
/// of `b` is selected otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
pub unsafe fn _mm_blendv_epi8(a: i8x16, b: i8x16, mask: i8x16) -> i8x16 {
pblendvb(a, b, mask)
@@ -59,7 +59,7 @@
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: i32) -> i16x8 {
macro_rules! call {
@@ -71,7 +71,7 @@
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
pub unsafe fn _mm_blendv_pd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2 {
blendvpd(a, b, mask)
@@ -80,7 +80,7 @@
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
pub unsafe fn _mm_blendv_ps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4 {
blendvps(a, b, mask)
@@ -89,7 +89,7 @@
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `imm2`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: i32) -> f64x2 {
macro_rules! call {
@@ -101,7 +101,7 @@
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `imm4`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: i32) -> f32x4 {
macro_rules! call {
@@ -113,7 +113,7 @@
/// Extract a single-precision (32-bit) floating-point element from `a`,
/// selected with `imm8`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
pub unsafe fn _mm_extract_ps(a: f32x4, imm8: i32) -> i32 {
@@ -125,7 +125,7 @@
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: i32) -> i32 {
let imm8 = (imm8 & 15) as u32;
@@ -134,7 +134,7 @@
/// Extract a 32-bit integer from `a` selected with `imm8`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: i32) -> i32 {
@@ -165,7 +165,7 @@
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
/// element is cleared.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
macro_rules! call {
@@ -177,7 +177,7 @@
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
pub unsafe fn _mm_insert_epi8(a: i8x16, i: i8, imm8: i32) -> i8x16 {
a.replace((imm8 & 0b1111) as u32, i)
@@ -186,7 +186,7 @@
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
pub unsafe fn _mm_insert_epi32(a: i32x4, i: i32, imm8: i32) -> i32x4 {
a.replace((imm8 & 0b11) as u32, i)
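`_mm_insert_epi32` and `_mm_extract_epi32` above simply address a lane via the low bits of `imm8`; a sketch:

```
#[target_feature(enable = "sse4.1")]
unsafe fn insert_extract_demo() {
    use stdsimd::simd::i32x4;
    use stdsimd::vendor::{_mm_extract_epi32, _mm_insert_epi32};

    let a = i32x4::new(10, 20, 30, 40);
    // Replace lane 2, leaving the other lanes untouched.
    let b = _mm_insert_epi32(a, 99, 2);
    assert_eq!(_mm_extract_epi32(b, 2), 99);
    assert_eq!(_mm_extract_epi32(b, 0), 10);
}
```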
@@ -195,7 +195,7 @@
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
/// values in dst.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
pmaxsb(a, b)
@@ -204,7 +204,7 @@
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
/// maximum.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
pmaxuw(a, b)
@@ -213,7 +213,7 @@
/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
/// values.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
pmaxsd(a, b)
@@ -222,7 +222,7 @@
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
/// maximum values.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
pmaxud(a, b)
@@ -231,7 +231,7 @@
/// Compare packed 8-bit integers in `a` and `b` and return packed minimum
/// values in dst.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
pub unsafe fn _mm_min_epi8(a: i8x16, b: i8x16) -> i8x16 {
pminsb(a, b)
@@ -240,7 +240,7 @@
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
/// minimum.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
pub unsafe fn _mm_min_epu16(a: u16x8, b: u16x8) -> u16x8 {
pminuw(a, b)
@@ -249,7 +249,7 @@
/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
/// values.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
pub unsafe fn _mm_min_epi32(a: i32x4, b: i32x4) -> i32x4 {
pminsd(a, b)
@@ -258,7 +258,7 @@
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
/// minimum values.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
pub unsafe fn _mm_min_epu32(a: u32x4, b: u32x4) -> u32x4 {
pminud(a, b)
@@ -267,7 +267,7 @@
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
pub unsafe fn _mm_packus_epi32(a: i32x4, b: i32x4) -> u16x8 {
packusdw(a, b)
@@ -275,7 +275,7 @@
/// Compare packed 64-bit integers in `a` and `b` for equality
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
a.eq(b)
@@ -283,7 +283,7 @@
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 {
simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8()
@@ -291,7 +291,7 @@
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -300,7 +300,7 @@
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 {
simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]).as_i64x2()
@@ -308,7 +308,7 @@
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 {
simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -316,7 +316,7 @@
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 {
simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]).as_i64x2()
@@ -324,7 +324,7 @@
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
pub unsafe fn _mm_cvtepi32_epi64(a: i32x4) -> i64x2 {
simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]).as_i64x2()
@@ -332,7 +332,7 @@
/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
pub unsafe fn _mm_cvtepu8_epi16(a: u8x16) -> i16x8 {
simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8()
@@ -340,7 +340,7 @@
/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
pub unsafe fn _mm_cvtepu8_epi32(a: u8x16) -> i32x4 {
simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -348,7 +348,7 @@
/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
@@ -357,7 +357,7 @@
/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
pub unsafe fn _mm_cvtepu16_epi32(a: u16x8) -> i32x4 {
simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -366,7 +366,7 @@
/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
pub unsafe fn _mm_cvtepu16_epi64(a: u16x8) -> i64x2 {
simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]).as_i64x2()
@@ -375,7 +375,7 @@
/// Zero extend packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]).as_i64x2()
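The `pmovsx*`/`pmovzx*` wrappers above differ only in whether the widening preserves the sign. A sketch contrasting the 8-to-16-bit variants:

```
#[target_feature(enable = "sse4.1")]
unsafe fn extend_demo() {
    use stdsimd::simd::{i8x16, u8x16};
    use stdsimd::vendor::{_mm_cvtepi8_epi16, _mm_cvtepu8_epi16};

    let signed = i8x16::splat(-1);
    let unsigned = u8x16::splat(0xff);

    // pmovsxbw keeps the sign: -1i8 widens to -1i16...
    assert_eq!(_mm_cvtepi8_epi16(signed).extract(0), -1i16);
    // ...while pmovzxbw zero-extends: 0xff widens to 255i16.
    assert_eq!(_mm_cvtepu8_epi16(unsigned).extract(0), 255i16);
}
```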
@@ -389,7 +389,7 @@
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
pub unsafe fn _mm_dp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
macro_rules! call {
@@ -406,7 +406,7 @@
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
macro_rules! call {
@@ -419,7 +419,7 @@
/// down to an integer value, and store the results as packed double-precision
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
pub unsafe fn _mm_floor_pd(a: f64x2) -> f64x2 {
roundpd(a, _MM_FROUND_FLOOR)
@@ -429,7 +429,7 @@
/// down to an integer value, and store the results as packed single-precision
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
pub unsafe fn _mm_floor_ps(a: f32x4) -> f32x4 {
roundps(a, _MM_FROUND_FLOOR)
@@ -441,7 +441,7 @@
/// and copy the upper element from `a` to the upper element of the intrinsic
/// result.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
pub unsafe fn _mm_floor_sd(a: f64x2, b: f64x2) -> f64x2 {
roundsd(a, b, _MM_FROUND_FLOOR)
@@ -453,7 +453,7 @@
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
pub unsafe fn _mm_floor_ss(a: f32x4, b: f32x4) -> f32x4 {
roundss(a, b, _MM_FROUND_FLOOR)
@@ -463,7 +463,7 @@
/// up to an integer value, and store the results as packed double-precision
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
pub unsafe fn _mm_ceil_pd(a: f64x2) -> f64x2 {
roundpd(a, _MM_FROUND_CEIL)
@@ -473,7 +473,7 @@
/// up to an integer value, and store the results as packed single-precision
/// floating-point elements.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
pub unsafe fn _mm_ceil_ps(a: f32x4) -> f32x4 {
roundps(a, _MM_FROUND_CEIL)
@@ -485,7 +485,7 @@
/// and copy the upper element from `a` to the upper element
/// of the intrinsic result.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
pub unsafe fn _mm_ceil_sd(a: f64x2, b: f64x2) -> f64x2 {
roundsd(a, b, _MM_FROUND_CEIL)
@@ -497,7 +497,7 @@
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
pub unsafe fn _mm_ceil_ss(a: f32x4, b: f32x4) -> f32x4 {
roundss(a, b, _MM_FROUND_CEIL)
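`_mm_floor_*` and `_mm_ceil_*` are the fixed-mode versions of the `roundpd`/`roundps` family; a sketch of the packed double variants:

```
#[target_feature(enable = "sse4.1")]
unsafe fn round_demo() {
    use stdsimd::simd::f64x2;
    use stdsimd::vendor::{_mm_ceil_pd, _mm_floor_pd};

    let a = f64x2::new(1.5, -1.5);

    let f = _mm_floor_pd(a); // round each lane toward negative infinity
    assert_eq!((f.extract(0), f.extract(1)), (1.0, -2.0));

    let c = _mm_ceil_pd(a); // round each lane toward positive infinity
    assert_eq!((c.extract(0), c.extract(1)), (2.0, -1.0));
}
```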
@@ -523,7 +523,7 @@
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// ```
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
pub unsafe fn _mm_round_pd(a: f64x2, rounding: i32) -> f64x2 {
macro_rules! call {
@@ -552,7 +552,7 @@
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// ```
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, rounding = 0))]
pub unsafe fn _mm_round_ps(a: f32x4, rounding: i32) -> f32x4 {
macro_rules! call {
@@ -583,7 +583,7 @@
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// ```
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
pub unsafe fn _mm_round_sd(a: f64x2, b: f64x2, rounding: i32) -> f64x2 {
macro_rules! call {
@@ -614,7 +614,7 @@
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// ```
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
macro_rules! call {
@@ -643,7 +643,7 @@
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
pub unsafe fn _mm_minpos_epu16(a: u16x8) -> u16x8 {
phminposuw(a)
@@ -652,7 +652,7 @@
/// Multiply the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and return the signed 64-bit result.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
pub unsafe fn _mm_mul_epi32(a: i32x4, b: i32x4) -> i64x2 {
pmuldq(a, b)
@@ -665,7 +665,7 @@
/// arithmetic `pmulld i32x4::splat(i32::MAX), i32x4::splat(2)` would return a
/// negative number.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
pub unsafe fn _mm_mullo_epi32(a: i32x4, b: i32x4) -> i32x4 {
a * b
@@ -703,7 +703,7 @@
/// * An `i16x8` vector containing the sums of the sets of
/// absolute differences between both operands.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
pub unsafe fn _mm_mpsadbw_epu8(a: u8x16, b: u8x16, imm8: i32) -> u16x8 {
macro_rules! call {
diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs
index 2e9b7de..ff1bf2c 100644
--- a/coresimd/src/x86/i586/sse42.rs
+++ b/coresimd/src/x86/i586/sse42.rs
@@ -48,7 +48,7 @@
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return the generated mask.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
@@ -99,8 +99,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
-/// # #[target_feature = "+sse4.2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
///
/// use stdsimd::simd::u8x16;
/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED};
@@ -126,7 +126,7 @@
/// }
/// assert_eq!(indexes, vec![34]);
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
@@ -142,8 +142,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
-/// # #[target_feature = "+sse4.2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::u8x16;
/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ANY};
///
@@ -167,7 +167,7 @@
/// println!("Your password should contain a special character");
/// }
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
@@ -183,8 +183,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
-/// # #[target_feature = "+sse4.2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::u8x16;
/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_RANGES};
/// # let b = u8x16::load(b":;<=>?@[\\]^_`abc", 0);
@@ -207,7 +207,7 @@
/// println!("Did not find an alpha numeric character");
/// }
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
@@ -222,8 +222,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
-/// # #[target_feature = "+sse4.2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
/// use stdsimd::simd::u16x8;
/// use stdsimd::vendor::{_mm_cmpistri};
/// use stdsimd::vendor::{_SIDD_UWORD_OPS, _SIDD_CMP_EQUAL_EACH};
@@ -249,7 +249,7 @@
/// println!("16-bit unicode strings were not equal!");
/// }
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
@@ -268,7 +268,7 @@
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
/// [`_mm_cmpestri`]: fn._mm_cmpestri.html
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -281,7 +281,7 @@
/// control in `imm8`, and return `1` if any character in `b` was null,
/// and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -296,7 +296,7 @@
/// control in `imm8`, and return `1` if the resulting mask was non-zero,
/// and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -309,7 +309,7 @@
/// control in `imm8`, and returns `1` if any character in `a` was null,
/// and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -321,7 +321,7 @@
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return bit `0` of the resulting bit mask.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -334,7 +334,7 @@
/// control in `imm8`, and return `1` if `b` did not contain a null
/// character and the resulting mask was zero, and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
macro_rules! call {
@@ -346,7 +346,7 @@
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return the generated mask.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
pub unsafe fn _mm_cmpestrm(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -399,8 +399,8 @@
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
-/// # #[target_feature = "+sse4.2"]
-/// # fn worker() {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
///
/// use stdsimd::simd::u8x16;
/// use stdsimd::vendor::{_mm_cmpestri, _SIDD_CMP_EQUAL_ORDERED};
@@ -423,7 +423,7 @@
///
/// assert_eq!(idx, 6);
/// # }
-/// # worker();
+/// # unsafe { worker(); }
/// # }
/// # }
/// ```
@@ -442,7 +442,7 @@
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestri(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -457,7 +457,7 @@
/// using the control in `imm8`, and return `1` if any character in
/// `b` was null, and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrz(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -472,7 +472,7 @@
/// using the control in `imm8`, and return `1` if the resulting mask
/// was non-zero, and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrc(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -487,7 +487,7 @@
/// using the control in `imm8`, and return `1` if any character in
/// `a` was null, and `0` otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestrs(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -502,7 +502,7 @@
/// using the control in `imm8`, and return bit `0` of the resulting
/// bit mask.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestro(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -518,7 +518,7 @@
/// contain a null character and the resulting mask was zero, and `0`
/// otherwise.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
pub unsafe fn _mm_cmpestra(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -532,7 +532,7 @@
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 8-bit integer `v`.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
crc32_32_8(crc, v)
@@ -541,7 +541,7 @@
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 16-bit integer `v`.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
crc32_32_16(crc, v)
@@ -550,7 +550,7 @@
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 32-bit integer `v`.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
crc32_32_32(crc, v)
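The `_mm_crc32_*` intrinsics each fold one more value into a running CRC, so a buffer is hashed by threading the accumulator through a loop. A sketch over bytes:

```
#[target_feature(enable = "sse4.2")]
unsafe fn crc32_demo(data: &[u8]) -> u32 {
    use stdsimd::vendor::_mm_crc32_u8;

    // Each call accumulates one more byte into the running CRC value.
    let mut crc = 0u32;
    for &byte in data {
        crc = _mm_crc32_u8(crc, byte);
    }
    crc
}
```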
@@ -609,7 +609,7 @@
// a bit difficult. Rather than `load` and mutate the __m128i,
// it is easier to memcpy the given string to a local slice with
// length 16 and `load` the local slice.
- #[target_feature = "+sse4.2"]
+ #[target_feature(enable = "sse4.2")]
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
assert!(s.len() <= 16);
let slice = &mut [0u8; 16];
diff --git a/coresimd/src/x86/i586/ssse3.rs b/coresimd/src/x86/i586/ssse3.rs
index 57c0ad6..96c7973 100644
--- a/coresimd/src/x86/i586/ssse3.rs
+++ b/coresimd/src/x86/i586/ssse3.rs
@@ -9,7 +9,7 @@
/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
pabsb128(a)
@@ -19,7 +19,7 @@
/// `a` and
/// return the 16-bit unsigned integer
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
pabsw128(a)
@@ -29,7 +29,7 @@
/// `a` and
/// return the 32-bit unsigned integer
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
pabsd128(a)
@@ -60,7 +60,7 @@
/// }
/// ```
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
pshufb128(a, b)
@@ -69,7 +69,7 @@
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
let n = n as u32;
@@ -124,7 +124,7 @@
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
phaddw128(a, b)
@@ -134,7 +134,7 @@
/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
phaddsw128(a, b)
@@ -143,7 +143,7 @@
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [4 x i32].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
phaddd128(a, b)
@@ -152,7 +152,7 @@
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
phsubw128(a, b)
@@ -163,7 +163,7 @@
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
phsubsw128(a, b)
@@ -172,7 +172,7 @@
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [4 x i32].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
phsubd128(a, b)
@@ -184,7 +184,7 @@
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
pmaddubsw128(a, b)
@@ -194,7 +194,7 @@
/// product to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits [16:1] to the destination.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
pmulhrsw128(a, b)
@@ -205,7 +205,7 @@
/// Elements in result are zeroed out when the corresponding element in `b`
/// is zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
psignb128(a, b)
@@ -216,7 +216,7 @@
/// Elements in result are zeroed out when the corresponding element in `b`
/// is zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
psignw128(a, b)
@@ -227,7 +227,7 @@
/// Elements in result are zeroed out when the corresponding element in `b`
/// is zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
psignd128(a, b)
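
For readers unfamiliar with `pshufb`, a scalar sketch of the `_mm_shuffle_epi8` control-byte rule (the helper name is made up for illustration): each output byte is taken from `a` at the index given by the low 4 bits of the matching byte of `b`, unless bit 7 of that control byte is set, which forces the output byte to zero.

fn shuffle_epi8_sw(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
    let mut r = [0u8; 16];
    for i in 0..16 {
        // Bit 7 of the control byte zeroes the lane; otherwise the low
        // 4 bits index into `a`.
        if b[i] & 0x80 == 0 {
            r[i] = a[(b[i] & 0x0F) as usize];
        }
    }
    r
}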
diff --git a/coresimd/src/x86/i586/tbm.rs b/coresimd/src/x86/i586/tbm.rs
index 38c044c..3001967 100644
--- a/coresimd/src/x86/i586/tbm.rs
+++ b/coresimd/src/x86/i586/tbm.rs
@@ -28,7 +28,7 @@
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
}
@@ -36,7 +36,7 @@
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
_bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
}
@@ -47,7 +47,7 @@
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
unsafe { x86_tbm_bextri_u32(a, control) }
}
@@ -58,7 +58,7 @@
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
unsafe { x86_tbm_bextri_u64(a, control) }
}
@@ -68,7 +68,7 @@
///
/// If there is no zero bit in `x`, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
pub unsafe fn _blcfill_u32(x: u32) -> u32 {
x & (x.wrapping_add(1))
@@ -78,7 +78,7 @@
///
/// If there is no zero bit in `x`, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
@@ -89,7 +89,7 @@
///
/// If there is no zero bit in `x`, it sets all bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
pub unsafe fn _blci_u32(x: u32) -> u32 {
x | !(x.wrapping_add(1))
@@ -99,7 +99,7 @@
///
/// If there is no zero bit in `x`, it sets all bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blci_u64(x: u64) -> u64 {
@@ -110,7 +110,7 @@
///
/// If there is no zero bit in `x`, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
pub unsafe fn _blcic_u32(x: u32) -> u32 {
!x & (x.wrapping_add(1))
@@ -120,7 +120,7 @@
///
/// If there is no zero bit in `x`, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blcic_u64(x: u64) -> u64 {
@@ -132,7 +132,7 @@
///
/// If there is no zero bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_add(1))
@@ -143,7 +143,7 @@
///
/// If there is no zero bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
@@ -154,7 +154,7 @@
///
/// If there is no zero bit in `x`, it returns `x`.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
pub unsafe fn _blcs_u32(x: u32) -> u32 {
x | (x.wrapping_add(1))
@@ -164,7 +164,7 @@
///
/// If there is no zero bit in `x`, it returns `x`.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blcs_u64(x: u64) -> u64 {
@@ -175,7 +175,7 @@
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
pub unsafe fn _blsfill_u32(x: u32) -> u32 {
x | (x.wrapping_sub(1))
@@ -185,7 +185,7 @@
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
@@ -196,7 +196,7 @@
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
pub unsafe fn _blsic_u32(x: u32) -> u32 {
!x | (x.wrapping_sub(1))
@@ -206,7 +206,7 @@
///
/// If there is no set bit in `x`, it sets all the bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _blsic_u64(x: u64) -> u64 {
@@ -218,7 +218,7 @@
///
/// If the least significant bit of `x` is 0, it sets all bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
!x | (x.wrapping_add(1))
@@ -229,7 +229,7 @@
///
/// If the least significant bit of `x` is 0, it sets all bits.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
@@ -241,7 +241,7 @@
///
/// If the least significant bit of `x` is 1, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
!x & (x.wrapping_sub(1))
@@ -252,7 +252,7 @@
///
/// If the least significant bit of `x` is 1, it returns zero.
#[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
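
The TBM wrappers above are thin; as a rough sketch of the `bextr` control-word layout and the one-line bit idioms the other intrinsics encode (helper and test names are made up for illustration):

// Bits [7:0] of `control` hold the start index, bits [15:8] the field length;
// this mirrors what `_bextr2_u32` hands to the hardware.
fn bextr2_u32_sw(a: u32, control: u32) -> u32 {
    let start = control & 0xff;
    let len = (control >> 8) & 0xff;
    if start >= 32 {
        return 0;
    }
    let field = a >> start;
    if len >= 32 { field } else { field & ((1u32 << len) - 1) }
}

// The remaining helpers are simple two-operation idioms taken from the
// bodies above, e.g.:
//   _blcfill_u32(x) == x & x.wrapping_add(1)  // clear from the lowest 0 bit upward
//   _blcmsk_u32(x)  == x ^ x.wrapping_add(1)  // mask through the lowest 0 bit
//   _tzmsk_u32(x)   == !x & x.wrapping_sub(1) // mask of the trailing zero bits
#[test]
fn bextr_field() {
    // bits [2, 6) of 0b1011_0100 are 0b1101
    assert_eq!(bextr2_u32_sw(0b1011_0100, 2 | (4 << 8)), 0b1101);
}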
diff --git a/coresimd/src/x86/i586/xsave.rs b/coresimd/src/x86/i586/xsave.rs
index 15e1f8b..9a7611a 100644
--- a/coresimd/src/x86/i586/xsave.rs
+++ b/coresimd/src/x86/i586/xsave.rs
@@ -34,7 +34,7 @@
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsave))]
pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -47,7 +47,7 @@
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xrstor))]
pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
@@ -63,7 +63,7 @@
///
/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsetbv))]
pub unsafe fn _xsetbv(a: u32, val: u64) {
xsetbv(a, (val >> 32) as u32, val as u32);
@@ -72,7 +72,7 @@
/// Reads the contents of the extended control register `XCR`
/// specified in `xcr_no`.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xgetbv))]
pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
xgetbv(xcr_no) as u64
@@ -86,7 +86,7 @@
/// the manner in which data is saved. The performance of this instruction will
/// be equal to or better than using the `XSAVE` instruction.
#[inline(always)]
-#[target_feature = "+xsave,+xsaveopt"]
+#[target_feature(enable = "xsave,xsaveopt")]
#[cfg_attr(test, assert_instr(xsaveopt))]
pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -99,7 +99,7 @@
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsavec"]
+#[target_feature(enable = "xsave,xsavec")]
#[cfg_attr(test, assert_instr(xsavec))]
pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -113,7 +113,7 @@
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xsaves))]
pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -129,7 +129,7 @@
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xrstors))]
pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
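
All of the `_xsave`/`_xrstor` wrappers above share one pattern: the 64-bit requested-feature bitmap is split into the EDX:EAX pair the instruction expects. A small sketch of that split, with illustrative names:

// High half goes to EDX, low half to EAX, matching
// `xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32)` above.
fn split_state_mask(mask: u64) -> (u32, u32) {
    ((mask >> 32) as u32, mask as u32)
}

fn join_state_mask(hi: u32, lo: u32) -> u64 {
    ((hi as u64) << 32) | lo as u64
}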
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs
index acf43dc..08d7770 100644
--- a/coresimd/src/x86/i686/mmx.rs
+++ b/coresimd/src/x86/i686/mmx.rs
@@ -16,7 +16,7 @@
/// Constructs a 64-bit integer vector initialized to zero.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
@@ -26,7 +26,7 @@
/// Add packed 8-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddb))]
pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
paddb(a, b)
@@ -34,7 +34,7 @@
/// Add packed 16-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddw))]
pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
paddw(a, b)
@@ -42,7 +42,7 @@
/// Add packed 32-bit integers in `a` and `b`.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
paddd(a, b)
@@ -50,7 +50,7 @@
/// Add packed 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
paddsb(a, b)
@@ -58,7 +58,7 @@
/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
paddsw(a, b)
@@ -66,7 +66,7 @@
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
paddusb(a, b)
@@ -74,117 +74,12 @@
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
paddusw(a, b)
}
-/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(packsswb))]
-pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
- packsswb(a, b)
-}
-
-/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(packssdw))]
-pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
- packssdw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtb))]
-pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
- pcmpgtb(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtw))]
-pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
- pcmpgtw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtd))]
-pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
- pcmpgtd(a, b)
-}
-
-/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.2, b.2, a.3, b.3]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
-pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
- punpckhwd(a, b)
-}
-
-/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhbw))]
-pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
- punpckhbw(a, b)
-}
-
-/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpcklbw))]
-pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
- punpcklbw(a, b)
-}
-
-/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.0 b.0 a.1 b.1]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpcklwd))]
-pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
- punpcklwd(a, b)
-}
-
-/// Unpacks the upper element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.1, b.1]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhdq))]
-pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
- punpckhdq(a, b)
-}
-
-/// Unpacks the lower element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.0, b.0]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckldq))]
-pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
- punpckldq(a, b)
-}
-
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.padd.b"]
@@ -305,98 +200,4 @@
let e = u16x4::new(0, 11, 22, u16::max_value());
assert_eq!(r, e);
}
-
- #[simd_test = "mmx"]
- unsafe fn _mm_packs_pi16() {
- let a = i16x4::new(-1, 2, -3, 4);
- let b = i16x4::new(-5, 6, -7, 8);
- let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
- assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_packs_pi32() {
- let a = i32x2::new(-1, 2);
- let b = i32x2::new(-5, 6);
- let r = i16x4::new(-1, 2, -5, 6);
- assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_cmpgt_pi8() {
- let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
- let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
- let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
- assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_cmpgt_pi16() {
- let a = i16x4::new(0, 1, 2, 3);
- let b = i16x4::new(4, 3, 2, 1);
- let r = i16x4::new(0, 0, 0, -1);
- assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_cmpgt_pi32() {
- let a = i32x2::new(0, 3);
- let b = i32x2::new(1, 2);
- let r0 = i32x2::new(0, -1);
- let r1 = i32x2::new(-1, 0);
-
- assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
- assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpackhi_pi8() {
- let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
- let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
- let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
-
- assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpacklo_pi8() {
- let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
- let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
- let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
- assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpackhi_pi16() {
- let a = i16x4::new(0, 1, 2, 3);
- let b = i16x4::new(4, 5, 6, 7);
- let r = i16x4::new(2, 6, 3, 7);
- assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpacklo_pi16() {
- let a = i16x4::new(0, 1, 2, 3);
- let b = i16x4::new(4, 5, 6, 7);
- let r = i16x4::new(0, 4, 1, 5);
- assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpackhi_pi32() {
- let a = i32x2::new(0, 3);
- let b = i32x2::new(1, 2);
- let r = i32x2::new(3, 2);
-
- assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
- }
-
- #[simd_test = "mmx"]
- unsafe fn _mm_unpacklo_pi32() {
- let a = i32x2::new(0, 3);
- let b = i32x2::new(1, 2);
- let r = i32x2::new(0, 1);
-
- assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
- }
}
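
The surviving `_mm_adds_*` wrappers in this file are saturating lane-wise adds; Rust's scalar saturating arithmetic models a single lane exactly (sketch only, not crate API):

// One lane of `_mm_adds_pi8`: the sum clamps to [-128, 127] instead of wrapping.
fn adds_pi8_lane(a: i8, b: i8) -> i8 {
    a.saturating_add(b)
}

// One lane of `_mm_adds_pu8`: the unsigned sum clamps to [0, 255].
fn adds_pu8_lane(a: u8, b: u8) -> u8 {
    a.saturating_add(b)
}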
diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs
index e91db45..706acf9 100644
--- a/coresimd/src/x86/i686/sse.rs
+++ b/coresimd/src/x86/i686/sse.rs
@@ -4,7 +4,6 @@
use v64::*;
use core::mem;
use x86::i586;
-use x86::i686::mmx;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -48,7 +47,7 @@
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
pmaxsw(a, b)
@@ -57,7 +56,7 @@
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
_mm_max_pi16(a, b)
@@ -66,7 +65,7 @@
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
pmaxub(a, b)
@@ -75,7 +74,7 @@
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
_mm_max_pu8(a, b)
@@ -84,7 +83,7 @@
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
pminsw(a, b)
@@ -93,7 +92,7 @@
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
_mm_min_pi16(a, b)
@@ -102,7 +101,7 @@
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
pminub(a, b)
@@ -111,7 +110,7 @@
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
_mm_min_pu8(a, b)
@@ -121,7 +120,7 @@
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
pmulhuw(a, b)
@@ -131,7 +130,7 @@
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
_mm_mulhi_pu16(a, b)
@@ -141,7 +140,7 @@
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
pavgb(a, b)
@@ -151,7 +150,7 @@
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu8(a, b)
@@ -161,7 +160,7 @@
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
pavgw(a, b)
@@ -171,7 +170,7 @@
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu16(a, b)
@@ -182,7 +181,7 @@
/// difference. Then the sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
psadbw(a, b)
@@ -193,7 +192,7 @@
/// difference. Then the sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
_mm_sad_pu8(a, b)
@@ -204,7 +203,7 @@
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: f32x4, b: i32x2) -> f32x4 {
cvtpi2ps(a, mem::transmute(b))
@@ -215,66 +214,16 @@
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
_mm_cvtpi32_ps(a, b)
}
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
- let b = mmx::_mm_setzero_si64();
- let b = mmx::_mm_cmpgt_pi8(b, a);
- let b = mmx::_mm_unpacklo_pi8(a, b);
- _mm_cvtpi16_ps(b)
-}
-
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
- let b = mmx::_mm_setzero_si64();
- let b = mmx::_mm_unpacklo_pi8(a, b);
- _mm_cvtpi16_ps(b)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
- let b = mmx::_mm_setzero_si64();
- let b = mmx::_mm_cmpgt_pi16(b, a);
- let c = mmx::_mm_unpackhi_pi16(a, b);
- let r = i586::_mm_setzero_ps();
- let r = cvtpi2ps(r, c);
- let r = i586::_mm_movelh_ps(r, r);
- let c = mmx::_mm_unpacklo_pi16(a, b);
- cvtpi2ps(r, c)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
- let b = mmx::_mm_setzero_si64();
- let c = mmx::_mm_unpackhi_pi16(a, b);
- let r = i586::_mm_setzero_ps();
- let r = cvtpi2ps(r, c);
- let r = i586::_mm_movelh_ps(r, r);
- let c = mmx::_mm_unpacklo_pi16(a, b);
- cvtpi2ps(r, c)
-}
-
/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> f32x4 {
let c = i586::_mm_setzero_ps();
@@ -291,7 +240,7 @@
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
maskmovq(a, mask, mem_addr)
@@ -305,7 +254,7 @@
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
_mm_maskmove_si64(a, mask, mem_addr)
@@ -314,7 +263,7 @@
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
macro_rules! call {
@@ -326,7 +275,7 @@
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 {
_mm_extract_pi16(a, imm2)
@@ -336,7 +285,7 @@
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `n`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
@@ -349,7 +298,7 @@
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `n`.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
_mm_insert_pi16(a, d, imm2)
@@ -359,7 +308,7 @@
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
pmovmskb(mem::transmute(a))
@@ -369,7 +318,7 @@
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 {
_mm_movemask_pi8(a)
@@ -378,7 +327,7 @@
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
@@ -390,7 +339,7 @@
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
_mm_shuffle_pi16(a, imm8)
@@ -399,7 +348,7 @@
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvttps2pi(a))
@@ -408,7 +357,7 @@
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
_mm_cvttps_pi32(a)
@@ -417,7 +366,7 @@
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> __m64 {
cvtps2pi(a)
@@ -426,36 +375,12 @@
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> __m64 {
_mm_cvtps_pi32(a)
}
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 16-bit integers.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> __m64 {
- let b = _mm_cvtps_pi32(a);
- let a = i586::_mm_movehl_ps(a, a);
- let c = _mm_cvtps_pi32(a);
- mmx::_mm_packs_pi32(b, c)
-}
-
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 8-bit integers, and returns theem in the lower 4 elements of the
-/// result.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> __m64 {
- let b = _mm_cvtps_pi16(a);
- let c = mmx::_mm_setzero_si64();
- mmx::_mm_packs_pi16(b, c)
-}
-
#[cfg(test)]
mod tests {
use std::mem;
@@ -563,38 +488,6 @@
}
#[simd_test = "sse"]
- unsafe fn _mm_cvtpi16_ps() {
- let a = i16x4::new(1, 2, 3, 4);
- let expected = f32x4::new(1., 2., 3., 4.);
- let r = sse::_mm_cvtpi16_ps(a.into());
- assert_eq!(r, expected);
- }
-
- #[simd_test = "sse"]
- unsafe fn _mm_cvtpu16_ps() {
- let a = u16x4::new(1, 2, 3, 4);
- let expected = f32x4::new(1., 2., 3., 4.);
- let r = sse::_mm_cvtpu16_ps(a.into());
- assert_eq!(r, expected);
- }
-
- #[simd_test = "sse"]
- unsafe fn _mm_cvtpi8_ps() {
- let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
- let expected = f32x4::new(1., 2., 3., 4.);
- let r = sse::_mm_cvtpi8_ps(a.into());
- assert_eq!(r, expected);
- }
-
- #[simd_test = "sse"]
- unsafe fn _mm_cvtpu8_ps() {
- let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
- let expected = f32x4::new(1., 2., 3., 4.);
- let r = sse::_mm_cvtpu8_ps(a.into());
- assert_eq!(r, expected);
- }
-
- #[simd_test = "sse"]
unsafe fn _mm_cvtpi32x2_ps() {
let a = i32x2::new(1, 2);
let b = i32x2::new(3, 4);
@@ -684,18 +577,4 @@
assert_eq!(r, sse::_mm_cvttps_pi32(a));
assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
}
-
- #[simd_test = "sse"]
- unsafe fn _mm_cvtps_pi16() {
- let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
- let r = i16x4::new(7, 2, 3, 4);
- assert_eq!(r, i16x4::from(sse::_mm_cvtps_pi16(a)));
- }
-
- #[simd_test = "sse"]
- unsafe fn _mm_cvtps_pi8() {
- let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
- let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
- assert_eq!(r, i8x8::from(sse::_mm_cvtps_pi8(a)));
- }
}
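
Two of the less obvious intrinsics above have simple scalar models; a sketch assuming 8-lane arrays (the helpers are illustrative, not crate API):

// `_mm_sad_pu8`: sum of the eight absolute byte differences, written to the
// low 16 bits of the result with bits [63:16] cleared. The maximum sum is
// 8 * 255 = 2040, so it always fits.
fn sad_pu8_sw(a: [u8; 8], b: [u8; 8]) -> u64 {
    (0..8).map(|i| (a[i] as i16 - b[i] as i16).abs() as u64).sum()
}

// `_mm_avg_pu8`: rounding average of unsigned bytes, (a + b + 1) >> 1 per lane.
fn avg_pu8_lane(a: u8, b: u8) -> u8 {
    ((a as u16 + b as u16 + 1) >> 1) as u8
}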
diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs
index c9b5fd3..1074be5 100644
--- a/coresimd/src/x86/i686/sse2.rs
+++ b/coresimd/src/x86/i686/sse2.rs
@@ -10,7 +10,7 @@
/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
paddq(a, b)
@@ -20,7 +20,7 @@
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
/// product.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: u32x2, b: u32x2) -> __m64 {
pmuludq(mem::transmute(a), mem::transmute(b))
@@ -29,7 +29,7 @@
/// Subtracts signed or unsigned 64-bit integer values and writes the
/// difference to the corresponding bits in the destination.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
psubq(a, b)
@@ -39,7 +39,7 @@
/// [2 x i32] into two double-precision floating-point values, returned in a
/// 128-bit vector of [2 x double].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
cvtpi2pd(mem::transmute(a))
@@ -48,7 +48,7 @@
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
/// the specified 64-bit integer values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
i64x2::new(mem::transmute(e0), mem::transmute(e1))
@@ -57,7 +57,7 @@
/// Initializes both values in a 128-bit vector of [2 x i64] with the
/// specified 64-bit value.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
i64x2::new(mem::transmute(a), mem::transmute(a))
@@ -66,7 +66,7 @@
/// Constructs a 128-bit integer vector, initialized in reverse order
/// with the specified 64-bit integral values.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
i64x2::new(mem::transmute(e1), mem::transmute(e0))
@@ -75,7 +75,7 @@
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
/// integer.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
@@ -85,7 +85,7 @@
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
/// upper bits.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
@@ -96,7 +96,7 @@
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
mem::transmute(cvtpd2pi(a))
@@ -108,7 +108,7 @@
/// If the result of either conversion is inexact, the result is truncated
/// (rounded towards zero) regardless of the current MXCSR setting.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: f64x2) -> i32x2 {
mem::transmute(cvttpd2pi(a))
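
The last two conversions differ only in rounding: `_mm_cvtpd_pi32` uses the current MXCSR rounding mode (round-to-nearest-even by default), while `_mm_cvttpd_pi32` always truncates toward zero. A scalar sketch under the default rounding mode, ignoring the hardware's out-of-range behaviour:

fn cvt_one(x: f64) -> (i32, i32) {
    let rounded = x.round_ties_even() as i32; // cvtpd2pi with default MXCSR
    let truncated = x as i32;                 // cvttpd2pi: `as` truncates toward zero
    (rounded, truncated)
}
// cvt_one(2.5)  == (2, 2)   -- ties round to the even value
// cvt_one(-1.7) == (-2, -1)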
diff --git a/coresimd/src/x86/i686/sse41.rs b/coresimd/src/x86/i686/sse41.rs
index 16e767f..68d698e 100644
--- a/coresimd/src/x86/i686/sse41.rs
+++ b/coresimd/src/x86/i686/sse41.rs
@@ -29,7 +29,7 @@
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
ptestz(i64x2::from(a), i64x2::from(mask))
@@ -49,7 +49,7 @@
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestc(i64x2::from(a), i64x2::from(mask))
@@ -69,7 +69,7 @@
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestnzc(i64x2::from(a), i64x2::from(mask))
@@ -89,7 +89,7 @@
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
_mm_testz_si128(a, mask)
@@ -107,7 +107,7 @@
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
@@ -129,7 +129,7 @@
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
_mm_testnzc_si128(a, mask)
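
A scalar model of the `ptest` flags these wrappers expose, treating the 128-bit operands as plain bit masks (sketch only, helper names are made up):

// ZF is set when every mask-selected bit of `a` is 0 (`_mm_testz_si128`).
fn testz(a: u128, mask: u128) -> i32 {
    (a & mask == 0) as i32
}

// CF is set when every mask-selected bit of `a` is 1 (`_mm_testc_si128`).
fn testc(a: u128, mask: u128) -> i32 {
    (!a & mask == 0) as i32
}

// `_mm_testnzc_si128`: neither all zeros nor all ones among the selected bits.
fn testnzc(a: u128, mask: u128) -> i32 {
    (testz(a, mask) == 0 && testc(a, mask) == 0) as i32
}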
diff --git a/coresimd/src/x86/i686/sse42.rs b/coresimd/src/x86/i686/sse42.rs
index a7fe082..79df38a 100644
--- a/coresimd/src/x86/i686/sse42.rs
+++ b/coresimd/src/x86/i686/sse42.rs
@@ -8,7 +8,7 @@
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
/// return the results.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpgtq))]
pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
a.gt(b)
diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs
index 884097e..79f4066 100644
--- a/coresimd/src/x86/i686/sse4a.rs
+++ b/coresimd/src/x86/i686/sse4a.rs
@@ -33,7 +33,7 @@
/// If `length == 0 && index > 0` or `length + index > 64` the result is
/// undefined.
#[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(extrq))]
pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
extrq(x, mem::transmute(y))
@@ -49,7 +49,7 @@
/// If the `length` is zero it is interpreted as `64`. If `index + length > 64`
/// or `index > 0 && length == 0` the result is undefined.
#[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(insertq))]
pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
insertq(x, y)
@@ -57,7 +57,7 @@
/// Non-temporal store of `a.0` into `p`.
#[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(movntsd))]
pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
movntsd(p, a);
@@ -65,7 +65,7 @@
/// Non-temporal store of `a.0` into `p`.
#[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(movntss))]
pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
movntss(p, a);
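
A rough scalar sketch of the `extrq` bit-field extraction behind `_mm_extract_si64`, assuming a zero length selects the full 64 bits (as the insert variant documents) and leaving the combinations documented as undefined unhandled; the helper is illustrative only:

// `len` is taken from bits [5:0] and `idx` from bits [13:8] of the second operand.
fn extract_si64_sw(x: u64, y: u64) -> u64 {
    let len = (y & 0x3f) as u32;
    let idx = ((y >> 8) & 0x3f) as u32;
    let field = x >> idx;
    if len == 0 { field } else { field & ((1u64 << len) - 1) }
}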
diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs
index 9659735..573d067 100644
--- a/coresimd/src/x86/i686/ssse3.rs
+++ b/coresimd/src/x86/i686/ssse3.rs
@@ -8,7 +8,7 @@
/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
pabsb(a)
@@ -17,7 +17,7 @@
/// Compute the absolute value of packed 16-bit integers in `a`, and return the
/// unsigned results.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
pabsw(a)
@@ -26,7 +26,7 @@
/// Compute the absolute value of packed 32-bit integers in `a`, and return the
/// unsigned results.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
pabsd(a)
@@ -35,7 +35,7 @@
/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
/// the corresponding 8-bit element of `b`, and return the results
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
pshufb(a, b)
@@ -44,7 +44,7 @@
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
macro_rules! call {
@@ -58,7 +58,7 @@
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
phaddw(a, b)
@@ -67,7 +67,7 @@
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
phaddd(a, b)
@@ -77,7 +77,7 @@
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
phaddsw(a, b)
@@ -86,7 +86,7 @@
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
phsubw(a, b)
@@ -95,7 +95,7 @@
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
phsubd(a, b)
@@ -106,7 +106,7 @@
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
phsubsw(a, b)
@@ -118,7 +118,7 @@
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
pmaddubsw(a, b)
@@ -128,7 +128,7 @@
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
pmulhrsw(a, b)
@@ -139,7 +139,7 @@
/// Elements in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
psignb(a, b)
@@ -150,7 +150,7 @@
/// Elements in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
psignw(a, b)
@@ -161,7 +161,7 @@
/// Elements in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
psignd(a, b)
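
One lane of the `psign` family above, as a plain-Rust sketch (the helper is illustrative): the element of `a` is negated when the matching element of `b` is negative, zeroed when `b` is zero, and passed through unchanged otherwise.

fn sign_lane_i8(a: i8, b: i8) -> i8 {
    if b < 0 {
        a.wrapping_neg() // matches psign: i8::MIN stays i8::MIN rather than overflowing
    } else if b == 0 {
        0
    } else {
        a
    }
}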
diff --git a/coresimd/src/x86/x86_64/fxsr.rs b/coresimd/src/x86/x86_64/fxsr.rs
index f12d041..c2a7391 100644
--- a/coresimd/src/x86/x86_64/fxsr.rs
+++ b/coresimd/src/x86/x86_64/fxsr.rs
@@ -22,7 +22,7 @@
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
#[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxsave64))]
pub unsafe fn _fxsave64(mem_addr: *mut u8) {
fxsave64(mem_addr)
@@ -43,7 +43,7 @@
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
#[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxrstor64))]
pub unsafe fn _fxrstor64(mem_addr: *const u8) {
fxrstor64(mem_addr)
diff --git a/coresimd/src/x86/x86_64/mod.rs b/coresimd/src/x86/x86_64/mod.rs
index 7225e7b..5ba37b5 100644
--- a/coresimd/src/x86/x86_64/mod.rs
+++ b/coresimd/src/x86/x86_64/mod.rs
@@ -1,6 +1,8 @@
//! `x86_64` intrinsics
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
mod fxsr;
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
pub use self::fxsr::*;
mod sse;
diff --git a/coresimd/src/x86/x86_64/sse.rs b/coresimd/src/x86/x86_64/sse.rs
index aa18179..760582b 100644
--- a/coresimd/src/x86/x86_64/sse.rs
+++ b/coresimd/src/x86/x86_64/sse.rs
@@ -25,7 +25,7 @@
///
/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
pub unsafe fn _mm_cvtss_si64(a: f32x4) -> i64 {
cvtss2si64(a)
@@ -41,7 +41,7 @@
///
/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
pub unsafe fn _mm_cvttss_si64(a: f32x4) -> i64 {
cvttss2si64(a)
@@ -53,7 +53,7 @@
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
/// input).
#[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
pub unsafe fn _mm_cvtsi64_ss(a: f32x4, b: i64) -> f32x4 {
cvtsi642ss(a, b)
diff --git a/coresimd/src/x86/x86_64/sse2.rs b/coresimd/src/x86/x86_64/sse2.rs
index b0762c0..2e05262 100644
--- a/coresimd/src/x86/x86_64/sse2.rs
+++ b/coresimd/src/x86/x86_64/sse2.rs
@@ -16,7 +16,7 @@
/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 64-bit integer.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
cvtsd2si64(a)
@@ -24,7 +24,7 @@
/// Alias for [`_mm_cvtsd_si64`](fn._mm_cvtsd_si64.html).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
_mm_cvtsd_si64(a)
@@ -33,7 +33,7 @@
/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 64-bit integer with truncation.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
cvttsd2si64(a)
@@ -41,7 +41,7 @@
/// Alias for [`_mm_cvttsd_si64`](fn._mm_cvttsd_si64.html).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
_mm_cvttsd_si64(a)
@@ -51,7 +51,7 @@
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
// FIXME movnti on windows and linux x86_64
//#[cfg_attr(test, assert_instr(movntiq))]
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
@@ -61,7 +61,7 @@
/// Return a vector whose lowest element is `a` and all higher elements are
/// `0`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
pub unsafe fn _mm_cvtsi64_si128(a: i64) -> i64x2 {
i64x2::new(a, 0)
@@ -70,7 +70,7 @@
/// Return a vector whose lowest element is `a` and all higher elements are
/// `0`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> i64x2 {
_mm_cvtsi64_si128(a)
@@ -78,7 +78,7 @@
/// Return the lowest element of `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
pub unsafe fn _mm_cvtsi128_si64(a: i64x2) -> i64 {
a.extract(0)
@@ -86,7 +86,7 @@
/// Return the lowest element of `a`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
pub unsafe fn _mm_cvtsi128_si64x(a: i64x2) -> i64 {
_mm_cvtsi128_si64(a)
@@ -95,7 +95,7 @@
/// Return `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
pub unsafe fn _mm_cvtsi64_sd(a: f64x2, b: i64) -> f64x2 {
a.replace(0, b as f64)
@@ -104,7 +104,7 @@
/// Return `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
#[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
pub unsafe fn _mm_cvtsi64x_sd(a: f64x2, b: i64) -> f64x2 {
_mm_cvtsi64_sd(a, b)
diff --git a/coresimd/src/x86/x86_64/sse41.rs b/coresimd/src/x86/x86_64/sse41.rs
index 20fa606..2067e7f 100644
--- a/coresimd/src/x86/x86_64/sse41.rs
+++ b/coresimd/src/x86/x86_64/sse41.rs
@@ -7,7 +7,7 @@
/// Extract a 64-bit integer from `a` selected with `imm8`
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: i32) -> i64 {
@@ -18,7 +18,7 @@
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a
/// location specified by `imm8`.
#[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: i32) -> i64x2 {
a.replace((imm8 & 0b1) as u32, i)
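As above, a hedged fragment showing how these two intrinsics pair up under the new syntax (it assumes the same `extern crate stdsimd; use self::stdsimd::{simd, vendor};` scaffolding as the sse2.rs sketch earlier, and a caller that has already confirmed SSE4.1 support, which is not part of the x86_64 baseline):

    // `imm8 & 0b1` in the implementation above means only lanes 0 and 1 are
    // addressable: write `i` into lane 1, then read it back out.
    #[target_feature(enable = "sse4.1")]
    unsafe fn set_and_get_high(a: simd::i64x2, i: i64) -> i64 {
        let v = vendor::_mm_insert_epi64(a, i, 1);
        vendor::_mm_extract_epi64(v, 1)
    }

    // e.g. unsafe { set_and_get_high(simd::i64x2::new(1, 2), 7) } == 7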
diff --git a/coresimd/src/x86/x86_64/sse42.rs b/coresimd/src/x86/x86_64/sse42.rs
index b1de00b..9cabcb2 100644
--- a/coresimd/src/x86/x86_64/sse42.rs
+++ b/coresimd/src/x86/x86_64/sse42.rs
@@ -12,7 +12,7 @@
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 64-bit integer `v`.
#[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
crc32_64_64(crc, v)
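A hedged sketch of accumulating a CRC32 value over a slice of 64-bit words with the wrapper above (same assumed `vendor` import scaffolding as the earlier sketches; the caller is assumed to have verified SSE4.2 support):

    // Fold each 64-bit word into a running CRC32 value, starting from 0.
    #[target_feature(enable = "sse4.2")]
    unsafe fn crc32_words(words: &[u64]) -> u64 {
        let mut crc = 0;
        for &w in words {
            crc = vendor::_mm_crc32_u64(crc, w);
        }
        crc
    }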
diff --git a/coresimd/src/x86/x86_64/xsave.rs b/coresimd/src/x86/x86_64/xsave.rs
index 6f8eaa6..fc8b38c 100644
--- a/coresimd/src/x86/x86_64/xsave.rs
+++ b/coresimd/src/x86/x86_64/xsave.rs
@@ -30,7 +30,7 @@
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsave64))]
pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -43,7 +43,7 @@
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
#[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xrstor64))]
pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
@@ -57,7 +57,7 @@
/// the manner in which data is saved. The performance of this instruction will
/// be equal to or better than using the `XSAVE64` instruction.
#[inline(always)]
-#[target_feature = "+xsave,+xsaveopt"]
+#[target_feature(enable = "xsave,xsaveopt")]
#[cfg_attr(test, assert_instr(xsaveopt64))]
pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -70,7 +70,7 @@
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsavec"]
+#[target_feature(enable = "xsave,xsavec")]
#[cfg_attr(test, assert_instr(xsavec64))]
pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -84,7 +84,7 @@
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xsaves64))]
pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -100,7 +100,7 @@
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
#[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xrstors64))]
pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
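Every wrapper in this file lowers its 64-bit mask the same way, into high and low `u32` halves for the instruction's implicit `edx:eax` operands. A minimal self-contained sketch of just that split, with an illustrative helper name:

    // Split a 64-bit XSAVE state-component mask into the (high, low) u32
    // halves that the wrappers above pass through to the intrinsics.
    fn split_mask(mask: u64) -> (u32, u32) {
        ((mask >> 32) as u32, mask as u32)
    }

    fn main() {
        assert_eq!(split_mask(0x0000_0003_0000_0007), (3, 7));
    }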
diff --git a/examples/play.rs b/examples/play.rs
index 4731683..e4160e2 100644
--- a/examples/play.rs
+++ b/examples/play.rs
@@ -16,8 +16,8 @@
use self::stdsimd::vendor;
#[inline(never)]
- #[target_feature = "+sse4.2"]
- fn index(needle: &str, haystack: &str) -> usize {
+ #[target_feature(enable = "sse4.2")]
+ unsafe fn index(needle: &str, haystack: &str) -> usize {
assert!(needle.len() <= 16 && haystack.len() <= 16);
let (needle_len, hay_len) = (needle.len(), haystack.len());
@@ -30,15 +30,13 @@
haystack.resize(16, 0);
let vhaystack = s::__m128i::from(s::u8x16::load(&haystack, 0));
- unsafe {
- vendor::_mm_cmpestri(
- vneedle,
- needle_len as i32,
- vhaystack,
- hay_len as i32,
- vendor::_SIDD_CMP_EQUAL_ORDERED,
- ) as usize
- }
+ vendor::_mm_cmpestri(
+ vneedle,
+ needle_len as i32,
+ vhaystack,
+ hay_len as i32,
+ vendor::_SIDD_CMP_EQUAL_ORDERED,
+ ) as usize
}
pub fn main() {
@@ -58,7 +56,9 @@
let needle = env::args().nth(1).unwrap();
let haystack = env::args().nth(2).unwrap();
- println!("{:?}", index(&needle, &haystack));
+ unsafe {
+ println!("{:?}", index(&needle, &haystack));
+ }
}
}
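This hunk is the pattern the rest of the patch follows: the `#[target_feature]` function becomes an `unsafe fn`, and the `unsafe` obligation moves to the call site. A hedged, illustrative wrapper over the `index` function above that spells out the contract (not part of the patch):

    // Illustrative call site: `index` is now an `unsafe fn`, so the caller
    // takes on the obligation, stated here only as a comment, that the
    // running CPU supports SSE4.2 (e.g. verified by runtime detection).
    fn search(needle: &str, haystack: &str) -> usize {
        unsafe { index(needle, haystack) }
    }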
diff --git a/examples/types.rs b/examples/types.rs
deleted file mode 100644
index 8bc0bc4..0000000
--- a/examples/types.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![feature(target_feature)]
-#![cfg_attr(feature = "cargo-clippy",
- allow(missing_docs_in_private_items, result_unwrap_used,
- option_unwrap_used, print_stdout, use_debug))]
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod example {
- extern crate stdsimd;
-
- use std::env;
- use self::stdsimd::simd;
-
- #[inline(never)]
- #[target_feature = "-sse2"]
- fn myop(
- (x0, x1, x2, x3): (u64, u64, u64, u64),
- (y0, y1, y2, y3): (u64, u64, u64, u64),
- ) -> (u64, u64, u64, u64) {
- let x = simd::u64x4::new(x0, x1, x2, x3);
- let y = simd::u64x4::new(y0, y1, y2, y3);
- let r = x * y;
- (r.extract(0), r.extract(1), r.extract(2), r.extract(3))
- }
-
- pub fn main() {
- let x = env::args().nth(1).unwrap().parse().unwrap();
- let y = env::args().nth(1).unwrap().parse().unwrap();
- let r = myop((x, x, x, x), (y, y, y, y));
- println!("{:?}", r);
- }
-}
-
-fn main() {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- example::main();
-}
diff --git a/examples/wat.rs b/examples/wat.rs
deleted file mode 100644
index 5a70eed..0000000
--- a/examples/wat.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![feature(target_feature)]
-#![cfg_attr(feature = "cargo-clippy",
- allow(missing_docs_in_private_items, result_unwrap_used,
- option_unwrap_used, print_stdout, use_debug))]
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod example {
- extern crate stdsimd;
-
- use std::env;
- use self::stdsimd::simd;
-
- #[inline(never)]
- #[target_feature = "-sse2"]
- fn myop(
- (x0, x1, x2, x3): (u64, u64, u64, u64),
- (y0, y1, y2, y3): (u64, u64, u64, u64),
- ) -> (u64, u64, u64, u64) {
- let x = simd::u64x4::new(x0, x1, x2, x3);
- let y = simd::u64x4::new(y0, y1, y2, y3);
- let r = x * y;
- (r.extract(0), r.extract(1), r.extract(2), r.extract(3))
- }
-
- pub fn main() {
- let x = env::args().nth(1).unwrap().parse().unwrap();
- let y = env::args().nth(2).unwrap().parse().unwrap();
- let r = myop((x, x, x, x), (y, y, y, y));
- println!("{:?}", r);
- }
-}
-
-fn main() {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- example::main();
-}
diff --git a/src/lib.rs b/src/lib.rs
index 277ad2a..6197eec 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -61,7 +61,7 @@
//! // This function is only safe to call if the CPU where the
//! // binary runs supports SSE2.
//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-//! #[target_feature = "+sse2"]
+//! #[target_feature(enable = "sse2")]
//! unsafe fn sum_sse2(x: i32x4) -> i32 {
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 8).into());
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 4).into());
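For readers of the crate-level docs, a hedged sketch of the safe dispatch a user might wrap around `sum_sse2` on an x86/x86_64 target (the `cfg_feature_enabled!` runtime check is assumed to be this crate's detection macro, and the lane-wise fallback is illustrative only):

    fn sum(x: i32x4) -> i32 {
        if cfg_feature_enabled!("sse2") {
            // Sound: SSE2 support was just verified at runtime.
            unsafe { sum_sse2(x) }
        } else {
            // Portable fallback: add the four lanes individually.
            x.extract(0) + x.extract(1) + x.extract(2) + x.extract(3)
        }
    }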
diff --git a/stdsimd-test/simd-test-macro/src/lib.rs b/stdsimd-test/simd-test-macro/src/lib.rs
index 06c809b..6f9ddc4 100644
--- a/stdsimd-test/simd-test-macro/src/lib.rs
+++ b/stdsimd-test/simd-test-macro/src/lib.rs
@@ -47,8 +47,7 @@
let enable_feature = enable_feature
.trim_left_matches('"')
.trim_right_matches('"');
- let enable_feature =
- string(&(format!("+{}", enable_feature).replace(',', ",+")));
+ let enable_feature = string(enable_feature);
let item = TokenStream::from(item);
let name = find_name(item.clone());
@@ -77,7 +76,7 @@
::stdsimd_test::assert_skip_test_ok(stringify!(#name));
}
- #[target_feature = #enable_feature]
+ #[target_feature(enable = #enable_feature)]
#item
}
}.into();
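The deleted formatting line is the whole semantic change in this macro: the deprecated string form wanted `+`-prefixed, comma-joined features, while the new attribute takes feature names verbatim. A small self-contained sketch of the transformation that was removed:

    // What the removed `format!`/`replace` line used to produce for the
    // deprecated #[target_feature = "..."] string form.
    fn old_style(enable_feature: &str) -> String {
        format!("+{}", enable_feature).replace(',', ",+")
    }

    fn main() {
        assert_eq!(old_style("xsave,xsaves"), "+xsave,+xsaves");
        // The macro now emits the string unchanged, e.g.
        // #[target_feature(enable = "xsave,xsaves")].
    }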