Update `target_feature` syntax (#283) (#299)

(Backport onto 0.0.4)

This commit updates to the latest nightly's syntax, where `#[target_feature =
"+foo"]` is now deprecated in favor of `#[target_feature(enable = "foo")]`.
Additionally, `#[target_feature]` can only be applied to `unsafe` functions for
now.
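
For reference, a minimal before/after sketch of the attribute change (the
function below is purely illustrative and not part of this diff):

    // Old nightly syntax, now deprecated:
    //     #[target_feature = "+avx"]
    // New syntax; note the function must also be `unsafe` for now:
    #[target_feature(enable = "avx")]
    unsafe fn illustrative_avx_fn() {}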

Along the way this removes a few examples that were just left around, and it
also disables the `fxsr` modules, as that target feature is currently unknown
to the compiler and will first need to land upstream in rust-lang/rust.
diff --git a/Cargo.toml b/Cargo.toml
index 3864dc4..bede412 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "stdsimd"
-version = "0.0.4"
+version = "0.0.4-1"
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = "SIMD support in Rust's standard library."
 documentation = "https://docs.rs/stdsimd"
@@ -22,7 +22,7 @@
 maintenance = { status = "experimental" }
 
 [dependencies]
-coresimd = { version = "0.0.4", path = "coresimd/" }
+coresimd = { version = "0.0.4-1", path = "coresimd/" }
 
 [dev-dependencies]
 auxv = "0.3.3"
diff --git a/coresimd/Cargo.toml b/coresimd/Cargo.toml
index 4b02440..c7c4a15 100644
--- a/coresimd/Cargo.toml
+++ b/coresimd/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "coresimd"
-version = "0.0.4"
+version = "0.0.4-1"
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = "SIMD support in Rust's core library."
 documentation = "https://docs.rs/stdsimd"
@@ -21,7 +21,7 @@
 [dev-dependencies]
 cupid = "0.5.0"
 stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
-stdsimd = { version = "0.0.3", path = ".." }
+stdsimd = { version = "0.0.4-1", path = ".." }
 
 [features]
 # Internal-usage only: denies all warnings.
diff --git a/coresimd/src/aarch64/neon.rs b/coresimd/src/aarch64/neon.rs
index 55b4ff6..353a598 100644
--- a/coresimd/src/aarch64/neon.rs
+++ b/coresimd/src/aarch64/neon.rs
@@ -9,7 +9,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fadd))]
 pub unsafe fn vadd_f64(a: f64, b: f64) -> f64 {
     a + b
@@ -17,7 +17,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fadd))]
 pub unsafe fn vaddq_f64(a: f64x2, b: f64x2) -> f64x2 {
     simd_add(a, b)
@@ -25,7 +25,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
     a + b
@@ -33,7 +33,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
     a + b
diff --git a/coresimd/src/arm/neon.rs b/coresimd/src/arm/neon.rs
index 9103ccc..0c4efae 100644
--- a/coresimd/src/arm/neon.rs
+++ b/coresimd/src/arm/neon.rs
@@ -10,7 +10,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_s8(a: i8x8, b: i8x8) -> i8x8 {
     simd_add(a, b)
@@ -18,7 +18,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_s8(a: i8x16, b: i8x16) -> i8x16 {
     simd_add(a, b)
@@ -26,7 +26,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_s16(a: i16x4, b: i16x4) -> i16x4 {
     simd_add(a, b)
@@ -34,7 +34,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_s16(a: i16x8, b: i16x8) -> i16x8 {
     simd_add(a, b)
@@ -42,7 +42,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_s32(a: i32x2, b: i32x2) -> i32x2 {
     simd_add(a, b)
@@ -50,7 +50,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_s32(a: i32x4, b: i32x4) -> i32x4 {
     simd_add(a, b)
@@ -58,7 +58,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_s64(a: i64x2, b: i64x2) -> i64x2 {
     simd_add(a, b)
@@ -66,7 +66,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_u8(a: u8x8, b: u8x8) -> u8x8 {
     simd_add(a, b)
@@ -74,7 +74,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_u8(a: u8x16, b: u8x16) -> u8x16 {
     simd_add(a, b)
@@ -82,7 +82,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_u16(a: u16x4, b: u16x4) -> u16x4 {
     simd_add(a, b)
@@ -90,7 +90,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_u16(a: u16x8, b: u16x8) -> u16x8 {
     simd_add(a, b)
@@ -98,7 +98,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vadd_u32(a: u32x2, b: u32x2) -> u32x2 {
     simd_add(a, b)
@@ -106,7 +106,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_u32(a: u32x4, b: u32x4) -> u32x4 {
     simd_add(a, b)
@@ -114,7 +114,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(add))]
 pub unsafe fn vaddq_u64(a: u64x2, b: u64x2) -> u64x2 {
     simd_add(a, b)
@@ -122,7 +122,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fadd))]
 pub unsafe fn vadd_f32(a: f32x2, b: f32x2) -> f32x2 {
     simd_add(a, b)
@@ -130,7 +130,7 @@
 
 /// Vector add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fadd))]
 pub unsafe fn vaddq_f32(a: f32x4, b: f32x4) -> f32x4 {
     simd_add(a, b)
@@ -138,7 +138,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(saddl))]
 pub unsafe fn vaddl_s8(a: i8x8, b: i8x8) -> i16x8 {
     let a = a.as_i16x8();
@@ -148,7 +148,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(saddl))]
 pub unsafe fn vaddl_s16(a: i16x4, b: i16x4) -> i32x4 {
     let a = a.as_i32x4();
@@ -158,7 +158,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(saddl))]
 pub unsafe fn vaddl_s32(a: i32x2, b: i32x2) -> i64x2 {
     let a = a.as_i64x2();
@@ -168,7 +168,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uaddl))]
 pub unsafe fn vaddl_u8(a: u8x8, b: u8x8) -> u16x8 {
     let a = a.as_u16x8();
@@ -178,7 +178,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uaddl))]
 pub unsafe fn vaddl_u16(a: u16x4, b: u16x4) -> u32x4 {
     let a = a.as_u32x4();
@@ -188,7 +188,7 @@
 
 /// Vector long add.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uaddl))]
 pub unsafe fn vaddl_u32(a: u32x2, b: u32x2) -> u64x2 {
     let a = a.as_u64x2();
@@ -206,7 +206,7 @@
 
 /// Reciprocal square-root estimate.
 #[inline(always)]
-#[target_feature = "+neon"]
+#[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
     frsqrte_v2f32(a)
diff --git a/coresimd/src/arm/v7.rs b/coresimd/src/arm/v7.rs
index 5c0a73b..b620013 100644
--- a/coresimd/src/arm/v7.rs
+++ b/coresimd/src/arm/v7.rs
@@ -36,7 +36,8 @@
 /// Reverse the bit order.
 #[inline(always)]
 #[cfg_attr(test, assert_instr(rbit))]
-#[cfg_attr(target_arch = "arm", target_feature = "+v7")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
 pub unsafe fn _rbit_u32(x: u32) -> u32 {
     rbit_u32(x as i32) as u32
 }
@@ -73,6 +74,7 @@
     }
 
     #[test]
+    #[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
     fn _rbit_u32() {
         unsafe {
             assert_eq!(
diff --git a/coresimd/src/x86/i386/fxsr.rs b/coresimd/src/x86/i386/fxsr.rs
index 95626ad..28c8fb5 100644
--- a/coresimd/src/x86/i386/fxsr.rs
+++ b/coresimd/src/x86/i386/fxsr.rs
@@ -22,7 +22,7 @@
 /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
 /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
 #[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
 #[cfg_attr(test, assert_instr(fxsave))]
 pub unsafe fn _fxsave(mem_addr: *mut u8) {
     fxsave(mem_addr)
@@ -43,7 +43,7 @@
 /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
 /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
 #[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
 #[cfg_attr(test, assert_instr(fxrstor))]
 pub unsafe fn _fxrstor(mem_addr: *const u8) {
     fxrstor(mem_addr)
diff --git a/coresimd/src/x86/i386/mod.rs b/coresimd/src/x86/i386/mod.rs
index ed1ad04..9f32390 100644
--- a/coresimd/src/x86/i386/mod.rs
+++ b/coresimd/src/x86/i386/mod.rs
@@ -3,5 +3,8 @@
 mod eflags;
 pub use self::eflags::*;
 
+
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
 mod fxsr;
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
 pub use self::fxsr::*;
diff --git a/coresimd/src/x86/i586/abm.rs b/coresimd/src/x86/i586/abm.rs
index 2ca2cc1..2fb5597 100644
--- a/coresimd/src/x86/i586/abm.rs
+++ b/coresimd/src/x86/i586/abm.rs
@@ -24,7 +24,7 @@
 ///
 /// When the operand is zero, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+lzcnt"]
+#[target_feature(enable = "lzcnt")]
 #[cfg_attr(test, assert_instr(lzcnt))]
 pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
     x.leading_zeros()
@@ -34,7 +34,7 @@
 ///
 /// When the operand is zero, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+lzcnt"]
+#[target_feature(enable = "lzcnt")]
 #[cfg_attr(test, assert_instr(lzcnt))]
 pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
     x.leading_zeros() as u64
@@ -42,7 +42,7 @@
 
 /// Counts the bits that are set.
 #[inline(always)]
-#[target_feature = "+popcnt"]
+#[target_feature(enable = "popcnt")]
 #[cfg_attr(test, assert_instr(popcnt))]
 pub unsafe fn _popcnt32(x: i32) -> i32 {
     x.count_ones() as i32
@@ -50,7 +50,7 @@
 
 /// Counts the bits that are set.
 #[inline(always)]
-#[target_feature = "+popcnt"]
+#[target_feature(enable = "popcnt")]
 #[cfg_attr(test, assert_instr(popcnt))]
 pub unsafe fn _popcnt64(x: i64) -> i32 {
     x.count_ones() as i32
diff --git a/coresimd/src/x86/i586/avx.rs b/coresimd/src/x86/i586/avx.rs
index 1d7347e..f879d95 100644
--- a/coresimd/src/x86/i586/avx.rs
+++ b/coresimd/src/x86/i586/avx.rs
@@ -26,7 +26,7 @@
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddpd))]
 pub unsafe fn _mm256_add_pd(a: f64x4, b: f64x4) -> f64x4 {
     a + b
@@ -35,7 +35,7 @@
 /// Add packed single-precision (32-bit) floating-point elements in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddps))]
 pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
     a + b
@@ -45,7 +45,7 @@
 /// floating-point elements
 /// in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // FIXME: Should be 'vandpd' instuction.
 // See https://github.com/rust-lang-nursery/stdsimd/issues/71
 #[cfg_attr(test, assert_instr(vandps))]
@@ -58,7 +58,7 @@
 /// Compute the bitwise AND of packed single-precision (32-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandps))]
 pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 {
     let a: u32x8 = mem::transmute(a);
@@ -69,7 +69,7 @@
 /// Compute the bitwise OR packed double-precision (64-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // FIXME: Should be 'vorpd' instuction.
 // See https://github.com/rust-lang-nursery/stdsimd/issues/71
 #[cfg_attr(test, assert_instr(vorps))]
@@ -82,7 +82,7 @@
 /// Compute the bitwise OR packed single-precision (32-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vorps))]
 pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 {
     let a: u32x8 = mem::transmute(a);
@@ -93,7 +93,7 @@
 /// Shuffle double-precision (64-bit) floating-point elements within 128-bit
 /// lanes using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
 pub unsafe fn _mm256_shuffle_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -135,7 +135,7 @@
 /// Shuffle single-precision (32-bit) floating-point elements in `a` within
 /// 128-bit lanes using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
 pub unsafe fn _mm256_shuffle_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -186,7 +186,7 @@
 /// elements in `a`
 /// and then AND with `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // FIXME: Should be 'vandnpd' instruction.
 #[cfg_attr(test, assert_instr(vandnps))]
 pub unsafe fn _mm256_andnot_pd(a: f64x4, b: f64x4) -> f64x4 {
@@ -199,7 +199,7 @@
 /// elements in `a`
 /// and then AND with `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandnps))]
 pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
     let a: u32x8 = mem::transmute(a);
@@ -210,7 +210,7 @@
 /// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed maximum values
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaxpd))]
 pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
     maxpd256(a, b)
@@ -219,7 +219,7 @@
 /// Compare packed single-precision (32-bit) floating-point elements in `a`
 /// and `b`, and return packed maximum values
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaxps))]
 pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
     maxps256(a, b)
@@ -228,7 +228,7 @@
 /// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed minimum values
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vminpd))]
 pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
     minpd256(a, b)
@@ -237,7 +237,7 @@
 /// Compare packed single-precision (32-bit) floating-point elements in `a`
 /// and `b`, and return packed minimum values
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vminps))]
 pub unsafe fn _mm256_min_ps(a: f32x8, b: f32x8) -> f32x8 {
     minps256(a, b)
@@ -246,7 +246,7 @@
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulpd))]
 pub unsafe fn _mm256_mul_pd(a: f64x4, b: f64x4) -> f64x4 {
     a * b
@@ -255,7 +255,7 @@
 /// Add packed single-precision (32-bit) floating-point elements in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulps))]
 pub unsafe fn _mm256_mul_ps(a: f32x8, b: f32x8) -> f32x8 {
     a * b
@@ -264,7 +264,7 @@
 /// Alternatively add and subtract packed double-precision (64-bit)
 /// floating-point elements in `a` to/from packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddsubpd))]
 pub unsafe fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
     addsubpd256(a, b)
@@ -273,7 +273,7 @@
 /// Alternatively add and subtract packed single-precision (32-bit)
 /// floating-point elements in `a` to/from packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddsubps))]
 pub unsafe fn _mm256_addsub_ps(a: f32x8, b: f32x8) -> f32x8 {
     addsubps256(a, b)
@@ -282,7 +282,7 @@
 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
 /// from packed elements in `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsubpd))]
 pub unsafe fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
     a - b
@@ -291,7 +291,7 @@
 /// Subtract packed single-precision (32-bit) floating-point elements in `b`
 /// from packed elements in `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsubps))]
 pub unsafe fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 {
     a - b
@@ -300,7 +300,7 @@
 /// Compute the division of each of the 8 packed 32-bit floating-point elements
 /// in `a` by the corresponding packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdivps))]
 pub unsafe fn _mm256_div_ps(a: f32x8, b: f32x8) -> f32x8 {
     a / b
@@ -309,7 +309,7 @@
 /// Compute the division of each of the 4 packed 64-bit floating-point elements
 /// in `a` by the corresponding packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdivpd))]
 pub unsafe fn _mm256_div_pd(a: f64x4, b: f64x4) -> f64x4 {
     a / b
@@ -327,7 +327,7 @@
 ///
 /// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))]
 pub unsafe fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
     macro_rules! call {
@@ -339,7 +339,7 @@
 /// Round packed double-precision (64-bit) floating point elements in `a`
 /// toward positive infinity.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 pub unsafe fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
     roundpd256(a, 0x02)
@@ -348,7 +348,7 @@
 /// Round packed double-precision (64-bit) floating point elements in `a`
 /// toward negative infinity.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 {
     roundpd256(a, 0x01)
@@ -366,7 +366,7 @@
 ///
 /// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps, b = 0x00))]
 pub unsafe fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 {
     macro_rules! call {
@@ -380,7 +380,7 @@
 /// Round packed single-precision (32-bit) floating point elements in `a`
 /// toward positive infinity.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 pub unsafe fn _mm256_ceil_ps(a: f32x8) -> f32x8 {
     roundps256(a, 0x02)
@@ -389,7 +389,7 @@
 /// Round packed single-precision (32-bit) floating point elements in `a`
 /// toward negative infinity.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 pub unsafe fn _mm256_floor_ps(a: f32x8) -> f32x8 {
     roundps256(a, 0x01)
@@ -398,7 +398,7 @@
 /// Return the square root of packed single-precision (32-bit) floating point
 /// elements in `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsqrtps))]
 pub unsafe fn _mm256_sqrt_ps(a: f32x8) -> f32x8 {
     sqrtps256(a)
@@ -407,7 +407,7 @@
 /// Return the square root of packed double-precision (64-bit) floating point
 /// elements in `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsqrtpd))]
 pub unsafe fn _mm256_sqrt_pd(a: f64x4) -> f64x4 {
     sqrtpd256(a)
@@ -416,7 +416,7 @@
 /// Blend packed double-precision (64-bit) floating-point elements from
 /// `a` and `b` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
 pub unsafe fn _mm256_blend_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -458,7 +458,7 @@
 /// Blend packed single-precision (32-bit) floating-point elements from
 /// `a` and `b` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
 pub unsafe fn _mm256_blend_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -508,7 +508,7 @@
 /// Blend packed double-precision (64-bit) floating-point elements from
 /// `a` and `b` using `c` as a mask.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvpd))]
 pub unsafe fn _mm256_blendv_pd(a: f64x4, b: f64x4, c: f64x4) -> f64x4 {
     vblendvpd(a, b, c)
@@ -517,7 +517,7 @@
 /// Blend packed single-precision (32-bit) floating-point elements from
 /// `a` and `b` using `c` as a mask.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvps))]
 pub unsafe fn _mm256_blendv_ps(a: f32x8, b: f32x8, c: f32x8) -> f32x8 {
     vblendvps(a, b, c)
@@ -528,7 +528,7 @@
 /// sum the four products, and conditionally return the sum
 ///  using the low 4 bits of `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))]
 pub unsafe fn _mm256_dp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
     macro_rules! call {
@@ -542,7 +542,7 @@
 /// In the result, sums of elements from `a` are returned in even locations,
 /// while sums of elements from `b` are returned in odd locations.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddpd))]
 pub unsafe fn _mm256_hadd_pd(a: f64x4, b: f64x4) -> f64x4 {
     vhaddpd(a, b)
@@ -554,7 +554,7 @@
 /// indices 0, 1, 4, 5; while sums of elements from `b` are locations
 /// 2, 3, 6, 7.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddps))]
 pub unsafe fn _mm256_hadd_ps(a: f32x8, b: f32x8) -> f32x8 {
     vhaddps(a, b)
@@ -565,7 +565,7 @@
 /// In the result, sums of elements from `a` are returned in even locations,
 /// while sums of elements from `b` are returned in odd locations.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubpd))]
 pub unsafe fn _mm256_hsub_pd(a: f64x4, b: f64x4) -> f64x4 {
     vhsubpd(a, b)
@@ -577,7 +577,7 @@
 /// indices 0, 1, 4, 5; while sums of elements from `b` are locations
 /// 2, 3, 6, 7.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubps))]
 pub unsafe fn _mm256_hsub_ps(a: f32x8, b: f32x8) -> f32x8 {
     vhsubps(a, b)
@@ -586,7 +586,7 @@
 /// Compute the bitwise XOR of packed double-precision (64-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // FIXME Should be 'vxorpd' instruction.
 #[cfg_attr(test, assert_instr(vxorps))]
 pub unsafe fn _mm256_xor_pd(a: f64x4, b: f64x4) -> f64x4 {
@@ -598,7 +598,7 @@
 /// Compute the bitwise XOR of packed single-precision (32-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 pub unsafe fn _mm256_xor_ps(a: f32x8, b: f32x8) -> f32x8 {
     let a: u32x8 = mem::transmute(a);
@@ -675,7 +675,7 @@
 /// elements in `a` and `b` based on the comparison operand
 /// specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
 pub unsafe fn _mm_cmp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
     macro_rules! call {
@@ -688,7 +688,7 @@
 /// elements in `a` and `b` based on the comparison operand
 /// specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd
 pub unsafe fn _mm256_cmp_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
     macro_rules! call {
@@ -701,7 +701,7 @@
 /// elements in `a` and `b` based on the comparison operand
 /// specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
 pub unsafe fn _mm_cmp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
     macro_rules! call {
@@ -714,7 +714,7 @@
 /// elements in `a` and `b` based on the comparison operand
 /// specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps
 pub unsafe fn _mm256_cmp_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
     macro_rules! call {
@@ -729,7 +729,7 @@
 /// and copy the upper element from `a` to the upper element of returned
 /// vector.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd
 pub unsafe fn _mm_cmp_sd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
     macro_rules! call {
@@ -744,7 +744,7 @@
 /// and copy the upper 3 packed elements from `a` to the upper elements of
 /// returned vector.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss
 pub unsafe fn _mm_cmp_ss(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
     macro_rules! call {
@@ -756,7 +756,7 @@
 /// Convert packed 32-bit integers in `a` to packed double-precision (64-bit)
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
 pub unsafe fn _mm256_cvtepi32_pd(a: i32x4) -> f64x4 {
     simd_cast(a)
@@ -765,7 +765,7 @@
 /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit)
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
 pub unsafe fn _mm256_cvtepi32_ps(a: i32x8) -> f32x8 {
     vcvtdq2ps(a)
@@ -774,7 +774,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a`
 /// to packed single-precision (32-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
 pub unsafe fn _mm256_cvtpd_ps(a: f64x4) -> f32x4 {
     vcvtpd2ps(a)
@@ -783,7 +783,7 @@
 /// Convert packed single-precision (32-bit) floating-point elements in `a`
 /// to packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtps2dq))]
 pub unsafe fn _mm256_cvtps_epi32(a: f32x8) -> i32x8 {
     vcvtps2dq(a)
@@ -792,7 +792,7 @@
 /// Convert packed single-precision (32-bit) floating-point elements in `a`
 /// to packed double-precision (64-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtps2pd))]
 pub unsafe fn _mm256_cvtps_pd(a: f32x4) -> f64x4 {
     a.as_f64x4()
@@ -801,7 +801,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a`
 /// to packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 pub unsafe fn _mm256_cvttpd_epi32(a: f64x4) -> i32x4 {
     vcvttpd2dq(a)
@@ -810,7 +810,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a`
 /// to packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtpd2dq))]
 pub unsafe fn _mm256_cvtpd_epi32(a: f64x4) -> i32x4 {
     vcvtpd2dq(a)
@@ -819,7 +819,7 @@
 /// Convert packed single-precision (32-bit) floating-point elements in `a`
 /// to packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub unsafe fn _mm256_cvttps_epi32(a: f32x8) -> i32x8 {
     vcvttps2dq(a)
@@ -828,7 +828,7 @@
 /// Extract 128 bits (composed of 4 packed single-precision (32-bit)
 /// floating-point elements) from `a`, selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vextractf128))]
 pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> f32x4 {
     match imm8 & 1 {
@@ -840,7 +840,7 @@
 /// Extract 128 bits (composed of 2 packed double-precision (64-bit)
 /// floating-point elements) from `a`, selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vextractf128))]
 pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> f64x2 {
     match imm8 & 1 {
@@ -851,7 +851,7 @@
 
 /// Extract 128 bits (composed of integer data) from `a`, selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vextractf128))]
 pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
     let b = i64x4::from(_mm256_undefined_si256());
@@ -864,7 +864,7 @@
 
 /// Zero the contents of all XMM or YMM registers.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vzeroall))]
 pub unsafe fn _mm256_zeroall() {
     vzeroall()
@@ -873,7 +873,7 @@
 /// Zero the upper 128 bits of all YMM registers;
 /// the lower 128-bits of the registers are unmodified.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vzeroupper))]
 pub unsafe fn _mm256_zeroupper() {
     vzeroupper()
@@ -882,7 +882,7 @@
 /// Shuffle single-precision (32-bit) floating-point elements in `a`
 /// within 128-bit lanes using the control in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps))]
 pub unsafe fn _mm256_permutevar_ps(a: f32x8, b: i32x8) -> f32x8 {
     vpermilps256(a, b)
@@ -891,7 +891,7 @@
 /// Shuffle single-precision (32-bit) floating-point elements in `a`
 /// using the control in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps))]
 pub unsafe fn _mm_permutevar_ps(a: f32x4, b: i32x4) -> f32x4 {
     vpermilps(a, b)
@@ -900,7 +900,7 @@
 /// Shuffle single-precision (32-bit) floating-point elements in `a`
 /// within 128-bit lanes using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
 pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -952,7 +952,7 @@
 /// Shuffle single-precision (32-bit) floating-point elements in `a`
 /// using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
 pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
     use x86::i586::sse::_mm_undefined_ps;
@@ -1006,7 +1006,7 @@
 /// Shuffle double-precision (64-bit) floating-point elements in `a`
 /// within 256-bit lanes using the control in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd))]
 pub unsafe fn _mm256_permutevar_pd(a: f64x4, b: i64x4) -> f64x4 {
     vpermilpd256(a, b)
@@ -1015,7 +1015,7 @@
 /// Shuffle double-precision (64-bit) floating-point elements in `a`
 /// using the control in `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd))]
 pub unsafe fn _mm_permutevar_pd(a: f64x2, b: i64x2) -> f64x2 {
     vpermilpd(a, b)
@@ -1024,7 +1024,7 @@
 /// Shuffle double-precision (64-bit) floating-point elements in `a`
 /// within 128-bit lanes using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
 pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1066,7 +1066,7 @@
 /// Shuffle double-precision (64-bit) floating-point elements in `a`
 /// using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
 pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 {
     use x86::i586::sse2::_mm_undefined_pd;
@@ -1094,7 +1094,7 @@
 /// Shuffle 256-bits (composed of 8 packed single-precision (32-bit)
 /// floating-point elements) selected by `imm8` from `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))]
 pub unsafe fn _mm256_permute2f128_ps(a: f32x8, b: f32x8, imm8: i32) -> f32x8 {
     macro_rules! call {
@@ -1106,7 +1106,7 @@
 /// Shuffle 256-bits (composed of 4 packed double-precision (64-bit)
 /// floating-point elements) selected by `imm8` from `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
 pub unsafe fn _mm256_permute2f128_pd(a: f64x4, b: f64x4, imm8: i32) -> f64x4 {
     macro_rules! call {
@@ -1118,7 +1118,7 @@
 /// Shuffle 258-bits (composed of integer data) selected by `imm8`
 /// from `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))]
 pub unsafe fn _mm256_permute2f128_si256(
     a: i32x8, b: i32x8, imm8: i32
@@ -1132,7 +1132,7 @@
 /// Broadcast a single-precision (32-bit) floating-point element from memory
 /// to all elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm256_broadcast_ss(f: &f32) -> f32x8 {
     f32x8::splat(*f)
@@ -1141,7 +1141,7 @@
 /// Broadcast a single-precision (32-bit) floating-point element from memory
 /// to all elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm_broadcast_ss(f: &f32) -> f32x4 {
     f32x4::splat(*f)
@@ -1150,7 +1150,7 @@
 /// Broadcast a double-precision (64-bit) floating-point element from memory
 /// to all elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 pub unsafe fn _mm256_broadcast_sd(f: &f64) -> f64x4 {
     f64x4::splat(*f)
@@ -1159,7 +1159,7 @@
 /// Broadcast 128 bits from memory (composed of 4 packed single-precision
 /// (32-bit) floating-point elements) to all elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 pub unsafe fn _mm256_broadcast_ps(a: &f32x4) -> f32x8 {
     vbroadcastf128ps256(a)
@@ -1168,7 +1168,7 @@
 /// Broadcast 128 bits from memory (composed of 2 packed double-precision
 /// (64-bit) floating-point elements) to all elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 pub unsafe fn _mm256_broadcast_pd(a: &f64x2) -> f64x4 {
     vbroadcastf128pd256(a)
@@ -1178,7 +1178,7 @@
 /// single-precision (32-bit) floating-point elements) from `b` into result
 /// at the location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
 pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: f32x4, imm8: i32) -> f32x8 {
     let b = _mm256_castps128_ps256(b);
@@ -1192,7 +1192,7 @@
 /// double-precision (64-bit) floating-point elements) from `b` into result
 /// at the location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
 pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 {
     match imm8 & 1 {
@@ -1204,7 +1204,7 @@
 /// Copy `a` to result, then insert 128 bits from `b` into result
 /// at the location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
 pub unsafe fn _mm256_insertf128_si256(
     a: __m256i, b: __m128i, imm8: i32
@@ -1220,7 +1220,7 @@
 /// Copy `a` to result, and insert the 8-bit integer `i` into result
 /// at the location specified by `index`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_insert_epi8(a: i8x32, i: i8, index: i32) -> i8x32 {
     let c = a;
@@ -1230,7 +1230,7 @@
 /// Copy `a` to result, and insert the 16-bit integer `i` into result
 /// at the location specified by `index`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_insert_epi16(a: i16x16, i: i16, index: i32) -> i16x16 {
     let c = a;
@@ -1240,7 +1240,7 @@
 /// Copy `a` to result, and insert the 32-bit integer `i` into result
 /// at the location specified by `index`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_insert_epi32(a: i32x8, i: i32, index: i32) -> i32x8 {
     let c = a;
@@ -1250,7 +1250,7 @@
 /// Copy `a` to result, and insert the 64-bit integer `i` into result
 /// at the location specified by `index`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 {
     let c = a;
@@ -1262,7 +1262,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
 pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 {
     *(mem_addr as *const f64x4)
@@ -1273,7 +1273,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
 pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: f64x4) {
     *(mem_addr as *mut f64x4) = a;
@@ -1284,7 +1284,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 {
     *(mem_addr as *const f32x8)
@@ -1295,7 +1295,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: f32x8) {
     *(mem_addr as *mut f32x8) = a;
@@ -1305,7 +1305,7 @@
 /// floating-point elements) from memory into result.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
 pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> f64x4 {
     let mut dst = _mm256_undefined_pd();
@@ -1321,7 +1321,7 @@
 /// floating-point elements) from `a` into memory.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
 pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: f64x4) {
     storeupd256(mem_addr, a);
@@ -1331,7 +1331,7 @@
 /// floating-point elements) from memory into result.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))]
 pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> f32x8 {
     let mut dst = _mm256_undefined_ps();
@@ -1347,7 +1347,7 @@
 /// floating-point elements) from `a` into memory.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))]
 pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: f32x8) {
     storeups256(mem_addr, a);
@@ -1357,7 +1357,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
 pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
     *mem_addr
@@ -1367,7 +1367,7 @@
 /// `mem_addr` must be aligned on a 32-byte boundary or a
 /// general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
 pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
     *mem_addr = a;
@@ -1376,7 +1376,7 @@
 /// Load 256-bits of integer data from memory into result.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
 pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
     let mut dst = _mm256_undefined_si256();
@@ -1389,9 +1389,9 @@
 }
 
 /// Store 256-bits of integer data from `a` into memory.
-/// 	`mem_addr` does not need to be aligned on any particular boundary.
+///     `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
 pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
     storeudq256(mem_addr as *mut i8, i8x32::from(a));
@@ -1401,7 +1401,7 @@
 /// into result using `mask` (elements are zeroed out when the high bit of the
 /// corresponding element is not set).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: i64x4) -> f64x4 {
     maskloadpd256(mem_addr as *const i8, mask)
@@ -1410,7 +1410,7 @@
 /// Store packed double-precision (64-bit) floating-point elements from `a`
 /// into memory using `mask`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: i64x4, a: f64x4) {
     maskstorepd256(mem_addr as *mut i8, mask, a);
@@ -1420,7 +1420,7 @@
 /// into result using `mask` (elements are zeroed out when the high bit of the
 /// corresponding element is not set).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: i64x2) -> f64x2 {
     maskloadpd(mem_addr as *const i8, mask)
@@ -1429,7 +1429,7 @@
 /// Store packed double-precision (64-bit) floating-point elements from `a`
 /// into memory using `mask`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: i64x2, a: f64x2) {
     maskstorepd(mem_addr as *mut i8, mask, a);
@@ -1439,7 +1439,7 @@
 /// into result using `mask` (elements are zeroed out when the high bit of the
 /// corresponding element is not set).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: i32x8) -> f32x8 {
     maskloadps256(mem_addr as *const i8, mask)
@@ -1448,7 +1448,7 @@
 /// Store packed single-precision (32-bit) floating-point elements from `a`
 /// into memory using `mask`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: i32x8, a: f32x8) {
     maskstoreps256(mem_addr as *mut i8, mask, a);
@@ -1458,7 +1458,7 @@
 /// into result using `mask` (elements are zeroed out when the high bit of the
 /// corresponding element is not set).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: i32x4) -> f32x4 {
     maskloadps(mem_addr as *const i8, mask)
@@ -1467,7 +1467,7 @@
 /// Store packed single-precision (32-bit) floating-point elements from `a`
 /// into memory using `mask`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: i32x4, a: f32x4) {
     maskstoreps(mem_addr as *mut i8, mask, a);
@@ -1476,7 +1476,7 @@
 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements
 /// from `a`, and return the results.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
 pub unsafe fn _mm256_movehdup_ps(a: f32x8) -> f32x8 {
     simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
@@ -1485,7 +1485,7 @@
 /// Duplicate even-indexed single-precision (32-bit) floating-point elements
 /// from `a`, and return the results.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
 pub unsafe fn _mm256_moveldup_ps(a: f32x8) -> f32x8 {
     simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
@@ -1494,7 +1494,7 @@
 /// Duplicate even-indexed double-precision (64-bit) floating-point elements
 /// from "a", and return the results.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovddup))]
 pub unsafe fn _mm256_movedup_pd(a: f64x4) -> f64x4 {
     simd_shuffle4(a, a, [0, 0, 2, 2])
@@ -1504,7 +1504,7 @@
 /// This intrinsic may perform better than `_mm256_loadu_si256` when the
 /// data crosses a cache line boundary.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vlddqu))]
 pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 {
     vlddqu(mem_addr as *const i8)
@@ -1514,7 +1514,7 @@
 /// aligned memory location. To minimize caching, the data is flagged as
 /// non-temporal (unlikely to be used again soon)
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
 pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1524,7 +1524,7 @@
 /// to a 32-byte aligned memory location. To minimize caching, the data is
 /// flagged as non-temporal (unlikely to be used again soon).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
 pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: f64x4) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1535,7 +1535,7 @@
 /// caching, the data is flagged as non-temporal (unlikely to be used again
 /// soon).
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))]
 pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: f32x8) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1545,7 +1545,7 @@
 /// floating-point elements in `a`, and return the results. The maximum
 /// relative error for this approximation is less than 1.5*2^-12.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vrcpps))]
 pub unsafe fn _mm256_rcp_ps(a: f32x8) -> f32x8 {
     vrcpps(a)
@@ -1555,7 +1555,7 @@
 /// (32-bit) floating-point elements in `a`, and return the results.
 /// The maximum relative error for this approximation is less than 1.5*2^-12.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vrsqrtps))]
 pub unsafe fn _mm256_rsqrt_ps(a: f32x8) -> f32x8 {
     vrsqrtps(a)
@@ -1564,7 +1564,7 @@
 /// Unpack and interleave double-precision (64-bit) floating-point elements
 /// from the high half of each 128-bit lane in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
 pub unsafe fn _mm256_unpackhi_pd(a: f64x4, b: f64x4) -> f64x4 {
     simd_shuffle4(a, b, [1, 5, 3, 7])
@@ -1573,7 +1573,7 @@
 /// Unpack and interleave single-precision (32-bit) floating-point elements
 /// from the high half of each 128-bit lane in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
 pub unsafe fn _mm256_unpackhi_ps(a: f32x8, b: f32x8) -> f32x8 {
     simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
@@ -1582,7 +1582,7 @@
 /// Unpack and interleave double-precision (64-bit) floating-point elements
 /// from the low half of each 128-bit lane in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
 pub unsafe fn _mm256_unpacklo_pd(a: f64x4, b: f64x4) -> f64x4 {
     simd_shuffle4(a, b, [0, 4, 2, 6])
@@ -1591,7 +1591,7 @@
 /// Unpack and interleave single-precision (32-bit) floating-point elements
 /// from the low half of each 128-bit lane in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
 pub unsafe fn _mm256_unpacklo_ps(a: f32x8, b: f32x8) -> f32x8 {
     simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
@@ -1602,7 +1602,7 @@
 /// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
 /// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 pub unsafe fn _mm256_testz_si256(a: i64x4, b: i64x4) -> i32 {
     ptestz256(a, b)
@@ -1613,7 +1613,7 @@
 /// Compute the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
 /// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 pub unsafe fn _mm256_testc_si256(a: i64x4, b: i64x4) -> i32 {
     ptestc256(a, b)
@@ -1625,7 +1625,7 @@
 /// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
 /// `CF` values are zero, otherwise return 0.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 pub unsafe fn _mm256_testnzc_si256(a: i64x4, b: i64x4) -> i32 {
     ptestnzc256(a, b)
@@ -1639,7 +1639,7 @@
 /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `ZF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm256_testz_pd(a: f64x4, b: f64x4) -> i32 {
     vtestzpd256(a, b)
@@ -1653,7 +1653,7 @@
 /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `CF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm256_testc_pd(a: f64x4, b: f64x4) -> i32 {
     vtestcpd256(a, b)
@@ -1668,7 +1668,7 @@
 /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
 /// are zero, otherwise return 0.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm256_testnzc_pd(a: f64x4, b: f64x4) -> i32 {
     vtestnzcpd256(a, b)
@@ -1682,7 +1682,7 @@
 /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `ZF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm_testz_pd(a: f64x2, b: f64x2) -> i32 {
     vtestzpd(a, b)
@@ -1696,7 +1696,7 @@
 /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `CF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm_testc_pd(a: f64x2, b: f64x2) -> i32 {
     vtestcpd(a, b)
@@ -1711,7 +1711,7 @@
 /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
 /// are zero, otherwise return 0.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 pub unsafe fn _mm_testnzc_pd(a: f64x2, b: f64x2) -> i32 {
     vtestnzcpd(a, b)
@@ -1725,7 +1725,7 @@
 /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `ZF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm256_testz_ps(a: f32x8, b: f32x8) -> i32 {
     vtestzps256(a, b)
@@ -1739,7 +1739,7 @@
 /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `CF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm256_testc_ps(a: f32x8, b: f32x8) -> i32 {
     vtestcps256(a, b)
@@ -1754,7 +1754,7 @@
 /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
 /// are zero, otherwise return 0.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm256_testnzc_ps(a: f32x8, b: f32x8) -> i32 {
     vtestnzcps256(a, b)
@@ -1768,7 +1768,7 @@
 /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `ZF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm_testz_ps(a: f32x4, b: f32x4) -> i32 {
     vtestzps(a, b)
@@ -1782,7 +1782,7 @@
 /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
 /// is zero, otherwise set `CF` to 0. Return the `CF` value.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm_testc_ps(a: f32x4, b: f32x4) -> i32 {
     vtestcps(a, b)
@@ -1797,7 +1797,7 @@
 /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
 /// are zero, otherwise return 0.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 pub unsafe fn _mm_testnzc_ps(a: f32x4, b: f32x4) -> i32 {
     vtestnzcps(a, b)
@@ -1807,7 +1807,7 @@
 /// corresponding packed double-precision (64-bit) floating-point element in
 /// `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovmskpd))]
 pub unsafe fn _mm256_movemask_pd(a: f64x4) -> i32 {
     movmskpd256(a)
@@ -1817,7 +1817,7 @@
 /// corresponding packed single-precision (32-bit) floating-point element in
 /// `a`.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovmskps))]
 pub unsafe fn _mm256_movemask_ps(a: f32x8) -> i32 {
     movmskps256(a)
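A small usage sketch for the movemask pair above (illustrative only, not part of this patch; assumes AVX is available to the caller): each bit of the returned mask is the sign bit of the corresponding lane.

```rust
unsafe fn any_negative(a: f32x8) -> bool {
    // Bits 0..8 hold the sign bits of the eight `f32` lanes.
    _mm256_movemask_ps(a) != 0
}
```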
@@ -1825,7 +1825,7 @@
 
 /// Return vector of type __m256d with all elements set to zero.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
 pub unsafe fn _mm256_setzero_pd() -> f64x4 {
     f64x4::new(0., 0., 0., 0.)
@@ -1833,7 +1833,7 @@
 
 /// Return vector of type __m256 with all elements set to zero.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 pub unsafe fn _mm256_setzero_ps() -> f32x8 {
     f32x8::new(0., 0., 0., 0., 0., 0., 0., 0.)
@@ -1841,7 +1841,7 @@
 
 /// Return vector of type __m256i with all elements set to zero.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxor))]
 pub unsafe fn _mm256_setzero_si256() -> __m256i {
     mem::transmute(i64x4::new(0, 0, 0, 0))
@@ -1850,7 +1850,7 @@
 /// Set packed double-precision (64-bit) floating-point elements in returned
 /// vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
@@ -1860,7 +1860,7 @@
 /// Set packed single-precision (32-bit) floating-point elements in returned
 /// vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set_ps(
     a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
@@ -1871,7 +1871,7 @@
 /// Set packed 8-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set_epi8(
     e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
@@ -1890,7 +1890,7 @@
 
 /// Set packed 16-bit integers in returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set_epi16(
     e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
@@ -1908,7 +1908,7 @@
 
 /// Set packed 32-bit integers in returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set_epi32(
     e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
@@ -1918,7 +1918,7 @@
 
 /// Set packed 64-bit integers in returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 {
@@ -1928,7 +1928,7 @@
 /// Set packed double-precision (64-bit) floating-point elements in returned
 /// vector with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> f64x4 {
     f64x4::new(a, b, c, d)
@@ -1937,7 +1937,7 @@
 /// Set packed single-precision (32-bit) floating-point elements in returned
 /// vector with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_setr_ps(
     a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
@@ -1948,7 +1948,7 @@
 /// Set packed 8-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_setr_epi8(
     e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8,
@@ -1968,7 +1968,7 @@
 /// Set packed 16-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_setr_epi16(
     e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16,
@@ -1987,7 +1987,7 @@
 /// Set packed 32-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_setr_epi32(
     e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
@@ -1998,7 +1998,7 @@
 /// Set packed 64-bit integers in returned vector with the supplied values in
 /// reverse order.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> i64x4 {
@@ -2008,7 +2008,7 @@
 /// Broadcast double-precision (64-bit) floating-point value `a` to all
 /// elements of returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set1_pd(a: f64) -> f64x4 {
     f64x4::new(a, a, a, a)
@@ -2017,7 +2017,7 @@
 /// Broadcast single-precision (32-bit) floating-point value `a` to all
 /// elements of returned vector.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set1_ps(a: f32) -> f32x8 {
     f32x8::new(a, a, a, a, a, a, a, a)
@@ -2026,7 +2026,7 @@
 /// Broadcast 8-bit integer `a` to all elements of returned vector.
 /// This intrinsic may generate the `vpbroadcastb` instruction.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vpshufb))]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 // This intrinsic has no corresponding instruction.
@@ -2043,7 +2043,7 @@
 /// Broadcast 16-bit integer `a` to all elements of returned vector.
 /// This intrinsic may generate the `vpbroadcastw` instruction.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(vpshufb))]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 // This intrinsic has no corresponding instruction.
@@ -2054,7 +2054,7 @@
 /// Broadcast 32-bit integer `a` to all elements of returned vector.
 /// This intrinsic may generate the `vpbroadcastd` instruction.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_set1_epi32(a: i32) -> i32x8 {
     i32x8::new(a, a, a, a, a, a, a, a)
@@ -2063,7 +2063,7 @@
 /// Broadcast 64-bit integer `a` to all elements of returned vector.
 /// This intrinsic may generate the `vpbroadcastq` instruction.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(vmovddup))]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 // This intrinsic has no corresponding instruction.
@@ -2073,7 +2073,7 @@
 
 /// Cast vector of type __m256d to type __m256.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
@@ -2082,7 +2082,7 @@
 
 /// Cast vector of type __m256 to type __m256d.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
@@ -2091,7 +2091,7 @@
 
 /// Casts vector of type __m256 to type __m256i.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castps_si256(a: f32x8) -> __m256i {
@@ -2100,7 +2100,7 @@
 
 /// Casts vector of type __m256i to type __m256.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> f32x8 {
@@ -2109,7 +2109,7 @@
 
 /// Casts vector of type __m256d to type __m256i.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castpd_si256(a: f64x4) -> __m256i {
@@ -2118,7 +2118,7 @@
 
 /// Casts vector of type __m256i to type __m256d.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> f64x4 {
@@ -2127,7 +2127,7 @@
 
 /// Casts vector of type __m256 to type __m128.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> f32x4 {
@@ -2136,7 +2136,7 @@
 
 /// Casts vector of type __m256d to type __m128d.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> f64x2 {
@@ -2145,7 +2145,7 @@
 
 /// Casts vector of type __m256i to type __m128i.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
@@ -2157,7 +2157,7 @@
 /// Casts vector of type __m128 to type __m256;
 /// the upper 128 bits of the result are undefined.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castps128_ps256(a: f32x4) -> f32x8 {
@@ -2168,7 +2168,7 @@
 /// Casts vector of type __m128d to type __m256d;
 /// the upper 128 bits of the result are undefined.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castpd128_pd256(a: f64x2) -> f64x4 {
@@ -2179,7 +2179,7 @@
 /// Casts vector of type __m128i to type __m256i;
 /// the upper 128 bits of the result are undefined.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
@@ -2193,7 +2193,7 @@
 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
 /// the value of the source vector. The upper 128 bits are set to zero.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
@@ -2205,7 +2205,7 @@
 /// The lower 128 bits contain the value of the source vector. The upper
 /// 128 bits are set to zero.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
@@ -2220,7 +2220,7 @@
 /// contain the value of the source vector. The upper 128 bits are set
 /// to zero.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
@@ -2230,7 +2230,7 @@
 
 /// Return vector of type `f32x8` with undefined elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_undefined_ps() -> f32x8 {
     f32x8::splat(mem::uninitialized())
@@ -2238,7 +2238,7 @@
 
 /// Return vector of type `f64x4` with undefined elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_undefined_pd() -> f64x4 {
     f64x4::splat(mem::uninitialized())
@@ -2246,7 +2246,7 @@
 
 /// Return vector of type __m256i with undefined elements.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_undefined_si256() -> __m256i {
     mem::transmute(i64x4::splat(mem::uninitialized()))
@@ -2254,7 +2254,7 @@
 
 /// Set packed __m256 returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
     simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
@@ -2262,7 +2262,7 @@
 
 /// Set packed __m256d returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
     let hi: f32x4 = mem::transmute(hi);
@@ -2272,7 +2272,7 @@
 
 /// Set packed __m256i returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
     let hi: f32x4 = mem::transmute(hi);
@@ -2282,7 +2282,7 @@
 
 /// Set packed __m256 returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_setr_m128(lo: f32x4, hi: f32x4) -> f32x8 {
     _mm256_set_m128(hi, lo)
@@ -2290,7 +2290,7 @@
 
 /// Set packed __m256d returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_setr_m128d(lo: f64x2, hi: f64x2) -> f64x4 {
     _mm256_set_m128d(hi, lo)
@@ -2298,7 +2298,7 @@
 
 /// Set packed __m256i returned vector with the supplied values.
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
     _mm256_set_m128i(hi, lo)
@@ -2309,7 +2309,7 @@
 /// value.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_loadu2_m128(
     hiaddr: *const f32, loaddr: *const f32
@@ -2324,7 +2324,7 @@
 /// value.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_loadu2_m128d(
     hiaddr: *const f64, loaddr: *const f64
@@ -2338,7 +2338,7 @@
 /// them into a 256-bit value.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_loadu2_m128i(
     hiaddr: *const __m128i, loaddr: *const __m128i
@@ -2353,7 +2353,7 @@
 /// different 128-bit locations.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse"]
+#[target_feature(enable = "avx,sse")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_storeu2_m128(
     hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
@@ -2370,7 +2370,7 @@
 /// different 128-bit locations.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_storeu2_m128d(
     hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
@@ -2386,7 +2386,7 @@
 /// `a` into memory at two different 128-bit locations.
 /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+avx,+sse2"]
+#[target_feature(enable = "avx,sse2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_storeu2_m128i(
     hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
@@ -2400,7 +2400,7 @@
 
 /// Returns the first element of the input vector of [8 x float].
 #[inline(always)]
-#[target_feature = "+avx"]
+#[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(movss))] FIXME
 pub unsafe fn _mm256_cvtss_f32(a: f32x8) -> f32 {
     a.extract(0)
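Because `#[target_feature]` now only applies to `unsafe fn`, callers of the intrinsics above must guarantee the feature themselves; note also that combined feature strings lose their `+` prefixes (e.g. `enable = "avx,sse2"`). A minimal caller-side sketch (illustrative only, not part of this patch):

```rust
// Assumption: this path is only reached when AVX is known to be available,
// e.g. the crate was built with `-C target-feature=+avx` or a runtime
// feature check guards the call.
fn first_lane(a: f32x8) -> f32 {
    unsafe { _mm256_cvtss_f32(a) }
}
```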
diff --git a/coresimd/src/x86/i586/avx2.rs b/coresimd/src/x86/i586/avx2.rs
index 6aa780a..d824519 100644
--- a/coresimd/src/x86/i586/avx2.rs
+++ b/coresimd/src/x86/i586/avx2.rs
@@ -30,7 +30,7 @@
 
 /// Computes the absolute values of packed 32-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsd))]
 pub unsafe fn _mm256_abs_epi32(a: i32x8) -> u32x8 {
     pabsd(a)
@@ -38,7 +38,7 @@
 
 /// Computes the absolute values of packed 16-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsw))]
 pub unsafe fn _mm256_abs_epi16(a: i16x16) -> u16x16 {
     pabsw(a)
@@ -46,7 +46,7 @@
 
 /// Computes the absolute values of packed 8-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsb))]
 pub unsafe fn _mm256_abs_epi8(a: i8x32) -> u8x32 {
     pabsb(a)
@@ -54,7 +54,7 @@
 
 /// Add packed 64-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddq))]
 pub unsafe fn _mm256_add_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a + b
@@ -62,7 +62,7 @@
 
 /// Add packed 32-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddd))]
 pub unsafe fn _mm256_add_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a + b
@@ -70,7 +70,7 @@
 
 /// Add packed 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddw))]
 pub unsafe fn _mm256_add_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a + b
@@ -78,7 +78,7 @@
 
 /// Add packed 8-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddb))]
 pub unsafe fn _mm256_add_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a + b
@@ -86,7 +86,7 @@
 
 /// Add packed 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
 pub unsafe fn _mm256_adds_epi8(a: i8x32, b: i8x32) -> i8x32 {
     paddsb(a, b)
@@ -94,7 +94,7 @@
 
 /// Add packed 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
 pub unsafe fn _mm256_adds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     paddsw(a, b)
@@ -102,7 +102,7 @@
 
 /// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
 pub unsafe fn _mm256_adds_epu8(a: u8x32, b: u8x32) -> u8x32 {
     paddusb(a, b)
@@ -110,7 +110,7 @@
 
 /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
 pub unsafe fn _mm256_adds_epu16(a: u16x16, b: u16x16) -> u16x16 {
     paddusw(a, b)
@@ -119,7 +119,7 @@
 /// Concatenate pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
 /// result, shift the result right by `n` bytes, and return the low 16 bytes.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpalignr, n = 15))]
 pub unsafe fn _mm256_alignr_epi8(a: i8x32, b: i8x32, n: i32) -> i8x32 {
     let n = n as u32;
@@ -182,7 +182,7 @@
 /// Compute the bitwise AND of 256 bits (representing integer data)
 /// in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vandps))]
 pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
     __m256i::from(i8x32::from(a) & i8x32::from(b))
@@ -191,7 +191,7 @@
 /// Compute the bitwise NOT of 256 bits (representing integer data)
 /// in `a` and then AND with `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vandnps))]
 pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
     __m256i::from((!i8x32::from(a)) & i8x32::from(b))
@@ -199,7 +199,7 @@
 
 /// Average packed unsigned 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpavgw))]
 pub unsafe fn _mm256_avg_epu16(a: u16x16, b: u16x16) -> u16x16 {
     pavgw(a, b)
@@ -207,7 +207,7 @@
 
 /// Average packed unsigned 8-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpavgb))]
 pub unsafe fn _mm256_avg_epu8(a: u8x32, b: u8x32) -> u8x32 {
     pavgb(a, b)
@@ -215,7 +215,7 @@
 
 /// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
 pub unsafe fn _mm_blend_epi32(a: i32x4, b: i32x4, imm8: i32) -> i32x4 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -244,7 +244,7 @@
 
 /// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
 pub unsafe fn _mm256_blend_epi32(a: i32x8, b: i32x8, imm8: i32) -> i32x8 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -293,7 +293,7 @@
 
 /// Blend packed 16-bit integers from `a` and `b` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
 pub unsafe fn _mm256_blend_epi16(a: i16x16, b: i16x16, imm8: i32) -> i16x16 {
     let imm8 = (imm8 & 0xFF) as u8;
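For the blend intrinsics above, bit `i` of `imm8` selects lane `i` from `b` when set and from `a` otherwise. An illustrative call (assumes AVX2; not part of this patch):

```rust
unsafe fn take_even_lanes_from_b(a: i32x8, b: i32x8) -> i32x8 {
    // 0b0101_0101: lanes 0, 2, 4 and 6 come from `b`, the rest from `a`.
    _mm256_blend_epi32(a, b, 0b0101_0101)
}
```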
@@ -344,7 +344,7 @@
 
 /// Blend packed 8-bit integers from `a` and `b` using `mask`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendvb))]
 pub unsafe fn _mm256_blendv_epi8(a: i8x32, b: i8x32, mask: __m256i) -> i8x32 {
     pblendvb(a, b, i8x32::from(mask))
@@ -353,7 +353,7 @@
 /// Broadcast the low packed 8-bit integer from `a` to all elements of
 /// the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
 pub unsafe fn _mm_broadcastb_epi8(a: i8x16) -> i8x16 {
     simd_shuffle16(a, i8x16::splat(0_i8), [0_u32; 16])
@@ -362,7 +362,7 @@
 /// Broadcast the low packed 8-bit integer from `a` to all elements of
 /// the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
 pub unsafe fn _mm256_broadcastb_epi8(a: i8x16) -> i8x32 {
     simd_shuffle32(a, i8x16::splat(0_i8), [0_u32; 32])
@@ -373,7 +373,7 @@
 /// Broadcast the low packed 32-bit integer from `a` to all elements of
 /// the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm_broadcastd_epi32(a: i32x4) -> i32x4 {
     simd_shuffle4(a, i32x4::splat(0_i32), [0_u32; 4])
@@ -384,7 +384,7 @@
 /// Broadcast the low packed 32-bit integer from `a` to all elements of
 /// the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm256_broadcastd_epi32(a: i32x4) -> i32x8 {
     simd_shuffle8(a, i32x4::splat(0_i32), [0_u32; 8])
@@ -393,7 +393,7 @@
 /// Broadcast the low packed 64-bit integer from `a` to all elements of
 /// the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
 pub unsafe fn _mm_broadcastq_epi64(a: i64x2) -> i64x2 {
     simd_shuffle2(a, i64x2::splat(0_i64), [0_u32; 2])
@@ -404,7 +404,7 @@
 /// Broadcast the low packed 64-bit integer from `a` to all elements of
 /// the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 pub unsafe fn _mm256_broadcastq_epi64(a: i64x2) -> i64x4 {
     simd_shuffle4(a, i64x2::splat(0_i64), [0_u32; 4])
@@ -413,7 +413,7 @@
 /// Broadcast the low double-precision (64-bit) floating-point element
 /// from `a` to all elements of the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vmovddup))]
 pub unsafe fn _mm_broadcastsd_pd(a: f64x2) -> f64x2 {
     simd_shuffle2(a, f64x2::splat(0_f64), [0_u32; 2])
@@ -422,7 +422,7 @@
 /// Broadcast the low double-precision (64-bit) floating-point element
 /// from `a` to all elements of the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 pub unsafe fn _mm256_broadcastsd_pd(a: f64x2) -> f64x4 {
     simd_shuffle4(a, f64x2::splat(0_f64), [0_u32; 4])
@@ -433,7 +433,7 @@
 /// Broadcast 128 bits of integer data from `a` to all 128-bit lanes in
 /// the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 pub unsafe fn _mm256_broadcastsi128_si256(a: i64x2) -> i64x4 {
     simd_shuffle4(a, i64x2::splat(0_i64), [0, 1, 0, 1])
 }
@@ -441,7 +441,7 @@
 /// Broadcast the low single-precision (32-bit) floating-point element
 /// from `a` to all elements of the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm_broadcastss_ps(a: f32x4) -> f32x4 {
     simd_shuffle4(a, f32x4::splat(0_f32), [0_u32; 4])
@@ -450,7 +450,7 @@
 /// Broadcast the low single-precision (32-bit) floating-point element
 /// from `a` to all elements of the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 pub unsafe fn _mm256_broadcastss_ps(a: f32x4) -> f32x8 {
     simd_shuffle8(a, f32x4::splat(0_f32), [0_u32; 8])
@@ -459,7 +459,7 @@
 /// Broadcast the low packed 16-bit integer from `a` to all elements of
 /// the 128-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
 pub unsafe fn _mm_broadcastw_epi16(a: i16x8) -> i16x8 {
     simd_shuffle8(a, i16x8::splat(0_i16), [0_u32; 8])
@@ -468,7 +468,7 @@
 /// Broadcast the low packed 16-bit integer from `a` to all elements of
 /// the 256-bit returned value.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
 pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 {
     simd_shuffle16(a, i16x8::splat(0_i16), [0_u32; 16])
@@ -476,7 +476,7 @@
 
 /// Compare packed 64-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqq))]
 pub unsafe fn _mm256_cmpeq_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.eq(b)
@@ -484,7 +484,7 @@
 
 /// Compare packed 32-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqd))]
 pub unsafe fn _mm256_cmpeq_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.eq(b)
@@ -492,7 +492,7 @@
 
 /// Compare packed 16-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqw))]
 pub unsafe fn _mm256_cmpeq_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.eq(b)
@@ -500,7 +500,7 @@
 
 /// Compare packed 8-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqb))]
 pub unsafe fn _mm256_cmpeq_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.eq(b)
@@ -508,7 +508,7 @@
 
 /// Compare packed 64-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtq))]
 pub unsafe fn _mm256_cmpgt_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a.gt(b)
@@ -516,7 +516,7 @@
 
 /// Compare packed 32-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtd))]
 pub unsafe fn _mm256_cmpgt_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a.gt(b)
@@ -524,7 +524,7 @@
 
 /// Compare packed 16-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtw))]
 pub unsafe fn _mm256_cmpgt_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a.gt(b)
@@ -532,7 +532,7 @@
 
 /// Compare packed 8-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtb))]
 pub unsafe fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a.gt(b)
@@ -540,7 +540,7 @@
 
 /// Sign-extend 16-bit integers to 32-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
 pub unsafe fn _mm256_cvtepi16_epi32(a: i16x8) -> i32x8 {
     simd_cast(a)
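These widening conversions are plain lane-wise casts; a sketch of the effect (assumes AVX2; the helper name is hypothetical):

```rust
unsafe fn widen(a: i16x8) -> i32x8 {
    // A lane holding -1i16 becomes -1i32 (sign-extended).
    _mm256_cvtepi16_epi32(a)
}
```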
@@ -548,7 +548,7 @@
 
 /// Sign-extend 16-bit integers to 64-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
 pub unsafe fn _mm256_cvtepi16_epi64(a: i16x8) -> i64x4 {
     simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -556,7 +556,7 @@
 
 /// Sign-extend 32-bit integers to 64-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
 pub unsafe fn _mm256_cvtepi32_epi64(a: i32x4) -> i64x4 {
     simd_cast(a)
@@ -564,7 +564,7 @@
 
 /// Sign-extend 8-bit integers to 16-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
 pub unsafe fn _mm256_cvtepi8_epi16(a: i8x16) -> i16x16 {
     simd_cast(a)
@@ -572,7 +572,7 @@
 
 /// Sign-extend 8-bit integers to 32-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
 pub unsafe fn _mm256_cvtepi8_epi32(a: i8x16) -> i32x8 {
     simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
@@ -580,7 +580,7 @@
 
 /// Sign-extend 8-bit integers to 64-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
 pub unsafe fn _mm256_cvtepi8_epi64(a: i8x16) -> i64x4 {
     simd_cast::<::v32::i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -589,7 +589,7 @@
 /// Zero-extend the eight unsigned 16-bit integers in `a` to 32-bit
 /// integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
 pub unsafe fn _mm256_cvtepu16_epi32(a: u16x8) -> i32x8 {
     simd_cast(a)
@@ -598,7 +598,7 @@
 /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
 /// integers. The upper four elements of `a` are unused.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
 pub unsafe fn _mm256_cvtepu16_epi64(a: u16x8) -> i64x4 {
     simd_cast::<::v64::u16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -606,7 +606,7 @@
 
 /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
 pub unsafe fn _mm256_cvtepu32_epi64(a: u32x4) -> i64x4 {
     simd_cast(a)
@@ -614,7 +614,7 @@
 
 /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
 pub unsafe fn _mm256_cvtepu8_epi16(a: u8x16) -> i16x16 {
     simd_cast(a)
@@ -623,7 +623,7 @@
 /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
 /// integers. The upper eight elements of `a` are unused.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
 pub unsafe fn _mm256_cvtepu8_epi32(a: u8x16) -> i32x8 {
     simd_cast::<::v64::u8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
@@ -632,7 +632,7 @@
 /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
 /// integers. The upper twelve elements of `a` are unused.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
 pub unsafe fn _mm256_cvtepu8_epi64(a: u8x16) -> i64x4 {
     simd_cast::<::v32::u8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
@@ -640,7 +640,7 @@
 
 /// Extract 128 bits (of integer data) from `a` selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vextractf128, imm8 = 1))]
 pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
     use x86::i586::avx::_mm256_undefined_si256;
@@ -655,7 +655,7 @@
 
 /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphaddw))]
 pub unsafe fn _mm256_hadd_epi16(a: i16x16, b: i16x16) -> i16x16 {
     phaddw(a, b)
@@ -663,7 +663,7 @@
 
 /// Horizontally add adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphaddd))]
 pub unsafe fn _mm256_hadd_epi32(a: i32x8, b: i32x8) -> i32x8 {
     phaddd(a, b)
@@ -672,7 +672,7 @@
 /// Horizontally add adjacent pairs of 16-bit integers in `a` and `b`
 /// using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphaddsw))]
 pub unsafe fn _mm256_hadds_epi16(a: i16x16, b: i16x16) -> i16x16 {
     phaddsw(a, b)
@@ -680,7 +680,7 @@
 
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphsubw))]
 pub unsafe fn _mm256_hsub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     phsubw(a, b)
@@ -688,7 +688,7 @@
 
 /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphsubd))]
 pub unsafe fn _mm256_hsub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     phsubd(a, b)
@@ -697,7 +697,7 @@
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
 /// using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphsubsw))]
 pub unsafe fn _mm256_hsubs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     phsubsw(a, b)
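The gather intrinsics updated below take a base pointer, a vector of element offsets, and a byte `scale`. A usage sketch (illustrative only; assumes AVX2 and that every `offset * scale` stays within `table`):

```rust
unsafe fn gather_four(table: &[i32], offsets: i32x4) -> i32x4 {
    // `scale` is in bytes; 4 matches the size of an `i32` element.
    _mm_i32gather_epi32(table.as_ptr(), offsets, 4)
}
```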
@@ -707,7 +707,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
 pub unsafe fn _mm_i32gather_epi32(
     slice: *const i32, offsets: i32x4, scale: i32
@@ -723,7 +723,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
 pub unsafe fn _mm_mask_i32gather_epi32(
     src: i32x4, slice: *const i32, offsets: i32x4, mask: i32x4, scale: i32
@@ -738,7 +738,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
 pub unsafe fn _mm256_i32gather_epi32(
     slice: *const i32, offsets: i32x8, scale: i32
@@ -754,7 +754,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
 pub unsafe fn _mm256_mask_i32gather_epi32(
     src: i32x8, slice: *const i32, offsets: i32x8, mask: i32x8, scale: i32
@@ -769,7 +769,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
 pub unsafe fn _mm_i32gather_ps(
     slice: *const f32, offsets: i32x4, scale: i32
@@ -785,7 +785,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
 pub unsafe fn _mm_mask_i32gather_ps(
     src: f32x4, slice: *const f32, offsets: i32x4, mask: f32x4, scale: i32
@@ -800,7 +800,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
 pub unsafe fn _mm256_i32gather_ps(
     slice: *const f32, offsets: i32x8, scale: i32
@@ -816,7 +816,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
 pub unsafe fn _mm256_mask_i32gather_ps(
     src: f32x8, slice: *const f32, offsets: i32x8, mask: f32x8, scale: i32
@@ -831,7 +831,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
 pub unsafe fn _mm_i32gather_epi64(
     slice: *const i64, offsets: i32x4, scale: i32
@@ -847,7 +847,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
 pub unsafe fn _mm_mask_i32gather_epi64(
     src: i64x2, slice: *const i64, offsets: i32x4, mask: i64x2, scale: i32
@@ -862,7 +862,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
 pub unsafe fn _mm256_i32gather_epi64(
     slice: *const i64, offsets: i32x4, scale: i32
@@ -878,7 +878,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
 pub unsafe fn _mm256_mask_i32gather_epi64(
     src: i64x4, slice: *const i64, offsets: i32x4, mask: i64x4, scale: i32
@@ -893,7 +893,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
 pub unsafe fn _mm_i32gather_pd(
     slice: *const f64, offsets: i32x4, scale: i32
@@ -909,7 +909,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
 pub unsafe fn _mm_mask_i32gather_pd(
     src: f64x2, slice: *const f64, offsets: i32x4, mask: f64x2, scale: i32
@@ -924,7 +924,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
 pub unsafe fn _mm256_i32gather_pd(
     slice: *const f64, offsets: i32x4, scale: i32
@@ -940,7 +940,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
 pub unsafe fn _mm256_mask_i32gather_pd(
     src: f64x4, slice: *const f64, offsets: i32x4, mask: f64x4, scale: i32
@@ -955,7 +955,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
 pub unsafe fn _mm_i64gather_epi32(
     slice: *const i32, offsets: i64x2, scale: i32
@@ -971,7 +971,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
 pub unsafe fn _mm_mask_i64gather_epi32(
     src: i32x4, slice: *const i32, offsets: i64x2, mask: i32x4, scale: i32
@@ -986,7 +986,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
 pub unsafe fn _mm256_i64gather_epi32(
     slice: *const i32, offsets: i64x4, scale: i32
@@ -1002,7 +1002,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
 pub unsafe fn _mm256_mask_i64gather_epi32(
     src: i32x4, slice: *const i32, offsets: i64x4, mask: i32x4, scale: i32
@@ -1017,7 +1017,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
 pub unsafe fn _mm_i64gather_ps(
     slice: *const f32, offsets: i64x2, scale: i32
@@ -1033,7 +1033,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
 pub unsafe fn _mm_mask_i64gather_ps(
     src: f32x4, slice: *const f32, offsets: i64x2, mask: f32x4, scale: i32
@@ -1048,7 +1048,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
 pub unsafe fn _mm256_i64gather_ps(
     slice: *const f32, offsets: i64x4, scale: i32
@@ -1064,7 +1064,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
 pub unsafe fn _mm256_mask_i64gather_ps(
     src: f32x4, slice: *const f32, offsets: i64x4, mask: f32x4, scale: i32
@@ -1079,7 +1079,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
 pub unsafe fn _mm_i64gather_epi64(
     slice: *const i64, offsets: i64x2, scale: i32
@@ -1095,7 +1095,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
 pub unsafe fn _mm_mask_i64gather_epi64(
     src: i64x2, slice: *const i64, offsets: i64x2, mask: i64x2, scale: i32
@@ -1110,7 +1110,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
 pub unsafe fn _mm256_i64gather_epi64(
     slice: *const i64, offsets: i64x4, scale: i32
@@ -1126,7 +1126,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
 pub unsafe fn _mm256_mask_i64gather_epi64(
     src: i64x4, slice: *const i64, offsets: i64x4, mask: i64x4, scale: i32
@@ -1141,7 +1141,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
 pub unsafe fn _mm_i64gather_pd(
     slice: *const f64, offsets: i64x2, scale: i32
@@ -1157,7 +1157,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
 pub unsafe fn _mm_mask_i64gather_pd(
     src: f64x2, slice: *const f64, offsets: i64x2, mask: f64x2, scale: i32
@@ -1172,7 +1172,7 @@
 /// where
 /// `scale` is between 1 and 8.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
 pub unsafe fn _mm256_i64gather_pd(
     slice: *const f64, offsets: i64x4, scale: i32
@@ -1188,7 +1188,7 @@
 /// `scale` is between 1 and 8. If the highest bit of the mask is not set for
 /// a position, the value from `src` is used for that position instead.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
 pub unsafe fn _mm256_mask_i64gather_pd(
     src: f64x4, slice: *const f64, offsets: i64x4, mask: f64x4, scale: i32
@@ -1202,7 +1202,7 @@
 /// Copy `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
 /// location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
 pub unsafe fn _mm256_inserti128_si256(
     a: __m256i, b: __m128i, imm8: i32
@@ -1221,7 +1221,7 @@
 /// intermediate signed 32-bit integers. Horizontally add adjacent pairs
 /// of intermediate 32-bit integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaddwd))]
 pub unsafe fn _mm256_madd_epi16(a: i16x16, b: i16x16) -> i32x8 {
     pmaddwd(a, b)
@@ -1232,7 +1232,7 @@
 /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
 /// signed 16-bit integers
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaddubsw))]
 pub unsafe fn _mm256_maddubs_epi16(a: u8x32, b: u8x32) -> i16x16 {
     pmaddubsw(a, b)
@@ -1242,7 +1242,7 @@
 /// (elements are zeroed out when the highest bit is not set in the
 /// corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: i32x4) -> i32x4 {
     maskloadd(mem_addr as *const i8, mask)
@@ -1252,7 +1252,7 @@
 /// (elements are zeroed out when the highest bit is not set in the
 /// corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 pub unsafe fn _mm256_maskload_epi32(
     mem_addr: *const i32, mask: i32x8
@@ -1264,7 +1264,7 @@
 /// (elements are zeroed out when the highest bit is not set in the
 /// corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: i64x2) -> i64x2 {
     maskloadq(mem_addr as *const i8, mask)
@@ -1274,7 +1274,7 @@
 /// (elements are zeroed out when the highest bit is not set in the
 /// corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 pub unsafe fn _mm256_maskload_epi64(
     mem_addr: *const i64, mask: i64x4
@@ -1286,7 +1286,7 @@
 /// using `mask` (elements are not stored when the highest bit is not set
 /// in the corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: i32x4, a: i32x4) {
     maskstored(mem_addr as *mut i8, mask, a)
@@ -1296,7 +1296,7 @@
 /// using `mask` (elements are not stored when the highest bit is not set
 /// in the corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 pub unsafe fn _mm256_maskstore_epi32(
     mem_addr: *mut i32, mask: i32x8, a: i32x8
@@ -1308,7 +1308,7 @@
 /// using `mask` (elements are not stored when the highest bit is not set
 /// in the corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: i64x2, a: i64x2) {
     maskstoreq(mem_addr as *mut i8, mask, a)
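The maskload/maskstore family only touches lanes whose mask element has its highest bit set. A sketch (illustrative only; assumes AVX2):

```rust
unsafe fn store_lane0_only(dst: *mut i64, a: i64x2) {
    // -1 has the high bit set, so only lane 0 is written; the memory for
    // lane 1 is left untouched.
    let mask = i64x2::new(-1, 0);
    _mm_maskstore_epi64(dst, mask, a);
}
```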
@@ -1318,7 +1318,7 @@
 /// using `mask` (elements are not stored when the highest bit is not set
 /// in the corresponding element).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 pub unsafe fn _mm256_maskstore_epi64(
     mem_addr: *mut i64, mask: i64x4, a: i64x4
@@ -1329,7 +1329,7 @@
 /// Compare packed 16-bit integers in `a` and `b`, and return the packed
 /// maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
 pub unsafe fn _mm256_max_epi16(a: i16x16, b: i16x16) -> i16x16 {
     pmaxsw(a, b)
@@ -1338,7 +1338,7 @@
 /// Compare packed 32-bit integers in `a` and `b`, and return the packed
 /// maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
 pub unsafe fn _mm256_max_epi32(a: i32x8, b: i32x8) -> i32x8 {
     pmaxsd(a, b)
@@ -1347,7 +1347,7 @@
 /// Compare packed 8-bit integers in `a` and `b`, and return the packed
 /// maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
 pub unsafe fn _mm256_max_epi8(a: i8x32, b: i8x32) -> i8x32 {
     pmaxsb(a, b)
@@ -1356,7 +1356,7 @@
 /// Compare packed unsigned 16-bit integers in `a` and `b`, and return
 /// the packed maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
 pub unsafe fn _mm256_max_epu16(a: u16x16, b: u16x16) -> u16x16 {
     pmaxuw(a, b)
@@ -1365,7 +1365,7 @@
 /// Compare packed unsigned 32-bit integers in `a` and `b`, and return
 /// the packed maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
 pub unsafe fn _mm256_max_epu32(a: u32x8, b: u32x8) -> u32x8 {
     pmaxud(a, b)
@@ -1374,7 +1374,7 @@
 /// Compare packed unsigned 8-bit integers in `a` and `b`, and return
 /// the packed maximum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
 pub unsafe fn _mm256_max_epu8(a: u8x32, b: u8x32) -> u8x32 {
     pmaxub(a, b)
@@ -1383,7 +1383,7 @@
 /// Compare packed 16-bit integers in `a` and `b`, and return the packed
 /// minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsw))]
 pub unsafe fn _mm256_min_epi16(a: i16x16, b: i16x16) -> i16x16 {
     pminsw(a, b)
@@ -1392,7 +1392,7 @@
 /// Compare packed 32-bit integers in `a` and `b`, and return the packed
 /// minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsd))]
 pub unsafe fn _mm256_min_epi32(a: i32x8, b: i32x8) -> i32x8 {
     pminsd(a, b)
@@ -1401,7 +1401,7 @@
 /// Compare packed 8-bit integers in `a` and `b`, and return the packed
 /// minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsb))]
 pub unsafe fn _mm256_min_epi8(a: i8x32, b: i8x32) -> i8x32 {
     pminsb(a, b)
@@ -1410,7 +1410,7 @@
 /// Compare packed unsigned 16-bit integers in `a` and `b`, and return
 /// the packed minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminuw))]
 pub unsafe fn _mm256_min_epu16(a: u16x16, b: u16x16) -> u16x16 {
     pminuw(a, b)
@@ -1419,7 +1419,7 @@
 /// Compare packed unsigned 32-bit integers in `a` and `b`, and return
 /// the packed minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminud))]
 pub unsafe fn _mm256_min_epu32(a: u32x8, b: u32x8) -> u32x8 {
     pminud(a, b)
@@ -1428,7 +1428,7 @@
 /// Compare packed unsigned 8-bit integers in `a` and `b`, and return
 /// the packed minimum values.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminub))]
 pub unsafe fn _mm256_min_epu8(a: u8x32, b: u8x32) -> u8x32 {
     pminub(a, b)
@@ -1437,7 +1437,7 @@
 /// Create mask from the most significant bit of each 8-bit element in `a`,
 /// return the result.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovmskb))]
 pub unsafe fn _mm256_movemask_epi8(a: i8x32) -> i32 {
     pmovmskb(a)
@@ -1451,7 +1451,7 @@
 /// quadruplets are formed from sequential 8-bit integers selected from `a`
 /// starting at the offset specified in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
 pub unsafe fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
     macro_rules! call {
@@ -1465,7 +1465,7 @@
 ///
 /// Return the 64-bit results.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
 pub unsafe fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
     pmuldq(a, b)
@@ -1476,7 +1476,7 @@
 ///
 /// Return the unsigned 64-bit results.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
 pub unsafe fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
     pmuludq(a, b)
@@ -1486,7 +1486,7 @@
 /// intermediate 32-bit integers and returning the high 16 bits of the
 /// intermediate integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
 pub unsafe fn _mm256_mulhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
     pmulhw(a, b)
@@ -1496,7 +1496,7 @@
 /// intermediate 32-bit integers and returning the high 16 bits of the
 /// intermediate integers.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
 pub unsafe fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
     pmulhuw(a, b)
@@ -1506,7 +1506,7 @@
 /// intermediate 32-bit integers, and return the low 16 bits of the
 /// intermediate integers
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmullw))]
 pub unsafe fn _mm256_mullo_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a * b
@@ -1516,7 +1516,7 @@
 /// intermediate 64-bit integers, and return the low 32 bits of the
 /// intermediate integers
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulld))]
 pub unsafe fn _mm256_mullo_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a * b
@@ -1527,7 +1527,7 @@
 /// integer to the 18 most significant bits, round by adding 1, and
 /// return bits [16:1]
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulhrsw))]
 pub unsafe fn _mm256_mulhrs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     pmulhrsw(a, b)
@@ -1536,7 +1536,7 @@
 /// Compute the bitwise OR of 256 bits (representing integer data) in `a`
 /// and `b`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vorps))]
 pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
     __m256i::from(i8x32::from(a) | i8x32::from(b))
@@ -1545,7 +1545,7 @@
 /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
 pub unsafe fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
     packsswb(a, b)
@@ -1554,7 +1554,7 @@
 /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
 pub unsafe fn _mm256_packs_epi32(a: i32x8, b: i32x8) -> i16x16 {
     packssdw(a, b)
@@ -1563,7 +1563,7 @@
 /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
 pub unsafe fn _mm256_packus_epi16(a: i16x16, b: i16x16) -> u8x32 {
     packuswb(a, b)
@@ -1572,7 +1572,7 @@
 /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
 pub unsafe fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
     packusdw(a, b)
@@ -1583,7 +1583,7 @@
 /// The last 3 bits of each integer of `b` are used as addresses into the 8
 /// integers of `a`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermd))]
 pub unsafe fn _mm256_permutevar8x32_epi32(a: u32x8, b: u32x8) -> u32x8 {
     permd(a, b)
@@ -1591,7 +1591,7 @@
 
 /// Permutes 64-bit integers from `a` using control mask `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermq, imm8 = 9))]
 pub unsafe fn _mm256_permute4x64_epi64(a: i64x4, imm8: i32) -> i64x4 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1640,7 +1640,7 @@
 
 /// Shuffle 128-bits of integer data selected by `imm8` from `a` and `b`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
 pub unsafe fn _mm256_permute2x128_si256(
     a: __m256i, b: __m256i, imm8: i32
@@ -1656,7 +1656,7 @@
 /// Shuffle 64-bit floating-point elements in `a` across lanes using the
 /// control in `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
 pub unsafe fn _mm256_permute4x64_pd(a: f64x4, imm8: i32) -> f64x4 {
     use x86::i586::avx::_mm256_undefined_pd;
@@ -1707,7 +1707,7 @@
 /// Shuffle eight 32-bit floating-point elements in `a` across lanes using
 /// the corresponding 32-bit integer index in `idx`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpermps))]
 pub unsafe fn _mm256_permutevar8x32_ps(a: f32x8, idx: i32x8) -> f32x8 {
     permps(a, idx)
@@ -1718,7 +1718,7 @@
 /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
 /// integers in the low 16 bits of the 64-bit return value
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsadbw))]
 pub unsafe fn _mm256_sad_epu8(a: u8x32, b: u8x32) -> u64x4 {
     psadbw(a, b)
@@ -1754,7 +1754,7 @@
 /// }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshufb))]
 pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 {
     pshufb(a, b)
@@ -1771,8 +1771,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i32x8;
 /// use stdsimd::vendor::_mm256_shuffle_epi32;
 ///
@@ -1793,12 +1793,12 @@
 /// assert_eq!(c1, expected1);
 /// assert_eq!(c2, expected2);
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
 pub unsafe fn _mm256_shuffle_epi32(a: i32x8, imm8: i32) -> i32x8 {
     // simd_shuffleX requires that its selector parameter be made up of
@@ -1857,7 +1857,7 @@
 /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
 /// to the output.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
 pub unsafe fn _mm256_shufflehi_epi16(a: i16x16, imm8: i32) -> i16x16 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1912,7 +1912,7 @@
 /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
 /// to the output.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
 pub unsafe fn _mm256_shufflelo_epi16(a: i16x16, imm8: i32) -> i16x16 {
     let imm8 = (imm8 & 0xFF) as u8;
@@ -1967,7 +1967,7 @@
 /// 16-bit integer in `b` is negative, and return the results.
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsignw))]
 pub unsafe fn _mm256_sign_epi16(a: i16x16, b: i16x16) -> i16x16 {
     psignw(a, b)
@@ -1977,7 +1977,7 @@
 /// 32-bit integer in `b` is negative, and return the results.
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsignd))]
 pub unsafe fn _mm256_sign_epi32(a: i32x8, b: i32x8) -> i32x8 {
     psignd(a, b)
@@ -1987,7 +1987,7 @@
 /// 8-bit integer in `b` is negative, and return the results.
 /// Results are zeroed out when the corresponding element in `b` is zero.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsignb))]
 pub unsafe fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
     psignb(a, b)
@@ -1996,7 +1996,7 @@
 /// Shift packed 16-bit integers in `a` left by `count` while
 /// shifting in zeros, and return the result
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllw))]
 pub unsafe fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
     psllw(a, count)
@@ -2005,7 +2005,7 @@
 /// Shift packed 32-bit integers in `a` left by `count` while
 /// shifting in zeros, and return the result
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslld))]
 pub unsafe fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
     pslld(a, count)
@@ -2014,7 +2014,7 @@
 /// Shift packed 64-bit integers in `a` left by `count` while
 /// shifting in zeros, and return the result
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllq))]
 pub unsafe fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
     psllq(a, count)
@@ -2023,7 +2023,7 @@
 /// Shift packed 16-bit integers in `a` left by `imm8` while
 /// shifting in zeros, return the results.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllw))]
 pub unsafe fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     pslliw(a, imm8)
@@ -2032,7 +2032,7 @@
 /// Shift packed 32-bit integers in `a` left by `imm8` while
 /// shifting in zeros, return the results.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslld))]
 pub unsafe fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     psllid(a, imm8)
@@ -2041,7 +2041,7 @@
 /// Shift packed 64-bit integers in `a` left by `imm8` while
 /// shifting in zeros, return the results.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllq))]
 pub unsafe fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     pslliq(a, imm8)
@@ -2049,7 +2049,7 @@
 
 /// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
 pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
     macro_rules! call {
@@ -2062,7 +2062,7 @@
 
 /// Shift 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
 pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
     _mm256_slli_si256(a, imm8)
@@ -2072,7 +2072,7 @@
 /// specified by the corresponding element in `count` while
 /// shifting in zeros, and return the result.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
 pub unsafe fn _mm_sllv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     psllvd(a, count)
@@ -2082,7 +2082,7 @@
 /// specified by the corresponding element in `count` while
 /// shifting in zeros, and return the result.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
 pub unsafe fn _mm256_sllv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     psllvd256(a, count)
@@ -2092,7 +2092,7 @@
 /// specified by the corresponding element in `count` while
 /// shifting in zeros, and return the result.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
 pub unsafe fn _mm_sllv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     psllvq(a, count)
@@ -2102,7 +2102,7 @@
 /// specified by the corresponding element in `count` while
 /// shifting in zeros, and return the result.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
 pub unsafe fn _mm256_sllv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     psllvq256(a, count)
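The `slli*`/`sllv*` pairs above differ only in where the shift count comes from: `slli` applies one immediate to every lane, while `sllv` reads a per-lane count vector. A rough sketch of that difference, assuming an AVX2-capable CPU at runtime plus the `i32x8::new`/`splat` constructors and the nightly feature gates this crate's doctests rely on:

```rust
#![feature(cfg_target_feature, target_feature)]
#[macro_use]
extern crate stdsimd;

use stdsimd::simd::i32x8;
use stdsimd::vendor::{_mm256_slli_epi32, _mm256_sllv_epi32};

#[target_feature(enable = "avx2")]
unsafe fn shifts() {
    let ones = i32x8::splat(1);
    // `slli` shifts every 32-bit lane left by the same immediate amount.
    assert_eq!(_mm256_slli_epi32(ones, 3), i32x8::splat(8));
    // `sllv` shifts each lane by the count held in the corresponding lane.
    let counts = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    let expected = i32x8::new(1, 2, 4, 8, 16, 32, 64, 128);
    assert_eq!(_mm256_sllv_epi32(ones, counts), expected);
}

fn main() {
    if cfg_feature_enabled!("avx2") {
        unsafe { shifts() }
    }
}
```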
@@ -2111,7 +2111,7 @@
 /// Shift packed 16-bit integers in `a` right by `count` while
 /// shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsraw))]
 pub unsafe fn _mm256_sra_epi16(a: i16x16, count: i16x8) -> i16x16 {
     psraw(a, count)
@@ -2120,7 +2120,7 @@
 /// Shift packed 32-bit integers in `a` right by `count` while
 /// shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrad))]
 pub unsafe fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
     psrad(a, count)
@@ -2129,7 +2129,7 @@
 /// Shift packed 16-bit integers in `a` right by `imm8` while
 /// shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsraw))]
 pub unsafe fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
     psraiw(a, imm8)
@@ -2138,7 +2138,7 @@
 /// Shift packed 32-bit integers in `a` right by `imm8` while
 /// shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrad))]
 pub unsafe fn _mm256_srai_epi32(a: i32x8, imm8: i32) -> i32x8 {
     psraid(a, imm8)
@@ -2147,7 +2147,7 @@
 /// Shift packed 32-bit integers in `a` right by the amount specified by the
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsravd))]
 pub unsafe fn _mm_srav_epi32(a: i32x4, count: i32x4) -> i32x4 {
     psravd(a, count)
@@ -2156,7 +2156,7 @@
 /// Shift packed 32-bit integers in `a` right by the amount specified by the
 /// corresponding element in `count` while shifting in sign bits.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsravd))]
 pub unsafe fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
     psravd256(a, count)
@@ -2164,7 +2164,7 @@
 
 /// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
 pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
     macro_rules! call {
@@ -2177,7 +2177,7 @@
 
 /// Shift 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
 pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
     _mm256_srli_si256(a, imm8)
@@ -2186,7 +2186,7 @@
 /// Shift packed 16-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
 pub unsafe fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
     psrlw(a, count)
@@ -2195,7 +2195,7 @@
 /// Shift packed 32-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrld))]
 pub unsafe fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
     psrld(a, count)
@@ -2204,7 +2204,7 @@
 /// Shift packed 64-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
 pub unsafe fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
     psrlq(a, count)
@@ -2213,7 +2213,7 @@
 /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in
 /// zeros
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
 pub unsafe fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
     psrliw(a, imm8)
@@ -2222,7 +2222,7 @@
 /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in
 /// zeros
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrld))]
 pub unsafe fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
     psrlid(a, imm8)
@@ -2231,7 +2231,7 @@
 /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in
 /// zeros
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
 pub unsafe fn _mm256_srli_epi64(a: i64x4, imm8: i32) -> i64x4 {
     psrliq(a, imm8)
@@ -2240,7 +2240,7 @@
 /// Shift packed 32-bit integers in `a` right by the amount specified by
 /// the corresponding element in `count` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
 pub unsafe fn _mm_srlv_epi32(a: i32x4, count: i32x4) -> i32x4 {
     psrlvd(a, count)
@@ -2249,7 +2249,7 @@
 /// Shift packed 32-bit integers in `a` right by the amount specified by
 /// the corresponding element in `count` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
 pub unsafe fn _mm256_srlv_epi32(a: i32x8, count: i32x8) -> i32x8 {
     psrlvd256(a, count)
@@ -2258,7 +2258,7 @@
 /// Shift packed 64-bit integers in `a` right by the amount specified by
 /// the corresponding element in `count` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
 pub unsafe fn _mm_srlv_epi64(a: i64x2, count: i64x2) -> i64x2 {
     psrlvq(a, count)
@@ -2267,7 +2267,7 @@
 /// Shift packed 64-bit integers in `a` right by the amount specified by
 /// the corresponding element in `count` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
 pub unsafe fn _mm256_srlv_epi64(a: i64x4, count: i64x4) -> i64x4 {
     psrlvq256(a, count)
@@ -2277,7 +2277,7 @@
 
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubw))]
 pub unsafe fn _mm256_sub_epi16(a: i16x16, b: i16x16) -> i16x16 {
     a - b
@@ -2285,7 +2285,7 @@
 
 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubd))]
 pub unsafe fn _mm256_sub_epi32(a: i32x8, b: i32x8) -> i32x8 {
     a - b
@@ -2293,7 +2293,7 @@
 
 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubq))]
 pub unsafe fn _mm256_sub_epi64(a: i64x4, b: i64x4) -> i64x4 {
     a - b
@@ -2301,7 +2301,7 @@
 
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubb))]
 pub unsafe fn _mm256_sub_epi8(a: i8x32, b: i8x32) -> i8x32 {
     a - b
@@ -2310,7 +2310,7 @@
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
 /// `a` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
 pub unsafe fn _mm256_subs_epi16(a: i16x16, b: i16x16) -> i16x16 {
     psubsw(a, b)
@@ -2319,7 +2319,7 @@
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
 /// `a` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
 pub unsafe fn _mm256_subs_epi8(a: i8x32, b: i8x32) -> i8x32 {
     psubsb(a, b)
@@ -2328,7 +2328,7 @@
 /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
 /// integers in `a` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
 pub unsafe fn _mm256_subs_epu16(a: u16x16, b: u16x16) -> u16x16 {
     psubusw(a, b)
@@ -2337,7 +2337,7 @@
 /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
 /// integers in `a` using saturation.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
 pub unsafe fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
     psubusb(a, b)
@@ -2354,8 +2354,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i8x32;
 /// use stdsimd::vendor::_mm256_unpackhi_epi8;
 ///
@@ -2375,12 +2375,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
 pub unsafe fn _mm256_unpackhi_epi8(a: i8x32, b: i8x32) -> i8x32 {
     #[cfg_attr(rustfmt, rustfmt_skip)]
@@ -2403,8 +2403,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i8x32;
 /// use stdsimd::vendor::_mm256_unpacklo_epi8;
 ///
@@ -2423,12 +2423,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
 pub unsafe fn _mm256_unpacklo_epi8(a: i8x32, b: i8x32) -> i8x32 {
     #[cfg_attr(rustfmt, rustfmt_skip)]
@@ -2451,8 +2451,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i16x16;
 /// use stdsimd::vendor::_mm256_unpackhi_epi16;
 ///
@@ -2469,12 +2469,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
 pub unsafe fn _mm256_unpackhi_epi16(a: i16x16, b: i16x16) -> i16x16 {
     simd_shuffle16(
@@ -2495,8 +2495,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i16x16;
 /// use stdsimd::vendor::_mm256_unpacklo_epi16;
 ///
@@ -2513,12 +2513,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
 pub unsafe fn _mm256_unpacklo_epi16(a: i16x16, b: i16x16) -> i16x16 {
     simd_shuffle16(
@@ -2539,8 +2539,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i32x8;
 /// use stdsimd::vendor::_mm256_unpackhi_epi32;
 ///
@@ -2556,12 +2556,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
 pub unsafe fn _mm256_unpackhi_epi32(a: i32x8, b: i32x8) -> i32x8 {
     simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
@@ -2578,8 +2578,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i32x8;
 /// use stdsimd::vendor::_mm256_unpacklo_epi32;
 ///
@@ -2595,12 +2595,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
 pub unsafe fn _mm256_unpacklo_epi32(a: i32x8, b: i32x8) -> i32x8 {
     simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
@@ -2617,8 +2617,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i64x4;
 /// use stdsimd::vendor::_mm256_unpackhi_epi64;
 ///
@@ -2634,12 +2634,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
 pub unsafe fn _mm256_unpackhi_epi64(a: i64x4, b: i64x4) -> i64x4 {
     simd_shuffle4(a, b, [1, 5, 3, 7])
@@ -2656,8 +2656,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("avx2") {
-/// #         #[target_feature = "+avx2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "avx2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::i64x4;
 /// use stdsimd::vendor::_mm256_unpacklo_epi64;
 ///
@@ -2673,12 +2673,12 @@
 /// assert_eq!(c, expected);
 ///
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
 pub unsafe fn _mm256_unpacklo_epi64(a: i64x4, b: i64x4) -> i64x4 {
     simd_shuffle4(a, b, [0, 4, 2, 6])
@@ -2687,7 +2687,7 @@
 /// Compute the bitwise XOR of 256 bits (representing integer data)
 /// in `a` and `b`
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vxorps))]
 pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
     __m256i::from(i8x32::from(a) ^ i8x32::from(b))
@@ -2698,7 +2698,7 @@
 ///
 /// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_extract_epi8(a: i8x32, imm8: i32) -> i8 {
     let imm8 = (imm8 & 31) as u32;
@@ -2710,7 +2710,7 @@
 ///
 /// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_extract_epi16(a: i16x16, imm8: i32) -> i16 {
     let imm8 = (imm8 & 15) as u32;
@@ -2719,7 +2719,7 @@
 
 /// Extract a 32-bit integer from `a`, selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_extract_epi32(a: i32x8, imm8: i32) -> i32 {
     let imm8 = (imm8 & 7) as u32;
@@ -2728,7 +2728,7 @@
 
 /// Extract a 64-bit integer from `a`, selected with `imm8`.
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 // This intrinsic has no corresponding instruction.
 pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
     let imm8 = (imm8 & 3) as u32;
@@ -2737,7 +2737,7 @@
 
 /// Returns the first element of the input vector of [4 x double].
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 //#[cfg_attr(test, assert_instr(movsd))] FIXME
 pub unsafe fn _mm256_cvtsd_f64(a: f64x4) -> f64 {
     a.extract(0)
@@ -2745,7 +2745,7 @@
 
 /// Returns the first element of the input vector of [8 x i32].
 #[inline(always)]
-#[target_feature = "+avx2"]
+#[target_feature(enable = "avx2")]
 //#[cfg_attr(test, assert_instr(movd))] FIXME
 pub unsafe fn _mm256_cvtsi256_si32(a: i32x8) -> i32 {
     a.extract(0)
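Taken together, the avx2 hunks boil down to one calling pattern: keep the runtime `cfg_feature_enabled!` check in safe code, and make both the `#[target_feature(enable = ...)]` function and its call site `unsafe`. A minimal sketch of that pattern, with the vector constructors and nightly feature gates assumed from this crate's doctests:

```rust
#![feature(cfg_target_feature, target_feature)]
#[macro_use]
extern crate stdsimd;

use stdsimd::simd::i32x8;
use stdsimd::vendor::{_mm256_extract_epi32, _mm256_sub_epi32};

#[target_feature(enable = "avx2")]
unsafe fn lane7_difference() -> i32 {
    let a = i32x8::new(10, 20, 30, 40, 50, 60, 70, 80);
    let b = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    // Lane-wise subtraction, then pull lane 7 back out as a scalar.
    _mm256_extract_epi32(_mm256_sub_epi32(a, b), 7)
}

fn main() {
    if cfg_feature_enabled!("avx2") {
        assert_eq!(unsafe { lane7_difference() }, 72);
    }
}
```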
diff --git a/coresimd/src/x86/i586/bmi.rs b/coresimd/src/x86/i586/bmi.rs
index f51a6d2..d713135 100644
--- a/coresimd/src/x86/i586/bmi.rs
+++ b/coresimd/src/x86/i586/bmi.rs
@@ -15,7 +15,7 @@
 /// Extracts bits in range [`start`, `start` + `len`) from `a` into
 /// the least significant bits of the result.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(bextr))]
 pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
     _bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32))
@@ -24,7 +24,7 @@
 /// Extracts bits in range [`start`, `start` + `len`) from `a` into
 /// the least significant bits of the result.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(bextr))]
 #[cfg(not(target_arch = "x86"))]
 pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
@@ -37,7 +37,7 @@
 /// Bits [7,0] of `control` specify the index to the first bit in the range to
 /// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(bextr))]
 pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
     x86_bmi_bextr_32(a, control)
@@ -49,7 +49,7 @@
 /// Bits [7,0] of `control` specify the index to the first bit in the range to
 /// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(bextr))]
 #[cfg(not(target_arch = "x86"))]
 pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
@@ -58,7 +58,7 @@
 
 /// Bitwise logical `AND` of inverted `a` with `b`.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(andn))]
 pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 {
     !a & b
@@ -66,7 +66,7 @@
 
 /// Bitwise logical `AND` of inverted `a` with `b`.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(andn))]
 pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
     !a & b
@@ -74,7 +74,7 @@
 
 /// Extract lowest set isolated bit.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsi))]
 pub unsafe fn _blsi_u32(x: u32) -> u32 {
     x & x.wrapping_neg()
@@ -82,7 +82,7 @@
 
 /// Extract lowest set isolated bit.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsi))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blsi_u64(x: u64) -> u64 {
@@ -91,7 +91,7 @@
 
 /// Get mask up to lowest set bit.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsmsk))]
 pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
     x ^ (x.wrapping_sub(1_u32))
@@ -99,7 +99,7 @@
 
 /// Get mask up to lowest set bit.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsmsk))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
@@ -110,7 +110,7 @@
 ///
 /// If `x` is `0`, sets CF.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsr))]
 pub unsafe fn _blsr_u32(x: u32) -> u32 {
     x & (x.wrapping_sub(1))
@@ -120,7 +120,7 @@
 ///
 /// If `x` is `0`, sets CF.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(blsr))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blsr_u64(x: u64) -> u64 {
@@ -131,7 +131,7 @@
 ///
 /// When the source operand is 0, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
     x.trailing_zeros()
@@ -141,7 +141,7 @@
 ///
 /// When the source operand is 0, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
     x.trailing_zeros() as u64
@@ -151,7 +151,7 @@
 ///
 /// When the source operand is 0, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 {
     x.trailing_zeros() as i32
@@ -161,7 +161,7 @@
 ///
 /// When the source operand is 0, it returns its size in bits.
 #[inline(always)]
-#[target_feature = "+bmi"]
+#[target_feature(enable = "bmi")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
     x.trailing_zeros() as i64
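The bmi.rs hunks are thin wrappers over classic bit tricks, so the identities they rely on can be spot-checked in plain Rust with no target feature at all. The values below are worked examples of the function bodies shown in the diff, not calls into the crate:

```rust
fn main() {
    let x: u32 = 0b0101_1000;

    // _blsi: extract the lowest set isolated bit -> x & -x
    assert_eq!(x & x.wrapping_neg(), 0b0000_1000);

    // _blsmsk: mask up to and including the lowest set bit -> x ^ (x - 1)
    assert_eq!(x ^ x.wrapping_sub(1), 0b0000_1111);

    // _blsr: reset the lowest set bit -> x & (x - 1)
    assert_eq!(x & x.wrapping_sub(1), 0b0101_0000);

    // _tzcnt: trailing zero count (returns the operand size when the input is 0)
    assert_eq!(x.trailing_zeros(), 3);
    assert_eq!(0u32.trailing_zeros(), 32);

    // _bextr_u32 packs `start` and `len` into the control word consumed by
    // _bextr2_u32: start in bits [7,0], length in bits [15,8].
    let (start, len) = (3u32, 4u32);
    assert_eq!((start & 0xff) | ((len & 0xff) << 8), 0x0403);
    // Extracting bits [3, 7) of `x` by hand gives the value bextr would return.
    assert_eq!((x >> start) & ((1 << len) - 1), 0b1011);
}
```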
diff --git a/coresimd/src/x86/i586/bmi2.rs b/coresimd/src/x86/i586/bmi2.rs
index 3eebc41..88f161a 100644
--- a/coresimd/src/x86/i586/bmi2.rs
+++ b/coresimd/src/x86/i586/bmi2.rs
@@ -21,7 +21,7 @@
 // LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))]
 #[cfg_attr(all(test, target_arch = "x86"), assert_instr(mulx))]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
     let result: u64 = (a as u64) * (b as u64);
     *hi = (result >> 32) as u32;
@@ -34,7 +34,7 @@
 /// the low half and the high half of the result.
 #[inline(always)]
 #[cfg_attr(test, assert_instr(mulx))]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg(not(target_arch = "x86"))] // calls an intrinsic
 pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
     let result: u128 = (a as u128) * (b as u128);
@@ -44,7 +44,7 @@
 
 /// Zero the high bits of `a` at positions >= `index`.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(bzhi))]
 pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 {
     x86_bmi2_bzhi_32(a, index)
@@ -52,7 +52,7 @@
 
 /// Zero the high bits of `a` at positions >= `index`.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(bzhi))]
 #[cfg(not(target_arch = "x86"))]
 pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
@@ -62,7 +62,7 @@
 /// Scatter contiguous low order bits of `a` to the result at the positions
 /// specified by the `mask`.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(pdep))]
 pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 {
     x86_bmi2_pdep_32(a, mask)
@@ -71,7 +71,7 @@
 /// Scatter contiguous low order bits of `a` to the result at the positions
 /// specified by the `mask`.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(pdep))]
 #[cfg(not(target_arch = "x86"))]
 pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
@@ -81,7 +81,7 @@
 /// Gathers the bits of `a` specified by the `mask` into the contiguous low
 /// order bit positions of the result.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(pext))]
 pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 {
     x86_bmi2_pext_32(a, mask)
@@ -90,7 +90,7 @@
 /// Gathers the bits of `a` specified by the `mask` into the contiguous low
 /// order bit positions of the result.
 #[inline(always)]
-#[target_feature = "+bmi2"]
+#[target_feature(enable = "bmi2")]
 #[cfg_attr(test, assert_instr(pext))]
 #[cfg(not(target_arch = "x86"))]
 pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
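`_pdep_*` and `_pext_*` only forward to compiler intrinsics, so their scatter/gather behaviour is easy to miss when reading the diff. The helpers below are bit-by-bit software models written purely for illustration; they are not crate items and say nothing about how the hardware implements the instructions:

```rust
// Software model of PDEP: scatter the low-order bits of `a` into the bit
// positions selected by `mask`.
fn pdep_model(a: u32, mask: u32) -> u32 {
    let (mut out, mut k) = (0u32, 0u32);
    for i in 0..32 {
        if mask & (1 << i) != 0 {
            // Take the next low-order bit of `a` and place it at position `i`.
            if a & (1 << k) != 0 {
                out |= 1 << i;
            }
            k += 1;
        }
    }
    out
}

// Software model of PEXT: gather the bits of `a` selected by `mask` into the
// contiguous low-order bits of the result.
fn pext_model(a: u32, mask: u32) -> u32 {
    let (mut out, mut k) = (0u32, 0u32);
    for i in 0..32 {
        if mask & (1 << i) != 0 {
            if a & (1 << i) != 0 {
                out |= 1 << k;
            }
            k += 1;
        }
    }
    out
}

fn main() {
    assert_eq!(pdep_model(0b1011, 0b1111_0000), 0b1011_0000);
    assert_eq!(pext_model(0b1011_0000, 0b1111_0000), 0b1011);
}
```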
diff --git a/coresimd/src/x86/i586/sse.rs b/coresimd/src/x86/i586/sse.rs
index d332570..ebd1d04 100644
--- a/coresimd/src/x86/i586/sse.rs
+++ b/coresimd/src/x86/i586/sse.rs
@@ -13,7 +13,7 @@
 /// Adds the first component of `a` and `b`, the other components are copied
 /// from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(addss))]
 pub unsafe fn _mm_add_ss(a: f32x4, b: f32x4) -> f32x4 {
     addss(a, b)
@@ -21,7 +21,7 @@
 
 /// Adds f32x4 vectors.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(addps))]
 pub unsafe fn _mm_add_ps(a: f32x4, b: f32x4) -> f32x4 {
     a + b
@@ -30,7 +30,7 @@
 /// Subtracts the first component of `b` from `a`, the other components are
 /// copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(subss))]
 pub unsafe fn _mm_sub_ss(a: f32x4, b: f32x4) -> f32x4 {
     subss(a, b)
@@ -38,7 +38,7 @@
 
 /// Subtracts f32x4 vectors.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(subps))]
 pub unsafe fn _mm_sub_ps(a: f32x4, b: f32x4) -> f32x4 {
     a - b
@@ -47,7 +47,7 @@
 /// Multiplies the first component of `a` and `b`, the other components are
 /// copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(mulss))]
 pub unsafe fn _mm_mul_ss(a: f32x4, b: f32x4) -> f32x4 {
     mulss(a, b)
@@ -55,7 +55,7 @@
 
 /// Multiplies f32x4 vectors.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(mulps))]
 pub unsafe fn _mm_mul_ps(a: f32x4, b: f32x4) -> f32x4 {
     a * b
@@ -64,7 +64,7 @@
 /// Divides the first component of `a` by `b`, the other components are
 /// copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(divss))]
 pub unsafe fn _mm_div_ss(a: f32x4, b: f32x4) -> f32x4 {
     divss(a, b)
@@ -72,7 +72,7 @@
 
 /// Divides f32x4 vectors.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(divps))]
 pub unsafe fn _mm_div_ps(a: f32x4, b: f32x4) -> f32x4 {
     a / b
@@ -81,7 +81,7 @@
 /// Return the square root of the first single-precision (32-bit)
 /// floating-point element in `a`, the other elements are unchanged.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(sqrtss))]
 pub unsafe fn _mm_sqrt_ss(a: f32x4) -> f32x4 {
     sqrtss(a)
@@ -90,7 +90,7 @@
 /// Return the square root of packed single-precision (32-bit) floating-point
 /// elements in `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(sqrtps))]
 pub unsafe fn _mm_sqrt_ps(a: f32x4) -> f32x4 {
     sqrtps(a)
@@ -99,7 +99,7 @@
 /// Return the approximate reciprocal of the first single-precision
 /// (32-bit) floating-point element in `a`, the other elements are unchanged.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(rcpss))]
 pub unsafe fn _mm_rcp_ss(a: f32x4) -> f32x4 {
     rcpss(a)
@@ -108,7 +108,7 @@
 /// Return the approximate reciprocal of packed single-precision (32-bit)
 /// floating-point elements in `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(rcpps))]
 pub unsafe fn _mm_rcp_ps(a: f32x4) -> f32x4 {
     rcpps(a)
@@ -117,7 +117,7 @@
 /// Return the approximate reciprocal square root of the first single-precision
 /// (32-bit) floating-point element in `a`, the other elements are unchanged.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(rsqrtss))]
 pub unsafe fn _mm_rsqrt_ss(a: f32x4) -> f32x4 {
     rsqrtss(a)
@@ -126,7 +126,7 @@
 /// Return the approximate reciprocal square root of packed single-precision
 /// (32-bit) floating-point elements in `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(rsqrtps))]
 pub unsafe fn _mm_rsqrt_ps(a: f32x4) -> f32x4 {
     rsqrtps(a)
@@ -136,7 +136,7 @@
 /// and `b`, and return the minimum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(minss))]
 pub unsafe fn _mm_min_ss(a: f32x4, b: f32x4) -> f32x4 {
     minss(a, b)
@@ -145,7 +145,7 @@
 /// Compare packed single-precision (32-bit) floating-point elements in `a` and
 /// `b`, and return the corresponding minimum values.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(minps))]
 pub unsafe fn _mm_min_ps(a: f32x4, b: f32x4) -> f32x4 {
     minps(a, b)
@@ -155,7 +155,7 @@
 /// and `b`, and return the maximum value in the first element of the return
 /// value, the other elements are copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(maxss))]
 pub unsafe fn _mm_max_ss(a: f32x4, b: f32x4) -> f32x4 {
     maxss(a, b)
@@ -164,7 +164,7 @@
 /// Compare packed single-precision (32-bit) floating-point elements in `a` and
 /// `b`, and return the corresponding maximum values.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(maxps))]
 pub unsafe fn _mm_max_ps(a: f32x4, b: f32x4) -> f32x4 {
     maxps(a, b)
@@ -172,7 +172,7 @@
 
 /// Bitwise AND of packed single-precision (32-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // i586 only seems to generate plain `and` instructions, so ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
            assert_instr(andps))]
@@ -187,7 +187,7 @@
 ///
 /// Computes `!a & b` for each bit in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // i586 only seems to generate plain `not` and `and` instructions, so ignore
 // it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
@@ -200,7 +200,7 @@
 
 /// Bitwise OR of packed single-precision (32-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // i586 only seems to generate plain `or` instructions, so we ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
            assert_instr(orps))]
@@ -213,7 +213,7 @@
 /// Bitwise exclusive OR of packed single-precision (32-bit) floating-point
 /// elements.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // i586 only seems to generate plain `xor` instructions, so we ignore it.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
            assert_instr(xorps))]
@@ -227,7 +227,7 @@
 /// the result will be `0xffffffff` if the two inputs are equal, or `0`
 /// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpeqss))]
 pub unsafe fn _mm_cmpeq_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 0)
@@ -238,7 +238,7 @@
 /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
 /// upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpltss))]
 pub unsafe fn _mm_cmplt_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 1)
@@ -249,7 +249,7 @@
 /// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
 /// are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpless))]
 pub unsafe fn _mm_cmple_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 2)
@@ -260,7 +260,7 @@
 /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result
 /// are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpltss))]
 pub unsafe fn _mm_cmpgt_ss(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3])
@@ -271,7 +271,7 @@
 /// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits
 /// of the result are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpless))]
 pub unsafe fn _mm_cmpge_ss(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3])
@@ -282,7 +282,7 @@
 /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
 /// upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpneqss))]
 pub unsafe fn _mm_cmpneq_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 4)
@@ -293,7 +293,7 @@
 /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the
 /// upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnltss))]
 pub unsafe fn _mm_cmpnlt_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 5)
@@ -304,7 +304,7 @@
 /// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits
 /// of the result are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnless))]
 pub unsafe fn _mm_cmpnle_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 6)
@@ -315,7 +315,7 @@
 /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are
 /// the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnltss))]
 pub unsafe fn _mm_cmpngt_ss(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3])
@@ -326,7 +326,7 @@
 /// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96
 /// bits of the result are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnless))]
 pub unsafe fn _mm_cmpnge_ss(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3])
@@ -337,7 +337,7 @@
 /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
 /// are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpordss))]
 pub unsafe fn _mm_cmpord_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 7)
@@ -348,7 +348,7 @@
 /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result
 /// are the upper 96 bits of `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpunordss))]
 pub unsafe fn _mm_cmpunord_ss(a: f32x4, b: f32x4) -> f32x4 {
     cmpss(a, b, 3)
@@ -358,7 +358,7 @@
 /// The result in the output vector will be `0xffffffff` if the input elements
 /// were equal, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpeqps))]
 pub unsafe fn _mm_cmpeq_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 0)
@@ -368,7 +368,7 @@
 /// The result in the output vector will be `0xffffffff` if the input element
 /// in `a` is less than the corresponding element in `b`, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpltps))]
 pub unsafe fn _mm_cmplt_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 1)
@@ -379,7 +379,7 @@
 /// in `a` is less than or equal to the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpleps))]
 pub unsafe fn _mm_cmple_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 2)
@@ -389,7 +389,7 @@
 /// The result in the output vector will be `0xffffffff` if the input element
 /// in `a` is greater than the corresponding element in `b`, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpltps))]
 pub unsafe fn _mm_cmpgt_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 1)
@@ -400,7 +400,7 @@
 /// in `a` is greater than or equal to the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpleps))]
 pub unsafe fn _mm_cmpge_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 2)
@@ -410,7 +410,7 @@
 /// The result in the output vector will be `0xffffffff` if the input elements
 /// are *not* equal, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpneqps))]
 pub unsafe fn _mm_cmpneq_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 4)
@@ -421,7 +421,7 @@
 /// in `a` is *not* less than the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnltps))]
 pub unsafe fn _mm_cmpnlt_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 5)
@@ -432,7 +432,7 @@
 /// in `a` is *not* less than or equal to the corresponding element in `b`, or
 /// `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnleps))]
 pub unsafe fn _mm_cmpnle_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(a, b, 6)
@@ -443,7 +443,7 @@
 /// in `a` is *not* greater than the corresponding element in `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnltps))]
 pub unsafe fn _mm_cmpngt_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 5)
@@ -454,7 +454,7 @@
 /// in `a` is *not* greater than or equal to the corresponding element in `b`,
 /// or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpnleps))]
 pub unsafe fn _mm_cmpnge_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 6)
@@ -465,7 +465,7 @@
 /// in the output vector will be `0xffffffff` if the input elements in `a` and
 /// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpordps))]
 pub unsafe fn _mm_cmpord_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 7)
@@ -476,7 +476,7 @@
 /// in the output vector will be `0xffffffff` if the input elements in `a` and
 /// `b` are unordered (i.e., at least one of them is a NaN), or 0 otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cmpunordps))]
 pub unsafe fn _mm_cmpunord_ps(a: f32x4, b: f32x4) -> f32x4 {
     cmpps(b, a, 3)
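Every `_mm_cmp*_ss`/`_mm_cmp*_ps` wrapper above funnels into `cmpss`/`cmpps` with a small predicate immediate, and the `gt`/`ge`/`ngt`/`nge` forms reuse the `lt`/`le`/`nlt`/`nle` predicates with the operands swapped. The helper below is hypothetical (not part of the crate) and only names the immediates as they are used at the call sites above:

```rust
// Hypothetical helper: maps the `cmpss`/`cmpps` predicate immediates used in
// this file to the comparison they select.
fn cmp_predicate_name(imm8: i32) -> &'static str {
    match imm8 {
        0 => "eq",    // _mm_cmpeq_*
        1 => "lt",    // _mm_cmplt_*; _mm_cmpgt_* swaps the operands
        2 => "le",    // _mm_cmple_*; _mm_cmpge_* swaps the operands
        3 => "unord", // _mm_cmpunord_*
        4 => "neq",   // _mm_cmpneq_*
        5 => "nlt",   // _mm_cmpnlt_*; _mm_cmpngt_* swaps the operands
        6 => "nle",   // _mm_cmpnle_*; _mm_cmpnge_* swaps the operands
        7 => "ord",   // _mm_cmpord_*
        _ => "reserved",
    }
}

fn main() {
    // `_mm_cmpunord_ss(a, b)` above lowers to `cmpss(a, b, 3)`:
    assert_eq!(cmp_predicate_name(3), "unord");
}
```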
@@ -485,7 +485,7 @@
 /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
 /// `1` if they are equal, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comieq_ss(a: f32x4, b: f32x4) -> i32 {
     comieq_ss(a, b)
@@ -494,7 +494,7 @@
 /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
 /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comilt_ss(a: f32x4, b: f32x4) -> i32 {
     comilt_ss(a, b)
@@ -504,7 +504,7 @@
 /// `1` if the value from `a` is less than or equal to the one from `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comile_ss(a: f32x4, b: f32x4) -> i32 {
     comile_ss(a, b)
@@ -514,7 +514,7 @@
 /// `1` if the value from `a` is greater than the one from `b`, or `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comigt_ss(a: f32x4, b: f32x4) -> i32 {
     comigt_ss(a, b)
@@ -524,7 +524,7 @@
 /// `1` if the value from `a` is greater than or equal to the one from `b`, or
 /// `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comige_ss(a: f32x4, b: f32x4) -> i32 {
     comige_ss(a, b)
@@ -533,7 +533,7 @@
 /// Compare two 32-bit floats from the low-order bits of `a` and `b`. Returns
 /// `1` if they are *not* equal, or `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(comiss))]
 pub unsafe fn _mm_comineq_ss(a: f32x4, b: f32x4) -> i32 {
     comineq_ss(a, b)
@@ -543,7 +543,7 @@
 /// `1` if they are equal, or `0` otherwise. This instruction will not signal
 /// an exception if either argument is a quiet NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomieq_ss(a: f32x4, b: f32x4) -> i32 {
     ucomieq_ss(a, b)
@@ -554,7 +554,7 @@
 /// This instruction will not signal an exception if either argument is a quiet
 /// NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomilt_ss(a: f32x4, b: f32x4) -> i32 {
     ucomilt_ss(a, b)
@@ -565,7 +565,7 @@
 /// otherwise. This instruction will not signal an exception if either argument
 /// is a quiet NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomile_ss(a: f32x4, b: f32x4) -> i32 {
     ucomile_ss(a, b)
@@ -576,7 +576,7 @@
 /// otherwise. This instruction will not signal an exception if either argument
 /// is a quiet NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomigt_ss(a: f32x4, b: f32x4) -> i32 {
     ucomigt_ss(a, b)
@@ -587,7 +587,7 @@
 /// `0` otherwise. This instruction will not signal an exception if either
 /// argument is a quiet NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomige_ss(a: f32x4, b: f32x4) -> i32 {
     ucomige_ss(a, b)
@@ -597,7 +597,7 @@
 /// `1` if they are *not* equal, or `0` otherwise. This instruction will not
 /// signal an exception if either argument is a quiet NaN.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ucomiss))]
 pub unsafe fn _mm_ucomineq_ss(a: f32x4, b: f32x4) -> i32 {
     ucomineq_ss(a, b)
@@ -612,7 +612,7 @@
 ///
 /// This corresponds to the `CVTSS2SI` instruction (with 32 bit output).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtss2si))]
 pub unsafe fn _mm_cvtss_si32(a: f32x4) -> i32 {
     cvtss2si(a)
@@ -620,7 +620,7 @@
 
 /// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtss2si))]
 pub unsafe fn _mm_cvt_ss2si(a: f32x4) -> i32 {
     _mm_cvtss_si32(a)
@@ -637,7 +637,7 @@
 ///
 /// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvttss2si))]
 pub unsafe fn _mm_cvttss_si32(a: f32x4) -> i32 {
     cvttss2si(a)
@@ -645,7 +645,7 @@
 
 /// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvttss2si))]
 pub unsafe fn _mm_cvtt_ss2si(a: f32x4) -> i32 {
     _mm_cvttss_si32(a)
@@ -653,7 +653,7 @@
 
 /// Extract the lowest 32 bit float from the input vector.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // No point in using assert_instrs. In Unix x86_64 calling convention this is a
 // no-op, and on Windows it's just a `mov`.
 pub unsafe fn _mm_cvtss_f32(a: f32x4) -> f32 {
@@ -666,7 +666,7 @@
 /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit
 /// input).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtsi2ss))]
 pub unsafe fn _mm_cvtsi32_ss(a: f32x4, b: i32) -> f32x4 {
     cvtsi2ss(a, b)
@@ -674,7 +674,7 @@
 
 /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtsi2ss))]
 pub unsafe fn _mm_cvt_si2ss(a: f32x4, b: i32) -> f32x4 {
     _mm_cvtsi32_ss(a, b)
@@ -683,7 +683,7 @@
 /// Construct a `f32x4` with the lowest element set to `a` and the rest set to
 /// zero.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_set_ss(a: f32) -> f32x4 {
     f32x4::new(a, 0.0, 0.0, 0.0)
@@ -691,7 +691,7 @@
 
 /// Construct a `f32x4` with all elements set to `a`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps))]
 pub unsafe fn _mm_set1_ps(a: f32) -> f32x4 {
     f32x4::new(a, a, a, a)
@@ -699,7 +699,7 @@
 
 /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps))]
 pub unsafe fn _mm_set_ps1(a: f32) -> f32x4 {
     _mm_set1_ps(a)
@@ -723,7 +723,7 @@
 /// assert_eq!(f32x4::new(a, b, c, d), _mm_set_ps(d, c, b, a));
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(unpcklps))]
 pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> f32x4 {
     f32x4::new(d, c, b, a)
@@ -738,7 +738,7 @@
 /// assert_eq!(f32x4::new(a, b, c, d), _mm_setr_ps(a, b, c, d));
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))]
 // On a 32-bit architecture it just copies the operands from the stack.
 #[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))]
@@ -748,7 +748,7 @@
 
 /// Construct a `f32x4` with all elements initialized to zero.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(xorps))]
 pub unsafe fn _mm_setzero_ps() -> f32x4 {
     f32x4::new(0.0, 0.0, 0.0, 0.0)
@@ -760,7 +760,7 @@
 /// The lower half of the result takes values from `a` and the higher half
 /// from `b`. The mask is split into four 2-bit fields, each selecting one
 /// element from the corresponding input.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps, mask = 3))]
 pub unsafe fn _mm_shuffle_ps(a: f32x4, b: f32x4, mask: u32) -> f32x4 {
     let mask = (mask & 0xFF) as u8;
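
Because the 8-bit mask packs four 2-bit lane selectors (the low two choose lanes of `a` for the low half of the result, the high two choose lanes of `b` for the high half, per standard `SHUFPS` semantics), a small helper keeps call sites readable. A minimal sketch; the helper name is made up, and only the `_mm_shuffle_ps(a, b, mask)` signature above is taken from this diff:

```
// Illustrative helper: pack four 2-bit lane selectors into a shuffle mask,
// mirroring the classic _MM_SHUFFLE(z, y, x, w) bit layout.
fn shuffle_mask(z: u32, y: u32, x: u32, w: u32) -> u32 {
    (z << 6) | (y << 4) | (x << 2) | w
}

// result[0] = a[2], result[1] = a[1]  (low half from `a`),
// result[2] = b[3], result[3] = b[0]  (high half from `b`):
// let r = unsafe { _mm_shuffle_ps(a, b, shuffle_mask(0, 3, 1, 2)) };
```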
@@ -811,7 +811,7 @@
 /// Unpack and interleave single-precision (32-bit) floating-point elements
 /// from the higher half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(unpckhps))]
 pub unsafe fn _mm_unpackhi_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [2, 6, 3, 7])
@@ -820,7 +820,7 @@
 /// Unpack and interleave single-precision (32-bit) floating-point elements
 /// from the lower half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(unpcklps))]
 pub unsafe fn _mm_unpacklo_ps(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [0, 4, 1, 5])
@@ -829,7 +829,7 @@
 /// Combine the higher halves of `a` and `b`. The higher half of `b` occupies
 /// the lower half of the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movhlps))]
 #[cfg_attr(all(test, windows), assert_instr(unpckhpd))]
 pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
@@ -840,7 +840,7 @@
 /// Combine the lower halves of `a` and `b`. The lower half of `b` occupies
 /// the higher half of the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
 #[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
 pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
@@ -852,7 +852,7 @@
 /// The mask is stored in the 4 least significant bits of the return value.
 /// All other bits are set to `0`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movmskps))]
 pub unsafe fn _mm_movemask_ps(a: f32x4) -> i32 {
     movmskps(a)
@@ -873,8 +873,8 @@
 /// # // The real main function
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse") {
-/// #         #[target_feature = "+sse"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse")]
+/// #         unsafe fn worker() {
 /// #
 /// #   use stdsimd::simd::f32x4;
 /// #   use stdsimd::vendor::_mm_loadh_pi;
@@ -887,12 +887,12 @@
 /// assert_eq!(r, f32x4::new(1.0, 2.0, 5.0, 6.0));
 /// #
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // TODO: generates MOVHPD if the CPU supports SSE2.
 // #[cfg_attr(test, assert_instr(movhps))]
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
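
The doctest rewrite above is the calling pattern this whole commit moves to: the attribute goes on an `unsafe fn`, and the call sits inside an `unsafe` block guarded by a runtime check. A minimal standalone sketch, assuming the same crate layout the doctests use (`stdsimd::simd`, `stdsimd::vendor`, `cfg_feature_enabled!` pulled in with `#[macro_use]`); extra nightly `#![feature(...)]` gates may also be required:

```
// #![feature(cfg_target_feature, target_feature)]  // likely needed on nightly
#[macro_use]
extern crate stdsimd;

use stdsimd::simd::f32x4;
use stdsimd::vendor::_mm_set1_ps;

// The attribute now takes `enable = "..."` and requires an `unsafe fn`.
#[target_feature(enable = "sse")]
unsafe fn splat_two() -> f32x4 {
    _mm_set1_ps(2.0)
}

fn main() {
    if cfg_feature_enabled!("sse") {
        // Sound only because SSE availability was just checked at runtime.
        let v = unsafe { splat_two() };
        assert_eq!(v, f32x4::new(2.0, 2.0, 2.0, 2.0));
    }
}
```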
@@ -925,8 +925,8 @@
 /// # // The real main function
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse") {
-/// #         #[target_feature = "+sse"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse")]
+/// #         unsafe fn worker() {
 /// #
 /// #   use stdsimd::simd::f32x4;
 /// #   use stdsimd::vendor::_mm_loadl_pi;
@@ -939,12 +939,12 @@
 /// assert_eq!(r, f32x4::new(5.0, 6.0, 3.0, 4.0));
 /// #
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // TODO: generates MOVLPD if the CPU supports SSE2.
 // #[cfg_attr(test, assert_instr(movlps))]
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
@@ -967,7 +967,7 @@
 ///
 /// This corresponds to instructions `VMOVSS` / `MOVSS`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_load_ss(p: *const f32) -> f32x4 {
     f32x4::new(*p, 0.0, 0.0, 0.0)
@@ -979,7 +979,7 @@
 /// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some
 /// shuffling.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_load1_ps(p: *const f32) -> f32x4 {
     let a = *p;
@@ -988,7 +988,7 @@
 
 /// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html)
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_load_ps1(p: *const f32) -> f32x4 {
     _mm_load1_ps(p)
@@ -1003,7 +1003,7 @@
 ///
 /// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_load_ps(p: *const f32) -> f32x4 {
     *(p as *const f32x4)
@@ -1017,7 +1017,7 @@
 ///
 /// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_ps(p: *const f32) -> f32x4 {
     // Note: Using `*p` would require `f32` alignment, but `movups` has no
@@ -1050,7 +1050,7 @@
 /// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some
 /// shuffling.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_loadr_ps(p: *const f32) -> f32x4 {
     let a = _mm_load_ps(p);
@@ -1062,7 +1062,7 @@
 /// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
 /// choose to generate an equivalent sequence of other instructions.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
 // fine.
 // On i586 (no SSE2) it just generates plain MOV instructions.
@@ -1092,7 +1092,7 @@
 /// This intrinsic corresponds to the `MOVQ` instruction. The compiler may
 /// choose to generate an equivalent sequence of other instructions.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 // On i586 the codegen just generates plain MOVs. No need to test for that.
 #[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
                not(target_family = "windows")),
@@ -1122,7 +1122,7 @@
 ///
 /// This intrinsic corresponds to the `MOVSS` instruction.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_store_ss(p: *mut f32, a: f32x4) {
     *p = a.extract(0)
@@ -1145,7 +1145,7 @@
 /// *p.offset(3) = x;
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_store1_ps(p: *mut f32, a: f32x4) {
     let b: f32x4 = simd_shuffle4(a, a, [0, 0, 0, 0]);
@@ -1154,7 +1154,7 @@
 
 /// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html)
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_store_ps1(p: *mut f32, a: f32x4) {
     _mm_store1_ps(p, a);
@@ -1170,7 +1170,7 @@
 ///
 /// This corresponds to instructions `VMOVAPS` / `MOVAPS`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_store_ps(p: *mut f32, a: f32x4) {
     *(p as *mut f32x4) = a;
@@ -1182,7 +1182,7 @@
 ///
 /// This corresponds to instructions `VMOVUPS` / `MOVUPS`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_storeu_ps(p: *mut f32, a: f32x4) {
     ptr::copy_nonoverlapping(
@@ -1207,7 +1207,7 @@
 /// *p.offset(3) = a.extract(0);
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_storer_ps(p: *mut f32, a: f32x4) {
     let b: f32x4 = simd_shuffle4(a, a, [3, 2, 1, 0]);
@@ -1222,7 +1222,7 @@
 /// _mm_move_ss(a, b) == a.replace(0, b.extract(0))
 /// ```
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 pub unsafe fn _mm_move_ss(a: f32x4, b: f32x4) -> f32x4 {
     simd_shuffle4(a, b, [4, 1, 2, 3])
@@ -1235,7 +1235,7 @@
 /// globally visible before any store instruction which follows the fence in
 /// program order.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(sfence))]
 pub unsafe fn _mm_sfence() {
     sfence()
@@ -1245,7 +1245,7 @@
 ///
 /// For more info see [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(stmxcsr))]
 pub unsafe fn _mm_getcsr() -> u32 {
     let mut result = 0_i32;
@@ -1379,7 +1379,7 @@
 /// ```
 ///
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(ldmxcsr))]
 pub unsafe fn _mm_setcsr(val: u32) {
     ldmxcsr(&val as *const _ as *const i8);
@@ -1437,7 +1437,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
     _mm_getcsr() & _MM_MASK_MASK
 }
@@ -1445,7 +1445,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
     _mm_getcsr() & _MM_EXCEPT_MASK
 }
@@ -1453,7 +1453,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
     _mm_getcsr() & _MM_FLUSH_ZERO_MASK
 }
@@ -1461,7 +1461,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
     _mm_getcsr() & _MM_ROUND_MASK
 }
@@ -1469,7 +1469,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
     _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
 }
@@ -1477,7 +1477,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
     _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
 }
@@ -1485,7 +1485,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
     let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
     // println!("setting csr={:x}", val);
@@ -1495,7 +1495,7 @@
 /// See [`_mm_setcsr`](fn._mm_setcsr.html)
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
     _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x)
 }
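
These upper-case helpers are thin masked read-modify-write wrappers around `_mm_getcsr`/`_mm_setcsr`, so callers that flip a mode temporarily should save and restore the whole register. A sketch of that pattern; `_MM_FLUSH_ZERO_ON` is an assumed constant name for the flush-to-zero bit and does not appear in this diff:

```
#[target_feature(enable = "sse")]
unsafe fn with_flush_to_zero_enabled() {
    // Save the full control/status register up front.
    let saved = _mm_getcsr();

    // Assumed constant name for the FTZ bit of MXCSR.
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);

    // ... run denormal-heavy SSE code here ...

    // Restore the caller's state before returning.
    _mm_setcsr(saved);
}
```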
@@ -1549,7 +1549,7 @@
 ///   resources (e.g., request buffers).
 ///
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(prefetcht0, strategy = _MM_HINT_T0))]
 #[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
 #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
@@ -1574,7 +1574,7 @@
 
 /// Return vector of type __m128 with undefined elements.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _mm_undefined_ps() -> f32x4 {
     f32x4::splat(mem::uninitialized())
 }
@@ -1582,7 +1582,7 @@
 /// Transpose the 4x4 matrix formed by 4 rows of f32x4 in place.
 #[inline(always)]
 #[allow(non_snake_case)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 pub unsafe fn _MM_TRANSPOSE4_PS(
     row0: &mut f32x4, row1: &mut f32x4, row2: &mut f32x4, row3: &mut f32x4
 ) {
@@ -1680,7 +1680,7 @@
 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
 /// exception _may_ be generated.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movntps))]
 pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: f32x4) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1689,7 +1689,7 @@
 /// Store 64 bits of integer data from `a` into memory using a non-temporal
 /// memory hint.
 #[inline(always)]
-#[target_feature = "+sse,+mmx"]
+#[target_feature(enable = "sse,mmx")]
 #[cfg_attr(test, assert_instr(movntq))]
 pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
     movntdq(mem_addr, a)
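
Note that the combined `"+sse,+mmx"` string above becomes a single comma-separated list in the new syntax rather than two attributes. A caller still has to verify every listed feature before entering `unsafe`; a sketch with a hypothetical wrapper (the `"mmx"` runtime check is an assumption about what `cfg_feature_enabled!` accepts):

```
// Hypothetical wrapper over the intrinsic updated above.
#[target_feature(enable = "sse,mmx")]
unsafe fn stream_out(dst: *mut __m64, v: __m64) {
    _mm_stream_pi(dst, v);
}

// At the call site, check both features before the unsafe call:
// if cfg_feature_enabled!("sse") && cfg_feature_enabled!("mmx") {
//     unsafe { stream_out(dst, v) };
// }
```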
diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs
index 44b76d6..d965360 100644
--- a/coresimd/src/x86/i586/sse2.rs
+++ b/coresimd/src/x86/i586/sse2.rs
@@ -16,7 +16,7 @@
 /// This can help improve the performance and power consumption of spin-wait
 /// loops.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pause))]
 pub unsafe fn _mm_pause() {
     pause()
@@ -25,7 +25,7 @@
 /// Invalidate and flush the cache line that contains `p` from all levels of
 /// the cache hierarchy.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(clflush))]
 pub unsafe fn _mm_clflush(p: *mut u8) {
     clflush(p)
@@ -38,7 +38,7 @@
 /// globally visible before any load instruction which follows the fence in
 /// program order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(lfence))]
 pub unsafe fn _mm_lfence() {
     lfence()
@@ -51,7 +51,7 @@
 /// memory fence instruction is globally visible before any memory instruction
 /// which follows the fence in program order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(mfence))]
 pub unsafe fn _mm_mfence() {
     mfence()
@@ -59,7 +59,7 @@
 
 /// Add packed 8-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddb))]
 pub unsafe fn _mm_add_epi8(a: i8x16, b: i8x16) -> i8x16 {
     a + b
@@ -67,7 +67,7 @@
 
 /// Add packed 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddw))]
 pub unsafe fn _mm_add_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a + b
@@ -75,7 +75,7 @@
 
 /// Add packed 32-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddd))]
 pub unsafe fn _mm_add_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a + b
@@ -83,7 +83,7 @@
 
 /// Add packed 64-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddq))]
 pub unsafe fn _mm_add_epi64(a: i64x2, b: i64x2) -> i64x2 {
     a + b
@@ -91,7 +91,7 @@
 
 /// Add packed 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddsb))]
 pub unsafe fn _mm_adds_epi8(a: i8x16, b: i8x16) -> i8x16 {
     paddsb(a, b)
@@ -99,7 +99,7 @@
 
 /// Add packed 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddsw))]
 pub unsafe fn _mm_adds_epi16(a: i16x8, b: i16x8) -> i16x8 {
     paddsw(a, b)
@@ -107,7 +107,7 @@
 
 /// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddusb))]
 pub unsafe fn _mm_adds_epu8(a: u8x16, b: u8x16) -> u8x16 {
     paddsub(a, b)
@@ -115,7 +115,7 @@
 
 /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddusw))]
 pub unsafe fn _mm_adds_epu16(a: u16x8, b: u16x8) -> u16x8 {
     paddsuw(a, b)
@@ -123,7 +123,7 @@
 
 /// Average packed unsigned 8-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pavgb))]
 pub unsafe fn _mm_avg_epu8(a: u8x16, b: u8x16) -> u8x16 {
     pavgb(a, b)
@@ -131,7 +131,7 @@
 
 /// Average packed unsigned 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pavgw))]
 pub unsafe fn _mm_avg_epu16(a: u16x8, b: u16x8) -> u16x8 {
     pavgw(a, b)
@@ -143,7 +143,7 @@
 /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
 /// intermediate 32-bit integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmaddwd))]
 pub unsafe fn _mm_madd_epi16(a: i16x8, b: i16x8) -> i32x4 {
     pmaddwd(a, b)
@@ -152,7 +152,7 @@
 /// Compare packed 16-bit integers in `a` and `b`, and return the packed
 /// maximum values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmaxsw))]
 pub unsafe fn _mm_max_epi16(a: i16x8, b: i16x8) -> i16x8 {
     pmaxsw(a, b)
@@ -161,7 +161,7 @@
 /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
 /// packed maximum values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmaxub))]
 pub unsafe fn _mm_max_epu8(a: u8x16, b: u8x16) -> u8x16 {
     pmaxub(a, b)
@@ -170,7 +170,7 @@
 /// Compare packed 16-bit integers in `a` and `b`, and return the packed
 /// minimum values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pminsw))]
 pub unsafe fn _mm_min_epi16(a: i16x8, b: i16x8) -> i16x8 {
     pminsw(a, b)
@@ -179,7 +179,7 @@
 /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the
 /// packed minimum values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pminub))]
 pub unsafe fn _mm_min_epu8(a: u8x16, b: u8x16) -> u8x16 {
     pminub(a, b)
@@ -190,7 +190,7 @@
 /// The multiplication produces intermediate 32-bit integers, and returns the
 /// high 16 bits of the intermediate integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmulhw))]
 pub unsafe fn _mm_mulhi_epi16(a: i16x8, b: i16x8) -> i16x8 {
     pmulhw(a, b)
@@ -201,7 +201,7 @@
 /// The multiplication produces intermediate 32-bit integers, and returns the
 /// high 16 bits of the intermediate integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmulhuw))]
 pub unsafe fn _mm_mulhi_epu16(a: u16x8, b: u16x8) -> u16x8 {
     pmulhuw(a, b)
@@ -212,7 +212,7 @@
 /// The multiplication produces intermediate 32-bit integers, and returns the
 /// low 16 bits of the intermediate integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmullw))]
 pub unsafe fn _mm_mullo_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a * b
@@ -223,7 +223,7 @@
 ///
 /// Return the unsigned 64-bit results.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmuludq))]
 pub unsafe fn _mm_mul_epu32(a: u32x4, b: u32x4) -> u64x2 {
     pmuludq(a, b)
@@ -236,7 +236,7 @@
 /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
 /// the low 16 bits of 64-bit elements returned.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psadbw))]
 pub unsafe fn _mm_sad_epu8(a: u8x16, b: u8x16) -> u64x2 {
     psadbw(a, b)
@@ -244,7 +244,7 @@
 
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubb))]
 pub unsafe fn _mm_sub_epi8(a: i8x16, b: i8x16) -> i8x16 {
     a - b
@@ -252,7 +252,7 @@
 
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubw))]
 pub unsafe fn _mm_sub_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a - b
@@ -260,7 +260,7 @@
 
 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubd))]
 pub unsafe fn _mm_sub_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a - b
@@ -268,7 +268,7 @@
 
 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubq))]
 pub unsafe fn _mm_sub_epi64(a: i64x2, b: i64x2) -> i64x2 {
     a - b
@@ -277,7 +277,7 @@
 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
 /// using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubsb))]
 pub unsafe fn _mm_subs_epi8(a: i8x16, b: i8x16) -> i8x16 {
     psubsb(a, b)
@@ -286,7 +286,7 @@
 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
 /// using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubsw))]
 pub unsafe fn _mm_subs_epi16(a: i16x8, b: i16x8) -> i16x8 {
     psubsw(a, b)
@@ -295,7 +295,7 @@
 /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
 /// integers in `a` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubusb))]
 pub unsafe fn _mm_subs_epu8(a: u8x16, b: u8x16) -> u8x16 {
     psubusb(a, b)
@@ -304,7 +304,7 @@
 /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
 /// integers in `a` using saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubusw))]
 pub unsafe fn _mm_subs_epu16(a: u16x8, b: u16x8) -> u16x8 {
     psubusw(a, b)
@@ -312,7 +312,7 @@
 
 /// Shift `a` left by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
 pub unsafe fn _mm_slli_si128(a: i8x16, imm8: i32) -> i8x16 {
     let (zero, imm8) = (i8x16::splat(0), imm8 as u32);
@@ -353,7 +353,7 @@
 
 /// Shift `a` left by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
 pub unsafe fn _mm_bslli_si128(a: i8x16, imm8: i32) -> i8x16 {
     _mm_slli_si128(a, imm8)
@@ -361,7 +361,7 @@
 
 /// Shift `a` right by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
 pub unsafe fn _mm_bsrli_si128(a: i8x16, imm8: i32) -> i8x16 {
     _mm_srli_si128(a, imm8)
@@ -369,7 +369,7 @@
 
 /// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllw))]
 pub unsafe fn _mm_slli_epi16(a: i16x8, imm8: i32) -> i16x8 {
     pslliw(a, imm8)
@@ -378,7 +378,7 @@
 /// Shift packed 16-bit integers in `a` left by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllw))]
 pub unsafe fn _mm_sll_epi16(a: i16x8, count: i16x8) -> i16x8 {
     psllw(a, count)
@@ -386,7 +386,7 @@
 
 /// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslld))]
 pub unsafe fn _mm_slli_epi32(a: i32x4, imm8: i32) -> i32x4 {
     psllid(a, imm8)
@@ -395,7 +395,7 @@
 /// Shift packed 32-bit integers in `a` left by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslld))]
 pub unsafe fn _mm_sll_epi32(a: i32x4, count: i32x4) -> i32x4 {
     pslld(a, count)
@@ -403,7 +403,7 @@
 
 /// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllq))]
 pub unsafe fn _mm_slli_epi64(a: i64x2, imm8: i32) -> i64x2 {
     pslliq(a, imm8)
@@ -412,7 +412,7 @@
 /// Shift packed 64-bit integers in `a` left by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllq))]
 pub unsafe fn _mm_sll_epi64(a: i64x2, count: i64x2) -> i64x2 {
     psllq(a, count)
@@ -421,7 +421,7 @@
 /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign
 /// bits.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psraw))]
 pub unsafe fn _mm_srai_epi16(a: i16x8, imm8: i32) -> i16x8 {
     psraiw(a, imm8)
@@ -430,7 +430,7 @@
 /// Shift packed 16-bit integers in `a` right by `count` while shifting in sign
 /// bits.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psraw))]
 pub unsafe fn _mm_sra_epi16(a: i16x8, count: i16x8) -> i16x8 {
     psraw(a, count)
@@ -439,7 +439,7 @@
 /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign
 /// bits.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrad))]
 pub unsafe fn _mm_srai_epi32(a: i32x4, imm8: i32) -> i32x4 {
     psraid(a, imm8)
@@ -448,7 +448,7 @@
 /// Shift packed 32-bit integers in `a` right by `count` while shifting in sign
 /// bits.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrad))]
 pub unsafe fn _mm_sra_epi32(a: i32x4, count: i32x4) -> i32x4 {
     psrad(a, count)
@@ -456,7 +456,7 @@
 
 /// Shift `a` right by `imm8` bytes while shifting in zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
 pub unsafe fn _mm_srli_si128(a: i8x16, imm8: i32) -> i8x16 {
     let (zero, imm8) = (i8x16::splat(0), imm8 as u32);
@@ -498,7 +498,7 @@
 /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlw))]
 pub unsafe fn _mm_srli_epi16(a: i16x8, imm8: i32) -> i16x8 {
     psrliw(a, imm8)
@@ -507,7 +507,7 @@
 /// Shift packed 16-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlw))]
 pub unsafe fn _mm_srl_epi16(a: i16x8, count: i16x8) -> i16x8 {
     psrlw(a, count)
@@ -516,7 +516,7 @@
 /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrld))]
 pub unsafe fn _mm_srli_epi32(a: i32x4, imm8: i32) -> i32x4 {
     psrlid(a, imm8)
@@ -525,7 +525,7 @@
 /// Shift packed 32-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrld))]
 pub unsafe fn _mm_srl_epi32(a: i32x4, count: i32x4) -> i32x4 {
     psrld(a, count)
@@ -534,7 +534,7 @@
 /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlq))]
 pub unsafe fn _mm_srli_epi64(a: i64x2, imm8: i32) -> i64x2 {
     psrliq(a, imm8)
@@ -543,7 +543,7 @@
 /// Shift packed 64-bit integers in `a` right by `count` while shifting in
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlq))]
 pub unsafe fn _mm_srl_epi64(a: i64x2, count: i64x2) -> i64x2 {
     psrlq(a, count)
@@ -552,7 +552,7 @@
 /// Compute the bitwise AND of 128 bits (representing integer data) in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andps))]
 pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
     __m128i::from(i8x16::from(a) & i8x16::from(b))
@@ -561,7 +561,7 @@
 /// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and
 /// then AND with `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andnps))]
 pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
     __m128i::from((!i8x16::from(a)) & i8x16::from(b))
@@ -570,7 +570,7 @@
 /// Compute the bitwise OR of 128 bits (representing integer data) in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(orps))]
 pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
     __m128i::from(i8x16::from(a) | i8x16::from(b))
@@ -579,7 +579,7 @@
 /// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
     __m128i::from(i8x16::from(a) ^ i8x16::from(b))
@@ -587,7 +587,7 @@
 
 /// Compare packed 8-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqb))]
 pub unsafe fn _mm_cmpeq_epi8(a: i8x16, b: i8x16) -> i8x16 {
     a.eq(b)
@@ -595,7 +595,7 @@
 
 /// Compare packed 16-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqw))]
 pub unsafe fn _mm_cmpeq_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a.eq(b)
@@ -603,7 +603,7 @@
 
 /// Compare packed 32-bit integers in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 pub unsafe fn _mm_cmpeq_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a.eq(b)
@@ -611,7 +611,7 @@
 
 /// Compare packed 8-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
 pub unsafe fn _mm_cmpgt_epi8(a: i8x16, b: i8x16) -> i8x16 {
     a.gt(b)
@@ -619,7 +619,7 @@
 
 /// Compare packed 16-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
 pub unsafe fn _mm_cmpgt_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a.gt(b)
@@ -627,7 +627,7 @@
 
 /// Compare packed 32-bit integers in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtd))]
 pub unsafe fn _mm_cmpgt_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a.gt(b)
@@ -635,7 +635,7 @@
 
 /// Compare packed 8-bit integers in `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
 pub unsafe fn _mm_cmplt_epi8(a: i8x16, b: i8x16) -> i8x16 {
     a.lt(b)
@@ -643,7 +643,7 @@
 
 /// Compare packed 16-bit integers in `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
 pub unsafe fn _mm_cmplt_epi16(a: i16x8, b: i16x8) -> i16x8 {
     a.lt(b)
@@ -651,7 +651,7 @@
 
 /// Compare packed 32-bit integers in `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtd))]
 pub unsafe fn _mm_cmplt_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a.lt(b)
@@ -660,7 +660,7 @@
 /// Convert the lower two packed 32-bit integers in `a` to packed
 /// double-precision (64-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtdq2pd))]
 pub unsafe fn _mm_cvtepi32_pd(a: i32x4) -> f64x2 {
     simd_cast::<i32x2, f64x2>(simd_shuffle2(a, a, [0, 1]))
@@ -669,7 +669,7 @@
 /// Return `a` with its lower element replaced by `b` after converting it to
 /// an `f64`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 pub unsafe fn _mm_cvtsi32_sd(a: f64x2, b: i32) -> f64x2 {
     a.replace(0, b as f64)
@@ -678,7 +678,7 @@
 /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit)
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtdq2ps))]
 pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 {
     cvtdq2ps(a)
@@ -687,7 +687,7 @@
 /// Convert packed single-precision (32-bit) floating-point elements in `a`
 /// to packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtps2dq))]
 pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 {
     cvtps2dq(a)
@@ -696,7 +696,7 @@
 /// Return a vector whose lowest element is `a` and all higher elements are
 /// `0`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
 pub unsafe fn _mm_cvtsi32_si128(a: i32) -> i32x4 {
     i32x4::new(a, 0, 0, 0)
@@ -704,7 +704,7 @@
 
 /// Return the lowest element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows
 pub unsafe fn _mm_cvtsi128_si32(a: i32x4) -> i32 {
     a.extract(0)
@@ -713,7 +713,7 @@
 /// Set packed 64-bit integers with the supplied values, from highest to
 /// lowest.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> i64x2 {
     i64x2::new(e0, e1)
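
As with `_mm_set_ps` earlier in this diff, the `_mm_set_epi*` constructors take their arguments from the highest element down to the lowest, which the `i64x2::new(e0, e1)` body above makes explicit. A quick sketch of what that means at a call site (written as if inside an `unsafe fn` annotated with `#[target_feature(enable = "sse2")]`):

```
// The last argument becomes element 0, the first argument the highest element.
let v = _mm_set_epi32(3, 2, 1, 0);
assert_eq!(v.extract(0), 0);
assert_eq!(v.extract(3), 3);
```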
@@ -721,7 +721,7 @@
 
 /// Set packed 32-bit integers with the supplied values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
     i32x4::new(e0, e1, e2, e3)
@@ -729,7 +729,7 @@
 
 /// Set packed 16-bit integers with the supplied values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi16(
     e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
@@ -739,7 +739,7 @@
 
 /// Set packed 8-bit integers with the supplied values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi8(
     e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
@@ -753,7 +753,7 @@
 
 /// Broadcast 64-bit integer `a` to all elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set1_epi64x(a: i64) -> i64x2 {
     i64x2::splat(a)
@@ -761,7 +761,7 @@
 
 /// Broadcast 32-bit integer `a` to all elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set1_epi32(a: i32) -> i32x4 {
     i32x4::splat(a)
@@ -769,7 +769,7 @@
 
 /// Broadcast 16-bit integer `a` to all elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set1_epi16(a: i16) -> i16x8 {
     i16x8::splat(a)
@@ -777,7 +777,7 @@
 
 /// Broadcast 8-bit integer `a` to all elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set1_epi8(a: i8) -> i8x16 {
     i8x16::splat(a)
@@ -785,7 +785,7 @@
 
 /// Set packed 32-bit integers with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> i32x4 {
     i32x4::new(e3, e2, e1, e0)
@@ -793,7 +793,7 @@
 
 /// Set packed 16-bit integers with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_setr_epi16(
     e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
@@ -803,7 +803,7 @@
 
 /// Set packed 8-bit integers with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_setr_epi8(
     e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
@@ -817,7 +817,7 @@
 
 /// Returns a vector with all elements set to zero.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 pub unsafe fn _mm_setzero_si128() -> __m128i {
     mem::transmute((0_i64, 0_i64))
@@ -825,7 +825,7 @@
 
 /// Load 64-bit integer from memory into first element of returned vector.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // FIXME movsd on windows
 #[cfg_attr(all(test, not(windows),
                not(all(target_os = "linux", target_arch = "x86_64")),
@@ -839,7 +839,7 @@
 ///
 /// `mem_addr` must be aligned on a 16-byte boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
     *mem_addr
@@ -849,7 +849,7 @@
 ///
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
     let mut dst: __m128i = _mm_undefined_si128();
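
The difference between the two loads above is purely the alignment contract: `_mm_load_si128` requires `mem_addr` on a 16-byte boundary, while `_mm_loadu_si128` accepts any address. A small sketch (a `__m128i` value is itself 16-byte aligned, so its address satisfies the stricter load; again assumed to run inside an SSE2-enabled `unsafe fn`):

```
let zero = _mm_setzero_si128();
let aligned = _mm_load_si128(&zero);    // &__m128i is 16-byte aligned
let anywhere = _mm_loadu_si128(&zero);  // no alignment requirement
```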
@@ -870,7 +870,7 @@
 /// `mem_addr` should correspond to a 128-bit memory location and does not need
 /// to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(maskmovdqu))]
 pub unsafe fn _mm_maskmoveu_si128(a: i8x16, mask: i8x16, mem_addr: *mut i8) {
     maskmovdqu(a, mask, mem_addr)
@@ -880,7 +880,7 @@
 ///
 /// `mem_addr` must be aligned on a 16-byte boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
     *mem_addr = a;
@@ -890,7 +890,7 @@
 ///
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
 pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
     storeudq(mem_addr as *mut i8, a);
@@ -900,7 +900,7 @@
 ///
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // FIXME mov on windows, movlps on i686
 #[cfg_attr(all(test, not(windows),
                not(all(target_os = "linux", target_arch = "x86_64")),
@@ -918,7 +918,7 @@
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
 pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
     ::core::intrinsics::nontemporal_store(mem_addr, a);
@@ -928,7 +928,7 @@
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movnti))]
 pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
     ::core::intrinsics::nontemporal_store(mem_addr, a);
@@ -937,7 +937,7 @@
 /// Return a vector where the low element is extracted from `a` and its upper
 /// element is zero.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // FIXME movd on windows, movd on i686
 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"),
            assert_instr(movq))]
@@ -948,7 +948,7 @@
 /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packsswb))]
 pub unsafe fn _mm_packs_epi16(a: i16x8, b: i16x8) -> i8x16 {
     packsswb(a, b)
@@ -957,7 +957,7 @@
 /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packssdw))]
 pub unsafe fn _mm_packs_epi32(a: i32x4, b: i32x4) -> i16x8 {
     packssdw(a, b)
@@ -966,7 +966,7 @@
 /// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packuswb))]
 pub unsafe fn _mm_packus_epi16(a: i16x8, b: i16x8) -> u8x16 {
     packuswb(a, b)
@@ -974,7 +974,7 @@
 
 /// Return the `imm8` element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
 pub unsafe fn _mm_extract_epi16(a: i16x8, imm8: i32) -> i32 {
     let imm8 = (imm8 & 7) as u32;
@@ -983,7 +983,7 @@
 
 /// Return a new vector where the `imm8` element of `a` is replaced with `i`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
 pub unsafe fn _mm_insert_epi16(a: i16x8, i: i32, imm8: i32) -> i16x8 {
     a.replace(imm8 as u32 & 0b111, i as i16)
@@ -991,7 +991,7 @@
 
 /// Return a mask of the most significant bit of each element in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmovmskb))]
 pub unsafe fn _mm_movemask_epi8(a: i8x16) -> i32 {
     pmovmskb(a)
@@ -999,7 +999,7 @@
 
 /// Shuffle 32-bit integers in `a` using the control in `imm8`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
 pub unsafe fn _mm_shuffle_epi32(a: i32x4, imm8: i32) -> i32x4 {
     // simd_shuffleX requires that its selector parameter be made up of
@@ -1060,7 +1060,7 @@
 /// Put the results in the high 64 bits of the returned vector, with the low 64
 /// bits being copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
 pub unsafe fn _mm_shufflehi_epi16(a: i16x8, imm8: i32) -> i16x8 {
     // See _mm_shuffle_epi32.
@@ -1116,7 +1116,7 @@
 /// Put the results in the low 64 bits of the returned vector, with the high 64
 /// bits being copied from `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
 pub unsafe fn _mm_shufflelo_epi16(a: i16x8, imm8: i32) -> i16x8 {
     // See _mm_shuffle_epi32.
@@ -1167,7 +1167,7 @@
 
 /// Unpack and interleave 8-bit integers from the high half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhbw))]
 pub unsafe fn _mm_unpackhi_epi8(a: i8x16, b: i8x16) -> i8x16 {
     simd_shuffle16(
@@ -1179,7 +1179,7 @@
 
 /// Unpack and interleave 16-bit integers from the high half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhwd))]
 pub unsafe fn _mm_unpackhi_epi16(a: i16x8, b: i16x8) -> i16x8 {
     simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
@@ -1187,7 +1187,7 @@
 
 /// Unpack and interleave 32-bit integers from the high half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhdq))]
 pub unsafe fn _mm_unpackhi_epi32(a: i32x4, b: i32x4) -> i32x4 {
     simd_shuffle4(a, b, [2, 6, 3, 7])
@@ -1195,7 +1195,7 @@
 
 /// Unpack and interleave 64-bit integers from the high half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhqdq))]
 pub unsafe fn _mm_unpackhi_epi64(a: i64x2, b: i64x2) -> i64x2 {
     simd_shuffle2(a, b, [1, 3])
@@ -1203,7 +1203,7 @@
 
 /// Unpack and interleave 8-bit integers from the low half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpcklbw))]
 pub unsafe fn _mm_unpacklo_epi8(a: i8x16, b: i8x16) -> i8x16 {
     simd_shuffle16(
@@ -1215,7 +1215,7 @@
 
 /// Unpack and interleave 16-bit integers from the low half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpcklwd))]
 pub unsafe fn _mm_unpacklo_epi16(a: i16x8, b: i16x8) -> i16x8 {
     simd_shuffle8(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
@@ -1223,7 +1223,7 @@
 
 /// Unpack and interleave 32-bit integers from the low half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckldq))]
 pub unsafe fn _mm_unpacklo_epi32(a: i32x4, b: i32x4) -> i32x4 {
     simd_shuffle4(a, b, [0, 4, 1, 5])
@@ -1231,7 +1231,7 @@
 
 /// Unpack and interleave 64-bit integers from the low half of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpcklqdq))]
 pub unsafe fn _mm_unpacklo_epi64(a: i64x2, b: i64x2) -> i64x2 {
     simd_shuffle2(a, b, [0, 2])
@@ -1240,7 +1240,7 @@
 /// Return a new vector with the low element of `a` replaced by the sum of the
 /// low elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(addsd))]
 pub unsafe fn _mm_add_sd(a: f64x2, b: f64x2) -> f64x2 {
     a.replace(0, a.extract(0) + b.extract(0))
@@ -1249,7 +1249,7 @@
 /// Add packed double-precision (64-bit) floating-point elements in `a` and
 /// `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(addpd))]
 pub unsafe fn _mm_add_pd(a: f64x2, b: f64x2) -> f64x2 {
     a + b
@@ -1258,7 +1258,7 @@
 /// Return a new vector with the low element of `a` replaced by the result of
 /// dividing the lower element of `a` by the lower element of `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(divsd))]
 pub unsafe fn _mm_div_sd(a: f64x2, b: f64x2) -> f64x2 {
     a.replace(0, a.extract(0) / b.extract(0))
@@ -1267,7 +1267,7 @@
 /// Divide packed double-precision (64-bit) floating-point elements in `a` by
 /// packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(divpd))]
 pub unsafe fn _mm_div_pd(a: f64x2, b: f64x2) -> f64x2 {
     a / b
@@ -1276,7 +1276,7 @@
 /// Return a new vector with the low element of `a` replaced by the maximum
 /// of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(maxsd))]
 pub unsafe fn _mm_max_sd(a: f64x2, b: f64x2) -> f64x2 {
     maxsd(a, b)
@@ -1285,7 +1285,7 @@
 /// Return a new vector with the maximum values from corresponding elements in
 /// `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(maxpd))]
 pub unsafe fn _mm_max_pd(a: f64x2, b: f64x2) -> f64x2 {
     maxpd(a, b)
@@ -1294,7 +1294,7 @@
 /// Return a new vector with the low element of `a` replaced by the minimum
 /// of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(minsd))]
 pub unsafe fn _mm_min_sd(a: f64x2, b: f64x2) -> f64x2 {
     minsd(a, b)
@@ -1303,7 +1303,7 @@
 /// Return a new vector with the minimum values from corresponding elements in
 /// `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(minpd))]
 pub unsafe fn _mm_min_pd(a: f64x2, b: f64x2) -> f64x2 {
     minpd(a, b)
@@ -1312,7 +1312,7 @@
 /// Return a new vector with the low element of `a` replaced by multiplying the
 /// low elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(mulsd))]
 pub unsafe fn _mm_mul_sd(a: f64x2, b: f64x2) -> f64x2 {
     a.replace(0, a.extract(0) * b.extract(0))
@@ -1321,7 +1321,7 @@
 /// Multiply packed double-precision (64-bit) floating-point elements in `a`
 /// and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(mulpd))]
 pub unsafe fn _mm_mul_pd(a: f64x2, b: f64x2) -> f64x2 {
     a * b
@@ -1330,7 +1330,7 @@
 /// Return a new vector with the low element of `a` replaced by the square
 /// root of the lower element of `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(sqrtsd))]
 pub unsafe fn _mm_sqrt_sd(a: f64x2, b: f64x2) -> f64x2 {
     a.replace(0, sqrtsd(b).extract(0))
@@ -1338,7 +1338,7 @@
 
 /// Return a new vector with the square root of each of the values in `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(sqrtpd))]
 pub unsafe fn _mm_sqrt_pd(a: f64x2) -> f64x2 {
     sqrtpd(a)
@@ -1347,7 +1347,7 @@
 /// Return a new vector with the low element of `a` replaced by subtracting the
 /// low element of `b` from the low element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(subsd))]
 pub unsafe fn _mm_sub_sd(a: f64x2, b: f64x2) -> f64x2 {
     a.replace(0, a.extract(0) - b.extract(0))
@@ -1356,7 +1356,7 @@
 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
 /// from `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(subpd))]
 pub unsafe fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 {
     a - b
@@ -1365,7 +1365,7 @@
 /// Compute the bitwise AND of packed double-precision (64-bit) floating-point
 /// elements in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andps))]
 pub unsafe fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 {
     let a: u64x2 = mem::transmute(a);
@@ -1375,7 +1375,7 @@
 
 /// Compute the bitwise NOT of `a` and then AND with `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andnps))]
 pub unsafe fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 {
     let a: u64x2 = mem::transmute(a);
@@ -1385,7 +1385,7 @@
 
 /// Compute the bitwise OR of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(orps))]
 pub unsafe fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 {
     let a: u64x2 = mem::transmute(a);
@@ -1395,7 +1395,7 @@
 
 /// Compute the bitwise XOR of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 pub unsafe fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 {
     let a: u64x2 = mem::transmute(a);
@@ -1406,7 +1406,7 @@
 /// Return a new vector with the low element of `a` replaced by the equality
 /// comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpeqsd))]
 pub unsafe fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 0)
@@ -1415,7 +1415,7 @@
 /// Return a new vector with the low element of `a` replaced by the less-than
 /// comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpltsd))]
 pub unsafe fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 1)
@@ -1424,7 +1424,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// less-than-or-equal comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmplesd))]
 pub unsafe fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 2)
@@ -1433,7 +1433,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// greater-than comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpltsd))]
 pub unsafe fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmplt_sd(b, a).replace(1, a.extract(1))
@@ -1442,7 +1442,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// greater-than-or-equal comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmplesd))]
 pub unsafe fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmple_sd(b, a).replace(1, a.extract(1))
@@ -1453,7 +1453,7 @@
 /// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpordsd))]
 pub unsafe fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 7)
@@ -1463,7 +1463,7 @@
 /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
 /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpunordsd))]
 pub unsafe fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 3)
@@ -1472,7 +1472,7 @@
 /// Return a new vector with the low element of `a` replaced by the not-equal
 /// comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpneqsd))]
 pub unsafe fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 4)
@@ -1481,7 +1481,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// not-less-than comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnltsd))]
 pub unsafe fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 5)
@@ -1490,7 +1490,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnlesd))]
 pub unsafe fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 {
     cmpsd(a, b, 6)
@@ -1499,7 +1499,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// not-greater-than comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnltsd))]
 pub unsafe fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmpnlt_sd(b, a).replace(1, a.extract(1))
@@ -1508,7 +1508,7 @@
 /// Return a new vector with the low element of `a` replaced by the
 /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnlesd))]
 pub unsafe fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmpnle_sd(b, a).replace(1, a.extract(1))
@@ -1516,7 +1516,7 @@
 
 /// Compare corresponding elements in `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpeqpd))]
 pub unsafe fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 0)
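For reference (not part of the patch): a rough sketch of how the packed comparison masks read back, assuming the `f64x2`/`u64x2` types with `new`/`extract` are re-exported from `stdsimd::simd` like the vector types used in the sse4.2 doc examples below; the `cmpeq_pd_demo` name is made up and sse2 support is assumed to have been checked already.

    unsafe fn cmpeq_pd_demo() {
        use std::mem;
        use stdsimd::simd::{f64x2, u64x2};
        use stdsimd::vendor::_mm_cmpeq_pd;

        let a = f64x2::new(1.0, 2.0);
        let b = f64x2::new(1.0, 3.0);
        // Each lane of the result is either all ones or all zeros, so it is
        // easiest to inspect after reinterpreting the bits as u64x2.
        let mask: u64x2 = mem::transmute(_mm_cmpeq_pd(a, b));
        assert_eq!(mask.extract(0), u64::max_value()); // 1.0 == 1.0
        assert_eq!(mask.extract(1), 0); // 2.0 != 3.0
    }
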
@@ -1524,7 +1524,7 @@
 
 /// Compare corresponding elements in `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpltpd))]
 pub unsafe fn _mm_cmplt_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 1)
@@ -1532,7 +1532,7 @@
 
 /// Compare corresponding elements in `a` and `b` for less-than-or-equal
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmplepd))]
 pub unsafe fn _mm_cmple_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 2)
@@ -1540,7 +1540,7 @@
 
 /// Compare corresponding elements in `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpltpd))]
 pub unsafe fn _mm_cmpgt_pd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmplt_pd(b, a)
@@ -1548,7 +1548,7 @@
 
 /// Compare corresponding elements in `a` and `b` for greater-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmplepd))]
 pub unsafe fn _mm_cmpge_pd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmple_pd(b, a)
@@ -1556,7 +1556,7 @@
 
 /// Compare corresponding elements in `a` and `b` to see if neither is `NaN`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpordpd))]
 pub unsafe fn _mm_cmpord_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 7)
@@ -1564,7 +1564,7 @@
 
 /// Compare corresponding elements in `a` and `b` to see if either is `NaN`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpunordpd))]
 pub unsafe fn _mm_cmpunord_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 3)
@@ -1572,7 +1572,7 @@
 
 /// Compare corresponding elements in `a` and `b` for not-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpneqpd))]
 pub unsafe fn _mm_cmpneq_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 4)
@@ -1580,7 +1580,7 @@
 
 /// Compare corresponding elements in `a` and `b` for not-less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnltpd))]
 pub unsafe fn _mm_cmpnlt_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 5)
@@ -1588,7 +1588,7 @@
 
 /// Compare corresponding elements in `a` and `b` for not-less-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnlepd))]
 pub unsafe fn _mm_cmpnle_pd(a: f64x2, b: f64x2) -> f64x2 {
     cmppd(a, b, 6)
@@ -1596,7 +1596,7 @@
 
 /// Compare corresponding elements in `a` and `b` for not-greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnltpd))]
 pub unsafe fn _mm_cmpngt_pd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmpnlt_pd(b, a)
@@ -1605,7 +1605,7 @@
 /// Compare corresponding elements in `a` and `b` for
 /// not-greater-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cmpnlepd))]
 pub unsafe fn _mm_cmpnge_pd(a: f64x2, b: f64x2) -> f64x2 {
     _mm_cmpnle_pd(b, a)
@@ -1613,7 +1613,7 @@
 
 /// Compare the lower element of `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comieq_sd(a: f64x2, b: f64x2) -> bool {
     comieqsd(a, b) as u8 != 0
@@ -1621,7 +1621,7 @@
 
 /// Compare the lower element of `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comilt_sd(a: f64x2, b: f64x2) -> bool {
     comiltsd(a, b) as u8 != 0
@@ -1629,7 +1629,7 @@
 
 /// Compare the lower element of `a` and `b` for less-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comile_sd(a: f64x2, b: f64x2) -> bool {
     comilesd(a, b) as u8 != 0
@@ -1637,7 +1637,7 @@
 
 /// Compare the lower element of `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comigt_sd(a: f64x2, b: f64x2) -> bool {
     comigtsd(a, b) as u8 != 0
@@ -1645,7 +1645,7 @@
 
 /// Compare the lower element of `a` and `b` for greater-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comige_sd(a: f64x2, b: f64x2) -> bool {
     comigesd(a, b) as u8 != 0
@@ -1653,7 +1653,7 @@
 
 /// Compare the lower element of `a` and `b` for not-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(comisd))]
 pub unsafe fn _mm_comineq_sd(a: f64x2, b: f64x2) -> bool {
     comineqsd(a, b) as u8 != 0
@@ -1661,7 +1661,7 @@
 
 /// Compare the lower element of `a` and `b` for equality.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomieq_sd(a: f64x2, b: f64x2) -> bool {
     ucomieqsd(a, b) as u8 != 0
@@ -1669,7 +1669,7 @@
 
 /// Compare the lower element of `a` and `b` for less-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomilt_sd(a: f64x2, b: f64x2) -> bool {
     ucomiltsd(a, b) as u8 != 0
@@ -1677,7 +1677,7 @@
 
 /// Compare the lower element of `a` and `b` for less-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomile_sd(a: f64x2, b: f64x2) -> bool {
     ucomilesd(a, b) as u8 != 0
@@ -1685,7 +1685,7 @@
 
 /// Compare the lower element of `a` and `b` for greater-than.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomigt_sd(a: f64x2, b: f64x2) -> bool {
     ucomigtsd(a, b) as u8 != 0
@@ -1693,7 +1693,7 @@
 
 /// Compare the lower element of `a` and `b` for greater-than-or-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomige_sd(a: f64x2, b: f64x2) -> bool {
     ucomigesd(a, b) as u8 != 0
@@ -1701,7 +1701,7 @@
 
 /// Compare the lower element of `a` and `b` for not-equal.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(ucomisd))]
 pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
     ucomineqsd(a, b) as u8 != 0
@@ -1710,7 +1710,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a` to
 /// packed single-precision (32-bit) floating-point elements
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtpd2ps))]
 pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
     cvtpd2ps(a)
@@ -1720,7 +1720,7 @@
 /// packed
 /// double-precision (64-bit) floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtps2pd))]
 pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
     cvtps2pd(a)
@@ -1729,7 +1729,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a` to
 /// packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtpd2dq))]
 pub unsafe fn _mm_cvtpd_epi32(a: f64x2) -> i32x4 {
     cvtpd2dq(a)
@@ -1738,7 +1738,7 @@
 /// Convert the lower double-precision (64-bit) floating-point element in `a` to
 /// a 32-bit integer.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
 pub unsafe fn _mm_cvtsd_si32(a: f64x2) -> i32 {
     cvtsd2si(a)
@@ -1749,7 +1749,7 @@
 /// the lower element of the return value, and copy the upper element from `a`
 /// to the upper element of the return value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsd2ss))]
 pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
     cvtsd2ss(a, b)
@@ -1757,7 +1757,7 @@
 
 /// Return the lower double-precision (64-bit) floating-point element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platform
 pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 {
     a.extract(0)
@@ -1768,7 +1768,7 @@
 /// the lower element of the return value, and copy the upper element from `a`
 /// to the upper element of the return value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtss2sd))]
 pub unsafe fn _mm_cvtss_sd(a: f64x2, b: f32x4) -> f64x2 {
     cvtss2sd(a, b)
@@ -1777,7 +1777,7 @@
 /// Convert packed double-precision (64-bit) floating-point elements in `a` to
 /// packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttpd2dq))]
 pub unsafe fn _mm_cvttpd_epi32(a: f64x2) -> i32x4 {
     cvttpd2dq(a)
@@ -1786,7 +1786,7 @@
 /// Convert the lower double-precision (64-bit) floating-point element in `a`
 /// to a 32-bit integer with truncation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
 pub unsafe fn _mm_cvttsd_si32(a: f64x2) -> i32 {
     cvttsd2si(a)
@@ -1795,7 +1795,7 @@
 /// Convert packed single-precision (32-bit) floating-point elements in `a` to
 /// packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttps2dq))]
 pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
     cvttps2dq(a)
@@ -1804,15 +1804,15 @@
 /// Copy double-precision (64-bit) floating-point element `a` to the lower
 /// element of the packed 64-bit return value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
-    f64x2::new(a, 0_f64)
+    f64x2::new(a, 0f64)
 }
 
 /// Broadcast double-precision (64-bit) floating-point value a to all elements
 /// of the return value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
@@ -1820,7 +1821,7 @@
 /// Broadcast double-precision (64-bit) floating-point value a to all elements
 /// of the return value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
     f64x2::new(a, a)
 }
@@ -1828,7 +1829,7 @@
 /// Set packed double-precision (64-bit) floating-point elements in the return
 /// value with the supplied values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(b, a)
 }
@@ -1836,7 +1837,7 @@
 /// Set packed double-precision (64-bit) floating-point elements in the return
 /// value with the supplied values in reverse order.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
     f64x2::new(a, b)
 }
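For reference (not part of the patch): a rough reminder of the argument order, since `_mm_set_pd` stores its first argument in the high lane while `_mm_setr_pd` takes lanes in memory order. The `set_order_demo` name is made up and sse2 support is assumed.

    unsafe fn set_order_demo() {
        use stdsimd::vendor::{_mm_set_pd, _mm_setr_pd};

        let hi_first = _mm_set_pd(2.0, 1.0); // lane 0 = 1.0, lane 1 = 2.0
        let lo_first = _mm_setr_pd(1.0, 2.0); // lane 0 = 1.0, lane 1 = 2.0
        assert_eq!(hi_first.extract(0), lo_first.extract(0));
        assert_eq!(hi_first.extract(1), lo_first.extract(1));
    }
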
@@ -1844,7 +1845,7 @@
 /// Returns packed double-precision (64-bit) floating-point elements with all
 /// zeros.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
 pub unsafe fn _mm_setzero_pd() -> f64x2 {
     f64x2::splat(0_f64)
@@ -1855,7 +1856,7 @@
 /// The mask is stored in the 2 least significant bits of the return value.
 /// All other bits are set to `0`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movmskpd))]
 pub unsafe fn _mm_movemask_pd(a: f64x2) -> i32 {
     movmskpd(a)
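For reference (not part of the patch): a rough sketch of the sign-bit mask layout described above; the `movemask_pd_demo` name is made up and sse2 support is assumed.

    unsafe fn movemask_pd_demo() {
        use stdsimd::simd::f64x2;
        use stdsimd::vendor::_mm_movemask_pd;

        // Lane 0 is negative (bit 0 set), lane 1 is positive (bit 1 clear).
        let a = f64x2::new(-1.0, 2.0);
        assert_eq!(_mm_movemask_pd(a), 0b01);
    }
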
@@ -1866,7 +1867,7 @@
 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
 /// exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
     *(mem_addr as *const f64x2)
@@ -1875,7 +1876,7 @@
 /// Loads a 64-bit double-precision value to the low element of a
 /// 128-bit vector of [2 x double] and clears the upper element.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movsd))]
 pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> f64x2 {
     f64x2::new(*mem_addr, 0.)
@@ -1885,7 +1886,7 @@
 /// vector of [2 x double]. The low-order bits are copied from the low-order
 /// bits of the first operand.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movhpd))]
 pub unsafe fn _mm_loadh_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
     f64x2::new(a.extract(0), *mem_addr)
@@ -1895,7 +1896,7 @@
 /// vector of [2 x double]. The high-order bits are copied from the
 /// high-order bits of the first operand.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlpd))]
 pub unsafe fn _mm_loadl_pd(a: f64x2, mem_addr: *const f64) -> f64x2 {
     f64x2::new(*mem_addr, a.extract(1))
@@ -1906,7 +1907,7 @@
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
 pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) {
     ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
@@ -1915,7 +1916,7 @@
 /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
 /// memory location.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
 pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: f64x2) {
     *mem_addr = a.extract(0)
@@ -1925,7 +1926,7 @@
 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
 /// on a 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movaps))]
 pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = a;
@@ -1935,7 +1936,7 @@
 /// floating-point elements) from `a` into memory.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
 pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: f64x2) {
     storeupd(mem_addr as *mut i8, a);
@@ -1945,7 +1946,7 @@
 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
 /// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: f64x2) {
     let b: f64x2 = simd_shuffle2(a, a, [0, 0]);
     *(mem_addr as *mut f64x2) = b;
@@ -1955,7 +1956,7 @@
 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
 /// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: f64x2) {
     let b: f64x2 = simd_shuffle2(a, a, [0, 0]);
     *(mem_addr as *mut f64x2) = b;
@@ -1966,7 +1967,7 @@
 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
 /// exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: f64x2) {
     let b: f64x2 = simd_shuffle2(a, a, [1, 0]);
     *(mem_addr as *mut f64x2) = b;
@@ -1975,7 +1976,7 @@
 /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a
 /// memory location.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movhpd))]
 pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: f64x2) {
     *mem_addr = a.extract(1)
@@ -1984,7 +1985,7 @@
 /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
 /// memory location.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
 pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) {
     *mem_addr = a.extract(0)
@@ -1993,7 +1994,7 @@
 /// Load a double-precision (64-bit) floating-point element from memory
 /// into both elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 //#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected
 pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 {
     let d = *mem_addr;
@@ -2003,7 +2004,7 @@
 /// Load a double-precision (64-bit) floating-point element from memory
 /// into both elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 //#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected
 pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 {
     let d = *mem_addr;
@@ -2014,7 +2015,7 @@
 /// the returned vector in reverse order. `mem_addr` must be aligned on a
 /// 16-byte boundary or a general-protection exception may be generated.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movapd))]
 pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> f64x2 {
     let a = _mm_load_pd(mem_addr);
@@ -2025,7 +2026,7 @@
 /// floating-point elements) from memory into the returned vector.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> f64x2 {
     let mut dst = _mm_undefined_pd();
@@ -2041,7 +2042,7 @@
 /// 128-bit vector parameters of [2 x double], using the immediate-value
 /// parameter as a specifier.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))]
 pub unsafe fn _mm_shuffle_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
     match imm8 & 0b11 {
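For reference (not part of the patch): a rough sketch of the two-bit specifier used by `_mm_shuffle_pd` — bit 0 picks which lane of `a` becomes the low result lane, bit 1 picks which lane of `b` becomes the high one. The `shuffle_pd_demo` name is made up and sse2 support is assumed.

    unsafe fn shuffle_pd_demo() {
        use stdsimd::simd::f64x2;
        use stdsimd::vendor::_mm_shuffle_pd;

        let a = f64x2::new(1.0, 2.0);
        let b = f64x2::new(3.0, 4.0);
        // imm8 = 0b01: low lane from a.extract(1), high lane from b.extract(0).
        let r = _mm_shuffle_pd(a, b, 0b01);
        assert_eq!((r.extract(0), r.extract(1)), (2.0, 3.0));
    }
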
@@ -2056,7 +2057,7 @@
 /// 64 bits are set to the lower 64 bits of the second parameter. The upper
 /// 64 bits are set to the upper 64 bits of the first parameter.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movsd))]
 pub unsafe fn _mm_move_sd(a: f64x2, b: f64x2) -> f64x2 {
     f64x2::new(b.extract(0), a.extract(1))
@@ -2065,7 +2066,7 @@
 /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
 /// floating-point vector of [4 x float].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castpd_ps(a: f64x2) -> f32x4 {
     mem::transmute(a)
 }
@@ -2073,7 +2074,7 @@
 /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
 /// integer vector.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castpd_si128(a: f64x2) -> __m128i {
     simd_cast(a)
 }
@@ -2081,7 +2082,7 @@
 /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
 /// floating-point vector of [2 x double].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castps_pd(a: f32x4) -> f64x2 {
     mem::transmute(a)
 }
@@ -2089,7 +2090,7 @@
 /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit
 /// integer vector.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castps_si128(a: f32x4) -> __m128i {
     mem::transmute(a)
 }
@@ -2097,7 +2098,7 @@
 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
 /// of [2 x double].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castsi128_pd(a: __m128i) -> f64x2 {
     simd_cast(a)
 }
@@ -2105,21 +2106,21 @@
 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
 /// of [4 x float].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_castsi128_ps(a: __m128i) -> f32x4 {
     mem::transmute(a)
 }
 
 /// Return vector of type __m128d with undefined elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_undefined_pd() -> f64x2 {
     f64x2::splat(mem::uninitialized())
 }
 
 /// Return vector of type __m128i with undefined elements.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 pub unsafe fn _mm_undefined_si128() -> __m128i {
     mem::transmute(i32x4::splat(mem::uninitialized()))
 }
@@ -2130,7 +2131,7 @@
 /// * The [127:64] bits are copied from the [127:64] bits of the second input
 /// * The [63:0] bits are copied from the [127:64] bits of the first input
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpckhpd))]
 pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 {
     simd_shuffle2(a, b, [1, 3])
@@ -2142,7 +2143,7 @@
 /// * The [127:64] bits are copied from the [63:0] bits of the second input
 /// * The [63:0] bits are copied from the [63:0] bits of the first input
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpcklpd))]
 pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 {
     simd_shuffle2(a, b, [0, 2])
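For reference (not part of the patch): a rough sketch of the two interleaves above on concrete lanes; the `unpack_pd_demo` name is made up and sse2 support is assumed.

    unsafe fn unpack_pd_demo() {
        use stdsimd::simd::f64x2;
        use stdsimd::vendor::{_mm_unpackhi_pd, _mm_unpacklo_pd};

        let a = f64x2::new(1.0, 2.0);
        let b = f64x2::new(3.0, 4.0);
        let hi = _mm_unpackhi_pd(a, b); // high lanes of a and b: [2.0, 4.0]
        let lo = _mm_unpacklo_pd(a, b); // low lanes of a and b: [1.0, 3.0]
        assert_eq!((hi.extract(0), hi.extract(1)), (2.0, 4.0));
        assert_eq!((lo.extract(0), lo.extract(1)), (1.0, 3.0));
    }
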
diff --git a/coresimd/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs
index c582bdb..1a4e63d 100644
--- a/coresimd/src/x86/i586/sse3.rs
+++ b/coresimd/src/x86/i586/sse3.rs
@@ -9,7 +9,7 @@
 /// Alternatively add and subtract packed single-precision (32-bit)
 /// floating-point elements in `a` to/from packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(addsubps))]
 pub unsafe fn _mm_addsub_ps(a: f32x4, b: f32x4) -> f32x4 {
     addsubps(a, b)
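For reference (not part of the patch): a rough sketch of the alternating pattern — even lanes are subtracted, odd lanes are added. The `addsub_ps_demo` name is made up and sse3 support is assumed.

    unsafe fn addsub_ps_demo() {
        use stdsimd::simd::f32x4;
        use stdsimd::vendor::_mm_addsub_ps;

        let a = f32x4::new(10.0, 20.0, 30.0, 40.0);
        let b = f32x4::new(1.0, 2.0, 3.0, 4.0);
        // [10 - 1, 20 + 2, 30 - 3, 40 + 4]
        let r = _mm_addsub_ps(a, b);
        assert_eq!(
            (r.extract(0), r.extract(1), r.extract(2), r.extract(3)),
            (9.0, 22.0, 27.0, 44.0)
        );
    }
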
@@ -18,7 +18,7 @@
 /// Alternatively add and subtract packed double-precision (64-bit)
 /// floating-point elements in `a` to/from packed elements in `b`.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(addsubpd))]
 pub unsafe fn _mm_addsub_pd(a: f64x2, b: f64x2) -> f64x2 {
     addsubpd(a, b)
@@ -27,7 +27,7 @@
 /// Horizontally add adjacent pairs of double-precision (64-bit)
 /// floating-point elements in `a` and `b`, and pack the results.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(haddpd))]
 pub unsafe fn _mm_hadd_pd(a: f64x2, b: f64x2) -> f64x2 {
     haddpd(a, b)
@@ -36,7 +36,7 @@
 /// Horizontally add adjacent pairs of single-precision (32-bit)
 /// floating-point elements in `a` and `b`, and pack the results.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(haddps))]
 pub unsafe fn _mm_hadd_ps(a: f32x4, b: f32x4) -> f32x4 {
     haddps(a, b)
@@ -45,7 +45,7 @@
 /// Horizontally subtract adjacent pairs of double-precision (64-bit)
 /// floating-point elements in `a` and `b`, and pack the results.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(hsubpd))]
 pub unsafe fn _mm_hsub_pd(a: f64x2, b: f64x2) -> f64x2 {
     hsubpd(a, b)
@@ -54,7 +54,7 @@
 /// Horizontally subtract adjacent pairs of single-precision (32-bit)
 /// floating-point elements in `a` and `b`, and pack the results.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(hsubps))]
 pub unsafe fn _mm_hsub_ps(a: f32x4, b: f32x4) -> f32x4 {
     hsubps(a, b)
@@ -64,7 +64,7 @@
 /// This intrinsic may perform better than `_mm_loadu_si128`
 /// when the data crosses a cache line boundary.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(lddqu))]
 pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
     __m128i::from(lddqu(mem_addr as *const _))
@@ -73,7 +73,7 @@
 /// Duplicate the low double-precision (64-bit) floating-point element
 /// from `a`.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movddup))]
 pub unsafe fn _mm_movedup_pd(a: f64x2) -> f64x2 {
     simd_shuffle2(a, a, [0, 0])
@@ -82,7 +82,7 @@
 /// Load a double-precision (64-bit) floating-point element from memory
 /// into both elements of the returned vector.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movddup))]
 pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> f64x2 {
     use x86::i586::sse2::_mm_load1_pd;
@@ -92,7 +92,7 @@
 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements
 /// from `a`.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movshdup))]
 pub unsafe fn _mm_movehdup_ps(a: f32x4) -> f32x4 {
     simd_shuffle4(a, a, [1, 1, 3, 3])
@@ -101,7 +101,7 @@
 /// Duplicate even-indexed single-precision (32-bit) floating-point elements
 /// from `a`.
 #[inline(always)]
-#[target_feature = "+sse3"]
+#[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movsldup))]
 pub unsafe fn _mm_moveldup_ps(a: f32x4) -> f32x4 {
     simd_shuffle4(a, a, [0, 0, 2, 2])
diff --git a/coresimd/src/x86/i586/sse41.rs b/coresimd/src/x86/i586/sse41.rs
index 60f972f..4f3c20e 100644
--- a/coresimd/src/x86/i586/sse41.rs
+++ b/coresimd/src/x86/i586/sse41.rs
@@ -47,7 +47,7 @@
 /// If the high bit is set the element of `a` is selected. The element
 /// of `b` is selected otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pblendvb))]
 pub unsafe fn _mm_blendv_epi8(a: i8x16, b: i8x16, mask: i8x16) -> i8x16 {
     pblendvb(a, b, mask)
@@ -59,7 +59,7 @@
 /// corresponding element of `a`, and a set bit the corresponding
 /// element of `b`.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
 pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: i32) -> i16x8 {
     macro_rules! call {
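For reference (not part of the patch): a rough sketch of the bit-per-lane mask used by `_mm_blend_epi16`; the `blend_epi16_demo` name is made up and sse4.1 support is assumed.

    unsafe fn blend_epi16_demo() {
        use stdsimd::simd::i16x8;
        use stdsimd::vendor::_mm_blend_epi16;

        let a = i16x8::splat(0);
        let b = i16x8::splat(1);
        // imm8 = 0b1111_0000: lanes 0-3 come from `a`, lanes 4-7 from `b`.
        let r = _mm_blend_epi16(a, b, 0b1111_0000);
        assert_eq!(r.extract(0), 0);
        assert_eq!(r.extract(7), 1);
    }
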
@@ -71,7 +71,7 @@
 /// Blend packed double-precision (64-bit) floating-point elements from `a`
 /// and `b` using `mask`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvpd))]
 pub unsafe fn _mm_blendv_pd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2 {
     blendvpd(a, b, mask)
@@ -80,7 +80,7 @@
 /// Blend packed single-precision (32-bit) floating-point elements from `a`
 /// and `b` using `mask`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvps))]
 pub unsafe fn _mm_blendv_ps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4 {
     blendvps(a, b, mask)
@@ -89,7 +89,7 @@
 /// Blend packed double-precision (64-bit) floating-point elements from `a`
 /// and `b` using control mask `imm2`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
 pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: i32) -> f64x2 {
     macro_rules! call {
@@ -101,7 +101,7 @@
 /// Blend packed single-precision (32-bit) floating-point elements from `a`
 /// and `b` using mask `imm4`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
 pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: i32) -> f32x4 {
     macro_rules! call {
@@ -113,7 +113,7 @@
 /// Extract a single-precision (32-bit) floating-point element from `a`,
 /// selected with `imm8`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
 pub unsafe fn _mm_extract_ps(a: f32x4, imm8: i32) -> i32 {
@@ -125,7 +125,7 @@
 ///
 /// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
 pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: i32) -> i32 {
     let imm8 = (imm8 & 15) as u32;
@@ -134,7 +134,7 @@
 
 /// Extract a 32-bit integer from `a` selected with `imm8`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
 pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: i32) -> i32 {
@@ -165,7 +165,7 @@
 /// * Bits `[3:0]`: If any of these bits are set, the corresponding result
 /// element is cleared.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
 pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
     macro_rules! call {
@@ -177,7 +177,7 @@
 /// Return a copy of `a` with the 8-bit integer from `i` inserted at a
 /// location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
 pub unsafe fn _mm_insert_epi8(a: i8x16, i: i8, imm8: i32) -> i8x16 {
     a.replace((imm8 & 0b1111) as u32, i)
@@ -186,7 +186,7 @@
 /// Return a copy of `a` with the 32-bit integer from `i` inserted at a
 /// location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
 pub unsafe fn _mm_insert_epi32(a: i32x4, i: i32, imm8: i32) -> i32x4 {
     a.replace((imm8 & 0b11) as u32, i)
@@ -195,7 +195,7 @@
 /// Compare packed 8-bit integers in `a` and `b` and return packed maximum
 /// values in dst.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxsb))]
 pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
     pmaxsb(a, b)
@@ -204,7 +204,7 @@
 /// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
 /// maximum.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxuw))]
 pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
     pmaxuw(a, b)
@@ -213,7 +213,7 @@
 /// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
 /// values.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxsd))]
 pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
     pmaxsd(a, b)
@@ -222,7 +222,7 @@
 /// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
 /// maximum values.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxud))]
 pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
     pmaxud(a, b)
@@ -231,7 +231,7 @@
 /// Compare packed 8-bit integers in `a` and `b` and return packed minimum
 /// values in dst.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminsb))]
 pub unsafe fn _mm_min_epi8(a: i8x16, b: i8x16) -> i8x16 {
     pminsb(a, b)
@@ -240,7 +240,7 @@
 /// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
 /// minimum.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminuw))]
 pub unsafe fn _mm_min_epu16(a: u16x8, b: u16x8) -> u16x8 {
     pminuw(a, b)
@@ -249,7 +249,7 @@
 /// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
 /// values.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminsd))]
 pub unsafe fn _mm_min_epi32(a: i32x4, b: i32x4) -> i32x4 {
     pminsd(a, b)
@@ -258,7 +258,7 @@
 /// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
 /// minimum values.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminud))]
 pub unsafe fn _mm_min_epu32(a: u32x4, b: u32x4) -> u32x4 {
     pminud(a, b)
@@ -267,7 +267,7 @@
 /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(packusdw))]
 pub unsafe fn _mm_packus_epi32(a: i32x4, b: i32x4) -> u16x8 {
     packusdw(a, b)
@@ -275,7 +275,7 @@
 
 /// Compare packed 64-bit integers in `a` and `b` for equality
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pcmpeqq))]
 pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
     a.eq(b)
@@ -283,7 +283,7 @@
 
 /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbw))]
 pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 {
     simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8()
@@ -291,7 +291,7 @@
 
 /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbd))]
 pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
     simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -300,7 +300,7 @@
 /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
 /// 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbq))]
 pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 {
     simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]).as_i64x2()
@@ -308,7 +308,7 @@
 
 /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwd))]
 pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 {
     simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -316,7 +316,7 @@
 
 /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwq))]
 pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 {
     simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]).as_i64x2()
@@ -324,7 +324,7 @@
 
 /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxdq))]
 pub unsafe fn _mm_cvtepi32_epi64(a: i32x4) -> i64x2 {
     simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]).as_i64x2()
@@ -332,7 +332,7 @@
 
 /// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbw))]
 pub unsafe fn _mm_cvtepu8_epi16(a: u8x16) -> i16x8 {
     simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]).as_i16x8()
@@ -340,7 +340,7 @@
 
 /// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbd))]
 pub unsafe fn _mm_cvtepu8_epi32(a: u8x16) -> i32x4 {
     simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -348,7 +348,7 @@
 
 /// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbq))]
 pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
     simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
@@ -357,7 +357,7 @@
 /// Zero extend packed unsigned 16-bit integers in `a`
 /// to packed 32-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwd))]
 pub unsafe fn _mm_cvtepu16_epi32(a: u16x8) -> i32x4 {
     simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
@@ -366,7 +366,7 @@
 /// Zero extend packed unsigned 16-bit integers in `a`
 /// to packed 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwq))]
 pub unsafe fn _mm_cvtepu16_epi64(a: u16x8) -> i64x2 {
     simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]).as_i64x2()
@@ -375,7 +375,7 @@
 /// Zero extend packed unsigned 32-bit integers in `a`
 /// to packed 64-bit integers
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxdq))]
 pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
     simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]).as_i64x2()
@@ -389,7 +389,7 @@
 /// the dot product will be stored in the return value component. Otherwise if
 /// the broadcast mask bit is zero then the return component will be zero.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
 pub unsafe fn _mm_dp_pd(a: f64x2, b: f64x2, imm8: i32) -> f64x2 {
     macro_rules! call {
@@ -406,7 +406,7 @@
 /// the dot product will be stored in the return value component. Otherwise if
 /// the broadcast mask bit is zero then the return component will be zero.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
 pub unsafe fn _mm_dp_ps(a: f32x4, b: f32x4, imm8: i32) -> f32x4 {
     macro_rules! call {
@@ -419,7 +419,7 @@
 /// down to an integer value, and store the results as packed double-precision
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 pub unsafe fn _mm_floor_pd(a: f64x2) -> f64x2 {
     roundpd(a, _MM_FROUND_FLOOR)
@@ -429,7 +429,7 @@
 /// down to an integer value, and store the results as packed single-precision
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 pub unsafe fn _mm_floor_ps(a: f32x4) -> f32x4 {
     roundps(a, _MM_FROUND_FLOOR)
@@ -441,7 +441,7 @@
 /// and copy the upper element from `a` to the upper element of the intrinsic
 /// result.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd))]
 pub unsafe fn _mm_floor_sd(a: f64x2, b: f64x2) -> f64x2 {
     roundsd(a, b, _MM_FROUND_FLOOR)
@@ -453,7 +453,7 @@
 /// and copy the upper 3 packed elements from `a` to the upper elements
 /// of the intrinsic result.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss))]
 pub unsafe fn _mm_floor_ss(a: f32x4, b: f32x4) -> f32x4 {
     roundss(a, b, _MM_FROUND_FLOOR)
@@ -463,7 +463,7 @@
 /// up to an integer value, and store the results as packed double-precision
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 pub unsafe fn _mm_ceil_pd(a: f64x2) -> f64x2 {
     roundpd(a, _MM_FROUND_CEIL)
@@ -473,7 +473,7 @@
 /// up to an integer value, and store the results as packed single-precision
 /// floating-point elements.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 pub unsafe fn _mm_ceil_ps(a: f32x4) -> f32x4 {
     roundps(a, _MM_FROUND_CEIL)
@@ -485,7 +485,7 @@
 /// and copy the upper element from `a` to the upper element
 /// of the intrinsic result.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd))]
 pub unsafe fn _mm_ceil_sd(a: f64x2, b: f64x2) -> f64x2 {
     roundsd(a, b, _MM_FROUND_CEIL)
@@ -497,7 +497,7 @@
 /// and copy the upper 3 packed elements from `a` to the upper elements
 /// of the intrinsic result.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss))]
 pub unsafe fn _mm_ceil_ss(a: f32x4, b: f32x4) -> f32x4 {
     roundss(a, b, _MM_FROUND_CEIL)
@@ -523,7 +523,7 @@
 /// vendor::_MM_FROUND_CUR_DIRECTION;
 /// ```
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
 pub unsafe fn _mm_round_pd(a: f64x2, rounding: i32) -> f64x2 {
     macro_rules! call {
@@ -552,7 +552,7 @@
 /// vendor::_MM_FROUND_CUR_DIRECTION;
 /// ```
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps, rounding = 0))]
 pub unsafe fn _mm_round_ps(a: f32x4, rounding: i32) -> f32x4 {
     macro_rules! call {
@@ -583,7 +583,7 @@
 /// vendor::_MM_FROUND_CUR_DIRECTION;
 /// ```
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
 pub unsafe fn _mm_round_sd(a: f64x2, b: f64x2, rounding: i32) -> f64x2 {
     macro_rules! call {
@@ -614,7 +614,7 @@
 /// vendor::_MM_FROUND_CUR_DIRECTION;
 /// ```
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundss, rounding = 0))]
 pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
     macro_rules! call {
@@ -643,7 +643,7 @@
 /// * bits `[18:16]` - contain the index of the minimum value
 /// * remaining bits are set to `0`.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(phminposuw))]
 pub unsafe fn _mm_minpos_epu16(a: u16x8) -> u16x8 {
     phminposuw(a)
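For reference (not part of the patch): a rough sketch of how the packed result described above is laid out — the minimum lands in lane 0 and its index in lane 1. The `minpos_epu16_demo` name is made up and sse4.1 support is assumed.

    unsafe fn minpos_epu16_demo() {
        use stdsimd::simd::u16x8;
        use stdsimd::vendor::_mm_minpos_epu16;

        let a = u16x8::new(23, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        assert_eq!(r.extract(0), 13); // bits [15:0]: the minimum value
        assert_eq!(r.extract(1), 5); // bits [18:16]: its index
        assert_eq!(r.extract(2), 0); // all remaining bits are zero
    }
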
@@ -652,7 +652,7 @@
 /// Multiply the low 32-bit integers from each packed 64-bit
 /// element in `a` and `b`, and return the signed 64-bit result.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmuldq))]
 pub unsafe fn _mm_mul_epi32(a: i32x4, b: i32x4) -> i64x2 {
     pmuldq(a, b)
@@ -665,7 +665,7 @@
 /// arithmetic `pmulld i32x4::splat(i32::MAX), i32x4::splat(2)` would return a
 /// negative number.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmulld))]
 pub unsafe fn _mm_mullo_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a * b
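For reference (not part of the patch): a rough sketch of the wrapping behaviour called out in the comment above; the `mullo_epi32_wrap_demo` name is made up and sse4.1 support is assumed.

    unsafe fn mullo_epi32_wrap_demo() {
        use std::i32;
        use stdsimd::simd::i32x4;
        use stdsimd::vendor::_mm_mullo_epi32;

        // Only the low 32 bits of each 64-bit product are kept, so
        // i32::MAX * 2 wraps around to -2.
        let r = _mm_mullo_epi32(i32x4::splat(i32::MAX), i32x4::splat(2));
        assert_eq!(r.extract(0), -2);
    }
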
@@ -703,7 +703,7 @@
 /// * A `i16x8` vector containing the sums of the sets of
 ///   absolute differences between both operands.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
 pub unsafe fn _mm_mpsadbw_epu8(a: u8x16, b: u8x16, imm8: i32) -> u16x8 {
     macro_rules! call {
diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs
index 2e9b7de..ff1bf2c 100644
--- a/coresimd/src/x86/i586/sse42.rs
+++ b/coresimd/src/x86/i586/sse42.rs
@@ -48,7 +48,7 @@
 /// Compare packed strings with implicit lengths in `a` and `b` using the
 /// control in `imm8`, and return the generated mask.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
 pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
     macro_rules! call {
@@ -99,8 +99,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse4.2") {
-/// #         #[target_feature = "+sse4.2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse4.2")]
+/// #         unsafe fn worker() {
 ///
 /// use stdsimd::simd::u8x16;
 /// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED};
@@ -126,7 +126,7 @@
 /// }
 /// assert_eq!(indexes, vec![34]);
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
@@ -142,8 +142,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse4.2") {
-/// #         #[target_feature = "+sse4.2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse4.2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::u8x16;
 /// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ANY};
 ///
@@ -167,7 +167,7 @@
 ///     println!("Your password should contain a special character");
 /// }
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
@@ -183,8 +183,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse4.2") {
-/// #         #[target_feature = "+sse4.2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse4.2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::u8x16;
 /// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_RANGES};
 /// # let b = u8x16::load(b":;<=>?@[\\]^_`abc", 0);
@@ -207,7 +207,7 @@
 ///     println!("Did not find an alpha numeric character");
 /// }
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
@@ -222,8 +222,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse4.2") {
-/// #         #[target_feature = "+sse4.2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse4.2")]
+/// #         unsafe fn worker() {
 /// use stdsimd::simd::u16x8;
 /// use stdsimd::vendor::{_mm_cmpistri};
 /// use stdsimd::vendor::{_SIDD_UWORD_OPS, _SIDD_CMP_EQUAL_EACH};
@@ -249,7 +249,7 @@
 ///     println!("16-bit unicode strings were not equal!");
 /// }
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
@@ -268,7 +268,7 @@
 /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
 /// [`_mm_cmpestri`]: fn._mm_cmpestri.html
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -281,7 +281,7 @@
 /// control in `imm8`, and return `1` if any character in `b` was null.
 /// and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -296,7 +296,7 @@
 /// control in `imm8`, and return `1` if the resulting mask was non-zero,
 /// and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -309,7 +309,7 @@
 /// control in `imm8`, and returns `1` if any character in `a` was null,
 /// and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -321,7 +321,7 @@
 /// Compare packed strings with implicit lengths in `a` and `b` using the
 /// control in `imm8`, and return bit `0` of the resulting bit mask.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -334,7 +334,7 @@
 /// control in `imm8`, and return `1` if `b` did not contain a null
 /// character and the resulting mask was zero, and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
 pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
     macro_rules! call {
@@ -346,7 +346,7 @@
 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
 /// using the control in `imm8`, and return the generated mask.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
 pub unsafe fn _mm_cmpestrm(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -399,8 +399,8 @@
 /// #
 /// # fn main() {
 /// #     if cfg_feature_enabled!("sse4.2") {
-/// #         #[target_feature = "+sse4.2"]
-/// #         fn worker() {
+/// #         #[target_feature(enable = "sse4.2")]
+/// #         unsafe fn worker() {
 ///
 /// use stdsimd::simd::u8x16;
 /// use stdsimd::vendor::{_mm_cmpestri, _SIDD_CMP_EQUAL_ORDERED};
@@ -423,7 +423,7 @@
 ///
 /// assert_eq!(idx, 6);
 /// #         }
-/// #         worker();
+/// #         unsafe { worker(); }
 /// #     }
 /// # }
 /// ```
@@ -442,7 +442,7 @@
 /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
 /// [`_mm_cmpistri`]: fn._mm_cmpistri.html
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestri(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -457,7 +457,7 @@
 /// using the control in `imm8`, and return `1` if any character in
 /// `b` was null, and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrz(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -472,7 +472,7 @@
 /// using the control in `imm8`, and return `1` if the resulting mask
 /// was non-zero, and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrc(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -487,7 +487,7 @@
 /// using the control in `imm8`, and return `1` if any character in
 /// a was null, and `0` otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestrs(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -502,7 +502,7 @@
 /// using the control in `imm8`, and return bit `0` of the resulting
 /// bit mask.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestro(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -518,7 +518,7 @@
 /// contain a null character and the resulting mask was zero, and `0`
 /// otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
 pub unsafe fn _mm_cmpestra(
     a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
@@ -532,7 +532,7 @@
 /// Starting with the initial value in `crc`, return the accumulated
 /// CRC32 value for unsigned 8-bit integer `v`.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(crc32))]
 pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
     crc32_32_8(crc, v)
@@ -541,7 +541,7 @@
 /// Starting with the initial value in `crc`, return the accumulated
 /// CRC32 value for unsigned 16-bit integer `v`.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(crc32))]
 pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
     crc32_32_16(crc, v)
@@ -550,7 +550,7 @@
 /// Starting with the initial value in `crc`, return the accumulated
 /// CRC32 value for unsigned 32-bit integer `v`.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(crc32))]
 pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
     crc32_32_32(crc, v)
@@ -609,7 +609,7 @@
     // a bit difficult. Rather than `load` and mutate the __m128i,
     // it is easier to memcpy the given string to a local slice with
     // length 16 and `load` the local slice.
-    #[target_feature = "+sse4.2"]
+    #[target_feature(enable = "sse4.2")]
     unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
         assert!(s.len() <= 16);
         let slice = &mut [0u8; 16];
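The doc examples updated in this file all follow the same shape after this change: the feature-specific helper becomes an `unsafe fn`, and the call site wraps it in an `unsafe` block behind a runtime check. A minimal sketch of that pattern (assuming the `#[macro_use] extern crate stdsimd;` boilerplate hidden in those examples; the nightly toolchain this crate targets may additionally require crate-level feature gates not shown here):

    #[macro_use]
    extern crate stdsimd;

    // `#[target_feature]` now takes the `enable = "..."` form and may only
    // be applied to `unsafe fn`.
    #[target_feature(enable = "sse4.2")]
    unsafe fn worker() {
        // SSE4.2 intrinsics such as `_mm_cmpistri` would be used here.
    }

    fn main() {
        // Runtime detection guards the call; the call itself is `unsafe`
        // because the compiler can no longer prove the feature is available.
        if cfg_feature_enabled!("sse4.2") {
            unsafe { worker() }
        }
    }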
diff --git a/coresimd/src/x86/i586/ssse3.rs b/coresimd/src/x86/i586/ssse3.rs
index 57c0ad6..96c7973 100644
--- a/coresimd/src/x86/i586/ssse3.rs
+++ b/coresimd/src/x86/i586/ssse3.rs
@@ -9,7 +9,7 @@
 /// Compute the absolute value of packed 8-bit signed integers in `a` and
 /// return the unsigned results.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsb))]
 pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
     pabsb128(a)
@@ -19,7 +19,7 @@
 /// `a` and
 /// return the 16-bit unsigned integer
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsw))]
 pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
     pabsw128(a)
@@ -29,7 +29,7 @@
 /// `a` and
 /// return the 32-bit unsigned integer
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsd))]
 pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
     pabsd128(a)
@@ -60,7 +60,7 @@
 /// }
 /// ```
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pshufb))]
 pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
     pshufb128(a, b)
@@ -69,7 +69,7 @@
 /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
 /// shift the result right by `n` bytes, and return the low 16 bytes.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(palignr, n = 15))]
 pub unsafe fn _mm_alignr_epi8(a: i8x16, b: i8x16, n: i32) -> i8x16 {
     let n = n as u32;
@@ -124,7 +124,7 @@
 /// Horizontally add the adjacent pairs of values contained in 2 packed
 /// 128-bit vectors of [8 x i16].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddw))]
 pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
     phaddw128(a, b)
@@ -134,7 +134,7 @@
 /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddsw))]
 pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
     phaddsw128(a, b)
@@ -143,7 +143,7 @@
 /// Horizontally add the adjacent pairs of values contained in 2 packed
 /// 128-bit vectors of [4 x i32].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddd))]
 pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
     phaddd128(a, b)
@@ -152,7 +152,7 @@
 /// Horizontally subtract the adjacent pairs of values contained in 2
 /// packed 128-bit vectors of [8 x i16].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubw))]
 pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
     phsubw128(a, b)
@@ -163,7 +163,7 @@
 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
 /// saturated to 8000h.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubsw))]
 pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
     phsubsw128(a, b)
@@ -172,7 +172,7 @@
 /// Horizontally subtract the adjacent pairs of values contained in 2
 /// packed 128-bit vectors of [4 x i32].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubd))]
 pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
     phsubd128(a, b)
@@ -184,7 +184,7 @@
 /// contiguous products with signed saturation, and writes the 16-bit sums to
 /// the corresponding bits in the destination.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pmaddubsw))]
 pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
     pmaddubsw128(a, b)
@@ -194,7 +194,7 @@
 /// product to the 18 most significant bits by right-shifting, round the
 /// truncated value by adding 1, and write bits [16:1] to the destination.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pmulhrsw))]
 pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
     pmulhrsw128(a, b)
@@ -205,7 +205,7 @@
 /// Elements in result are zeroed out when the corresponding element in `b`
 /// is zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignb))]
 pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
     psignb128(a, b)
@@ -216,7 +216,7 @@
 /// Elements in result are zeroed out when the corresponding element in `b`
 /// is zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignw))]
 pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
     psignw128(a, b)
@@ -227,7 +227,7 @@
 /// Elements in result are zeroed out when the corresponding element in `b`
 /// is zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignd))]
 pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
     psignd128(a, b)
diff --git a/coresimd/src/x86/i586/tbm.rs b/coresimd/src/x86/i586/tbm.rs
index 38c044c..3001967 100644
--- a/coresimd/src/x86/i586/tbm.rs
+++ b/coresimd/src/x86/i586/tbm.rs
@@ -28,7 +28,7 @@
 /// Extracts bits in range [`start`, `start` + `length`) from `a` into
 /// the least significant bits of the result.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
     _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32))
 }
@@ -36,7 +36,7 @@
 /// Extracts bits in range [`start`, `start` + `length`) from `a` into
 /// the least significant bits of the result.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 {
     _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64))
 }
@@ -47,7 +47,7 @@
 /// Bits [7,0] of `control` specify the index to the first bit in the range to
 /// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
     unsafe { x86_tbm_bextri_u32(a, control) }
 }
@@ -58,7 +58,7 @@
 /// Bits [7,0] of `control` specify the index to the first bit in the range to
 /// be extracted, and bits [15,8] specify the length of the range.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
     unsafe { x86_tbm_bextri_u64(a, control) }
 }
@@ -68,7 +68,7 @@
 ///
 /// If there is no zero bit in `x`, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcfill))]
 pub unsafe fn _blcfill_u32(x: u32) -> u32 {
     x & (x.wrapping_add(1))
@@ -78,7 +78,7 @@
 ///
 /// If there is no zero bit in `x`, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcfill))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blcfill_u64(x: u64) -> u64 {
@@ -89,7 +89,7 @@
 ///
 /// If there is no zero bit in `x`, it sets all bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blci))]
 pub unsafe fn _blci_u32(x: u32) -> u32 {
     x | !(x.wrapping_add(1))
@@ -99,7 +99,7 @@
 ///
 /// If there is no zero bit in `x`, it sets all bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blci))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blci_u64(x: u64) -> u64 {
@@ -110,7 +110,7 @@
 ///
 /// If there is no zero bit in `x`, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcic))]
 pub unsafe fn _blcic_u32(x: u32) -> u32 {
     !x & (x.wrapping_add(1))
@@ -120,7 +120,7 @@
 ///
 /// If there is no zero bit in `x`, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcic))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blcic_u64(x: u64) -> u64 {
@@ -132,7 +132,7 @@
 ///
 /// If there is no zero bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcmsk))]
 pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
     x ^ (x.wrapping_add(1))
@@ -143,7 +143,7 @@
 ///
 /// If there is no zero bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcmsk))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
@@ -154,7 +154,7 @@
 ///
 /// If there is no zero bit in `x`, it returns `x`.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcs))]
 pub unsafe fn _blcs_u32(x: u32) -> u32 {
     x | (x.wrapping_add(1))
@@ -164,7 +164,7 @@
 ///
 /// If there is no zero bit in `x`, it returns `x`.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcs))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blcs_u64(x: u64) -> u64 {
@@ -175,7 +175,7 @@
 ///
 /// If there is no set bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsfill))]
 pub unsafe fn _blsfill_u32(x: u32) -> u32 {
     x | (x.wrapping_sub(1))
@@ -185,7 +185,7 @@
 ///
 /// If there is no set bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsfill))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blsfill_u64(x: u64) -> u64 {
@@ -196,7 +196,7 @@
 ///
 /// If there is no set bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsic))]
 pub unsafe fn _blsic_u32(x: u32) -> u32 {
     !x | (x.wrapping_sub(1))
@@ -206,7 +206,7 @@
 ///
 /// If there is no set bit in `x`, it sets all the bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsic))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _blsic_u64(x: u64) -> u64 {
@@ -218,7 +218,7 @@
 ///
 /// If the least significant bit of `x` is 0, it sets all bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(t1mskc))]
 pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
     !x | (x.wrapping_add(1))
@@ -229,7 +229,7 @@
 ///
 /// If the least significant bit of `x` is 0, it sets all bits.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(t1mskc))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
@@ -241,7 +241,7 @@
 ///
 /// If the least significant bit of `x` is 1, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(tzmsk))]
 pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
     !x & (x.wrapping_sub(1))
@@ -252,7 +252,7 @@
 ///
 /// If the least significant bit of `x` is 1, it returns zero.
 #[inline(always)]
-#[target_feature = "+tbm"]
+#[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(tzmsk))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
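The control word documented for `_bextr2_u32`/`_bextr2_u64` above packs the start index into bits [7,0] and the field length into bits [15,8]. A plain-Rust sketch of that encoding and of the extraction it requests (illustrative only; no TBM intrinsics involved):

    // Build the BEXTRI control word: start index in bits [7,0], field
    // length in bits [15,8] -- exactly what `_bextr_u32` computes before
    // delegating to `_bextr2_u32`.
    fn bextr_control(start: u32, len: u32) -> u32 {
        (start & 0xff) | ((len & 0xff) << 8)
    }

    // Reference semantics of the extraction itself: bits
    // [start, start + len) of `a`, moved to the least significant bits.
    fn bextr_reference(a: u32, start: u32, len: u32) -> u32 {
        if len == 0 || start >= 32 {
            return 0;
        }
        let bits = a >> start;
        if len >= 32 { bits } else { bits & ((1 << len) - 1) }
    }

    // e.g. bextr_reference(0b1011_0100, 2, 3) == 0b101
    //      and bextr_control(2, 3) == 0x0302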
diff --git a/coresimd/src/x86/i586/xsave.rs b/coresimd/src/x86/i586/xsave.rs
index 15e1f8b..9a7611a 100644
--- a/coresimd/src/x86/i586/xsave.rs
+++ b/coresimd/src/x86/i586/xsave.rs
@@ -34,7 +34,7 @@
 /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
 /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xsave))]
 pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
     xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -47,7 +47,7 @@
 /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
 /// boundary.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xrstor))]
 pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
     xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
@@ -63,7 +63,7 @@
 ///
 /// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xsetbv))]
 pub unsafe fn _xsetbv(a: u32, val: u64) {
     xsetbv(a, (val >> 32) as u32, val as u32);
@@ -72,7 +72,7 @@
 /// Reads the contents of the extended control register `XCR`
 /// specified in `xcr_no`.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xgetbv))]
 pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
     xgetbv(xcr_no) as u64
@@ -86,7 +86,7 @@
 /// the manner in which data is saved. The performance of this instruction will
 /// be equal to or better than using the `XSAVE` instruction.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaveopt"]
+#[target_feature(enable = "xsave,xsaveopt")]
 #[cfg_attr(test, assert_instr(xsaveopt))]
 pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
     xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -99,7 +99,7 @@
 /// use init optimization. State is saved based on bits [62:0] in `save_mask`
 /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsavec"]
+#[target_feature(enable = "xsave,xsavec")]
 #[cfg_attr(test, assert_instr(xsavec))]
 pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
     xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -113,7 +113,7 @@
 /// modified optimization. State is saved based on bits [62:0] in `save_mask`
 /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
 #[cfg_attr(test, assert_instr(xsaves))]
 pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
     xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -129,7 +129,7 @@
 /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
 /// boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
 #[cfg_attr(test, assert_instr(xrstors))]
 pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
     xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
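All of the XSAVE-family wrappers above lower the 64-bit state mask the same way: the high half goes in one `u32` argument and the low half in the other, matching the edx:eax operands of the underlying instructions. A trivial sketch of that split (illustrative only):

    // Split a 64-bit save/restore mask into the (high, low) u32 pair
    // passed to the underlying xsave/xrstor intrinsics.
    fn split_mask(mask: u64) -> (u32, u32) {
        ((mask >> 32) as u32, mask as u32)
    }

    // e.g. split_mask(0x0000_0001_0000_0007) == (1, 7)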
diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs
index acf43dc..08d7770 100644
--- a/coresimd/src/x86/i686/mmx.rs
+++ b/coresimd/src/x86/i686/mmx.rs
@@ -16,7 +16,7 @@
 
 /// Constructs a 64-bit integer vector initialized to zero.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 // FIXME: this produces a movl instead of xorps on x86
 // FIXME: this produces a xor intrinsic instead of xorps on x86_64
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
@@ -26,7 +26,7 @@
 
 /// Add packed 8-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddb))]
 pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
     paddb(a, b)
@@ -34,7 +34,7 @@
 
 /// Add packed 16-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddw))]
 pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
     paddw(a, b)
@@ -42,7 +42,7 @@
 
 /// Add packed 32-bit integers in `a` and `b`.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddd))]
 pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
     paddd(a, b)
@@ -50,7 +50,7 @@
 
 /// Add packed 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsb))]
 pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
     paddsb(a, b)
@@ -58,7 +58,7 @@
 
 /// Add packed 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddsw))]
 pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
     paddsw(a, b)
@@ -66,7 +66,7 @@
 
 /// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusb))]
 pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
     paddusb(a, b)
@@ -74,117 +74,12 @@
 
 /// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
 #[inline(always)]
-#[target_feature = "+mmx"]
+#[target_feature(enable = "mmx")]
 #[cfg_attr(test, assert_instr(paddusw))]
 pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
     paddusw(a, b)
 }
 
-/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(packsswb))]
-pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
-    packsswb(a, b)
-}
-
-/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
-/// using signed saturation.
-///
-/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
-/// less than 0x80 are saturated to 0x80.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(packssdw))]
-pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
-    packssdw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtb))]
-pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
-    pcmpgtb(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtw))]
-pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
-    pcmpgtw(a, b)
-}
-
-/// Compares whether each element of `a` is greater than the corresponding
-/// element of `b` returning `0` for `false` and `-1` for `true`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(pcmpgtd))]
-pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
-    pcmpgtd(a, b)
-}
-
-/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.2, b.2, a.3, b.3]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
-pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
-    punpckhwd(a, b)
-}
-
-/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhbw))]
-pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
-    punpckhbw(a, b)
-}
-
-/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
-/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpcklbw))]
-pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
-    punpcklbw(a, b)
-}
-
-/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
-/// them into the result: `[a.0 b.0 a.1 b.1]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpcklwd))]
-pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
-    punpcklwd(a, b)
-}
-
-/// Unpacks the upper element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.1, b.1]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckhdq))]
-pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
-    punpckhdq(a, b)
-}
-
-/// Unpacks the lower element from two `i32x2` vectors and interleaves them
-/// into the result: `[a.0, b.0]`.
-#[inline(always)]
-#[target_feature = "+mmx"]
-#[cfg_attr(test, assert_instr(punpckldq))]
-pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
-    punpckldq(a, b)
-}
-
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.mmx.padd.b"]
@@ -305,98 +200,4 @@
         let e = u16x4::new(0, 11, 22, u16::max_value());
         assert_eq!(r, e);
     }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_packs_pi16() {
-        let a = i16x4::new(-1, 2, -3, 4);
-        let b = i16x4::new(-5, 6, -7, 8);
-        let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
-        assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_packs_pi32() {
-        let a = i32x2::new(-1, 2);
-        let b = i32x2::new(-5, 6);
-        let r = i16x4::new(-1, 2, -5, 6);
-        assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_cmpgt_pi8() {
-        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
-        let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
-        assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_cmpgt_pi16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let b = i16x4::new(4, 3, 2, 1);
-        let r = i16x4::new(0, 0, 0, -1);
-        assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_cmpgt_pi32() {
-        let a = i32x2::new(0, 3);
-        let b = i32x2::new(1, 2);
-        let r0 = i32x2::new(0, -1);
-        let r1 = i32x2::new(-1, 0);
-
-        assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into());
-        assert_eq!(r1, mmx::_mm_cmpgt_pi32(b.into(), a.into()).into());
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpackhi_pi8() {
-        let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
-        let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14);
-        let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14);
-
-        assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into());
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpacklo_pi8() {
-        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
-        let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
-        assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpackhi_pi16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let b = i16x4::new(4, 5, 6, 7);
-        let r = i16x4::new(2, 6, 3, 7);
-        assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpacklo_pi16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let b = i16x4::new(4, 5, 6, 7);
-        let r = i16x4::new(0, 4, 1, 5);
-        assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpackhi_pi32() {
-        let a = i32x2::new(0, 3);
-        let b = i32x2::new(1, 2);
-        let r = i32x2::new(3, 2);
-
-        assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into());
-    }
-
-    #[simd_test = "mmx"]
-    unsafe fn _mm_unpacklo_pi32() {
-        let a = i32x2::new(0, 3);
-        let b = i32x2::new(1, 2);
-        let r = i32x2::new(0, 1);
-
-        assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into());
-    }
 }
diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs
index e91db45..706acf9 100644
--- a/coresimd/src/x86/i686/sse.rs
+++ b/coresimd/src/x86/i686/sse.rs
@@ -4,7 +4,6 @@
 use v64::*;
 use core::mem;
 use x86::i586;
-use x86::i686::mmx;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -48,7 +47,7 @@
 /// Compares the packed 16-bit signed integers of `a` and `b` writing the
 /// greatest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmaxsw))]
 pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
     pmaxsw(a, b)
@@ -57,7 +56,7 @@
 /// Compares the packed 16-bit signed integers of `a` and `b` writing the
 /// greatest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmaxsw))]
 pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
     _mm_max_pi16(a, b)
@@ -66,7 +65,7 @@
 /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
 /// greatest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmaxub))]
 pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
     pmaxub(a, b)
@@ -75,7 +74,7 @@
 /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
 /// greatest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmaxub))]
 pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
     _mm_max_pu8(a, b)
@@ -84,7 +83,7 @@
 /// Compares the packed 16-bit signed integers of `a` and `b` writing the
 /// smallest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pminsw))]
 pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
     pminsw(a, b)
@@ -93,7 +92,7 @@
 /// Compares the packed 16-bit signed integers of `a` and `b` writing the
 /// smallest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pminsw))]
 pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
     _mm_min_pi16(a, b)
@@ -102,7 +101,7 @@
 /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
 /// smallest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pminub))]
 pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
     pminub(a, b)
@@ -111,7 +110,7 @@
 /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
 /// smallest value into the result.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pminub))]
 pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
     _mm_min_pu8(a, b)
@@ -121,7 +120,7 @@
 /// high-order 16 bits of each 32-bit product to the corresponding bits in
 /// the destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmulhuw))]
 pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
     pmulhuw(a, b)
@@ -131,7 +130,7 @@
 /// high-order 16 bits of each 32-bit product to the corresponding bits in
 /// the destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmulhuw))]
 pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
     _mm_mulhi_pu16(a, b)
@@ -141,7 +140,7 @@
 /// values and writes the averages to the corresponding bits in the
 /// destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pavgb))]
 pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
     pavgb(a, b)
@@ -151,7 +150,7 @@
 /// values and writes the averages to the corresponding bits in the
 /// destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pavgb))]
 pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
     _mm_avg_pu8(a, b)
@@ -161,7 +160,7 @@
 /// values and writes the averages to the corresponding bits in the
 /// destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pavgw))]
 pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
     pavgw(a, b)
@@ -171,7 +170,7 @@
 /// values and writes the averages to the corresponding bits in the
 /// destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pavgw))]
 pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
     _mm_avg_pu16(a, b)
@@ -182,7 +181,7 @@
 /// difference. Then the sum of the 8 absolute differences is written to the
 /// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(psadbw))]
 pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
     psadbw(a, b)
@@ -193,7 +192,7 @@
 /// difference. Then the sum of the 8 absolute differences is written to the
 /// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(psadbw))]
 pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
     _mm_sad_pu8(a, b)
@@ -204,7 +203,7 @@
 /// destination. The remaining higher order elements of the destination are
 /// copied from the corresponding elements in the first operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtpi2ps))]
 pub unsafe fn _mm_cvtpi32_ps(a: f32x4, b: i32x2) -> f32x4 {
     cvtpi2ps(a, mem::transmute(b))
@@ -215,66 +214,16 @@
 /// destination. The remaining higher order elements of the destination are
 /// copied from the corresponding elements in the first operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtpi2ps))]
 pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
     _mm_cvtpi32_ps(a, b)
 }
 
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_cmpgt_pi8(b, a);
-    let b = mmx::_mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
-/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_unpacklo_pi8(a, b);
-    _mm_cvtpi16_ps(b)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let b = mmx::_mm_cmpgt_pi16(b, a);
-    let c = mmx::_mm_unpackhi_pi16(a, b);
-    let r = i586::_mm_setzero_ps();
-    let r = cvtpi2ps(r, c);
-    let r = i586::_mm_movelh_ps(r, r);
-    let c = mmx::_mm_unpacklo_pi16(a, b);
-    cvtpi2ps(r, c)
-}
-
-/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtpi2ps))]
-pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
-    let b = mmx::_mm_setzero_si64();
-    let c = mmx::_mm_unpackhi_pi16(a, b);
-    let r = i586::_mm_setzero_ps();
-    let r = cvtpi2ps(r, c);
-    let r = i586::_mm_movelh_ps(r, r);
-    let c = mmx::_mm_unpacklo_pi16(a, b);
-    cvtpi2ps(r, c)
-}
-
 /// Converts the two 32-bit signed integer values from each 64-bit vector
 /// operand of [2 x i32] into a 128-bit vector of [4 x float].
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtpi2ps))]
 pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> f32x4 {
     let c = i586::_mm_setzero_ps();
@@ -291,7 +240,7 @@
 /// To minimize caching, the data is flagged as non-temporal
 /// (unlikely to be used again soon).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(maskmovq))]
 pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
     maskmovq(a, mask, mem_addr)
@@ -305,7 +254,7 @@
 /// To minimize caching, the data is flagged as non-temporal
 /// (unlikely to be used again soon).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(maskmovq))]
 pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
     _mm_maskmove_si64(a, mask, mem_addr)
@@ -314,7 +263,7 @@
 /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
 /// returns it, as specified by the immediate integer operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
 pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
     macro_rules! call {
@@ -326,7 +275,7 @@
 /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
 /// returns it, as specified by the immediate integer operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
 pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 {
     _mm_extract_pi16(a, imm2)
@@ -336,7 +285,7 @@
 /// and inserts the lower 16-bits of an integer operand at the 16-bit offset
 /// specified by the immediate operand `n`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
 pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
     macro_rules! call {
@@ -349,7 +298,7 @@
 /// and inserts the lower 16-bits of an integer operand at the 16-bit offset
 /// specified by the immediate operand `n`.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
 pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
     _mm_insert_pi16(a, d, imm2)
@@ -359,7 +308,7 @@
 /// integer vector to create a 16-bit mask value. Zero-extends the value to
 /// 32-bit integer and writes it to the destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmovmskb))]
 pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
     pmovmskb(mem::transmute(a))
@@ -369,7 +318,7 @@
 /// integer vector to create a 16-bit mask value. Zero-extends the value to
 /// 32-bit integer and writes it to the destination.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pmovmskb))]
 pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 {
     _mm_movemask_pi8(a)
@@ -378,7 +327,7 @@
 /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
 /// destination, as specified by the immediate value operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
 pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
     macro_rules! call {
@@ -390,7 +339,7 @@
 /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
 /// destination, as specified by the immediate value operand.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
 pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
     _mm_shuffle_pi16(a, imm8)
@@ -399,7 +348,7 @@
 /// Convert the two lower packed single-precision (32-bit) floating-point
 /// elements in `a` to packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvttps2pi))]
 pub unsafe fn _mm_cvttps_pi32(a: f32x4) -> i32x2 {
     mem::transmute(cvttps2pi(a))
@@ -408,7 +357,7 @@
 /// Convert the two lower packed single-precision (32-bit) floating-point
 /// elements in `a` to packed 32-bit integers with truncation.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvttps2pi))]
 pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
     _mm_cvttps_pi32(a)
@@ -417,7 +366,7 @@
 /// Convert the two lower packed single-precision (32-bit) floating-point
 /// elements in `a` to packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtps2pi))]
 pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> __m64 {
     cvtps2pi(a)
@@ -426,36 +375,12 @@
 /// Convert the two lower packed single-precision (32-bit) floating-point
 /// elements in `a` to packed 32-bit integers.
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtps2pi))]
 pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> __m64 {
     _mm_cvtps_pi32(a)
 }
 
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 16-bit integers.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> __m64 {
-    let b = _mm_cvtps_pi32(a);
-    let a = i586::_mm_movehl_ps(a, a);
-    let c = _mm_cvtps_pi32(a);
-    mmx::_mm_packs_pi32(b, c)
-}
-
-/// Convert packed single-precision (32-bit) floating-point elements in `a` to
-/// packed 8-bit integers, and returns theem in the lower 4 elements of the
-/// result.
-#[inline(always)]
-#[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(cvtps2pi))]
-pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> __m64 {
-    let b = _mm_cvtps_pi16(a);
-    let c = mmx::_mm_setzero_si64();
-    mmx::_mm_packs_pi16(b, c)
-}
-
 #[cfg(test)]
 mod tests {
     use std::mem;
@@ -563,38 +488,6 @@
     }
 
     #[simd_test = "sse"]
-    unsafe fn _mm_cvtpi16_ps() {
-        let a = i16x4::new(1, 2, 3, 4);
-        let expected = f32x4::new(1., 2., 3., 4.);
-        let r = sse::_mm_cvtpi16_ps(a.into());
-        assert_eq!(r, expected);
-    }
-
-    #[simd_test = "sse"]
-    unsafe fn _mm_cvtpu16_ps() {
-        let a = u16x4::new(1, 2, 3, 4);
-        let expected = f32x4::new(1., 2., 3., 4.);
-        let r = sse::_mm_cvtpu16_ps(a.into());
-        assert_eq!(r, expected);
-    }
-
-    #[simd_test = "sse"]
-    unsafe fn _mm_cvtpi8_ps() {
-        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let expected = f32x4::new(1., 2., 3., 4.);
-        let r = sse::_mm_cvtpi8_ps(a.into());
-        assert_eq!(r, expected);
-    }
-
-    #[simd_test = "sse"]
-    unsafe fn _mm_cvtpu8_ps() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let expected = f32x4::new(1., 2., 3., 4.);
-        let r = sse::_mm_cvtpu8_ps(a.into());
-        assert_eq!(r, expected);
-    }
-
-    #[simd_test = "sse"]
     unsafe fn _mm_cvtpi32x2_ps() {
         let a = i32x2::new(1, 2);
         let b = i32x2::new(3, 4);
@@ -684,18 +577,4 @@
         assert_eq!(r, sse::_mm_cvttps_pi32(a));
         assert_eq!(r, sse::_mm_cvtt_ps2pi(a));
     }
-
-    #[simd_test = "sse"]
-    unsafe fn _mm_cvtps_pi16() {
-        let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
-        let r = i16x4::new(7, 2, 3, 4);
-        assert_eq!(r, i16x4::from(sse::_mm_cvtps_pi16(a)));
-    }
-
-    #[simd_test = "sse"]
-    unsafe fn _mm_cvtps_pi8() {
-        let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
-        let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
-        assert_eq!(r, i8x8::from(sse::_mm_cvtps_pi8(a)));
-    }
 }
diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs
index c9b5fd3..1074be5 100644
--- a/coresimd/src/x86/i686/sse2.rs
+++ b/coresimd/src/x86/i686/sse2.rs
@@ -10,7 +10,7 @@
 /// Adds two signed or unsigned 64-bit integer values, returning the
 /// lower 64 bits of the sum.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddq))]
 pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
     paddq(a, b)
@@ -20,7 +20,7 @@
 /// of the two 64-bit integer vectors and returns the 64-bit unsigned
 /// product.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmuludq))]
 pub unsafe fn _mm_mul_su32(a: u32x2, b: u32x2) -> __m64 {
     pmuludq(mem::transmute(a), mem::transmute(b))
@@ -29,7 +29,7 @@
 /// Subtracts signed or unsigned 64-bit integer values and writes the
 /// difference to the corresponding bits in the destination.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubq))]
 pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
     psubq(a, b)
@@ -39,7 +39,7 @@
 /// [2 x i32] into two double-precision floating-point values, returned in a
 /// 128-bit vector of [2 x double].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtpi2pd))]
 pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
     cvtpi2pd(mem::transmute(a))
@@ -48,7 +48,7 @@
 /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
 /// the specified 64-bit integer values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
     i64x2::new(mem::transmute(e0), mem::transmute(e1))
@@ -57,7 +57,7 @@
 /// Initializes both values in a 128-bit vector of [2 x i64] with the
 /// specified 64-bit value.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
     i64x2::new(mem::transmute(a), mem::transmute(a))
@@ -66,7 +66,7 @@
 /// Constructs a 128-bit integer vector, initialized in reverse order
 /// with the specified 64-bit integral values.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // no particular instruction to test
 pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
     i64x2::new(mem::transmute(e1), mem::transmute(e0))
@@ -75,7 +75,7 @@
 /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
 /// integer.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
 // instr?
 pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
@@ -85,7 +85,7 @@
 /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
 /// upper bits.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
 // instr?
 pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
@@ -96,7 +96,7 @@
 /// 128-bit vector of [2 x double] into two signed 32-bit integer values,
 /// returned in a 64-bit vector of [2 x i32].
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtpd2pi))]
 pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
     mem::transmute(cvtpd2pi(a))
@@ -108,7 +108,7 @@
 /// If the result of either conversion is inexact, the result is truncated
 /// (rounded towards zero) regardless of the current MXCSR setting.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttpd2pi))]
 pub unsafe fn _mm_cvttpd_pi32(a: f64x2) -> i32x2 {
     mem::transmute(cvttpd2pi(a))
diff --git a/coresimd/src/x86/i686/sse41.rs b/coresimd/src/x86/i686/sse41.rs
index 16e767f..68d698e 100644
--- a/coresimd/src/x86/i686/sse41.rs
+++ b/coresimd/src/x86/i686/sse41.rs
@@ -29,7 +29,7 @@
 /// * `1` - if the specified bits are all zeros,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
     ptestz(i64x2::from(a), i64x2::from(mask))
@@ -49,7 +49,7 @@
 /// * `1` - if the specified bits are all ones,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
     ptestc(i64x2::from(a), i64x2::from(mask))
@@ -69,7 +69,7 @@
 /// * `1` - if the specified bits are neither all zeros nor all ones,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
     ptestnzc(i64x2::from(a), i64x2::from(mask))
@@ -89,7 +89,7 @@
 /// * `1` - if the specified bits are all zeros,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
     _mm_testz_si128(a, mask)
@@ -107,7 +107,7 @@
 /// * `1` - if the bits specified in the operand are all set to 1,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
@@ -129,7 +129,7 @@
 /// * `1` - if the specified bits are neither all zeros nor all ones,
 /// * `0` - otherwise.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
     _mm_testnzc_si128(a, mask)
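The three PTEST predicates documented above (`testz`, `testc`, `testnzc`) reduce to simple bit relations between the operand and the mask. A plain-integer sketch over the two 64-bit lanes (illustrative only; no SSE4.1 intrinsics involved):

    // ZF: every mask-selected bit of `a` is 0
    // -> _mm_testz_si128 / _mm_test_all_zeros
    fn testz(a: [u64; 2], mask: [u64; 2]) -> i32 {
        (((a[0] & mask[0]) | (a[1] & mask[1])) == 0) as i32
    }

    // CF: every mask-selected bit of `a` is 1 -> _mm_testc_si128
    fn testc(a: [u64; 2], mask: [u64; 2]) -> i32 {
        (((!a[0] & mask[0]) | (!a[1] & mask[1])) == 0) as i32
    }

    // Neither all zeros nor all ones
    // -> _mm_testnzc_si128 / _mm_test_mix_ones_zeros
    fn testnzc(a: [u64; 2], mask: [u64; 2]) -> i32 {
        (testz(a, mask) == 0 && testc(a, mask) == 0) as i32
    }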
diff --git a/coresimd/src/x86/i686/sse42.rs b/coresimd/src/x86/i686/sse42.rs
index a7fe082..79df38a 100644
--- a/coresimd/src/x86/i686/sse42.rs
+++ b/coresimd/src/x86/i686/sse42.rs
@@ -8,7 +8,7 @@
 /// Compare packed 64-bit integers in `a` and `b` for greater-than,
 /// return the results.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpgtq))]
 pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
     a.gt(b)
diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs
index 884097e..79f4066 100644
--- a/coresimd/src/x86/i686/sse4a.rs
+++ b/coresimd/src/x86/i686/sse4a.rs
@@ -33,7 +33,7 @@
 /// If `length == 0 && index > 0` or `length + index > 64` the result is
 /// undefined.
 #[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(extrq))]
 pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
     extrq(x, mem::transmute(y))
@@ -49,7 +49,7 @@
 /// If the `length` is zero it is interpreted as `64`. If `index + length > 64`
 /// or `index > 0 && length == 0` the result is undefined.
 #[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(insertq))]
 pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
     insertq(x, y)
@@ -57,7 +57,7 @@
 
 /// Non-temporal store of `a.0` into `p`.
 #[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntsd))]
 pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
     movntsd(p, a);
@@ -65,7 +65,7 @@
 
 /// Non-temporal store of `a.0` into `p`.
 #[inline(always)]
-#[target_feature = "+sse4a"]
+#[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntss))]
 pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
     movntss(p, a);
diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs
index 9659735..573d067 100644
--- a/coresimd/src/x86/i686/ssse3.rs
+++ b/coresimd/src/x86/i686/ssse3.rs
@@ -8,7 +8,7 @@
 /// Compute the absolute value of packed 8-bit integers in `a` and
 /// return the unsigned results.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsb))]
 pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
     pabsb(a)
@@ -17,7 +17,7 @@
 /// Compute the absolute value of packed 8-bit integers in `a`, and return the
 /// unsigned results.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsw))]
 pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
     pabsw(a)
@@ -26,7 +26,7 @@
 /// Compute the absolute value of packed 32-bit integers in `a`, and return the
 /// unsigned results.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsd))]
 pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
     pabsd(a)
@@ -35,7 +35,7 @@
 /// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
 /// the corresponding 8-bit element of `b`, and return the results
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pshufb))]
 pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
     pshufb(a, b)
@@ -44,7 +44,7 @@
 /// Concatenates the two 64-bit integer vector operands, and right-shifts
 /// the result by the number of bytes specified in the immediate operand.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(palignr, n = 15))]
 pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
     macro_rules! call {
@@ -58,7 +58,7 @@
 /// Horizontally add the adjacent pairs of values contained in 2 packed
 /// 64-bit vectors of [4 x i16].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddw))]
 pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
     phaddw(a, b)
@@ -67,7 +67,7 @@
 /// Horizontally add the adjacent pairs of values contained in 2 packed
 /// 64-bit vectors of [2 x i32].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddd))]
 pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
     phaddd(a, b)
@@ -77,7 +77,7 @@
 /// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddsw))]
 pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
     phaddsw(a, b)
@@ -86,7 +86,7 @@
 /// Horizontally subtracts the adjacent pairs of values contained in 2
 /// packed 64-bit vectors of [4 x i16].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubw))]
 pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
     phsubw(a, b)
@@ -95,7 +95,7 @@
 /// Horizontally subtracts the adjacent pairs of values contained in 2
 /// packed 64-bit vectors of [2 x i32].
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubd))]
 pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
     phsubd(a, b)
@@ -106,7 +106,7 @@
 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
 /// saturated to 8000h.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubsw))]
 pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
     phsubsw(a, b)
@@ -118,7 +118,7 @@
 /// contiguous products with signed saturation, and writes the 16-bit sums to
 /// the corresponding bits in the destination.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pmaddubsw))]
 pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
     pmaddubsw(a, b)
@@ -128,7 +128,7 @@
 /// products to the 18 most significant bits by right-shifting, rounds the
 /// truncated value by adding 1, and writes bits [16:1] to the destination.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pmulhrsw))]
 pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
     pmulhrsw(a, b)
@@ -139,7 +139,7 @@
 /// Elements in the result are zeroed out when the corresponding element in `b` is
 /// zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignb))]
 pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
     psignb(a, b)
@@ -150,7 +150,7 @@
 /// Elements in the result are zeroed out when the corresponding element in `b` is
 /// zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignw))]
 pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
     psignw(a, b)
@@ -161,7 +161,7 @@
 /// Elements in the result are zeroed out when the corresponding element in `b` is
 /// zero.
 #[inline(always)]
-#[target_feature = "+ssse3"]
+#[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(psignd))]
 pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
     psignd(a, b)
diff --git a/coresimd/src/x86/x86_64/fxsr.rs b/coresimd/src/x86/x86_64/fxsr.rs
index f12d041..c2a7391 100644
--- a/coresimd/src/x86/x86_64/fxsr.rs
+++ b/coresimd/src/x86/x86_64/fxsr.rs
@@ -22,7 +22,7 @@
 /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
 /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
 #[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
 #[cfg_attr(test, assert_instr(fxsave64))]
 pub unsafe fn _fxsave64(mem_addr: *mut u8) {
     fxsave64(mem_addr)
@@ -43,7 +43,7 @@
 /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
 /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
 #[inline(always)]
-#[target_feature = "+fxsr"]
+#[target_feature(enable = "fxsr")]
 #[cfg_attr(test, assert_instr(fxrstor64))]
 pub unsafe fn _fxrstor64(mem_addr: *const u8) {
     fxrstor64(mem_addr)
diff --git a/coresimd/src/x86/x86_64/mod.rs b/coresimd/src/x86/x86_64/mod.rs
index 7225e7b..5ba37b5 100644
--- a/coresimd/src/x86/x86_64/mod.rs
+++ b/coresimd/src/x86/x86_64/mod.rs
@@ -1,6 +1,8 @@
 //! `x86_64` intrinsics
 
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
 mod fxsr;
+#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
 pub use self::fxsr::*;
 
 mod sse;
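
The `dont_compile_me` cfg above is a predicate that is never passed to the compiler, so both items are compiled out until the `fxsr` feature name is recognized upstream (see the TODO comment). A minimal standalone sketch of the same gating pattern, with an illustrative module and flag name that are not part of this patch:

    // src/main.rs of a hypothetical crate
    #[cfg(experimental_fxsr)] // never set via `--cfg`, so this module is skipped
    mod fxsr_wrappers;

    fn main() {
        // A normal build omits the gated module entirely; opting back in would
        // require something like RUSTFLAGS="--cfg experimental_fxsr" cargo build.
        println!("built without the gated module");
    }
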
diff --git a/coresimd/src/x86/x86_64/sse.rs b/coresimd/src/x86/x86_64/sse.rs
index aa18179..760582b 100644
--- a/coresimd/src/x86/x86_64/sse.rs
+++ b/coresimd/src/x86/x86_64/sse.rs
@@ -25,7 +25,7 @@
 ///
 /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtss2si))]
 pub unsafe fn _mm_cvtss_si64(a: f32x4) -> i64 {
     cvtss2si64(a)
@@ -41,7 +41,7 @@
 ///
 /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvttss2si))]
 pub unsafe fn _mm_cvttss_si64(a: f32x4) -> i64 {
     cvttss2si64(a)
@@ -53,7 +53,7 @@
 /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
 /// input).
 #[inline(always)]
-#[target_feature = "+sse"]
+#[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtsi2ss))]
 pub unsafe fn _mm_cvtsi64_ss(a: f32x4, b: i64) -> f32x4 {
     cvtsi642ss(a, b)
diff --git a/coresimd/src/x86/x86_64/sse2.rs b/coresimd/src/x86/x86_64/sse2.rs
index b0762c0..2e05262 100644
--- a/coresimd/src/x86/x86_64/sse2.rs
+++ b/coresimd/src/x86/x86_64/sse2.rs
@@ -16,7 +16,7 @@
 /// Convert the lower double-precision (64-bit) floating-point element in a to
 /// a 64-bit integer.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
 pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
     cvtsd2si64(a)
@@ -24,7 +24,7 @@
 
 /// Alias for [`_mm_cvtsd_si64`](fn._mm_cvtsd_si64.html).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsd2si))]
 pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 {
     _mm_cvtsd_si64(a)
@@ -33,7 +33,7 @@
 /// Convert the lower double-precision (64-bit) floating-point element in `a`
 /// to a 64-bit integer with truncation.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
 pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
     cvttsd2si64(a)
@@ -41,7 +41,7 @@
 
 /// Alias for [`_mm_cvttsd_si64`](fn._mm_cvttsd_si64.html).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvttsd2si))]
 pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
     _mm_cvttsd_si64(a)
@@ -51,7 +51,7 @@
 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
 /// used again soon).
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 // FIXME movnti on windows and linux x86_64
 //#[cfg_attr(test, assert_instr(movntiq))]
 pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
@@ -61,7 +61,7 @@
 /// Return a vector whose lowest element is `a` and all higher elements are
 /// `0`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movq))]
 pub unsafe fn _mm_cvtsi64_si128(a: i64) -> i64x2 {
     i64x2::new(a, 0)
@@ -70,7 +70,7 @@
 /// Return a vector whose lowest element is `a` and all higher elements are
 /// `0`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movq))]
 pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> i64x2 {
     _mm_cvtsi64_si128(a)
@@ -78,7 +78,7 @@
 
 /// Return the lowest element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movq))]
 pub unsafe fn _mm_cvtsi128_si64(a: i64x2) -> i64 {
     a.extract(0)
@@ -86,7 +86,7 @@
 
 /// Return the lowest element of `a`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(all(test, not(windows)), assert_instr(movq))]
 pub unsafe fn _mm_cvtsi128_si64x(a: i64x2) -> i64 {
     _mm_cvtsi128_si64(a)
@@ -95,7 +95,7 @@
 /// Return `a` with its lower element replaced by `b` after converting it to
 /// an `f64`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 pub unsafe fn _mm_cvtsi64_sd(a: f64x2, b: i64) -> f64x2 {
     a.replace(0, b as f64)
@@ -104,7 +104,7 @@
 /// Return `a` with its lower element replaced by `b` after converting it to
 /// an `f64`.
 #[inline(always)]
-#[target_feature = "+sse2"]
+#[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 pub unsafe fn _mm_cvtsi64x_sd(a: f64x2, b: i64) -> f64x2 {
     _mm_cvtsi64_sd(a, b)
diff --git a/coresimd/src/x86/x86_64/sse41.rs b/coresimd/src/x86/x86_64/sse41.rs
index 20fa606..2067e7f 100644
--- a/coresimd/src/x86/x86_64/sse41.rs
+++ b/coresimd/src/x86/x86_64/sse41.rs
@@ -7,7 +7,7 @@
 
 /// Extract a 64-bit integer from `a` selected with `imm8`
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 // TODO: Add test for Windows
 #[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
 pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: i32) -> i64 {
@@ -18,7 +18,7 @@
 /// Return a copy of `a` with the 64-bit integer from `i` inserted at a
 /// location specified by `imm8`.
 #[inline(always)]
-#[target_feature = "+sse4.1"]
+#[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
 pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: i32) -> i64x2 {
     a.replace((imm8 & 0b1) as u32, i)
diff --git a/coresimd/src/x86/x86_64/sse42.rs b/coresimd/src/x86/x86_64/sse42.rs
index b1de00b..9cabcb2 100644
--- a/coresimd/src/x86/x86_64/sse42.rs
+++ b/coresimd/src/x86/x86_64/sse42.rs
@@ -12,7 +12,7 @@
 /// Starting with the initial value in `crc`, return the accumulated
 /// CRC32 value for unsigned 64-bit integer `v`.
 #[inline(always)]
-#[target_feature = "+sse4.2"]
+#[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(crc32))]
 pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
     crc32_64_64(crc, v)
diff --git a/coresimd/src/x86/x86_64/xsave.rs b/coresimd/src/x86/x86_64/xsave.rs
index 6f8eaa6..fc8b38c 100644
--- a/coresimd/src/x86/x86_64/xsave.rs
+++ b/coresimd/src/x86/x86_64/xsave.rs
@@ -30,7 +30,7 @@
 /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
 /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xsave64))]
 pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
     xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -43,7 +43,7 @@
 /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
 /// boundary.
 #[inline(always)]
-#[target_feature = "+xsave"]
+#[target_feature(enable = "xsave")]
 #[cfg_attr(test, assert_instr(xrstor64))]
 pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
     xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
@@ -57,7 +57,7 @@
 /// the manner in which data is saved. The performance of this instruction will
 /// be equal to or better than using the `XSAVE64` instruction.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaveopt"]
+#[target_feature(enable = "xsave,xsaveopt")]
 #[cfg_attr(test, assert_instr(xsaveopt64))]
 pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
     xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -70,7 +70,7 @@
 /// use init optimization. State is saved based on bits [62:0] in `save_mask`
 /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsavec"]
+#[target_feature(enable = "xsave,xsavec")]
 #[cfg_attr(test, assert_instr(xsavec64))]
 pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
     xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -84,7 +84,7 @@
 /// modified optimization. State is saved based on bits [62:0] in `save_mask`
 /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
 #[cfg_attr(test, assert_instr(xsaves64))]
 pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
     xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
@@ -100,7 +100,7 @@
 /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
 /// boundary.
 #[inline(always)]
-#[target_feature = "+xsave,+xsaves"]
+#[target_feature(enable = "xsave,xsaves")]
 #[cfg_attr(test, assert_instr(xrstors64))]
 pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
     xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
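
The xsave hunks also show how a function gated on several features looks under the new syntax: a single `enable = "..."` string with a comma-separated list and no `+` prefixes. A minimal sketch of that shape (the body is a placeholder, not the real intrinsic call):

    #[inline(always)]
    #[target_feature(enable = "xsave,xsaveopt")] // both features apply to this fn
    unsafe fn save_state(mem_addr: *mut u8, save_mask: u64) {
        // a real implementation would invoke the xsaveopt64 intrinsic here;
        // the attribute only changes code generation for this one function
        let _ = (mem_addr, save_mask);
    }
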
diff --git a/examples/play.rs b/examples/play.rs
index 4731683..e4160e2 100644
--- a/examples/play.rs
+++ b/examples/play.rs
@@ -16,8 +16,8 @@
     use self::stdsimd::vendor;
 
     #[inline(never)]
-    #[target_feature = "+sse4.2"]
-    fn index(needle: &str, haystack: &str) -> usize {
+    #[target_feature(enable = "sse4.2")]
+    unsafe fn index(needle: &str, haystack: &str) -> usize {
         assert!(needle.len() <= 16 && haystack.len() <= 16);
 
         let (needle_len, hay_len) = (needle.len(), haystack.len());
@@ -30,15 +30,13 @@
         haystack.resize(16, 0);
         let vhaystack = s::__m128i::from(s::u8x16::load(&haystack, 0));
 
-        unsafe {
-            vendor::_mm_cmpestri(
-                vneedle,
-                needle_len as i32,
-                vhaystack,
-                hay_len as i32,
-                vendor::_SIDD_CMP_EQUAL_ORDERED,
-            ) as usize
-        }
+        vendor::_mm_cmpestri(
+            vneedle,
+            needle_len as i32,
+            vhaystack,
+            hay_len as i32,
+            vendor::_SIDD_CMP_EQUAL_ORDERED,
+        ) as usize
     }
 
     pub fn main() {
@@ -58,7 +56,9 @@
 
         let needle = env::args().nth(1).unwrap();
         let haystack = env::args().nth(2).unwrap();
-        println!("{:?}", index(&needle, &haystack));
+        unsafe {
+            println!("{:?}", index(&needle, &haystack));
+        }
     }
 }
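
Because `index` is now an `unsafe fn`, the inner `unsafe` block disappears and the obligation to verify SSE4.2 support moves to the call site, which is why `main` now wraps the call. A small standalone sketch of the resulting calling pattern, with illustrative names and a plain fallback body so it stays runnable:

    #[target_feature(enable = "sse4.2")]
    unsafe fn find(needle: &str, haystack: &str) -> Option<usize> {
        // SSE4.2 string intrinsics could be used directly here, since the whole
        // function is already unsafe; a scalar search keeps this sketch runnable
        haystack.find(needle)
    }

    fn main() {
        // The caller is responsible for ensuring the CPU supports SSE4.2
        // (e.g. via a CPUID-based runtime check) before entering `unsafe`.
        let idx = unsafe { find("ab", "xxab") };
        println!("{:?}", idx);
    }
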
 
diff --git a/examples/types.rs b/examples/types.rs
deleted file mode 100644
index 8bc0bc4..0000000
--- a/examples/types.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![feature(target_feature)]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(missing_docs_in_private_items, result_unwrap_used,
-                  option_unwrap_used, print_stdout, use_debug))]
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod example {
-    extern crate stdsimd;
-
-    use std::env;
-    use self::stdsimd::simd;
-
-    #[inline(never)]
-    #[target_feature = "-sse2"]
-    fn myop(
-        (x0, x1, x2, x3): (u64, u64, u64, u64),
-        (y0, y1, y2, y3): (u64, u64, u64, u64),
-    ) -> (u64, u64, u64, u64) {
-        let x = simd::u64x4::new(x0, x1, x2, x3);
-        let y = simd::u64x4::new(y0, y1, y2, y3);
-        let r = x * y;
-        (r.extract(0), r.extract(1), r.extract(2), r.extract(3))
-    }
-
-    pub fn main() {
-        let x = env::args().nth(1).unwrap().parse().unwrap();
-        let y = env::args().nth(1).unwrap().parse().unwrap();
-        let r = myop((x, x, x, x), (y, y, y, y));
-        println!("{:?}", r);
-    }
-}
-
-fn main() {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    example::main();
-}
diff --git a/examples/wat.rs b/examples/wat.rs
deleted file mode 100644
index 5a70eed..0000000
--- a/examples/wat.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![feature(target_feature)]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(missing_docs_in_private_items, result_unwrap_used,
-                  option_unwrap_used, print_stdout, use_debug))]
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod example {
-    extern crate stdsimd;
-
-    use std::env;
-    use self::stdsimd::simd;
-
-    #[inline(never)]
-    #[target_feature = "-sse2"]
-    fn myop(
-        (x0, x1, x2, x3): (u64, u64, u64, u64),
-        (y0, y1, y2, y3): (u64, u64, u64, u64),
-    ) -> (u64, u64, u64, u64) {
-        let x = simd::u64x4::new(x0, x1, x2, x3);
-        let y = simd::u64x4::new(y0, y1, y2, y3);
-        let r = x * y;
-        (r.extract(0), r.extract(1), r.extract(2), r.extract(3))
-    }
-
-    pub fn main() {
-        let x = env::args().nth(1).unwrap().parse().unwrap();
-        let y = env::args().nth(2).unwrap().parse().unwrap();
-        let r = myop((x, x, x, x), (y, y, y, y));
-        println!("{:?}", r);
-    }
-}
-
-fn main() {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    example::main();
-}
diff --git a/src/lib.rs b/src/lib.rs
index 277ad2a..6197eec 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -61,7 +61,7 @@
 //! // This function is only safe to call if the CPU where the
 //! // binary runs supports SSE2.
 //! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-//! #[target_feature = "+sse2"]
+//! #[target_feature(enable = "sse2")]
 //! unsafe fn sum_sse2(x: i32x4) -> i32 {
 //!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 8).into());
 //!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x.into(), 4).into());
diff --git a/stdsimd-test/simd-test-macro/src/lib.rs b/stdsimd-test/simd-test-macro/src/lib.rs
index 06c809b..6f9ddc4 100644
--- a/stdsimd-test/simd-test-macro/src/lib.rs
+++ b/stdsimd-test/simd-test-macro/src/lib.rs
@@ -47,8 +47,7 @@
     let enable_feature = enable_feature
         .trim_left_matches('"')
         .trim_right_matches('"');
-    let enable_feature =
-        string(&(format!("+{}", enable_feature).replace(',', ",+")));
+    let enable_feature = string(enable_feature);
     let item = TokenStream::from(item);
     let name = find_name(item.clone());
 
@@ -77,7 +76,7 @@
                 ::stdsimd_test::assert_skip_test_ok(stringify!(#name));
             }
 
-            #[target_feature = #enable_feature]
+            #[target_feature(enable = #enable_feature)]
             #item
         }
     }.into();
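
The only non-mechanical change in the test macro is the feature-string handling: the old attribute form expected LLVM-style `+feature` names, while `enable = "..."` takes the bare names, so the rewrite step is dropped. A rough standalone sketch of the two transformations (not the macro's actual code):

    fn old_style(features: &str) -> String {
        // "sse4.2,avx" -> "+sse4.2,+avx", as required by `#[target_feature = "..."]`
        format!("+{}", features).replace(',', ",+")
    }

    fn new_style(features: &str) -> &str {
        // forwarded verbatim into `#[target_feature(enable = "...")]`
        features
    }

    fn main() {
        assert_eq!(old_style("sse4.2,avx"), "+sse4.2,+avx");
        assert_eq!(new_style("sse4.2,avx"), "sse4.2,avx");
    }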