More run-time detection improvements (#242)

* [core/runtime] use getauxval on non-x86 platforms

* test coresimd::auxv against auxv crate

* add test files from auxv crate

* [arm] use simd_test macro

* formatting

* missing docs

* improve docs

* reading /proc/self/auxv succeeds only if reading all fields succeeds

* remove cc-crate build dependency

* getauxval succeeds only if hwcap/hwcap2 are non-zero

* fix formatting

* move getauxval to stdsimd

* delete getauxval-wrapper.c

* remove auxv crate dev-dependency from coresimd
diff --git a/Cargo.toml b/Cargo.toml
index f749bd6..4494529 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,9 @@
 [dependencies]
 coresimd = { version = "0.0.3", path = "coresimd/" }
 
+[dev-dependencies]
+auxv = "0.3.3"
+
 [profile.release]
 debug = true
 opt-level = 3
diff --git a/coresimd/src/aarch64/neon.rs b/coresimd/src/aarch64/neon.rs
index bcffd4d..55b4ff6 100644
--- a/coresimd/src/aarch64/neon.rs
+++ b/coresimd/src/aarch64/neon.rs
@@ -1,4 +1,6 @@
-//! ARMv8 NEON intrinsics
+//! ARMv8 ASIMD intrinsics
+
+// FIXME: replace neon with asimd
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -39,41 +41,43 @@
 
 #[cfg(test)]
 mod tests {
-    use super::*;
+    use super::f64x2;
+    use aarch64::neon;
+    use stdsimd_test::simd_test;
 
-    #[test]
-    fn vadd_f64_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_f64() {
         let a = 1.;
         let b = 8.;
         let e = 9.;
-        let r = unsafe { vadd_f64(a, b) };
+        let r = neon::vadd_f64(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_f64_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_f64() {
         let a = f64x2::new(1., 2.);
         let b = f64x2::new(8., 7.);
         let e = f64x2::new(9., 9.);
-        let r = unsafe { vaddq_f64(a, b) };
+        let r = neon::vaddq_f64(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddd_s64_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddd_s64() {
         let a = 1;
         let b = 8;
         let e = 9;
-        let r = unsafe { vaddd_s64(a, b) };
+        let r = neon::vaddd_s64(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddd_u64_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddd_u64() {
         let a = 1;
         let b = 8;
         let e = 9;
-        let r = unsafe { vaddd_u64(a, b) };
+        let r = neon::vaddd_u64(a, b);
         assert_eq!(r, e);
     }
 }
diff --git a/coresimd/src/arm/neon.rs b/coresimd/src/arm/neon.rs
index fafa042..9103ccc 100644
--- a/coresimd/src/arm/neon.rs
+++ b/coresimd/src/arm/neon.rs
@@ -214,199 +214,201 @@
 
 #[cfg(test)]
 mod tests {
-    use super::*;
+    use stdsimd_test::simd_test;
+    use simd::*;
+    use arm::neon;
 
-    #[test]
-    fn vadd_s8_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_s8() {
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vadd_s8(a, b) };
+        let r = neon::vadd_s8(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_s8_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_s8() {
         let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
         let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vaddq_s8(a, b) };
+        let r = neon::vaddq_s8(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_s16_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_s16() {
         let a = i16x4::new(1, 2, 3, 4);
         let b = i16x4::new(8, 7, 6, 5);
         let e = i16x4::new(9, 9, 9, 9);
-        let r = unsafe { vadd_s16(a, b) };
+        let r = neon::vadd_s16(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_s16_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_s16() {
         let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vaddq_s16(a, b) };
+        let r = neon::vaddq_s16(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_s32_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_s32() {
         let a = i32x2::new(1, 2);
         let b = i32x2::new(8, 7);
         let e = i32x2::new(9, 9);
-        let r = unsafe { vadd_s32(a, b) };
+        let r = neon::vadd_s32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_s32_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_s32() {
         let a = i32x4::new(1, 2, 3, 4);
         let b = i32x4::new(8, 7, 6, 5);
         let e = i32x4::new(9, 9, 9, 9);
-        let r = unsafe { vaddq_s32(a, b) };
+        let r = neon::vaddq_s32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_u8_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_u8() {
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vadd_u8(a, b) };
+        let r = neon::vadd_u8(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_u8_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_u8() {
         let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
         let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vaddq_u8(a, b) };
+        let r = neon::vaddq_u8(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_u16_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_u16() {
         let a = u16x4::new(1, 2, 3, 4);
         let b = u16x4::new(8, 7, 6, 5);
         let e = u16x4::new(9, 9, 9, 9);
-        let r = unsafe { vadd_u16(a, b) };
+        let r = neon::vadd_u16(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_u16_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_u16() {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
         let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
-        let r = unsafe { vaddq_u16(a, b) };
+        let r = neon::vaddq_u16(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_u32_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_u32() {
         let a = u32x2::new(1, 2);
         let b = u32x2::new(8, 7);
         let e = u32x2::new(9, 9);
-        let r = unsafe { vadd_u32(a, b) };
+        let r = neon::vadd_u32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_u32_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_u32() {
         let a = u32x4::new(1, 2, 3, 4);
         let b = u32x4::new(8, 7, 6, 5);
         let e = u32x4::new(9, 9, 9, 9);
-        let r = unsafe { vaddq_u32(a, b) };
+        let r = neon::vaddq_u32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vadd_f32_() {
+    #[simd_test = "neon"]
+    unsafe fn vadd_f32() {
         let a = f32x2::new(1., 2.);
         let b = f32x2::new(8., 7.);
         let e = f32x2::new(9., 9.);
-        let r = unsafe { vadd_f32(a, b) };
+        let r = neon::vadd_f32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddq_f32_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddq_f32() {
         let a = f32x4::new(1., 2., 3., 4.);
         let b = f32x4::new(8., 7., 6., 5.);
         let e = f32x4::new(9., 9., 9., 9.);
-        let r = unsafe { vaddq_f32(a, b) };
+        let r = neon::vaddq_f32(a, b);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_s8_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_s8() {
         let v = ::std::i8::MAX;
         let a = i8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as i16);
         let e = i16x8::new(v, v, v, v, v, v, v, v);
-        let r = unsafe { vaddl_s8(a, a) };
+        let r = neon::vaddl_s8(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_s16_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_s16() {
         let v = ::std::i16::MAX;
         let a = i16x4::new(v, v, v, v);
         let v = 2 * (v as i32);
         let e = i32x4::new(v, v, v, v);
-        let r = unsafe { vaddl_s16(a, a) };
+        let r = neon::vaddl_s16(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_s32_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_s32() {
         let v = ::std::i32::MAX;
         let a = i32x2::new(v, v);
         let v = 2 * (v as i64);
         let e = i64x2::new(v, v);
-        let r = unsafe { vaddl_s32(a, a) };
+        let r = neon::vaddl_s32(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_u8_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_u8() {
         let v = ::std::u8::MAX;
         let a = u8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as u16);
         let e = u16x8::new(v, v, v, v, v, v, v, v);
-        let r = unsafe { vaddl_u8(a, a) };
+        let r = neon::vaddl_u8(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_u16_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_u16() {
         let v = ::std::u16::MAX;
         let a = u16x4::new(v, v, v, v);
         let v = 2 * (v as u32);
         let e = u32x4::new(v, v, v, v);
-        let r = unsafe { vaddl_u16(a, a) };
+        let r = neon::vaddl_u16(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vaddl_u32_() {
+    #[simd_test = "neon"]
+    unsafe fn vaddl_u32() {
         let v = ::std::u32::MAX;
         let a = u32x2::new(v, v);
         let v = 2 * (v as u64);
         let e = u64x2::new(v, v);
-        let r = unsafe { vaddl_u32(a, a) };
+        let r = neon::vaddl_u32(a, a);
         assert_eq!(r, e);
     }
 
-    #[test]
-    fn vrsqrt_f32_() {
+    #[simd_test = "neon"]
+    unsafe fn vrsqrt_f32() {
         let a = f32x2::new(1.0, 2.0);
         let e = f32x2::new(0.9980469, 0.7050781);
-        let r = unsafe { vrsqrte_f32(a) };
+        let r = neon::vrsqrte_f32(a);
         assert_eq!(r, e);
     }
 }
diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs
index fafa6c1..ade8eb3 100644
--- a/coresimd/src/lib.rs
+++ b/coresimd/src/lib.rs
@@ -13,7 +13,8 @@
 #![allow(unused_features)]
 #![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
            simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
-           const_atomic_usize_new, stmt_expr_attributes, core_intrinsics)]
+           const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
+           crate_in_paths)]
 #![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))]
 #![cfg_attr(feature = "cargo-clippy",
             allow(inline_always, too_many_arguments, cast_sign_loss,
@@ -56,18 +57,32 @@
     #[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
                   target_arch = "arm", target_arch = "aarch64")))]
     pub use nvptx::*;
-
-    #[cfg(
-        // x86/x86_64:
-        any(target_arch = "x86", target_arch = "x86_64")
-    )]
-    pub use runtime::{__unstable_detect_feature, __Feature};
 }
 
-#[cfg(
-    // x86/x86_64:
-    any(target_arch = "x86", target_arch = "x86_64")
-)]
+/// Run-time feature detection.
+#[doc(hidden)]
+pub mod __vendor_runtime {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64",
+              all(target_os = "linux",
+                  any(target_arch = "arm", target_arch = "aarch64",
+                      target_arch = "powerpc64"))))]
+    pub use runtime::core::*;
+
+    // Re-exports `coresimd` run-time building blocks for usage in the
+    // `stdsimd` run-time.
+    #[cfg(all(target_os = "linux",
+              any(target_arch = "arm", target_arch = "aarch64",
+                  target_arch = "powerpc64")))]
+    #[doc(hidden)]
+    pub mod __runtime {
+        pub use runtime::*;
+    }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64",
+          all(target_os = "linux",
+              any(target_arch = "arm", target_arch = "aarch64",
+                  target_arch = "powerpc64"))))]
 #[macro_use]
 mod runtime;
 
diff --git a/coresimd/src/runtime/aarch64.rs b/coresimd/src/runtime/aarch64.rs
new file mode 100644
index 0000000..fbbf856
--- /dev/null
+++ b/coresimd/src/runtime/aarch64.rs
@@ -0,0 +1,47 @@
+//! Run-time feature detection on ARM Aarch64.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+    ("neon", $unstable_detect_feature:path) => {
+        // FIXME: "neon" aliases `asimd`; remove once Aarch64 neon is renamed to asimd
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
+    };
+    ("asimd", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
+    };
+    ("pmull", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
+    };
+    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
+}
+
+/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
+/// for a particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+    /// ARM Advanced SIMD (ASIMD) - Aarch64
+    asimd,
+    /// Polynomial Multiply
+    pmull,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+    let mut value: usize = 0;
+    {
+        let mut enable_feature = |f| {
+            if x.has_feature(&f) {
+                value = bit::set(value, f as u32);
+            }
+        };
+        enable_feature(__Feature::asimd);
+        enable_feature(__Feature::pmull);
+    }
+    value
+}
diff --git a/coresimd/src/runtime/arm.rs b/coresimd/src/runtime/arm.rs
new file mode 100644
index 0000000..4c4dbb4
--- /dev/null
+++ b/coresimd/src/runtime/arm.rs
@@ -0,0 +1,43 @@
+//! Run-time feature detection on ARM Aarch32.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+    ("neon", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
+    };
+    ("pmull", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
+    };
+    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
+}
+
+/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
+/// particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+    /// ARM Advanced SIMD (NEON) - Aarch32
+    neon,
+    /// Polynomial Multiply
+    pmull,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+    let mut value: usize = 0;
+    {
+        let mut enable_feature = |f| {
+            if x.has_feature(&f) {
+                value = bit::set(value, f as u32);
+            }
+        };
+        enable_feature(__Feature::neon);
+        enable_feature(__Feature::pmull);
+    }
+    value
+}
diff --git a/coresimd/src/runtime/linux/aarch64.rs b/coresimd/src/runtime/linux/aarch64.rs
new file mode 100644
index 0000000..ac7713e
--- /dev/null
+++ b/coresimd/src/runtime/linux/aarch64.rs
@@ -0,0 +1,20 @@
+//! Run-time feature detection for Aarch64 on Linux and `core`.
+
+use runtime::bit;
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
+///
+/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
+impl HasFeature for AuxVec {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        match *x {
+            asimd => bit::test(self.hwcap, 1),
+            pmull => bit::test(self.hwcap, 4),
+        }
+    }
+}
diff --git a/coresimd/src/runtime/linux/arm.rs b/coresimd/src/runtime/linux/arm.rs
new file mode 100644
index 0000000..c4383d7
--- /dev/null
+++ b/coresimd/src/runtime/linux/arm.rs
@@ -0,0 +1,20 @@
+//! Run-time feature detection for ARM32 on Linux and `core`.
+
+use runtime::bit;
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
+///
+/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
+impl HasFeature for AuxVec {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        match *x {
+            neon => bit::test(self.hwcap, 12),
+            pmull => bit::test(self.hwcap2, 1),
+        }
+    }
+}
diff --git a/coresimd/src/runtime/linux/auxv.rs b/coresimd/src/runtime/linux/auxv.rs
new file mode 100644
index 0000000..e612134
--- /dev/null
+++ b/coresimd/src/runtime/linux/auxv.rs
@@ -0,0 +1,46 @@
+//! ELF Auxiliary Vector
+//!
+//! The auxiliary vector is a memory region in a running ELF program's stack
+//! composed of (key: usize, value: usize) pairs.
+//!
+//! The keys used in the aux vector are platform dependent. For Linux, they are
+//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
+//! CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
+//!
+//! There is no perfect way of reading the auxiliary vector.
+//!
+//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
+//! - `stdsimd`: if `getauxval` is not available, it will try to read
+//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
+//!
+//! For more information about when `getauxval` is available check the great
+//! [`auxv` crate documentation][auxv_docs].
+//!
+//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
+//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
+
+/// Key to access the CPU Hardware capabilities bitfield.
+pub const AT_HWCAP: usize = 16;
+/// Key to access the CPU Hardware capabilities 2 bitfield.
+pub const AT_HWCAP2: usize = 26;
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read all the bits in the bitfield
+/// are set to zero.
+#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+#[derive(Debug, Copy, Clone)]
+pub struct AuxVec {
+    pub hwcap: usize,
+    pub hwcap2: usize,
+}
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read all the bits in the bitfield
+/// are set to zero.
+#[cfg(target_arch = "aarch64")]
+#[derive(Debug, Copy, Clone)]
+pub struct AuxVec {
+    pub hwcap: usize,
+}
diff --git a/coresimd/src/runtime/linux/mod.rs b/coresimd/src/runtime/linux/mod.rs
new file mode 100644
index 0000000..7080078
--- /dev/null
+++ b/coresimd/src/runtime/linux/mod.rs
@@ -0,0 +1,12 @@
+//! Run-time feature detection for ARM and PowerPC64 on Linux.
+
+#[cfg(target_arch = "arm")]
+mod arm;
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+mod powerpc64;
+
+pub mod auxv;
diff --git a/coresimd/src/runtime/linux/powerpc64.rs b/coresimd/src/runtime/linux/powerpc64.rs
new file mode 100644
index 0000000..f1a444d
--- /dev/null
+++ b/coresimd/src/runtime/linux/powerpc64.rs
@@ -0,0 +1,22 @@
+//! Run-time feature detection for PowerPC64 on Linux and `core`.
+
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/cputable.h][cputable]
+///
+/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
+impl HasFeature for AuxVec {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        // note: the PowerPC values are the mask to do the test (instead of the
+        // index of the bit to test like in ARM and Aarch64)
+        match *x {
+            altivec => self.hwcap & 0x10000000 != 0,
+            vsx => self.hwcap & 0x00000080 != 0,
+            power8 => self.hwcap2 & 0x80000000 != 0,
+        }
+    }
+}
diff --git a/coresimd/src/runtime/macros.rs b/coresimd/src/runtime/macros.rs
index e8278bb..e84bc5d 100644
--- a/coresimd/src/runtime/macros.rs
+++ b/coresimd/src/runtime/macros.rs
@@ -2,8 +2,9 @@
 
 /// Is a feature supported by the host CPU?
 ///
-/// This macro performs run-time feature detection. It returns true if the host
-/// CPU in which the binary is running on supports a particular feature.
+/// This macro performs run-time feature detection in `coresimd`. It returns
+/// true if the host CPU on which the binary is running supports a
+/// particular feature.
 #[macro_export]
 macro_rules! cfg_feature_enabled {
     ($name:tt) => (
@@ -14,26 +15,16 @@
             }
             #[cfg(not(target_feature = $name))]
             {
-                __unstable_detect_feature!($name)
+                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+                {
+                    __unstable_detect_feature!($name,
+                                               $crate::__vendor_runtime::__unstable_detect_feature)
+                }
+                #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+                {
+                    compile_error!("cfg_target_feature! is not supported in this architecture")
+                }
             }
         }
     )
 }
-
-/// In all unsupported architectures using the macro is an error
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
-              target_arch = "arm", target_arch = "aarch64")))]
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
-}
-
-#[cfg(test)]
-mod tests {
-    #[cfg(target_arch = "x86_64")]
-    #[test]
-    fn test_macros() {
-        assert!(cfg_feature_enabled!("sse"));
-    }
-}
diff --git a/coresimd/src/runtime/mod.rs b/coresimd/src/runtime/mod.rs
index 6ad497f..b0833a7 100644
--- a/coresimd/src/runtime/mod.rs
+++ b/coresimd/src/runtime/mod.rs
@@ -1,17 +1,64 @@
 //! Run-time feature detection
-mod cache;
-mod bit;
+pub mod cache;
+pub mod bit;
 
 #[macro_use]
-mod macros;
+pub mod macros;
 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[macro_use]
-mod x86;
-pub use self::x86::__Feature;
-use self::x86::detect_features;
+pub mod x86;
 
-/// Performs run-time feature detection.
-#[doc(hidden)]
-pub fn __unstable_detect_feature(x: __Feature) -> bool {
-    cache::test(x as u32, detect_features)
+#[cfg(target_arch = "arm")]
+#[macro_use]
+pub mod arm;
+
+#[cfg(target_arch = "aarch64")]
+#[macro_use]
+pub mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+#[macro_use]
+pub mod powerpc64;
+
+#[cfg(all(target_os = "linux",
+          any(target_arch = "arm", target_arch = "aarch64",
+              target_arch = "powerpc64")))]
+pub mod linux;
+
+/// Exports architecture specific functionality for
+/// reuse in `stdsimd`.
+pub mod arch {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub use super::x86::{detect_features, __Feature};
+
+    #[cfg(target_arch = "arm")]
+    pub use runtime::arm::{detect_features, __Feature};
+
+    #[cfg(target_arch = "aarch64")]
+    pub use runtime::aarch64::{detect_features, __Feature};
+
+    #[cfg(target_arch = "powerpc64")]
+    pub use runtime::powerpc64::{detect_features, __Feature};
+
+    /// Interface for querying whether a feature is enabled.
+    pub trait HasFeature {
+        /// Is the feature `x` enabled at run-time?
+        fn has_feature(&mut self, x: &__Feature) -> bool;
+    }
+}
+
+/// Run-time feature detection exposed by `coresimd`.
+pub mod core {
+    pub use super::arch::__Feature;
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub use super::arch::detect_features;
+
+    /// Performs run-time feature detection.
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    #[doc(hidden)]
+    pub fn __unstable_detect_feature(x: __Feature) -> bool {
+        super::cache::test(x as u32, detect_features)
+    }
 }
diff --git a/coresimd/src/runtime/powerpc64.rs b/coresimd/src/runtime/powerpc64.rs
new file mode 100644
index 0000000..8ee02e4
--- /dev/null
+++ b/coresimd/src/runtime/powerpc64.rs
@@ -0,0 +1,49 @@
+//! Run-time feature detection on PowerPC64.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+    ("altivec", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::altivec{})
+    };
+    ("vsx", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::vsx{})
+    };
+    ("power8", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::power8{})
+    };
+    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
+}
+
+/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
+/// for a particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+    /// Altivec
+    altivec,
+    /// VSX
+    vsx,
+    /// Power8
+    power8,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+    let mut value: usize = 0;
+    {
+        let mut enable_feature = |f| {
+            if x.has_feature(&f) {
+                value = bit::set(value, f as u32);
+            }
+        };
+        enable_feature(__Feature::altivec);
+        enable_feature(__Feature::vsx);
+        enable_feature(__Feature::power8);
+    }
+    value
+}
diff --git a/coresimd/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs
index b952598..a994e61 100644
--- a/coresimd/src/runtime/x86.rs
+++ b/coresimd/src/runtime/x86.rs
@@ -29,133 +29,133 @@
 #[macro_export]
 #[doc(hidden)]
 macro_rules! __unstable_detect_feature {
-    ("mmx") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::mmx{})  };
-    ("sse") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse{})  };
-    ("sse2") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse2{})
+    ("mmx", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::mmx{})  };
+    ("sse", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse{})  };
+    ("sse2", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse2{})
     };
-    ("sse3") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse3{})
+    ("sse3", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse3{})
     };
-    ("ssse3") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::ssse3{})
+    ("ssse3", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::ssse3{})
     };
-    ("sse4.1") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse4_1{})
+    ("sse4.1", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse4_1{})
     };
-    ("sse4.2") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse4_2{})
+    ("sse4.2", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse4_2{})
     };
-    ("sse4a") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::sse4a{})
+    ("sse4a", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::sse4a{})
     };
-    ("avx") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx{})
+    ("avx", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx{})
     };
-    ("avx2") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx2{})
+    ("avx2", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx2{})
     };
-    ("avx512f") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512f{})
+    ("avx512f", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512f{})
     };
-    ("avx512cd") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512cd{})
+    ("avx512cd", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512cd{})
     };
-    ("avx512er") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512er{})
+    ("avx512er", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512er{})
     };
-    ("avx512pf") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512pf{})
+    ("avx512pf", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512pf{})
     };
-    ("avx512bw") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512bw{})
+    ("avx512bw", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512bw{})
     };
-    ("avx512dq") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512dq{})
+    ("avx512dq", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512dq{})
     };
-    ("avx512vl") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512vl{})
+    ("avx512vl", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512vl{})
     };
-    ("avx512ifma") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512_ifma{})
+    ("avx512ifma", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512_ifma{})
     };
-    ("avx512vbmi") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512_vbmi{})
+    ("avx512vbmi", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512_vbmi{})
     };
-    ("avx512vpopcntdq") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::avx512_vpopcntdq{})
+    ("avx512vpopcntdq", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::avx512_vpopcntdq{})
     };
-    ("fma") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::fma{})
+    ("fma", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::fma{})
     };
-    ("bmi") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::bmi{})
+    ("bmi", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::bmi{})
     };
-    ("bmi2") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::bmi2{})
+    ("bmi2", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::bmi2{})
     };
-    ("abm") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::abm{})
+    ("abm", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::abm{})
     };
-    ("lzcnt") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::abm{})
+    ("lzcnt", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::abm{})
     };
-    ("tbm") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::tbm{})
+    ("tbm", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::tbm{})
     };
-    ("popcnt") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::popcnt{})
+    ("popcnt", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::popcnt{})
     };
-    ("fxsr") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::fxsr{})
+    ("fxsr", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::fxsr{})
     };
-    ("xsave") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::xsave{})
+    ("xsave", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::xsave{})
     };
-    ("xsaveopt") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::xsaveopt{})
+    ("xsaveopt", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::xsaveopt{})
     };
-    ("xsaves") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::xsaves{})
+    ("xsaves", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::xsaves{})
     };
-    ("xsavec") => {
-        $crate::vendor::__unstable_detect_feature(
-            $crate::vendor::__Feature::xsavec{})
+    ("xsavec", $unstable_detect_feature:path) => {
+        $unstable_detect_feature(
+            $crate::__vendor_runtime::__Feature::xsavec{})
     };
-    ($t:tt) => {
+    ($t:tt, $unstable_detect_feature:path) => {
         compile_error!(concat!("unknown target feature: ", $t))
     };
 }
diff --git a/coresimd/tests/cpu-detection.rs b/coresimd/tests/cpu-detection.rs
new file mode 100644
index 0000000..2f7af1f
--- /dev/null
+++ b/coresimd/tests/cpu-detection.rs
@@ -0,0 +1,47 @@
+#![feature(cfg_target_feature)]
+#![cfg_attr(feature = "strict", deny(warnings))]
+#![cfg_attr(feature = "cargo-clippy",
+            allow(option_unwrap_used, print_stdout, use_debug))]
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[macro_use]
+extern crate coresimd;
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() {
+    println!("sse: {:?}", cfg_feature_enabled!("sse"));
+    println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
+    println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
+    println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
+    println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
+    println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
+    println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
+    println!("avx: {:?}", cfg_feature_enabled!("avx"));
+    println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
+    println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
+    println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
+    println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
+    println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
+    println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
+    println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
+    println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
+    println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
+    println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
+    println!(
+        "avx512_vpopcntdq {:?}",
+        cfg_feature_enabled!("avx512vpopcntdq")
+    );
+    println!("fma: {:?}", cfg_feature_enabled!("fma"));
+    println!("abm: {:?}", cfg_feature_enabled!("abm"));
+    println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
+    println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
+    println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
+    println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
+    println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
+    println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
+    println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
+    println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
+    println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
+    println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
+}
diff --git a/src/lib.rs b/src/lib.rs
index 8a5a83f..277ad2a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -123,25 +123,28 @@
 //! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
 //! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
 
-#![feature(macro_reexport, const_fn, const_atomic_usize_new)]
-
-/// We re-export run-time feature detection for those architectures that have
-/// suport for it in `core`:
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_reexport(cfg_feature_enabled, __unstable_detect_feature)]
+#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
+#![cfg_attr(target_os = "linux", feature(linkage))]
 extern crate coresimd;
 
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-extern crate coresimd;
+/// Re-export run-time feature detection macros.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
+          target_arch = "aarch64", target_arch = "powerpc64"))]
+pub use coresimd::__unstable_detect_feature;
 
 /// Platform dependent vendor intrinsics.
 pub mod vendor {
     pub use coresimd::vendor::*;
+}
 
-    #[cfg(all(target_os = "linux",
-              any(target_arch = "arm", target_arch = "aarch64",
-                  target_arch = "powerpc64")))]
-    pub use super::runtime::{__unstable_detect_feature, __Feature};
+/// Run-time feature detection.
+#[doc(hidden)]
+pub mod __vendor_runtime {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64",
+              all(target_os = "linux",
+                  any(target_arch = "arm", target_arch = "aarch64",
+                      target_arch = "powerpc64"))))]
+    pub use runtime::std::*;
 }
 
 /// Platform independent SIMD vector types and operations.
@@ -149,8 +152,25 @@
     pub use coresimd::simd::*;
 }
 
-#[cfg(all(target_os = "linux",
-          any(target_arch = "arm", target_arch = "aarch64",
-              target_arch = "powerpc64")))]
+/// The `stdsimd` run-time.
 #[macro_use]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64",
+          all(target_os = "linux",
+              any(target_arch = "arm", target_arch = "aarch64",
+                  target_arch = "powerpc64"))))]
 mod runtime;
+
+/// Error gracefully in architectures without run-time detection support.
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
+              all(target_os = "linux",
+                  any(target_arch = "arm", target_arch = "aarch64",
+                      target_arch = "powerpc64")))))]
+#[doc(hidden)]
+#[macro_export]
+macro_rules! cfg_feature_enabled {
+    ($name:tt) => (
+        {
+            compile_error!("cfg_feature_enabled! is not supported in this architecture")
+        }
+    )
+}
diff --git a/src/runtime/aarch64.rs b/src/runtime/aarch64.rs
deleted file mode 100644
index 273c314..0000000
--- a/src/runtime/aarch64.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-//! Run-time feature detection on ARM Aarch64.
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("neon") => {
-        // FIXME: this should be removed once we rename Aarch64 neon to asimd
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
-    };
-    ("asimd") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
-    };
-    ("pmull") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
-    };
-    ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// ARM Advanced SIMD (ASIMD) - Aarch64
-    asimd,
-    /// Polynomial Multiply
-    pmull,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
-    let mut value: usize = 0;
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value = bit::set(value, f as u32);
-            }
-        };
-        enable_feature(__Feature::asimd);
-        enable_feature(__Feature::pmull);
-    }
-    value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl linux::FeatureQuery for linux::AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        if let Some(caps) = self.lookup(linux::AT::HWCAP) {
-            match *x {
-                asimd => caps & (1 << 1) != 0,
-                pmull => caps & (1 << 4) != 0,
-            }
-        } else {
-            false
-        }
-    }
-}
-
-impl linux::FeatureQuery for linux::CpuInfo {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            asimd => self.field("Features").has("asimd"),
-            pmull => self.field("Features").has("pmull"),
-        }
-    }
-}
diff --git a/src/runtime/arm.rs b/src/runtime/arm.rs
deleted file mode 100644
index f9a71a0..0000000
--- a/src/runtime/arm.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-//! Run-time feature detection on ARM Aarch32.
-
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("neon") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::neon{})
-    };
-    ("pmull") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
-    };
-    ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
-/// particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// ARM Advanced SIMD (NEON) - Aarch32
-    neon,
-    /// Polynomial Multiply
-    pmull,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
-    let mut value: usize = 0;
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value = bit::set(value, f as u32);
-            }
-        };
-        enable_feature(__Feature::neon);
-        enable_feature(__Feature::pmull);
-    }
-    value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl linux::FeatureQuery for linux::AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            neon => self.lookup(linux::AT::HWCAP)
-                .map(|caps| caps & (1 << 12) != 0)
-                .unwrap_or(false),
-            pmull => self.lookup(linux::AT::HWCAP2)
-                .map(|caps| caps & (1 << 1) != 0)
-                .unwrap_or(false),
-        }
-    }
-}
-
-/// Is the CPU known to have a broken NEON unit?
-///
-/// See https://crbug.com/341598.
-fn has_broken_neon(cpuinfo: &linux::CpuInfo) -> bool {
-    cpuinfo.field("CPU implementer") == "0x51"
-        && cpuinfo.field("CPU architecture") == "7"
-        && cpuinfo.field("CPU variant") == "0x1"
-        && cpuinfo.field("CPU part") == "0x04d"
-        && cpuinfo.field("CPU revision") == "0"
-}
-
-impl linux::FeatureQuery for linux::CpuInfo {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            neon => {
-                self.field("Features").has("neon") && !has_broken_neon(self)
-            }
-            pmull => self.field("Features").has("pmull"),
-        }
-    }
-}
diff --git a/src/runtime/bit.rs b/src/runtime/bit.rs
deleted file mode 100644
index 42483e5..0000000
--- a/src/runtime/bit.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-//! Bit manipulation utilities
-
-/// Sets the `bit` of `x`.
-pub const fn set(x: usize, bit: u32) -> usize {
-    x | 1 << bit
-}
-
-/// Tests the `bit` of `x`.
-pub const fn test(x: usize, bit: u32) -> bool {
-    x & (1 << bit) != 0
-}
diff --git a/src/runtime/cache.rs b/src/runtime/cache.rs
deleted file mode 100644
index 6aab8ad..0000000
--- a/src/runtime/cache.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-//! Cache of run-time feature detection
-
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::usize;
-
-use super::bit;
-
-/// This global variable is a bitset used to cache the features supported by
-/// the
-/// CPU.
-static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX);
-
-/// Test the `bit` of the storage. If the storage has not been initialized,
-/// initializes it with the result of `f()`.
-///
-/// On its first invocation, it detects the CPU features and caches them in the
-/// `FEATURES` global variable as an `AtomicUsize`.
-///
-/// It uses the `__Feature` variant to index into this variable as a bitset. If
-/// the bit is set, the feature is enabled, and otherwise it is disabled.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-pub fn test<F>(bit: u32, f: F) -> bool
-where
-    F: FnOnce() -> usize,
-{
-    if CACHE.load(Ordering::Relaxed) == usize::MAX {
-        CACHE.store(f(), Ordering::Relaxed);
-    }
-    bit::test(CACHE.load(Ordering::Relaxed), bit)
-}
diff --git a/src/runtime/linux/aarch64.rs b/src/runtime/linux/aarch64.rs
new file mode 100644
index 0000000..7c1399e
--- /dev/null
+++ b/src/runtime/linux/aarch64.rs
@@ -0,0 +1,14 @@
+//! Run-time feature detection for ARM Aarch64 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+impl HasFeature for CpuInfo {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        match *x {
+            asimd => self.field("Features").has("asimd"),
+            pmull => self.field("Features").has("pmull"),
+        }
+    }
+}
diff --git a/src/runtime/linux/arm.rs b/src/runtime/linux/arm.rs
new file mode 100644
index 0000000..7023112
--- /dev/null
+++ b/src/runtime/linux/arm.rs
@@ -0,0 +1,27 @@
+//! Run-time feature detection for ARM Aarch32 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+/// Is the CPU known to have a broken NEON unit?
+///
+/// See https://crbug.com/341598.
+fn has_broken_neon(cpuinfo: &CpuInfo) -> bool {
+    cpuinfo.field("CPU implementer") == "0x51"
+        && cpuinfo.field("CPU architecture") == "7"
+        && cpuinfo.field("CPU variant") == "0x1"
+        && cpuinfo.field("CPU part") == "0x04d"
+        && cpuinfo.field("CPU revision") == "0"
+}
+
+impl HasFeature for CpuInfo {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        match *x {
+            neon => {
+                self.field("Features").has("neon") && !has_broken_neon(self)
+            }
+            pmull => self.field("Features").has("pmull"),
+        }
+    }
+}
diff --git a/src/runtime/linux/auxv/libc.rs b/src/runtime/linux/auxv/libc.rs
new file mode 100644
index 0000000..b1dc724
--- /dev/null
+++ b/src/runtime/linux/auxv/libc.rs
@@ -0,0 +1,104 @@
+//! Reads the ELF Auxiliary Vector using libc's `getauxval`.
+
+use coresimd::__vendor_runtime::__runtime::linux::auxv;
+use self::auxv::{AuxVec, AT_HWCAP};
+use std::mem;
+
+mod ffi {
+    pub type F = unsafe extern "C" fn(usize) -> usize;
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[linkage = "extern_weak"]
+        pub static getauxval: *const ();
+    }
+}
+
+/// Returns the value of the ELF Auxiliary Vector associated with `key`.
+///
+/// This only fails if the `getauxval` function is not linked.
+///
+/// The errno value is not checked. If the key is not found, `getauxval`
+/// returns zero; since that cannot be told apart from "no features
+/// detected", the `auxv` function below returns `Err` for a zero value
+/// in either case, so that more accurate run-time feature detection
+/// can run afterwards.
+fn getauxval(key: usize) -> Result<usize, ()> {
+    unsafe {
+        if ffi::getauxval.is_null() {
+            return Err(());
+        }
+
+        let ffi_getauxval: ffi::F = mem::transmute(ffi::getauxval);
+        Ok(ffi_getauxval(key))
+    }
+}
+
+/// Computes the entries of the Auxiliary Vector cache by
+/// calling libc's `getauxval(3)`.
+pub fn auxv() -> Result<AuxVec, ()> {
+    if let Ok(hwcap) = getauxval(AT_HWCAP) {
+        #[cfg(target_arch = "aarch64")]
+        {
+            if hwcap != 0 {
+                return Ok(AuxVec { hwcap });
+            }
+        }
+        #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+        {
+            if let Ok(hwcap2) = getauxval(auxv::AT_HWCAP2) {
+                if hwcap != 0 && hwcap2 != 0 {
+                    return Ok(AuxVec { hwcap, hwcap2 });
+                }
+            }
+        }
+    }
+    Err(())
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate auxv as auxv_crate;
+    use super::*;
+
+    // Reads the Auxiliary Vector key from getauxval()
+    // using the auxv crate.
+    fn auxv_crate_get(key: usize) -> Option<usize> {
+        use self::auxv_crate::AuxvType;
+        use self::auxv_crate::getauxval::Getauxval;
+        let q = auxv_crate::getauxval::NativeGetauxval {};
+        match q.getauxval(key as AuxvType) {
+            Ok(v) => Some(v as usize),
+            Err(_) => None,
+        }
+    }
+
+    #[test]
+    fn auxv_dump() {
+        if let Ok(auxvec) = auxv() {
+            println!("{:?}", auxvec);
+        } else {
+            println!("reading the auxiliary vector via getauxval failed!");
+        }
+    }
+
+    #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+    #[test]
+    fn auxv_crate() {
+        let v = auxv();
+        if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+            assert_eq!(v.unwrap().hwcap, hwcap);
+        }
+        if let Some(hwcap2) = auxv_crate_get(auxv::AT_HWCAP2) {
+            assert_eq!(v.unwrap().hwcap2, hwcap2);
+        }
+    }
+
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    fn auxv_crate() {
+        let v = auxv();
+        if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+            assert_eq!(v.unwrap().hwcap, hwcap);
+        }
+    }
+}
diff --git a/src/runtime/linux/auxv/mod.rs b/src/runtime/linux/auxv/mod.rs
new file mode 100644
index 0000000..554c963
--- /dev/null
+++ b/src/runtime/linux/auxv/mod.rs
@@ -0,0 +1,4 @@
+//! Reads ELF Auxiliary Vector
+
+pub mod libc;
+pub mod proc_self;
diff --git a/src/runtime/linux/auxv/proc_self.rs b/src/runtime/linux/auxv/proc_self.rs
new file mode 100644
index 0000000..d341edb
--- /dev/null
+++ b/src/runtime/linux/auxv/proc_self.rs
@@ -0,0 +1,143 @@
+//! Reads the ELF Auxiliary Vector from `/proc/self/auxv`.
+
+use coresimd::__vendor_runtime::__runtime::linux::auxv;
+use self::auxv::{AuxVec, AT_HWCAP};
+
+use std::mem;
+
+/// Tries to read the ELF Auxiliary Vector from `/proc/self/auxv`.
+///
+/// Errors if the file cannot be read, or if any of the required entries
+/// (`AT_HWCAP`, and `AT_HWCAP2` on arm/powerpc64) is missing from it.
+pub fn auxv() -> Result<AuxVec, ()> {
+    auxv_from_file("/proc/self/auxv")
+}
+
+fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
+    use std::io::Read;
+    let mut file = ::std::fs::File::open(file).or_else(|_| Err(()))?;
+
+    // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h
+    //
+    // The auxiliary vector contains at most 32 (key,value) fields: from
+    // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of
+    // 2*32 `usize` elements is enough to read the whole vector.
+    let mut buf = [0usize; 64];
+    {
+        let raw: &mut [u8; 64 * mem::size_of::<usize>()] =
+            unsafe { mem::transmute(&mut buf) };
+        file.read(raw).or_else(|_| Err(()))?;
+    }
+    auxv_from_buf(&buf)
+}
+
+fn auxv_from_buf(buf: &[usize; 64]) -> Result<AuxVec, ()> {
+    #[cfg(target_arch = "aarch64")]
+    {
+        for el in buf.chunks(2) {
+            match el[0] {
+                AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }),
+                _ => (),
+            }
+        }
+    }
+
+    #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+    {
+        let mut hwcap = None;
+        let mut hwcap2 = None;
+        for el in buf.chunks(2) {
+            match el[0] {
+                AT_HWCAP => hwcap = Some(el[1]),
+                auxv::AT_HWCAP2 => hwcap2 = Some(el[1]),
+                _ => (),
+            }
+        }
+        if hwcap.is_some() && hwcap2.is_some() {
+            return Ok(AuxVec {
+                hwcap: hwcap.unwrap(),
+                hwcap2: hwcap2.unwrap(),
+            });
+        }
+    }
+    Err(())
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate auxv as auxv_crate;
+    use super::*;
+
+    // Reads the Auxiliary Vector key from /proc/self/auxv
+    // using the auxv crate.
+    fn auxv_crate_get(key: usize) -> Option<usize> {
+        use self::auxv_crate::AuxvType;
+        use self::auxv_crate::procfs::search_procfs_auxv;
+        let k = key as AuxvType;
+        match search_procfs_auxv(&[k]) {
+            Ok(v) => Some(v[&k] as usize),
+            Err(_) => None,
+        }
+    }
+
+    #[test]
+    fn auxv_dump() {
+        if let Ok(auxvec) = auxv() {
+            println!("{:?}", auxvec);
+        } else {
+            println!("reading /proc/self/auxv failed!");
+        }
+    }
+
+    #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+    #[test]
+    fn auxv_crate() {
+        let v = auxv();
+        if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+            assert_eq!(v.unwrap().hwcap, hwcap);
+        }
+        if let Some(hwcap2) = auxv_crate_get(auxv::AT_HWCAP2) {
+            assert_eq!(v.unwrap().hwcap2, hwcap2);
+        }
+    }
+
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    fn auxv_crate() {
+        let v = auxv();
+        if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+            assert_eq!(v.unwrap().hwcap, hwcap);
+        }
+    }
+
+    #[cfg(all(target_arch = "arm", target_pointer_width = "32"))]
+    #[test]
+    fn linux_rpi3() {
+        let v = auxv_from_file("src/runtime/linux/test_data/linux-rpi3.auxv")
+            .unwrap();
+        assert_eq!(v.hwcap, 4174038);
+        assert_eq!(v.hwcap2, 16);
+    }
+
+    #[cfg(all(target_arch = "arm", target_pointer_width = "32"))]
+    #[test]
+    #[should_panic]
+    fn linux_macos_vb() {
+        let _ = auxv_from_file(
+                "src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"
+            ).unwrap();
+        // This file is incomplete (it contains hwcap but not hwcap2). We
+        // want to fall back to /proc/cpuinfo in this case, so reading
+        // should fail instead of yielding:
+        // `hwcap == 126614527` and `hwcap2 == 0`.
+    }
+
+    #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))]
+    #[test]
+    fn linux_x64() {
+        let v = auxv_from_file(
+            "src/runtime/linux/test_data/linux-x64-i7-6850k.auxv",
+        ).unwrap();
+        assert_eq!(v.hwcap, 3219913727);
+    }
+}
diff --git a/src/runtime/linux/auxvec.rs b/src/runtime/linux/auxvec.rs
deleted file mode 100644
index 9f8d7f3..0000000
--- a/src/runtime/linux/auxvec.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-//! Reads /proc/self/auxv on Linux systems
-
-use std::prelude::v1::*;
-use std::slice;
-use std::mem;
-
-/// Simple abstraction for the ELF Auxiliary Vector
-///
-/// the elf.h provide the layout of the single entry as auxv_t.
-/// The desugared version is a usize tag followed by a union with
-/// the same storage size.
-///
-/// Cache only the HWCAP and HWCAP2 entries.
-#[derive(Debug)]
-pub struct AuxVec {
-    hwcap: Option<usize>,
-    hwcap2: Option<usize>,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-#[allow(dead_code)]
-/// ELF Auxiliary vector entry types
-///
-/// The entry types are specified in  [linux/auxvec.h][auxvec_h].
-///
-/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
-pub enum AT {
-    /// CPU Hardware capabilities, it is a bitfield.
-    HWCAP = 16,
-    /// CPU Hardware capabilities, additional bitfield.
-    HWCAP2 = 26,
-}
-
-impl AuxVec {
-    /// Reads the ELF Auxiliary Vector
-    ///
-    /// Try to read `/proc/self/auxv`.
-    // TODO: Make use of getauxval once it is available in a
-    // reliable way.
-    pub fn new() -> Result<Self, ::std::io::Error> {
-        use std::io::Read;
-        let mut file = ::std::fs::File::open("/proc/self/auxv")?;
-        let mut buf = [0usize; 64];
-        let mut raw = unsafe {
-            slice::from_raw_parts_mut(
-                buf.as_mut_ptr() as *mut u8,
-                buf.len() * mem::size_of::<usize>(),
-            )
-        };
-
-        let _ = file.read(&mut raw)?;
-
-        mem::forget(raw);
-
-        let mut auxv = AuxVec {
-            hwcap: None,
-            hwcap2: None,
-        };
-
-        for el in buf.chunks(2) {
-            if el[0] == AT::HWCAP as usize {
-                auxv.hwcap = Some(el[1]);
-            }
-            if el[0] == AT::HWCAP2 as usize {
-                auxv.hwcap2 = Some(el[1]);
-            }
-        }
-
-        Ok(auxv)
-    }
-
-    /// Returns the value for the AT key
-    pub fn lookup(&self, key: AT) -> Option<usize> {
-        match key {
-            AT::HWCAP => self.hwcap,
-            AT::HWCAP2 => self.hwcap2,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[cfg(target_os = "linux")]
-    #[test]
-    fn test_auxvec_linux() {
-        let auxvec = AuxVec::new().unwrap();
-        println!("{:?}", auxvec.lookup(AT::HWCAP));
-        println!("{:?}", auxvec);
-    }
-}
diff --git a/src/runtime/linux/cpuinfo.rs b/src/runtime/linux/cpuinfo.rs
index fac1032..5c86122 100644
--- a/src/runtime/linux/cpuinfo.rs
+++ b/src/runtime/linux/cpuinfo.rs
@@ -88,7 +88,7 @@
 
     #[cfg(target_os = "linux")]
     #[test]
-    fn test_cpuinfo_linux() {
+    fn raw_dump() {
         let cpuinfo = CpuInfo::new().unwrap();
         if cpuinfo.field("vendor_id") == "GenuineIntel" {
             assert!(cpuinfo.field("flags").exists());
@@ -131,7 +131,7 @@
 ";
 
     #[test]
-    fn test_cpuinfo_linux_core_duo_t6500() {
+    fn core_duo_t6500() {
         let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap();
         assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel");
         assert_eq!(cpuinfo.field("cpu family"), "6");
@@ -171,7 +171,7 @@
         ";
 
     #[test]
-    fn test_cpuinfo_linux_arm_cortex_a53() {
+    fn arm_cortex_a53() {
         let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap();
         assert_eq!(
             cpuinfo.field("Processor"),
@@ -199,7 +199,7 @@
 CPU revision	: 1";
 
     #[test]
-    fn test_cpuinfo_linux_arm_cortex_a57() {
+    fn arm_cortex_a57() {
         let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap();
         assert_eq!(
             cpuinfo.field("Processor"),
@@ -240,7 +240,7 @@
 machine         : CHRP IBM pSeries (emulated by qemu)";
 
     #[test]
-    fn test_cpuinfo_linux_power8_powerkvm() {
+    fn power8_powerkvm() {
         let cpuinfo = CpuInfo::from_str(POWER8E_POWERKVM).unwrap();
         assert_eq!(cpuinfo.field("cpu"), "POWER8E (raw), altivec supported");
 
@@ -292,7 +292,7 @@
 machine         : CHRP IBM,9133-55A";
 
     #[test]
-    fn test_cpuinfo_linux_power5p() {
+    fn power5p() {
         let cpuinfo = CpuInfo::from_str(POWER5P).unwrap();
         assert_eq!(cpuinfo.field("cpu"), "POWER5+ (gs)");
 
diff --git a/src/runtime/linux/mod.rs b/src/runtime/linux/mod.rs
index de7ebfb..9ff760b 100644
--- a/src/runtime/linux/mod.rs
+++ b/src/runtime/linux/mod.rs
@@ -1,41 +1,40 @@
-//! Run-time feature detection for ARM on linux
+//! Run-time feature detection for ARM and PowerPC64 on Linux.
+
+use coresimd::__vendor_runtime::__runtime::cache;
+use coresimd::__vendor_runtime::__runtime::arch;
+pub use self::arch::__Feature;
+
+#[cfg(target_arch = "arm")]
+mod arm;
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+mod powerpc64;
+
+mod auxv;
 mod cpuinfo;
-pub use self::cpuinfo::CpuInfo;
-
-mod auxvec;
-pub use self::auxvec::*;
-
-use super::__Feature;
-
-pub trait FeatureQuery {
-    fn has_feature(&mut self, x: &__Feature) -> bool;
-}
-
-fn detect_features_impl<T: FeatureQuery>(x: T) -> usize {
-    #[cfg(target_arch = "arm")]
-    {
-        super::arm::detect_features(x)
-    }
-    #[cfg(target_arch = "aarch64")]
-    {
-        super::aarch64::detect_features(x)
-    }
-    #[cfg(target_arch = "powerpc64")]
-    {
-        super::powerpc64::detect_features(x)
-    }
-}
 
 /// Detects CPU features:
 pub fn detect_features() -> usize {
-    // Try to read the ELF Auxiliary Vector
-    if let Ok(v) = auxvec::AuxVec::new() {
-        return detect_features_impl(v);
+    // Try to read the ELF Auxiliary Vector using libc's getauxval:
+    if let Ok(v) = auxv::libc::auxv() {
+        return arch::detect_features(v);
     }
-    // Try to read /proc/cpuinfo
+    // Try to read the ELF Auxiliary Vector from /proc/self/auxv:
+    if let Ok(v) = auxv::proc_self::auxv() {
+        return arch::detect_features(v);
+    }
+    // Try to read /proc/cpuinfo:
     if let Ok(v) = cpuinfo::CpuInfo::new() {
-        return detect_features_impl(v);
+        return arch::detect_features(v);
     }
     // Otherwise all features are disabled
     0
 }
+
+/// Performs run-time feature detection.
+pub fn __unstable_detect_feature(x: __Feature) -> bool {
+    cache::test(x as u32, detect_features)
+}
diff --git a/src/runtime/linux/powerpc64.rs b/src/runtime/linux/powerpc64.rs
new file mode 100644
index 0000000..ace3ae4
--- /dev/null
+++ b/src/runtime/linux/powerpc64.rs
@@ -0,0 +1,18 @@
+//! Run-time feature detection for PowerPC64 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+/// Check for altivec support only
+///
+/// PowerPC's /proc/cpuinfo lacks a proper Feature field,
+/// but `altivec` support is indicated in the `cpu` field.
+impl HasFeature for CpuInfo {
+    fn has_feature(&mut self, x: &__Feature) -> bool {
+        use self::__Feature::*;
+        match *x {
+            altivec => self.field("cpu").has("altivec"),
+            _ => false,
+        }
+    }
+}
diff --git a/src/runtime/linux/test_data/linux-rpi3.auxv b/src/runtime/linux/test_data/linux-rpi3.auxv
new file mode 100644
index 0000000..0538e66
--- /dev/null
+++ b/src/runtime/linux/test_data/linux-rpi3.auxv
Binary files differ
diff --git a/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv b/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv
new file mode 100644
index 0000000..6afe1b3
--- /dev/null
+++ b/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv
Binary files differ
diff --git a/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv
new file mode 100644
index 0000000..75abc02
--- /dev/null
+++ b/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv
Binary files differ
diff --git a/src/runtime/macros.rs b/src/runtime/macros.rs
index e8278bb..de24246 100644
--- a/src/runtime/macros.rs
+++ b/src/runtime/macros.rs
@@ -2,8 +2,9 @@
 
 /// Is a feature supported by the host CPU?
 ///
-/// This macro performs run-time feature detection. It returns true if the host
-/// CPU in which the binary is running on supports a particular feature.
+/// This macro performs run-time feature detection in `stdsimd`. It returns
+/// true if the host CPU in which the binary is running on supports a
+/// particular feature.
 #[macro_export]
 macro_rules! cfg_feature_enabled {
     ($name:tt) => (
@@ -14,26 +15,9 @@
             }
             #[cfg(not(target_feature = $name))]
             {
-                __unstable_detect_feature!($name)
+                __unstable_detect_feature!($name,
+                                           $crate::__vendor_runtime::__unstable_detect_feature)
             }
         }
     )
 }
-
-/// In all unsupported architectures using the macro is an error
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
-              target_arch = "arm", target_arch = "aarch64")))]
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
-}
-
-#[cfg(test)]
-mod tests {
-    #[cfg(target_arch = "x86_64")]
-    #[test]
-    fn test_macros() {
-        assert!(cfg_feature_enabled!("sse"));
-    }
-}
diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs
index 9a749a8..169bf42 100644
--- a/src/runtime/mod.rs
+++ b/src/runtime/mod.rs
@@ -1,40 +1,69 @@
 //! Run-time feature detection
-mod cache;
-mod bit;
-
-#[macro_use]
-mod macros;
-
-#[cfg(all(target_arch = "arm", target_os = "linux"))]
-#[macro_use]
-mod arm;
-#[cfg(all(target_arch = "arm", target_os = "linux"))]
-pub use self::arm::__Feature;
-
-#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
-#[macro_use]
-mod aarch64;
-#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
-pub use self::aarch64::__Feature;
-
-#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
-#[macro_use]
-mod powerpc64;
-#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
-pub use self::powerpc64::__Feature;
 
 #[cfg(all(target_os = "linux",
           any(target_arch = "arm", target_arch = "aarch64",
               target_arch = "powerpc64")))]
 mod linux;
 
-#[cfg(all(target_os = "linux",
-          any(target_arch = "arm", target_arch = "aarch64",
-              target_arch = "powerpc64")))]
-pub use self::linux::detect_features;
+#[macro_use]
+mod macros;
 
-/// Performs run-time feature detection.
-#[doc(hidden)]
-pub fn __unstable_detect_feature(x: __Feature) -> bool {
-    cache::test(x as u32, detect_features)
+/// Run-time feature detection exposed by `stdsimd`.
+pub mod std {
+    // The x86/x86_64 run-time from `coresimd` is re-exported as is.
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub use coresimd::__vendor_runtime::*;
+
+    #[cfg(all(target_os = "linux",
+              any(target_arch = "arm", target_arch = "aarch64",
+                  target_arch = "powerpc64")))]
+    pub use super::linux::{detect_features, __Feature};
+
+    /// Performs run-time feature detection.
+    ///
+    /// This implementation is used on those platforms in which run-time
+    /// detection differs between `core` and `std`.
+    #[cfg(all(target_os = "linux",
+              any(target_arch = "arm", target_arch = "aarch64",
+                  target_arch = "powerpc64")))]
+    #[doc(hidden)]
+    pub fn __unstable_detect_feature(x: __Feature) -> bool {
+        ::coresimd::__vendor_runtime::__runtime::cache::test(
+            x as u32,
+            detect_features,
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::*;
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    #[test]
+    fn detect_feature() {
+        println!("avx {}", cfg_feature_enabled!("avx"));
+    }
+
+    #[cfg(all(target_arch = "arm", target_os = "linux"))]
+    #[test]
+    fn detect_feature() {
+        println!("neon {}", cfg_feature_enabled!("neon"));
+        println!("pmull {}", cfg_feature_enabled!("pmull"));
+    }
+
+    #[cfg(all(target_arch = "aarch64", target_os = "linux"))]
+    #[test]
+    fn detect_feature() {
+        println!("asimd {}", cfg_feature_enabled!("asimd"));
+        println!("pmull {}", cfg_feature_enabled!("pmull"));
+    }
+
+    #[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
+    #[test]
+    fn detect_feature() {
+        println!("altivec {}", cfg_feature_enabled!("altivec"));
+        println!("vsx {}", cfg_feature_enabled!("vsx"));
+        println!("power8 {}", cfg_feature_enabled!("power8"));
+    }
 }
diff --git a/src/runtime/powerpc64.rs b/src/runtime/powerpc64.rs
deleted file mode 100644
index df59855..0000000
--- a/src/runtime/powerpc64.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-//! Run-time feature detection on PowerPC64.
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("altivec") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::altivec{})
-    };
-    ("vsx") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::vsx{})
-    };
-    ("power8") => {
-        $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::power8{})
-    };
-    ($t:tt) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
-}
-
-/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// Altivec
-    altivec,
-    /// VSX
-    vsx,
-    /// Power8
-    power8,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
-    let mut value: usize = 0;
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value = bit::set(value, f as u32);
-            }
-        };
-        enable_feature(__Feature::altivec);
-        enable_feature(__Feature::vsx);
-        enable_feature(__Feature::power8);
-    }
-    value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/cputable.h][cputable]
-///
-/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
-impl linux::FeatureQuery for linux::AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            altivec => self.lookup(linux::AT::HWCAP)
-                .map(|caps| caps & 0x10000000 != 0)
-                .unwrap_or(false),
-            vsx => self.lookup(linux::AT::HWCAP)
-                .map(|caps| caps & 0x00000080 != 0)
-                .unwrap_or(false),
-            power8 => self.lookup(linux::AT::HWCAP2)
-                .map(|caps| caps & 0x80000000 != 0)
-                .unwrap_or(false),
-        }
-    }
-}
-
-/// Check for altivec support only
-///
-/// PowerPC's /proc/cpuinfo lacks a proper Feature field,
-/// but `altivec` support is indicated in the `cpu` field.
-impl linux::FeatureQuery for linux::CpuInfo {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            altivec => self.field("cpu").has("altivec"),
-            _ => false,
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn detect_feature() {
-        println!("altivec {}", __unstable_detect_feature!("altivec"));
-        println!("vsx {}", __unstable_detect_feature!("vsx"));
-        println!("power8 {}", __unstable_detect_feature!("power8"));
-    }
-}
diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs
index b272b1e..3efb61a 100644
--- a/tests/cpu-detection.rs
+++ b/tests/cpu-detection.rs
@@ -2,7 +2,9 @@
 #![cfg_attr(feature = "strict", deny(warnings))]
 #![cfg_attr(feature = "cargo-clippy", allow(option_unwrap_used))]
 
-#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(any(target_arch = "arm", target_arch = "aarch64",
+          target_arch = "x86", target_arch = "x86_64",
+          target_arch = "powerpc64"))]
 #[macro_use]
 extern crate stdsimd;
 
@@ -20,3 +22,50 @@
     println!("asimd: {}", cfg_feature_enabled!("asimd"));
     println!("pmull: {}", cfg_feature_enabled!("pmull"));
 }
+
+#[test]
+#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
+fn powerpc64_linux() {
+    println!("altivec: {}", cfg_feature_enabled!("altivec"));
+    println!("vsx: {}", cfg_feature_enabled!("vsx"));
+    println!("power8: {}", cfg_feature_enabled!("power8"));
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() {
+    println!("sse: {:?}", cfg_feature_enabled!("sse"));
+    println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
+    println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
+    println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
+    println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
+    println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
+    println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
+    println!("avx: {:?}", cfg_feature_enabled!("avx"));
+    println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
+    println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
+    println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
+    println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
+    println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
+    println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
+    println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
+    println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
+    println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
+    println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
+    println!(
+        "avx512_vpopcntdq {:?}",
+        cfg_feature_enabled!("avx512vpopcntdq")
+    );
+    println!("fma: {:?}", cfg_feature_enabled!("fma"));
+    println!("abm: {:?}", cfg_feature_enabled!("abm"));
+    println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
+    println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
+    println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
+    println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
+    println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
+    println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
+    println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
+    println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
+    println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
+    println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
+}