More run-time detection improvements (#242)
* [core/runtime] use getauxval on non-x86 platforms
* test coresimd::auxv against auxv crate
* add test files from auxv crate
* [arm] use simd_test macro
* formatting
* missing docs
* improve docs
* reading /proc/self/auxv succeeds only if reading all fields succeeds
* remove cc-crate build dependency
* getauxval succeeds only if hwcap/hwcap2 are non-zero
* fix formatting
* move getauxval to stdsimd
* delete getauxval-wrapper.c
* remove auxv crate dev-dependency from coresimd
diff --git a/Cargo.toml b/Cargo.toml
index f749bd6..4494529 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,9 @@
[dependencies]
coresimd = { version = "0.0.3", path = "coresimd/" }
+[dev-dependencies]
+auxv = "0.3.3"
+
[profile.release]
debug = true
opt-level = 3
diff --git a/coresimd/src/aarch64/neon.rs b/coresimd/src/aarch64/neon.rs
index bcffd4d..55b4ff6 100644
--- a/coresimd/src/aarch64/neon.rs
+++ b/coresimd/src/aarch64/neon.rs
@@ -1,4 +1,6 @@
-//! ARMv8 NEON intrinsics
+//! ARMv8 ASIMD intrinsics
+
+// FIXME: replace neon with asimd
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -39,41 +41,43 @@
#[cfg(test)]
mod tests {
- use super::*;
+ use super::f64x2;
+ use aarch64::neon;
+ use stdsimd_test::simd_test;
- #[test]
- fn vadd_f64_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_f64() {
let a = 1.;
let b = 8.;
let e = 9.;
- let r = unsafe { vadd_f64(a, b) };
+ let r = neon::vadd_f64(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_f64_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_f64() {
let a = f64x2::new(1., 2.);
let b = f64x2::new(8., 7.);
let e = f64x2::new(9., 9.);
- let r = unsafe { vaddq_f64(a, b) };
+ let r = neon::vaddq_f64(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddd_s64_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddd_s64() {
let a = 1;
let b = 8;
let e = 9;
- let r = unsafe { vaddd_s64(a, b) };
+ let r = neon::vaddd_s64(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddd_u64_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddd_u64() {
let a = 1;
let b = 8;
let e = 9;
- let r = unsafe { vaddd_u64(a, b) };
+ let r = neon::vaddd_u64(a, b);
assert_eq!(r, e);
}
}
diff --git a/coresimd/src/arm/neon.rs b/coresimd/src/arm/neon.rs
index fafa042..9103ccc 100644
--- a/coresimd/src/arm/neon.rs
+++ b/coresimd/src/arm/neon.rs
@@ -214,199 +214,201 @@
#[cfg(test)]
mod tests {
- use super::*;
+ use stdsimd_test::simd_test;
+ use simd::*;
+ use arm::neon;
- #[test]
- fn vadd_s8_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_s8() {
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vadd_s8(a, b) };
+ let r = neon::vadd_s8(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_s8_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_s8() {
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vaddq_s8(a, b) };
+ let r = neon::vaddq_s8(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_s16_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_s16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(8, 7, 6, 5);
let e = i16x4::new(9, 9, 9, 9);
- let r = unsafe { vadd_s16(a, b) };
+ let r = neon::vadd_s16(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_s16_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_s16() {
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vaddq_s16(a, b) };
+ let r = neon::vaddq_s16(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_s32_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_s32() {
let a = i32x2::new(1, 2);
let b = i32x2::new(8, 7);
let e = i32x2::new(9, 9);
- let r = unsafe { vadd_s32(a, b) };
+ let r = neon::vadd_s32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_s32_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_s32() {
let a = i32x4::new(1, 2, 3, 4);
let b = i32x4::new(8, 7, 6, 5);
let e = i32x4::new(9, 9, 9, 9);
- let r = unsafe { vaddq_s32(a, b) };
+ let r = neon::vaddq_s32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_u8_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_u8() {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vadd_u8(a, b) };
+ let r = neon::vadd_u8(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_u8_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_u8() {
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1);
let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vaddq_u8(a, b) };
+ let r = neon::vaddq_u8(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_u16_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_u16() {
let a = u16x4::new(1, 2, 3, 4);
let b = u16x4::new(8, 7, 6, 5);
let e = u16x4::new(9, 9, 9, 9);
- let r = unsafe { vadd_u16(a, b) };
+ let r = neon::vadd_u16(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_u16_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_u16() {
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9);
- let r = unsafe { vaddq_u16(a, b) };
+ let r = neon::vaddq_u16(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_u32_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_u32() {
let a = u32x2::new(1, 2);
let b = u32x2::new(8, 7);
let e = u32x2::new(9, 9);
- let r = unsafe { vadd_u32(a, b) };
+ let r = neon::vadd_u32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_u32_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_u32() {
let a = u32x4::new(1, 2, 3, 4);
let b = u32x4::new(8, 7, 6, 5);
let e = u32x4::new(9, 9, 9, 9);
- let r = unsafe { vaddq_u32(a, b) };
+ let r = neon::vaddq_u32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vadd_f32_() {
+ #[simd_test = "neon"]
+ unsafe fn vadd_f32() {
let a = f32x2::new(1., 2.);
let b = f32x2::new(8., 7.);
let e = f32x2::new(9., 9.);
- let r = unsafe { vadd_f32(a, b) };
+ let r = neon::vadd_f32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddq_f32_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddq_f32() {
let a = f32x4::new(1., 2., 3., 4.);
let b = f32x4::new(8., 7., 6., 5.);
let e = f32x4::new(9., 9., 9., 9.);
- let r = unsafe { vaddq_f32(a, b) };
+ let r = neon::vaddq_f32(a, b);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_s8_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_s8() {
let v = ::std::i8::MAX;
let a = i8x8::new(v, v, v, v, v, v, v, v);
let v = 2 * (v as i16);
let e = i16x8::new(v, v, v, v, v, v, v, v);
- let r = unsafe { vaddl_s8(a, a) };
+ let r = neon::vaddl_s8(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_s16_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_s16() {
let v = ::std::i16::MAX;
let a = i16x4::new(v, v, v, v);
let v = 2 * (v as i32);
let e = i32x4::new(v, v, v, v);
- let r = unsafe { vaddl_s16(a, a) };
+ let r = neon::vaddl_s16(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_s32_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_s32() {
let v = ::std::i32::MAX;
let a = i32x2::new(v, v);
let v = 2 * (v as i64);
let e = i64x2::new(v, v);
- let r = unsafe { vaddl_s32(a, a) };
+ let r = neon::vaddl_s32(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_u8_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_u8() {
let v = ::std::u8::MAX;
let a = u8x8::new(v, v, v, v, v, v, v, v);
let v = 2 * (v as u16);
let e = u16x8::new(v, v, v, v, v, v, v, v);
- let r = unsafe { vaddl_u8(a, a) };
+ let r = neon::vaddl_u8(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_u16_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_u16() {
let v = ::std::u16::MAX;
let a = u16x4::new(v, v, v, v);
let v = 2 * (v as u32);
let e = u32x4::new(v, v, v, v);
- let r = unsafe { vaddl_u16(a, a) };
+ let r = neon::vaddl_u16(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vaddl_u32_() {
+ #[simd_test = "neon"]
+ unsafe fn vaddl_u32() {
let v = ::std::u32::MAX;
let a = u32x2::new(v, v);
let v = 2 * (v as u64);
let e = u64x2::new(v, v);
- let r = unsafe { vaddl_u32(a, a) };
+ let r = neon::vaddl_u32(a, a);
assert_eq!(r, e);
}
- #[test]
- fn vrsqrt_f32_() {
+ #[simd_test = "neon"]
+ unsafe fn vrsqrt_f32() {
let a = f32x2::new(1.0, 2.0);
let e = f32x2::new(0.9980469, 0.7050781);
- let r = unsafe { vrsqrte_f32(a) };
+ let r = neon::vrsqrte_f32(a);
assert_eq!(r, e);
}
}
diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs
index fafa6c1..ade8eb3 100644
--- a/coresimd/src/lib.rs
+++ b/coresimd/src/lib.rs
@@ -13,7 +13,8 @@
#![allow(unused_features)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
- const_atomic_usize_new, stmt_expr_attributes, core_intrinsics)]
+ const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
+ crate_in_paths)]
#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))]
#![cfg_attr(feature = "cargo-clippy",
allow(inline_always, too_many_arguments, cast_sign_loss,
@@ -56,18 +57,32 @@
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
target_arch = "arm", target_arch = "aarch64")))]
pub use nvptx::*;
-
- #[cfg(
- // x86/x86_64:
- any(target_arch = "x86", target_arch = "x86_64")
- )]
- pub use runtime::{__unstable_detect_feature, __Feature};
}
-#[cfg(
- // x86/x86_64:
- any(target_arch = "x86", target_arch = "x86_64")
-)]
+/// Run-time feature detection.
+#[doc(hidden)]
+pub mod __vendor_runtime {
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64",
+ all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64"))))]
+ pub use runtime::core::*;
+
+ // Re-exports `coresimd` run-time building blocks for usage in the
+ // `stdsimd` run-time.
+ #[cfg(all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64")))]
+ #[doc(hidden)]
+ pub mod __runtime {
+ pub use runtime::*;
+ }
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64",
+ all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64"))))]
#[macro_use]
mod runtime;
diff --git a/coresimd/src/runtime/aarch64.rs b/coresimd/src/runtime/aarch64.rs
new file mode 100644
index 0000000..fbbf856
--- /dev/null
+++ b/coresimd/src/runtime/aarch64.rs
@@ -0,0 +1,47 @@
+//! Run-time feature detection on ARM Aarch64.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+    ("neon", $unstable_detect_feature:path) => {
+        // FIXME: "neon" aliases `asimd`; remove once Aarch64 neon is renamed to asimd
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
+    };
+    ("asimd", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
+    };
+    ("pmull", $unstable_detect_feature:path) => {
+        $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
+    };
+    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
+}
+
+/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
+/// for a particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+ /// ARM Advanced SIMD (ASIMD) - Aarch64
+ asimd,
+ /// Polynomial Multiply
+ pmull,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+ let mut value: usize = 0;
+ {
+ let mut enable_feature = |f| {
+ if x.has_feature(&f) {
+ value = bit::set(value, f as u32);
+ }
+ };
+ enable_feature(__Feature::asimd);
+ enable_feature(__Feature::pmull);
+ }
+ value
+}
diff --git a/coresimd/src/runtime/arm.rs b/coresimd/src/runtime/arm.rs
new file mode 100644
index 0000000..4c4dbb4
--- /dev/null
+++ b/coresimd/src/runtime/arm.rs
@@ -0,0 +1,43 @@
+//! Run-time feature detection on ARM Aarch32.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+ ("neon", $unstable_detect_feature:path) => {
+ $unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
+ };
+ ("pmull", $unstable_detect_feature:path) => {
+ $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
+ };
+ ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
+}
+
+/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
+/// particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+ /// ARM Advanced SIMD (NEON) - Aarch32
+ neon,
+ /// Polynomial Multiply
+ pmull,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+ let mut value: usize = 0;
+ {
+ let mut enable_feature = |f| {
+ if x.has_feature(&f) {
+ value = bit::set(value, f as u32);
+ }
+ };
+ enable_feature(__Feature::neon);
+ enable_feature(__Feature::pmull);
+ }
+ value
+}
diff --git a/coresimd/src/runtime/linux/aarch64.rs b/coresimd/src/runtime/linux/aarch64.rs
new file mode 100644
index 0000000..ac7713e
--- /dev/null
+++ b/coresimd/src/runtime/linux/aarch64.rs
@@ -0,0 +1,20 @@
+//! Run-time feature detection for Aarch64 on Linux and `core`.
+
+use runtime::bit;
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
+///
+/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
+impl HasFeature for AuxVec {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ match *x {
+ asimd => bit::test(self.hwcap, 1),
+ pmull => bit::test(self.hwcap, 4),
+ }
+ }
+}
diff --git a/coresimd/src/runtime/linux/arm.rs b/coresimd/src/runtime/linux/arm.rs
new file mode 100644
index 0000000..c4383d7
--- /dev/null
+++ b/coresimd/src/runtime/linux/arm.rs
@@ -0,0 +1,20 @@
+//! Run-time feature detection for ARM32 on Linux and `core`.
+
+use runtime::bit;
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
+///
+/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
+impl HasFeature for AuxVec {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ match *x {
+ neon => bit::test(self.hwcap, 12),
+ pmull => bit::test(self.hwcap2, 1),
+ }
+ }
+}
diff --git a/coresimd/src/runtime/linux/auxv.rs b/coresimd/src/runtime/linux/auxv.rs
new file mode 100644
index 0000000..e612134
--- /dev/null
+++ b/coresimd/src/runtime/linux/auxv.rs
@@ -0,0 +1,46 @@
+//! ELF Auxiliary Vector
+//!
+//! The auxiliary vector is a memory region in a running ELF program's stack
+//! composed of (key: usize, value: usize) pairs.
+//!
+//! The keys used in the aux vector are platform dependent. For Linux, they are
+//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
+//! CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
+//!
+//! There is no perfect way of reading the auxiliary vector.
+//!
+//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
+//! - `stdsimd`: if `getauxval` is not available, it will try to read
+//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
+//!
+//! For more information about when `getauxval` is available check the great
+//! [`auxv` crate documentation][auxv_docs].
+//!
+//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
+//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
+
+/// Key to access the CPU Hardware capabilities bitfield.
+pub const AT_HWCAP: usize = 16;
+/// Key to access the CPU Hardware capabilities 2 bitfield.
+pub const AT_HWCAP2: usize = 26;
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read, all the bits in the bitfield
+/// are set to zero.
+#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+#[derive(Debug, Copy, Clone)]
+pub struct AuxVec {
+ pub hwcap: usize,
+ pub hwcap2: usize,
+}
+
+/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
+///
+/// If an entry cannot be read, all the bits in the bitfield
+/// are set to zero.
+#[cfg(target_arch = "aarch64")]
+#[derive(Debug, Copy, Clone)]
+pub struct AuxVec {
+ pub hwcap: usize,
+}
diff --git a/coresimd/src/runtime/linux/mod.rs b/coresimd/src/runtime/linux/mod.rs
new file mode 100644
index 0000000..7080078
--- /dev/null
+++ b/coresimd/src/runtime/linux/mod.rs
@@ -0,0 +1,12 @@
+//! Run-time feature detection for ARM and PowerPC64 on Linux.
+
+#[cfg(target_arch = "arm")]
+mod arm;
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+mod powerpc64;
+
+pub mod auxv;
diff --git a/coresimd/src/runtime/linux/powerpc64.rs b/coresimd/src/runtime/linux/powerpc64.rs
new file mode 100644
index 0000000..f1a444d
--- /dev/null
+++ b/coresimd/src/runtime/linux/powerpc64.rs
@@ -0,0 +1,22 @@
+//! Run-time feature detection for PowerPC64 on Linux and `core`.
+
+use runtime::linux::auxv::AuxVec;
+use runtime::arch::{HasFeature, __Feature};
+
+/// Probe the ELF Auxiliary vector for hardware capabilities
+///
+/// The values are part of the platform-specific [asm/cputable.h][cputable]
+///
+/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
+impl HasFeature for AuxVec {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ // note: the PowerPC values are the mask to do the test (instead of the
+ // index of the bit to test like in ARM and Aarch64)
+ match *x {
+ altivec => self.hwcap & 0x10000000 != 0,
+ vsx => self.hwcap & 0x00000080 != 0,
+ power8 => self.hwcap2 & 0x80000000 != 0,
+ }
+ }
+}
diff --git a/coresimd/src/runtime/macros.rs b/coresimd/src/runtime/macros.rs
index e8278bb..e84bc5d 100644
--- a/coresimd/src/runtime/macros.rs
+++ b/coresimd/src/runtime/macros.rs
@@ -2,8 +2,9 @@
/// Is a feature supported by the host CPU?
///
-/// This macro performs run-time feature detection. It returns true if the host
-/// CPU in which the binary is running on supports a particular feature.
+/// This macro performs run-time feature detection in `coresimd`. It returns
+/// true if the host CPU on which the binary is running supports a
+/// particular feature.
#[macro_export]
macro_rules! cfg_feature_enabled {
($name:tt) => (
@@ -14,26 +15,16 @@
}
#[cfg(not(target_feature = $name))]
{
- __unstable_detect_feature!($name)
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ __unstable_detect_feature!($name,
+ $crate::__vendor_runtime::__unstable_detect_feature)
+ }
+            #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+            {
+                compile_error!("cfg_feature_enabled! is not supported in this architecture")
+            }
}
}
)
}
-
-/// In all unsupported architectures using the macro is an error
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
- target_arch = "arm", target_arch = "aarch64")))]
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
- ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
-}
-
-#[cfg(test)]
-mod tests {
- #[cfg(target_arch = "x86_64")]
- #[test]
- fn test_macros() {
- assert!(cfg_feature_enabled!("sse"));
- }
-}
diff --git a/coresimd/src/runtime/mod.rs b/coresimd/src/runtime/mod.rs
index 6ad497f..b0833a7 100644
--- a/coresimd/src/runtime/mod.rs
+++ b/coresimd/src/runtime/mod.rs
@@ -1,17 +1,64 @@
//! Run-time feature detection
-mod cache;
-mod bit;
+pub mod cache;
+pub mod bit;
#[macro_use]
-mod macros;
+pub mod macros;
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
-mod x86;
-pub use self::x86::__Feature;
-use self::x86::detect_features;
+pub mod x86;
-/// Performs run-time feature detection.
-#[doc(hidden)]
-pub fn __unstable_detect_feature(x: __Feature) -> bool {
- cache::test(x as u32, detect_features)
+#[cfg(target_arch = "arm")]
+#[macro_use]
+pub mod arm;
+
+#[cfg(target_arch = "aarch64")]
+#[macro_use]
+pub mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+#[macro_use]
+pub mod powerpc64;
+
+#[cfg(all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64")))]
+pub mod linux;
+
+/// Exports architecture specific functionality for
+/// reuse in `stdsimd`.
+pub mod arch {
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub use super::x86::{detect_features, __Feature};
+
+ #[cfg(target_arch = "arm")]
+ pub use runtime::arm::{detect_features, __Feature};
+
+ #[cfg(target_arch = "aarch64")]
+ pub use runtime::aarch64::{detect_features, __Feature};
+
+ #[cfg(target_arch = "powerpc64")]
+ pub use runtime::powerpc64::{detect_features, __Feature};
+
+ /// Interface for querying whether a feature is enabled.
+ pub trait HasFeature {
+ /// Is the feature `x` enabled at run-time?
+ fn has_feature(&mut self, x: &__Feature) -> bool;
+ }
+}
+
+/// Run-time feature detection exposed by `coresimd`.
+pub mod core {
+ pub use super::arch::__Feature;
+
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub use super::arch::detect_features;
+
+ /// Performs run-time feature detection.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ #[doc(hidden)]
+ pub fn __unstable_detect_feature(x: __Feature) -> bool {
+ super::cache::test(x as u32, detect_features)
+ }
}
diff --git a/coresimd/src/runtime/powerpc64.rs b/coresimd/src/runtime/powerpc64.rs
new file mode 100644
index 0000000..8ee02e4
--- /dev/null
+++ b/coresimd/src/runtime/powerpc64.rs
@@ -0,0 +1,49 @@
+//! Run-time feature detection on PowerPC64.
+use runtime::bit;
+use runtime::arch::HasFeature;
+
+#[macro_export]
+#[doc(hidden)]
+macro_rules! __unstable_detect_feature {
+ ("altivec", $unstable_detect_feature:path) => {
+ $unstable_detect_feature($crate::__vendor_runtime::__Feature::altivec{})
+ };
+ ("vsx", $unstable_detect_feature:path) => {
+ $unstable_detect_feature($crate::__vendor_runtime::__Feature::vsx{})
+ };
+ ("power8", $unstable_detect_feature:path) => {
+ $unstable_detect_feature($crate::__vendor_runtime::__Feature::power8{})
+ };
+ ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
+}
+
+/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
+/// for a particular feature.
+///
+/// PLEASE: do not use this, it is an implementation detail subject to change.
+#[doc(hidden)]
+#[allow(non_camel_case_types)]
+#[repr(u8)]
+pub enum __Feature {
+ /// Altivec
+ altivec,
+ /// VSX
+ vsx,
+ /// Power8
+ power8,
+}
+
+pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
+ let mut value: usize = 0;
+ {
+ let mut enable_feature = |f| {
+ if x.has_feature(&f) {
+ value = bit::set(value, f as u32);
+ }
+ };
+ enable_feature(__Feature::altivec);
+ enable_feature(__Feature::vsx);
+ enable_feature(__Feature::power8);
+ }
+ value
+}
diff --git a/coresimd/src/runtime/x86.rs b/coresimd/src/runtime/x86.rs
index b952598..a994e61 100644
--- a/coresimd/src/runtime/x86.rs
+++ b/coresimd/src/runtime/x86.rs
@@ -29,133 +29,133 @@
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
- ("mmx") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::mmx{}) };
- ("sse") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse{}) };
- ("sse2") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse2{})
+ ("mmx", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::mmx{}) };
+ ("sse", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse{}) };
+ ("sse2", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse2{})
};
- ("sse3") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse3{})
+ ("sse3", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse3{})
};
- ("ssse3") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::ssse3{})
+ ("ssse3", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::ssse3{})
};
- ("sse4.1") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse4_1{})
+ ("sse4.1", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse4_1{})
};
- ("sse4.2") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse4_2{})
+ ("sse4.2", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse4_2{})
};
- ("sse4a") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::sse4a{})
+ ("sse4a", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::sse4a{})
};
- ("avx") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx{})
+ ("avx", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx{})
};
- ("avx2") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx2{})
+ ("avx2", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx2{})
};
- ("avx512f") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512f{})
+ ("avx512f", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512f{})
};
- ("avx512cd") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512cd{})
+ ("avx512cd", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512cd{})
};
- ("avx512er") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512er{})
+ ("avx512er", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512er{})
};
- ("avx512pf") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512pf{})
+ ("avx512pf", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512pf{})
};
- ("avx512bw") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512bw{})
+ ("avx512bw", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512bw{})
};
- ("avx512dq") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512dq{})
+ ("avx512dq", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512dq{})
};
- ("avx512vl") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512vl{})
+ ("avx512vl", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512vl{})
};
- ("avx512ifma") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512_ifma{})
+ ("avx512ifma", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512_ifma{})
};
- ("avx512vbmi") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512_vbmi{})
+ ("avx512vbmi", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512_vbmi{})
};
- ("avx512vpopcntdq") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::avx512_vpopcntdq{})
+ ("avx512vpopcntdq", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::avx512_vpopcntdq{})
};
- ("fma") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::fma{})
+ ("fma", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::fma{})
};
- ("bmi") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::bmi{})
+ ("bmi", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::bmi{})
};
- ("bmi2") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::bmi2{})
+ ("bmi2", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::bmi2{})
};
- ("abm") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::abm{})
+ ("abm", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::abm{})
};
- ("lzcnt") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::abm{})
+ ("lzcnt", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::abm{})
};
- ("tbm") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::tbm{})
+ ("tbm", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::tbm{})
};
- ("popcnt") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::popcnt{})
+ ("popcnt", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::popcnt{})
};
- ("fxsr") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::fxsr{})
+ ("fxsr", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::fxsr{})
};
- ("xsave") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::xsave{})
+ ("xsave", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::xsave{})
};
- ("xsaveopt") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::xsaveopt{})
+ ("xsaveopt", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::xsaveopt{})
};
- ("xsaves") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::xsaves{})
+ ("xsaves", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::xsaves{})
};
- ("xsavec") => {
- $crate::vendor::__unstable_detect_feature(
- $crate::vendor::__Feature::xsavec{})
+ ("xsavec", $unstable_detect_feature:path) => {
+ $unstable_detect_feature(
+ $crate::__vendor_runtime::__Feature::xsavec{})
};
- ($t:tt) => {
+ ($t:tt, $unstable_detect_feature:path) => {
compile_error!(concat!("unknown target feature: ", $t))
};
}
diff --git a/coresimd/tests/cpu-detection.rs b/coresimd/tests/cpu-detection.rs
new file mode 100644
index 0000000..2f7af1f
--- /dev/null
+++ b/coresimd/tests/cpu-detection.rs
@@ -0,0 +1,47 @@
+#![feature(cfg_target_feature)]
+#![cfg_attr(feature = "strict", deny(warnings))]
+#![cfg_attr(feature = "cargo-clippy",
+ allow(option_unwrap_used, print_stdout, use_debug))]
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[macro_use]
+extern crate coresimd;
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() {
+ println!("sse: {:?}", cfg_feature_enabled!("sse"));
+ println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
+ println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
+ println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
+ println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
+ println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
+ println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
+ println!("avx: {:?}", cfg_feature_enabled!("avx"));
+ println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
+ println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
+ println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
+ println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
+ println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
+ println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
+ println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
+ println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
+ println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
+ println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
+ println!(
+ "avx512_vpopcntdq {:?}",
+ cfg_feature_enabled!("avx512vpopcntdq")
+ );
+ println!("fma: {:?}", cfg_feature_enabled!("fma"));
+ println!("abm: {:?}", cfg_feature_enabled!("abm"));
+ println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
+ println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
+ println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
+ println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
+ println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
+ println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
+ println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
+ println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
+ println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
+ println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
+}
diff --git a/src/lib.rs b/src/lib.rs
index 8a5a83f..277ad2a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -123,25 +123,28 @@
//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
-#![feature(macro_reexport, const_fn, const_atomic_usize_new)]
-
-/// We re-export run-time feature detection for those architectures that have
-/// suport for it in `core`:
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_reexport(cfg_feature_enabled, __unstable_detect_feature)]
+#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
+#![cfg_attr(target_os = "linux", feature(linkage))]
extern crate coresimd;
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-extern crate coresimd;
+/// Re-export run-time feature detection macros.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
+ target_arch = "aarch64", target_arch = "powerpc64"))]
+pub use coresimd::__unstable_detect_feature;
/// Platform dependent vendor intrinsics.
pub mod vendor {
pub use coresimd::vendor::*;
+}
- #[cfg(all(target_os = "linux",
- any(target_arch = "arm", target_arch = "aarch64",
- target_arch = "powerpc64")))]
- pub use super::runtime::{__unstable_detect_feature, __Feature};
+/// Run-time feature detection.
+#[doc(hidden)]
+pub mod __vendor_runtime {
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64",
+ all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64"))))]
+ pub use runtime::std::*;
}
/// Platform independent SIMD vector types and operations.
@@ -149,8 +152,25 @@
pub use coresimd::simd::*;
}
-#[cfg(all(target_os = "linux",
- any(target_arch = "arm", target_arch = "aarch64",
- target_arch = "powerpc64")))]
+/// The `stdsimd` run-time.
#[macro_use]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64",
+ all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64"))))]
mod runtime;
+
+/// Error gracefully on architectures without run-time detection support.
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
+ all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64")))))]
+#[doc(hidden)]
+#[macro_export]
+macro_rules! cfg_feature_enabled {
+ ($name:tt) => (
+ {
+ compile_error!("cfg_feature_enabled! is not supported on this architecture")
+ }
+ )
+}
diff --git a/src/runtime/aarch64.rs b/src/runtime/aarch64.rs
deleted file mode 100644
index 273c314..0000000
--- a/src/runtime/aarch64.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-//! Run-time feature detection on ARM Aarch64.
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
- ("neon") => {
- // FIXME: this should be removed once we rename Aarch64 neon to asimd
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
- };
- ("asimd") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::asimd{})
- };
- ("pmull") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
- };
- ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
- /// ARM Advanced SIMD (ASIMD) - Aarch64
- asimd,
- /// Polynomial Multiply
- pmull,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
- let mut value: usize = 0;
- {
- let mut enable_feature = |f| {
- if x.has_feature(&f) {
- value = bit::set(value, f as u32);
- }
- };
- enable_feature(__Feature::asimd);
- enable_feature(__Feature::pmull);
- }
- value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl linux::FeatureQuery for linux::AuxVec {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- if let Some(caps) = self.lookup(linux::AT::HWCAP) {
- match *x {
- asimd => caps & (1 << 1) != 0,
- pmull => caps & (1 << 4) != 0,
- }
- } else {
- false
- }
- }
-}
-
-impl linux::FeatureQuery for linux::CpuInfo {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- match *x {
- asimd => self.field("Features").has("asimd"),
- pmull => self.field("Features").has("pmull"),
- }
- }
-}
diff --git a/src/runtime/arm.rs b/src/runtime/arm.rs
deleted file mode 100644
index f9a71a0..0000000
--- a/src/runtime/arm.rs
+++ /dev/null
@@ -1,85 +0,0 @@
-//! Run-time feature detection on ARM Aarch32.
-
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
- ("neon") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::neon{})
- };
- ("pmull") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::pmull{})
- };
- ($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
-/// particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
- /// ARM Advanced SIMD (NEON) - Aarch32
- neon,
- /// Polynomial Multiply
- pmull,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
- let mut value: usize = 0;
- {
- let mut enable_feature = |f| {
- if x.has_feature(&f) {
- value = bit::set(value, f as u32);
- }
- };
- enable_feature(__Feature::neon);
- enable_feature(__Feature::pmull);
- }
- value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl linux::FeatureQuery for linux::AuxVec {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- match *x {
- neon => self.lookup(linux::AT::HWCAP)
- .map(|caps| caps & (1 << 12) != 0)
- .unwrap_or(false),
- pmull => self.lookup(linux::AT::HWCAP2)
- .map(|caps| caps & (1 << 1) != 0)
- .unwrap_or(false),
- }
- }
-}
-
-/// Is the CPU known to have a broken NEON unit?
-///
-/// See https://crbug.com/341598.
-fn has_broken_neon(cpuinfo: &linux::CpuInfo) -> bool {
- cpuinfo.field("CPU implementer") == "0x51"
- && cpuinfo.field("CPU architecture") == "7"
- && cpuinfo.field("CPU variant") == "0x1"
- && cpuinfo.field("CPU part") == "0x04d"
- && cpuinfo.field("CPU revision") == "0"
-}
-
-impl linux::FeatureQuery for linux::CpuInfo {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- match *x {
- neon => {
- self.field("Features").has("neon") && !has_broken_neon(self)
- }
- pmull => self.field("Features").has("pmull"),
- }
- }
-}
diff --git a/src/runtime/bit.rs b/src/runtime/bit.rs
deleted file mode 100644
index 42483e5..0000000
--- a/src/runtime/bit.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-//! Bit manipulation utilities
-
-/// Sets the `bit` of `x`.
-pub const fn set(x: usize, bit: u32) -> usize {
- x | 1 << bit
-}
-
-/// Tests the `bit` of `x`.
-pub const fn test(x: usize, bit: u32) -> bool {
- x & (1 << bit) != 0
-}
diff --git a/src/runtime/cache.rs b/src/runtime/cache.rs
deleted file mode 100644
index 6aab8ad..0000000
--- a/src/runtime/cache.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-//! Cache of run-time feature detection
-
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::usize;
-
-use super::bit;
-
-/// This global variable is a bitset used to cache the features supported by
-/// the
-/// CPU.
-static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX);
-
-/// Test the `bit` of the storage. If the storage has not been initialized,
-/// initializes it with the result of `f()`.
-///
-/// On its first invocation, it detects the CPU features and caches them in the
-/// `FEATURES` global variable as an `AtomicUsize`.
-///
-/// It uses the `__Feature` variant to index into this variable as a bitset. If
-/// the bit is set, the feature is enabled, and otherwise it is disabled.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-pub fn test<F>(bit: u32, f: F) -> bool
-where
- F: FnOnce() -> usize,
-{
- if CACHE.load(Ordering::Relaxed) == usize::MAX {
- CACHE.store(f(), Ordering::Relaxed);
- }
- bit::test(CACHE.load(Ordering::Relaxed), bit)
-}
diff --git a/src/runtime/linux/aarch64.rs b/src/runtime/linux/aarch64.rs
new file mode 100644
index 0000000..7c1399e
--- /dev/null
+++ b/src/runtime/linux/aarch64.rs
@@ -0,0 +1,14 @@
+//! Run-time feature detection for ARM Aarch64 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+impl HasFeature for CpuInfo {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ match *x {
+ asimd => self.field("Features").has("asimd"),
+ pmull => self.field("Features").has("pmull"),
+ }
+ }
+}
diff --git a/src/runtime/linux/arm.rs b/src/runtime/linux/arm.rs
new file mode 100644
index 0000000..7023112
--- /dev/null
+++ b/src/runtime/linux/arm.rs
@@ -0,0 +1,27 @@
+//! Run-time feature detection for ARM Aarch32 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+/// Is the CPU known to have a broken NEON unit?
+///
+/// See https://crbug.com/341598.
+fn has_broken_neon(cpuinfo: &CpuInfo) -> bool {
+ cpuinfo.field("CPU implementer") == "0x51"
+ && cpuinfo.field("CPU architecture") == "7"
+ && cpuinfo.field("CPU variant") == "0x1"
+ && cpuinfo.field("CPU part") == "0x04d"
+ && cpuinfo.field("CPU revision") == "0"
+}
+
+impl HasFeature for CpuInfo {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ match *x {
+ neon => {
+ self.field("Features").has("neon") && !has_broken_neon(self)
+ }
+ pmull => self.field("Features").has("pmull"),
+ }
+ }
+}
diff --git a/src/runtime/linux/auxv/libc.rs b/src/runtime/linux/auxv/libc.rs
new file mode 100644
index 0000000..b1dc724
--- /dev/null
+++ b/src/runtime/linux/auxv/libc.rs
@@ -0,0 +1,104 @@
+//! Reads the ELF Auxiliary Vector using libc's `getauxval`.
+
+use coresimd::__vendor_runtime::__runtime::linux::auxv;
+use self::auxv::{AuxVec, AT_HWCAP};
+use std::mem;
+
+mod ffi {
+ pub type F = unsafe extern "C" fn(usize) -> usize;
+ #[allow(improper_ctypes)]
+ extern "C" {
+ #[linkage = "extern_weak"]
+ pub static getauxval: *const ();
+ }
+}
+
+/// Returns the value of the ELF Auxiliary Vector associated with `key`.
+///
+/// This only fails if the `getauxval` function is not linked.
+///
+/// The errno value is not checked: if the key is not found
+/// `getauxval` returns zero. A zero value is therefore ambiguous (key
+/// missing or no features detected), and in both cases the `auxv`
+/// function below returns `Err` to allow more accurate run-time
+/// feature detection to run afterwards.
+fn getauxval(key: usize) -> Result<usize, ()> {
+ unsafe {
+ if ffi::getauxval.is_null() {
+ return Err(());
+ }
+
+ let ffi_getauxval: ffi::F = mem::transmute(ffi::getauxval);
+ Ok(ffi_getauxval(key))
+ }
+}
+
+/// Computes the entries of the Auxiliary Vector cache by
+/// calling libc's `getauxval(3)`.
+pub fn auxv() -> Result<AuxVec, ()> {
+ if let Ok(hwcap) = getauxval(AT_HWCAP) {
+ #[cfg(target_arch = "aarch64")]
+ {
+ if hwcap != 0 {
+ return Ok(AuxVec { hwcap });
+ }
+ }
+ #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+ {
+ if let Ok(hwcap2) = getauxval(auxv::AT_HWCAP2) {
+ if hwcap != 0 && hwcap2 != 0 {
+ return Ok(AuxVec { hwcap, hwcap2 });
+ }
+ }
+ }
+ }
+ Err(())
+}
+
+#[cfg(test)]
+mod tests {
+ extern crate auxv as auxv_crate;
+ use super::*;
+
+ // Reads the Auxiliary Vector key from getauxval()
+ // using the auxv crate.
+ fn auxv_crate_get(key: usize) -> Option<usize> {
+ use self::auxv_crate::AuxvType;
+ use self::auxv_crate::getauxval::Getauxval;
+ let q = auxv_crate::getauxval::NativeGetauxval {};
+ match q.getauxval(key as AuxvType) {
+ Ok(v) => Some(v as usize),
+ Err(_) => None,
+ }
+ }
+
+ #[test]
+ fn auxv_dump() { // prints the `AuxVec` obtained through libc's `getauxval`
+ if let Ok(auxvec) = auxv() {
+ println!("{:?}", auxvec);
+ } else {
+ println!("reading the auxiliary vector with getauxval failed!");
+ }
+ }
+
+ #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+ #[test]
+ fn auxv_crate() {
+ let v = auxv();
+ if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+ assert_eq!(v.unwrap().hwcap, hwcap);
+ }
+ if let Some(hwcap2) = auxv_crate_get(auxv::AT_HWCAP2) {
+ assert_eq!(v.unwrap().hwcap2, hwcap2);
+ }
+ }
+
+ #[cfg(target_arch = "aarch64")]
+ #[test]
+ fn auxv_crate() {
+ let v = auxv();
+ if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+ assert_eq!(v.unwrap().hwcap, hwcap);
+ }
+ }
+}
diff --git a/src/runtime/linux/auxv/mod.rs b/src/runtime/linux/auxv/mod.rs
new file mode 100644
index 0000000..554c963
--- /dev/null
+++ b/src/runtime/linux/auxv/mod.rs
@@ -0,0 +1,4 @@
+//! Reads ELF Auxiliary Vector
+
+pub mod libc;
+pub mod proc_self;
diff --git a/src/runtime/linux/auxv/proc_self.rs b/src/runtime/linux/auxv/proc_self.rs
new file mode 100644
index 0000000..d341edb
--- /dev/null
+++ b/src/runtime/linux/auxv/proc_self.rs
@@ -0,0 +1,143 @@
+//! Reads the ELF Auxiliary Vector from `/proc/self/auxv`.
+
+use coresimd::__vendor_runtime::__runtime::linux::auxv;
+use self::auxv::{AuxVec, AT_HWCAP};
+
+use std::mem;
+
+/// Tries to read the ELF Auxiliary Vector from `/proc/self/auxv`.
+///
+/// Errors if the file cannot be read, or if any of the entries that are
+/// looked up (`AT_HWCAP`, and `AT_HWCAP2` where it is used) is missing.
+pub fn auxv() -> Result<AuxVec, ()> {
+ auxv_from_file("/proc/self/auxv")
+}
+
+fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
+ use std::io::Read;
+ let mut file = ::std::fs::File::open(file).or_else(|_| Err(()))?;
+
+ // See https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h
+ //
+ // The auxiliary vector contains at most 32 (key,value) fields: from
+ // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of
+ // 2*32 `usize` elements is enough to read the whole vector.
+ let mut buf = [0usize; 64];
+ {
+ let raw: &mut [u8; 64 * mem::size_of::<usize>()] =
+ unsafe { mem::transmute(&mut buf) };
+ file.read(raw).or_else(|_| Err(()))?;
+ }
+ auxv_from_buf(&buf)
+}
+
+fn auxv_from_buf(buf: &[usize; 64]) -> Result<AuxVec, ()> {
+ #[cfg(target_arch = "aarch64")]
+ {
+ for el in buf.chunks(2) {
+ match el[0] {
+ AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }),
+ _ => (),
+ }
+ }
+ }
+
+ #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+ {
+ let mut hwcap = None;
+ let mut hwcap2 = None;
+ for el in buf.chunks(2) {
+ match el[0] {
+ AT_HWCAP => hwcap = Some(el[1]),
+ auxv::AT_HWCAP2 => hwcap2 = Some(el[1]),
+ _ => (),
+ }
+ }
+ if hwcap.is_some() && hwcap2.is_some() {
+ return Ok(AuxVec {
+ hwcap: hwcap.unwrap(),
+ hwcap2: hwcap2.unwrap(),
+ });
+ }
+ }
+ Err(())
+}
+
+#[cfg(test)]
+mod tests {
+ extern crate auxv as auxv_crate;
+ use super::*;
+
+ // Reads the Auxiliary Vector key from /proc/self/auxv
+ // using the auxv crate.
+ fn auxv_crate_get(key: usize) -> Option<usize> {
+ use self::auxv_crate::AuxvType;
+ use self::auxv_crate::procfs::search_procfs_auxv;
+ let k = key as AuxvType;
+ match search_procfs_auxv(&[k]) {
+ Ok(v) => Some(v[&k] as usize),
+ Err(_) => None,
+ }
+ }
+
+ #[test]
+ fn auxv_dump() {
+ if let Ok(auxvec) = auxv() {
+ println!("{:?}", auxvec);
+ } else {
+ println!("reading /proc/self/auxv failed!");
+ }
+ }
+
+ #[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
+ #[test]
+ fn auxv_crate() {
+ let v = auxv();
+ if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+ assert_eq!(v.unwrap().hwcap, hwcap);
+ }
+ if let Some(hwcap2) = auxv_crate_get(auxv::AT_HWCAP2) {
+ assert_eq!(v.unwrap().hwcap2, hwcap2);
+ }
+ }
+
+ #[cfg(target_arch = "aarch64")]
+ #[test]
+ fn auxv_crate() {
+ let v = auxv();
+ if let Some(hwcap) = auxv_crate_get(AT_HWCAP) {
+ assert_eq!(v.unwrap().hwcap, hwcap);
+ }
+ }
+
+ #[cfg(all(target_arch = "arm", target_pointer_width = "32"))]
+ #[test]
+ fn linux_rpi3() {
+ let v = auxv_from_file("src/runtime/linux/test_data/linux-rpi3.auxv")
+ .unwrap();
+ assert_eq!(v.hwcap, 4174038);
+ assert_eq!(v.hwcap2, 16);
+ }
+
+ #[cfg(all(target_arch = "arm", target_pointer_width = "32"))]
+ #[test]
+ #[should_panic]
+ fn linux_macos_vb() {
+ let _ = auxv_from_file(
+ "src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"
+ ).unwrap();
+ // This file is incomplete (it contains hwcap but not hwcap2). We
+ // want to fall back to /proc/cpuinfo in this case, so reading it
+ // must fail. Disabled checks: assert_eq!(v.hwcap, 126614527);
+ // assert_eq!(v.hwcap2, 0);
+ }
+
+ #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))]
+ #[test]
+ fn linux_x64() {
+ let v = auxv_from_file(
+ "src/runtime/linux/test_data/linux-x64-i7-6850k.auxv",
+ ).unwrap();
+ assert_eq!(v.hwcap, 3219913727);
+ }
+}
diff --git a/src/runtime/linux/auxvec.rs b/src/runtime/linux/auxvec.rs
deleted file mode 100644
index 9f8d7f3..0000000
--- a/src/runtime/linux/auxvec.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-//! Reads /proc/self/auxv on Linux systems
-
-use std::prelude::v1::*;
-use std::slice;
-use std::mem;
-
-/// Simple abstraction for the ELF Auxiliary Vector
-///
-/// the elf.h provide the layout of the single entry as auxv_t.
-/// The desugared version is a usize tag followed by a union with
-/// the same storage size.
-///
-/// Cache only the HWCAP and HWCAP2 entries.
-#[derive(Debug)]
-pub struct AuxVec {
- hwcap: Option<usize>,
- hwcap2: Option<usize>,
-}
-
-#[derive(Clone, Debug, PartialEq)]
-#[allow(dead_code)]
-/// ELF Auxiliary vector entry types
-///
-/// The entry types are specified in [linux/auxvec.h][auxvec_h].
-///
-/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
-pub enum AT {
- /// CPU Hardware capabilities, it is a bitfield.
- HWCAP = 16,
- /// CPU Hardware capabilities, additional bitfield.
- HWCAP2 = 26,
-}
-
-impl AuxVec {
- /// Reads the ELF Auxiliary Vector
- ///
- /// Try to read `/proc/self/auxv`.
- // TODO: Make use of getauxval once it is available in a
- // reliable way.
- pub fn new() -> Result<Self, ::std::io::Error> {
- use std::io::Read;
- let mut file = ::std::fs::File::open("/proc/self/auxv")?;
- let mut buf = [0usize; 64];
- let mut raw = unsafe {
- slice::from_raw_parts_mut(
- buf.as_mut_ptr() as *mut u8,
- buf.len() * mem::size_of::<usize>(),
- )
- };
-
- let _ = file.read(&mut raw)?;
-
- mem::forget(raw);
-
- let mut auxv = AuxVec {
- hwcap: None,
- hwcap2: None,
- };
-
- for el in buf.chunks(2) {
- if el[0] == AT::HWCAP as usize {
- auxv.hwcap = Some(el[1]);
- }
- if el[0] == AT::HWCAP2 as usize {
- auxv.hwcap2 = Some(el[1]);
- }
- }
-
- Ok(auxv)
- }
-
- /// Returns the value for the AT key
- pub fn lookup(&self, key: AT) -> Option<usize> {
- match key {
- AT::HWCAP => self.hwcap,
- AT::HWCAP2 => self.hwcap2,
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[cfg(target_os = "linux")]
- #[test]
- fn test_auxvec_linux() {
- let auxvec = AuxVec::new().unwrap();
- println!("{:?}", auxvec.lookup(AT::HWCAP));
- println!("{:?}", auxvec);
- }
-}
diff --git a/src/runtime/linux/cpuinfo.rs b/src/runtime/linux/cpuinfo.rs
index fac1032..5c86122 100644
--- a/src/runtime/linux/cpuinfo.rs
+++ b/src/runtime/linux/cpuinfo.rs
@@ -88,7 +88,7 @@
#[cfg(target_os = "linux")]
#[test]
- fn test_cpuinfo_linux() {
+ fn raw_dump() {
let cpuinfo = CpuInfo::new().unwrap();
if cpuinfo.field("vendor_id") == "GenuineIntel" {
assert!(cpuinfo.field("flags").exists());
@@ -131,7 +131,7 @@
";
#[test]
- fn test_cpuinfo_linux_core_duo_t6500() {
+ fn core_duo_t6500() {
let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap();
assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel");
assert_eq!(cpuinfo.field("cpu family"), "6");
@@ -171,7 +171,7 @@
";
#[test]
- fn test_cpuinfo_linux_arm_cortex_a53() {
+ fn arm_cortex_a53() {
let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap();
assert_eq!(
cpuinfo.field("Processor"),
@@ -199,7 +199,7 @@
CPU revision : 1";
#[test]
- fn test_cpuinfo_linux_arm_cortex_a57() {
+ fn arm_cortex_a57() {
let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap();
assert_eq!(
cpuinfo.field("Processor"),
@@ -240,7 +240,7 @@
machine : CHRP IBM pSeries (emulated by qemu)";
#[test]
- fn test_cpuinfo_linux_power8_powerkvm() {
+ fn power8_powerkvm() {
let cpuinfo = CpuInfo::from_str(POWER8E_POWERKVM).unwrap();
assert_eq!(cpuinfo.field("cpu"), "POWER8E (raw), altivec supported");
@@ -292,7 +292,7 @@
machine : CHRP IBM,9133-55A";
#[test]
- fn test_cpuinfo_linux_power5p() {
+ fn power5p() {
let cpuinfo = CpuInfo::from_str(POWER5P).unwrap();
assert_eq!(cpuinfo.field("cpu"), "POWER5+ (gs)");
diff --git a/src/runtime/linux/mod.rs b/src/runtime/linux/mod.rs
index de7ebfb..9ff760b 100644
--- a/src/runtime/linux/mod.rs
+++ b/src/runtime/linux/mod.rs
@@ -1,41 +1,40 @@
-//! Run-time feature detection for ARM on linux
+//! Run-time feature detection for ARM and PowerPC64 on Linux.
+
+use coresimd::__vendor_runtime::__runtime::cache;
+use coresimd::__vendor_runtime::__runtime::arch;
+pub use self::arch::__Feature;
+
+#[cfg(target_arch = "arm")]
+mod arm;
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64;
+
+#[cfg(target_arch = "powerpc64")]
+mod powerpc64;
+
+mod auxv;
mod cpuinfo;
-pub use self::cpuinfo::CpuInfo;
-
-mod auxvec;
-pub use self::auxvec::*;
-
-use super::__Feature;
-
-pub trait FeatureQuery {
- fn has_feature(&mut self, x: &__Feature) -> bool;
-}
-
-fn detect_features_impl<T: FeatureQuery>(x: T) -> usize {
- #[cfg(target_arch = "arm")]
- {
- super::arm::detect_features(x)
- }
- #[cfg(target_arch = "aarch64")]
- {
- super::aarch64::detect_features(x)
- }
- #[cfg(target_arch = "powerpc64")]
- {
- super::powerpc64::detect_features(x)
- }
-}
/// Detects CPU features:
pub fn detect_features() -> usize {
- // Try to read the ELF Auxiliary Vector
- if let Ok(v) = auxvec::AuxVec::new() {
- return detect_features_impl(v);
+ // Try to read the ELF Auxiliary Vector using libc's getauxval:
+ if let Ok(v) = auxv::libc::auxv() {
+ return arch::detect_features(v);
}
- // Try to read /proc/cpuinfo
+ // Try to read the ELF Auxiliary Vector from /proc/self/auxv:
+ if let Ok(v) = auxv::proc_self::auxv() {
+ return arch::detect_features(v);
+ }
+ // Try to read /proc/cpuinfo:
if let Ok(v) = cpuinfo::CpuInfo::new() {
- return detect_features_impl(v);
+ return arch::detect_features(v);
}
// Otherwise all features are disabled
0
}
+
+/// Performs run-time feature detection.
+pub fn __unstable_detect_feature(x: __Feature) -> bool {
+ cache::test(x as u32, detect_features)
+}
diff --git a/src/runtime/linux/powerpc64.rs b/src/runtime/linux/powerpc64.rs
new file mode 100644
index 0000000..ace3ae4
--- /dev/null
+++ b/src/runtime/linux/powerpc64.rs
@@ -0,0 +1,18 @@
+//! Run-time feature detection for PowerPC64 on Linux in `stdsimd`.
+
+use super::cpuinfo::CpuInfo;
+use coresimd::__vendor_runtime::__runtime::arch::{HasFeature, __Feature};
+
+/// Check for altivec support only
+///
+/// PowerPC's /proc/cpuinfo lacks a proper Feature field,
+/// but `altivec` support is indicated in the `cpu` field.
+impl HasFeature for CpuInfo {
+ fn has_feature(&mut self, x: &__Feature) -> bool {
+ use self::__Feature::*;
+ match *x {
+ altivec => self.field("cpu").has("altivec"),
+ _ => false,
+ }
+ }
+}
diff --git a/src/runtime/linux/test_data/linux-rpi3.auxv b/src/runtime/linux/test_data/linux-rpi3.auxv
new file mode 100644
index 0000000..0538e66
--- /dev/null
+++ b/src/runtime/linux/test_data/linux-rpi3.auxv
Binary files differ
diff --git a/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv b/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv
new file mode 100644
index 0000000..6afe1b3
--- /dev/null
+++ b/src/runtime/linux/test_data/linux-x64-i7-6850k.auxv
Binary files differ
diff --git a/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv
new file mode 100644
index 0000000..75abc02
--- /dev/null
+++ b/src/runtime/linux/test_data/macos-virtualbox-linux-x86-4850HQ.auxv
Binary files differ
diff --git a/src/runtime/macros.rs b/src/runtime/macros.rs
index e8278bb..de24246 100644
--- a/src/runtime/macros.rs
+++ b/src/runtime/macros.rs
@@ -2,8 +2,9 @@
/// Is a feature supported by the host CPU?
///
-/// This macro performs run-time feature detection. It returns true if the host
-/// CPU in which the binary is running on supports a particular feature.
+/// This macro performs run-time feature detection in `stdsimd`. It returns
+/// true if the host CPU on which the binary is running supports a
+/// particular feature.
#[macro_export]
macro_rules! cfg_feature_enabled {
($name:tt) => (
@@ -14,26 +15,9 @@
}
#[cfg(not(target_feature = $name))]
{
- __unstable_detect_feature!($name)
+ __unstable_detect_feature!($name,
+ $crate::__vendor_runtime::__unstable_detect_feature)
}
}
)
}
-
-/// In all unsupported architectures using the macro is an error
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
- target_arch = "arm", target_arch = "aarch64")))]
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
- ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
-}
-
-#[cfg(test)]
-mod tests {
- #[cfg(target_arch = "x86_64")]
- #[test]
- fn test_macros() {
- assert!(cfg_feature_enabled!("sse"));
- }
-}
diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs
index 9a749a8..169bf42 100644
--- a/src/runtime/mod.rs
+++ b/src/runtime/mod.rs
@@ -1,40 +1,69 @@
//! Run-time feature detection
-mod cache;
-mod bit;
-
-#[macro_use]
-mod macros;
-
-#[cfg(all(target_arch = "arm", target_os = "linux"))]
-#[macro_use]
-mod arm;
-#[cfg(all(target_arch = "arm", target_os = "linux"))]
-pub use self::arm::__Feature;
-
-#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
-#[macro_use]
-mod aarch64;
-#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
-pub use self::aarch64::__Feature;
-
-#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
-#[macro_use]
-mod powerpc64;
-#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
-pub use self::powerpc64::__Feature;
#[cfg(all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64")))]
mod linux;
-#[cfg(all(target_os = "linux",
- any(target_arch = "arm", target_arch = "aarch64",
- target_arch = "powerpc64")))]
-pub use self::linux::detect_features;
+#[macro_use]
+mod macros;
-/// Performs run-time feature detection.
-#[doc(hidden)]
-pub fn __unstable_detect_feature(x: __Feature) -> bool {
- cache::test(x as u32, detect_features)
+/// Run-time feature detection exposed by `stdsimd`.
+pub mod std {
+ // The x86/x86_64 run-time from `coresimd` is re-exported as is.
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub use coresimd::__vendor_runtime::*;
+
+ #[cfg(all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64")))]
+ pub use super::linux::{detect_features, __Feature};
+
+ /// Performs run-time feature detection.
+ ///
+ /// This is used on those platforms in which run-time detection differs
+ /// between `core` and `std`.
+ #[cfg(all(target_os = "linux",
+ any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64")))]
+ #[doc(hidden)]
+ pub fn __unstable_detect_feature(x: __Feature) -> bool {
+ ::coresimd::__vendor_runtime::__runtime::cache::test(
+ x as u32,
+ detect_features,
+ )
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::super::*;
+
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ #[test]
+ fn detect_feature() {
+ println!("avx {}", cfg_feature_enabled!("avx"));
+ }
+
+ #[cfg(all(target_arch = "arm", target_os = "linux"))]
+ #[test]
+ fn detect_feature() {
+ println!("neon {}", cfg_feature_enabled!("neon"));
+ println!("pmull {}", cfg_feature_enabled!("pmull"));
+ }
+
+ #[cfg(all(target_arch = "aarch64", target_os = "linux"))]
+ #[test]
+ fn detect_feature() {
+ println!("asimd {}", cfg_feature_enabled!("asimd"));
+ println!("pmull {}", cfg_feature_enabled!("pmull"));
+ }
+
+ #[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
+ #[test]
+ fn detect_feature() {
+ println!("altivec {}", cfg_feature_enabled!("altivec"));
+ println!("vsx {}", cfg_feature_enabled!("vsx"));
+ println!("power8 {}", cfg_feature_enabled!("power8"));
+ }
}
diff --git a/src/runtime/powerpc64.rs b/src/runtime/powerpc64.rs
deleted file mode 100644
index df59855..0000000
--- a/src/runtime/powerpc64.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-//! Run-time feature detection on PowerPC64.
-use super::{bit, linux};
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
- ("altivec") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::altivec{})
- };
- ("vsx") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::vsx{})
- };
- ("power8") => {
- $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::power8{})
- };
- ($t:tt) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
-}
-
-/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
- /// Altivec
- altivec,
- /// VSX
- vsx,
- /// Power8
- power8,
-}
-
-pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
- let mut value: usize = 0;
- {
- let mut enable_feature = |f| {
- if x.has_feature(&f) {
- value = bit::set(value, f as u32);
- }
- };
- enable_feature(__Feature::altivec);
- enable_feature(__Feature::vsx);
- enable_feature(__Feature::power8);
- }
- value
-}
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/cputable.h][cputable]
-///
-/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
-impl linux::FeatureQuery for linux::AuxVec {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- match *x {
- altivec => self.lookup(linux::AT::HWCAP)
- .map(|caps| caps & 0x10000000 != 0)
- .unwrap_or(false),
- vsx => self.lookup(linux::AT::HWCAP)
- .map(|caps| caps & 0x00000080 != 0)
- .unwrap_or(false),
- power8 => self.lookup(linux::AT::HWCAP2)
- .map(|caps| caps & 0x80000000 != 0)
- .unwrap_or(false),
- }
- }
-}
-
-/// Check for altivec support only
-///
-/// PowerPC's /proc/cpuinfo lacks a proper Feature field,
-/// but `altivec` support is indicated in the `cpu` field.
-impl linux::FeatureQuery for linux::CpuInfo {
- fn has_feature(&mut self, x: &__Feature) -> bool {
- use self::__Feature::*;
- match *x {
- altivec => self.field("cpu").has("altivec"),
- _ => false,
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- #[test]
- fn detect_feature() {
- println!("altivec {}", __unstable_detect_feature!("altivec"));
- println!("vsx {}", __unstable_detect_feature!("vsx"));
- println!("power8 {}", __unstable_detect_feature!("power8"));
- }
-}
diff --git a/tests/cpu-detection.rs b/tests/cpu-detection.rs
index b272b1e..3efb61a 100644
--- a/tests/cpu-detection.rs
+++ b/tests/cpu-detection.rs
@@ -2,7 +2,9 @@
#![cfg_attr(feature = "strict", deny(warnings))]
#![cfg_attr(feature = "cargo-clippy", allow(option_unwrap_used))]
-#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(any(target_arch = "arm", target_arch = "aarch64",
+ target_arch = "x86", target_arch = "x86_64",
+ target_arch = "powerpc64"))]
#[macro_use]
extern crate stdsimd;
@@ -20,3 +22,50 @@
println!("asimd: {}", cfg_feature_enabled!("asimd"));
println!("pmull: {}", cfg_feature_enabled!("pmull"));
}
+
+#[test]
+#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
+fn powerpc64_linux() { // Smoke test: prints `cfg_feature_enabled!` for each PowerPC64 feature; asserts nothing.
+ println!("altivec: {}", cfg_feature_enabled!("altivec"));
+ println!("vsx: {}", cfg_feature_enabled!("vsx"));
+ println!("power8: {}", cfg_feature_enabled!("power8"));
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() { // Smoke test: prints `cfg_feature_enabled!` for each x86 feature name; asserts nothing.
+ println!("sse: {:?}", cfg_feature_enabled!("sse"));
+ println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
+ println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
+ println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
+ println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
+ println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
+ println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
+ println!("avx: {:?}", cfg_feature_enabled!("avx"));
+ println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
+ println!("avx512f {:?}", cfg_feature_enabled!("avx512f")); // NOTE(review): avx512* labels omit the ':' used by the others
+ println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
+ println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
+ println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
+ println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
+ println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
+ println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
+ println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
+ println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
+ println!(
+ "avx512_vpopcntdq {:?}",
+ cfg_feature_enabled!("avx512vpopcntdq")
+ );
+ println!("fma: {:?}", cfg_feature_enabled!("fma"));
+ println!("abm: {:?}", cfg_feature_enabled!("abm"));
+ println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
+ println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
+ println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
+ println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
+ println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
+ println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
+ println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
+ println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
+ println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
+ println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
+}