|
| 1 | +//! This module implements minimal run-time feature detection for x86. |
| 2 | +//! |
| 3 | +//! The features are detected using the `detect_features` function below. This function |
| 4 | +//! uses the CPUID instruction to read the feature flags from the CPU and encodes them in |
| 5 | +//! a `usize` where each bit position represents whether a feature is available (bit is set) |
| 6 | +//! or unavailable (bit is cleared). |
| 7 | +//! |
| 8 | +//! The enum `__Feature` is used to map bit positions to feature names, and |
| 9 | +//! the `__unstable_detect_feature!` macro is used to map string literals (e.g. |
| 10 | +//! "avx") to these bit positions (e.g. `__Feature::avx`). |
| 11 | +//! |
| 12 | +//! |
| 13 | +//! The run-time feature detection is performed by the |
| 14 | +//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call, |
| 15 | +//! this function queries the CPU for the available features and stores them in |
| 16 | +//! a global `AtomicUsize` variable. Every query is then answered by just checking whether the |
| 17 | +//! feature bit in this global variable is set or cleared. |
| 18 | +use ::std::sync::atomic::{AtomicUsize, Ordering}; |
| 19 | + |
/// This macro maps the string-literal feature names to values of the
/// `__Feature` enum at compile-time. The feature names used are the same as
/// those of rustc `target_feature` and `cfg_target_feature` features.
///
/// Each arm expands to a call of `__unstable_detect_feature` with the matching
/// `__Feature` variant. Note that `"abm"` and `"lzcnt"` both map to
/// `__Feature::abm`. Any other token produces a compile-time error.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
    ("sse") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse{}) };
    ("sse2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse2{}) };
    ("sse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse3{}) };
    ("ssse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::ssse3{}) };
    ("sse4.1") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_1{}) };
    ("sse4.2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_2{}) };
    ("avx") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx{}) };
    ("avx2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx2{}) };
    ("fma") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::fma{}) };
    ("bmi") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi{}) };
    ("bmi2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi2{}) };
    ("abm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
    // LZCNT is reported through the same bit as ABM (see `__Feature::abm`).
    ("lzcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
    ("tbm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::tbm{}) };
    ("popcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::popcnt{}) };
    // Catch-all: reject any token that is not one of the literals above.
    ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
}
| 46 | + |
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
/// particular feature.
///
/// The discriminants are assigned implicitly in declaration order (starting at
/// zero), so new variants must be added right before `__NonExhaustive` to keep
/// the existing bit positions stable.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
// The variant names intentionally mirror the lower-case rustc feature names.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum __Feature {
    /// SSE (Streaming SIMD Extensions)
    sse,
    /// SSE2 (Streaming SIMD Extensions 2)
    sse2,
    /// SSE3 (Streaming SIMD Extensions 3)
    sse3,
    /// SSSE3 (Supplemental Streaming SIMD Extensions 3)
    ssse3,
    /// SSE4.1 (Streaming SIMD Extensions 4.1)
    sse4_1,
    /// SSE4.2 (Streaming SIMD Extensions 4.2)
    sse4_2,
    /// AVX (Advanced Vector Extensions)
    avx,
    /// AVX2 (Advanced Vector Extensions 2)
    avx2,
    /// FMA (Fused Multiply Add)
    fma,
    /// BMI1 (Bit Manipulation Instructions 1)
    bmi,
    /// BMI2 (Bit Manipulation Instructions 2)
    bmi2,
    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero Count) on Intel
    abm,
    /// TBM (Trailing Bit Manipulation)
    tbm,
    /// POPCNT (Population Count)
    popcnt,

    #[doc(hidden)]
    __NonExhaustive
}
| 87 | + |
/// Returns `x` with the bit at position `bit` switched on.
///
/// In debug builds `bit` is asserted to be smaller than 32.
fn set_bit(x: usize, bit: u32) -> usize {
    debug_assert!(bit < 32);
    let mask: usize = 1 << bit;
    x | mask
}
| 92 | + |
/// Reports whether the bit at position `bit` of `x` is set.
///
/// In debug builds `bit` is asserted to be smaller than 32.
fn test_bit(x: usize, bit: u32) -> bool {
    debug_assert!(bit < 32);
    let mask: usize = 1 << bit;
    // `mask` has exactly one bit set, so the masked value is either 0 or `mask`.
    x & mask == mask
}
| 97 | + |
/// Reports whether bit `idx` of `v` is set, by shifting that bit down to the
/// least-significant position and inspecting it there.
///
/// In debug builds `idx` is asserted to be smaller than 32.
fn inv_test_bit(v: usize, idx: u32) -> bool {
    debug_assert!(idx < 32);
    let shifted = v >> idx;
    shifted & 1 == 1
}
| 102 | + |
| 103 | +/// Run-time feature detection on x86 works by using the CPUID instruction. |
| 104 | +/// |
| 105 | +/// The [CPUID Wikipedia page](https://en.wikipedia.org/wiki/CPUID) contains all |
| 106 | +/// the information about which flags to set to query which values, and in which |
| 107 | +/// registers these are reported. |
| 108 | +/// |
| 109 | +/// The definitive references are: |
| 110 | +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). |
| 111 | +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf). |
| 112 | +/// |
| 113 | +fn detect_features() -> usize { |
| 114 | + let ebx; |
| 115 | + let ecx; |
| 116 | + let edx; |
| 117 | + |
| 118 | + unsafe { |
| 119 | + /// To obtain all feature flags we need two CPUID queries: |
| 120 | +
|
| 121 | + /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits" |
| 122 | + /// This gives us most of the CPU features in ECX and EDX (see below), |
| 123 | + asm!("cpuid" |
| 124 | + : "={ecx}"(ecx), "={edx}"(edx) |
| 125 | + : "{eax}"(0x00000001u32), "{ecx}"(0 as u32) |
| 126 | + : :); |
| 127 | + |
| 128 | + /// 2. EAX=7, ECX=0: Queries "Extended Features" |
| 129 | + /// This gives us information about bmi,bmi2, and avx2 support (see below). |
| 130 | + asm!("cpuid" |
| 131 | + : "={ebx}"(ebx) |
| 132 | + : "{eax}"(0x00000007u32), "{ecx}"(0 as u32) |
| 133 | + : :); |
| 134 | + } |
| 135 | + |
| 136 | + let mut value: usize = 0; |
| 137 | + |
| 138 | + // CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX (unneeded): |
| 139 | + if inv_test_bit(ebx, 3) { value = set_bit(value, __Feature::bmi as u32); } |
| 140 | + if inv_test_bit(ebx, 5) { value = set_bit(value, __Feature::avx2 as u32); } |
| 141 | + if inv_test_bit(ebx, 8) { value = set_bit(value, __Feature::bmi2 as u32); } |
| 142 | + |
| 143 | + // CPUID call with EAX=1 => feature bits in ECX and EDX: |
| 144 | + if inv_test_bit(ecx, 0) { value = set_bit(value, __Feature::sse3 as u32); } |
| 145 | + if inv_test_bit(ecx, 5) { value = set_bit(value, __Feature::abm as u32); } |
| 146 | + if inv_test_bit(ecx, 9) { value = set_bit(value, __Feature::ssse3 as u32); } |
| 147 | + if inv_test_bit(ecx, 12) { value = set_bit(value, __Feature::fma as u32); } |
| 148 | + if inv_test_bit(ecx, 19) { value = set_bit(value, __Feature::sse4_1 as u32); } |
| 149 | + if inv_test_bit(ecx, 20) { value = set_bit(value, __Feature::sse4_2 as u32); } |
| 150 | + if inv_test_bit(ecx, 21) { value = set_bit(value, __Feature::tbm as u32); } |
| 151 | + if inv_test_bit(ecx, 23) { value = set_bit(value, __Feature::popcnt as u32); } |
| 152 | + if inv_test_bit(ecx, 28) { value = set_bit(value, __Feature::avx as u32); } |
| 153 | + |
| 154 | + if inv_test_bit(edx, 25) { value = set_bit(value, __Feature::sse as u32); } |
| 155 | + if inv_test_bit(edx, 26) { value = set_bit(value, __Feature::sse2 as u32); } |
| 156 | + |
| 157 | + value |
| 158 | +} |
| 159 | + |
| 160 | +/// This global variable is a bitset used to cache the features supported by the |
| 161 | +/// CPU. |
| 162 | +static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX); |
| 163 | + |
| 164 | +/// Performs run-time feature detection. |
| 165 | +/// |
| 166 | +/// On its first invocation, it detects the CPU features and caches them in the |
| 167 | +/// `FEATURES` global variable as an `AtomicUsize`. |
| 168 | +/// |
| 169 | +/// It uses the `__Feature` variant to index into this variable as a bitset. If |
| 170 | +/// the bit is set, the feature is enabled, and otherwise it is disabled. |
| 171 | +/// |
| 172 | +/// PLEASE: do not use this, it is an implementation detail subject to change. |
| 173 | +#[doc(hidden)] |
| 174 | +pub fn __unstable_detect_feature(x: __Feature) -> bool { |
| 175 | + if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX { |
| 176 | + FEATURES.store(detect_features(), Ordering::Relaxed); |
| 177 | + } |
| 178 | + test_bit(FEATURES.load(Ordering::Relaxed), x as u32) |
| 179 | +} |
| 180 | + |
#[cfg(test)]
mod tests {
    /// Prints, for every supported feature name, what `cfg_feature_enabled!`
    /// reports for it. Nothing is asserted; the output is only visible when
    /// the test harness does not capture stdout.
    #[test]
    fn runtime_detection_x86_nocapture() {
        // The feature names are passed on as literal tokens, so the list is
        // expanded by a local macro instead of being iterated at run-time.
        macro_rules! report {
            ($($feature:tt),*) => {
                $(
                    println!(concat!($feature, ": {:?}"), cfg_feature_enabled!($feature));
                )*
            };
        }

        report!(
            "sse", "sse2", "sse3", "ssse3", "sse4.1", "sse4.2",
            "avx", "avx2", "abm", "bmi", "bmi2", "tbm",
            "popcnt", "lzcnt", "fma"
        );
    }
}
0 commit comments