Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 18bff1d

Browse files
authoredSep 25, 2017
Merge pull request rust-lang#31 from gnzlbg/runtime_detection
[runtime] initial run-time feature detection support
2 parents 4c5378a + b8f8946 commit 18bff1d

File tree

5 files changed

+262
-2
lines changed

5 files changed

+262
-2
lines changed
 

‎Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ debug = true
1919
opt-level = 3
2020

2121
[dev-dependencies]
22-
assert-instr = { path = "assert-instr" }
22+
assert-instr = { path = "assert-instr" }

‎src/lib.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![allow(dead_code)]
22
#![feature(
33
const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
4-
target_feature, cfg_target_feature, i128_type
4+
target_feature, cfg_target_feature, i128_type, asm, const_atomic_usize_new
55
)]
66
#![cfg_attr(test, feature(proc_macro))]
77

@@ -32,7 +32,9 @@ mod v128;
3232
mod v256;
3333
mod v512;
3434
mod v64;
35+
3536
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
37+
#[macro_use]
3638
mod x86;
3739

3840
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]

‎src/macros.rs

+52
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,55 @@ macro_rules! define_casts {
267267
)+
268268
}
269269
}
270+
271+
/// Is a feature supported by the host CPU?
272+
///
273+
/// This macro performs run-time feature detection. It returns true if the host
274+
/// CPU on which the binary is running supports a particular feature.
///
/// `$name` is a feature name given as a string literal (e.g. `"sse"`). When
/// the feature is already enabled at compile time
/// (`cfg(target_feature = $name)`), the macro expands to `true`; otherwise it
/// expands to `__unstable_detect_feature!($name)`.
275+
#[macro_export]
276+
macro_rules! cfg_feature_enabled {
277+
($name:tt) => (
278+
{
279+
// Feature enabled at compile time: no run-time query is emitted.
#[cfg(target_feature = $name)]
280+
{
281+
true
282+
}
283+
// Feature not enabled at compile time: defer to the per-architecture
// detection macro.
#[cfg(not(target_feature = $name))]
284+
{
285+
__unstable_detect_feature!($name)
286+
}
287+
}
288+
)
289+
}
290+
291+
/// On ARM, features are only detected at compile time using
292+
/// `cfg(target_feature)`, so if this macro is expanded the
293+
/// feature is not supported.
///
/// Only `"neon"` is recognized (and evaluates to `false`); any other token
/// produces a compile error.
294+
#[cfg(any(target_arch = "arm",
295+
target_arch = "aarch64"))]
296+
#[macro_export]
297+
#[doc(hidden)]
298+
macro_rules! __unstable_detect_feature {
299+
("neon") => { false };
300+
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
301+
}
302+
303+
/// On all unsupported architectures (anything other than x86, x86_64, arm and
/// aarch64), using the macro is a compile-time error.
304+
#[cfg(not(any(target_arch = "x86",
305+
target_arch = "x86_64",
306+
target_arch = "arm",
307+
target_arch = "aarch64")))]
308+
#[macro_export]
309+
#[doc(hidden)]
310+
macro_rules! __unstable_detect_feature {
311+
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
312+
}
313+
314+
// Smoke test for the feature-detection macros.
#[cfg(test)]
315+
mod tests {
316+
// Only compiled on x86_64; asserts that "sse" is reported as enabled there
// (presumably because SSE is part of the x86_64 baseline — confirm).
#[cfg(target_arch = "x86_64")]
317+
#[test]
318+
fn test_macros() {
319+
assert!(cfg_feature_enabled!("sse"));
320+
}
321+
}

‎src/x86/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ pub use self::bmi::*;
1111
pub use self::bmi2::*;
1212
pub use self::tbm::*;
1313

14+
pub use self::runtime::{__Feature, __unstable_detect_feature};
15+
1416
#[allow(non_camel_case_types)]
1517
pub type __m128i = ::v128::i8x16;
1618
#[allow(non_camel_case_types)]
@@ -30,3 +32,6 @@ mod abm;
3032
mod bmi;
3133
mod bmi2;
3234
mod tbm;
35+
36+
#[macro_use]
37+
mod runtime;

‎src/x86/runtime.rs

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
//! This module implements minimal run-time feature detection for x86.
2+
//!
3+
//! The features are detected using the `detect_features` function below. This function
4+
//! uses the CPUID instruction to read the feature flags from the CPU and encodes them in
5+
//! an `usize` where each bit position represents whether a feature is available (bit is set)
6+
//! or unavailable (bit is cleared).
7+
//!
8+
//! The enum `__Feature` is used to map bit positions to feature names, and the
9+
//! `__unstable_detect_feature!` macro is used to map string literals (e.g.
10+
//! "avx") to these bit positions (e.g. `__Feature::avx`).
11+
//!
12+
//!
13+
//! The run-time feature detection is performed by the
14+
//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
15+
//! this function queries the CPU for the available features and stores them in
16+
//! a global `AtomicUsize` variable. Each query is then answered by just checking whether the
17+
//! feature bit in this global variable is set or cleared.
18+
use ::std::sync::atomic::{AtomicUsize, Ordering};
19+
20+
/// This macro maps the string-literal feature names to values of the
21+
/// `__Feature` enum at compile-time. The feature names used are the same as
22+
/// those of rustc `target_feature` and `cfg_target_feature` features.
23+
///
/// Each recognized name expands to a call to the
/// `__unstable_detect_feature` function with the matching `__Feature`
/// variant; any other token produces a compile error.
///
24+
/// PLEASE: do not use this, it is an implementation detail subject to change.
25+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
26+
#[macro_export]
27+
#[doc(hidden)]
28+
macro_rules! __unstable_detect_feature {
29+
("sse") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse{}) };
30+
("sse2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse2{}) };
31+
("sse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse3{}) };
32+
("ssse3") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::ssse3{}) };
33+
("sse4.1") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_1{}) };
34+
("sse4.2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::sse4_2{}) };
35+
("avx") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx{}) };
36+
("avx2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::avx2{}) };
37+
("fma") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::fma{}) };
38+
("bmi") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi{}) };
39+
("bmi2") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::bmi2{}) };
40+
("abm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
41+
// "lzcnt" is an alias of "abm": both names map to `__Feature::abm`.
("lzcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::abm{}) };
42+
("tbm") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::tbm{}) };
43+
("popcnt") => { $crate::vendor::__unstable_detect_feature($crate::vendor::__Feature::popcnt{}) };
44+
// Catch-all: reject unknown feature names at compile time.
($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) };
45+
}
46+
47+
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
48+
/// particular feature.
///
/// The variants are cast with `as u32` and used as bit indices into the
/// cached feature bitset, so their declaration order determines the bit
/// layout.
49+
///
50+
/// PLEASE: do not use this, it is an implementation detail subject to change.
51+
#[doc(hidden)]
52+
#[allow(non_camel_case_types)]
53+
#[repr(u8)]
54+
pub enum __Feature {
55+
/// SSE (Streaming SIMD Extensions)
56+
sse,
57+
/// SSE2 (Streaming SIMD Extensions 2)
58+
sse2,
59+
/// SSE3 (Streaming SIMD Extensions 3)
60+
sse3,
61+
/// SSSE3 (Supplemental Streaming SIMD Extensions 3)
62+
ssse3,
63+
/// SSE4.1 (Streaming SIMD Extensions 4.1)
64+
sse4_1,
65+
/// SSE4.2 (Streaming SIMD Extensions 4.2)
66+
sse4_2,
67+
/// AVX (Advanced Vector Extensions)
68+
avx,
69+
/// AVX2 (Advanced Vector Extensions 2)
70+
avx2,
71+
/// FMA (Fused Multiply Add)
72+
fma,
73+
/// BMI1 (Bit Manipulation Instructions 1)
74+
bmi,
75+
/// BMI2 (Bit Manipulation Instructions 2)
76+
bmi2,
77+
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero Count) on Intel
78+
abm,
79+
/// TBM (Trailing Bit Manipulation)
80+
tbm,
81+
/// POPCNT (Population Count)
82+
popcnt,
83+
84+
// Hidden trailing variant; presumably a placeholder that discourages
// exhaustive matching on this enum — confirm intent.
#[doc(hidden)]
85+
__NonExhaustive
86+
}
87+
88+
/// Returns `x` with the bit at position `bit` (counted from the least
/// significant bit) switched on; all other bits are left untouched.
///
/// Debug builds assert that `bit` is below 32.
fn set_bit(x: usize, bit: u32) -> usize {
    debug_assert!(32 > bit);
    let mask: usize = 1 << bit;
    x | mask
}
92+
93+
/// Reports whether the bit at position `bit` (counted from the least
/// significant bit) is set in `x`.
///
/// Debug builds assert that `bit` is below 32.
fn test_bit(x: usize, bit: u32) -> bool {
    debug_assert!(32 > bit);
    let mask: usize = 1 << bit;
    (x & mask) != 0
}
97+
98+
/// Reports whether bit `idx` of `v` is set, by shifting `v` right by `idx`
/// positions and inspecting the lowest bit of the result.
///
/// Debug builds assert that `idx` is below 32.
fn inv_test_bit(v: usize, idx: u32) -> bool {
    debug_assert!(32 > idx);
    let shifted = v >> idx;
    shifted & 1 == 1
}
102+
103+
/// Run-time feature detection on x86 works by using the CPUID instruction.
104+
///
105+
/// The [CPUID Wikipedia page](https://en.wikipedia.org/wiki/CPUID) contains all
106+
/// the information about which flags to set to query which values, and in which
107+
/// registers these are reported.
108+
///
109+
/// The definitive references are:
110+
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: Instruction Set Reference, A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf).
111+
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and System Instructions](http://support.amd.com/TechDocs/24594.pdf).
112+
///
113+
fn detect_features() -> usize {
114+
let ebx;
115+
let ecx;
116+
let edx;
117+
118+
unsafe {
119+
/// To obtain all feature flags we need two CPUID queries:
120+
121+
/// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
122+
/// This gives us most of the CPU features in ECX and EDX (see below),
123+
asm!("cpuid"
124+
: "={ecx}"(ecx), "={edx}"(edx)
125+
: "{eax}"(0x00000001u32), "{ecx}"(0 as u32)
126+
: :);
127+
128+
/// 2. EAX=7, ECX=0: Queries "Extended Features"
129+
/// This gives us information about bmi,bmi2, and avx2 support (see below).
130+
asm!("cpuid"
131+
: "={ebx}"(ebx)
132+
: "{eax}"(0x00000007u32), "{ecx}"(0 as u32)
133+
: :);
134+
}
135+
136+
let mut value: usize = 0;
137+
138+
// CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX (unneeded):
139+
if inv_test_bit(ebx, 3) { value = set_bit(value, __Feature::bmi as u32); }
140+
if inv_test_bit(ebx, 5) { value = set_bit(value, __Feature::avx2 as u32); }
141+
if inv_test_bit(ebx, 8) { value = set_bit(value, __Feature::bmi2 as u32); }
142+
143+
// CPUID call with EAX=1 => feature bits in ECX and EDX:
144+
if inv_test_bit(ecx, 0) { value = set_bit(value, __Feature::sse3 as u32); }
145+
if inv_test_bit(ecx, 5) { value = set_bit(value, __Feature::abm as u32); }
146+
if inv_test_bit(ecx, 9) { value = set_bit(value, __Feature::ssse3 as u32); }
147+
if inv_test_bit(ecx, 12) { value = set_bit(value, __Feature::fma as u32); }
148+
if inv_test_bit(ecx, 19) { value = set_bit(value, __Feature::sse4_1 as u32); }
149+
if inv_test_bit(ecx, 20) { value = set_bit(value, __Feature::sse4_2 as u32); }
150+
if inv_test_bit(ecx, 21) { value = set_bit(value, __Feature::tbm as u32); }
151+
if inv_test_bit(ecx, 23) { value = set_bit(value, __Feature::popcnt as u32); }
152+
if inv_test_bit(ecx, 28) { value = set_bit(value, __Feature::avx as u32); }
153+
154+
if inv_test_bit(edx, 25) { value = set_bit(value, __Feature::sse as u32); }
155+
if inv_test_bit(edx, 26) { value = set_bit(value, __Feature::sse2 as u32); }
156+
157+
value
158+
}
159+
160+
/// This global variable is a bitset used to cache the features supported by the
161+
/// CPU. The value `usize::MAX` means the features have not been detected yet.
162+
static FEATURES: AtomicUsize = AtomicUsize::new(::std::usize::MAX);
163+
164+
/// Performs run-time feature detection.
165+
///
166+
/// On its first invocation, it detects the CPU features and caches them in the
167+
/// `FEATURES` global variable as an `AtomicUsize`.
168+
///
169+
/// It uses the `__Feature` variant to index into this variable as a bitset. If
170+
/// the bit is set, the feature is enabled, and otherwise it is disabled.
171+
///
172+
/// PLEASE: do not use this, it is an implementation detail subject to change.
173+
#[doc(hidden)]
174+
pub fn __unstable_detect_feature(x: __Feature) -> bool {
175+
if FEATURES.load(Ordering::Relaxed) == ::std::usize::MAX {
176+
FEATURES.store(detect_features(), Ordering::Relaxed);
177+
}
178+
test_bit(FEATURES.load(Ordering::Relaxed), x as u32)
179+
}
180+
181+
// Prints the run-time detection result for every supported feature name.
#[cfg(test)]
182+
mod tests {
183+
// This test makes no assertions; it only exercises `cfg_feature_enabled!`
// for each feature name and prints the result. The `_nocapture` suffix
// presumably hints at running the tests with `--nocapture` to see the
// output — confirm.
#[test]
184+
fn runtime_detection_x86_nocapture() {
185+
println!("sse: {:?}", cfg_feature_enabled!("sse"));
186+
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
187+
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
188+
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
189+
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
190+
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
191+
println!("avx: {:?}", cfg_feature_enabled!("avx"));
192+
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
193+
println!("abm: {:?}", cfg_feature_enabled!("abm"));
194+
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
195+
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
196+
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
197+
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
198+
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
199+
println!("fma: {:?}", cfg_feature_enabled!("fma"));
200+
}
201+
}

0 commit comments

Comments
 (0)
Please sign in to comment.