diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index be60e694..bda2b641 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -16,6 +16,8 @@ doctest = false rand_xoshiro = "0.6" # To compare float builtins against rustc_apfloat = "0.2.1" +# Really a dev dependency, but dev dependencies can't be optional +iai-callgrind = { version = "0.14.0", optional = true } [dependencies.compiler_builtins] path = "../compiler-builtins" @@ -47,9 +49,16 @@ no-sys-f16-f64-convert = [] # Skip tests that rely on f16 symbols being available on the system no-sys-f16 = ["no-sys-f16-f64-convert"] +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] +# NOTE: benchmarks must be run with `--no-default-features` or with +# `-p testcrate`, otherwise the default `compiler-builtins` feature of the +# `compiler_builtins` crate gets activated, resulting in linker errors. + [[bench]] name = "float_add" harness = false @@ -85,3 +94,8 @@ harness = false [[bench]] name = "float_pow" harness = false + +[[bench]] +name = "mem_icount" +harness = false +required-features = ["icount"] diff --git a/testcrate/benches/mem_icount.rs b/testcrate/benches/mem_icount.rs new file mode 100644 index 00000000..be3d13df --- /dev/null +++ b/testcrate/benches/mem_icount.rs @@ -0,0 +1,474 @@ +//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This +//! is stable enough to be tested in CI. + +use std::hint::black_box; +use std::{ops, slice}; + +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; + +const PAGE_SIZE: usize = 0x1000; + +#[derive(Clone)] +#[repr(C, align(0x1000))] +struct Page([u8; PAGE_SIZE]); + +/// A buffer that is page-aligned by default, with an optional offset to create a +/// misalignment. +struct AlignedSlice { + buf: Box<[Page]>, + len: usize, + offset: usize, +} + +impl AlignedSlice { + /// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from + /// page alignment. + fn new_zeroed(len: usize, offset: usize) -> Self { + assert!(offset < PAGE_SIZE); + let total_len = len + offset; + let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 }; + let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice(); + AlignedSlice { buf, len, offset } + } +} + +impl ops::Deref for AlignedSlice { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::<u8>().add(self.offset), self.len) } + } +} + +impl ops::DerefMut for AlignedSlice { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + slice::from_raw_parts_mut( + self.buf.as_mut_ptr().cast::<u8>().add(self.offset), + self.len, + ) + } + } +} + +mod mcpy { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + let mut src = AlignedSlice::new_zeroed(len, s_off); + let dst = AlignedSlice::new_zeroed(len, d_off); + src.fill(1); + (len, src, dst) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 28, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 36, s_off: 0, d_off: 0 }, + Cfg { len: 60, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 68, s_off: 0, d_off: 0 }, + Cfg { len: 128, s_off: 0, d_off: 0 }, + Cfg { len: 256, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 1024, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: 1048576, s_off: 0, d_off: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 28, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 36, s_off: 65, d_off: 65 }, + Cfg { len: 60, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 68, s_off: 65, d_off: 65 }, + Cfg { len: 128, s_off: 65, d_off: 65 }, + Cfg { len: 256, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 1024, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: 1048576, s_off: 65, d_off: 65 }, + ], + setup = setup, + )] + #[benches::misaligned( + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 28, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 36, s_off: 65, d_off: 66 }, + Cfg { len: 60, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 68, s_off: 65, d_off: 66 }, + Cfg { len: 128, s_off: 65, d_off: 66 }, + Cfg { len: 256, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 1024, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: 1048576, s_off: 65, d_off: 66 }, + ], + setup = setup, + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcpy( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcpy; benchmarks = bench); +} + +mod mset { + use super::*; + + struct Cfg { + len: usize, + offset: usize, + } + + fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) { + println!("{len} bytes, {offset} offset"); + (len, AlignedSlice::new_zeroed(len, offset)) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, offset: 0 }, + Cfg { len: 32, offset: 0 }, + Cfg { len: 64, offset: 0 }, + Cfg { len: 512, offset: 0 }, + Cfg { len: 4096, offset: 0 }, + Cfg { len: 1048576, offset: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, offset: 65 }, + Cfg { len: 32, offset: 65 }, + Cfg { len: 64, offset: 65 }, + Cfg { len: 512, offset: 65 }, + Cfg { len: 4096, offset: 65 }, + Cfg { len: 1048576, offset: 65 }, + ], + setup = setup, + )] + fn bench((len, mut dst): (usize, AlignedSlice)) { + unsafe { + black_box(memset( + black_box(dst.as_mut_ptr()), + black_box(27), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memset; benchmarks = bench); +} + +mod mcmp { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("{len} bytes, {s_off} src offset, {d_off} dst offset"); + let b1 = AlignedSlice::new_zeroed(len, s_off); + let mut b2 = AlignedSlice::new_zeroed(len, d_off); + b2[len - 1] = 1; + (len, b1, b2) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: 1048576, s_off: 0, d_off: 0 }, + ], + setup = setup + )] + #[benches::offset( + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: 1048576, s_off: 65, d_off: 65 }, + ], + setup = setup + )] + #[benches::misaligned( + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: 1048576, s_off: 65, d_off: 66 }, + ], + setup = setup + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcmp( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcmp; benchmarks = bench); +} + +mod mmove { + use super::*; + use Spread::{Large, Medium, Small}; + + struct Cfg { + len: usize, + spread: Spread, + off: usize, + } + + enum Spread { + /// `src` and `dst` are close. + Small, + /// `src` and `dst` are halfway offset in the buffer. + Medium, + /// `src` and `dst` only overlap by a single byte. + Large, + } + + fn calculate_spread(len: usize, spread: Spread) -> usize { + match spread { + Small => 1, + Medium => len / 2, + Large => len - 1, + } + } + + fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("{len} bytes, {spread} spread, {off} offset"); + assert!(spread < len, "otherwise this just tests memcpy"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[..len].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("{len} bytes, {spread} spread, {off} offset"); + assert!(spread < len, "otherwise this just tests memcpy"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[spread..].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + #[library_benchmark] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: 1048576, spread: Small, off: 0 }, + ], + setup = setup_forward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: 1048576, spread: Medium, off: 0 }, + ], + setup = setup_forward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: 1048576, spread: Large, off: 0 }, + ], + setup = setup_forward + )] + #[benches::small_spread_offset( + args = [ + Cfg { len: 16, spread: Small, off: 63 }, + Cfg { len: 32, spread: Small, off: 63 }, + Cfg { len: 64, spread: Small, off: 63 }, + Cfg { len: 512, spread: Small, off: 63 }, + Cfg { len: 4096, spread: Small, off: 63 }, + Cfg { len: 1048576, spread: Small, off: 63 }, + ], + setup = setup_forward + )] + #[benches::medium_spread_offset( + args = [ + Cfg { len: 16, spread: Medium, off: 63 }, + Cfg { len: 32, spread: Medium, off: 63 }, + Cfg { len: 64, spread: Medium, off: 63 }, + Cfg { len: 512, spread: Medium, off: 63 }, + Cfg { len: 4096, spread: Medium, off: 63 }, + Cfg { len: 1048576, spread: Medium, off: 63 }, + ], + setup = setup_forward + )] + #[benches::large_spread_offset( + args = [ + Cfg { len: 16, spread: Large, off: 63 }, + Cfg { len: 32, spread: Large, off: 63 }, + Cfg { len: 64, spread: Large, off: 63 }, + Cfg { len: 512, spread: Large, off: 63 }, + Cfg { len: 4096, spread: Large, off: 63 }, + Cfg { len: 1048576, spread: Large, off: 63 }, + ], + setup = setup_forward + )] + fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the start of the buffer toward the end + unsafe { + black_box(memmove( + black_box(buf[spread..].as_mut_ptr()), + black_box(buf.as_ptr()), + black_box(len), + )); + } + } + + #[library_benchmark] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: 1048576, spread: Small, off: 0 }, + ], + setup = setup_backward + )] + #[benches::middle( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: 1048576, spread: Medium, off: 0 }, + ], + setup = setup_backward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: 1048576, spread: Large, off: 0 }, + ], + setup = setup_backward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 63 }, + Cfg { len: 32, spread: Small, off: 63 }, + Cfg { len: 64, spread: Small, off: 63 }, + Cfg { len: 512, spread: Small, off: 63 }, + Cfg { len: 4096, spread: Small, off: 63 }, + Cfg { len: 1048576, spread: Small, off: 63 }, + ], + setup = setup_backward + )] + #[benches::middle_off( + args = [ + Cfg { len: 16, spread: Medium, off: 63 }, + Cfg { len: 32, spread: Medium, off: 63 }, + Cfg { len: 64, spread: Medium, off: 63 }, + Cfg { len: 512, spread: Medium, off: 63 }, + Cfg { len: 4096, spread: Medium, off: 63 }, + Cfg { len: 1048576, spread: Medium, off: 63 }, + ], + setup = setup_backward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 63 }, + Cfg { len: 32, spread: Large, off: 63 }, + Cfg { len: 64, spread: Large, off: 63 }, + Cfg { len: 512, spread: Large, off: 63 }, + Cfg { len: 4096, spread: Large, off: 63 }, + Cfg { len: 1048576, spread: Large, off: 63 }, + ], + setup = setup_backward + )] + fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the end of the buffer toward the start + unsafe { + black_box(memmove( + black_box(buf.as_mut_ptr()), + black_box(buf[spread..].as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memmove; benchmarks = forward, backward); +} + +use mcmp::memcmp; +use mcpy::memcpy; +use mmove::memmove; +use mset::memset; + +main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);